verible-verilog-format: apply it on core directory (#1540)

Formatted using verible-v0.0-3422-g520ca4b9/bin/verible-verilog-format
with the default configuration.

Note: the following two files are not handled correctly by verible:
- core/include/std_cache_pkg.sv
- core/cache_subsystem/cva6_hpdcache_if_adapter.sv
This commit is contained in:
André Sintzoff 2023-10-18 16:36:00 +02:00 committed by GitHub
parent 3d47805dfc
commit 7cd183b710
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
94 changed files with 21423 additions and 19841 deletions

View file

@ -13,56 +13,59 @@
// Date: 20.11.2020 // Date: 20.11.2020
// Description: Functional unit that dispatches CVA6 instructions to accelerators. // Description: Functional unit that dispatches CVA6 instructions to accelerators.
module acc_dispatcher import ariane_pkg::*; import riscv::*; #( module acc_dispatcher
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter type acc_req_t = acc_pkg::accelerator_req_t, import riscv::*;
parameter type acc_resp_t = acc_pkg::accelerator_resp_t, #(
parameter type acc_cfg_t = logic, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter acc_cfg_t AccCfg = '0 parameter type acc_req_t = acc_pkg::accelerator_req_t,
parameter type acc_resp_t = acc_pkg::accelerator_resp_t,
parameter type acc_cfg_t = logic,
parameter acc_cfg_t AccCfg = '0
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
// Interface with the CSR regfile // Interface with the CSR regfile
input logic acc_cons_en_i, // Accelerator memory consistent mode input logic acc_cons_en_i, // Accelerator memory consistent mode
output logic acc_fflags_valid_o, output logic acc_fflags_valid_o,
output logic [4:0] acc_fflags_o, output logic [4:0] acc_fflags_o,
// Interface with the CSRs // Interface with the CSRs
input logic [2:0] fcsr_frm_i, input logic [2:0] fcsr_frm_i,
output logic dirty_v_state_o, output logic dirty_v_state_o,
// Interface with the issue stage // Interface with the issue stage
input scoreboard_entry_t issue_instr_i, input scoreboard_entry_t issue_instr_i,
input logic issue_instr_hs_i, input logic issue_instr_hs_i,
output logic issue_stall_o, output logic issue_stall_o,
input fu_data_t fu_data_i, input fu_data_t fu_data_i,
input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i,
output logic [TRANS_ID_BITS-1:0] acc_trans_id_o, output logic [TRANS_ID_BITS-1:0] acc_trans_id_o,
output xlen_t acc_result_o, output xlen_t acc_result_o,
output logic acc_valid_o, output logic acc_valid_o,
output exception_t acc_exception_o, output exception_t acc_exception_o,
// Interface with the execute stage // Interface with the execute stage
output logic acc_valid_ex_o, // FU executed output logic acc_valid_ex_o, // FU executed
// Interface with the commit stage // Interface with the commit stage
input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
input logic commit_st_barrier_i, // A store barrier was committed input logic commit_st_barrier_i, // A store barrier was committed
// Interface with the load/store unit // Interface with the load/store unit
output logic acc_stall_st_pending_o, output logic acc_stall_st_pending_o,
input logic acc_no_st_pending_i, input logic acc_no_st_pending_i,
input dcache_req_i_t [2:0] dcache_req_ports_i, input dcache_req_i_t [2:0] dcache_req_ports_i,
// Interface with the controller // Interface with the controller
output logic ctrl_halt_o, output logic ctrl_halt_o,
input logic flush_unissued_instr_i, input logic flush_unissued_instr_i,
input logic flush_ex_i, input logic flush_ex_i,
output logic flush_pipeline_o, output logic flush_pipeline_o,
// Interface with cache subsystem // Interface with cache subsystem
output dcache_req_i_t [1:0] acc_dcache_req_ports_o, output dcache_req_i_t [1:0] acc_dcache_req_ports_o,
input dcache_req_o_t [1:0] acc_dcache_req_ports_i, input dcache_req_o_t [1:0] acc_dcache_req_ports_i,
input logic inval_ready_i, input logic inval_ready_i,
output logic inval_valid_o, output logic inval_valid_o,
output logic [63:0] inval_addr_o, output logic [63:0] inval_addr_o,
// Accelerator interface // Accelerator interface
output acc_req_t acc_req_o, output acc_req_t acc_req_o,
input acc_resp_t acc_resp_i input acc_resp_t acc_resp_i
); );
`include "common_cells/registers.svh" `include "common_cells/registers.svh"
@ -96,16 +99,15 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
always_comb begin : stall_issue always_comb begin : stall_issue
unique case (issue_instr_i.fu) unique case (issue_instr_i.fu)
ACCEL: ACCEL:
// 1. We're issuing an accelerator instruction but the dispatcher isn't ready yet // 1. We're issuing an accelerator instruction but the dispatcher isn't ready yet
issue_stall_o = ~acc_ready; issue_stall_o = ~acc_ready;
LOAD: LOAD:
// 2. We're issuing a scalar load but there is an inflight accelerator store. // 2. We're issuing a scalar load but there is an inflight accelerator store.
issue_stall_o = acc_cons_en_i & ~acc_no_st_pending; issue_stall_o = acc_cons_en_i & ~acc_no_st_pending;
STORE: STORE:
// 3. We're issuing a scalar store but there is an inflight accelerator load or store. // 3. We're issuing a scalar store but there is an inflight accelerator load or store.
issue_stall_o = acc_cons_en_i & (~acc_no_st_pending | ~acc_no_ld_pending); issue_stall_o = acc_cons_en_i & (~acc_no_st_pending | ~acc_no_ld_pending);
default: default: issue_stall_o = 1'b0;
issue_stall_o = 1'b0;
endcase endcase
end end
@ -121,30 +123,30 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
logic acc_insn_queue_empty; logic acc_insn_queue_empty;
logic [idx_width(InstructionQueueDepth)-1:0] acc_insn_queue_usage; logic [idx_width(InstructionQueueDepth)-1:0] acc_insn_queue_usage;
logic acc_commit; logic acc_commit;
logic [TRANS_ID_BITS-1:0] acc_commit_trans_id; logic [ TRANS_ID_BITS-1:0] acc_commit_trans_id;
assign acc_data = acc_valid_ex_o ? fu_data_i : '0; assign acc_data = acc_valid_ex_o ? fu_data_i : '0;
fifo_v3 #( fifo_v3 #(
.DEPTH (InstructionQueueDepth), .DEPTH (InstructionQueueDepth),
.FALL_THROUGH(1'b1 ), .FALL_THROUGH(1'b1),
.dtype (fu_data_t ) .dtype (fu_data_t)
) i_acc_insn_queue ( ) i_acc_insn_queue (
.clk_i (clk_i ), .clk_i (clk_i),
.rst_ni (rst_ni ), .rst_ni (rst_ni),
.flush_i (flush_ex_i ), .flush_i (flush_ex_i),
.testmode_i(1'b0 ), .testmode_i(1'b0),
.data_i (fu_data_i ), .data_i (fu_data_i),
.push_i (acc_valid_q ), .push_i (acc_valid_q),
.full_o (/* Unused */ ), .full_o ( /* Unused */),
.data_o (acc_insn_queue_o ), .data_o (acc_insn_queue_o),
.pop_i (acc_insn_queue_pop ), .pop_i (acc_insn_queue_pop),
.empty_o (acc_insn_queue_empty), .empty_o (acc_insn_queue_empty),
.usage_o (acc_insn_queue_usage) .usage_o (acc_insn_queue_usage)
); );
// We are ready if the instruction queue is able to accept at least one more entry. // We are ready if the instruction queue is able to accept at least one more entry.
assign acc_ready = acc_insn_queue_usage < (InstructionQueueDepth-1); assign acc_ready = acc_insn_queue_usage < (InstructionQueueDepth - 1);
/********************************** /**********************************
* Non-speculative instructions * * Non-speculative instructions *
@ -160,17 +162,15 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
logic [NR_SB_ENTRIES-1:0] insn_ready_d, insn_ready_q; logic [NR_SB_ENTRIES-1:0] insn_ready_d, insn_ready_q;
`FF(insn_ready_q, insn_ready_d, '0) `FF(insn_ready_q, insn_ready_d, '0)
always_comb begin: p_non_speculative_ff always_comb begin : p_non_speculative_ff
// Maintain state // Maintain state
insn_pending_d = insn_pending_q; insn_pending_d = insn_pending_q;
insn_ready_d = insn_ready_q; insn_ready_d = insn_ready_q;
// We received a new instruction // We received a new instruction
if (acc_valid_q) if (acc_valid_q) insn_pending_d[acc_data.trans_id] = 1'b1;
insn_pending_d[acc_data.trans_id] = 1'b1;
// Flush all received instructions // Flush all received instructions
if (flush_ex_i) if (flush_ex_i) insn_pending_d = '0;
insn_pending_d = '0;
// An accelerator instruction is no longer speculative. // An accelerator instruction is no longer speculative.
if (acc_commit && insn_pending_q[acc_commit_trans_id]) begin if (acc_commit && insn_pending_q[acc_commit_trans_id]) begin
@ -179,9 +179,8 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
end end
// An accelerator instruction was issued. // An accelerator instruction was issued.
if (acc_req_o.req_valid) if (acc_req_o.req_valid) insn_ready_d[acc_req_o.trans_id] = 1'b0;
insn_ready_d[acc_req_o.trans_id] = 1'b0; end : p_non_speculative_ff
end: p_non_speculative_ff
/************************* /*************************
* Accelerator request * * Accelerator request *
@ -193,18 +192,18 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
acc_pkg::accelerator_req_t acc_req_int; acc_pkg::accelerator_req_t acc_req_int;
fall_through_register #( fall_through_register #(
.T(acc_pkg::accelerator_req_t) .T(acc_pkg::accelerator_req_t)
) i_accelerator_req_register ( ) i_accelerator_req_register (
.clk_i (clk_i ), .clk_i (clk_i),
.rst_ni (rst_ni ), .rst_ni (rst_ni),
.clr_i (1'b0 ), .clr_i (1'b0),
.testmode_i(1'b0 ), .testmode_i(1'b0),
.data_i (acc_req ), .data_i (acc_req),
.valid_i (acc_req_valid ), .valid_i (acc_req_valid),
.ready_o (acc_req_ready ), .ready_o (acc_req_ready),
.data_o (acc_req_int ), .data_o (acc_req_int),
.valid_o (acc_req_o.req_valid), .valid_o (acc_req_o.req_valid),
.ready_i (acc_resp_i.req_ready) .ready_i (acc_resp_i.req_ready)
); );
assign acc_req_o.insn = acc_req_int.insn; assign acc_req_o.insn = acc_req_int.insn;
@ -216,28 +215,33 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
assign acc_req_o.acc_cons_en = acc_cons_en_i; assign acc_req_o.acc_cons_en = acc_cons_en_i;
assign acc_req_o.inval_ready = inval_ready_i; assign acc_req_o.inval_ready = inval_ready_i;
always_comb begin: accelerator_req_dispatcher always_comb begin : accelerator_req_dispatcher
// Do not fetch from the instruction queue // Do not fetch from the instruction queue
acc_insn_queue_pop = 1'b0; acc_insn_queue_pop = 1'b0;
// Default values // Default values
acc_req = '0; acc_req = '0;
acc_req_valid = 1'b0; acc_req_valid = 1'b0;
// Unpack fu_data_t into accelerator_req_t // Unpack fu_data_t into accelerator_req_t
if (!acc_insn_queue_empty) begin if (!acc_insn_queue_empty) begin
acc_req = '{ acc_req = '{
// Instruction is forwarded from the decoder as an immediate // Instruction is forwarded from the decoder as an immediate
// - // -
// frm rounding information is up to date during a valid request to the accelerator // frm rounding information is up to date during a valid request to the accelerator
// The scoreboard synchronizes it with previous fcsr writes, and future fcsr writes // The scoreboard synchronizes it with previous fcsr writes, and future fcsr writes
// do not take place until the accelerator answers (Ariane commits in-order) // do not take place until the accelerator answers (Ariane commits in-order)
insn : acc_insn_queue_o.imm[31:0], insn :
rs1 : acc_insn_queue_o.operand_a, acc_insn_queue_o.imm[
rs2 : acc_insn_queue_o.operand_b, 31
frm : fpnew_pkg::roundmode_e'(fcsr_frm_i), :
trans_id: acc_insn_queue_o.trans_id, 0
default : '0 ],
rs1 : acc_insn_queue_o.operand_a,
rs2 : acc_insn_queue_o.operand_b,
frm : fpnew_pkg::roundmode_e'(fcsr_frm_i),
trans_id: acc_insn_queue_o.trans_id,
default: '0
}; };
// Wait until the instruction is no longer speculative. // Wait until the instruction is no longer speculative.
acc_req_valid = insn_ready_q[acc_insn_queue_o.trans_id] || acc_req_valid = insn_ready_q[acc_insn_queue_o.trans_id] ||
@ -254,26 +258,22 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
logic acc_st_disp; logic acc_st_disp;
// Unpack the accelerator response // Unpack the accelerator response
assign acc_trans_id_o = acc_resp_i.trans_id; assign acc_trans_id_o = acc_resp_i.trans_id;
assign acc_result_o = acc_resp_i.result; assign acc_result_o = acc_resp_i.result;
assign acc_valid_o = acc_resp_i.resp_valid; assign acc_valid_o = acc_resp_i.resp_valid;
assign acc_exception_o = '{ assign acc_exception_o = '{cause: riscv::ILLEGAL_INSTR, tval : '0, valid: acc_resp_i.error};
cause: riscv::ILLEGAL_INSTR, assign acc_fflags_valid_o = acc_resp_i.fflags_valid;
tval : '0, assign acc_fflags_o = acc_resp_i.fflags;
valid: acc_resp_i.error
};
assign acc_fflags_valid_o = acc_resp_i.fflags_valid;
assign acc_fflags_o = acc_resp_i.fflags;
// Always ready to receive responses // Always ready to receive responses
assign acc_req_o.resp_ready = 1'b1; assign acc_req_o.resp_ready = 1'b1;
// Signal dispatched load/store to issue stage // Signal dispatched load/store to issue stage
assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD); assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD);
assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE); assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE);
// Cache invalidation // Cache invalidation
assign inval_valid_o = acc_resp_i.inval_valid; assign inval_valid_o = acc_resp_i.inval_valid;
assign inval_addr_o = acc_resp_i.inval_addr; assign inval_addr_o = acc_resp_i.inval_addr;
/************************** /**************************
* Accelerator commit * * Accelerator commit *
@ -282,13 +282,11 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
// Instruction can be issued to the (in-order) back-end if // Instruction can be issued to the (in-order) back-end if
// it reached the top of the scoreboard and it hasn't been // it reached the top of the scoreboard and it hasn't been
// issued yet // issued yet
always_comb begin: accelerator_commit always_comb begin : accelerator_commit
acc_commit = 1'b0; acc_commit = 1'b0;
if (!commit_instr_i[0].valid && commit_instr_i[0].fu == ACCEL) if (!commit_instr_i[0].valid && commit_instr_i[0].fu == ACCEL) acc_commit = 1'b1;
acc_commit = 1'b1; if (commit_instr_i[0].valid && !commit_instr_i[1].valid && commit_instr_i[1].fu == ACCEL)
if (commit_instr_i[0].valid && acc_commit = 1'b1;
!commit_instr_i[1].valid && commit_instr_i[1].fu == ACCEL)
acc_commit = 1'b1;
end end
// Dirty the V state if we are committing anything related to the vector accelerator // Dirty the V state if we are committing anything related to the vector accelerator
@ -330,37 +328,38 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
// Count speculative loads. These can still be flushed. // Count speculative loads. These can still be flushed.
counter #( counter #(
.WIDTH (3), .WIDTH (3),
.STICKY_OVERFLOW (0) .STICKY_OVERFLOW(0)
) i_acc_spec_loads ( ) i_acc_spec_loads (
.clk_i (clk_i ), .clk_i (clk_i),
.rst_ni (rst_ni ), .rst_ni (rst_ni),
.clear_i (flush_ex_i ), .clear_i (flush_ex_i),
.en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_LOAD) ^ acc_ld_disp), .en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_LOAD) ^ acc_ld_disp),
.load_i (1'b0 ), .load_i (1'b0),
.down_i (acc_ld_disp ), .down_i (acc_ld_disp),
.d_i ('0 ), .d_i ('0),
.q_o (acc_spec_loads_pending ), .q_o (acc_spec_loads_pending),
.overflow_o (acc_spec_loads_overflow ) .overflow_o(acc_spec_loads_overflow)
); );
// Count dispatched loads. These cannot be flushed anymore. // Count dispatched loads. These cannot be flushed anymore.
counter #( counter #(
.WIDTH (3), .WIDTH (3),
.STICKY_OVERFLOW (0) .STICKY_OVERFLOW(0)
) i_acc_disp_loads ( ) i_acc_disp_loads (
.clk_i (clk_i ), .clk_i (clk_i),
.rst_ni (rst_ni ), .rst_ni (rst_ni),
.clear_i (1'b0 ), .clear_i (1'b0),
.en_i (acc_ld_disp ^ acc_resp_i.load_complete), .en_i (acc_ld_disp ^ acc_resp_i.load_complete),
.load_i (1'b0 ), .load_i (1'b0),
.down_i (acc_resp_i.load_complete), .down_i (acc_resp_i.load_complete),
.d_i ('0 ), .d_i ('0),
.q_o (acc_disp_loads_pending ), .q_o (acc_disp_loads_pending),
.overflow_o (acc_disp_loads_overflow ) .overflow_o(acc_disp_loads_overflow)
); );
acc_dispatcher_no_load_overflow: assert property ( acc_dispatcher_no_load_overflow :
assert property (
@(posedge clk_i) disable iff (~rst_ni) (acc_spec_loads_overflow == 1'b0) && (acc_disp_loads_overflow == 1'b0) ) @(posedge clk_i) disable iff (~rst_ni) (acc_spec_loads_overflow == 1'b0) && (acc_disp_loads_overflow == 1'b0) )
else $error("[acc_dispatcher] Too many pending loads."); else $error("[acc_dispatcher] Too many pending loads.");
@ -374,37 +373,38 @@ module acc_dispatcher import ariane_pkg::*; import riscv::*; #(
// Count speculative stores. These can still be flushed. // Count speculative stores. These can still be flushed.
counter #( counter #(
.WIDTH (3), .WIDTH (3),
.STICKY_OVERFLOW (0) .STICKY_OVERFLOW(0)
) i_acc_spec_stores ( ) i_acc_spec_stores (
.clk_i (clk_i ), .clk_i (clk_i),
.rst_ni (rst_ni ), .rst_ni (rst_ni),
.clear_i (flush_ex_i ), .clear_i (flush_ex_i),
.en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_STORE) ^ acc_st_disp), .en_i ((acc_valid_d && issue_instr_i.op == ACCEL_OP_STORE) ^ acc_st_disp),
.load_i (1'b0 ), .load_i (1'b0),
.down_i (acc_st_disp ), .down_i (acc_st_disp),
.d_i ('0 ), .d_i ('0),
.q_o (acc_spec_stores_pending ), .q_o (acc_spec_stores_pending),
.overflow_o (acc_spec_stores_overflow) .overflow_o(acc_spec_stores_overflow)
); );
// Count dispatched stores. These cannot be flushed anymore. // Count dispatched stores. These cannot be flushed anymore.
counter #( counter #(
.WIDTH (3), .WIDTH (3),
.STICKY_OVERFLOW (0) .STICKY_OVERFLOW(0)
) i_acc_disp_stores ( ) i_acc_disp_stores (
.clk_i (clk_i ), .clk_i (clk_i),
.rst_ni (rst_ni ), .rst_ni (rst_ni),
.clear_i (1'b0 ), .clear_i (1'b0),
.en_i (acc_st_disp ^ acc_resp_i.store_complete), .en_i (acc_st_disp ^ acc_resp_i.store_complete),
.load_i (1'b0 ), .load_i (1'b0),
.down_i (acc_resp_i.store_complete), .down_i (acc_resp_i.store_complete),
.d_i ('0 ), .d_i ('0),
.q_o (acc_disp_stores_pending ), .q_o (acc_disp_stores_pending),
.overflow_o (acc_disp_stores_overflow ) .overflow_o(acc_disp_stores_overflow)
); );
acc_dispatcher_no_store_overflow: assert property ( acc_dispatcher_no_store_overflow :
assert property (
@(posedge clk_i) disable iff (~rst_ni) (acc_spec_stores_overflow == 1'b0) && (acc_disp_stores_overflow == 1'b0) ) @(posedge clk_i) disable iff (~rst_ni) (acc_spec_stores_overflow == 1'b0) && (acc_disp_stores_overflow == 1'b0) )
else $error("[acc_dispatcher] Too many pending stores."); else $error("[acc_dispatcher] Too many pending stores.");

View file

@ -18,291 +18,305 @@
// Description: Ariane ALU based on RI5CY's ALU // Description: Ariane ALU based on RI5CY's ALU
module alu import ariane_pkg::*; #( module alu
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
input fu_data_t fu_data_i, input fu_data_t fu_data_i,
output riscv::xlen_t result_o, output riscv::xlen_t result_o,
output logic alu_branch_res_o output logic alu_branch_res_o
); );
riscv::xlen_t operand_a_rev; riscv::xlen_t operand_a_rev;
logic [31:0] operand_a_rev32; logic [ 31:0] operand_a_rev32;
logic [riscv::XLEN:0] operand_b_neg; logic [ riscv::XLEN:0] operand_b_neg;
logic [riscv::XLEN+1:0] adder_result_ext_o; logic [riscv::XLEN+1:0] adder_result_ext_o;
logic less; // handles both signed and unsigned forms logic less; // handles both signed and unsigned forms
logic [31:0] rolw; // Rotate Left Word logic [ 31:0] rolw; // Rotate Left Word
logic [31:0] rorw; // Rotate Right Word logic [ 31:0] rorw; // Rotate Right Word
logic [31:0] orcbw, rev8w; logic [31:0] orcbw, rev8w;
logic [$clog2(riscv::XLEN) :0] cpop; // Count Population logic [ $clog2(riscv::XLEN) : 0] cpop; // Count Population
logic [$clog2(riscv::XLEN)-1 :0] lz_tz_count; // Count Leading Zeros logic [$clog2(riscv::XLEN)-1 : 0] lz_tz_count; // Count Leading Zeros
logic [4:0] lz_tz_wcount; // Count Leading Zeros Word logic [ 4:0] lz_tz_wcount; // Count Leading Zeros Word
logic lz_tz_empty, lz_tz_wempty; logic lz_tz_empty, lz_tz_wempty;
// bit reverse operand_a for left shifts and bit counting // bit reverse operand_a for left shifts and bit counting
generate generate
genvar k; genvar k;
for(k = 0; k < riscv::XLEN; k++) for (k = 0; k < riscv::XLEN; k++)
assign operand_a_rev[k] = fu_data_i.operand_a[riscv::XLEN-1-k]; assign operand_a_rev[k] = fu_data_i.operand_a[riscv::XLEN-1-k];
for (k = 0; k < 32; k++) for (k = 0; k < 32; k++) assign operand_a_rev32[k] = fu_data_i.operand_a[31-k];
assign operand_a_rev32[k] = fu_data_i.operand_a[31-k]; endgenerate
endgenerate
// ------ // ------
// Adder // Adder
// ------ // ------
logic adder_op_b_negate; logic adder_op_b_negate;
logic adder_z_flag; logic adder_z_flag;
logic [riscv::XLEN:0] adder_in_a, adder_in_b; logic [riscv::XLEN:0] adder_in_a, adder_in_b;
riscv::xlen_t adder_result; riscv::xlen_t adder_result;
logic [riscv::XLEN-1:0] operand_a_bitmanip, bit_indx; logic [riscv::XLEN-1:0] operand_a_bitmanip, bit_indx;
always_comb begin always_comb begin
adder_op_b_negate = 1'b0; adder_op_b_negate = 1'b0;
unique case (fu_data_i.operation)
// ADDER OPS
EQ, NE, SUB, SUBW, ANDN, ORN, XNOR: adder_op_b_negate = 1'b1;
default: ;
endcase
end
always_comb begin
operand_a_bitmanip = fu_data_i.operand_a;
if (ariane_pkg::BITMANIP) begin
unique case (fu_data_i.operation) unique case (fu_data_i.operation)
// ADDER OPS SH1ADD: operand_a_bitmanip = fu_data_i.operand_a << 1;
EQ, NE, SH2ADD: operand_a_bitmanip = fu_data_i.operand_a << 2;
SUB, SUBW, SH3ADD: operand_a_bitmanip = fu_data_i.operand_a << 3;
ANDN, ORN, XNOR: adder_op_b_negate = 1'b1; SH1ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 1;
default: ; SH2ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 2;
SH3ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 3;
CTZ: operand_a_bitmanip = operand_a_rev;
CTZW: operand_a_bitmanip = operand_a_rev32;
ADDUW, CPOPW, CLZW: operand_a_bitmanip = fu_data_i.operand_a[31:0];
default: ;
endcase endcase
end end
end
always_comb begin // prepare operand a
operand_a_bitmanip = fu_data_i.operand_a; assign adder_in_a = {operand_a_bitmanip, 1'b1};
if (ariane_pkg::BITMANIP) begin // prepare operand b
unique case (fu_data_i.operation) assign operand_b_neg = {fu_data_i.operand_b, 1'b0} ^ {riscv::XLEN + 1{adder_op_b_negate}};
SH1ADD : operand_a_bitmanip = fu_data_i.operand_a << 1; assign adder_in_b = operand_b_neg;
SH2ADD : operand_a_bitmanip = fu_data_i.operand_a << 2;
SH3ADD : operand_a_bitmanip = fu_data_i.operand_a << 3;
SH1ADDUW : operand_a_bitmanip = fu_data_i.operand_a[31:0] << 1;
SH2ADDUW : operand_a_bitmanip = fu_data_i.operand_a[31:0] << 2;
SH3ADDUW : operand_a_bitmanip = fu_data_i.operand_a[31:0] << 3;
CTZ : operand_a_bitmanip = operand_a_rev;
CTZW : operand_a_bitmanip = operand_a_rev32;
ADDUW, CPOPW, CLZW : operand_a_bitmanip = fu_data_i.operand_a[31:0];
default : ;
endcase
end
end
// prepare operand a // actual adder
assign adder_in_a = {operand_a_bitmanip, 1'b1}; assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b);
assign adder_result = adder_result_ext_o[riscv::XLEN:1];
assign adder_z_flag = ~|adder_result;
// prepare operand b // get the right branch comparison result
assign operand_b_neg = {fu_data_i.operand_b, 1'b0} ^ {riscv::XLEN+1{adder_op_b_negate}}; always_comb begin : branch_resolve
assign adder_in_b = operand_b_neg ; // set comparison by default
alu_branch_res_o = 1'b1;
case (fu_data_i.operation)
EQ: alu_branch_res_o = adder_z_flag;
NE: alu_branch_res_o = ~adder_z_flag;
LTS, LTU: alu_branch_res_o = less;
GES, GEU: alu_branch_res_o = ~less;
default: alu_branch_res_o = 1'b1;
endcase
end
// actual adder // ---------
assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b); // Shifts
assign adder_result = adder_result_ext_o[riscv::XLEN:1]; // ---------
assign adder_z_flag = ~|adder_result;
// get the right branch comparison result // TODO: this can probably be optimized significantly
always_comb begin : branch_resolve logic shift_left; // should we shift left
// set comparison by default logic shift_arithmetic;
alu_branch_res_o = 1'b1;
case (fu_data_i.operation)
EQ: alu_branch_res_o = adder_z_flag;
NE: alu_branch_res_o = ~adder_z_flag;
LTS, LTU: alu_branch_res_o = less;
GES, GEU: alu_branch_res_o = ~less;
default: alu_branch_res_o = 1'b1;
endcase
end
// --------- riscv::xlen_t shift_amt; // amount of shift, to the right
// Shifts riscv::xlen_t shift_op_a; // input of the shifter
// --------- logic [ 31:0] shift_op_a32; // input to the 32 bit shift operation
// TODO: this can probably be optimized significantly riscv::xlen_t shift_result;
logic shift_left; // should we shift left logic [ 31:0] shift_result32;
logic shift_arithmetic;
riscv::xlen_t shift_amt; // amount of shift, to the right logic [riscv::XLEN:0] shift_right_result;
riscv::xlen_t shift_op_a; // input of the shifter logic [ 32:0] shift_right_result32;
logic [31:0] shift_op_a32; // input to the 32 bit shift operation
riscv::xlen_t shift_result; riscv::xlen_t shift_left_result;
logic [31:0] shift_result32; logic [ 31:0] shift_left_result32;
logic [riscv::XLEN:0] shift_right_result; assign shift_amt = fu_data_i.operand_b;
logic [32:0] shift_right_result32;
riscv::xlen_t shift_left_result; assign shift_left = (fu_data_i.operation == SLL) | (fu_data_i.operation == SLLW);
logic [31:0] shift_left_result32;
assign shift_amt = fu_data_i.operand_b; assign shift_arithmetic = (fu_data_i.operation == SRA) | (fu_data_i.operation == SRAW);
assign shift_left = (fu_data_i.operation == SLL) | (fu_data_i.operation == SLLW); // right shifts, we let the synthesizer optimize this
logic [riscv::XLEN:0] shift_op_a_64;
logic [32:0] shift_op_a_32;
assign shift_arithmetic = (fu_data_i.operation == SRA) | (fu_data_i.operation == SRAW); // choose the bit reversed or the normal input for shift operand a
assign shift_op_a = shift_left ? operand_a_rev : fu_data_i.operand_a;
assign shift_op_a32 = shift_left ? operand_a_rev32 : fu_data_i.operand_a[31:0];
// right shifts, we let the synthesizer optimize this assign shift_op_a_64 = {shift_arithmetic & shift_op_a[riscv::XLEN-1], shift_op_a};
logic [riscv::XLEN:0] shift_op_a_64; assign shift_op_a_32 = {shift_arithmetic & shift_op_a[31], shift_op_a32};
logic [32:0] shift_op_a_32;
// choose the bit reversed or the normal input for shift operand a assign shift_right_result = $unsigned($signed(shift_op_a_64) >>> shift_amt[5:0]);
assign shift_op_a = shift_left ? operand_a_rev : fu_data_i.operand_a;
assign shift_op_a32 = shift_left ? operand_a_rev32 : fu_data_i.operand_a[31:0];
assign shift_op_a_64 = { shift_arithmetic & shift_op_a[riscv::XLEN-1], shift_op_a}; assign shift_right_result32 = $unsigned($signed(shift_op_a_32) >>> shift_amt[4:0]);
assign shift_op_a_32 = { shift_arithmetic & shift_op_a[31], shift_op_a32}; // bit reverse the shift_right_result for left shifts
genvar j;
generate
for (j = 0; j < riscv::XLEN; j++)
assign shift_left_result[j] = shift_right_result[riscv::XLEN-1-j];
assign shift_right_result = $unsigned($signed(shift_op_a_64) >>> shift_amt[5:0]); for (j = 0; j < 32; j++) assign shift_left_result32[j] = shift_right_result32[31-j];
assign shift_right_result32 = $unsigned($signed(shift_op_a_32) >>> shift_amt[4:0]); endgenerate
// bit reverse the shift_right_result for left shifts
genvar j;
generate
for(j = 0; j < riscv::XLEN; j++)
assign shift_left_result[j] = shift_right_result[riscv::XLEN-1-j];
for(j = 0; j < 32; j++) assign shift_result = shift_left ? shift_left_result : shift_right_result[riscv::XLEN-1:0];
assign shift_left_result32[j] = shift_right_result32[31-j]; assign shift_result32 = shift_left ? shift_left_result32 : shift_right_result32[31:0];
endgenerate // ------------
// Comparisons
// ------------
assign shift_result = shift_left ? shift_left_result : shift_right_result[riscv::XLEN-1:0]; always_comb begin
assign shift_result32 = shift_left ? shift_left_result32 : shift_right_result32[31:0]; logic sgn;
sgn = 1'b0;
// ------------ if ((fu_data_i.operation == SLTS) ||
// Comparisons
// ------------
always_comb begin
logic sgn;
sgn = 1'b0;
if ((fu_data_i.operation == SLTS) ||
(fu_data_i.operation == LTS) || (fu_data_i.operation == LTS) ||
(fu_data_i.operation == GES) || (fu_data_i.operation == GES) ||
(fu_data_i.operation == MAX) || (fu_data_i.operation == MAX) ||
(fu_data_i.operation == MIN)) (fu_data_i.operation == MIN))
sgn = 1'b1; sgn = 1'b1;
less = ($signed({sgn & fu_data_i.operand_a[riscv::XLEN-1], fu_data_i.operand_a}) < $signed({sgn & fu_data_i.operand_b[riscv::XLEN-1], fu_data_i.operand_b})); less = ($signed({sgn & fu_data_i.operand_a[riscv::XLEN-1], fu_data_i.operand_a}) <
end $signed({sgn & fu_data_i.operand_b[riscv::XLEN-1], fu_data_i.operand_b}));
end
if (ariane_pkg::BITMANIP) begin : gen_bitmanip if (ariane_pkg::BITMANIP) begin : gen_bitmanip
// Count Population + Count population Word // Count Population + Count population Word
popcount #( popcount #(
.INPUT_WIDTH(riscv::XLEN) .INPUT_WIDTH(riscv::XLEN)
) i_cpop_count ( ) i_cpop_count (
.data_i (operand_a_bitmanip), .data_i (operand_a_bitmanip),
.popcount_o (cpop) .popcount_o(cpop)
); );
// Count Leading/Trailing Zeros
// 64b
lzc #(
.WIDTH(riscv::XLEN),
.MODE (1)
) i_clz_64b (
.in_i(operand_a_bitmanip),
.cnt_o(lz_tz_count),
.empty_o(lz_tz_empty)
);
//32b
lzc #(
.WIDTH(32),
.MODE (1)
) i_clz_32b (
.in_i(operand_a_bitmanip[31:0]),
.cnt_o(lz_tz_wcount),
.empty_o(lz_tz_wempty)
);
end
// -----------
// Result MUX
// -----------
always_comb begin
result_o = '0;
unique case (fu_data_i.operation)
// Standard Operations
ANDL, ANDN: result_o = fu_data_i.operand_a & operand_b_neg[riscv::XLEN:1];
ORL, ORN: result_o = fu_data_i.operand_a | operand_b_neg[riscv::XLEN:1];
XORL, XNOR: result_o = fu_data_i.operand_a ^ operand_b_neg[riscv::XLEN:1];
// Adder Operations
ADD, SUB, ADDUW, SH1ADD, SH2ADD, SH3ADD, SH1ADDUW, SH2ADDUW, SH3ADDUW:
result_o = adder_result;
// Add word: Ignore the upper bits and sign extend to 64 bit
ADDW, SUBW: result_o = {{riscv::XLEN - 32{adder_result[31]}}, adder_result[31:0]};
// Shift Operations
SLL, SRL, SRA: result_o = (riscv::XLEN == 64) ? shift_result : shift_result32;
// Shifts 32 bit
SLLW, SRLW, SRAW: result_o = {{riscv::XLEN - 32{shift_result32[31]}}, shift_result32[31:0]};
// Comparison Operations
SLTS, SLTU: result_o = {{riscv::XLEN - 1{1'b0}}, less};
default: ; // default case to suppress unique warning
endcase
if (ariane_pkg::BITMANIP) begin
// Index for Bitwise Rotation
bit_indx = 1 << (fu_data_i.operand_b & (riscv::XLEN - 1));
orcbw = {
{8{|fu_data_i.operand_a[31:24]}},
{8{|fu_data_i.operand_a[23:16]}},
{8{|fu_data_i.operand_a[15:8]}},
{8{|fu_data_i.operand_a[7:0]}}
};
rev8w = {
{fu_data_i.operand_a[7:0]},
{fu_data_i.operand_a[15:8]},
{fu_data_i.operand_a[23:16]},
{fu_data_i.operand_a[31:24]}
};
// rolw, roriw, rorw
rolw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> (riscv::XLEN-32-fu_data_i.operand_b[4:0]));
rorw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << (riscv::XLEN-32-fu_data_i.operand_b[4:0]));
unique case (fu_data_i.operation)
// Left Shift 32 bit unsigned
SLLIUW:
result_o = {{riscv::XLEN-32{1'b0}}, fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[5:0];
// Integer minimum/maximum
MAX: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
MAXU: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
MIN: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;
MINU: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;
// Single bit instructions operations
BCLR, BCLRI: result_o = fu_data_i.operand_a & ~bit_indx;
BEXT, BEXTI: result_o = {{riscv::XLEN - 1{1'b0}}, |(fu_data_i.operand_a & bit_indx)};
BINV, BINVI: result_o = fu_data_i.operand_a ^ bit_indx;
BSET, BSETI: result_o = fu_data_i.operand_a | bit_indx;
// Count Leading/Trailing Zeros // Count Leading/Trailing Zeros
// 64b CLZ, CTZ:
lzc #( result_o = (lz_tz_empty) ? ({{riscv::XLEN - $clog2(riscv::XLEN) {1'b0}}, lz_tz_count} + 1) :
.WIDTH(riscv::XLEN), {{riscv::XLEN - $clog2(riscv::XLEN) {1'b0}}, lz_tz_count};
.MODE (1) CLZW, CTZW: result_o = (lz_tz_wempty) ? 32 : {{riscv::XLEN - 5{1'b0}}, lz_tz_wcount};
) i_clz_64b (
.in_i (operand_a_bitmanip), // Count population
.cnt_o (lz_tz_count), CPOP, CPOPW: result_o = {{(riscv::XLEN - ($clog2(riscv::XLEN) + 1)) {1'b0}}, cpop};
.empty_o (lz_tz_empty)
); // Sign and Zero Extend
//32b SEXTB: result_o = {{riscv::XLEN - 8{fu_data_i.operand_a[7]}}, fu_data_i.operand_a[7:0]};
lzc #( SEXTH: result_o = {{riscv::XLEN - 16{fu_data_i.operand_a[15]}}, fu_data_i.operand_a[15:0]};
.WIDTH(32), ZEXTH: result_o = {{riscv::XLEN - 16{1'b0}}, fu_data_i.operand_a[15:0]};
.MODE (1)
) i_clz_32b ( // Bitwise Rotation
.in_i (operand_a_bitmanip[31:0]), ROL:
.cnt_o (lz_tz_wcount), result_o = (riscv::XLEN == 64) ? ((fu_data_i.operand_a << fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a << fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[4:0])));
.empty_o (lz_tz_wempty) ROLW: result_o = {{riscv::XLEN - 32{rolw[31]}}, rolw};
); ROR, RORI:
result_o = (riscv::XLEN == 64) ? ((fu_data_i.operand_a >> fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a >> fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[4:0])));
RORW, RORIW: result_o = {{riscv::XLEN - 32{rorw[31]}}, rorw};
ORCB:
result_o = (riscv::XLEN == 64) ? ({{8{|fu_data_i.operand_a[63:56]}}, {8{|fu_data_i.operand_a[55:48]}}, {8{|fu_data_i.operand_a[47:40]}}, {8{|fu_data_i.operand_a[39:32]}}, orcbw}) : orcbw;
REV8:
result_o = (riscv::XLEN == 64) ? ({rev8w , {fu_data_i.operand_a[39:32]}, {fu_data_i.operand_a[47:40]}, {fu_data_i.operand_a[55:48]}, {fu_data_i.operand_a[63:56]}}) : rev8w;
default: ; // default case to suppress unique warning
endcase
end end
if (CVA6Cfg.ZiCondExtEn) begin
// ----------- unique case (fu_data_i.operation)
// Result MUX CZERO_EQZ:
// ----------- result_o = (|fu_data_i.operand_b) ? fu_data_i.operand_a : '0; // move zero to rd if rs2 is equal to zero else rs1
always_comb begin CZERO_NEZ:
result_o = '0; result_o = (|fu_data_i.operand_b) ? '0 : fu_data_i.operand_a; // move zero to rd if rs2 is nonzero else rs1
unique case (fu_data_i.operation) default: ; // default case to suppress unique warning
// Standard Operations endcase
ANDL, ANDN: result_o = fu_data_i.operand_a & operand_b_neg[riscv::XLEN:1];
ORL, ORN : result_o = fu_data_i.operand_a | operand_b_neg[riscv::XLEN:1];
XORL, XNOR: result_o = fu_data_i.operand_a ^ operand_b_neg[riscv::XLEN:1];
// Adder Operations
ADD, SUB,
ADDUW,
SH1ADD, SH2ADD, SH3ADD,
SH1ADDUW, SH2ADDUW, SH3ADDUW: result_o = adder_result;
// Add word: Ignore the upper bits and sign extend to 64 bit
ADDW, SUBW: result_o = {{riscv::XLEN-32{adder_result[31]}}, adder_result[31:0]};
// Shift Operations
SLL,
SRL, SRA: result_o = (riscv::XLEN == 64) ? shift_result : shift_result32;
// Shifts 32 bit
SLLW,
SRLW, SRAW: result_o = {{riscv::XLEN-32{shift_result32[31]}}, shift_result32[31:0]};
// Comparison Operations
SLTS, SLTU: result_o = {{riscv::XLEN-1{1'b0}}, less};
default: ; // default case to suppress unique warning
endcase
if (ariane_pkg::BITMANIP) begin
// Index for Bitwise Rotation
bit_indx = 1 << (fu_data_i.operand_b & (riscv::XLEN-1));
orcbw = {{8{|fu_data_i.operand_a[31:24]}}, {8{|fu_data_i.operand_a[23:16]}}, {8{|fu_data_i.operand_a[15:8]}}, {8{|fu_data_i.operand_a[7:0]}}};
rev8w = {{fu_data_i.operand_a[7:0]}, {fu_data_i.operand_a[15:8]}, {fu_data_i.operand_a[23:16]}, {fu_data_i.operand_a[31:24]}};
// rolw, roriw, rorw
rolw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> (riscv::XLEN-32-fu_data_i.operand_b[4:0]));
rorw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << (riscv::XLEN-32-fu_data_i.operand_b[4:0]));
unique case (fu_data_i.operation)
// Left Shift 32 bit unsigned
SLLIUW: result_o = {{riscv::XLEN-32{1'b0}}, fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[5:0];
// Integer minimum/maximum
MAX: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
MAXU: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a;
MIN: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;
MINU: result_o = ~less ? fu_data_i.operand_b : fu_data_i.operand_a;
// Single bit instructions operations
BCLR, BCLRI: result_o = fu_data_i.operand_a & ~bit_indx;
BEXT, BEXTI: result_o = {{riscv::XLEN-1{1'b0}}, |(fu_data_i.operand_a & bit_indx)};
BINV, BINVI: result_o = fu_data_i.operand_a ^ bit_indx;
BSET, BSETI: result_o = fu_data_i.operand_a | bit_indx;
// Count Leading/Trailing Zeros
CLZ, CTZ : result_o = (lz_tz_empty) ? ({{riscv::XLEN-$clog2(riscv::XLEN){1'b0}}, lz_tz_count} + 1) : {{riscv::XLEN-$clog2(riscv::XLEN){1'b0}}, lz_tz_count};
CLZW, CTZW: result_o = (lz_tz_wempty) ? 32 : {{riscv::XLEN-5{1'b0}}, lz_tz_wcount};
// Count population
CPOP, CPOPW: result_o = {{(riscv::XLEN-($clog2(riscv::XLEN)+1)){1'b0}}, cpop};
// Sign and Zero Extend
SEXTB: result_o = {{riscv::XLEN-8{fu_data_i.operand_a[7]}}, fu_data_i.operand_a[7:0]};
SEXTH: result_o = {{riscv::XLEN-16{fu_data_i.operand_a[15]}}, fu_data_i.operand_a[15:0]};
ZEXTH: result_o = {{riscv::XLEN-16{1'b0}}, fu_data_i.operand_a[15:0]};
// Bitwise Rotation
ROL: result_o = (riscv::XLEN == 64) ? ((fu_data_i.operand_a << fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a << fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[4:0])));
ROLW: result_o = {{riscv::XLEN-32{rolw[31]}}, rolw};
ROR, RORI: result_o = (riscv::XLEN == 64) ? ((fu_data_i.operand_a >> fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a >> fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[4:0])));
RORW, RORIW: result_o = {{riscv::XLEN-32{rorw[31]}}, rorw};
ORCB: result_o = (riscv::XLEN == 64) ? ({{8{|fu_data_i.operand_a[63:56]}}, {8{|fu_data_i.operand_a[55:48]}}, {8{|fu_data_i.operand_a[47:40]}}, {8{|fu_data_i.operand_a[39:32]}}, orcbw}) : orcbw;
REV8: result_o = (riscv::XLEN == 64) ? ({rev8w , {fu_data_i.operand_a[39:32]}, {fu_data_i.operand_a[47:40]}, {fu_data_i.operand_a[55:48]}, {fu_data_i.operand_a[63:56]}}) : rev8w;
default: ; // default case to suppress unique warning
endcase
end
if (CVA6Cfg.ZiCondExtEn) begin
unique case (fu_data_i.operation)
CZERO_EQZ : result_o = (|fu_data_i.operand_b) ? fu_data_i.operand_a : '0; // move zero to rd if rs2 is equal to zero else rs1
CZERO_NEZ : result_o = (|fu_data_i.operand_b) ? '0 : fu_data_i.operand_a; // move zero to rd if rs2 is nonzero else rs1
default: ; // default case to suppress unique warning
endcase
end
end end
end
endmodule endmodule

View file

@ -17,66 +17,66 @@
module amo_buffer #( module amo_buffer #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // pipeline flush input logic flush_i, // pipeline flush
input logic valid_i, // AMO is valid input logic valid_i, // AMO is valid
output logic ready_o, // AMO unit is ready output logic ready_o, // AMO unit is ready
input ariane_pkg::amo_t amo_op_i, // AMO Operation input ariane_pkg::amo_t amo_op_i, // AMO Operation
input logic [riscv::PLEN-1:0] paddr_i, // physical address of store which needs to be placed in the queue input logic [riscv::PLEN-1:0] paddr_i, // physical address of store which needs to be placed in the queue
input riscv::xlen_t data_i, // data which is placed in the queue input riscv::xlen_t data_i, // data which is placed in the queue
input logic [1:0] data_size_i, // type of request we are making (e.g.: bytes to write) input logic [1:0] data_size_i, // type of request we are making (e.g.: bytes to write)
// D$ // D$
output ariane_pkg::amo_req_t amo_req_o, // request to cache subsystem output ariane_pkg::amo_req_t amo_req_o, // request to cache subsystem
input ariane_pkg::amo_resp_t amo_resp_i, // response from cache subsystem input ariane_pkg::amo_resp_t amo_resp_i, // response from cache subsystem
// Auxiliary signals // Auxiliary signals
input logic amo_valid_commit_i, // We have a valid AMO in the commit stage input logic amo_valid_commit_i, // We have a valid AMO in the commit stage
input logic no_st_pending_i // there is currently no store pending anymore input logic no_st_pending_i // there is currently no store pending anymore
); );
logic flush_amo_buffer; logic flush_amo_buffer;
logic amo_valid; logic amo_valid;
typedef struct packed { typedef struct packed {
ariane_pkg::amo_t op; ariane_pkg::amo_t op;
logic [riscv::PLEN-1:0] paddr; logic [riscv::PLEN-1:0] paddr;
riscv::xlen_t data; riscv::xlen_t data;
logic [1:0] size; logic [1:0] size;
} amo_op_t ; } amo_op_t;
amo_op_t amo_data_in, amo_data_out; amo_op_t amo_data_in, amo_data_out;
// validate this request as soon as all stores have drained and the AMO is in the commit stage // validate this request as soon as all stores have drained and the AMO is in the commit stage
assign amo_req_o.req = no_st_pending_i & amo_valid_commit_i & amo_valid; assign amo_req_o.req = no_st_pending_i & amo_valid_commit_i & amo_valid;
assign amo_req_o.amo_op = amo_data_out.op; assign amo_req_o.amo_op = amo_data_out.op;
assign amo_req_o.size = amo_data_out.size; assign amo_req_o.size = amo_data_out.size;
assign amo_req_o.operand_a = {{64-riscv::PLEN{1'b0}}, amo_data_out.paddr}; assign amo_req_o.operand_a = {{64 - riscv::PLEN{1'b0}}, amo_data_out.paddr};
assign amo_req_o.operand_b = {{64-riscv::XLEN{1'b0}}, amo_data_out.data}; assign amo_req_o.operand_b = {{64 - riscv::XLEN{1'b0}}, amo_data_out.data};
assign amo_data_in.op = amo_op_i; assign amo_data_in.op = amo_op_i;
assign amo_data_in.data = data_i; assign amo_data_in.data = data_i;
assign amo_data_in.paddr = paddr_i; assign amo_data_in.paddr = paddr_i;
assign amo_data_in.size = data_size_i; assign amo_data_in.size = data_size_i;
// only flush if we are currently not committing the AMO // only flush if we are currently not committing the AMO
// e.g.: it is not speculative anymore // e.g.: it is not speculative anymore
assign flush_amo_buffer = flush_i & !amo_valid_commit_i; assign flush_amo_buffer = flush_i & !amo_valid_commit_i;
fifo_v3 #( fifo_v3 #(
.DEPTH ( 1 ), .DEPTH(1),
.dtype ( amo_op_t ) .dtype(amo_op_t)
) i_amo_fifo ( ) i_amo_fifo (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( flush_amo_buffer ), .flush_i (flush_amo_buffer),
.testmode_i ( 1'b0 ), .testmode_i(1'b0),
.full_o ( amo_valid ), .full_o (amo_valid),
.empty_o ( ready_o ), .empty_o (ready_o),
.usage_o ( ), // left open .usage_o (), // left open
.data_i ( amo_data_in ), .data_i (amo_data_in),
.push_i ( valid_i ), .push_i (valid_i),
.data_o ( amo_data_out ), .data_o (amo_data_out),
.pop_i ( amo_resp_i.ack ) .pop_i (amo_resp_i.ack)
); );
endmodule endmodule

View file

@ -24,97 +24,91 @@
// //
module ariane_regfile_lol #( module ariane_regfile_lol #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned DATA_WIDTH = 32, parameter int unsigned DATA_WIDTH = 32,
parameter int unsigned NR_READ_PORTS = 2, parameter int unsigned NR_READ_PORTS = 2,
parameter bit ZERO_REG_ZERO = 0 parameter bit ZERO_REG_ZERO = 0
)( ) (
// clock and reset // clock and reset
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
// disable clock gates for testing // disable clock gates for testing
input logic test_en_i, input logic test_en_i,
// read port // read port
input logic [NR_READ_PORTS-1:0][4:0] raddr_i, input logic [ NR_READ_PORTS-1:0][ 4:0] raddr_i,
output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o, output logic [ NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
// write port // write port
input logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_i, input logic [CVA6Cfg.NrCommitPorts-1:0][ 4:0] waddr_i,
input logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i, input logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
input logic [CVA6Cfg.NrCommitPorts-1:0] we_i input logic [CVA6Cfg.NrCommitPorts-1:0] we_i
); );
localparam ADDR_WIDTH = 5; localparam ADDR_WIDTH = 5;
localparam NUM_WORDS = 2**ADDR_WIDTH; localparam NUM_WORDS = 2 ** ADDR_WIDTH;
logic [NUM_WORDS-1:ZERO_REG_ZERO] mem_clocks; logic [NUM_WORDS-1:ZERO_REG_ZERO] mem_clocks;
logic [DATA_WIDTH-1:0] mem[NUM_WORDS]; logic [ DATA_WIDTH-1:0] mem [NUM_WORDS];
logic [CVA6Cfg.NrCommitPorts-1:0][NUM_WORDS-1:1] waddr_onehot,waddr_onehot_q; logic [CVA6Cfg.NrCommitPorts-1:0][NUM_WORDS-1:1] waddr_onehot, waddr_onehot_q;
logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_q; logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_q;
// decode addresses // decode addresses
for (genvar i = 0; i < NR_READ_PORTS; i++) for (genvar i = 0; i < NR_READ_PORTS; i++) assign rdata_o[i] = mem[raddr_i[i][ADDR_WIDTH-1:0]];
assign rdata_o[i] = mem[raddr_i[i][ADDR_WIDTH-1:0]];
always_ff @(posedge clk_i, negedge rst_ni) begin : sample_waddr always_ff @(posedge clk_i, negedge rst_ni) begin : sample_waddr
if (~rst_ni) begin if (~rst_ni) begin
wdata_q <= '0; wdata_q <= '0;
end else begin end else begin
for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++) for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++)
// enable flipflop will most probably infer clock gating // enable flipflop will most probably infer clock gating
if (we_i[i]) begin if (we_i[i]) begin
wdata_q[i] <= wdata_i[i]; wdata_q[i] <= wdata_i[i];
end end
waddr_onehot_q <= waddr_onehot; waddr_onehot_q <= waddr_onehot;
end
end end
end
// WRITE : Write Address Decoder (WAD), combinatorial process // WRITE : Write Address Decoder (WAD), combinatorial process
always_comb begin : decode_write_addess always_comb begin : decode_write_addess
for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
for (int unsigned j = 1; j < NUM_WORDS; j++) begin for (int unsigned j = 1; j < NUM_WORDS; j++) begin
if (we_i[i] && (waddr_i[i] == j)) if (we_i[i] && (waddr_i[i] == j)) waddr_onehot[i][j] = 1'b1;
waddr_onehot[i][j] = 1'b1; else waddr_onehot[i][j] = 1'b0;
else end
waddr_onehot[i][j] = 1'b0;
end
end
end end
end
// WRITE : Clock gating (if integrated clock-gating cells are available) // WRITE : Clock gating (if integrated clock-gating cells are available)
for (genvar x = ZERO_REG_ZERO; x < NUM_WORDS; x++) begin for (genvar x = ZERO_REG_ZERO; x < NUM_WORDS; x++) begin
logic [CVA6Cfg.NrCommitPorts-1:0] waddr_ored; logic [CVA6Cfg.NrCommitPorts-1:0] waddr_ored;
for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) assign waddr_ored[i] = waddr_onehot[i][x];
assign waddr_ored[i] = waddr_onehot[i][x];
cluster_clock_gating i_cg ( cluster_clock_gating i_cg (
.clk_i ( clk_i ), .clk_i (clk_i),
.en_i ( |waddr_ored ), .en_i (|waddr_ored),
.test_en_i ( test_en_i ), .test_en_i(test_en_i),
.clk_o ( mem_clocks[x] ) .clk_o (mem_clocks[x])
); );
end end
// Generate M = WORDS sequential processes, each of which describes one // Generate M = WORDS sequential processes, each of which describes one
// word of the memory. The processes are synchronized with the clocks // word of the memory. The processes are synchronized with the clocks
// ClocksxC(i), i = 0, 1, ..., M-1 // ClocksxC(i), i = 0, 1, ..., M-1
// Use active low, i.e. transparent on low latches as storage elements // Use active low, i.e. transparent on low latches as storage elements
// Data is sampled on rising clock edge // Data is sampled on rising clock edge
// Integer registers // Integer registers
always_latch begin : latch_wdata always_latch begin : latch_wdata
// Note: The assignment has to be done inside this process or Modelsim complains about it // Note: The assignment has to be done inside this process or Modelsim complains about it
if (ZERO_REG_ZERO) if (ZERO_REG_ZERO) mem[0] = '0;
mem[0] = '0;
for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
for (int unsigned i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin for (int unsigned k = ZERO_REG_ZERO; k < NUM_WORDS; k++) begin
for (int unsigned k = ZERO_REG_ZERO; k < NUM_WORDS; k++) begin if (mem_clocks[k] && waddr_onehot_q[i][k]) mem[k] = wdata_q[i];
if (mem_clocks[k] && waddr_onehot_q[i][k]) end
mem[k] = wdata_q[i];
end
end
end end
end
endmodule endmodule

View file

@ -23,60 +23,58 @@
// //
module ariane_regfile #( module ariane_regfile #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned DATA_WIDTH = 32, parameter int unsigned DATA_WIDTH = 32,
parameter int unsigned NR_READ_PORTS = 2, parameter int unsigned NR_READ_PORTS = 2,
parameter bit ZERO_REG_ZERO = 0 parameter bit ZERO_REG_ZERO = 0
)( ) (
// clock and reset // clock and reset
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
// disable clock gates for testing // disable clock gates for testing
input logic test_en_i, input logic test_en_i,
// read port // read port
input logic [NR_READ_PORTS-1:0][4:0] raddr_i, input logic [ NR_READ_PORTS-1:0][ 4:0] raddr_i,
output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o, output logic [ NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
// write port // write port
input logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_i, input logic [CVA6Cfg.NrCommitPorts-1:0][ 4:0] waddr_i,
input logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i, input logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
input logic [CVA6Cfg.NrCommitPorts-1:0] we_i input logic [CVA6Cfg.NrCommitPorts-1:0] we_i
); );
localparam ADDR_WIDTH = 5; localparam ADDR_WIDTH = 5;
localparam NUM_WORDS = 2**ADDR_WIDTH; localparam NUM_WORDS = 2 ** ADDR_WIDTH;
logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] mem; logic [ NUM_WORDS-1:0][DATA_WIDTH-1:0] mem;
logic [CVA6Cfg.NrCommitPorts-1:0][NUM_WORDS-1:0] we_dec; logic [CVA6Cfg.NrCommitPorts-1:0][ NUM_WORDS-1:0] we_dec;
always_comb begin : we_decoder always_comb begin : we_decoder
for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
for (int unsigned i = 0; i < NUM_WORDS; i++) begin for (int unsigned i = 0; i < NUM_WORDS; i++) begin
if (waddr_i[j] == i) if (waddr_i[j] == i) we_dec[j][i] = we_i[j];
we_dec[j][i] = we_i[j]; else we_dec[j][i] = 1'b0;
else end
we_dec[j][i] = 1'b0;
end
end
end end
end
// loop from 1 to NUM_WORDS-1 as R0 is nil // loop from 1 to NUM_WORDS-1 as R0 is nil
always_ff @(posedge clk_i, negedge rst_ni) begin : register_write_behavioral always_ff @(posedge clk_i, negedge rst_ni) begin : register_write_behavioral
if (~rst_ni) begin if (~rst_ni) begin
mem <= '{default: '0}; mem <= '{default: '0};
end else begin end else begin
for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
for (int unsigned i = 0; i < NUM_WORDS; i++) begin for (int unsigned i = 0; i < NUM_WORDS; i++) begin
if (we_dec[j][i]) begin if (we_dec[j][i]) begin
mem[i] <= wdata_i[j]; mem[i] <= wdata_i[j];
end end
end
if (ZERO_REG_ZERO) begin
mem[0] <= '0;
end
end
end end
if (ZERO_REG_ZERO) begin
mem[0] <= '0;
end
end
end end
end
for (genvar i = 0; i < NR_READ_PORTS; i++) begin for (genvar i = 0; i < NR_READ_PORTS; i++) begin
assign rdata_o[i] = mem[raddr_i[i]]; assign rdata_o[i] = mem[raddr_i[i]];

View file

@ -26,35 +26,35 @@
// //
module ariane_regfile_fpga #( module ariane_regfile_fpga #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned DATA_WIDTH = 32, parameter int unsigned DATA_WIDTH = 32,
parameter int unsigned NR_READ_PORTS = 2, parameter int unsigned NR_READ_PORTS = 2,
parameter bit ZERO_REG_ZERO = 0 parameter bit ZERO_REG_ZERO = 0
)( ) (
// clock and reset // clock and reset
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
// disable clock gates for testing // disable clock gates for testing
input logic test_en_i, input logic test_en_i,
// read port // read port
input logic [NR_READ_PORTS-1:0][4:0] raddr_i, input logic [ NR_READ_PORTS-1:0][ 4:0] raddr_i,
output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o, output logic [ NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o,
// write port // write port
input logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_i, input logic [CVA6Cfg.NrCommitPorts-1:0][ 4:0] waddr_i,
input logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i, input logic [CVA6Cfg.NrCommitPorts-1:0][DATA_WIDTH-1:0] wdata_i,
input logic [CVA6Cfg.NrCommitPorts-1:0] we_i input logic [CVA6Cfg.NrCommitPorts-1:0] we_i
); );
localparam ADDR_WIDTH = 5; localparam ADDR_WIDTH = 5;
localparam NUM_WORDS = 2**ADDR_WIDTH; localparam NUM_WORDS = 2 ** ADDR_WIDTH;
localparam LOG_NR_WRITE_PORTS = CVA6Cfg.NrCommitPorts == 1 ? 1 : $clog2(CVA6Cfg.NrCommitPorts); localparam LOG_NR_WRITE_PORTS = CVA6Cfg.NrCommitPorts == 1 ? 1 : $clog2(CVA6Cfg.NrCommitPorts);
// Distributed RAM usually supports one write port per block - duplicate for each write port. // Distributed RAM usually supports one write port per block - duplicate for each write port.
logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] mem [CVA6Cfg.NrCommitPorts]; logic [ NUM_WORDS-1:0][ DATA_WIDTH-1:0] mem [CVA6Cfg.NrCommitPorts];
logic [CVA6Cfg.NrCommitPorts-1:0][NUM_WORDS-1:0] we_dec; logic [CVA6Cfg.NrCommitPorts-1:0][ NUM_WORDS-1:0] we_dec;
logic [NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel; logic [ NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel;
logic [NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel_q; logic [ NUM_WORDS-1:0][LOG_NR_WRITE_PORTS-1:0] mem_block_sel_q;
// write address decoder (for block selector) // write address decoder (for block selector)
always_comb begin always_comb begin
@ -75,8 +75,8 @@ module ariane_regfile_fpga #(
// index has priority. // index has priority.
always_comb begin always_comb begin
mem_block_sel = mem_block_sel_q; mem_block_sel = mem_block_sel_q;
for (int i = 0; i<NUM_WORDS; i++) begin for (int i = 0; i < NUM_WORDS; i++) begin
for (int j = 0; j<CVA6Cfg.NrCommitPorts; j++) begin for (int j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin
if (we_dec[j][i] == 1'b1) begin if (we_dec[j][i] == 1'b1) begin
mem_block_sel[i] = LOG_NR_WRITE_PORTS'(j); mem_block_sel[i] = LOG_NR_WRITE_PORTS'(j);
end end
@ -94,14 +94,14 @@ module ariane_regfile_fpga #(
end end
// distributed RAM blocks // distributed RAM blocks
logic [NR_READ_PORTS-1:0] [DATA_WIDTH-1:0] mem_read [CVA6Cfg.NrCommitPorts]; logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] mem_read[CVA6Cfg.NrCommitPorts];
for (genvar j=0; j<CVA6Cfg.NrCommitPorts; j++) begin : regfile_ram_block for (genvar j = 0; j < CVA6Cfg.NrCommitPorts; j++) begin : regfile_ram_block
always_ff @(posedge clk_i) begin always_ff @(posedge clk_i) begin
if (we_i[j] && ~waddr_i[j] != 0) begin if (we_i[j] && ~waddr_i[j] != 0) begin
mem[j][waddr_i[j]] <= wdata_i[j]; mem[j][waddr_i[j]] <= wdata_i[j];
end end
end end
for (genvar k=0; k<NR_READ_PORTS; k++) begin : block_read for (genvar k = 0; k < NR_READ_PORTS; k++) begin : block_read
assign mem_read[j][k] = mem[j][raddr_i[k]]; assign mem_read[j][k] = mem[j][raddr_i[k]];
end end
end end
@ -110,15 +110,13 @@ module ariane_regfile_fpga #(
logic [NR_READ_PORTS-1:0][LOG_NR_WRITE_PORTS-1:0] block_addr; logic [NR_READ_PORTS-1:0][LOG_NR_WRITE_PORTS-1:0] block_addr;
for (genvar k = 0; k < NR_READ_PORTS; k++) begin : regfile_read_port for (genvar k = 0; k < NR_READ_PORTS; k++) begin : regfile_read_port
assign block_addr[k] = mem_block_sel_q[raddr_i[k]]; assign block_addr[k] = mem_block_sel_q[raddr_i[k]];
assign rdata_o[k] = assign rdata_o[k] = (ZERO_REG_ZERO && raddr_i[k] == '0) ? '0 : mem_read[block_addr[k]][k];
(ZERO_REG_ZERO && raddr_i[k] == '0 ) ? '0 : mem_read[block_addr[k]][k];
end end
// random initialization of the memory to suppress assert warnings on Questa. // random initialization of the memory to suppress assert warnings on Questa.
initial initial begin
begin for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
for(int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin for (int j = 0; j < NUM_WORDS; j++) begin
for(int j = 0; j < NUM_WORDS; j++) begin
mem[i][j] = $random(); mem[i][j] = $random();
end end
end end

View file

@ -20,69 +20,74 @@
module axi_shim #( module axi_shim #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned AxiNumWords = 4, // data width in dwords, this is also the maximum burst length, must be >=2 parameter int unsigned AxiNumWords = 4, // data width in dwords, this is also the maximum burst length, must be >=2
parameter type axi_req_t = logic, parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic parameter type axi_rsp_t = logic
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
// read channel // read channel
// request // request
input logic rd_req_i, input logic rd_req_i,
output logic rd_gnt_o, output logic rd_gnt_o,
input logic [CVA6Cfg.AxiAddrWidth-1:0] rd_addr_i, input logic [CVA6Cfg.AxiAddrWidth-1:0] rd_addr_i,
input logic [$clog2(AxiNumWords)-1:0] rd_blen_i, // axi convention: LEN-1 input logic [$clog2(AxiNumWords)-1:0] rd_blen_i, // axi convention: LEN-1
input logic [2:0] rd_size_i, input logic [2:0] rd_size_i,
input logic [CVA6Cfg.AxiIdWidth-1:0] rd_id_i, // use same ID for reads, or make sure you only have one outstanding read tx input logic [CVA6Cfg.AxiIdWidth-1:0] rd_id_i, // use same ID for reads, or make sure you only have one outstanding read tx
input logic rd_lock_i, input logic rd_lock_i,
// read response (we have to unconditionally sink the response) // read response (we have to unconditionally sink the response)
input logic rd_rdy_i, input logic rd_rdy_i,
output logic rd_last_o, output logic rd_last_o,
output logic rd_valid_o, output logic rd_valid_o,
output logic [CVA6Cfg.AxiDataWidth-1:0] rd_data_o, output logic [CVA6Cfg.AxiDataWidth-1:0] rd_data_o,
output logic [CVA6Cfg.AxiUserWidth-1:0] rd_user_o, output logic [CVA6Cfg.AxiUserWidth-1:0] rd_user_o,
output logic [CVA6Cfg.AxiIdWidth-1:0] rd_id_o, output logic [CVA6Cfg.AxiIdWidth-1:0] rd_id_o,
output logic rd_exokay_o, // indicates whether exclusive tx succeeded output logic rd_exokay_o, // indicates whether exclusive tx succeeded
// write channel // write channel
input logic wr_req_i, input logic wr_req_i,
output logic wr_gnt_o, output logic wr_gnt_o,
input logic [CVA6Cfg.AxiAddrWidth-1:0] wr_addr_i, input logic [CVA6Cfg.AxiAddrWidth-1:0] wr_addr_i,
input logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0] wr_data_i, input logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0] wr_data_i,
input logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0] wr_user_i, input logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0] wr_user_i,
input logic [AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] wr_be_i, input logic [AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] wr_be_i,
input logic [$clog2(AxiNumWords)-1:0] wr_blen_i, // axi convention: LEN-1 input logic [$clog2(AxiNumWords)-1:0] wr_blen_i, // axi convention: LEN-1
input logic [2:0] wr_size_i, input logic [2:0] wr_size_i,
input logic [CVA6Cfg.AxiIdWidth-1:0] wr_id_i, input logic [CVA6Cfg.AxiIdWidth-1:0] wr_id_i,
input logic wr_lock_i, input logic wr_lock_i,
input logic [5:0] wr_atop_i, input logic [5:0] wr_atop_i,
// write response // write response
input logic wr_rdy_i, input logic wr_rdy_i,
output logic wr_valid_o, output logic wr_valid_o,
output logic [CVA6Cfg.AxiIdWidth-1:0] wr_id_o, output logic [CVA6Cfg.AxiIdWidth-1:0] wr_id_o,
output logic wr_exokay_o, // indicates whether exclusive tx succeeded output logic wr_exokay_o, // indicates whether exclusive tx succeeded
// AXI port // AXI port
output axi_req_t axi_req_o, output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i input axi_rsp_t axi_resp_i
); );
localparam AddrIndex = ($clog2(AxiNumWords) > 0) ? $clog2(AxiNumWords) : 1; localparam AddrIndex = ($clog2(AxiNumWords) > 0) ? $clog2(AxiNumWords) : 1;
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// write channel // write channel
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
enum logic [3:0] { enum logic [3:0] {
IDLE, WAIT_AW_READY, WAIT_LAST_W_READY, WAIT_LAST_W_READY_AW_READY, WAIT_AW_READY_BURST IDLE,
} wr_state_q, wr_state_d; WAIT_AW_READY,
WAIT_LAST_W_READY,
WAIT_LAST_W_READY_AW_READY,
WAIT_AW_READY_BURST
}
wr_state_q, wr_state_d;
// AXI tx counter // AXI tx counter
logic [AddrIndex-1:0] wr_cnt_d, wr_cnt_q; logic [AddrIndex-1:0] wr_cnt_d, wr_cnt_q;
logic wr_single_req, wr_cnt_done, wr_cnt_clr, wr_cnt_en; logic wr_single_req, wr_cnt_done, wr_cnt_clr, wr_cnt_en;
assign wr_single_req = (wr_blen_i == 0); assign wr_single_req = (wr_blen_i == 0);
// address // address
assign axi_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction assign axi_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction
assign axi_req_o.aw.addr = wr_addr_i[CVA6Cfg.AxiAddrWidth-1:0]; assign axi_req_o.aw.addr = wr_addr_i[CVA6Cfg.AxiAddrWidth-1:0];
assign axi_req_o.aw.size = wr_size_i; assign axi_req_o.aw.size = wr_size_i;
assign axi_req_o.aw.len = wr_blen_i; assign axi_req_o.aw.len = wr_blen_i;
@ -108,10 +113,8 @@ module axi_shim #(
assign wr_id_o = axi_resp_i.b.id; assign wr_id_o = axi_resp_i.b.id;
// tx counter // tx counter
assign wr_cnt_done = (wr_cnt_q == wr_blen_i); assign wr_cnt_done = (wr_cnt_q == wr_blen_i);
assign wr_cnt_d = (wr_cnt_clr) ? assign wr_cnt_d = (wr_cnt_clr) ? '0 : (wr_cnt_en) ? wr_cnt_q + 1 : wr_cnt_q;
'0 : (wr_cnt_en) ?
wr_cnt_q+1 : wr_cnt_q;
always_comb begin : p_axi_write_fsm always_comb begin : p_axi_write_fsm
// default // default
@ -137,21 +140,25 @@ module axi_shim #(
if (wr_single_req) begin if (wr_single_req) begin
wr_cnt_clr = 1'b1; wr_cnt_clr = 1'b1;
// single req can be granted here // single req can be granted here
wr_gnt_o = axi_resp_i.aw_ready & axi_resp_i.w_ready; wr_gnt_o = axi_resp_i.aw_ready & axi_resp_i.w_ready;
case ({axi_resp_i.aw_ready, axi_resp_i.w_ready}) case ({
2'b01: wr_state_d = WAIT_AW_READY; axi_resp_i.aw_ready, axi_resp_i.w_ready
2'b10: wr_state_d = WAIT_LAST_W_READY; })
2'b01: wr_state_d = WAIT_AW_READY;
2'b10: wr_state_d = WAIT_LAST_W_READY;
default: wr_state_d = IDLE; default: wr_state_d = IDLE;
endcase endcase
// it's a request for the whole cache line // it's a request for the whole cache line
end else begin end else begin
wr_cnt_en = axi_resp_i.w_ready; wr_cnt_en = axi_resp_i.w_ready;
case ({axi_resp_i.aw_ready, axi_resp_i.w_ready}) case ({
2'b11: wr_state_d = WAIT_LAST_W_READY; axi_resp_i.aw_ready, axi_resp_i.w_ready
2'b01: wr_state_d = WAIT_LAST_W_READY_AW_READY; })
2'b10: wr_state_d = WAIT_LAST_W_READY; 2'b11: wr_state_d = WAIT_LAST_W_READY;
default:; 2'b01: wr_state_d = WAIT_LAST_W_READY_AW_READY;
2'b10: wr_state_d = WAIT_LAST_W_READY;
default: ;
endcase endcase
end end
end end
@ -172,7 +179,9 @@ module axi_shim #(
axi_req_o.w_valid = 1'b1; axi_req_o.w_valid = 1'b1;
axi_req_o.aw_valid = 1'b1; axi_req_o.aw_valid = 1'b1;
// we got an aw_ready // we got an aw_ready
case ({axi_resp_i.aw_ready, axi_resp_i.w_ready}) case ({
axi_resp_i.aw_ready, axi_resp_i.w_ready
})
// we got an aw ready // we got an aw ready
2'b01: begin 2'b01: begin
// are there any outstanding transactions? // are there any outstanding transactions?
@ -180,25 +189,25 @@ module axi_shim #(
wr_state_d = WAIT_AW_READY_BURST; wr_state_d = WAIT_AW_READY_BURST;
wr_cnt_clr = 1'b1; wr_cnt_clr = 1'b1;
end else begin end else begin
// yes, so reduce the count and stay here // yes, so reduce the count and stay here
wr_cnt_en = 1'b1; wr_cnt_en = 1'b1;
end end
end end
2'b10: wr_state_d = WAIT_LAST_W_READY; 2'b10: wr_state_d = WAIT_LAST_W_READY;
2'b11: begin 2'b11: begin
// we are finished // we are finished
if (wr_cnt_done) begin if (wr_cnt_done) begin
wr_state_d = IDLE; wr_state_d = IDLE;
wr_gnt_o = 1'b1; wr_gnt_o = 1'b1;
wr_cnt_clr = 1'b1; wr_cnt_clr = 1'b1;
// there are outstanding transactions // there are outstanding transactions
end else begin end else begin
wr_state_d = WAIT_LAST_W_READY; wr_state_d = WAIT_LAST_W_READY;
wr_cnt_en = 1'b1; wr_cnt_en = 1'b1;
end end
end end
default:; default: ;
endcase endcase
end end
/////////////////////////////////// ///////////////////////////////////
// ~> all data has already been sent, we are only waiting for the aw_ready // ~> all data has already been sent, we are only waiting for the aw_ready
@ -234,14 +243,14 @@ module axi_shim #(
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// read channel // read channel
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// address // address
// in case of a wrapping transfer we can simply begin at the address, if we want to request a cache-line // in case of a wrapping transfer we can simply begin at the address, if we want to request a cache-line
// with an incremental transfer we need to output the corresponding base address of the cache line // with an incremental transfer we need to output the corresponding base address of the cache line
assign axi_req_o.ar.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction assign axi_req_o.ar.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction
assign axi_req_o.ar.addr = rd_addr_i[CVA6Cfg.AxiAddrWidth-1:0]; assign axi_req_o.ar.addr = rd_addr_i[CVA6Cfg.AxiAddrWidth-1:0];
assign axi_req_o.ar.size = rd_size_i; assign axi_req_o.ar.size = rd_size_i;
assign axi_req_o.ar.len = rd_blen_i; assign axi_req_o.ar.len = rd_blen_i;
@ -261,14 +270,14 @@ module axi_shim #(
assign axi_req_o.r_ready = rd_rdy_i; assign axi_req_o.r_ready = rd_rdy_i;
assign rd_data_o = axi_resp_i.r.data; assign rd_data_o = axi_resp_i.r.data;
if (ariane_pkg::AXI_USER_EN) begin if (ariane_pkg::AXI_USER_EN) begin
assign rd_user_o = axi_resp_i.r.user; assign rd_user_o = axi_resp_i.r.user;
end else begin end else begin
assign rd_user_o = '0; assign rd_user_o = '0;
end end
assign rd_last_o = axi_resp_i.r.last; assign rd_last_o = axi_resp_i.r.last;
assign rd_valid_o = axi_resp_i.r_valid; assign rd_valid_o = axi_resp_i.r_valid;
assign rd_id_o = axi_resp_i.r.id; assign rd_id_o = axi_resp_i.r.id;
assign rd_exokay_o = (axi_resp_i.r.resp == axi_pkg::RESP_EXOKAY); assign rd_exokay_o = (axi_resp_i.r.resp == axi_pkg::RESP_EXOKAY);
// ---------------- // ----------------
@ -285,17 +294,17 @@ module axi_shim #(
end end
end end
// ---------------- // ----------------
// Assertions // Assertions
// ---------------- // ----------------
//pragma translate_off //pragma translate_off
initial begin initial begin
assert (AxiNumWords >= 1) else assert (AxiNumWords >= 1)
$fatal(1, "[axi adapter] AxiNumWords must be >= 1"); else $fatal(1, "[axi adapter] AxiNumWords must be >= 1");
assert (CVA6Cfg.AxiIdWidth >= 2) else assert (CVA6Cfg.AxiIdWidth >= 2)
$fatal(1, "[axi adapter] AXI id width must be at least 2 bit wide"); else $fatal(1, "[axi adapter] AXI id width must be at least 2 bit wide");
end end
//pragma translate_on //pragma translate_on
endmodule // axi_adapter2 endmodule // axi_adapter2

View file

@ -15,92 +15,90 @@
// Branch unit: resolves control-flow instructions.
//
// Purely combinational. For every valid branch/jump it
//   - computes the sequential PC (`next_pc`) and the jump destination (`target_address`),
//   - reports the resolution (taken / target / mispredict / cf_type) on `resolved_branch_o`,
//   - drives `next_pc` on `branch_result_o`,
//   - flags an instruction-address-misaligned exception when a jump that is actually
//     performed has bit 0 of its destination set.
module branch_unit #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    input logic clk_i,
    input logic rst_ni,
    input logic debug_mode_i,
    input ariane_pkg::fu_data_t fu_data_i,
    input logic [riscv::VLEN-1:0] pc_i,  // PC of instruction
    input logic is_compressed_instr_i,
    input logic fu_valid_i, // any functional unit is valid, check that there is no accidental mis-predict
    input logic branch_valid_i,
    input logic branch_comp_res_i,  // branch comparison result from ALU
    output logic [riscv::VLEN-1:0] branch_result_o,
    input ariane_pkg::branchpredict_sbe_t branch_predict_i,  // this is the address we predicted
    output ariane_pkg::bp_resolve_t resolved_branch_o,  // this is the actual address we are targeting
    output logic resolve_branch_o,  // to ID to clear that we resolved the branch and we can
    // accept new entries to the scoreboard
    output ariane_pkg::exception_t branch_exception_o  // branch exception out
);
  // jump destination and fall-through PC, shared by both processes below
  logic [riscv::VLEN-1:0] target_address;
  logic [riscv::VLEN-1:0] next_pc;

  // operation decoding, done once and reused
  logic                   op_is_jalr;
  logic                   op_is_cond_branch;

  assign op_is_jalr        = (fu_data_i.operation == ariane_pkg::JALR);
  assign op_is_cond_branch = ariane_pkg::op_is_branch(fu_data_i.operation);

  // here we handle the various possibilities of mis-predicts
  always_comb begin : mispredict_handler
    // base of the jump: JALR jumps relative to a register value,
    // every other control flow instruction is relative to the current PC
    automatic logic [riscv::VLEN-1:0] jump_base;
    // TODO(zarubaf): The ALU can be used to calculate the branch target
    jump_base = op_is_jalr ? fu_data_i.operand_a[riscv::VLEN-1:0] : pc_i;

    // sequential PC: +2 for a compressed instruction, +4 otherwise
    // TODO(zarubaf): We already calculate this a couple of times, maybe re-use?
    next_pc = pc_i + ((is_compressed_instr_i) ? {{riscv::VLEN-2{1'b0}}, 2'h2} : {{riscv::VLEN-3{1'b0}}, 3'h4});

    // jump destination: VLEN-wide signed addition of base and immediate
    target_address = $unsigned($signed(jump_base) + $signed(fu_data_i.imm[riscv::VLEN-1:0]));
    // on a JALR we are supposed to reset the LSB to 0 (according to the specification)
    if (op_is_jalr) target_address[0] = 1'b0;

    // the result of this unit is the sequential PC
    branch_result_o = next_pc;

    // defaults of the resolution record: nothing resolved, prediction passed through
    resolve_branch_o = 1'b0;
    resolved_branch_o.pc = pc_i;
    resolved_branch_o.valid = branch_valid_i;
    resolved_branch_o.target_address = {riscv::VLEN{1'b0}};
    resolved_branch_o.is_taken = 1'b0;
    resolved_branch_o.is_mispredict = 1'b0;
    resolved_branch_o.cf_type = branch_predict_i.cf;

    // There are only two sources of mispredicts:
    // 1. Branches
    // 2. Jumps to register addresses
    if (branch_valid_i) begin
      // to resolve the branch in ID
      resolve_branch_o = 1'b1;
      resolved_branch_o.is_taken = branch_comp_res_i;
      // write target address which goes to PC Gen
      resolved_branch_o.target_address = (branch_comp_res_i) ? target_address : next_pc;

      // 1. conditional branch: check the outcome of the branch speculation
      if (op_is_cond_branch) begin
        // Set the `cf_type` of the output as `branch`, this will update the BHT.
        resolved_branch_o.cf_type = ariane_pkg::Branch;
        // Mispredicted whenever the ALU comparison disagrees with "predicted as Branch".
        resolved_branch_o.is_mispredict = branch_comp_res_i != (branch_predict_i.cf == ariane_pkg::Branch);
      end

      // 2. register jump: mispredicted when nothing was predicted at all
      //    or when the predicted address is not the computed one
      if (op_is_jalr
          && (branch_predict_i.cf == ariane_pkg::NoCF || target_address != branch_predict_i.predict_address)) begin
        resolved_branch_o.is_mispredict = 1'b1;
        // update BTB only if this wasn't a return
        if (branch_predict_i.cf != ariane_pkg::Return)
          resolved_branch_o.cf_type = ariane_pkg::JumpR;
      end
    end
  end

  // use ALU exception signal for storing instruction fetch exceptions if
  // the target address is not aligned to a 2 byte boundary
  logic jump_taken;
  always_comb begin : exception_handling
    // A jump happens for everything that is not a conditional branch (JAL | JALR)
    // and for conditional branches whose comparison came out `taken`
    jump_taken = !(op_is_cond_branch) || ((op_is_cond_branch) && branch_comp_res_i);

    branch_exception_o.cause = riscv::INSTR_ADDR_MISALIGNED;
    branch_exception_o.valid = 1'b0;
    // tval is the PC of the instruction, sign-extended from VLEN to XLEN
    branch_exception_o.tval = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i};

    // Only throw instruction address misaligned exception if this is indeed a `taken` conditional branch or
    // an unconditional jump
    if (branch_valid_i && target_address[0] != 1'b0 && jump_taken) branch_exception_o.valid = 1'b1;
  end
endmodule

View file

@ -12,54 +12,54 @@
// Date: 15.09.2018 // Date: 15.09.2018
// Description: Combinatorial AMO unit // Description: Combinatorial AMO unit
// Combinational ALU for atomic memory operations.
//
// Produces the value selected/computed from the two 64-bit operands according to
// `amo_op_i`. Min/max comparisons are done with one shared 65-bit adder: the adder
// computes a - b on operands widened by one bit, and bit 64 of the sum then tells
// whether a < b (signed or unsigned, depending on how the operands were widened).
module amo_alu #(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    // AMO interface
    input  ariane_pkg::amo_t        amo_op_i,
    input  logic             [63:0] amo_operand_a_i,
    input  logic             [63:0] amo_operand_b_i,
    output logic             [63:0] amo_result_o      // result of atomic memory operation
);

  // operands widened to 65 bit, once sign-extended and once zero-extended
  logic [64:0] a_sext, b_sext;
  logic [64:0] a_zext, b_zext;

  assign a_sext = {amo_operand_a_i[63], amo_operand_a_i};
  assign b_sext = {amo_operand_b_i[63], amo_operand_b_i};
  assign a_zext = {1'b0, amo_operand_a_i};
  assign b_zext = {1'b0, amo_operand_b_i};

  // shared adder; its inputs are selected per operation below
  logic [64:0] addend_a, addend_b;
  logic [64:0] total;

  assign total = addend_a + addend_b;

  always_comb begin
    // defaults: signed a + b on the adder, operand b on the output
    addend_a     = a_sext;
    addend_b     = b_sext;
    amo_result_o = amo_operand_b_i;

    unique case (amo_op_i)
      // SC and SWAP keep the pre-assigned operand b
      ariane_pkg::AMO_SC, ariane_pkg::AMO_SWAP: ;

      ariane_pkg::AMO_ADD: amo_result_o = total[63:0];
      ariane_pkg::AMO_AND: amo_result_o = amo_operand_a_i & amo_operand_b_i;
      ariane_pkg::AMO_OR:  amo_result_o = amo_operand_a_i | amo_operand_b_i;
      ariane_pkg::AMO_XOR: amo_result_o = amo_operand_a_i ^ amo_operand_b_i;

      // signed compare: total = a - b, total[64] set <=> a < b
      ariane_pkg::AMO_MAX: begin
        addend_b     = -b_sext;
        amo_result_o = total[64] ? amo_operand_b_i : amo_operand_a_i;
      end
      ariane_pkg::AMO_MIN: begin
        addend_b     = -b_sext;
        amo_result_o = total[64] ? amo_operand_a_i : amo_operand_b_i;
      end

      // unsigned compare: same trick on the zero-extended operands
      ariane_pkg::AMO_MAXU: begin
        addend_a     = a_zext;
        addend_b     = -b_zext;
        amo_result_o = total[64] ? amo_operand_b_i : amo_operand_a_i;
      end
      ariane_pkg::AMO_MINU: begin
        addend_a     = a_zext;
        addend_b     = -b_zext;
        amo_result_o = total[64] ? amo_operand_a_i : amo_operand_b_i;
      end

      // every operation not listed above yields zero
      default: amo_result_o = '0;
    endcase
  end
endmodule

View file

@ -17,52 +17,66 @@
//import std_cache_pkg::*; //import std_cache_pkg::*;
module axi_adapter #( module axi_adapter #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned DATA_WIDTH = 256, parameter int unsigned DATA_WIDTH = 256,
parameter logic CRITICAL_WORD_FIRST = 0, // the AXI subsystem needs to support wrapping reads for this feature parameter logic CRITICAL_WORD_FIRST = 0, // the AXI subsystem needs to support wrapping reads for this feature
parameter int unsigned CACHELINE_BYTE_OFFSET = 8, parameter int unsigned CACHELINE_BYTE_OFFSET = 8,
parameter type axi_req_t = logic, parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic parameter type axi_rsp_t = logic
)( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
input logic req_i, input logic req_i,
input ariane_pkg::ad_req_t type_i, input ariane_pkg::ad_req_t type_i,
input ariane_pkg::amo_t amo_i, input ariane_pkg::amo_t amo_i,
output logic gnt_o, output logic gnt_o,
input logic [riscv::XLEN-1:0] addr_i, input logic [riscv::XLEN-1:0] addr_i,
input logic we_i, input logic we_i,
input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] wdata_i, input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] wdata_i,
input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] be_i, input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] be_i,
input logic [1:0] size_i, input logic [1:0] size_i,
input logic [CVA6Cfg.AxiIdWidth-1:0] id_i, input logic [CVA6Cfg.AxiIdWidth-1:0] id_i,
// read port // read port
output logic valid_o, output logic valid_o,
output logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] rdata_o, output logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] rdata_o,
output logic [CVA6Cfg.AxiIdWidth-1:0] id_o, output logic [CVA6Cfg.AxiIdWidth-1:0] id_o,
// critical word - read port // critical word - read port
output logic [CVA6Cfg.AxiDataWidth-1:0] critical_word_o, output logic [CVA6Cfg.AxiDataWidth-1:0] critical_word_o,
output logic critical_word_valid_o, output logic critical_word_valid_o,
// AXI port // AXI port
output axi_req_t axi_req_o, output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i input axi_rsp_t axi_resp_i
); );
localparam BURST_SIZE = (DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1; localparam BURST_SIZE = (DATA_WIDTH / CVA6Cfg.AxiDataWidth) - 1;
localparam ADDR_INDEX = ($clog2(DATA_WIDTH/CVA6Cfg.AxiDataWidth) > 0) ? $clog2(DATA_WIDTH/CVA6Cfg.AxiDataWidth) : 1; localparam ADDR_INDEX = ($clog2(
DATA_WIDTH / CVA6Cfg.AxiDataWidth
) > 0) ? $clog2(
DATA_WIDTH / CVA6Cfg.AxiDataWidth
) : 1;
enum logic [3:0] { enum logic [3:0] {
IDLE, WAIT_B_VALID, WAIT_AW_READY, WAIT_LAST_W_READY, WAIT_LAST_W_READY_AW_READY, WAIT_AW_READY_BURST, IDLE,
WAIT_R_VALID, WAIT_R_VALID_MULTIPLE, COMPLETE_READ, WAIT_AMO_R_VALID WAIT_B_VALID,
} state_q, state_d; WAIT_AW_READY,
WAIT_LAST_W_READY,
WAIT_LAST_W_READY_AW_READY,
WAIT_AW_READY_BURST,
WAIT_R_VALID,
WAIT_R_VALID_MULTIPLE,
COMPLETE_READ,
WAIT_AMO_R_VALID
}
state_q, state_d;
// counter for AXI transfers // counter for AXI transfers
logic [ADDR_INDEX-1:0] cnt_d, cnt_q; logic [ADDR_INDEX-1:0] cnt_d, cnt_q;
logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] cache_line_d, cache_line_q; logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0]
cache_line_d, cache_line_q;
// save the address for a read, as we allow for non-cacheline aligned accesses // save the address for a read, as we allow for non-cacheline aligned accesses
logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0] addr_offset_d, addr_offset_q; logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0] addr_offset_d, addr_offset_q;
logic [CVA6Cfg.AxiIdWidth-1:0] id_d, id_q; logic [CVA6Cfg.AxiIdWidth-1:0] id_d, id_q;
logic [ADDR_INDEX-1:0] index; logic [ADDR_INDEX-1:0] index;
// save the atomic operation and size // save the atomic operation and size
ariane_pkg::amo_t amo_d, amo_q; ariane_pkg::amo_t amo_d, amo_q;
logic [1:0] size_d, size_q; logic [1:0] size_d, size_q;
@ -75,8 +89,8 @@ module axi_adapter #(
axi_req_o.aw.prot = 3'b0; axi_req_o.aw.prot = 3'b0;
axi_req_o.aw.region = 4'b0; axi_req_o.aw.region = 4'b0;
axi_req_o.aw.len = 8'b0; axi_req_o.aw.len = 8'b0;
axi_req_o.aw.size = {1'b0, size_i}; // 1, 2, 4 or 8 bytes axi_req_o.aw.size = {1'b0, size_i}; // 1, 2, 4 or 8 bytes
axi_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction axi_req_o.aw.burst = axi_pkg::BURST_INCR; // Use BURST_INCR for AXI regular transaction
axi_req_o.aw.lock = 1'b0; axi_req_o.aw.lock = 1'b0;
axi_req_o.aw.cache = axi_pkg::CACHE_MODIFIABLE; axi_req_o.aw.cache = axi_pkg::CACHE_MODIFIABLE;
axi_req_o.aw.qos = 4'b0; axi_req_o.aw.qos = 4'b0;
@ -92,42 +106,42 @@ module axi_adapter #(
if (!CRITICAL_WORD_FIRST && type_i != ariane_pkg::SINGLE_REQ) begin if (!CRITICAL_WORD_FIRST && type_i != ariane_pkg::SINGLE_REQ) begin
axi_req_o.ar.addr[CACHELINE_BYTE_OFFSET-1:0] = '0; axi_req_o.ar.addr[CACHELINE_BYTE_OFFSET-1:0] = '0;
end end
axi_req_o.ar.prot = 3'b0; axi_req_o.ar.prot = 3'b0;
axi_req_o.ar.region = 4'b0; axi_req_o.ar.region = 4'b0;
axi_req_o.ar.len = 8'b0; axi_req_o.ar.len = 8'b0;
axi_req_o.ar.size = {1'b0, size_i}; // 1, 2, 4 or 8 bytes axi_req_o.ar.size = {1'b0, size_i}; // 1, 2, 4 or 8 bytes
axi_req_o.ar.burst = (CRITICAL_WORD_FIRST ? axi_pkg::BURST_WRAP : axi_pkg::BURST_INCR); // wrapping transfer in case of a critical word first strategy axi_req_o.ar.burst = (CRITICAL_WORD_FIRST ? axi_pkg::BURST_WRAP : axi_pkg::BURST_INCR); // wrapping transfer in case of a critical word first strategy
axi_req_o.ar.lock = 1'b0; axi_req_o.ar.lock = 1'b0;
axi_req_o.ar.cache = axi_pkg::CACHE_MODIFIABLE; axi_req_o.ar.cache = axi_pkg::CACHE_MODIFIABLE;
axi_req_o.ar.qos = 4'b0; axi_req_o.ar.qos = 4'b0;
axi_req_o.ar.id = id_i; axi_req_o.ar.id = id_i;
axi_req_o.ar.user = '0; axi_req_o.ar.user = '0;
axi_req_o.w_valid = 1'b0; axi_req_o.w_valid = 1'b0;
axi_req_o.w.data = wdata_i[0]; axi_req_o.w.data = wdata_i[0];
axi_req_o.w.strb = be_i[0]; axi_req_o.w.strb = be_i[0];
axi_req_o.w.last = 1'b0; axi_req_o.w.last = 1'b0;
axi_req_o.w.user = '0; axi_req_o.w.user = '0;
axi_req_o.b_ready = 1'b0; axi_req_o.b_ready = 1'b0;
axi_req_o.r_ready = 1'b0; axi_req_o.r_ready = 1'b0;
gnt_o = 1'b0; gnt_o = 1'b0;
valid_o = 1'b0; valid_o = 1'b0;
id_o = axi_resp_i.r.id; id_o = axi_resp_i.r.id;
critical_word_o = axi_resp_i.r.data; critical_word_o = axi_resp_i.r.data;
critical_word_valid_o = 1'b0; critical_word_valid_o = 1'b0;
rdata_o = cache_line_q; rdata_o = cache_line_q;
state_d = state_q; state_d = state_q;
cnt_d = cnt_q; cnt_d = cnt_q;
cache_line_d = cache_line_q; cache_line_d = cache_line_q;
addr_offset_d = addr_offset_q; addr_offset_d = addr_offset_q;
id_d = id_q; id_d = id_q;
amo_d = amo_q; amo_d = amo_q;
size_d = size_q; size_d = size_q;
index = '0; index = '0;
case (state_q) case (state_q)
@ -142,17 +156,19 @@ module axi_adapter #(
axi_req_o.aw_valid = 1'b1; axi_req_o.aw_valid = 1'b1;
axi_req_o.w_valid = 1'b1; axi_req_o.w_valid = 1'b1;
// store-conditional requires exclusive access // store-conditional requires exclusive access
axi_req_o.aw.lock = amo_i == ariane_pkg::AMO_SC; axi_req_o.aw.lock = amo_i == ariane_pkg::AMO_SC;
// its a single write // its a single write
if (type_i == ariane_pkg::SINGLE_REQ) begin if (type_i == ariane_pkg::SINGLE_REQ) begin
// only a single write so the data is already the last one // only a single write so the data is already the last one
axi_req_o.w.last = 1'b1; axi_req_o.w.last = 1'b1;
// single req can be granted here // single req can be granted here
gnt_o = axi_resp_i.aw_ready & axi_resp_i.w_ready; gnt_o = axi_resp_i.aw_ready & axi_resp_i.w_ready;
case ({axi_resp_i.aw_ready, axi_resp_i.w_ready}) case ({
2'b11: state_d = WAIT_B_VALID; axi_resp_i.aw_ready, axi_resp_i.w_ready
2'b01: state_d = WAIT_AW_READY; })
2'b10: state_d = WAIT_LAST_W_READY; 2'b11: state_d = WAIT_B_VALID;
2'b01: state_d = WAIT_AW_READY;
2'b10: state_d = WAIT_LAST_W_READY;
default: state_d = IDLE; default: state_d = IDLE;
endcase endcase
@ -161,29 +177,29 @@ module axi_adapter #(
size_d = size_i; size_d = size_i;
end end
// its a request for the whole cache line // its a request for the whole cache line
end else begin end else begin
// bursts of AMOs unsupported // bursts of AMOs unsupported
assert (amo_i == ariane_pkg::AMO_NONE) assert (amo_i == ariane_pkg::AMO_NONE)
else $fatal("Bursts of atomic operations are not supported"); else $fatal("Bursts of atomic operations are not supported");
axi_req_o.aw.len = BURST_SIZE[7:0]; // number of bursts to do axi_req_o.aw.len = BURST_SIZE[7:0]; // number of bursts to do
axi_req_o.w.data = wdata_i[0]; axi_req_o.w.data = wdata_i[0];
axi_req_o.w.strb = be_i[0]; axi_req_o.w.strb = be_i[0];
if (axi_resp_i.w_ready) if (axi_resp_i.w_ready) cnt_d = BURST_SIZE[ADDR_INDEX-1:0] - 1;
cnt_d = BURST_SIZE[ADDR_INDEX-1:0] - 1; else cnt_d = BURST_SIZE[ADDR_INDEX-1:0];
else
cnt_d = BURST_SIZE[ADDR_INDEX-1:0];
case ({axi_resp_i.aw_ready, axi_resp_i.w_ready}) case ({
2'b11: state_d = WAIT_LAST_W_READY; axi_resp_i.aw_ready, axi_resp_i.w_ready
2'b01: state_d = WAIT_LAST_W_READY_AW_READY; })
2'b10: state_d = WAIT_LAST_W_READY; 2'b11: state_d = WAIT_LAST_W_READY;
default:; 2'b01: state_d = WAIT_LAST_W_READY_AW_READY;
2'b10: state_d = WAIT_LAST_W_READY;
default: ;
endcase endcase
end end
// read // read
end else begin end else begin
axi_req_o.ar_valid = 1'b1; axi_req_o.ar_valid = 1'b1;
@ -193,7 +209,7 @@ module axi_adapter #(
gnt_o = axi_resp_i.ar_ready; gnt_o = axi_resp_i.ar_ready;
if (type_i != ariane_pkg::SINGLE_REQ) begin if (type_i != ariane_pkg::SINGLE_REQ) begin
assert (amo_i == ariane_pkg::AMO_NONE) assert (amo_i == ariane_pkg::AMO_NONE)
else $fatal("Bursts of atomic operations are not supported"); else $fatal("Bursts of atomic operations are not supported");
axi_req_o.ar.len = BURST_SIZE[7:0]; axi_req_o.ar.len = BURST_SIZE[7:0];
cnt_d = BURST_SIZE[ADDR_INDEX-1:0]; cnt_d = BURST_SIZE[ADDR_INDEX-1:0];
@ -221,8 +237,8 @@ module axi_adapter #(
// ~> we need to wait for an aw_ready and there is at least one outstanding write // ~> we need to wait for an aw_ready and there is at least one outstanding write
WAIT_LAST_W_READY_AW_READY: begin WAIT_LAST_W_READY_AW_READY: begin
axi_req_o.w_valid = 1'b1; axi_req_o.w_valid = 1'b1;
axi_req_o.w.last = (cnt_q == '0); axi_req_o.w.last = (cnt_q == '0);
if (type_i == ariane_pkg::SINGLE_REQ) begin if (type_i == ariane_pkg::SINGLE_REQ) begin
axi_req_o.w.data = wdata_i[0]; axi_req_o.w.data = wdata_i[0];
axi_req_o.w.strb = be_i[0]; axi_req_o.w.strb = be_i[0];
@ -234,29 +250,30 @@ module axi_adapter #(
// we are here because we want to write a cache line // we are here because we want to write a cache line
axi_req_o.aw.len = BURST_SIZE[7:0]; axi_req_o.aw.len = BURST_SIZE[7:0];
// we got an aw_ready // we got an aw_ready
case ({axi_resp_i.aw_ready, axi_resp_i.w_ready}) case ({
axi_resp_i.aw_ready, axi_resp_i.w_ready
})
// we got an aw ready // we got an aw ready
2'b01: begin 2'b01: begin
// are there any outstanding transactions? // are there any outstanding transactions?
if (cnt_q == 0) if (cnt_q == 0) state_d = WAIT_AW_READY_BURST;
state_d = WAIT_AW_READY_BURST; else // yes, so reduce the count and stay here
else // yes, so reduce the count and stay here
cnt_d = cnt_q - 1; cnt_d = cnt_q - 1;
end end
2'b10: state_d = WAIT_LAST_W_READY; 2'b10: state_d = WAIT_LAST_W_READY;
2'b11: begin 2'b11: begin
// we are finished // we are finished
if (cnt_q == 0) begin if (cnt_q == 0) begin
state_d = WAIT_B_VALID; state_d = WAIT_B_VALID;
gnt_o = 1'b1; gnt_o = 1'b1;
// there are outstanding transactions // there are outstanding transactions
end else begin end else begin
state_d = WAIT_LAST_W_READY; state_d = WAIT_LAST_W_READY;
cnt_d = cnt_q - 1; cnt_d = cnt_q - 1;
end end
end end
default:; default: ;
endcase endcase
end end
@ -266,8 +283,8 @@ module axi_adapter #(
axi_req_o.aw.len = BURST_SIZE[7:0]; axi_req_o.aw.len = BURST_SIZE[7:0];
if (axi_resp_i.aw_ready) begin if (axi_resp_i.aw_ready) begin
state_d = WAIT_B_VALID; state_d = WAIT_B_VALID;
gnt_o = 1'b1; gnt_o = 1'b1;
end end
end end
@ -347,10 +364,8 @@ module axi_adapter #(
// ~> cacheline read, single read // ~> cacheline read, single read
WAIT_R_VALID_MULTIPLE, WAIT_R_VALID: begin WAIT_R_VALID_MULTIPLE, WAIT_R_VALID: begin
if (CRITICAL_WORD_FIRST) if (CRITICAL_WORD_FIRST) index = addr_offset_q + (BURST_SIZE[ADDR_INDEX-1:0] - cnt_q);
index = addr_offset_q + (BURST_SIZE[ADDR_INDEX-1:0]-cnt_q); else index = BURST_SIZE[ADDR_INDEX-1:0] - cnt_q;
else
index = BURST_SIZE[ADDR_INDEX-1:0]-cnt_q;
// reads are always wrapping here // reads are always wrapping here
axi_req_o.r_ready = 1'b1; axi_req_o.r_ready = 1'b1;
@ -380,8 +395,7 @@ module axi_adapter #(
if (state_q == WAIT_R_VALID_MULTIPLE) begin if (state_q == WAIT_R_VALID_MULTIPLE) begin
cache_line_d[index] = axi_resp_i.r.data; cache_line_d[index] = axi_resp_i.r.data;
end else end else cache_line_d[0] = axi_resp_i.r.data;
cache_line_d[0] = axi_resp_i.r.data;
// Decrease the counter // Decrease the counter
cnt_d = cnt_q - 1; cnt_d = cnt_q - 1;
@ -425,19 +439,27 @@ module axi_adapter #(
function automatic axi_pkg::atop_t atop_from_amo(ariane_pkg::amo_t amo); function automatic axi_pkg::atop_t atop_from_amo(ariane_pkg::amo_t amo);
axi_pkg::atop_t result = 6'b000000; axi_pkg::atop_t result = 6'b000000;
unique case(amo) unique case (amo)
ariane_pkg::AMO_NONE: result = {axi_pkg::ATOP_NONE, 4'b0000}; ariane_pkg::AMO_NONE: result = {axi_pkg::ATOP_NONE, 4'b0000};
ariane_pkg::AMO_SWAP: result = {axi_pkg::ATOP_ATOMICSWAP}; ariane_pkg::AMO_SWAP: result = {axi_pkg::ATOP_ATOMICSWAP};
ariane_pkg::AMO_ADD : result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD}; ariane_pkg::AMO_ADD:
ariane_pkg::AMO_AND : result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR}; result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD};
ariane_pkg::AMO_OR : result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET}; ariane_pkg::AMO_AND:
ariane_pkg::AMO_XOR : result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR}; result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR};
ariane_pkg::AMO_MAX : result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX}; ariane_pkg::AMO_OR:
ariane_pkg::AMO_MAXU: result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX}; result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET};
ariane_pkg::AMO_MIN : result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN}; ariane_pkg::AMO_XOR:
ariane_pkg::AMO_MINU: result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN}; result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR};
ariane_pkg::AMO_CAS1: result = {axi_pkg::ATOP_NONE, 4'b0000}; // Unsupported ariane_pkg::AMO_MAX:
ariane_pkg::AMO_CAS2: result = {axi_pkg::ATOP_NONE, 4'b0000}; // Unsupported result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX};
ariane_pkg::AMO_MAXU:
result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX};
ariane_pkg::AMO_MIN:
result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN};
ariane_pkg::AMO_MINU:
result = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN};
ariane_pkg::AMO_CAS1: result = {axi_pkg::ATOP_NONE, 4'b0000}; // Unsupported
ariane_pkg::AMO_CAS2: result = {axi_pkg::ATOP_NONE, 4'b0000}; // Unsupported
default: result = 6'b000000; default: result = 6'b000000;
endcase endcase
@ -445,8 +467,8 @@ module axi_adapter #(
endfunction endfunction
function automatic logic amo_returns_data(ariane_pkg::amo_t amo); function automatic logic amo_returns_data(ariane_pkg::amo_t amo);
axi_pkg::atop_t atop = atop_from_amo(amo); axi_pkg::atop_t atop = atop_from_amo(amo);
logic is_load = atop[5:4] == axi_pkg::ATOP_ATOMICLOAD; logic is_load = atop[5:4] == axi_pkg::ATOP_ATOMICLOAD;
logic is_swap_or_cmp = atop[5:4] == axi_pkg::ATOP_ATOMICSWAP[5:4]; logic is_swap_or_cmp = atop[5:4] == axi_pkg::ATOP_ATOMICSWAP[5:4];
return is_load || is_swap_or_cmp; return is_load || is_swap_or_cmp;
endfunction endfunction

View file

@ -18,446 +18,453 @@
// Description: Cache controller // Description: Cache controller
module cache_ctrl import ariane_pkg::*; import std_cache_pkg::*; #( module cache_ctrl
import ariane_pkg::*;
import std_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
input logic flush_i, input logic flush_i,
input logic bypass_i, // enable cache input logic bypass_i, // enable cache
output logic busy_o, output logic busy_o,
// Core request ports // Core request ports
input dcache_req_i_t req_port_i, input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o, output dcache_req_o_t req_port_o,
// SRAM interface // SRAM interface
output logic [DCACHE_SET_ASSOC-1:0] req_o, // req is valid output logic [DCACHE_SET_ASSOC-1:0] req_o, // req is valid
output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array
input logic gnt_i, input logic gnt_i,
output cache_line_t data_o, output cache_line_t data_o,
output cl_be_t be_o, output cl_be_t be_o,
output logic [DCACHE_TAG_WIDTH-1:0] tag_o, //valid one cycle later output logic [DCACHE_TAG_WIDTH-1:0] tag_o, //valid one cycle later
input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i, input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i,
output logic we_o, output logic we_o,
input logic [DCACHE_SET_ASSOC-1:0] hit_way_i, input logic [DCACHE_SET_ASSOC-1:0] hit_way_i,
// Miss handling // Miss handling
output miss_req_t miss_req_o, output miss_req_t miss_req_o,
// return // return
input logic miss_gnt_i, input logic miss_gnt_i,
input logic active_serving_i, // the miss unit is currently active for this unit, serving the miss input logic active_serving_i, // the miss unit is currently active for this unit, serving the miss
input logic [63:0] critical_word_i, input logic [63:0] critical_word_i,
input logic critical_word_valid_i, input logic critical_word_valid_i,
// bypass ports // bypass ports
input logic bypass_gnt_i, input logic bypass_gnt_i,
input logic bypass_valid_i, input logic bypass_valid_i,
input logic [63:0] bypass_data_i, input logic [63:0] bypass_data_i,
// check MSHR for aliasing // check MSHR for aliasing
output logic [55:0] mshr_addr_o, output logic [55:0] mshr_addr_o,
input logic mshr_addr_matches_i, input logic mshr_addr_matches_i,
input logic mshr_index_matches_i input logic mshr_index_matches_i
); );
enum logic [3:0] { enum logic [3:0] {
IDLE, // 0 IDLE, // 0
WAIT_TAG, // 1 WAIT_TAG, // 1
WAIT_TAG_BYPASSED, // 2 WAIT_TAG_BYPASSED, // 2
WAIT_GNT, // 3 WAIT_GNT, // 3
WAIT_GNT_SAVED, // 4 WAIT_GNT_SAVED, // 4
STORE_REQ, // 5 STORE_REQ, // 5
WAIT_REFILL_VALID, // 6 WAIT_REFILL_VALID, // 6
WAIT_REFILL_GNT, // 7 WAIT_REFILL_GNT, // 7
WAIT_TAG_SAVED, // 8 WAIT_TAG_SAVED, // 8
WAIT_MSHR, // 9 WAIT_MSHR, // 9
WAIT_CRITICAL_WORD // 10 WAIT_CRITICAL_WORD // 10
} state_d, state_q; }
state_d, state_q;
typedef struct packed { typedef struct packed {
logic [DCACHE_INDEX_WIDTH-1:0] index; logic [DCACHE_INDEX_WIDTH-1:0] index;
logic [DCACHE_TAG_WIDTH-1:0] tag; logic [DCACHE_TAG_WIDTH-1:0] tag;
logic [DCACHE_TID_WIDTH-1:0] id; logic [DCACHE_TID_WIDTH-1:0] id;
logic [7:0] be; logic [7:0] be;
logic [1:0] size; logic [1:0] size;
logic we; logic we;
logic [63:0] wdata; logic [63:0] wdata;
logic bypass; logic bypass;
logic killed; logic killed;
} mem_req_t; } mem_req_t;
logic [DCACHE_SET_ASSOC-1:0] hit_way_d, hit_way_q; logic [DCACHE_SET_ASSOC-1:0] hit_way_d, hit_way_q;
mem_req_t mem_req_d, mem_req_q; mem_req_t mem_req_d, mem_req_q;
assign busy_o = (state_q != IDLE); assign busy_o = (state_q != IDLE);
assign tag_o = mem_req_d.tag; assign tag_o = mem_req_d.tag;
logic [DCACHE_LINE_WIDTH-1:0] cl_i; logic [DCACHE_LINE_WIDTH-1:0] cl_i;
always_comb begin : way_select always_comb begin : way_select
cl_i = '0; cl_i = '0;
for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) if (hit_way_i[i]) cl_i = data_i[i].data;
if (hit_way_i[i])
cl_i = data_i[i].data;
// cl_i = data_i[one_hot_to_bin(hit_way_i)].data; // cl_i = data_i[one_hot_to_bin(hit_way_i)].data;
end end
// -------------- // --------------
// Cache FSM // Cache FSM
// -------------- // --------------
always_comb begin : cache_ctrl_fsm always_comb begin : cache_ctrl_fsm
automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset; automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
// incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array // incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array
// cache-line offset -> multiple of 64 // cache-line offset -> multiple of 64
cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6; // shift by 6 to the left cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6; // shift by 6 to the left
// default assignments // default assignments
state_d = state_q; state_d = state_q;
mem_req_d = mem_req_q; mem_req_d = mem_req_q;
hit_way_d = hit_way_q; hit_way_d = hit_way_q;
// output assignments // output assignments
req_port_o.data_gnt = 1'b0; req_port_o.data_gnt = 1'b0;
req_port_o.data_rvalid = 1'b0; req_port_o.data_rvalid = 1'b0;
req_port_o.data_rdata = '0; req_port_o.data_rdata = '0;
req_port_o.data_rid = mem_req_q.id; req_port_o.data_rid = mem_req_q.id;
miss_req_o = '0; miss_req_o = '0;
mshr_addr_o = '0; mshr_addr_o = '0;
// Memory array communication // Memory array communication
req_o = '0; req_o = '0;
addr_o = req_port_i.address_index; addr_o = req_port_i.address_index;
data_o = '0; data_o = '0;
be_o = '0; be_o = '0;
we_o = '0; we_o = '0;
mem_req_d.killed |= req_port_i.kill_req; mem_req_d.killed |= req_port_i.kill_req;
case (state_q) case (state_q)
IDLE: begin IDLE: begin
// a new request arrived // a new request arrived
if (req_port_i.data_req && !flush_i) begin if (req_port_i.data_req && !flush_i) begin
// request the cache line - we can do this speculatively // request the cache line - we can do this speculatively
req_o = '1; req_o = '1;
// save index, be and we // save index, be and we
mem_req_d.index = req_port_i.address_index; mem_req_d.index = req_port_i.address_index;
mem_req_d.id = req_port_i.data_id; mem_req_d.id = req_port_i.data_id;
mem_req_d.be = req_port_i.data_be; mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size; mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we; mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata; mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.killed = req_port_i.kill_req; mem_req_d.killed = req_port_i.kill_req;
// Bypass mode, check for uncacheable address here as well // Bypass mode, check for uncacheable address here as well
if (bypass_i) begin if (bypass_i) begin
state_d = WAIT_TAG_BYPASSED; state_d = WAIT_TAG_BYPASSED;
// grant this access only if it was a load // grant this access only if it was a load
req_port_o.data_gnt = (req_port_i.data_we) ? 1'b0 : 1'b1; req_port_o.data_gnt = (req_port_i.data_we) ? 1'b0 : 1'b1;
mem_req_d.bypass = 1'b1; mem_req_d.bypass = 1'b1;
// ------------------ // ------------------
// Cache is enabled // Cache is enabled
// ------------------ // ------------------
end else begin end else begin
// Wait that we have access on the memory array // Wait that we have access on the memory array
if (gnt_i) begin if (gnt_i) begin
state_d = WAIT_TAG; state_d = WAIT_TAG;
mem_req_d.bypass = 1'b0; mem_req_d.bypass = 1'b0;
// only for a read // only for a read
if (!req_port_i.data_we) if (!req_port_i.data_we) req_port_o.data_gnt = 1'b1;
req_port_o.data_gnt = 1'b1;
end
end
end
end end
end
end
end
// cache enabled and waiting for tag // cache enabled and waiting for tag
WAIT_TAG, WAIT_TAG_SAVED: begin WAIT_TAG, WAIT_TAG_SAVED: begin
// check that the client really wants to do the request and that we have a valid tag // check that the client really wants to do the request and that we have a valid tag
if (!req_port_i.kill_req && (req_port_i.tag_valid || state_q == WAIT_TAG_SAVED || mem_req_q.we)) begin if (!req_port_i.kill_req && (req_port_i.tag_valid || state_q == WAIT_TAG_SAVED || mem_req_q.we)) begin
// save tag if we didn't already save it // save tag if we didn't already save it
if (state_q != WAIT_TAG_SAVED) begin if (state_q != WAIT_TAG_SAVED) begin
mem_req_d.tag = req_port_i.address_tag; mem_req_d.tag = req_port_i.address_tag;
end end
// we speculatively request another transfer // we speculatively request another transfer
if (req_port_i.data_req && !flush_i) begin if (req_port_i.data_req && !flush_i) begin
req_o = '1; req_o = '1;
end end
// ------------ // ------------
// HIT CASE // HIT CASE
// ------------ // ------------
if (|hit_way_i) begin if (|hit_way_i) begin
// we can request another cache-line if this was a load // we can request another cache-line if this was a load
if (req_port_i.data_req && !mem_req_q.we && !flush_i) begin if (req_port_i.data_req && !mem_req_q.we && !flush_i) begin
state_d = WAIT_TAG; // switch back to WAIT_TAG state_d = WAIT_TAG; // switch back to WAIT_TAG
mem_req_d.index = req_port_i.address_index; mem_req_d.index = req_port_i.address_index;
mem_req_d.id = req_port_i.data_id; mem_req_d.id = req_port_i.data_id;
mem_req_d.be = req_port_i.data_be; mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size; mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we; mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata; mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.killed = req_port_i.kill_req; mem_req_d.killed = req_port_i.kill_req;
mem_req_d.bypass = 1'b0; mem_req_d.bypass = 1'b0;
req_port_o.data_gnt = gnt_i; req_port_o.data_gnt = gnt_i;
if (!gnt_i) begin if (!gnt_i) begin
state_d = IDLE;
end
end else begin
state_d = IDLE;
end
// this is timing critical
req_port_o.data_rdata = cl_i[cl_offset +: 64];
// report data for a read
if (!mem_req_q.we) begin
req_port_o.data_rvalid = ~mem_req_q.killed;
// else this was a store so we need an extra step to handle it
end else begin
state_d = STORE_REQ;
hit_way_d = hit_way_i;
end
// ------------
// MISS CASE
// ------------
end else begin
// make a miss request
state_d = WAIT_REFILL_GNT;
end
// ----------------------------------------------
// Check MSHR - Miss Status Handling Register
// ----------------------------------------------
mshr_addr_o = {tag_o, mem_req_q.index};
// 1. We've got a match on MSHR and while are going down the
// store path. This means that the miss controller is
// currently evicting our cache-line. As the store is
// non-atomic we need to constantly check whether we are
// matching the address the miss handler is serving.
// Furthermore we need to check for the whole index
// because a completely different memory line could alias
// with the cache-line we are evicting.
// 2. The second case is where we are currently loading and
// the address matches the exact CL the miss controller
// is currently serving. That means we need to wait for
// the miss controller to finish its request before we
// can continue to serve this CL. Otherwise we will fetch
// the cache-line again and potentially loosing any
// content we've written so far. This as a consequence
// means we can't have hit on the CL which mean the
// req_port_o.data_rvalid will be de-asserted.
if ((mshr_index_matches_i && mem_req_q.we) || mshr_addr_matches_i) begin
state_d = WAIT_MSHR;
end
// -------------------------
// Check for cache-ability
// -------------------------
if (!config_pkg::is_inside_cacheable_regions(CVA6Cfg, {{{64-riscv::PLEN}{1'b0}}, tag_o, {DCACHE_INDEX_WIDTH{1'b0}}})) begin
mem_req_d.bypass = 1'b1;
state_d = WAIT_REFILL_GNT;
end
// we are still waiting for a valid tag
end else begin
// request cache line for saved index
addr_o = mem_req_q.index;
req_o = '1;
// check that we still have a memory grant
if (!gnt_i) begin
state_d = WAIT_GNT;
end
end
end
// ~> we already granted the request but lost the memory grant while waiting for the tag
WAIT_GNT, WAIT_GNT_SAVED: begin
// request cache line for saved index
addr_o = mem_req_q.index;
req_o = '1;
// if we get a valid tag while waiting for the memory grant, save it
if (req_port_i.tag_valid) begin
mem_req_d.tag = req_port_i.address_tag;
state_d = WAIT_GNT_SAVED;
end
// we have a memory grant again ~> go back to WAIT_TAG
if (gnt_i) begin
state_d = (state_d == WAIT_GNT) ? WAIT_TAG : WAIT_TAG_SAVED;
end
end
// ~> we are here as we need a second round of memory access for a store
STORE_REQ: begin
// check if the MSHR still doesn't match
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// We need to re-check for MSHR aliasing here as the store requires at least
// two memory look-ups on a single-ported SRAM and therefore is non-atomic
if (!mshr_index_matches_i) begin
// store data, write dirty bit
req_o = hit_way_q;
addr_o = mem_req_q.index;
we_o = 1'b1;
be_o.vldrty = hit_way_q;
// set the correct byte enable
be_o.data[cl_offset>>3 +: 8] = mem_req_q.be;
data_o.data[cl_offset +: 64] = mem_req_q.wdata;
// ~> change the state
data_o.dirty = 1'b1;
data_o.valid = 1'b1;
// got a grant ~> this is finished now
if (gnt_i) begin
req_port_o.data_gnt = 1'b1;
state_d = IDLE;
end
end else begin
state_d = WAIT_MSHR;
end
end // case: STORE_REQ
// we've got a match on MSHR ~> miss unit is currently serving a request
WAIT_MSHR: begin
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// we can start a new request
if (!mshr_index_matches_i) begin
req_o = '1;
addr_o = mem_req_q.index;
if (gnt_i) state_d = WAIT_TAG_SAVED;
end
end
// its for sure a miss
WAIT_TAG_BYPASSED: begin
// check that the client really wants to do the request and that we have a valid tag
if (!req_port_i.kill_req && (req_port_i.tag_valid || mem_req_q.we)) begin
// save tag
mem_req_d.tag = req_port_i.address_tag;
state_d = WAIT_REFILL_GNT;
end
end
// ~> wait for grant from miss unit
WAIT_REFILL_GNT: begin
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
miss_req_o.valid = 1'b1;
miss_req_o.bypass = mem_req_q.bypass;
miss_req_o.addr = {mem_req_q.tag, mem_req_q.index};
miss_req_o.be = mem_req_q.be;
miss_req_o.size = mem_req_q.size;
miss_req_o.we = mem_req_q.we;
miss_req_o.wdata = mem_req_q.wdata;
// got a grant so go to valid
if (bypass_gnt_i) begin
state_d = WAIT_REFILL_VALID;
// if this was a write we still need to give a grant to the store unit
if (mem_req_q.we)
req_port_o.data_gnt = 1'b1;
end
if (miss_gnt_i && !mem_req_q.we)
state_d = WAIT_CRITICAL_WORD;
else if (miss_gnt_i) begin
state_d = IDLE;
req_port_o.data_gnt = 1'b1;
end
// it can be the case that the miss unit is currently serving a
// request which matches ours
// so we need to check the MSHR for matching continuously
// if the MSHR matches we need to go to a different state -> we should never get a matching MSHR and a high miss_gnt_i
if (mshr_addr_matches_i && !active_serving_i) begin
state_d = WAIT_MSHR;
end
end
// ~> wait for critical word to arrive
WAIT_CRITICAL_WORD: begin
// speculatively request another word
if (req_port_i.data_req) begin
// request the cache line
req_o = '1;
end
if (critical_word_valid_i) begin
req_port_o.data_rvalid = ~mem_req_q.killed;
req_port_o.data_rdata = critical_word_i;
// we can make another request
if (req_port_i.data_req) begin
// save index, be and we
mem_req_d.index = req_port_i.address_index;
mem_req_d.id = req_port_i.data_id;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.killed = req_port_i.kill_req;
state_d = IDLE;
// Wait until we have access on the memory array
if (gnt_i) begin
state_d = WAIT_TAG;
mem_req_d.bypass = 1'b0;
req_port_o.data_gnt = 1'b1;
end
end else begin
state_d = IDLE;
end
end
end
// ~> wait until the bypass request is valid
WAIT_REFILL_VALID: begin
// got a valid answer
if (bypass_valid_i) begin
req_port_o.data_rdata = bypass_data_i;
req_port_o.data_rvalid = ~mem_req_q.killed;
state_d = IDLE;
end
end
endcase
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
if (!(state_q inside {
WAIT_REFILL_GNT,
WAIT_CRITICAL_WORD})) begin
state_d = IDLE; state_d = IDLE;
end
end else begin
state_d = IDLE;
end end
end
end
// -------------- // this is timing critical
// Registers req_port_o.data_rdata = cl_i[cl_offset+:64];
// --------------
always_ff @(posedge clk_i or negedge rst_ni) begin // report data for a read
if (~rst_ni) begin if (!mem_req_q.we) begin
state_q <= IDLE; req_port_o.data_rvalid = ~mem_req_q.killed;
mem_req_q <= '0; // else this was a store so we need an extra step to handle it
hit_way_q <= '0; end else begin
state_d = STORE_REQ;
hit_way_d = hit_way_i;
end
// ------------
// MISS CASE
// ------------
end else begin
// make a miss request
state_d = WAIT_REFILL_GNT;
end
// ----------------------------------------------
// Check MSHR - Miss Status Handling Register
// ----------------------------------------------
mshr_addr_o = {tag_o, mem_req_q.index};
// 1. We've got a match on MSHR and while are going down the
// store path. This means that the miss controller is
// currently evicting our cache-line. As the store is
// non-atomic we need to constantly check whether we are
// matching the address the miss handler is serving.
// Furthermore we need to check for the whole index
// because a completely different memory line could alias
// with the cache-line we are evicting.
// 2. The second case is where we are currently loading and
// the address matches the exact CL the miss controller
// is currently serving. That means we need to wait for
// the miss controller to finish its request before we
// can continue to serve this CL. Otherwise we will fetch
// the cache-line again and potentially loosing any
// content we've written so far. This as a consequence
// means we can't have hit on the CL which mean the
// req_port_o.data_rvalid will be de-asserted.
if ((mshr_index_matches_i && mem_req_q.we) || mshr_addr_matches_i) begin
state_d = WAIT_MSHR;
end
// -------------------------
// Check for cache-ability
// -------------------------
if (!config_pkg::is_inside_cacheable_regions(
CVA6Cfg, {{{64 - riscv::PLEN} {1'b0}}, tag_o, {DCACHE_INDEX_WIDTH{1'b0}}}
)) begin
mem_req_d.bypass = 1'b1;
state_d = WAIT_REFILL_GNT;
end
// we are still waiting for a valid tag
end else begin end else begin
state_q <= state_d; // request cache line for saved index
mem_req_q <= mem_req_d; addr_o = mem_req_q.index;
hit_way_q <= hit_way_d; req_o = '1;
end
end
//pragma translate_off // check that we still have a memory grant
`ifndef VERILATOR if (!gnt_i) begin
initial begin state_d = WAIT_GNT;
assert (DCACHE_LINE_WIDTH == 128) else $error ("Cacheline width has to be 128 for the moment. But only small changes required in data select logic"); end
end end
// if the full MSHR address matches so should also match the partial one end
partial_full_mshr_match: assert property(@(posedge clk_i) disable iff (~rst_ni) mshr_addr_matches_i -> mshr_index_matches_i) else $fatal (1, "partial mshr index doesn't match");
// there should never be a valid answer when the MSHR matches and we are not being served // ~> we already granted the request but lost the memory grant while waiting for the tag
no_valid_on_mshr_match: assert property(@(posedge clk_i) disable iff (~rst_ni) (mshr_addr_matches_i && !active_serving_i)-> !req_port_o.data_rvalid || req_port_i.kill_req) else $fatal (1, "rvalid_o should not be set on MSHR match"); WAIT_GNT, WAIT_GNT_SAVED: begin
`endif // request cache line for saved index
//pragma translate_on addr_o = mem_req_q.index;
req_o = '1;
// if we get a valid tag while waiting for the memory grant, save it
if (req_port_i.tag_valid) begin
mem_req_d.tag = req_port_i.address_tag;
state_d = WAIT_GNT_SAVED;
end
// we have a memory grant again ~> go back to WAIT_TAG
if (gnt_i) begin
state_d = (state_d == WAIT_GNT) ? WAIT_TAG : WAIT_TAG_SAVED;
end
end
// ~> we are here as we need a second round of memory access for a store
STORE_REQ: begin
// check if the MSHR still doesn't match
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// We need to re-check for MSHR aliasing here as the store requires at least
// two memory look-ups on a single-ported SRAM and therefore is non-atomic
if (!mshr_index_matches_i) begin
// store data, write dirty bit
req_o = hit_way_q;
addr_o = mem_req_q.index;
we_o = 1'b1;
be_o.vldrty = hit_way_q;
// set the correct byte enable
be_o.data[cl_offset>>3+:8] = mem_req_q.be;
data_o.data[cl_offset+:64] = mem_req_q.wdata;
// ~> change the state
data_o.dirty = 1'b1;
data_o.valid = 1'b1;
// got a grant ~> this is finished now
if (gnt_i) begin
req_port_o.data_gnt = 1'b1;
state_d = IDLE;
end
end else begin
state_d = WAIT_MSHR;
end
end // case: STORE_REQ
// we've got a match on MSHR ~> miss unit is currently serving a request
WAIT_MSHR: begin
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
// we can start a new request
if (!mshr_index_matches_i) begin
req_o = '1;
addr_o = mem_req_q.index;
if (gnt_i) state_d = WAIT_TAG_SAVED;
end
end
// its for sure a miss
WAIT_TAG_BYPASSED: begin
// check that the client really wants to do the request and that we have a valid tag
if (!req_port_i.kill_req && (req_port_i.tag_valid || mem_req_q.we)) begin
// save tag
mem_req_d.tag = req_port_i.address_tag;
state_d = WAIT_REFILL_GNT;
end
end
// ~> wait for grant from miss unit
WAIT_REFILL_GNT: begin
mshr_addr_o = {mem_req_q.tag, mem_req_q.index};
miss_req_o.valid = 1'b1;
miss_req_o.bypass = mem_req_q.bypass;
miss_req_o.addr = {mem_req_q.tag, mem_req_q.index};
miss_req_o.be = mem_req_q.be;
miss_req_o.size = mem_req_q.size;
miss_req_o.we = mem_req_q.we;
miss_req_o.wdata = mem_req_q.wdata;
// got a grant so go to valid
if (bypass_gnt_i) begin
state_d = WAIT_REFILL_VALID;
// if this was a write we still need to give a grant to the store unit
if (mem_req_q.we) req_port_o.data_gnt = 1'b1;
end
if (miss_gnt_i && !mem_req_q.we) state_d = WAIT_CRITICAL_WORD;
else if (miss_gnt_i) begin
state_d = IDLE;
req_port_o.data_gnt = 1'b1;
end
// it can be the case that the miss unit is currently serving a
// request which matches ours
// so we need to check the MSHR for matching continuously
// if the MSHR matches we need to go to a different state -> we should never get a matching MSHR and a high miss_gnt_i
if (mshr_addr_matches_i && !active_serving_i) begin
state_d = WAIT_MSHR;
end
end
// ~> wait for critical word to arrive
WAIT_CRITICAL_WORD: begin
// speculatively request another word
if (req_port_i.data_req) begin
// request the cache line
req_o = '1;
end
if (critical_word_valid_i) begin
req_port_o.data_rvalid = ~mem_req_q.killed;
req_port_o.data_rdata = critical_word_i;
// we can make another request
if (req_port_i.data_req) begin
// save index, be and we
mem_req_d.index = req_port_i.address_index;
mem_req_d.id = req_port_i.data_id;
mem_req_d.be = req_port_i.data_be;
mem_req_d.size = req_port_i.data_size;
mem_req_d.we = req_port_i.data_we;
mem_req_d.wdata = req_port_i.data_wdata;
mem_req_d.killed = req_port_i.kill_req;
state_d = IDLE;
// Wait until we have access on the memory array
if (gnt_i) begin
state_d = WAIT_TAG;
mem_req_d.bypass = 1'b0;
req_port_o.data_gnt = 1'b1;
end
end else begin
state_d = IDLE;
end
end
end
// ~> wait until the bypass request is valid
WAIT_REFILL_VALID: begin
// got a valid answer
if (bypass_valid_i) begin
req_port_o.data_rdata = bypass_data_i;
req_port_o.data_rvalid = ~mem_req_q.killed;
state_d = IDLE;
end
end
endcase
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
if (!(state_q inside {WAIT_REFILL_GNT, WAIT_CRITICAL_WORD})) begin
state_d = IDLE;
end
end
end
// --------------
// Registers
// --------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
state_q <= IDLE;
mem_req_q <= '0;
hit_way_q <= '0;
end else begin
state_q <= state_d;
mem_req_q <= mem_req_d;
hit_way_q <= hit_way_d;
end
end
//pragma translate_off
`ifndef VERILATOR
initial begin
assert (DCACHE_LINE_WIDTH == 128)
else
$error(
"Cacheline width has to be 128 for the moment. But only small changes required in data select logic"
);
end
// if the full MSHR address matches so should also match the partial one
partial_full_mshr_match :
assert property(@(posedge clk_i) disable iff (~rst_ni) mshr_addr_matches_i -> mshr_index_matches_i)
else $fatal(1, "partial mshr index doesn't match");
// there should never be a valid answer when the MSHR matches and we are not being served
no_valid_on_mshr_match :
assert property(@(posedge clk_i) disable iff (~rst_ni) (mshr_addr_matches_i && !active_serving_i)-> !req_port_o.data_rvalid || req_port_i.kill_req)
else $fatal(1, "rvalid_o should not be set on MSHR match");
`endif
//pragma translate_on
endmodule endmodule

View file

@ -10,13 +10,13 @@
// Date: February, 2023 // Date: February, 2023
// Description: Interface adapter for the CVA6 core // Description: Interface adapter for the CVA6 core
module cva6_hpdcache_if_adapter module cva6_hpdcache_if_adapter
import hpdcache_pkg::*; import hpdcache_pkg::*;
// Parameters // Parameters
// {{{ // {{{
#( #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter bit is_load_port = 1'b1 parameter bit is_load_port = 1'b1
) )
// }}} // }}}
@ -24,180 +24,177 @@ import hpdcache_pkg::*;
// {{{ // {{{
( (
// Clock and active-low reset pins // Clock and active-low reset pins
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
// Port ID // Port ID
input hpdcache_pkg::hpdcache_req_sid_t hpdcache_req_sid_i, input hpdcache_pkg::hpdcache_req_sid_t hpdcache_req_sid_i,
// Request/response ports from/to the CVA6 core // Request/response ports from/to the CVA6 core
input ariane_pkg::dcache_req_i_t cva6_req_i, input ariane_pkg::dcache_req_i_t cva6_req_i,
output ariane_pkg::dcache_req_o_t cva6_req_o, output ariane_pkg::dcache_req_o_t cva6_req_o,
input ariane_pkg::amo_req_t cva6_amo_req_i, input ariane_pkg::amo_req_t cva6_amo_req_i,
output ariane_pkg::amo_resp_t cva6_amo_resp_o, output ariane_pkg::amo_resp_t cva6_amo_resp_o,
// Request port to the L1 Dcache // Request port to the L1 Dcache
output logic hpdcache_req_valid_o, output logic hpdcache_req_valid_o,
input logic hpdcache_req_ready_i, input logic hpdcache_req_ready_i,
output hpdcache_pkg::hpdcache_req_t hpdcache_req_o, output hpdcache_pkg::hpdcache_req_t hpdcache_req_o,
output logic hpdcache_req_abort_o, output logic hpdcache_req_abort_o,
output hpdcache_pkg::hpdcache_tag_t hpdcache_req_tag_o, output hpdcache_pkg::hpdcache_tag_t hpdcache_req_tag_o,
output hpdcache_pkg::hpdcache_pma_t hpdcache_req_pma_o, output hpdcache_pkg::hpdcache_pma_t hpdcache_req_pma_o,
// Response port from the L1 Dcache // Response port from the L1 Dcache
input logic hpdcache_rsp_valid_i, input logic hpdcache_rsp_valid_i,
input hpdcache_pkg::hpdcache_rsp_t hpdcache_rsp_i input hpdcache_pkg::hpdcache_rsp_t hpdcache_rsp_i
); );
// }}} // }}}
// Internal nets and registers // Internal nets and registers
// {{{
logic forward_store, forward_amo;
logic hpdcache_req_is_uncacheable;
// }}}
// Request forwarding
// {{{
generate
// LOAD request
// {{{ // {{{
logic forward_store, forward_amo; if (is_load_port == 1'b1) begin : load_port_gen
logic hpdcache_req_is_uncacheable; assign hpdcache_req_is_uncacheable = !config_pkg::is_inside_cacheable_regions(
// }}} CVA6Cfg,
{
{64 - ariane_pkg::DCACHE_TAG_WIDTH{1'b0}}
, cva6_req_i.address_tag
, {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}}
}
);
// Request forwarding // Request forwarding
// {{{ assign hpdcache_req_valid_o = cva6_req_i.data_req,
generate hpdcache_req_o.addr_offset = cva6_req_i.address_index,
// LOAD request hpdcache_req_o.wdata = '0,
// {{{ hpdcache_req_o.op = hpdcache_pkg::HPDCACHE_REQ_LOAD,
if (is_load_port == 1'b1) begin : load_port_gen hpdcache_req_o.be = cva6_req_i.data_be,
assign hpdcache_req_is_uncacheable = hpdcache_req_o.size = cva6_req_i.data_size,
!config_pkg::is_inside_cacheable_regions(CVA6Cfg, hpdcache_req_o.sid = hpdcache_req_sid_i,
{{64 - ariane_pkg::DCACHE_TAG_WIDTH{1'b0}} hpdcache_req_o.tid = cva6_req_i.data_id,
, cva6_req_i.address_tag hpdcache_req_o.need_rsp = 1'b1,
, {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}}}); hpdcache_req_o.phys_indexed = 1'b0,
hpdcache_req_o.addr_tag = '0, // unused on virtually indexed request
hpdcache_req_o.pma = '0; // unused on virtually indexed request
// Request forwarding assign hpdcache_req_abort_o = cva6_req_i.kill_req,
assign hpdcache_req_valid_o = cva6_req_i.data_req, hpdcache_req_tag_o = cva6_req_i.address_tag,
hpdcache_req_o.addr_offset = cva6_req_i.address_index, hpdcache_req_pma_o.uncacheable = hpdcache_req_is_uncacheable,
hpdcache_req_o.wdata = '0, hpdcache_req_pma_o.io = 1'b0;
hpdcache_req_o.op = hpdcache_pkg::HPDCACHE_REQ_LOAD,
hpdcache_req_o.be = cva6_req_i.data_be,
hpdcache_req_o.size = cva6_req_i.data_size,
hpdcache_req_o.sid = hpdcache_req_sid_i,
hpdcache_req_o.tid = cva6_req_i.data_id,
hpdcache_req_o.need_rsp = 1'b1,
hpdcache_req_o.phys_indexed = 1'b0,
hpdcache_req_o.addr_tag = '0, // unused on virtually indexed request
hpdcache_req_o.pma = '0; // unused on virtually indexed request
assign hpdcache_req_abort_o = cva6_req_i.kill_req, // Response forwarding
hpdcache_req_tag_o = cva6_req_i.address_tag, assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i,
hpdcache_req_pma_o.uncacheable = hpdcache_req_is_uncacheable, cva6_req_o.data_rdata = hpdcache_rsp_i.rdata,
hpdcache_req_pma_o.io = 1'b0; cva6_req_o.data_rid = hpdcache_rsp_i.tid,
cva6_req_o.data_gnt = hpdcache_req_ready_i;
end // }}}
// Response forwarding // STORE/AMO request
assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i, // {{{
cva6_req_o.data_rdata = hpdcache_rsp_i.rdata, else begin : store_amo_gen
cva6_req_o.data_rid = hpdcache_rsp_i.tid, hpdcache_req_addr_t amo_addr;
cva6_req_o.data_gnt = hpdcache_req_ready_i; hpdcache_req_offset_t amo_addr_offset;
end hpdcache_tag_t amo_tag;
// }}} logic amo_is_word, amo_is_word_hi;
hpdcache_req_data_t amo_data;
hpdcache_req_be_t amo_data_be;
hpdcache_req_op_t amo_op;
logic [31:0] amo_resp_word;
// STORE/AMO request // AMO logic
// {{{ // {{{
else begin : store_amo_gen always_comb begin : amo_op_comb
hpdcache_req_addr_t amo_addr; amo_addr = cva6_amo_req_i.operand_a;
hpdcache_req_offset_t amo_addr_offset; amo_addr_offset = amo_addr[0+:HPDCACHE_REQ_OFFSET_WIDTH];
hpdcache_tag_t amo_tag; amo_tag = amo_addr[HPDCACHE_REQ_OFFSET_WIDTH+:HPDCACHE_TAG_WIDTH];
logic amo_is_word, amo_is_word_hi; amo_is_word = (cva6_amo_req_i.size == 2'b10);
hpdcache_req_data_t amo_data; amo_is_word_hi = cva6_amo_req_i.operand_a[2];
hpdcache_req_be_t amo_data_be;
hpdcache_req_op_t amo_op;
logic [31:0] amo_resp_word;
// AMO logic amo_data = amo_is_word ? {2{cva6_amo_req_i.operand_b[0+:32]}} : cva6_amo_req_i.operand_b;
// {{{
always_comb
begin : amo_op_comb
amo_addr = cva6_amo_req_i.operand_a;
amo_addr_offset = amo_addr[0 +: HPDCACHE_REQ_OFFSET_WIDTH];
amo_tag = amo_addr[HPDCACHE_REQ_OFFSET_WIDTH +: HPDCACHE_TAG_WIDTH];
amo_is_word = (cva6_amo_req_i.size == 2'b10);
amo_is_word_hi = cva6_amo_req_i.operand_a[2];
amo_data = amo_is_word ? {2{cva6_amo_req_i.operand_b[0 +: 32]}} amo_data_be = amo_is_word_hi ? 8'hf0 : amo_is_word ? 8'h0f : 8'hff;
: cva6_amo_req_i.operand_b;
amo_data_be = amo_is_word_hi ? 8'hf0 : unique case (cva6_amo_req_i.amo_op)
amo_is_word ? 8'h0f : 8'hff; ariane_pkg::AMO_LR: amo_op = HPDCACHE_REQ_AMO_LR;
ariane_pkg::AMO_SC: amo_op = HPDCACHE_REQ_AMO_SC;
ariane_pkg::AMO_SWAP: amo_op = HPDCACHE_REQ_AMO_SWAP;
ariane_pkg::AMO_ADD: amo_op = HPDCACHE_REQ_AMO_ADD;
ariane_pkg::AMO_AND: amo_op = HPDCACHE_REQ_AMO_AND;
ariane_pkg::AMO_OR: amo_op = HPDCACHE_REQ_AMO_OR;
ariane_pkg::AMO_XOR: amo_op = HPDCACHE_REQ_AMO_XOR;
ariane_pkg::AMO_MAX: amo_op = HPDCACHE_REQ_AMO_MAX;
ariane_pkg::AMO_MAXU: amo_op = HPDCACHE_REQ_AMO_MAXU;
ariane_pkg::AMO_MIN: amo_op = HPDCACHE_REQ_AMO_MIN;
ariane_pkg::AMO_MINU: amo_op = HPDCACHE_REQ_AMO_MINU;
default: amo_op = HPDCACHE_REQ_LOAD;
endcase
end
unique case(cva6_amo_req_i.amo_op) assign amo_resp_word = amo_is_word_hi ? hpdcache_rsp_i.rdata[0][32 +: 32]
ariane_pkg::AMO_LR: amo_op = HPDCACHE_REQ_AMO_LR;
ariane_pkg::AMO_SC: amo_op = HPDCACHE_REQ_AMO_SC;
ariane_pkg::AMO_SWAP: amo_op = HPDCACHE_REQ_AMO_SWAP;
ariane_pkg::AMO_ADD: amo_op = HPDCACHE_REQ_AMO_ADD;
ariane_pkg::AMO_AND: amo_op = HPDCACHE_REQ_AMO_AND;
ariane_pkg::AMO_OR: amo_op = HPDCACHE_REQ_AMO_OR;
ariane_pkg::AMO_XOR: amo_op = HPDCACHE_REQ_AMO_XOR;
ariane_pkg::AMO_MAX: amo_op = HPDCACHE_REQ_AMO_MAX;
ariane_pkg::AMO_MAXU: amo_op = HPDCACHE_REQ_AMO_MAXU;
ariane_pkg::AMO_MIN: amo_op = HPDCACHE_REQ_AMO_MIN;
ariane_pkg::AMO_MINU: amo_op = HPDCACHE_REQ_AMO_MINU;
default: amo_op = HPDCACHE_REQ_LOAD;
endcase
end
assign amo_resp_word = amo_is_word_hi ? hpdcache_rsp_i.rdata[0][32 +: 32]
: hpdcache_rsp_i.rdata[0][0 +: 32]; : hpdcache_rsp_i.rdata[0][0 +: 32];
// }}} // }}}
// Request forwarding // Request forwarding
// {{{ // {{{
assign hpdcache_req_is_uncacheable = assign hpdcache_req_is_uncacheable = !config_pkg::is_inside_cacheable_regions(
!config_pkg::is_inside_cacheable_regions(CVA6Cfg, CVA6Cfg,
{{64 - ariane_pkg::DCACHE_TAG_WIDTH{1'b0}} {
, hpdcache_req_o.addr_tag, {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}}}); {64 - ariane_pkg::DCACHE_TAG_WIDTH{1'b0}}
, hpdcache_req_o.addr_tag,
{ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}}
}
);
assign forward_store = cva6_req_i.data_req, assign forward_store = cva6_req_i.data_req, forward_amo = cva6_amo_req_i.req;
forward_amo = cva6_amo_req_i.req;
assign hpdcache_req_valid_o = forward_store | forward_amo, assign hpdcache_req_valid_o = forward_store | forward_amo,
hpdcache_req_o.addr_offset = forward_amo ? amo_addr_offset hpdcache_req_o.addr_offset = forward_amo ? amo_addr_offset : cva6_req_i.address_index,
: cva6_req_i.address_index, hpdcache_req_o.wdata = forward_amo ? amo_data : cva6_req_i.data_wdata,
hpdcache_req_o.wdata = forward_amo ? amo_data hpdcache_req_o.op = forward_amo ? amo_op : hpdcache_pkg::HPDCACHE_REQ_STORE,
: cva6_req_i.data_wdata, hpdcache_req_o.be = forward_amo ? amo_data_be : cva6_req_i.data_be,
hpdcache_req_o.op = forward_amo ? amo_op hpdcache_req_o.size = forward_amo ? cva6_amo_req_i.size : cva6_req_i.data_size,
: hpdcache_pkg::HPDCACHE_REQ_STORE, hpdcache_req_o.sid = hpdcache_req_sid_i,
hpdcache_req_o.be = forward_amo ? amo_data_be hpdcache_req_o.tid = forward_amo ? '1 : '0,
: cva6_req_i.data_be, hpdcache_req_o.need_rsp = forward_amo,
hpdcache_req_o.size = forward_amo ? cva6_amo_req_i.size hpdcache_req_o.phys_indexed = 1'b1,
: cva6_req_i.data_size, hpdcache_req_o.addr_tag = forward_amo ? amo_tag : cva6_req_i.address_tag,
hpdcache_req_o.sid = hpdcache_req_sid_i, hpdcache_req_o.pma.uncacheable = hpdcache_req_is_uncacheable,
hpdcache_req_o.tid = forward_amo ? '1 : '0, hpdcache_req_o.pma.io = 1'b0,
hpdcache_req_o.need_rsp = forward_amo, hpdcache_req_abort_o = 1'b0, // unused on physically indexed requests
hpdcache_req_o.phys_indexed = 1'b1, hpdcache_req_tag_o = '0, // unused on physically indexed requests
hpdcache_req_o.addr_tag = forward_amo ? amo_tag : cva6_req_i.address_tag, hpdcache_req_pma_o = '0; // unused on physically indexed requests
hpdcache_req_o.pma.uncacheable = hpdcache_req_is_uncacheable, // }}}
hpdcache_req_o.pma.io = 1'b0,
hpdcache_req_abort_o = 1'b0, // unused on physically indexed requests
hpdcache_req_tag_o = '0, // unused on physically indexed requests
hpdcache_req_pma_o = '0; // unused on physically indexed requests
// }}}
// Response forwarding // Response forwarding
// {{{ // {{{
assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid != '1), assign cva6_req_o.data_rvalid = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid != '1),
cva6_req_o.data_rdata = hpdcache_rsp_i.rdata, cva6_req_o.data_rdata = hpdcache_rsp_i.rdata,
cva6_req_o.data_rid = hpdcache_rsp_i.tid, cva6_req_o.data_rid = hpdcache_rsp_i.tid,
cva6_req_o.data_gnt = hpdcache_req_ready_i; cva6_req_o.data_gnt = hpdcache_req_ready_i;
assign cva6_amo_resp_o.ack = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid == '1), assign cva6_amo_resp_o.ack = hpdcache_rsp_valid_i && (hpdcache_rsp_i.tid == '1),
cva6_amo_resp_o.result = amo_is_word ? {{32{amo_resp_word[31]}}, amo_resp_word} cva6_amo_resp_o.result = amo_is_word ? {{32{amo_resp_word[31]}}, amo_resp_word}
: hpdcache_rsp_i.rdata[0][63:0]; : hpdcache_rsp_i.rdata[0][63:0];
// }}} // }}}
end end
// }}}
endgenerate
// }}} // }}}
endgenerate
// }}}
// Assertions // Assertions
// {{{ // {{{
// pragma translate_off // pragma translate_off
forward_one_request_assert: assert property (@(posedge clk_i) forward_one_request_assert :
($onehot0({forward_store, forward_amo}))) else assert property (@(posedge clk_i) ($onehot0({forward_store, forward_amo})))
$error("Only one request shall be forwarded"); else $error("Only one request shall be forwarded");
// pragma translate_on // pragma translate_on
// }}} // }}}
endmodule endmodule

View file

@ -16,83 +16,83 @@ module cva6_hpdcache_subsystem
// Parameters // Parameters
// {{{ // {{{
#( #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int NumPorts = 4, parameter int NumPorts = 4,
parameter int NrHwPrefetchers = 4, parameter int NrHwPrefetchers = 4,
parameter type noc_req_t = logic, parameter type noc_req_t = logic,
parameter type noc_resp_t = logic, parameter type noc_resp_t = logic,
parameter type cmo_req_t = logic, parameter type cmo_req_t = logic,
parameter type cmo_rsp_t = logic parameter type cmo_rsp_t = logic
) )
// }}} // }}}
// Ports // Ports
// {{{ // {{{
( (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
// I$ // I$
// {{{ // {{{
input logic icache_en_i, // enable icache (or bypass e.g: in debug mode) input logic icache_en_i, // enable icache (or bypass e.g: in debug mode)
input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together
output logic icache_miss_o, // to performance counter output logic icache_miss_o, // to performance counter
// address translation requests // address translation requests
input ariane_pkg::icache_areq_t icache_areq_i, // to/from frontend input ariane_pkg::icache_areq_t icache_areq_i, // to/from frontend
output ariane_pkg::icache_arsp_t icache_areq_o, output ariane_pkg::icache_arsp_t icache_areq_o,
// data requests // data requests
input ariane_pkg::icache_dreq_t icache_dreq_i, // to/from frontend input ariane_pkg::icache_dreq_t icache_dreq_i, // to/from frontend
output ariane_pkg::icache_drsp_t icache_dreq_o, output ariane_pkg::icache_drsp_t icache_dreq_o,
// }}} // }}}
// D$ // D$
// {{{ // {{{
// Cache management // Cache management
input logic dcache_enable_i, // from CSR input logic dcache_enable_i, // from CSR
input logic dcache_flush_i, // high until acknowledged input logic dcache_flush_i, // high until acknowledged
output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic dcache_miss_o, // we missed on a ld/st output logic dcache_miss_o, // we missed on a ld/st
// AMO interface // AMO interface
input ariane_pkg::amo_req_t dcache_amo_req_i, // from LSU input ariane_pkg::amo_req_t dcache_amo_req_i, // from LSU
output ariane_pkg::amo_resp_t dcache_amo_resp_o, // to LSU output ariane_pkg::amo_resp_t dcache_amo_resp_o, // to LSU
// CMO interface // CMO interface
input cmo_req_t dcache_cmo_req_i, // from CMO FU input cmo_req_t dcache_cmo_req_i, // from CMO FU
output cmo_rsp_t dcache_cmo_resp_o, // to CMO FU output cmo_rsp_t dcache_cmo_resp_o, // to CMO FU
// Request ports // Request ports
input ariane_pkg::dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // from LSU input ariane_pkg::dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // from LSU
output ariane_pkg::dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to LSU output ariane_pkg::dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to LSU
// Write Buffer status // Write Buffer status
output logic wbuffer_empty_o, output logic wbuffer_empty_o,
output logic wbuffer_not_ni_o, output logic wbuffer_not_ni_o,
// Hardware memory prefetcher configuration // Hardware memory prefetcher configuration
input logic [NrHwPrefetchers-1:0] hwpf_base_set_i, input logic [NrHwPrefetchers-1:0] hwpf_base_set_i,
input logic [NrHwPrefetchers-1:0][63:0] hwpf_base_i, input logic [NrHwPrefetchers-1:0][63:0] hwpf_base_i,
output logic [NrHwPrefetchers-1:0][63:0] hwpf_base_o, output logic [NrHwPrefetchers-1:0][63:0] hwpf_base_o,
input logic [NrHwPrefetchers-1:0] hwpf_param_set_i, input logic [NrHwPrefetchers-1:0] hwpf_param_set_i,
input logic [NrHwPrefetchers-1:0][63:0] hwpf_param_i, input logic [NrHwPrefetchers-1:0][63:0] hwpf_param_i,
output logic [NrHwPrefetchers-1:0][63:0] hwpf_param_o, output logic [NrHwPrefetchers-1:0][63:0] hwpf_param_o,
input logic [NrHwPrefetchers-1:0] hwpf_throttle_set_i, input logic [NrHwPrefetchers-1:0] hwpf_throttle_set_i,
input logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_i, input logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_i,
output logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_o, output logic [NrHwPrefetchers-1:0][63:0] hwpf_throttle_o,
output logic [63:0] hwpf_status_o, output logic [ 63:0] hwpf_status_o,
// }}} // }}}
// AXI port to upstream memory/peripherals // AXI port to upstream memory/peripherals
// {{{ // {{{
output noc_req_t noc_req_o, output noc_req_t noc_req_o,
input noc_resp_t noc_resp_i input noc_resp_t noc_resp_i
// }}} // }}}
); );
// }}} // }}}
`include "axi/typedef.svh" `include "axi/typedef.svh"
// I$ instantiation // I$ instantiation
// {{{ // {{{
logic icache_miss_valid, icache_miss_ready; logic icache_miss_valid, icache_miss_ready;
wt_cache_pkg::icache_req_t icache_miss; wt_cache_pkg::icache_req_t icache_miss;
logic icache_miss_resp_valid; logic icache_miss_resp_valid;
wt_cache_pkg::icache_rtrn_t icache_miss_resp; wt_cache_pkg::icache_rtrn_t icache_miss_resp;
@ -100,23 +100,23 @@ module cva6_hpdcache_subsystem
localparam int ICACHE_RDTXID = 1 << (ariane_pkg::MEM_TID_WIDTH - 1); localparam int ICACHE_RDTXID = 1 << (ariane_pkg::MEM_TID_WIDTH - 1);
cva6_icache #( cva6_icache #(
.CVA6Cfg (CVA6Cfg), .CVA6Cfg(CVA6Cfg),
.RdTxId (ICACHE_RDTXID) .RdTxId (ICACHE_RDTXID)
) i_cva6_icache ( ) i_cva6_icache (
.clk_i (clk_i), .clk_i (clk_i),
.rst_ni (rst_ni), .rst_ni (rst_ni),
.flush_i (icache_flush_i), .flush_i (icache_flush_i),
.en_i (icache_en_i), .en_i (icache_en_i),
.miss_o (icache_miss_o), .miss_o (icache_miss_o),
.areq_i (icache_areq_i), .areq_i (icache_areq_i),
.areq_o (icache_areq_o), .areq_o (icache_areq_o),
.dreq_i (icache_dreq_i), .dreq_i (icache_dreq_i),
.dreq_o (icache_dreq_o), .dreq_o (icache_dreq_o),
.mem_rtrn_vld_i (icache_miss_resp_valid), .mem_rtrn_vld_i(icache_miss_resp_valid),
.mem_rtrn_i (icache_miss_resp), .mem_rtrn_i (icache_miss_resp),
.mem_data_req_o (icache_miss_valid), .mem_data_req_o(icache_miss_valid),
.mem_data_ack_i (icache_miss_ready), .mem_data_ack_i(icache_miss_ready),
.mem_data_o (icache_miss) .mem_data_o (icache_miss)
); );
// }}} // }}}
@ -135,10 +135,10 @@ module cva6_hpdcache_subsystem
// NumPorts + 1: Hardware Memory Prefetcher (hwpf) // NumPorts + 1: Hardware Memory Prefetcher (hwpf)
localparam int HPDCACHE_NREQUESTERS = NumPorts + 2; localparam int HPDCACHE_NREQUESTERS = NumPorts + 2;
typedef logic [CVA6Cfg.AxiAddrWidth-1:0] hpdcache_mem_addr_t; typedef logic [CVA6Cfg.AxiAddrWidth-1:0] hpdcache_mem_addr_t;
typedef logic [ariane_pkg::MEM_TID_WIDTH-1:0] hpdcache_mem_id_t; typedef logic [ariane_pkg::MEM_TID_WIDTH-1:0] hpdcache_mem_id_t;
typedef logic [CVA6Cfg.AxiDataWidth-1:0] hpdcache_mem_data_t; typedef logic [CVA6Cfg.AxiDataWidth-1:0] hpdcache_mem_data_t;
typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] hpdcache_mem_be_t; typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] hpdcache_mem_be_t;
`HPDCACHE_TYPEDEF_MEM_REQ_T(hpdcache_mem_req_t, hpdcache_mem_addr_t, hpdcache_mem_id_t); `HPDCACHE_TYPEDEF_MEM_REQ_T(hpdcache_mem_req_t, hpdcache_mem_addr_t, hpdcache_mem_id_t);
`HPDCACHE_TYPEDEF_MEM_RESP_R_T(hpdcache_mem_resp_r_t, hpdcache_mem_id_t, hpdcache_mem_data_t); `HPDCACHE_TYPEDEF_MEM_RESP_R_T(hpdcache_mem_resp_r_t, hpdcache_mem_id_t, hpdcache_mem_data_t);
`HPDCACHE_TYPEDEF_MEM_REQ_W_T(hpdcache_mem_req_w_t, hpdcache_mem_data_t, hpdcache_mem_be_t); `HPDCACHE_TYPEDEF_MEM_REQ_W_T(hpdcache_mem_req_w_t, hpdcache_mem_data_t, hpdcache_mem_be_t);
@ -146,63 +146,63 @@ module cva6_hpdcache_subsystem
typedef logic [63:0] hwpf_stride_param_t; typedef logic [63:0] hwpf_stride_param_t;
logic dcache_req_valid [HPDCACHE_NREQUESTERS-1:0]; logic dcache_req_valid[HPDCACHE_NREQUESTERS-1:0];
logic dcache_req_ready [HPDCACHE_NREQUESTERS-1:0]; logic dcache_req_ready[HPDCACHE_NREQUESTERS-1:0];
hpdcache_pkg::hpdcache_req_t dcache_req [HPDCACHE_NREQUESTERS-1:0]; hpdcache_pkg::hpdcache_req_t dcache_req [HPDCACHE_NREQUESTERS-1:0];
logic dcache_req_abort [HPDCACHE_NREQUESTERS-1:0]; logic dcache_req_abort[HPDCACHE_NREQUESTERS-1:0];
hpdcache_pkg::hpdcache_tag_t dcache_req_tag [HPDCACHE_NREQUESTERS-1:0]; hpdcache_pkg::hpdcache_tag_t dcache_req_tag [HPDCACHE_NREQUESTERS-1:0];
hpdcache_pkg::hpdcache_pma_t dcache_req_pma [HPDCACHE_NREQUESTERS-1:0]; hpdcache_pkg::hpdcache_pma_t dcache_req_pma [HPDCACHE_NREQUESTERS-1:0];
logic dcache_rsp_valid [HPDCACHE_NREQUESTERS-1:0]; logic dcache_rsp_valid[HPDCACHE_NREQUESTERS-1:0];
hpdcache_pkg::hpdcache_rsp_t dcache_rsp [HPDCACHE_NREQUESTERS-1:0]; hpdcache_pkg::hpdcache_rsp_t dcache_rsp [HPDCACHE_NREQUESTERS-1:0];
logic dcache_read_miss, dcache_write_miss; logic dcache_read_miss, dcache_write_miss;
logic [2:0] snoop_valid; logic [ 2:0] snoop_valid;
logic [2:0] snoop_abort; logic [ 2:0] snoop_abort;
hpdcache_pkg::hpdcache_req_offset_t [2:0] snoop_addr_offset; hpdcache_pkg::hpdcache_req_offset_t [ 2:0] snoop_addr_offset;
hpdcache_pkg::hpdcache_tag_t [2:0] snoop_addr_tag; hpdcache_pkg::hpdcache_tag_t [ 2:0] snoop_addr_tag;
logic [2:0] snoop_phys_indexed; logic [ 2:0] snoop_phys_indexed;
logic dcache_cmo_req_is_prefetch; logic dcache_cmo_req_is_prefetch;
logic dcache_miss_ready; logic dcache_miss_ready;
logic dcache_miss_valid; logic dcache_miss_valid;
hpdcache_mem_req_t dcache_miss; hpdcache_mem_req_t dcache_miss;
logic dcache_miss_resp_ready; logic dcache_miss_resp_ready;
logic dcache_miss_resp_valid; logic dcache_miss_resp_valid;
hpdcache_mem_resp_r_t dcache_miss_resp; hpdcache_mem_resp_r_t dcache_miss_resp;
logic dcache_wbuf_ready; logic dcache_wbuf_ready;
logic dcache_wbuf_valid; logic dcache_wbuf_valid;
hpdcache_mem_req_t dcache_wbuf; hpdcache_mem_req_t dcache_wbuf;
logic dcache_wbuf_data_ready; logic dcache_wbuf_data_ready;
logic dcache_wbuf_data_valid; logic dcache_wbuf_data_valid;
hpdcache_mem_req_w_t dcache_wbuf_data; hpdcache_mem_req_w_t dcache_wbuf_data;
logic dcache_wbuf_resp_ready; logic dcache_wbuf_resp_ready;
logic dcache_wbuf_resp_valid; logic dcache_wbuf_resp_valid;
hpdcache_mem_resp_w_t dcache_wbuf_resp; hpdcache_mem_resp_w_t dcache_wbuf_resp;
logic dcache_uc_read_ready; logic dcache_uc_read_ready;
logic dcache_uc_read_valid; logic dcache_uc_read_valid;
hpdcache_mem_req_t dcache_uc_read; hpdcache_mem_req_t dcache_uc_read;
logic dcache_uc_read_resp_ready; logic dcache_uc_read_resp_ready;
logic dcache_uc_read_resp_valid; logic dcache_uc_read_resp_valid;
hpdcache_mem_resp_r_t dcache_uc_read_resp; hpdcache_mem_resp_r_t dcache_uc_read_resp;
logic dcache_uc_write_ready; logic dcache_uc_write_ready;
logic dcache_uc_write_valid; logic dcache_uc_write_valid;
hpdcache_mem_req_t dcache_uc_write; hpdcache_mem_req_t dcache_uc_write;
logic dcache_uc_write_data_ready; logic dcache_uc_write_data_ready;
logic dcache_uc_write_data_valid; logic dcache_uc_write_data_valid;
hpdcache_mem_req_w_t dcache_uc_write_data; hpdcache_mem_req_w_t dcache_uc_write_data;
logic dcache_uc_write_resp_ready; logic dcache_uc_write_resp_ready;
logic dcache_uc_write_resp_valid; logic dcache_uc_write_resp_valid;
hpdcache_mem_resp_w_t dcache_uc_write_resp; hpdcache_mem_resp_w_t dcache_uc_write_resp;
hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_in; hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_in;
hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_out; hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_out;
@ -210,263 +210,262 @@ module cva6_hpdcache_subsystem
generate generate
ariane_pkg::dcache_req_i_t dcache_req_ports[HPDCACHE_NREQUESTERS-1:0]; ariane_pkg::dcache_req_i_t dcache_req_ports[HPDCACHE_NREQUESTERS-1:0];
for (genvar r = 0; r < (NumPorts-1); r++) begin : cva6_hpdcache_load_if_adapter_gen for (genvar r = 0; r < (NumPorts - 1); r++) begin : cva6_hpdcache_load_if_adapter_gen
assign dcache_req_ports[r] = dcache_req_ports_i[r]; assign dcache_req_ports[r] = dcache_req_ports_i[r];
cva6_hpdcache_if_adapter #( cva6_hpdcache_if_adapter #(
.CVA6Cfg (CVA6Cfg), .CVA6Cfg (CVA6Cfg),
.is_load_port (1'b1) .is_load_port(1'b1)
) i_cva6_hpdcache_load_if_adapter ( ) i_cva6_hpdcache_load_if_adapter (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.hpdcache_req_sid_i (hpdcache_pkg::hpdcache_req_sid_t'(r)), .hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(r)),
.cva6_req_i (dcache_req_ports[r]), .cva6_req_i (dcache_req_ports[r]),
.cva6_req_o (dcache_req_ports_o[r]), .cva6_req_o (dcache_req_ports_o[r]),
.cva6_amo_req_i ('0), .cva6_amo_req_i ('0),
.cva6_amo_resp_o (/* unused */), .cva6_amo_resp_o( /* unused */),
.hpdcache_req_valid_o (dcache_req_valid[r]), .hpdcache_req_valid_o(dcache_req_valid[r]),
.hpdcache_req_ready_i (dcache_req_ready[r]), .hpdcache_req_ready_i(dcache_req_ready[r]),
.hpdcache_req_o (dcache_req[r]), .hpdcache_req_o (dcache_req[r]),
.hpdcache_req_abort_o (dcache_req_abort[r]), .hpdcache_req_abort_o(dcache_req_abort[r]),
.hpdcache_req_tag_o (dcache_req_tag[r]), .hpdcache_req_tag_o (dcache_req_tag[r]),
.hpdcache_req_pma_o (dcache_req_pma[r]), .hpdcache_req_pma_o (dcache_req_pma[r]),
.hpdcache_rsp_valid_i (dcache_rsp_valid[r]), .hpdcache_rsp_valid_i(dcache_rsp_valid[r]),
.hpdcache_rsp_i (dcache_rsp[r]) .hpdcache_rsp_i (dcache_rsp[r])
); );
end end
cva6_hpdcache_if_adapter #( cva6_hpdcache_if_adapter #(
.CVA6Cfg (CVA6Cfg), .CVA6Cfg (CVA6Cfg),
.is_load_port (1'b0) .is_load_port(1'b0)
) i_cva6_hpdcache_store_if_adapter ( ) i_cva6_hpdcache_store_if_adapter (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.hpdcache_req_sid_i (hpdcache_pkg::hpdcache_req_sid_t'(NumPorts-1)), .hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts - 1)),
.cva6_req_i (dcache_req_ports_i[NumPorts-1]), .cva6_req_i (dcache_req_ports_i[NumPorts-1]),
.cva6_req_o (dcache_req_ports_o[NumPorts-1]), .cva6_req_o (dcache_req_ports_o[NumPorts-1]),
.cva6_amo_req_i (dcache_amo_req_i), .cva6_amo_req_i (dcache_amo_req_i),
.cva6_amo_resp_o (dcache_amo_resp_o), .cva6_amo_resp_o(dcache_amo_resp_o),
.hpdcache_req_valid_o (dcache_req_valid[NumPorts-1]), .hpdcache_req_valid_o(dcache_req_valid[NumPorts-1]),
.hpdcache_req_ready_i (dcache_req_ready[NumPorts-1]), .hpdcache_req_ready_i(dcache_req_ready[NumPorts-1]),
.hpdcache_req_o (dcache_req[NumPorts-1]), .hpdcache_req_o (dcache_req[NumPorts-1]),
.hpdcache_req_abort_o (dcache_req_abort[NumPorts-1]), .hpdcache_req_abort_o(dcache_req_abort[NumPorts-1]),
.hpdcache_req_tag_o (dcache_req_tag[NumPorts-1]), .hpdcache_req_tag_o (dcache_req_tag[NumPorts-1]),
.hpdcache_req_pma_o (dcache_req_pma[NumPorts-1]), .hpdcache_req_pma_o (dcache_req_pma[NumPorts-1]),
.hpdcache_rsp_valid_i (dcache_rsp_valid[NumPorts-1]), .hpdcache_rsp_valid_i(dcache_rsp_valid[NumPorts-1]),
.hpdcache_rsp_i (dcache_rsp[NumPorts-1]) .hpdcache_rsp_i (dcache_rsp[NumPorts-1])
); );
`ifdef HPDCACHE_ENABLE_CMO `ifdef HPDCACHE_ENABLE_CMO
cva6_hpdcache_cmo_if_adapter #( cva6_hpdcache_cmo_if_adapter #(
.cmo_req_t (cmo_req_t), .cmo_req_t(cmo_req_t),
.cmo_rsp_t (cmo_rsp_t) .cmo_rsp_t(cmo_rsp_t)
) i_cva6_hpdcache_cmo_if_adapter ( ) i_cva6_hpdcache_cmo_if_adapter (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.dcache_req_sid_i (hpdcache_pkg::hpdcache_req_sid_t'(NumPorts)), .dcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts)),
.cva6_cmo_req_i (dcache_cmo_req_i), .cva6_cmo_req_i (dcache_cmo_req_i),
.cva6_cmo_resp_o (dcache_cmo_resp_o), .cva6_cmo_resp_o(dcache_cmo_resp_o),
.dcache_req_valid_o (dcache_req_valid[NumPorts]), .dcache_req_valid_o(dcache_req_valid[NumPorts]),
.dcache_req_ready_i (dcache_req_ready[NumPorts]), .dcache_req_ready_i(dcache_req_ready[NumPorts]),
.dcache_req_o (dcache_req[NumPorts]), .dcache_req_o (dcache_req[NumPorts]),
.dcache_req_abort_o (dcache_req_abort[NumPorts]), .dcache_req_abort_o(dcache_req_abort[NumPorts]),
.dcache_req_tag_o (dcache_req_tag[NumPorts]), .dcache_req_tag_o (dcache_req_tag[NumPorts]),
.dcache_req_pma_o (dcache_req_pma[NumPorts]), .dcache_req_pma_o (dcache_req_pma[NumPorts]),
.dcache_rsp_valid_i (dcache_rsp_valid[NumPorts]), .dcache_rsp_valid_i(dcache_rsp_valid[NumPorts]),
.dcache_rsp_i (dcache_rsp[NumPorts]) .dcache_rsp_i (dcache_rsp[NumPorts])
); );
`else `else
assign dcache_req_valid[NumPorts] = 1'b0, assign dcache_req_valid[NumPorts] = 1'b0,
dcache_req [NumPorts] = '0, dcache_req[NumPorts] = '0,
dcache_req_abort[NumPorts] = 1'b0, dcache_req_abort[NumPorts] = 1'b0,
dcache_req_tag [NumPorts] = '0, dcache_req_tag[NumPorts] = '0,
dcache_req_pma [NumPorts] = '0; dcache_req_pma[NumPorts] = '0;
`endif `endif
endgenerate endgenerate
// Snoop load port // Snoop load port
assign snoop_valid[0] = dcache_req_valid[1] & dcache_req_ready[1], assign snoop_valid[0] = dcache_req_valid[1] & dcache_req_ready[1],
snoop_abort[0] = dcache_req_abort[1], snoop_abort[0] = dcache_req_abort[1],
snoop_addr_offset[0] = dcache_req[1].addr_offset, snoop_addr_offset[0] = dcache_req[1].addr_offset,
snoop_addr_tag[0] = dcache_req_tag[1], snoop_addr_tag[0] = dcache_req_tag[1],
snoop_phys_indexed[0] = dcache_req[1].phys_indexed; snoop_phys_indexed[0] = dcache_req[1].phys_indexed;
// Snoop Store/AMO port // Snoop Store/AMO port
assign snoop_valid[1] = dcache_req_valid[NumPorts-1] & dcache_req_ready[NumPorts-1], assign snoop_valid[1] = dcache_req_valid[NumPorts-1] & dcache_req_ready[NumPorts-1],
snoop_abort[1] = dcache_req_abort[NumPorts-1], snoop_abort[1] = dcache_req_abort[NumPorts-1],
snoop_addr_offset[1] = dcache_req[NumPorts-1].addr_offset, snoop_addr_offset[1] = dcache_req[NumPorts-1].addr_offset,
snoop_addr_tag[1] = dcache_req_tag[NumPorts-1], snoop_addr_tag[1] = dcache_req_tag[NumPorts-1],
snoop_phys_indexed[1] = dcache_req[NumPorts-1].phys_indexed; snoop_phys_indexed[1] = dcache_req[NumPorts-1].phys_indexed;
`ifdef HPDCACHE_ENABLE_CMO `ifdef HPDCACHE_ENABLE_CMO
// Snoop CMO port (in case of read prefetch accesses) // Snoop CMO port (in case of read prefetch accesses)
assign dcache_cmo_req_is_prefetch = assign dcache_cmo_req_is_prefetch = hpdcache_pkg::is_cmo_prefetch(
hpdcache_pkg::is_cmo_prefetch(dcache_req[NumPorts].op, dcache_req[NumPorts].size); dcache_req[NumPorts].op, dcache_req[NumPorts].size
);
assign snoop_valid[2] = dcache_req_valid[NumPorts] assign snoop_valid[2] = dcache_req_valid[NumPorts]
& dcache_req_ready[NumPorts] & dcache_req_ready[NumPorts]
& dcache_cmo_req_is_prefetch, & dcache_cmo_req_is_prefetch,
snoop_abort[2] = dcache_req_abort[NumPorts], snoop_abort[2] = dcache_req_abort[NumPorts],
snoop_addr_offset[2] = dcache_req[NumPorts].addr_offset, snoop_addr_offset[2] = dcache_req[NumPorts].addr_offset,
snoop_addr_tag[2] = dcache_req_tag[NumPorts], snoop_addr_tag[2] = dcache_req_tag[NumPorts],
snoop_phys_indexed[2] = dcache_req[NumPorts].phys_indexed; snoop_phys_indexed[2] = dcache_req[NumPorts].phys_indexed;
`else `else
assign snoop_valid[2] = 1'b0, assign snoop_valid[2] = 1'b0,
snoop_abort[2] = 1'b0, snoop_abort[2] = 1'b0,
snoop_addr_offset[2] = '0, snoop_addr_offset[2] = '0,
snoop_addr_tag[2] = '0, snoop_addr_tag[2] = '0,
snoop_phys_indexed[2] = 1'b0; snoop_phys_indexed[2] = 1'b0;
`endif `endif
generate generate
for (genvar h = 0; h < NrHwPrefetchers; h++) begin : hwpf_throttle_gen for (genvar h = 0; h < NrHwPrefetchers; h++) begin : hwpf_throttle_gen
assign hwpf_throttle_in[h] = hwpf_stride_pkg::hwpf_stride_throttle_t'(hwpf_throttle_i[h]), assign hwpf_throttle_in[h] = hwpf_stride_pkg::hwpf_stride_throttle_t'(hwpf_throttle_i[h]),
hwpf_throttle_o[h] = hwpf_stride_pkg::hwpf_stride_param_t'(hwpf_throttle_out[h]); hwpf_throttle_o[h] = hwpf_stride_pkg::hwpf_stride_param_t'(hwpf_throttle_out[h]);
end end
endgenerate endgenerate
hwpf_stride_wrapper #( hwpf_stride_wrapper #(
.NUM_HW_PREFETCH (NrHwPrefetchers), .NUM_HW_PREFETCH(NrHwPrefetchers),
.NUM_SNOOP_PORTS (3) .NUM_SNOOP_PORTS(3)
) i_hwpf_stride_wrapper ( ) i_hwpf_stride_wrapper (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.hwpf_stride_base_set_i (hwpf_base_set_i), .hwpf_stride_base_set_i (hwpf_base_set_i),
.hwpf_stride_base_i (hwpf_base_i), .hwpf_stride_base_i (hwpf_base_i),
.hwpf_stride_base_o (hwpf_base_o), .hwpf_stride_base_o (hwpf_base_o),
.hwpf_stride_param_set_i (hwpf_param_set_i), .hwpf_stride_param_set_i (hwpf_param_set_i),
.hwpf_stride_param_i (hwpf_param_i), .hwpf_stride_param_i (hwpf_param_i),
.hwpf_stride_param_o (hwpf_param_o), .hwpf_stride_param_o (hwpf_param_o),
.hwpf_stride_throttle_set_i (hwpf_throttle_set_i), .hwpf_stride_throttle_set_i(hwpf_throttle_set_i),
.hwpf_stride_throttle_i (hwpf_throttle_in), .hwpf_stride_throttle_i (hwpf_throttle_in),
.hwpf_stride_throttle_o (hwpf_throttle_out), .hwpf_stride_throttle_o (hwpf_throttle_out),
.hwpf_stride_status_o (hwpf_status_o), .hwpf_stride_status_o (hwpf_status_o),
.snoop_valid_i (snoop_valid), .snoop_valid_i (snoop_valid),
.snoop_abort_i (snoop_abort), .snoop_abort_i (snoop_abort),
.snoop_addr_offset_i (snoop_addr_offset), .snoop_addr_offset_i (snoop_addr_offset),
.snoop_addr_tag_i (snoop_addr_tag), .snoop_addr_tag_i (snoop_addr_tag),
.snoop_phys_indexed_i (snoop_phys_indexed), .snoop_phys_indexed_i(snoop_phys_indexed),
.hpdcache_req_sid_i (hpdcache_pkg::hpdcache_req_sid_t'(NumPorts+1)), .hpdcache_req_sid_i(hpdcache_pkg::hpdcache_req_sid_t'(NumPorts + 1)),
.hpdcache_req_valid_o (dcache_req_valid[NumPorts+1]), .hpdcache_req_valid_o(dcache_req_valid[NumPorts+1]),
.hpdcache_req_ready_i (dcache_req_ready[NumPorts+1]), .hpdcache_req_ready_i(dcache_req_ready[NumPorts+1]),
.hpdcache_req_o (dcache_req[NumPorts+1]), .hpdcache_req_o (dcache_req[NumPorts+1]),
.hpdcache_req_abort_o (dcache_req_abort[NumPorts+1]), .hpdcache_req_abort_o(dcache_req_abort[NumPorts+1]),
.hpdcache_req_tag_o (dcache_req_tag[NumPorts+1]), .hpdcache_req_tag_o (dcache_req_tag[NumPorts+1]),
.hpdcache_req_pma_o (dcache_req_pma[NumPorts+1]), .hpdcache_req_pma_o (dcache_req_pma[NumPorts+1]),
.hpdcache_rsp_valid_i (dcache_rsp_valid[NumPorts+1]), .hpdcache_rsp_valid_i(dcache_rsp_valid[NumPorts+1]),
.hpdcache_rsp_i (dcache_rsp[NumPorts+1]) .hpdcache_rsp_i (dcache_rsp[NumPorts+1])
); );
hpdcache #( hpdcache #(
.NREQUESTERS (HPDCACHE_NREQUESTERS), .NREQUESTERS (HPDCACHE_NREQUESTERS),
.HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH), .HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH),
.HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth), .HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth),
.hpdcache_mem_req_t (hpdcache_mem_req_t), .hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
.hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t), .hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
.hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t) .hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t)
) i_hpdcache( ) i_hpdcache (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.wbuf_flush_i (dcache_flush_i), .wbuf_flush_i(dcache_flush_i),
.core_req_valid_i (dcache_req_valid), .core_req_valid_i(dcache_req_valid),
.core_req_ready_o (dcache_req_ready), .core_req_ready_o(dcache_req_ready),
.core_req_i (dcache_req), .core_req_i (dcache_req),
.core_req_abort_i (dcache_req_abort), .core_req_abort_i(dcache_req_abort),
.core_req_tag_i (dcache_req_tag), .core_req_tag_i (dcache_req_tag),
.core_req_pma_i (dcache_req_pma), .core_req_pma_i (dcache_req_pma),
.core_rsp_valid_o (dcache_rsp_valid), .core_rsp_valid_o(dcache_rsp_valid),
.core_rsp_o (dcache_rsp), .core_rsp_o (dcache_rsp),
.mem_req_miss_read_ready_i (dcache_miss_ready), .mem_req_miss_read_ready_i(dcache_miss_ready),
.mem_req_miss_read_valid_o (dcache_miss_valid), .mem_req_miss_read_valid_o(dcache_miss_valid),
.mem_req_miss_read_o (dcache_miss), .mem_req_miss_read_o (dcache_miss),
.mem_resp_miss_read_ready_o (dcache_miss_resp_ready), .mem_resp_miss_read_ready_o(dcache_miss_resp_ready),
.mem_resp_miss_read_valid_i (dcache_miss_resp_valid), .mem_resp_miss_read_valid_i(dcache_miss_resp_valid),
.mem_resp_miss_read_i (dcache_miss_resp), .mem_resp_miss_read_i (dcache_miss_resp),
.mem_req_wbuf_write_ready_i (dcache_wbuf_ready), .mem_req_wbuf_write_ready_i(dcache_wbuf_ready),
.mem_req_wbuf_write_valid_o (dcache_wbuf_valid), .mem_req_wbuf_write_valid_o(dcache_wbuf_valid),
.mem_req_wbuf_write_o (dcache_wbuf), .mem_req_wbuf_write_o (dcache_wbuf),
.mem_req_wbuf_write_data_ready_i (dcache_wbuf_data_ready), .mem_req_wbuf_write_data_ready_i(dcache_wbuf_data_ready),
.mem_req_wbuf_write_data_valid_o (dcache_wbuf_data_valid), .mem_req_wbuf_write_data_valid_o(dcache_wbuf_data_valid),
.mem_req_wbuf_write_data_o (dcache_wbuf_data), .mem_req_wbuf_write_data_o (dcache_wbuf_data),
.mem_resp_wbuf_write_ready_o (dcache_wbuf_resp_ready), .mem_resp_wbuf_write_ready_o(dcache_wbuf_resp_ready),
.mem_resp_wbuf_write_valid_i (dcache_wbuf_resp_valid), .mem_resp_wbuf_write_valid_i(dcache_wbuf_resp_valid),
.mem_resp_wbuf_write_i (dcache_wbuf_resp), .mem_resp_wbuf_write_i (dcache_wbuf_resp),
.mem_req_uc_read_ready_i (dcache_uc_read_ready), .mem_req_uc_read_ready_i(dcache_uc_read_ready),
.mem_req_uc_read_valid_o (dcache_uc_read_valid), .mem_req_uc_read_valid_o(dcache_uc_read_valid),
.mem_req_uc_read_o (dcache_uc_read), .mem_req_uc_read_o (dcache_uc_read),
.mem_resp_uc_read_ready_o (dcache_uc_read_resp_ready), .mem_resp_uc_read_ready_o(dcache_uc_read_resp_ready),
.mem_resp_uc_read_valid_i (dcache_uc_read_resp_valid), .mem_resp_uc_read_valid_i(dcache_uc_read_resp_valid),
.mem_resp_uc_read_i (dcache_uc_read_resp), .mem_resp_uc_read_i (dcache_uc_read_resp),
.mem_req_uc_write_ready_i (dcache_uc_write_ready), .mem_req_uc_write_ready_i(dcache_uc_write_ready),
.mem_req_uc_write_valid_o (dcache_uc_write_valid), .mem_req_uc_write_valid_o(dcache_uc_write_valid),
.mem_req_uc_write_o (dcache_uc_write), .mem_req_uc_write_o (dcache_uc_write),
.mem_req_uc_write_data_ready_i (dcache_uc_write_data_ready), .mem_req_uc_write_data_ready_i(dcache_uc_write_data_ready),
.mem_req_uc_write_data_valid_o (dcache_uc_write_data_valid), .mem_req_uc_write_data_valid_o(dcache_uc_write_data_valid),
.mem_req_uc_write_data_o (dcache_uc_write_data), .mem_req_uc_write_data_o (dcache_uc_write_data),
.mem_resp_uc_write_ready_o (dcache_uc_write_resp_ready), .mem_resp_uc_write_ready_o(dcache_uc_write_resp_ready),
.mem_resp_uc_write_valid_i (dcache_uc_write_resp_valid), .mem_resp_uc_write_valid_i(dcache_uc_write_resp_valid),
.mem_resp_uc_write_i (dcache_uc_write_resp), .mem_resp_uc_write_i (dcache_uc_write_resp),
.evt_cache_write_miss_o (dcache_write_miss), .evt_cache_write_miss_o(dcache_write_miss),
.evt_cache_read_miss_o (dcache_read_miss), .evt_cache_read_miss_o (dcache_read_miss),
.evt_uncached_req_o (/* unused */), .evt_uncached_req_o ( /* unused */),
.evt_cmo_req_o (/* unused */), .evt_cmo_req_o ( /* unused */),
.evt_write_req_o (/* unused */), .evt_write_req_o ( /* unused */),
.evt_read_req_o (/* unused */), .evt_read_req_o ( /* unused */),
.evt_prefetch_req_o (/* unused */), .evt_prefetch_req_o ( /* unused */),
.evt_req_on_hold_o (/* unused */), .evt_req_on_hold_o ( /* unused */),
.evt_rtab_rollback_o (/* unused */), .evt_rtab_rollback_o ( /* unused */),
.evt_stall_refill_o (/* unused */), .evt_stall_refill_o ( /* unused */),
.evt_stall_o (/* unused */), .evt_stall_o ( /* unused */),
.wbuf_empty_o (wbuffer_empty_o), .wbuf_empty_o(wbuffer_empty_o),
.cfg_enable_i (dcache_enable_i), .cfg_enable_i (dcache_enable_i),
.cfg_wbuf_threshold_i (4'd2), .cfg_wbuf_threshold_i (4'd2),
.cfg_wbuf_reset_timecnt_on_write_i (1'b1), .cfg_wbuf_reset_timecnt_on_write_i (1'b1),
.cfg_wbuf_sequential_waw_i (1'b0), .cfg_wbuf_sequential_waw_i (1'b0),
.cfg_wbuf_inhibit_write_coalescing_i (1'b0), .cfg_wbuf_inhibit_write_coalescing_i(1'b0),
.cfg_prefetch_updt_plru_i (1'b1), .cfg_prefetch_updt_plru_i (1'b1),
.cfg_error_on_cacheable_amo_i (1'b0), .cfg_error_on_cacheable_amo_i (1'b0),
.cfg_rtab_single_entry_i (1'b0) .cfg_rtab_single_entry_i (1'b0)
); );
assign dcache_miss_o = dcache_read_miss, assign dcache_miss_o = dcache_read_miss, wbuffer_not_ni_o = wbuffer_empty_o;
wbuffer_not_ni_o = wbuffer_empty_o;
always_ff @(posedge clk_i or negedge rst_ni) always_ff @(posedge clk_i or negedge rst_ni) begin : dcache_flush_ff
begin : dcache_flush_ff
if (!rst_ni) dcache_flush_ack_o <= 1'b0; if (!rst_ni) dcache_flush_ack_o <= 1'b0;
else dcache_flush_ack_o <= ~dcache_flush_ack_o & dcache_flush_i; else dcache_flush_ack_o <= ~dcache_flush_ack_o & dcache_flush_i;
end end
// }}} // }}}
@ -485,102 +484,124 @@ module cva6_hpdcache_subsystem
`AXI_TYPEDEF_R_CHAN_T(axi_r_chan_t, axi_data_t, axi_id_t, axi_user_t) `AXI_TYPEDEF_R_CHAN_T(axi_r_chan_t, axi_data_t, axi_id_t, axi_user_t)
cva6_hpdcache_subsystem_axi_arbiter #( cva6_hpdcache_subsystem_axi_arbiter #(
.HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH), .HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH),
.HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth), .HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth),
.hpdcache_mem_req_t (hpdcache_mem_req_t), .hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
.hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t), .hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
.hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t), .hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t),
.AxiAddrWidth (CVA6Cfg.AxiAddrWidth), .AxiAddrWidth (CVA6Cfg.AxiAddrWidth),
.AxiDataWidth (CVA6Cfg.AxiDataWidth), .AxiDataWidth (CVA6Cfg.AxiDataWidth),
.AxiIdWidth (CVA6Cfg.AxiIdWidth), .AxiIdWidth (CVA6Cfg.AxiIdWidth),
.AxiUserWidth (CVA6Cfg.AxiUserWidth), .AxiUserWidth (CVA6Cfg.AxiUserWidth),
.axi_ar_chan_t (axi_ar_chan_t), .axi_ar_chan_t(axi_ar_chan_t),
.axi_aw_chan_t (axi_aw_chan_t), .axi_aw_chan_t(axi_aw_chan_t),
.axi_w_chan_t (axi_w_chan_t), .axi_w_chan_t (axi_w_chan_t),
.axi_req_t (noc_req_t), .axi_req_t (noc_req_t),
.axi_rsp_t (noc_resp_t) .axi_rsp_t (noc_resp_t)
) i_axi_arbiter ( ) i_axi_arbiter (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.icache_miss_valid_i (icache_miss_valid), .icache_miss_valid_i(icache_miss_valid),
.icache_miss_ready_o (icache_miss_ready), .icache_miss_ready_o(icache_miss_ready),
.icache_miss_i (icache_miss), .icache_miss_i (icache_miss),
.icache_miss_id_i (hpdcache_mem_id_t'(ICACHE_RDTXID)), .icache_miss_id_i (hpdcache_mem_id_t'(ICACHE_RDTXID)),
.icache_miss_resp_valid_o (icache_miss_resp_valid), .icache_miss_resp_valid_o(icache_miss_resp_valid),
.icache_miss_resp_o (icache_miss_resp), .icache_miss_resp_o (icache_miss_resp),
.dcache_miss_ready_o (dcache_miss_ready), .dcache_miss_ready_o(dcache_miss_ready),
.dcache_miss_valid_i (dcache_miss_valid), .dcache_miss_valid_i(dcache_miss_valid),
.dcache_miss_i (dcache_miss), .dcache_miss_i (dcache_miss),
.dcache_miss_resp_ready_i (dcache_miss_resp_ready), .dcache_miss_resp_ready_i(dcache_miss_resp_ready),
.dcache_miss_resp_valid_o (dcache_miss_resp_valid), .dcache_miss_resp_valid_o(dcache_miss_resp_valid),
.dcache_miss_resp_o (dcache_miss_resp), .dcache_miss_resp_o (dcache_miss_resp),
.dcache_wbuf_ready_o (dcache_wbuf_ready), .dcache_wbuf_ready_o(dcache_wbuf_ready),
.dcache_wbuf_valid_i (dcache_wbuf_valid), .dcache_wbuf_valid_i(dcache_wbuf_valid),
.dcache_wbuf_i (dcache_wbuf), .dcache_wbuf_i (dcache_wbuf),
.dcache_wbuf_data_ready_o (dcache_wbuf_data_ready), .dcache_wbuf_data_ready_o(dcache_wbuf_data_ready),
.dcache_wbuf_data_valid_i (dcache_wbuf_data_valid), .dcache_wbuf_data_valid_i(dcache_wbuf_data_valid),
.dcache_wbuf_data_i (dcache_wbuf_data), .dcache_wbuf_data_i (dcache_wbuf_data),
.dcache_wbuf_resp_ready_i (dcache_wbuf_resp_ready), .dcache_wbuf_resp_ready_i(dcache_wbuf_resp_ready),
.dcache_wbuf_resp_valid_o (dcache_wbuf_resp_valid), .dcache_wbuf_resp_valid_o(dcache_wbuf_resp_valid),
.dcache_wbuf_resp_o (dcache_wbuf_resp), .dcache_wbuf_resp_o (dcache_wbuf_resp),
.dcache_uc_read_ready_o (dcache_uc_read_ready), .dcache_uc_read_ready_o(dcache_uc_read_ready),
.dcache_uc_read_valid_i (dcache_uc_read_valid), .dcache_uc_read_valid_i(dcache_uc_read_valid),
.dcache_uc_read_i (dcache_uc_read), .dcache_uc_read_i (dcache_uc_read),
.dcache_uc_read_id_i ('1), .dcache_uc_read_id_i ('1),
.dcache_uc_read_resp_ready_i (dcache_uc_read_resp_ready), .dcache_uc_read_resp_ready_i(dcache_uc_read_resp_ready),
.dcache_uc_read_resp_valid_o (dcache_uc_read_resp_valid), .dcache_uc_read_resp_valid_o(dcache_uc_read_resp_valid),
.dcache_uc_read_resp_o (dcache_uc_read_resp), .dcache_uc_read_resp_o (dcache_uc_read_resp),
.dcache_uc_write_ready_o (dcache_uc_write_ready), .dcache_uc_write_ready_o(dcache_uc_write_ready),
.dcache_uc_write_valid_i (dcache_uc_write_valid), .dcache_uc_write_valid_i(dcache_uc_write_valid),
.dcache_uc_write_i (dcache_uc_write), .dcache_uc_write_i (dcache_uc_write),
.dcache_uc_write_id_i ('1), .dcache_uc_write_id_i ('1),
.dcache_uc_write_data_ready_o (dcache_uc_write_data_ready), .dcache_uc_write_data_ready_o(dcache_uc_write_data_ready),
.dcache_uc_write_data_valid_i (dcache_uc_write_data_valid), .dcache_uc_write_data_valid_i(dcache_uc_write_data_valid),
.dcache_uc_write_data_i (dcache_uc_write_data), .dcache_uc_write_data_i (dcache_uc_write_data),
.dcache_uc_write_resp_ready_i (dcache_uc_write_resp_ready), .dcache_uc_write_resp_ready_i(dcache_uc_write_resp_ready),
.dcache_uc_write_resp_valid_o (dcache_uc_write_resp_valid), .dcache_uc_write_resp_valid_o(dcache_uc_write_resp_valid),
.dcache_uc_write_resp_o (dcache_uc_write_resp), .dcache_uc_write_resp_o (dcache_uc_write_resp),
.axi_req_o (noc_req_o), .axi_req_o (noc_req_o),
.axi_resp_i (noc_resp_i) .axi_resp_i(noc_resp_i)
); );
// }}} // }}}
// Assertions // Assertions
// {{{ // {{{
// pragma translate_off // pragma translate_off
initial assert (hpdcache_pkg::HPDCACHE_REQ_SRC_ID_WIDTH >= $clog2(HPDCACHE_NREQUESTERS)) initial
assert (hpdcache_pkg::HPDCACHE_REQ_SRC_ID_WIDTH >= $clog2(HPDCACHE_NREQUESTERS))
else $fatal(1, "HPDCACHE_REQ_SRC_ID_WIDTH is not wide enough"); else $fatal(1, "HPDCACHE_REQ_SRC_ID_WIDTH is not wide enough");
a_invalid_instruction_fetch: assert property ( a_invalid_instruction_fetch :
assert property (
@(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX) @(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
else $warning(1,"[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X", else
icache_dreq_o.vaddr, icache_dreq_o.data); $warning(
1,
"[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X",
icache_dreq_o.vaddr,
icache_dreq_o.data
);
a_invalid_write_data: assert property ( a_invalid_write_data :
assert property (
@(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[2].data_req |-> |dcache_req_ports_i[2].data_be |-> (|dcache_req_ports_i[2].data_wdata) !== 1'hX) @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[2].data_req |-> |dcache_req_ports_i[2].data_be |-> (|dcache_req_ports_i[2].data_wdata) !== 1'hX)
else $warning(1,"[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X", else
{dcache_req_ports_i[2].address_tag, dcache_req_ports_i[2].address_index}, dcache_req_ports_i[2].data_be, dcache_req_ports_i[2].data_wdata); $warning(
1,
"[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X",
{
dcache_req_ports_i[2].address_tag, dcache_req_ports_i[2].address_index
},
dcache_req_ports_i[2].data_be,
dcache_req_ports_i[2].data_wdata
);
for (genvar j=0; j<2; j++) begin : gen_assertion for (genvar j = 0; j < 2; j++) begin : gen_assertion
a_invalid_read_data: assert property ( a_invalid_read_data :
assert property (
@(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX) @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
else $warning(1,"[l1 dcache] reading invalid data on port %01d: data=%016X", else
j, dcache_req_ports_o[j].data_rdata); $warning(
1,
"[l1 dcache] reading invalid data on port %01d: data=%016X",
j,
dcache_req_ports_o[j].data_rdata
);
end end
// pragma translate_on // pragma translate_on
// }}} // }}}

View file

@ -16,114 +16,114 @@ module cva6_hpdcache_subsystem_axi_arbiter
// Parameters // Parameters
// {{{ // {{{
#( #(
parameter int HPDcacheMemIdWidth = 8, parameter int HPDcacheMemIdWidth = 8,
parameter int HPDcacheMemDataWidth = 512, parameter int HPDcacheMemDataWidth = 512,
parameter type hpdcache_mem_req_t = logic, parameter type hpdcache_mem_req_t = logic,
parameter type hpdcache_mem_req_w_t = logic, parameter type hpdcache_mem_req_w_t = logic,
parameter type hpdcache_mem_resp_r_t = logic, parameter type hpdcache_mem_resp_r_t = logic,
parameter type hpdcache_mem_resp_w_t = logic, parameter type hpdcache_mem_resp_w_t = logic,
parameter int unsigned AxiAddrWidth = 1, parameter int unsigned AxiAddrWidth = 1,
parameter int unsigned AxiDataWidth = 1, parameter int unsigned AxiDataWidth = 1,
parameter int unsigned AxiIdWidth = 1, parameter int unsigned AxiIdWidth = 1,
parameter int unsigned AxiUserWidth = 1, parameter int unsigned AxiUserWidth = 1,
parameter type axi_ar_chan_t = logic, parameter type axi_ar_chan_t = logic,
parameter type axi_aw_chan_t = logic, parameter type axi_aw_chan_t = logic,
parameter type axi_w_chan_t = logic, parameter type axi_w_chan_t = logic,
parameter type axi_req_t = logic, parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic, parameter type axi_rsp_t = logic,
localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0] localparam type hpdcache_mem_id_t = logic [HPDcacheMemIdWidth-1:0]
) )
// }}} // }}}
// Ports // Ports
// {{{ // {{{
( (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
// Interfaces from/to I$ // Interfaces from/to I$
// {{{ // {{{
input logic icache_miss_valid_i, input logic icache_miss_valid_i,
output logic icache_miss_ready_o, output logic icache_miss_ready_o,
input wt_cache_pkg::icache_req_t icache_miss_i, input wt_cache_pkg::icache_req_t icache_miss_i,
input hpdcache_mem_id_t icache_miss_id_i, input hpdcache_mem_id_t icache_miss_id_i,
output logic icache_miss_resp_valid_o, output logic icache_miss_resp_valid_o,
output wt_cache_pkg::icache_rtrn_t icache_miss_resp_o, output wt_cache_pkg::icache_rtrn_t icache_miss_resp_o,
// }}} // }}}
// Interfaces from/to D$ // Interfaces from/to D$
// {{{ // {{{
output logic dcache_miss_ready_o, output logic dcache_miss_ready_o,
input logic dcache_miss_valid_i, input logic dcache_miss_valid_i,
input hpdcache_mem_req_t dcache_miss_i, input hpdcache_mem_req_t dcache_miss_i,
input logic dcache_miss_resp_ready_i, input logic dcache_miss_resp_ready_i,
output logic dcache_miss_resp_valid_o, output logic dcache_miss_resp_valid_o,
output hpdcache_mem_resp_r_t dcache_miss_resp_o, output hpdcache_mem_resp_r_t dcache_miss_resp_o,
// Write-buffer write interface // Write-buffer write interface
output logic dcache_wbuf_ready_o, output logic dcache_wbuf_ready_o,
input logic dcache_wbuf_valid_i, input logic dcache_wbuf_valid_i,
input hpdcache_mem_req_t dcache_wbuf_i, input hpdcache_mem_req_t dcache_wbuf_i,
output logic dcache_wbuf_data_ready_o, output logic dcache_wbuf_data_ready_o,
input logic dcache_wbuf_data_valid_i, input logic dcache_wbuf_data_valid_i,
input hpdcache_mem_req_w_t dcache_wbuf_data_i, input hpdcache_mem_req_w_t dcache_wbuf_data_i,
input logic dcache_wbuf_resp_ready_i, input logic dcache_wbuf_resp_ready_i,
output logic dcache_wbuf_resp_valid_o, output logic dcache_wbuf_resp_valid_o,
output hpdcache_mem_resp_w_t dcache_wbuf_resp_o, output hpdcache_mem_resp_w_t dcache_wbuf_resp_o,
// Uncached read interface // Uncached read interface
output logic dcache_uc_read_ready_o, output logic dcache_uc_read_ready_o,
input logic dcache_uc_read_valid_i, input logic dcache_uc_read_valid_i,
input hpdcache_mem_req_t dcache_uc_read_i, input hpdcache_mem_req_t dcache_uc_read_i,
input hpdcache_mem_id_t dcache_uc_read_id_i, input hpdcache_mem_id_t dcache_uc_read_id_i,
input logic dcache_uc_read_resp_ready_i, input logic dcache_uc_read_resp_ready_i,
output logic dcache_uc_read_resp_valid_o, output logic dcache_uc_read_resp_valid_o,
output hpdcache_mem_resp_r_t dcache_uc_read_resp_o, output hpdcache_mem_resp_r_t dcache_uc_read_resp_o,
// Uncached write interface // Uncached write interface
output logic dcache_uc_write_ready_o, output logic dcache_uc_write_ready_o,
input logic dcache_uc_write_valid_i, input logic dcache_uc_write_valid_i,
input hpdcache_mem_req_t dcache_uc_write_i, input hpdcache_mem_req_t dcache_uc_write_i,
input hpdcache_mem_id_t dcache_uc_write_id_i, input hpdcache_mem_id_t dcache_uc_write_id_i,
output logic dcache_uc_write_data_ready_o, output logic dcache_uc_write_data_ready_o,
input logic dcache_uc_write_data_valid_i, input logic dcache_uc_write_data_valid_i,
input hpdcache_mem_req_w_t dcache_uc_write_data_i, input hpdcache_mem_req_w_t dcache_uc_write_data_i,
input logic dcache_uc_write_resp_ready_i, input logic dcache_uc_write_resp_ready_i,
output logic dcache_uc_write_resp_valid_o, output logic dcache_uc_write_resp_valid_o,
output hpdcache_mem_resp_w_t dcache_uc_write_resp_o, output hpdcache_mem_resp_w_t dcache_uc_write_resp_o,
// }}} // }}}
// AXI port to upstream memory/peripherals // AXI port to upstream memory/peripherals
// {{{ // {{{
output axi_req_t axi_req_o, output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i input axi_rsp_t axi_resp_i
// }}} // }}}
); );
// }}} // }}}
// Internal type definitions // Internal type definitions
// {{{ // {{{
typedef struct packed { typedef struct packed {
logic [AxiIdWidth-1:0] id; logic [AxiIdWidth-1:0] id;
logic [AxiDataWidth-1:0] data; logic [AxiDataWidth-1:0] data;
axi_pkg::resp_t resp; axi_pkg::resp_t resp;
logic last; logic last;
logic [AxiUserWidth-1:0] user; logic [AxiUserWidth-1:0] user;
} axi_r_chan_t; } axi_r_chan_t;
typedef struct packed { typedef struct packed {
logic [AxiIdWidth-1:0] id; logic [AxiIdWidth-1:0] id;
axi_pkg::resp_t resp; axi_pkg::resp_t resp;
logic [AxiUserWidth-1:0] user; logic [AxiUserWidth-1:0] user;
} axi_b_chan_t; } axi_b_chan_t;
localparam int MEM_RESP_RT_DEPTH = (1 << HPDcacheMemIdWidth); localparam int MEM_RESP_RT_DEPTH = (1 << HPDcacheMemIdWidth);
@ -133,24 +133,26 @@ module cva6_hpdcache_subsystem_axi_arbiter
// Adapt the I$ interface to the HPDcache memory interface // Adapt the I$ interface to the HPDcache memory interface
// {{{ // {{{
localparam int ICACHE_CL_WORDS = ariane_pkg::ICACHE_LINE_WIDTH/64; localparam int ICACHE_CL_WORDS = ariane_pkg::ICACHE_LINE_WIDTH / 64;
localparam int ICACHE_CL_WORD_INDEX = $clog2(ICACHE_CL_WORDS); localparam int ICACHE_CL_WORD_INDEX = $clog2(ICACHE_CL_WORDS);
localparam int ICACHE_CL_SIZE = $clog2(ariane_pkg::ICACHE_LINE_WIDTH/8); localparam int ICACHE_CL_SIZE = $clog2(ariane_pkg::ICACHE_LINE_WIDTH / 8);
localparam int ICACHE_WORD_SIZE = 3; localparam int ICACHE_WORD_SIZE = 3;
localparam int ICACHE_MEM_REQ_CL_LEN = localparam int ICACHE_MEM_REQ_CL_LEN =
(ariane_pkg::ICACHE_LINE_WIDTH + HPDcacheMemDataWidth - 1)/HPDcacheMemDataWidth; (ariane_pkg::ICACHE_LINE_WIDTH + HPDcacheMemDataWidth - 1)/HPDcacheMemDataWidth;
localparam int ICACHE_MEM_REQ_CL_SIZE = localparam int ICACHE_MEM_REQ_CL_SIZE =
(HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH) ? (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH) ?
$clog2(HPDcacheMemDataWidth/8) : ICACHE_CL_SIZE; $clog2(
HPDcacheMemDataWidth / 8
) : ICACHE_CL_SIZE;
// I$ request // I$ request
hpdcache_mem_req_t icache_miss_req_wdata; hpdcache_mem_req_t icache_miss_req_wdata;
logic icache_miss_req_w, icache_miss_req_wok; logic icache_miss_req_w, icache_miss_req_wok;
hpdcache_mem_req_t icache_miss_req_rdata; hpdcache_mem_req_t icache_miss_req_rdata;
logic icache_miss_req_r, icache_miss_req_rok; logic icache_miss_req_r, icache_miss_req_rok;
logic icache_miss_pending_q; logic icache_miss_pending_q;
// This FIFO has two functionnalities: // This FIFO has two functionnalities:
// - Stabilize the ready-valid protocol. The ICACHE can abort a valid // - Stabilize the ready-valid protocol. The ICACHE can abort a valid
@ -158,88 +160,85 @@ module cva6_hpdcache_subsystem_axi_arbiter
// behavior is not supported by AXI. // behavior is not supported by AXI.
// - Cut a possible long timing path. // - Cut a possible long timing path.
hpdcache_fifo_reg #( hpdcache_fifo_reg #(
.FIFO_DEPTH (1), .FIFO_DEPTH (1),
.fifo_data_t (hpdcache_mem_req_t) .fifo_data_t(hpdcache_mem_req_t)
) i_icache_miss_req_fifo( ) i_icache_miss_req_fifo (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.w_i (icache_miss_req_w), .w_i (icache_miss_req_w),
.wok_o (icache_miss_req_wok), .wok_o (icache_miss_req_wok),
.wdata_i (icache_miss_req_wdata), .wdata_i(icache_miss_req_wdata),
.r_i (icache_miss_req_r), .r_i (icache_miss_req_r),
.rok_o (icache_miss_req_rok), .rok_o (icache_miss_req_rok),
.rdata_o (icache_miss_req_rdata) .rdata_o(icache_miss_req_rdata)
); );
assign icache_miss_req_w = icache_miss_valid_i, assign icache_miss_req_w = icache_miss_valid_i, icache_miss_ready_o = icache_miss_req_wok;
icache_miss_ready_o = icache_miss_req_wok;
assign icache_miss_req_wdata.mem_req_addr = icache_miss_i.paddr, assign icache_miss_req_wdata.mem_req_addr = icache_miss_i.paddr,
icache_miss_req_wdata.mem_req_len = icache_miss_i.nc ? 0 : ICACHE_MEM_REQ_CL_LEN - 1, icache_miss_req_wdata.mem_req_len = icache_miss_i.nc ? 0 : ICACHE_MEM_REQ_CL_LEN - 1,
icache_miss_req_wdata.mem_req_size = icache_miss_i.nc ? ICACHE_WORD_SIZE : ICACHE_MEM_REQ_CL_SIZE, icache_miss_req_wdata.mem_req_size = icache_miss_i.nc ? ICACHE_WORD_SIZE : ICACHE_MEM_REQ_CL_SIZE,
icache_miss_req_wdata.mem_req_id = icache_miss_i.tid, icache_miss_req_wdata.mem_req_id = icache_miss_i.tid,
icache_miss_req_wdata.mem_req_command = hpdcache_pkg::HPDCACHE_MEM_READ, icache_miss_req_wdata.mem_req_command = hpdcache_pkg::HPDCACHE_MEM_READ,
icache_miss_req_wdata.mem_req_atomic = hpdcache_pkg::hpdcache_mem_atomic_e'(0), icache_miss_req_wdata.mem_req_atomic = hpdcache_pkg::hpdcache_mem_atomic_e'(0),
icache_miss_req_wdata.mem_req_cacheable = ~icache_miss_i.nc; icache_miss_req_wdata.mem_req_cacheable = ~icache_miss_i.nc;
// I$ response // I$ response
logic icache_miss_resp_w, icache_miss_resp_wok; logic icache_miss_resp_w, icache_miss_resp_wok;
hpdcache_mem_resp_r_t icache_miss_resp_wdata; hpdcache_mem_resp_r_t icache_miss_resp_wdata;
logic icache_miss_resp_data_w, icache_miss_resp_data_wok; logic icache_miss_resp_data_w, icache_miss_resp_data_wok;
logic icache_miss_resp_data_r, icache_miss_resp_data_rok; logic icache_miss_resp_data_r, icache_miss_resp_data_rok;
icache_resp_data_t icache_miss_resp_data_rdata; icache_resp_data_t icache_miss_resp_data_rdata;
logic icache_miss_resp_meta_w, icache_miss_resp_meta_wok; logic icache_miss_resp_meta_w, icache_miss_resp_meta_wok;
logic icache_miss_resp_meta_r, icache_miss_resp_meta_rok; logic icache_miss_resp_meta_r, icache_miss_resp_meta_rok;
hpdcache_mem_id_t icache_miss_resp_meta_id; hpdcache_mem_id_t icache_miss_resp_meta_id;
icache_resp_data_t icache_miss_rdata; icache_resp_data_t icache_miss_rdata;
generate generate
if (HPDcacheMemDataWidth < ariane_pkg::ICACHE_LINE_WIDTH) begin if (HPDcacheMemDataWidth < ariane_pkg::ICACHE_LINE_WIDTH) begin
hpdcache_fifo_reg #( hpdcache_fifo_reg #(
.FIFO_DEPTH (1), .FIFO_DEPTH (1),
.fifo_data_t (hpdcache_mem_id_t) .fifo_data_t(hpdcache_mem_id_t)
) i_icache_refill_meta_fifo ( ) i_icache_refill_meta_fifo (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.w_i (icache_miss_resp_meta_w), .w_i (icache_miss_resp_meta_w),
.wok_o (icache_miss_resp_meta_wok), .wok_o (icache_miss_resp_meta_wok),
.wdata_i (icache_miss_resp_wdata.mem_resp_r_id), .wdata_i(icache_miss_resp_wdata.mem_resp_r_id),
.r_i (icache_miss_resp_meta_r), .r_i (icache_miss_resp_meta_r),
.rok_o (icache_miss_resp_meta_rok), .rok_o (icache_miss_resp_meta_rok),
.rdata_o (icache_miss_resp_meta_id) .rdata_o(icache_miss_resp_meta_id)
); );
hpdcache_data_upsize #( hpdcache_data_upsize #(
.WR_WIDTH (HPDcacheMemDataWidth), .WR_WIDTH(HPDcacheMemDataWidth),
.RD_WIDTH (ariane_pkg::ICACHE_LINE_WIDTH), .RD_WIDTH(ariane_pkg::ICACHE_LINE_WIDTH),
.DEPTH (1) .DEPTH (1)
) i_icache_hpdcache_data_upsize ( ) i_icache_hpdcache_data_upsize (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.w_i (icache_miss_resp_data_w), .w_i (icache_miss_resp_data_w),
.wlast_i (icache_miss_resp_wdata.mem_resp_r_last), .wlast_i(icache_miss_resp_wdata.mem_resp_r_last),
.wok_o (icache_miss_resp_data_wok), .wok_o (icache_miss_resp_data_wok),
.wdata_i (icache_miss_resp_wdata.mem_resp_r_data), .wdata_i(icache_miss_resp_wdata.mem_resp_r_data),
.r_i (icache_miss_resp_data_r), .r_i (icache_miss_resp_data_r),
.rok_o (icache_miss_resp_data_rok), .rok_o (icache_miss_resp_data_rok),
.rdata_o (icache_miss_resp_data_rdata) .rdata_o(icache_miss_resp_data_rdata)
); );
assign icache_miss_resp_meta_r = 1'b1, assign icache_miss_resp_meta_r = 1'b1, icache_miss_resp_data_r = 1'b1;
icache_miss_resp_data_r = 1'b1;
assign icache_miss_resp_meta_w = icache_miss_resp_w & assign icache_miss_resp_meta_w = icache_miss_resp_w & icache_miss_resp_wdata.mem_resp_r_last;
icache_miss_resp_wdata.mem_resp_r_last;
assign icache_miss_resp_data_w = icache_miss_resp_w; assign icache_miss_resp_data_w = icache_miss_resp_w;
@ -256,14 +255,13 @@ module cva6_hpdcache_subsystem_axi_arbiter
assign icache_miss_resp_data_rdata = icache_miss_resp_wdata.mem_resp_r_data; assign icache_miss_resp_data_rdata = icache_miss_resp_wdata.mem_resp_r_data;
// In the case of uncacheable accesses, the Icache expects the data to be right-aligned // In the case of uncacheable accesses, the Icache expects the data to be right-aligned
always_comb always_comb begin : icache_miss_resp_data_comb
begin : icache_miss_resp_data_comb
if (!icache_miss_req_rdata.mem_req_cacheable) begin if (!icache_miss_req_rdata.mem_req_cacheable) begin
automatic logic [ICACHE_CL_WORD_INDEX - 1: 0] icache_miss_word_index; automatic logic [ICACHE_CL_WORD_INDEX - 1:0] icache_miss_word_index;
automatic logic [63:0] icache_miss_word; automatic logic [63:0] icache_miss_word;
icache_miss_word_index = icache_miss_req_rdata.mem_req_addr[3 +: ICACHE_CL_WORD_INDEX]; icache_miss_word_index = icache_miss_req_rdata.mem_req_addr[3+:ICACHE_CL_WORD_INDEX];
icache_miss_word = icache_miss_resp_data_rdata[icache_miss_word_index*64 +: 64]; icache_miss_word = icache_miss_resp_data_rdata[icache_miss_word_index*64+:64];
icache_miss_rdata = {{ariane_pkg::ICACHE_LINE_WIDTH-64{1'b0}}, icache_miss_word}; icache_miss_rdata = {{ariane_pkg::ICACHE_LINE_WIDTH - 64{1'b0}}, icache_miss_word};
end else begin end else begin
icache_miss_rdata = icache_miss_resp_data_rdata; icache_miss_rdata = icache_miss_resp_data_rdata;
end end
@ -272,11 +270,11 @@ module cva6_hpdcache_subsystem_axi_arbiter
endgenerate endgenerate
assign icache_miss_resp_valid_o = icache_miss_resp_meta_rok, assign icache_miss_resp_valid_o = icache_miss_resp_meta_rok,
icache_miss_resp_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK, icache_miss_resp_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK,
icache_miss_resp_o.user = '0, icache_miss_resp_o.user = '0,
icache_miss_resp_o.inv = '0, icache_miss_resp_o.inv = '0,
icache_miss_resp_o.tid = icache_miss_resp_meta_id, icache_miss_resp_o.tid = icache_miss_resp_meta_id,
icache_miss_resp_o.data = icache_miss_rdata; icache_miss_resp_o.data = icache_miss_rdata;
// consume the Icache miss on the arrival of the response. The request // consume the Icache miss on the arrival of the response. The request
// metadata is decoded to forward the correct word in case of uncacheable // metadata is decoded to forward the correct word in case of uncacheable
@ -286,39 +284,39 @@ module cva6_hpdcache_subsystem_axi_arbiter
// Read request arbiter // Read request arbiter
// {{{ // {{{
logic mem_req_read_ready [2:0]; logic mem_req_read_ready [2:0];
logic mem_req_read_valid [2:0]; logic mem_req_read_valid [2:0];
hpdcache_mem_req_t mem_req_read [2:0]; hpdcache_mem_req_t mem_req_read [2:0];
logic mem_req_read_ready_arb; logic mem_req_read_ready_arb;
logic mem_req_read_valid_arb; logic mem_req_read_valid_arb;
hpdcache_mem_req_t mem_req_read_arb; hpdcache_mem_req_t mem_req_read_arb;
assign mem_req_read_valid[0] = icache_miss_req_rok & ~icache_miss_pending_q, assign mem_req_read_valid[0] = icache_miss_req_rok & ~icache_miss_pending_q,
mem_req_read[0] = icache_miss_req_rdata; mem_req_read[0] = icache_miss_req_rdata;
assign dcache_miss_ready_o = mem_req_read_ready[1], assign dcache_miss_ready_o = mem_req_read_ready[1],
mem_req_read_valid[1] = dcache_miss_valid_i, mem_req_read_valid[1] = dcache_miss_valid_i,
mem_req_read[1] = dcache_miss_i; mem_req_read[1] = dcache_miss_i;
assign dcache_uc_read_ready_o = mem_req_read_ready[2], assign dcache_uc_read_ready_o = mem_req_read_ready[2],
mem_req_read_valid[2] = dcache_uc_read_valid_i, mem_req_read_valid[2] = dcache_uc_read_valid_i,
mem_req_read[2] = dcache_uc_read_i; mem_req_read[2] = dcache_uc_read_i;
hpdcache_mem_req_read_arbiter #( hpdcache_mem_req_read_arbiter #(
.N (3), .N (3),
.hpdcache_mem_req_t (hpdcache_mem_req_t) .hpdcache_mem_req_t(hpdcache_mem_req_t)
) i_mem_req_read_arbiter ( ) i_mem_req_read_arbiter (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.mem_req_read_ready_o (mem_req_read_ready), .mem_req_read_ready_o(mem_req_read_ready),
.mem_req_read_valid_i (mem_req_read_valid), .mem_req_read_valid_i(mem_req_read_valid),
.mem_req_read_i (mem_req_read), .mem_req_read_i (mem_req_read),
.mem_req_read_ready_i (mem_req_read_ready_arb), .mem_req_read_ready_i(mem_req_read_ready_arb),
.mem_req_read_valid_o (mem_req_read_valid_arb), .mem_req_read_valid_o(mem_req_read_valid_arb),
.mem_req_read_o (mem_req_read_arb) .mem_req_read_o (mem_req_read_arb)
); );
// }}} // }}}
@ -328,14 +326,13 @@ module cva6_hpdcache_subsystem_axi_arbiter
logic mem_resp_read_valid; logic mem_resp_read_valid;
hpdcache_mem_resp_r_t mem_resp_read; hpdcache_mem_resp_r_t mem_resp_read;
logic mem_resp_read_ready_arb [2:0]; logic mem_resp_read_ready_arb[2:0];
logic mem_resp_read_valid_arb [2:0]; logic mem_resp_read_valid_arb[2:0];
hpdcache_mem_resp_r_t mem_resp_read_arb [2:0]; hpdcache_mem_resp_r_t mem_resp_read_arb [2:0];
mem_resp_rt_t mem_resp_read_rt; mem_resp_rt_t mem_resp_read_rt;
always_comb always_comb begin
begin
for (int i = 0; i < MEM_RESP_RT_DEPTH; i++) begin for (int i = 0; i < MEM_RESP_RT_DEPTH; i++) begin
mem_resp_read_rt[i] = (i == int'( icache_miss_id_i)) ? 0 : mem_resp_read_rt[i] = (i == int'( icache_miss_id_i)) ? 0 :
(i == int'(dcache_uc_read_id_i)) ? 2 : 1; (i == int'(dcache_uc_read_id_i)) ? 2 : 1;
@ -343,95 +340,95 @@ module cva6_hpdcache_subsystem_axi_arbiter
end end
hpdcache_mem_resp_demux #( hpdcache_mem_resp_demux #(
.N (3), .N (3),
.resp_t (hpdcache_mem_resp_r_t), .resp_t (hpdcache_mem_resp_r_t),
.resp_id_t (hpdcache_mem_id_t) .resp_id_t(hpdcache_mem_id_t)
) i_mem_resp_read_demux ( ) i_mem_resp_read_demux (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.mem_resp_ready_o (mem_resp_read_ready), .mem_resp_ready_o(mem_resp_read_ready),
.mem_resp_valid_i (mem_resp_read_valid), .mem_resp_valid_i(mem_resp_read_valid),
.mem_resp_id_i (mem_resp_read.mem_resp_r_id), .mem_resp_id_i (mem_resp_read.mem_resp_r_id),
.mem_resp_i (mem_resp_read), .mem_resp_i (mem_resp_read),
.mem_resp_ready_i (mem_resp_read_ready_arb), .mem_resp_ready_i(mem_resp_read_ready_arb),
.mem_resp_valid_o (mem_resp_read_valid_arb), .mem_resp_valid_o(mem_resp_read_valid_arb),
.mem_resp_o (mem_resp_read_arb), .mem_resp_o (mem_resp_read_arb),
.mem_resp_rt_i (mem_resp_read_rt) .mem_resp_rt_i(mem_resp_read_rt)
); );
assign icache_miss_resp_w = mem_resp_read_valid_arb[0], assign icache_miss_resp_w = mem_resp_read_valid_arb[0],
icache_miss_resp_wdata = mem_resp_read_arb[0], icache_miss_resp_wdata = mem_resp_read_arb[0],
mem_resp_read_ready_arb[0] = icache_miss_resp_wok; mem_resp_read_ready_arb[0] = icache_miss_resp_wok;
assign dcache_miss_resp_valid_o = mem_resp_read_valid_arb[1], assign dcache_miss_resp_valid_o = mem_resp_read_valid_arb[1],
dcache_miss_resp_o = mem_resp_read_arb[1], dcache_miss_resp_o = mem_resp_read_arb[1],
mem_resp_read_ready_arb[1] = dcache_miss_resp_ready_i; mem_resp_read_ready_arb[1] = dcache_miss_resp_ready_i;
assign dcache_uc_read_resp_valid_o = mem_resp_read_valid_arb[2], assign dcache_uc_read_resp_valid_o = mem_resp_read_valid_arb[2],
dcache_uc_read_resp_o = mem_resp_read_arb[2], dcache_uc_read_resp_o = mem_resp_read_arb[2],
mem_resp_read_ready_arb[2] = dcache_uc_read_resp_ready_i; mem_resp_read_ready_arb[2] = dcache_uc_read_resp_ready_i;
// }}} // }}}
// Write request arbiter // Write request arbiter
// {{{ // {{{
logic mem_req_write_ready [1:0]; logic mem_req_write_ready [1:0];
logic mem_req_write_valid [1:0]; logic mem_req_write_valid [1:0];
hpdcache_mem_req_t mem_req_write [1:0]; hpdcache_mem_req_t mem_req_write [1:0];
logic mem_req_write_data_ready [1:0]; logic mem_req_write_data_ready [1:0];
logic mem_req_write_data_valid [1:0]; logic mem_req_write_data_valid [1:0];
hpdcache_mem_req_w_t mem_req_write_data [1:0]; hpdcache_mem_req_w_t mem_req_write_data [1:0];
logic mem_req_write_ready_arb; logic mem_req_write_ready_arb;
logic mem_req_write_valid_arb; logic mem_req_write_valid_arb;
hpdcache_mem_req_t mem_req_write_arb; hpdcache_mem_req_t mem_req_write_arb;
logic mem_req_write_data_ready_arb; logic mem_req_write_data_ready_arb;
logic mem_req_write_data_valid_arb; logic mem_req_write_data_valid_arb;
hpdcache_mem_req_w_t mem_req_write_data_arb; hpdcache_mem_req_w_t mem_req_write_data_arb;
assign dcache_wbuf_ready_o = mem_req_write_ready[0], assign dcache_wbuf_ready_o = mem_req_write_ready[0],
mem_req_write_valid[0] = dcache_wbuf_valid_i, mem_req_write_valid[0] = dcache_wbuf_valid_i,
mem_req_write[0] = dcache_wbuf_i; mem_req_write[0] = dcache_wbuf_i;
assign dcache_wbuf_data_ready_o = mem_req_write_data_ready[0], assign dcache_wbuf_data_ready_o = mem_req_write_data_ready[0],
mem_req_write_data_valid[0] = dcache_wbuf_data_valid_i, mem_req_write_data_valid[0] = dcache_wbuf_data_valid_i,
mem_req_write_data[0] = dcache_wbuf_data_i; mem_req_write_data[0] = dcache_wbuf_data_i;
assign dcache_uc_write_ready_o = mem_req_write_ready[1], assign dcache_uc_write_ready_o = mem_req_write_ready[1],
mem_req_write_valid[1] = dcache_uc_write_valid_i, mem_req_write_valid[1] = dcache_uc_write_valid_i,
mem_req_write[1] = dcache_uc_write_i; mem_req_write[1] = dcache_uc_write_i;
assign dcache_uc_write_data_ready_o = mem_req_write_data_ready[1], assign dcache_uc_write_data_ready_o = mem_req_write_data_ready[1],
mem_req_write_data_valid[1] = dcache_uc_write_data_valid_i, mem_req_write_data_valid[1] = dcache_uc_write_data_valid_i,
mem_req_write_data[1] = dcache_uc_write_data_i; mem_req_write_data[1] = dcache_uc_write_data_i;
hpdcache_mem_req_write_arbiter #( hpdcache_mem_req_write_arbiter #(
.N (2), .N (2),
.hpdcache_mem_req_t (hpdcache_mem_req_t), .hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t) .hpdcache_mem_req_w_t(hpdcache_mem_req_w_t)
) i_mem_req_write_arbiter ( ) i_mem_req_write_arbiter (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.mem_req_write_ready_o (mem_req_write_ready), .mem_req_write_ready_o(mem_req_write_ready),
.mem_req_write_valid_i (mem_req_write_valid), .mem_req_write_valid_i(mem_req_write_valid),
.mem_req_write_i (mem_req_write), .mem_req_write_i (mem_req_write),
.mem_req_write_data_ready_o (mem_req_write_data_ready), .mem_req_write_data_ready_o(mem_req_write_data_ready),
.mem_req_write_data_valid_i (mem_req_write_data_valid), .mem_req_write_data_valid_i(mem_req_write_data_valid),
.mem_req_write_data_i (mem_req_write_data), .mem_req_write_data_i (mem_req_write_data),
.mem_req_write_ready_i (mem_req_write_ready_arb), .mem_req_write_ready_i(mem_req_write_ready_arb),
.mem_req_write_valid_o (mem_req_write_valid_arb), .mem_req_write_valid_o(mem_req_write_valid_arb),
.mem_req_write_o (mem_req_write_arb), .mem_req_write_o (mem_req_write_arb),
.mem_req_write_data_ready_i (mem_req_write_data_ready_arb), .mem_req_write_data_ready_i(mem_req_write_data_ready_arb),
.mem_req_write_data_valid_o (mem_req_write_data_valid_arb), .mem_req_write_data_valid_o(mem_req_write_data_valid_arb),
.mem_req_write_data_o (mem_req_write_data_arb) .mem_req_write_data_o (mem_req_write_data_arb)
); );
// }}} // }}}
@ -441,52 +438,50 @@ module cva6_hpdcache_subsystem_axi_arbiter
logic mem_resp_write_valid; logic mem_resp_write_valid;
hpdcache_mem_resp_w_t mem_resp_write; hpdcache_mem_resp_w_t mem_resp_write;
logic mem_resp_write_ready_arb [1:0]; logic mem_resp_write_ready_arb[1:0];
logic mem_resp_write_valid_arb [1:0]; logic mem_resp_write_valid_arb[1:0];
hpdcache_mem_resp_w_t mem_resp_write_arb [1:0]; hpdcache_mem_resp_w_t mem_resp_write_arb [1:0];
mem_resp_rt_t mem_resp_write_rt; mem_resp_rt_t mem_resp_write_rt;
always_comb always_comb begin
begin
for (int i = 0; i < MEM_RESP_RT_DEPTH; i++) begin for (int i = 0; i < MEM_RESP_RT_DEPTH; i++) begin
mem_resp_write_rt[i] = (i == int'(dcache_uc_write_id_i)) ? 1 : 0; mem_resp_write_rt[i] = (i == int'(dcache_uc_write_id_i)) ? 1 : 0;
end end
end end
hpdcache_mem_resp_demux #( hpdcache_mem_resp_demux #(
.N (2), .N (2),
.resp_t (hpdcache_mem_resp_w_t), .resp_t (hpdcache_mem_resp_w_t),
.resp_id_t (hpdcache_mem_id_t) .resp_id_t(hpdcache_mem_id_t)
) i_hpdcache_mem_resp_write_demux ( ) i_hpdcache_mem_resp_write_demux (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.mem_resp_ready_o (mem_resp_write_ready), .mem_resp_ready_o(mem_resp_write_ready),
.mem_resp_valid_i (mem_resp_write_valid), .mem_resp_valid_i(mem_resp_write_valid),
.mem_resp_id_i (mem_resp_write.mem_resp_w_id), .mem_resp_id_i (mem_resp_write.mem_resp_w_id),
.mem_resp_i (mem_resp_write), .mem_resp_i (mem_resp_write),
.mem_resp_ready_i (mem_resp_write_ready_arb), .mem_resp_ready_i(mem_resp_write_ready_arb),
.mem_resp_valid_o (mem_resp_write_valid_arb), .mem_resp_valid_o(mem_resp_write_valid_arb),
.mem_resp_o (mem_resp_write_arb), .mem_resp_o (mem_resp_write_arb),
.mem_resp_rt_i (mem_resp_write_rt) .mem_resp_rt_i(mem_resp_write_rt)
); );
assign dcache_wbuf_resp_valid_o = mem_resp_write_valid_arb[0], assign dcache_wbuf_resp_valid_o = mem_resp_write_valid_arb[0],
dcache_wbuf_resp_o = mem_resp_write_arb[0], dcache_wbuf_resp_o = mem_resp_write_arb[0],
mem_resp_write_ready_arb[0] = dcache_wbuf_resp_ready_i; mem_resp_write_ready_arb[0] = dcache_wbuf_resp_ready_i;
assign dcache_uc_write_resp_valid_o = mem_resp_write_valid_arb[1], assign dcache_uc_write_resp_valid_o = mem_resp_write_valid_arb[1],
dcache_uc_write_resp_o = mem_resp_write_arb[1], dcache_uc_write_resp_o = mem_resp_write_arb[1],
mem_resp_write_ready_arb[1] = dcache_uc_write_resp_ready_i; mem_resp_write_ready_arb[1] = dcache_uc_write_resp_ready_i;
// }}} // }}}
// I$ miss pending // I$ miss pending
// {{{ // {{{
always_ff @(posedge clk_i or negedge rst_ni) always_ff @(posedge clk_i or negedge rst_ni) begin : icache_miss_pending_ff
begin : icache_miss_pending_ff
if (!rst_ni) begin if (!rst_ni) begin
icache_miss_pending_q <= 1'b0; icache_miss_pending_q <= 1'b0;
end else begin end else begin
@ -502,59 +497,59 @@ module cva6_hpdcache_subsystem_axi_arbiter
axi_rsp_t axi_resp; axi_rsp_t axi_resp;
hpdcache_mem_to_axi_write #( hpdcache_mem_to_axi_write #(
.hpdcache_mem_req_t (hpdcache_mem_req_t), .hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t),
.hpdcache_mem_resp_w_t (hpdcache_mem_resp_w_t), .hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t),
.aw_chan_t (axi_aw_chan_t), .aw_chan_t (axi_aw_chan_t),
.w_chan_t (axi_w_chan_t), .w_chan_t (axi_w_chan_t),
.b_chan_t (axi_b_chan_t) .b_chan_t (axi_b_chan_t)
) i_hpdcache_mem_to_axi_write ( ) i_hpdcache_mem_to_axi_write (
.req_ready_o (mem_req_write_ready_arb), .req_ready_o(mem_req_write_ready_arb),
.req_valid_i (mem_req_write_valid_arb), .req_valid_i(mem_req_write_valid_arb),
.req_i (mem_req_write_arb), .req_i (mem_req_write_arb),
.req_data_ready_o (mem_req_write_data_ready_arb), .req_data_ready_o(mem_req_write_data_ready_arb),
.req_data_valid_i (mem_req_write_data_valid_arb), .req_data_valid_i(mem_req_write_data_valid_arb),
.req_data_i (mem_req_write_data_arb), .req_data_i (mem_req_write_data_arb),
.resp_ready_i (mem_resp_write_ready), .resp_ready_i(mem_resp_write_ready),
.resp_valid_o (mem_resp_write_valid), .resp_valid_o(mem_resp_write_valid),
.resp_o (mem_resp_write), .resp_o (mem_resp_write),
.axi_aw_valid_o (axi_req.aw_valid), .axi_aw_valid_o(axi_req.aw_valid),
.axi_aw_o (axi_req.aw), .axi_aw_o (axi_req.aw),
.axi_aw_ready_i (axi_resp.aw_ready), .axi_aw_ready_i(axi_resp.aw_ready),
.axi_w_valid_o (axi_req.w_valid), .axi_w_valid_o(axi_req.w_valid),
.axi_w_o (axi_req.w), .axi_w_o (axi_req.w),
.axi_w_ready_i (axi_resp.w_ready), .axi_w_ready_i(axi_resp.w_ready),
.axi_b_valid_i (axi_resp.b_valid), .axi_b_valid_i(axi_resp.b_valid),
.axi_b_i (axi_resp.b), .axi_b_i (axi_resp.b),
.axi_b_ready_o (axi_req.b_ready) .axi_b_ready_o(axi_req.b_ready)
); );
hpdcache_mem_to_axi_read #( hpdcache_mem_to_axi_read #(
.hpdcache_mem_req_t (hpdcache_mem_req_t), .hpdcache_mem_req_t (hpdcache_mem_req_t),
.hpdcache_mem_resp_r_t (hpdcache_mem_resp_r_t), .hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t),
.ar_chan_t (axi_ar_chan_t), .ar_chan_t (axi_ar_chan_t),
.r_chan_t (axi_r_chan_t) .r_chan_t (axi_r_chan_t)
) i_hpdcache_mem_to_axi_read ( ) i_hpdcache_mem_to_axi_read (
.req_ready_o (mem_req_read_ready_arb), .req_ready_o(mem_req_read_ready_arb),
.req_valid_i (mem_req_read_valid_arb), .req_valid_i(mem_req_read_valid_arb),
.req_i (mem_req_read_arb), .req_i (mem_req_read_arb),
.resp_ready_i (mem_resp_read_ready), .resp_ready_i(mem_resp_read_ready),
.resp_valid_o (mem_resp_read_valid), .resp_valid_o(mem_resp_read_valid),
.resp_o (mem_resp_read), .resp_o (mem_resp_read),
.axi_ar_valid_o (axi_req.ar_valid), .axi_ar_valid_o(axi_req.ar_valid),
.axi_ar_o (axi_req.ar), .axi_ar_o (axi_req.ar),
.axi_ar_ready_i (axi_resp.ar_ready), .axi_ar_ready_i(axi_resp.ar_ready),
.axi_r_valid_i (axi_resp.r_valid), .axi_r_valid_i(axi_resp.r_valid),
.axi_r_i (axi_resp.r), .axi_r_i (axi_resp.r),
.axi_r_ready_o (axi_req.r_ready) .axi_r_ready_o(axi_req.r_ready)
); );
assign axi_req_o = axi_req; assign axi_req_o = axi_req;
@ -564,16 +559,27 @@ module cva6_hpdcache_subsystem_axi_arbiter
// Assertions // Assertions
// {{{ // {{{
// pragma translate_off // pragma translate_off
initial assert (HPDcacheMemIdWidth <= AxiIdWidth) else initial
$fatal("HPDcacheMemIdWidth shall be less or equal to AxiIdWidth"); assert (HPDcacheMemIdWidth <= AxiIdWidth)
initial assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_MSHR_SET_WIDTH + hpdcache_pkg::HPDCACHE_MSHR_WAY_WIDTH + 1)) else else $fatal("HPDcacheMemIdWidth shall be less or equal to AxiIdWidth");
$fatal("HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache misses and Icache misses"); initial
initial assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_WBUF_DIR_PTR_WIDTH + 1)) else assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_MSHR_SET_WIDTH + hpdcache_pkg::HPDCACHE_MSHR_WAY_WIDTH + 1))
$fatal("HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache cacheable writes and uncacheable writes"); else
initial assert (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH) else $fatal(
$fatal("HPDcacheMemDataWidth shall be less or equal to the width of a Icache line"); "HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache misses and Icache misses"
initial assert (HPDcacheMemDataWidth <= ariane_pkg::DCACHE_LINE_WIDTH) else );
$fatal("HPDcacheMemDataWidth shall be less or equal to the width of a Dcache line"); initial
assert (HPDcacheMemIdWidth >= (hpdcache_pkg::HPDCACHE_WBUF_DIR_PTR_WIDTH + 1))
else
$fatal(
"HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache cacheable writes and uncacheable writes"
);
initial
assert (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH)
else $fatal("HPDcacheMemDataWidth shall be less or equal to the width of a Icache line");
initial
assert (HPDcacheMemDataWidth <= ariane_pkg::DCACHE_LINE_WIDTH)
else $fatal("HPDcacheMemDataWidth shall be less or equal to the width of a Dcache line");
// pragma translate_on // pragma translate_on
// }}} // }}}

View file

@ -25,38 +25,40 @@
// //
module cva6_icache import ariane_pkg::*; import wt_cache_pkg::*; #( module cva6_icache
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
/// ID to be used for read transactions import wt_cache_pkg::*;
parameter logic [MEM_TID_WIDTH-1:0] RdTxId = 0 #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
/// ID to be used for read transactions
parameter logic [MEM_TID_WIDTH-1:0] RdTxId = 0
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
/// flush the icache, flush and kill have to be asserted together /// flush the icache, flush and kill have to be asserted together
input logic flush_i, input logic flush_i,
/// enable icache /// enable icache
input logic en_i, input logic en_i,
/// to performance counter /// to performance counter
output logic miss_o, output logic miss_o,
// address translation requests // address translation requests
input icache_areq_t areq_i, input icache_areq_t areq_i,
output icache_arsp_t areq_o, output icache_arsp_t areq_o,
// data requests // data requests
input icache_dreq_t dreq_i, input icache_dreq_t dreq_i,
output icache_drsp_t dreq_o, output icache_drsp_t dreq_o,
// refill port // refill port
input logic mem_rtrn_vld_i, input logic mem_rtrn_vld_i,
input icache_rtrn_t mem_rtrn_i, input icache_rtrn_t mem_rtrn_i,
output logic mem_data_req_o, output logic mem_data_req_o,
input logic mem_data_ack_i, input logic mem_data_ack_i,
output icache_req_t mem_data_o output icache_req_t mem_data_o
); );
// functions // functions
function automatic logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] icache_way_bin2oh ( function automatic logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] icache_way_bin2oh(
input logic [L1I_WAY_WIDTH-1:0] in input logic [L1I_WAY_WIDTH-1:0] in);
);
logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] out; logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] out;
out = '0; out = '0;
out[in] = 1'b1; out[in] = 1'b1;
@ -64,59 +66,70 @@ module cva6_icache import ariane_pkg::*; import wt_cache_pkg::*; #(
endfunction endfunction
// signals // signals
logic cache_en_d, cache_en_q; // cache is enabled logic cache_en_d, cache_en_q; // cache is enabled
logic [riscv::VLEN-1:0] vaddr_d, vaddr_q; logic [riscv::VLEN-1:0] vaddr_d, vaddr_q;
logic paddr_is_nc; // asserted if physical address is non-cacheable logic paddr_is_nc; // asserted if physical address is non-cacheable
logic [ICACHE_SET_ASSOC-1:0] cl_hit; // hit from tag compare logic [ICACHE_SET_ASSOC-1:0] cl_hit; // hit from tag compare
logic cache_rden; // triggers cache lookup logic cache_rden; // triggers cache lookup
logic cache_wren; // triggers write to cacheline logic cache_wren; // triggers write to cacheline
logic cmp_en_d, cmp_en_q; // enable tag comparison in next cycle. used to cut long path due to NC signal. logic
logic flush_d, flush_q; // used to register and signal pending flushes cmp_en_d,
cmp_en_q; // enable tag comparison in next cycle. used to cut long path due to NC signal.
logic flush_d, flush_q; // used to register and signal pending flushes
// replacement strategy // replacement strategy
logic update_lfsr; // shift the LFSR logic update_lfsr; // shift the LFSR
logic [$clog2(ICACHE_SET_ASSOC)-1:0] inv_way; // first non-valid encountered logic [$clog2(ICACHE_SET_ASSOC)-1:0] inv_way; // first non-valid encountered
logic [$clog2(ICACHE_SET_ASSOC)-1:0] rnd_way; // random index for replacement logic [$clog2(ICACHE_SET_ASSOC)-1:0] rnd_way; // random index for replacement
logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_way; // way to replace logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_way; // way to replace
logic [ICACHE_SET_ASSOC-1:0] repl_way_oh_d, repl_way_oh_q; // way to replace (onehot) logic [ICACHE_SET_ASSOC-1:0] repl_way_oh_d, repl_way_oh_q; // way to replace (onehot)
logic all_ways_valid; // we need to switch repl strategy since all are valid logic all_ways_valid; // we need to switch repl strategy since all are valid
// invalidations / flushing // invalidations / flushing
logic inv_en; // incoming invalidations logic inv_en; // incoming invalidations
logic inv_d, inv_q; // invalidation in progress logic inv_d, inv_q; // invalidation in progress
logic flush_en, flush_done; // used to flush cache entries logic flush_en, flush_done; // used to flush cache entries
logic [ICACHE_CL_IDX_WIDTH-1:0] flush_cnt_d, flush_cnt_q; // used to flush cache entries logic [ICACHE_CL_IDX_WIDTH-1:0] flush_cnt_d, flush_cnt_q; // used to flush cache entries
// mem arrays // mem arrays
logic cl_we; // write enable to memory array logic cl_we; // write enable to memory array
logic [ICACHE_SET_ASSOC-1:0] cl_req; // request to memory array logic [ ICACHE_SET_ASSOC-1:0] cl_req; // request to memory array
logic [ICACHE_CL_IDX_WIDTH-1:0] cl_index; // this is a cache-line index, to memory array logic [ICACHE_CL_IDX_WIDTH-1:0] cl_index; // this is a cache-line index, to memory array
logic [ICACHE_OFFSET_WIDTH-1:0] cl_offset_d, cl_offset_q; // offset in cache line logic [ICACHE_OFFSET_WIDTH-1:0] cl_offset_d, cl_offset_q; // offset in cache line
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_d, cl_tag_q; // this is the cache tag logic [ICACHE_TAG_WIDTH-1:0] cl_tag_d, cl_tag_q; // this is the cache tag
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_rdata [ICACHE_SET_ASSOC-1:0]; // these are the tags coming from the tagmem logic [ICACHE_TAG_WIDTH-1:0] cl_tag_rdata [ICACHE_SET_ASSOC-1:0]; // these are the tags coming from the tagmem
logic [ICACHE_LINE_WIDTH-1:0] cl_rdata [ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the cache logic [ICACHE_LINE_WIDTH-1:0] cl_rdata [ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the cache
logic [ICACHE_USER_LINE_WIDTH-1:0] cl_ruser[ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the user cache logic [ICACHE_USER_LINE_WIDTH-1:0] cl_ruser[ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the user cache
logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0]cl_sel; // selected word from each cacheline logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel; // selected word from each cacheline
logic [ICACHE_SET_ASSOC-1:0][FETCH_USER_WIDTH-1:0] cl_user; // selected word from each cacheline logic [ICACHE_SET_ASSOC-1:0][FETCH_USER_WIDTH-1:0] cl_user; // selected word from each cacheline
logic [ICACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs logic [ICACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs
logic vld_we; // valid bits write enable logic vld_we; // valid bits write enable
logic [ICACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write logic [ICACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write
logic [ICACHE_SET_ASSOC-1:0] vld_rdata; // valid bits coming from valid regs logic [ICACHE_SET_ASSOC-1:0] vld_rdata; // valid bits coming from valid regs
logic [ICACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit logic [ICACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit
// controller FSM // controller FSM
typedef enum logic[2:0] {FLUSH, IDLE, READ, MISS, KILL_ATRANS, KILL_MISS} state_e; typedef enum logic [2:0] {
FLUSH,
IDLE,
READ,
MISS,
KILL_ATRANS,
KILL_MISS
} state_e;
state_e state_d, state_q; state_e state_d, state_q;
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// address -> cl_index mapping, interface plumbing // address -> cl_index mapping, interface plumbing
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// extract tag from physical address, check if NC // extract tag from physical address, check if NC
assign cl_tag_d = (areq_i.fetch_valid) ? areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH] : cl_tag_q; assign cl_tag_d = (areq_i.fetch_valid) ? areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH] : cl_tag_q;
// noncacheable if request goes to I/O space, or if cache is disabled // noncacheable if request goes to I/O space, or if cache is disabled
assign paddr_is_nc = (~cache_en_q) | (~config_pkg::is_inside_cacheable_regions(CVA6Cfg, {{{64-riscv::PLEN}{1'b0}}, cl_tag_d, {ICACHE_INDEX_WIDTH{1'b0}}})); assign paddr_is_nc = (~cache_en_q) | (~config_pkg::is_inside_cacheable_regions(
CVA6Cfg, {{{64 - riscv::PLEN} {1'b0}}, cl_tag_d, {ICACHE_INDEX_WIDTH{1'b0}}}
));
// pass exception through // pass exception through
assign dreq_o.ex = areq_i.fetch_exception; assign dreq_o.ex = areq_i.fetch_exception;
@ -124,65 +137,64 @@ module cva6_icache import ariane_pkg::*; import wt_cache_pkg::*; #(
// latch this in case we have to stall later on // latch this in case we have to stall later on
// make sure this is 32bit aligned // make sure this is 32bit aligned
assign vaddr_d = (dreq_o.ready & dreq_i.req) ? dreq_i.vaddr : vaddr_q; assign vaddr_d = (dreq_o.ready & dreq_i.req) ? dreq_i.vaddr : vaddr_q;
assign areq_o.fetch_vaddr = {vaddr_q>>2, 2'b0}; assign areq_o.fetch_vaddr = {vaddr_q >> 2, 2'b0};
// split virtual address into index and offset to address cache arrays // split virtual address into index and offset to address cache arrays
assign cl_index = vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH]; assign cl_index = vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH];
if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset
// if we generate a noncacheable access, the word will be at offset 0 or 4 in the cl coming from memory // if we generate a noncacheable access, the word will be at offset 0 or 4 in the cl coming from memory
assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr>>2, 2'b0} : assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr>>2, 2'b0} :
( paddr_is_nc & mem_data_req_o ) ? cl_offset_q[2]<<2 : // needed since we transfer 32bit over a 64bit AXI bus in this case ( paddr_is_nc & mem_data_req_o ) ? cl_offset_q[2]<<2 : // needed since we transfer 32bit over a 64bit AXI bus in this case
cl_offset_q; cl_offset_q;
// request word address instead of cl address in case of NC access // request word address instead of cl address in case of NC access
assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:3], 3'b0} : // align to 64bit assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:3], 3'b0} : // align to 64bit
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
end else begin : gen_piton_offset end else begin : gen_piton_offset
// icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not. // icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not.
// since the piton cache system replicates the data, we can always index the full CL // since the piton cache system replicates the data, we can always index the full CL
assign cl_offset_d = ( dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr>>2, 2'b0} : assign cl_offset_d = (dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr >> 2, 2'b0} : cl_offset_q;
cl_offset_q;
// request word address instead of cl address in case of NC access // request word address instead of cl address in case of NC access
assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:2], 2'b0} : // align to 32bit assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:2], 2'b0} : // align to 32bit
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl
end end
assign mem_data_o.tid = RdTxId; assign mem_data_o.tid = RdTxId;
assign mem_data_o.nc = paddr_is_nc; assign mem_data_o.nc = paddr_is_nc;
// way that is being replaced // way that is being replaced
assign mem_data_o.way = repl_way; assign mem_data_o.way = repl_way;
assign dreq_o.vaddr = vaddr_q; assign dreq_o.vaddr = vaddr_q;
// invalidations take two cycles // invalidations take two cycles
assign inv_d = inv_en; assign inv_d = inv_en;
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// main control logic // main control logic
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
logic addr_ni; logic addr_ni;
assign addr_ni = config_pkg::is_inside_nonidempotent_regions(CVA6Cfg, areq_i.fetch_paddr); assign addr_ni = config_pkg::is_inside_nonidempotent_regions(CVA6Cfg, areq_i.fetch_paddr);
always_comb begin : p_fsm always_comb begin : p_fsm
// default assignment // default assignment
state_d = state_q; state_d = state_q;
cache_en_d = cache_en_q & en_i;// disabling the cache is always possible, enable needs to go via flush cache_en_d = cache_en_q & en_i;// disabling the cache is always possible, enable needs to go via flush
flush_en = 1'b0; flush_en = 1'b0;
cmp_en_d = 1'b0; cmp_en_d = 1'b0;
cache_rden = 1'b0; cache_rden = 1'b0;
cache_wren = 1'b0; cache_wren = 1'b0;
inv_en = 1'b0; inv_en = 1'b0;
flush_d = flush_q | flush_i; // register incoming flush flush_d = flush_q | flush_i; // register incoming flush
// interfaces // interfaces
dreq_o.ready = 1'b0; dreq_o.ready = 1'b0;
areq_o.fetch_req = 1'b0; areq_o.fetch_req = 1'b0;
dreq_o.valid = 1'b0; dreq_o.valid = 1'b0;
mem_data_req_o = 1'b0; mem_data_req_o = 1'b0;
// performance counter // performance counter
miss_o = 1'b0; miss_o = 1'b0;
// handle invalidations unconditionally // handle invalidations unconditionally
// note: invals are mutually exclusive with // note: invals are mutually exclusive with
@ -197,7 +209,7 @@ end else begin : gen_piton_offset
////////////////////////////////// //////////////////////////////////
// this clears all valid bits // this clears all valid bits
FLUSH: begin FLUSH: begin
flush_en = 1'b1; flush_en = 1'b1;
if (flush_done) begin if (flush_done) begin
state_d = IDLE; state_d = IDLE;
flush_d = 1'b0; flush_d = 1'b0;
@ -208,27 +220,27 @@ end else begin : gen_piton_offset
////////////////////////////////// //////////////////////////////////
// wait for an incoming request // wait for an incoming request
IDLE: begin IDLE: begin
// only enable tag comparison if cache is enabled // only enable tag comparison if cache is enabled
cmp_en_d = cache_en_q; cmp_en_d = cache_en_q;
// handle pending flushes, or perform cache clear upon enable // handle pending flushes, or perform cache clear upon enable
if (flush_d || (en_i && !cache_en_q)) begin if (flush_d || (en_i && !cache_en_q)) begin
state_d = FLUSH; state_d = FLUSH;
// wait for incoming requests // wait for incoming requests
end else begin end else begin
// mem requests are for sure invals here // mem requests are for sure invals here
if (!mem_rtrn_vld_i) begin if (!mem_rtrn_vld_i) begin
dreq_o.ready = 1'b1; dreq_o.ready = 1'b1;
// we have a new request // we have a new request
if (dreq_i.req) begin if (dreq_i.req) begin
cache_rden = 1'b1; cache_rden = 1'b1;
state_d = READ; state_d = READ;
end
end
if (dreq_i.kill_s1) begin
state_d = IDLE;
end end
end end
if (dreq_i.kill_s1) begin
state_d = IDLE;
end
end
end end
////////////////////////////////// //////////////////////////////////
// check whether we have a hit // check whether we have a hit
@ -237,53 +249,53 @@ end else begin : gen_piton_offset
// reuse the miss mechanism to handle // reuse the miss mechanism to handle
// the request // the request
READ: begin READ: begin
areq_o.fetch_req = '1; areq_o.fetch_req = '1;
// only enable tag comparison if cache is enabled // only enable tag comparison if cache is enabled
cmp_en_d = cache_en_q; cmp_en_d = cache_en_q;
// readout speculatively // readout speculatively
cache_rden = cache_en_q; cache_rden = cache_en_q;
if (areq_i.fetch_valid && (!dreq_i.spec || !addr_ni) ) begin if (areq_i.fetch_valid && (!dreq_i.spec || !addr_ni)) begin
// check if we have to flush // check if we have to flush
if (flush_d) begin if (flush_d) begin
state_d = IDLE; state_d = IDLE;
// we have a hit or an exception output valid result // we have a hit or an exception output valid result
end else if (((|cl_hit && cache_en_q) || areq_i.fetch_exception.valid) && !inv_q) begin end else if (((|cl_hit && cache_en_q) || areq_i.fetch_exception.valid) && !inv_q) begin
dreq_o.valid = ~dreq_i.kill_s2;// just don't output in this case dreq_o.valid = ~dreq_i.kill_s2; // just don't output in this case
state_d = IDLE; state_d = IDLE;
// we can accept another request // we can accept another request
// and stay here, but only if no inval is coming in // and stay here, but only if no inval is coming in
// note: we are not expecting ifill return packets here... // note: we are not expecting ifill return packets here...
if (!mem_rtrn_vld_i) begin if (!mem_rtrn_vld_i) begin
dreq_o.ready = 1'b1; dreq_o.ready = 1'b1;
if (dreq_i.req) begin if (dreq_i.req) begin
state_d = READ; state_d = READ;
end
end
// if a request is being killed at this stage,
// we have to bail out and wait for the address translation to complete
if (dreq_i.kill_s1) begin
state_d = IDLE;
end
// we have a miss / NC transaction
end else if (dreq_i.kill_s2) begin
state_d = IDLE;
end else if (!inv_q) begin
cmp_en_d = 1'b0;
// only count this as a miss if the cache is enabled, and
// the address is cacheable
// send out ifill request
mem_data_req_o = 1'b1;
if (mem_data_ack_i) begin
miss_o = ~paddr_is_nc;
state_d = MISS;
end end
end end
// bail out if this request is being killed (and we missed on the TLB) // if a request is being killed at this stage,
end else if (dreq_i.kill_s2 || flush_d) begin // we have to bail out and wait for the address translation to complete
state_d = KILL_ATRANS; if (dreq_i.kill_s1) begin
state_d = IDLE;
end
// we have a miss / NC transaction
end else if (dreq_i.kill_s2) begin
state_d = IDLE;
end else if (!inv_q) begin
cmp_en_d = 1'b0;
// only count this as a miss if the cache is enabled, and
// the address is cacheable
// send out ifill request
mem_data_req_o = 1'b1;
if (mem_data_ack_i) begin
miss_o = ~paddr_is_nc;
state_d = MISS;
end
end end
// bail out if this request is being killed (and we missed on the TLB)
end else if (dreq_i.kill_s2 || flush_d) begin
state_d = KILL_ATRANS;
end
end end
////////////////////////////////// //////////////////////////////////
// wait until the memory transaction // wait until the memory transaction
@ -293,16 +305,16 @@ end else begin : gen_piton_offset
// note: this is mutually exclusive with ICACHE_INV_REQ, // note: this is mutually exclusive with ICACHE_INV_REQ,
// so we do not have to check for invals here // so we do not have to check for invals here
if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin
state_d = IDLE; state_d = IDLE;
// only return data if request is not being killed // only return data if request is not being killed
if (!(dreq_i.kill_s2 || flush_d)) begin if (!(dreq_i.kill_s2 || flush_d)) begin
dreq_o.valid = 1'b1; dreq_o.valid = 1'b1;
// only write to cache if this address is cacheable // only write to cache if this address is cacheable
cache_wren = ~paddr_is_nc; cache_wren = ~paddr_is_nc;
end end
// bail out if this request is being killed // bail out if this request is being killed
end else if (dreq_i.kill_s2 || flush_d) begin end else if (dreq_i.kill_s2 || flush_d) begin
state_d = KILL_MISS; state_d = KILL_MISS;
end end
end end
////////////////////////////////// //////////////////////////////////
@ -312,7 +324,7 @@ end else begin : gen_piton_offset
KILL_ATRANS: begin KILL_ATRANS: begin
areq_o.fetch_req = '1; areq_o.fetch_req = '1;
if (areq_i.fetch_valid) begin if (areq_i.fetch_valid) begin
state_d = IDLE; state_d = IDLE;
end end
end end
////////////////////////////////// //////////////////////////////////
@ -321,30 +333,28 @@ end else begin : gen_piton_offset
// go back to idle // go back to idle
KILL_MISS: begin KILL_MISS: begin
if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin if (mem_rtrn_vld_i && mem_rtrn_i.rtype == ICACHE_IFILL_ACK) begin
state_d = IDLE; state_d = IDLE;
end end
end end
default: begin default: begin
// we should never get here // we should never get here
state_d = FLUSH; state_d = FLUSH;
end end
endcase // state_q endcase // state_q
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// valid bit invalidation and replacement strategy // valid bit invalidation and replacement strategy
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// note: it cannot happen that we get an invalidation + a cl replacement // note: it cannot happen that we get an invalidation + a cl replacement
// in the same cycle as these requests arrive via the same interface // in the same cycle as these requests arrive via the same interface
// flushes take precedence over invalidations (it is ok if we ignore // flushes take precedence over invalidations (it is ok if we ignore
// the inval since the cache is cleared anyway) // the inval since the cache is cleared anyway)
assign flush_cnt_d = (flush_done) ? '0 : assign flush_cnt_d = (flush_done) ? '0 : (flush_en) ? flush_cnt_q + 1 : flush_cnt_q;
(flush_en) ? flush_cnt_q + 1 :
flush_cnt_q;
assign flush_done = (flush_cnt_q==(ICACHE_NUM_WORDS-1)); assign flush_done = (flush_cnt_q == (ICACHE_NUM_WORDS - 1));
// invalidation/clearing address // invalidation/clearing address
// flushing takes precedence over invals // flushing takes precedence over invals
@ -354,67 +364,66 @@ end else begin : gen_piton_offset
assign vld_req = (flush_en || cache_rden) ? '1 : assign vld_req = (flush_en || cache_rden) ? '1 :
(mem_rtrn_i.inv.all && inv_en) ? '1 : (mem_rtrn_i.inv.all && inv_en) ? '1 :
(mem_rtrn_i.inv.vld && inv_en) ? icache_way_bin2oh(mem_rtrn_i.inv.way) : (mem_rtrn_i.inv.vld && inv_en) ? icache_way_bin2oh(
repl_way_oh_q; mem_rtrn_i.inv.way
) : repl_way_oh_q;
assign vld_wdata = (cache_wren) ? '1 : '0; assign vld_wdata = (cache_wren) ? '1 : '0;
assign vld_we = (cache_wren | inv_en | flush_en); assign vld_we = (cache_wren | inv_en | flush_en);
// assign vld_req = (vld_we | cache_rden); // assign vld_req = (vld_we | cache_rden);
// choose random replacement if all are valid // choose random replacement if all are valid
assign update_lfsr = cache_wren & all_ways_valid; assign update_lfsr = cache_wren & all_ways_valid;
assign repl_way = (all_ways_valid) ? rnd_way : inv_way; assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
assign repl_way_oh_d = (cmp_en_q) ? icache_way_bin2oh(repl_way) : repl_way_oh_q; assign repl_way_oh_d = (cmp_en_q) ? icache_way_bin2oh(repl_way) : repl_way_oh_q;
// enable signals for memory arrays // enable signals for memory arrays
assign cl_req = (cache_rden) ? '1 : assign cl_req = (cache_rden) ? '1 : (cache_wren) ? repl_way_oh_q : '0;
(cache_wren) ? repl_way_oh_q : assign cl_we = cache_wren;
'0;
assign cl_we = cache_wren;
// find invalid cache line // find invalid cache line
lzc #( lzc #(
.WIDTH ( ICACHE_SET_ASSOC ) .WIDTH(ICACHE_SET_ASSOC)
) i_lzc ( ) i_lzc (
.in_i ( ~vld_rdata ), .in_i (~vld_rdata),
.cnt_o ( inv_way ), .cnt_o (inv_way),
.empty_o ( all_ways_valid ) .empty_o(all_ways_valid)
); );
// generate random cacheline index // generate random cacheline index
lfsr #( lfsr #(
.LfsrWidth ( 8 ), .LfsrWidth(8),
.OutWidth ( $clog2(ariane_pkg::ICACHE_SET_ASSOC)) .OutWidth ($clog2(ariane_pkg::ICACHE_SET_ASSOC))
) i_lfsr ( ) i_lfsr (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni(rst_ni),
.en_i ( update_lfsr ), .en_i (update_lfsr),
.out_o ( rnd_way ) .out_o (rnd_way)
); );
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// tag comparison, hit generation // tag comparison, hit generation
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
logic [$clog2(ICACHE_SET_ASSOC)-1:0] hit_idx; logic [$clog2(ICACHE_SET_ASSOC)-1:0] hit_idx;
for (genvar i=0;i<ICACHE_SET_ASSOC;i++) begin : gen_tag_cmpsel for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel
assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i]; assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i];
assign cl_sel[i] = cl_rdata[i][{cl_offset_q,3'b0} +: FETCH_WIDTH]; assign cl_sel[i] = cl_rdata[i][{cl_offset_q, 3'b0}+:FETCH_WIDTH];
assign cl_user[i] = cl_ruser[i][{cl_offset_q,3'b0} +: FETCH_USER_WIDTH]; assign cl_user[i] = cl_ruser[i][{cl_offset_q, 3'b0}+:FETCH_USER_WIDTH];
end end
lzc #( lzc #(
.WIDTH ( ICACHE_SET_ASSOC ) .WIDTH(ICACHE_SET_ASSOC)
) i_lzc_hit ( ) i_lzc_hit (
.in_i ( cl_hit ), .in_i (cl_hit),
.cnt_o ( hit_idx ), .cnt_o (hit_idx),
.empty_o ( ) .empty_o()
); );
always_comb begin always_comb begin
@ -422,37 +431,37 @@ end else begin : gen_piton_offset
dreq_o.data = cl_sel[hit_idx]; dreq_o.data = cl_sel[hit_idx];
dreq_o.user = cl_user[hit_idx]; dreq_o.user = cl_user[hit_idx];
end else begin end else begin
dreq_o.data = mem_rtrn_i.data[{cl_offset_q,3'b0} +: FETCH_WIDTH]; dreq_o.data = mem_rtrn_i.data[{cl_offset_q, 3'b0}+:FETCH_WIDTH];
dreq_o.user = mem_rtrn_i.user[{cl_offset_q,3'b0} +: FETCH_USER_WIDTH]; dreq_o.user = mem_rtrn_i.user[{cl_offset_q, 3'b0}+:FETCH_USER_WIDTH];
end end
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// memory arrays and regs // memory arrays and regs
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
logic [ICACHE_TAG_WIDTH:0] cl_tag_valid_rdata [ICACHE_SET_ASSOC-1:0]; logic [ICACHE_TAG_WIDTH:0] cl_tag_valid_rdata[ICACHE_SET_ASSOC-1:0];
for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_sram for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_sram
// Tag RAM // Tag RAM
sram #( sram #(
// tag + valid bit // tag + valid bit
.DATA_WIDTH ( ICACHE_TAG_WIDTH+1 ), .DATA_WIDTH(ICACHE_TAG_WIDTH + 1),
.NUM_WORDS ( ICACHE_NUM_WORDS ) .NUM_WORDS (ICACHE_NUM_WORDS)
) tag_sram ( ) tag_sram (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.req_i ( vld_req[i] ), .req_i (vld_req[i]),
.we_i ( vld_we ), .we_i (vld_we),
.addr_i ( vld_addr ), .addr_i (vld_addr),
// we can always use the saved tag here since it takes a // we can always use the saved tag here since it takes a
// couple of cycle until we write to the cache upon a miss // couple of cycle until we write to the cache upon a miss
.wuser_i ( '0 ), .wuser_i('0),
.wdata_i ( {vld_wdata[i], cl_tag_q} ), .wdata_i({vld_wdata[i], cl_tag_q}),
.be_i ( '1 ), .be_i ('1),
.ruser_o ( ), .ruser_o(),
.rdata_o ( cl_tag_valid_rdata[i] ) .rdata_o(cl_tag_valid_rdata[i])
); );
assign cl_tag_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH-1:0]; assign cl_tag_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH-1:0];
@ -460,27 +469,27 @@ end else begin : gen_piton_offset
// Data RAM // Data RAM
sram #( sram #(
.USER_WIDTH ( ICACHE_USER_LINE_WIDTH ), .USER_WIDTH(ICACHE_USER_LINE_WIDTH),
.DATA_WIDTH ( ICACHE_LINE_WIDTH ), .DATA_WIDTH(ICACHE_LINE_WIDTH),
.USER_EN ( ariane_pkg::FETCH_USER_EN ), .USER_EN (ariane_pkg::FETCH_USER_EN),
.NUM_WORDS ( ICACHE_NUM_WORDS ) .NUM_WORDS (ICACHE_NUM_WORDS)
) data_sram ( ) data_sram (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.req_i ( cl_req[i] ), .req_i (cl_req[i]),
.we_i ( cl_we ), .we_i (cl_we),
.addr_i ( cl_index ), .addr_i (cl_index),
.wuser_i ( mem_rtrn_i.user ), .wuser_i(mem_rtrn_i.user),
.wdata_i ( mem_rtrn_i.data ), .wdata_i(mem_rtrn_i.data),
.be_i ( '1 ), .be_i ('1),
.ruser_o ( cl_ruser[i] ), .ruser_o(cl_ruser[i]),
.rdata_o ( cl_rdata[i] ) .rdata_o(cl_rdata[i])
); );
end end
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if(!rst_ni) begin if (!rst_ni) begin
cl_tag_q <= '0; cl_tag_q <= '0;
flush_cnt_q <= '0; flush_cnt_q <= '0;
vaddr_q <= '0; vaddr_q <= '0;
@ -505,40 +514,46 @@ end else begin : gen_piton_offset
end end
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// assertions // assertions
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
//pragma translate_off //pragma translate_off
`ifndef VERILATOR `ifndef VERILATOR
repl_inval0: assert property ( repl_inval0 :
assert property (
@(posedge clk_i) disable iff (!rst_ni) cache_wren |-> !(mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld)) @(posedge clk_i) disable iff (!rst_ni) cache_wren |-> !(mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld))
else $fatal(1,"[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously"); else $fatal(1, "[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
repl_inval1: assert property ( repl_inval1 :
assert property (
@(posedge clk_i) disable iff (!rst_ni) (mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld) |-> !cache_wren) @(posedge clk_i) disable iff (!rst_ni) (mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld) |-> !cache_wren)
else $fatal(1,"[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously"); else $fatal(1, "[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
invalid_state: assert property ( invalid_state :
assert property (
@(posedge clk_i) disable iff (!rst_ni) (state_q inside {FLUSH, IDLE, READ, MISS, KILL_ATRANS, KILL_MISS})) @(posedge clk_i) disable iff (!rst_ni) (state_q inside {FLUSH, IDLE, READ, MISS, KILL_ATRANS, KILL_MISS}))
else $fatal(1,"[l1 icache] fsm reached an invalid state"); else $fatal(1, "[l1 icache] fsm reached an invalid state");
hot1: assert property ( hot1 :
@(posedge clk_i) disable iff (!rst_ni) (!inv_en) |-> cache_rden |=> cmp_en_q |-> $onehot0(cl_hit)) assert property (
else $fatal(1,"[l1 icache] cl_hit signal must be hot1"); @(posedge clk_i) disable iff (!rst_ni) (!inv_en) |-> cache_rden |=> cmp_en_q |-> $onehot0(
cl_hit
))
else $fatal(1, "[l1 icache] cl_hit signal must be hot1");
// this is only used for verification! // this is only used for verification!
logic vld_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0]; logic vld_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
logic [ariane_pkg::ICACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0]; logic [ariane_pkg::ICACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];
logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] tag_write_duplicate_test; logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] tag_write_duplicate_test;
always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror
if(!rst_ni) begin if (!rst_ni) begin
vld_mirror <= '{default:'0}; vld_mirror <= '{default: '0};
tag_mirror <= '{default:'0}; tag_mirror <= '{default: '0};
end else begin end else begin
for (int i = 0; i < ICACHE_SET_ASSOC; i++) begin for (int i = 0; i < ICACHE_SET_ASSOC; i++) begin
if(vld_req[i] & vld_we) begin if (vld_req[i] & vld_we) begin
vld_mirror[vld_addr][i] <= vld_wdata[i]; vld_mirror[vld_addr][i] <= vld_wdata[i];
tag_mirror[vld_addr][i] <= cl_tag_q; tag_mirror[vld_addr][i] <= cl_tag_q;
end end
@ -550,17 +565,18 @@ end else begin : gen_piton_offset
assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == cl_tag_q) & vld_mirror[vld_addr][i] & (|vld_wdata); assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == cl_tag_q) & vld_mirror[vld_addr][i] & (|vld_wdata);
end end
tag_write_duplicate: assert property ( tag_write_duplicate :
assert property (
@(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test)) @(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test))
else $fatal(1,"[l1 icache] cannot allocate a CL that is already present in the cache"); else $fatal(1, "[l1 icache] cannot allocate a CL that is already present in the cache");
initial begin initial begin
// assert wrong parameterizations // assert wrong parameterizations
assert (ICACHE_INDEX_WIDTH<=12) assert (ICACHE_INDEX_WIDTH <= 12)
else $fatal(1,"[l1 icache] cache index width can be maximum 12bit since VM uses 4kB pages"); else $fatal(1, "[l1 icache] cache index width can be maximum 12bit since VM uses 4kB pages");
end end
`endif `endif
//pragma translate_on //pragma translate_on
endmodule // cva6_icache endmodule // cva6_icache

View file

@ -13,56 +13,60 @@
// Description: wrapper module to connect the L1I$ to a 64bit AXI bus. // Description: wrapper module to connect the L1I$ to a 64bit AXI bus.
// //
module cva6_icache_axi_wrapper import ariane_pkg::*; import wt_cache_pkg::*; #( module cva6_icache_axi_wrapper
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter type axi_req_t = logic, import wt_cache_pkg::*;
parameter type axi_rsp_t = logic #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input riscv::priv_lvl_t priv_lvl_i, input riscv::priv_lvl_t priv_lvl_i,
input logic flush_i, // flush the icache, flush and kill have to be asserted together input logic flush_i, // flush the icache, flush and kill have to be asserted together
input logic en_i, // enable icache input logic en_i, // enable icache
output logic miss_o, // to performance counter output logic miss_o, // to performance counter
// address translation requests // address translation requests
input icache_areq_t areq_i, input icache_areq_t areq_i,
output icache_arsp_t areq_o, output icache_arsp_t areq_o,
// data requests // data requests
input icache_dreq_t dreq_i, input icache_dreq_t dreq_i,
output icache_drsp_t dreq_o, output icache_drsp_t dreq_o,
// AXI refill port // AXI refill port
output axi_req_t axi_req_o, output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i input axi_rsp_t axi_resp_i
); );
localparam AxiNumWords = (ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH > DCACHE_LINE_WIDTH) + localparam AxiNumWords = (ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH > DCACHE_LINE_WIDTH) +
(DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH <= DCACHE_LINE_WIDTH) ; (DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH <= DCACHE_LINE_WIDTH) ;
logic icache_mem_rtrn_vld; logic icache_mem_rtrn_vld;
icache_rtrn_t icache_mem_rtrn; icache_rtrn_t icache_mem_rtrn;
logic icache_mem_data_req; logic icache_mem_data_req;
logic icache_mem_data_ack; logic icache_mem_data_ack;
icache_req_t icache_mem_data; icache_req_t icache_mem_data;
logic axi_rd_req; logic axi_rd_req;
logic axi_rd_gnt; logic axi_rd_gnt;
logic [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr; logic [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr;
logic [$clog2(AxiNumWords)-1:0] axi_rd_blen; logic [ $clog2(AxiNumWords)-1:0] axi_rd_blen;
logic [2:0] axi_rd_size; logic [ 2:0] axi_rd_size;
logic [CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_in; logic [ CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_in;
logic axi_rd_rdy; logic axi_rd_rdy;
logic axi_rd_lock; logic axi_rd_lock;
logic axi_rd_last; logic axi_rd_last;
logic axi_rd_valid; logic axi_rd_valid;
logic [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data; logic [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data;
logic [CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_out; logic [ CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_out;
logic axi_rd_exokay; logic axi_rd_exokay;
logic req_valid_d, req_valid_q; logic req_valid_d, req_valid_q;
icache_req_t req_data_d, req_data_q; icache_req_t req_data_d, req_data_q;
logic first_d, first_q; logic first_d, first_q;
logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] rd_shift_d, rd_shift_q; logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0]
rd_shift_d, rd_shift_q;
// Keep read request asserted until we have an AXI grant. This is not guaranteed by icache (but // Keep read request asserted until we have an AXI grant. This is not guaranteed by icache (but
// required by AXI). // required by AXI).
@ -76,8 +80,8 @@ module cva6_icache_axi_wrapper import ariane_pkg::*; import wt_cache_pkg::*; #(
assign axi_rd_addr = CVA6Cfg.AxiAddrWidth'(req_data_d.paddr); assign axi_rd_addr = CVA6Cfg.AxiAddrWidth'(req_data_d.paddr);
// Fetch a full cache line on a cache miss, or a single word on a bypassed access // Fetch a full cache line on a cache miss, or a single word on a bypassed access
assign axi_rd_blen = (req_data_d.nc) ? '0 : ariane_pkg::ICACHE_LINE_WIDTH/64-1; assign axi_rd_blen = (req_data_d.nc) ? '0 : ariane_pkg::ICACHE_LINE_WIDTH / 64 - 1;
assign axi_rd_size = $clog2(CVA6Cfg.AxiDataWidth/8); // Maximum assign axi_rd_size = $clog2(CVA6Cfg.AxiDataWidth / 8); // Maximum
assign axi_rd_id_in = req_data_d.tid; assign axi_rd_id_in = req_data_d.tid;
assign axi_rd_rdy = 1'b1; assign axi_rd_rdy = 1'b1;
assign axi_rd_lock = 1'b0; assign axi_rd_lock = 1'b0;
@ -96,68 +100,68 @@ module cva6_icache_axi_wrapper import ariane_pkg::*; import wt_cache_pkg::*; #(
// I-Cache // I-Cache
// ------- // -------
cva6_icache #( cva6_icache #(
// use ID 0 for icache reads // use ID 0 for icache reads
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg(CVA6Cfg),
.RdTxId ( 0 ) .RdTxId (0)
) i_cva6_icache ( ) i_cva6_icache (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( flush_i ), .flush_i (flush_i),
.en_i ( en_i ), .en_i (en_i),
.miss_o ( miss_o ), .miss_o (miss_o),
.areq_i ( areq_i ), .areq_i (areq_i),
.areq_o ( areq_o ), .areq_o (areq_o),
.dreq_i ( dreq_i ), .dreq_i (dreq_i),
.dreq_o ( dreq_o ), .dreq_o (dreq_o),
.mem_rtrn_vld_i ( icache_mem_rtrn_vld ), .mem_rtrn_vld_i(icache_mem_rtrn_vld),
.mem_rtrn_i ( icache_mem_rtrn ), .mem_rtrn_i (icache_mem_rtrn),
.mem_data_req_o ( icache_mem_data_req ), .mem_data_req_o(icache_mem_data_req),
.mem_data_ack_i ( icache_mem_data_ack ), .mem_data_ack_i(icache_mem_data_ack),
.mem_data_o ( icache_mem_data ) .mem_data_o (icache_mem_data)
); );
// -------- // --------
// AXI shim // AXI shim
// -------- // --------
axi_shim #( axi_shim #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
.AxiNumWords ( AxiNumWords ), .AxiNumWords(AxiNumWords),
.axi_req_t ( axi_req_t ), .axi_req_t (axi_req_t),
.axi_rsp_t ( axi_rsp_t ) .axi_rsp_t (axi_rsp_t)
) i_axi_shim ( ) i_axi_shim (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.rd_req_i ( axi_rd_req ), .rd_req_i (axi_rd_req),
.rd_gnt_o ( axi_rd_gnt ), .rd_gnt_o (axi_rd_gnt),
.rd_addr_i ( axi_rd_addr ), .rd_addr_i (axi_rd_addr),
.rd_blen_i ( axi_rd_blen ), .rd_blen_i (axi_rd_blen),
.rd_size_i ( axi_rd_size ), .rd_size_i (axi_rd_size),
.rd_id_i ( axi_rd_id_in ), .rd_id_i (axi_rd_id_in),
.rd_rdy_i ( axi_rd_rdy ), .rd_rdy_i (axi_rd_rdy),
.rd_lock_i ( axi_rd_lock ), .rd_lock_i (axi_rd_lock),
.rd_last_o ( axi_rd_last ), .rd_last_o (axi_rd_last),
.rd_valid_o ( axi_rd_valid ), .rd_valid_o (axi_rd_valid),
.rd_data_o ( axi_rd_data ), .rd_data_o (axi_rd_data),
.rd_user_o ( ), .rd_user_o (),
.rd_id_o ( axi_rd_id_out ), .rd_id_o (axi_rd_id_out),
.rd_exokay_o ( axi_rd_exokay ), .rd_exokay_o(axi_rd_exokay),
.wr_req_i ( '0 ), .wr_req_i ('0),
.wr_gnt_o ( ), .wr_gnt_o (),
.wr_addr_i ( '0 ), .wr_addr_i ('0),
.wr_data_i ( '0 ), .wr_data_i ('0),
.wr_user_i ( '0 ), .wr_user_i ('0),
.wr_be_i ( '0 ), .wr_be_i ('0),
.wr_blen_i ( '0 ), .wr_blen_i ('0),
.wr_size_i ( '0 ), .wr_size_i ('0),
.wr_id_i ( '0 ), .wr_id_i ('0),
.wr_lock_i ( '0 ), .wr_lock_i ('0),
.wr_atop_i ( '0 ), .wr_atop_i ('0),
.wr_rdy_i ( '0 ), .wr_rdy_i ('0),
.wr_valid_o ( ), .wr_valid_o (),
.wr_id_o ( ), .wr_id_o (),
.wr_exokay_o ( ), .wr_exokay_o(),
.axi_req_o ( axi_req_o ), .axi_req_o (axi_req_o),
.axi_resp_i ( axi_resp_i ) .axi_resp_i (axi_resp_i)
); );
// Buffer burst data in shift register // Buffer burst data in shift register
@ -166,7 +170,7 @@ module cva6_icache_axi_wrapper import ariane_pkg::*; import wt_cache_pkg::*; #(
rd_shift_d = rd_shift_q; rd_shift_d = rd_shift_q;
if (axi_rd_valid) begin if (axi_rd_valid) begin
first_d = axi_rd_last; first_d = axi_rd_last;
if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin
rd_shift_d = axi_rd_data; rd_shift_d = axi_rd_data;
end else begin end else begin
@ -195,4 +199,4 @@ module cva6_icache_axi_wrapper import ariane_pkg::*; import wt_cache_pkg::*; #(
end end
end end
endmodule // cva6_icache_axi_wrapper endmodule // cva6_icache_axi_wrapper

File diff suppressed because it is too large Load diff

View file

@ -15,277 +15,301 @@
// write-back data cache. // write-back data cache.
module std_cache_subsystem import ariane_pkg::*; import std_cache_pkg::*; #( module std_cache_subsystem
import ariane_pkg::*;
import std_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NumPorts = 4, parameter int unsigned NumPorts = 4,
parameter type axi_ar_chan_t = logic, parameter type axi_ar_chan_t = logic,
parameter type axi_aw_chan_t = logic, parameter type axi_aw_chan_t = logic,
parameter type axi_w_chan_t = logic, parameter type axi_w_chan_t = logic,
parameter type axi_req_t = logic, parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic parameter type axi_rsp_t = logic
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input riscv::priv_lvl_t priv_lvl_i, input riscv::priv_lvl_t priv_lvl_i,
// I$ // I$
input logic icache_en_i, // enable icache (or bypass e.g: in debug mode) input logic icache_en_i, // enable icache (or bypass e.g: in debug mode)
input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together
output logic icache_miss_o, // to performance counter output logic icache_miss_o, // to performance counter
// address translation requests // address translation requests
input icache_areq_t icache_areq_i, // to/from frontend input icache_areq_t icache_areq_i, // to/from frontend
output icache_arsp_t icache_areq_o, output icache_arsp_t icache_areq_o,
// data requests // data requests
input icache_dreq_t icache_dreq_i, // to/from frontend input icache_dreq_t icache_dreq_i, // to/from frontend
output icache_drsp_t icache_dreq_o, output icache_drsp_t icache_dreq_o,
// AMOs // AMOs
input amo_req_t amo_req_i, input amo_req_t amo_req_i,
output amo_resp_t amo_resp_o, output amo_resp_t amo_resp_o,
// D$ // D$
// Cache management // Cache management
input logic dcache_enable_i, // from CSR input logic dcache_enable_i, // from CSR
input logic dcache_flush_i, // high until acknowledged input logic dcache_flush_i, // high until acknowledged
output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic dcache_miss_o, // we missed on a ld/st output logic dcache_miss_o, // we missed on a ld/st
output logic wbuffer_empty_o, // statically set to 1, as there is no wbuffer in this cache system output logic wbuffer_empty_o, // statically set to 1, as there is no wbuffer in this cache system
// Request ports // Request ports
input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // to/from LSU input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // to/from LSU
output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to/from LSU output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to/from LSU
// memory side // memory side
output axi_req_t axi_req_o, output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i input axi_rsp_t axi_resp_i
); );
assign wbuffer_empty_o = 1'b1; assign wbuffer_empty_o = 1'b1;
axi_req_t axi_req_icache; axi_req_t axi_req_icache;
axi_rsp_t axi_resp_icache; axi_rsp_t axi_resp_icache;
axi_req_t axi_req_bypass; axi_req_t axi_req_bypass;
axi_rsp_t axi_resp_bypass; axi_rsp_t axi_resp_bypass;
axi_req_t axi_req_data; axi_req_t axi_req_data;
axi_rsp_t axi_resp_data; axi_rsp_t axi_resp_data;
cva6_icache_axi_wrapper #( cva6_icache_axi_wrapper #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
.axi_req_t ( axi_req_t ), .axi_req_t(axi_req_t),
.axi_rsp_t ( axi_rsp_t ) .axi_rsp_t(axi_rsp_t)
) i_cva6_icache_axi_wrapper ( ) i_cva6_icache_axi_wrapper (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.priv_lvl_i ( priv_lvl_i ), .priv_lvl_i(priv_lvl_i),
.flush_i ( icache_flush_i ), .flush_i (icache_flush_i),
.en_i ( icache_en_i ), .en_i (icache_en_i),
.miss_o ( icache_miss_o ), .miss_o (icache_miss_o),
.areq_i ( icache_areq_i ), .areq_i (icache_areq_i),
.areq_o ( icache_areq_o ), .areq_o (icache_areq_o),
.dreq_i ( icache_dreq_i ), .dreq_i (icache_dreq_i),
.dreq_o ( icache_dreq_o ), .dreq_o (icache_dreq_o),
.axi_req_o ( axi_req_icache ), .axi_req_o (axi_req_icache),
.axi_resp_i ( axi_resp_icache ) .axi_resp_i(axi_resp_icache)
); );
// decreasing priority // decreasing priority
// Port 0: PTW // Port 0: PTW
// Port 1: Load Unit // Port 1: Load Unit
// Port 2: Accelerator // Port 2: Accelerator
// Port 3: Store Unit // Port 3: Store Unit
std_nbdcache #( std_nbdcache #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
.NumPorts ( NumPorts ), .NumPorts (NumPorts),
.axi_req_t ( axi_req_t ), .axi_req_t(axi_req_t),
.axi_rsp_t ( axi_rsp_t ) .axi_rsp_t(axi_rsp_t)
) i_nbdcache ( ) i_nbdcache (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.enable_i ( dcache_enable_i ), .enable_i (dcache_enable_i),
.flush_i ( dcache_flush_i ), .flush_i (dcache_flush_i),
.flush_ack_o ( dcache_flush_ack_o ), .flush_ack_o (dcache_flush_ack_o),
.miss_o ( dcache_miss_o ), .miss_o (dcache_miss_o),
.axi_bypass_o ( axi_req_bypass ), .axi_bypass_o(axi_req_bypass),
.axi_bypass_i ( axi_resp_bypass ), .axi_bypass_i(axi_resp_bypass),
.axi_data_o ( axi_req_data ), .axi_data_o (axi_req_data),
.axi_data_i ( axi_resp_data ), .axi_data_i (axi_resp_data),
.req_ports_i ( dcache_req_ports_i ), .req_ports_i (dcache_req_ports_i),
.req_ports_o ( dcache_req_ports_o ), .req_ports_o (dcache_req_ports_o),
.amo_req_i, .amo_req_i,
.amo_resp_o .amo_resp_o
); );
// ----------------------- // -----------------------
// Arbitrate AXI Ports // Arbitrate AXI Ports
// ----------------------- // -----------------------
logic [1:0] w_select, w_select_fifo, w_select_arbiter; logic [1:0] w_select, w_select_fifo, w_select_arbiter;
logic [1:0] w_fifo_usage; logic [1:0] w_fifo_usage;
logic w_fifo_empty, w_fifo_full; logic w_fifo_empty, w_fifo_full;
// AR Channel // AR Channel
stream_arbiter #( stream_arbiter #(
.DATA_T ( axi_ar_chan_t ), .DATA_T(axi_ar_chan_t),
.N_INP ( 3 ) .N_INP (3)
) i_stream_arbiter_ar ( ) i_stream_arbiter_ar (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.inp_data_i ( {axi_req_icache.ar, axi_req_bypass.ar, axi_req_data.ar} ), .inp_data_i ({axi_req_icache.ar, axi_req_bypass.ar, axi_req_data.ar}),
.inp_valid_i ( {axi_req_icache.ar_valid, axi_req_bypass.ar_valid, axi_req_data.ar_valid} ), .inp_valid_i({axi_req_icache.ar_valid, axi_req_bypass.ar_valid, axi_req_data.ar_valid}),
.inp_ready_o ( {axi_resp_icache.ar_ready, axi_resp_bypass.ar_ready, axi_resp_data.ar_ready} ), .inp_ready_o({axi_resp_icache.ar_ready, axi_resp_bypass.ar_ready, axi_resp_data.ar_ready}),
.oup_data_o ( axi_req_o.ar ), .oup_data_o (axi_req_o.ar),
.oup_valid_o ( axi_req_o.ar_valid ), .oup_valid_o(axi_req_o.ar_valid),
.oup_ready_i ( axi_resp_i.ar_ready ) .oup_ready_i(axi_resp_i.ar_ready)
); );
// AW Channel // AW Channel
stream_arbiter #( stream_arbiter #(
.DATA_T ( axi_aw_chan_t ), .DATA_T(axi_aw_chan_t),
.N_INP ( 3 ) .N_INP (3)
) i_stream_arbiter_aw ( ) i_stream_arbiter_aw (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.inp_data_i ( {axi_req_icache.aw, axi_req_bypass.aw, axi_req_data.aw} ), .inp_data_i ({axi_req_icache.aw, axi_req_bypass.aw, axi_req_data.aw}),
.inp_valid_i ( {axi_req_icache.aw_valid, axi_req_bypass.aw_valid, axi_req_data.aw_valid} ), .inp_valid_i({axi_req_icache.aw_valid, axi_req_bypass.aw_valid, axi_req_data.aw_valid}),
.inp_ready_o ( {axi_resp_icache.aw_ready, axi_resp_bypass.aw_ready, axi_resp_data.aw_ready} ), .inp_ready_o({axi_resp_icache.aw_ready, axi_resp_bypass.aw_ready, axi_resp_data.aw_ready}),
.oup_data_o ( axi_req_o.aw ), .oup_data_o (axi_req_o.aw),
.oup_valid_o ( axi_req_o.aw_valid ), .oup_valid_o(axi_req_o.aw_valid),
.oup_ready_i ( axi_resp_i.aw_ready ) .oup_ready_i(axi_resp_i.aw_ready)
); );
// WID has been removed in AXI 4 so we need to keep track which AW request has been accepted // WID has been removed in AXI 4 so we need to keep track which AW request has been accepted
// to forward the correct write data. // to forward the correct write data.
always_comb begin always_comb begin
w_select = 0; w_select = 0;
unique casez (axi_req_o.aw.id) unique casez (axi_req_o.aw.id)
4'b0111: w_select = 2; // dcache 4'b0111: w_select = 2; // dcache
4'b1???: w_select = 1; // bypass 4'b1???: w_select = 1; // bypass
default: w_select = 0; // icache default: w_select = 0; // icache
endcase endcase
end end
// W Channel // W Channel
fifo_v3 #( fifo_v3 #(
.DATA_WIDTH ( 2 ), .DATA_WIDTH (2),
// we can have a maximum of 4 oustanding transactions as each port is blocking // we can have a maximum of 4 oustanding transactions as each port is blocking
.DEPTH ( 4 ), .DEPTH (4),
.FALL_THROUGH ( 1'b1 ) .FALL_THROUGH(1'b1)
) i_fifo_w_channel ( ) i_fifo_w_channel (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i (1'b0),
.testmode_i ( 1'b0 ), .testmode_i(1'b0),
.full_o ( w_fifo_full ), .full_o (w_fifo_full),
.empty_o ( ), // leave open .empty_o (), // leave open
.usage_o ( w_fifo_usage ), .usage_o (w_fifo_usage),
.data_i ( w_select ), .data_i (w_select),
// a new transaction was requested and granted // a new transaction was requested and granted
.push_i ( axi_req_o.aw_valid & axi_resp_i.aw_ready ), .push_i (axi_req_o.aw_valid & axi_resp_i.aw_ready),
// write ID to select the output MUX // write ID to select the output MUX
.data_o ( w_select_fifo ), .data_o (w_select_fifo),
// transaction has finished // transaction has finished
.pop_i ( axi_req_o.w_valid & axi_resp_i.w_ready & axi_req_o.w.last ) .pop_i (axi_req_o.w_valid & axi_resp_i.w_ready & axi_req_o.w.last)
); );
// In fall-through mode, the empty_o will be low when push_i is high (on zero usage). // In fall-through mode, the empty_o will be low when push_i is high (on zero usage).
// We do not want this here. Also, usage_o is missing the MSB, so on full fifo, usage_o is zero. // We do not want this here. Also, usage_o is missing the MSB, so on full fifo, usage_o is zero.
assign w_fifo_empty = w_fifo_usage == 0 && !w_fifo_full; assign w_fifo_empty = w_fifo_usage == 0 && !w_fifo_full;
// icache will never write so select it as default (e.g.: when no arbitration is active) // icache will never write so select it as default (e.g.: when no arbitration is active)
// this is equal to setting it to zero // this is equal to setting it to zero
assign w_select_arbiter = w_fifo_empty ? (axi_req_o.aw_valid ? w_select : 0) : w_select_fifo; assign w_select_arbiter = w_fifo_empty ? (axi_req_o.aw_valid ? w_select : 0) : w_select_fifo;
stream_mux #( stream_mux #(
.DATA_T ( axi_w_chan_t ), .DATA_T(axi_w_chan_t),
.N_INP ( 3 ) .N_INP (3)
) i_stream_mux_w ( ) i_stream_mux_w (
.inp_data_i ( {axi_req_data.w, axi_req_bypass.w, axi_req_icache.w} ), .inp_data_i ({axi_req_data.w, axi_req_bypass.w, axi_req_icache.w}),
.inp_valid_i ( {axi_req_data.w_valid, axi_req_bypass.w_valid, axi_req_icache.w_valid} ), .inp_valid_i({axi_req_data.w_valid, axi_req_bypass.w_valid, axi_req_icache.w_valid}),
.inp_ready_o ( {axi_resp_data.w_ready, axi_resp_bypass.w_ready, axi_resp_icache.w_ready} ), .inp_ready_o({axi_resp_data.w_ready, axi_resp_bypass.w_ready, axi_resp_icache.w_ready}),
.inp_sel_i ( w_select_arbiter ), .inp_sel_i (w_select_arbiter),
.oup_data_o ( axi_req_o.w ), .oup_data_o (axi_req_o.w),
.oup_valid_o ( axi_req_o.w_valid ), .oup_valid_o(axi_req_o.w_valid),
.oup_ready_i ( axi_resp_i.w_ready ) .oup_ready_i(axi_resp_i.w_ready)
); );
// Route responses based on ID // Route responses based on ID
// 0000 -> I$ // 0000 -> I$
// 0111 -> D$ // 0111 -> D$
// 1??? -> Bypass // 1??? -> Bypass
// R Channel // R Channel
assign axi_resp_icache.r = axi_resp_i.r; assign axi_resp_icache.r = axi_resp_i.r;
assign axi_resp_bypass.r = axi_resp_i.r; assign axi_resp_bypass.r = axi_resp_i.r;
assign axi_resp_data.r = axi_resp_i.r; assign axi_resp_data.r = axi_resp_i.r;
logic [1:0] r_select; logic [1:0] r_select;
always_comb begin always_comb begin
r_select = 0; r_select = 0;
unique casez (axi_resp_i.r.id) unique casez (axi_resp_i.r.id)
4'b0111: r_select = 0; // dcache 4'b0111: r_select = 0; // dcache
4'b1???: r_select = 1; // bypass 4'b1???: r_select = 1; // bypass
4'b0000: r_select = 2; // icache 4'b0000: r_select = 2; // icache
default: r_select = 0; default: r_select = 0;
endcase endcase
end end
stream_demux #( stream_demux #(
.N_OUP ( 3 ) .N_OUP(3)
) i_stream_demux_r ( ) i_stream_demux_r (
.inp_valid_i ( axi_resp_i.r_valid ), .inp_valid_i(axi_resp_i.r_valid),
.inp_ready_o ( axi_req_o.r_ready ), .inp_ready_o(axi_req_o.r_ready),
.oup_sel_i ( r_select ), .oup_sel_i (r_select),
.oup_valid_o ( {axi_resp_icache.r_valid, axi_resp_bypass.r_valid, axi_resp_data.r_valid} ), .oup_valid_o({axi_resp_icache.r_valid, axi_resp_bypass.r_valid, axi_resp_data.r_valid}),
.oup_ready_i ( {axi_req_icache.r_ready, axi_req_bypass.r_ready, axi_req_data.r_ready} ) .oup_ready_i({axi_req_icache.r_ready, axi_req_bypass.r_ready, axi_req_data.r_ready})
); );
// B Channel // B Channel
logic [1:0] b_select; logic [1:0] b_select;
assign axi_resp_icache.b = axi_resp_i.b; assign axi_resp_icache.b = axi_resp_i.b;
assign axi_resp_bypass.b = axi_resp_i.b; assign axi_resp_bypass.b = axi_resp_i.b;
assign axi_resp_data.b = axi_resp_i.b; assign axi_resp_data.b = axi_resp_i.b;
always_comb begin always_comb begin
b_select = 0; b_select = 0;
unique casez (axi_resp_i.b.id) unique casez (axi_resp_i.b.id)
4'b0111: b_select = 0; // dcache 4'b0111: b_select = 0; // dcache
4'b1???: b_select = 1; // bypass 4'b1???: b_select = 1; // bypass
4'b0000: b_select = 2; // icache 4'b0000: b_select = 2; // icache
default: b_select = 0; default: b_select = 0;
endcase endcase
end end
stream_demux #( stream_demux #(
.N_OUP ( 3 ) .N_OUP(3)
) i_stream_demux_b ( ) i_stream_demux_b (
.inp_valid_i ( axi_resp_i.b_valid ), .inp_valid_i(axi_resp_i.b_valid),
.inp_ready_o ( axi_req_o.b_ready ), .inp_ready_o(axi_req_o.b_ready),
.oup_sel_i ( b_select ), .oup_sel_i (b_select),
.oup_valid_o ( {axi_resp_icache.b_valid, axi_resp_bypass.b_valid, axi_resp_data.b_valid} ), .oup_valid_o({axi_resp_icache.b_valid, axi_resp_bypass.b_valid, axi_resp_data.b_valid}),
.oup_ready_i ( {axi_req_icache.b_ready, axi_req_bypass.b_ready, axi_req_data.b_ready} ) .oup_ready_i({axi_req_icache.b_ready, axi_req_bypass.b_ready, axi_req_data.b_ready})
); );
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// assertions // assertions
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
//pragma translate_off //pragma translate_off
`ifndef VERILATOR `ifndef VERILATOR
a_invalid_instruction_fetch: assert property ( a_invalid_instruction_fetch :
assert property (
@(posedge clk_i) disable iff (~rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX) @(posedge clk_i) disable iff (~rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
else $warning(1,"[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X", else
icache_dreq_o.vaddr, icache_dreq_o.data); $warning(
1,
"[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X",
icache_dreq_o.vaddr,
icache_dreq_o.data
);
a_invalid_write_data: assert property ( a_invalid_write_data :
assert property (
@(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> |dcache_req_ports_i[NumPorts-1].data_be |-> (|dcache_req_ports_i[NumPorts-1].data_wdata) !== 1'hX) @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> |dcache_req_ports_i[NumPorts-1].data_be |-> (|dcache_req_ports_i[NumPorts-1].data_wdata) !== 1'hX)
else $warning(1,"[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X", else
{dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index}, dcache_req_ports_i[NumPorts-1].data_be, dcache_req_ports_i[NumPorts-1].data_wdata); $warning(
1,
"[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X",
{
dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index
},
dcache_req_ports_i[NumPorts-1].data_be,
dcache_req_ports_i[NumPorts-1].data_wdata
);
generate generate
for(genvar j=0; j<NumPorts-1; j++) begin for (genvar j = 0; j < NumPorts - 1; j++) begin
a_invalid_read_data: assert property ( a_invalid_read_data :
assert property (
@(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_o[j].data_rvalid |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX) @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_o[j].data_rvalid |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
else $warning(1,"[l1 dcache] reading invalid data on port %01d: data=%016X", else
j, dcache_req_ports_o[j].data_rdata); $warning(
end 1,
"[l1 dcache] reading invalid data on port %01d: data=%016X",
j,
dcache_req_ports_o[j].data_rdata
);
end
endgenerate endgenerate
`endif `endif
//pragma translate_on //pragma translate_on
endmodule // std_cache_subsystem endmodule // std_cache_subsystem

View file

@ -13,263 +13,267 @@
// Description: Nonblocking private L1 dcache // Description: Nonblocking private L1 dcache
module std_nbdcache import std_cache_pkg::*; import ariane_pkg::*; #( module std_nbdcache
import std_cache_pkg::*;
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NumPorts = 4, parameter int unsigned NumPorts = 4,
parameter type axi_req_t = logic, parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic parameter type axi_rsp_t = logic
)( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
// Cache management // Cache management
input logic enable_i, // from CSR input logic enable_i, // from CSR
input logic flush_i, // high until acknowledged input logic flush_i, // high until acknowledged
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic miss_o, // we missed on a LD/ST output logic miss_o, // we missed on a LD/ST
// AMOs // AMOs
input amo_req_t amo_req_i, input amo_req_t amo_req_i,
output amo_resp_t amo_resp_o, output amo_resp_t amo_resp_o,
// Request ports // Request ports
input dcache_req_i_t [NumPorts-1:0] req_ports_i, // request ports input dcache_req_i_t [NumPorts-1:0] req_ports_i, // request ports
output dcache_req_o_t [NumPorts-1:0] req_ports_o, // request ports output dcache_req_o_t [NumPorts-1:0] req_ports_o, // request ports
// Cache AXI refill port // Cache AXI refill port
output axi_req_t axi_data_o, output axi_req_t axi_data_o,
input axi_rsp_t axi_data_i, input axi_rsp_t axi_data_i,
output axi_req_t axi_bypass_o, output axi_req_t axi_bypass_o,
input axi_rsp_t axi_bypass_i input axi_rsp_t axi_bypass_i
); );
import std_cache_pkg::*; import std_cache_pkg::*;
// ------------------------------- // -------------------------------
// Controller <-> Arbiter // Controller <-> Arbiter
// ------------------------------- // -------------------------------
// 1. Miss handler // 1. Miss handler
// 2. PTW // 2. PTW
// 3. Load Unit // 3. Load Unit
// 4. Accelerator // 4. Accelerator
// 5. Store unit // 5. Store unit
logic [NumPorts:0][DCACHE_SET_ASSOC-1:0] req; logic [ NumPorts:0][ DCACHE_SET_ASSOC-1:0] req;
logic [NumPorts:0][DCACHE_INDEX_WIDTH-1:0]addr; logic [ NumPorts:0][DCACHE_INDEX_WIDTH-1:0] addr;
logic [NumPorts:0] gnt; logic [ NumPorts:0] gnt;
cache_line_t [DCACHE_SET_ASSOC-1:0] rdata; cache_line_t [ DCACHE_SET_ASSOC-1:0] rdata;
logic [NumPorts:0][DCACHE_TAG_WIDTH-1:0] tag; logic [ NumPorts:0][ DCACHE_TAG_WIDTH-1:0] tag;
cache_line_t [NumPorts:0] wdata; cache_line_t [ NumPorts:0] wdata;
logic [NumPorts:0] we; logic [ NumPorts:0] we;
cl_be_t [NumPorts:0] be; cl_be_t [ NumPorts:0] be;
logic [DCACHE_SET_ASSOC-1:0] hit_way; logic [ DCACHE_SET_ASSOC-1:0] hit_way;
// ------------------------------- // -------------------------------
// Controller <-> Miss unit // Controller <-> Miss unit
// ------------------------------- // -------------------------------
logic [NumPorts-1:0] busy; logic [ NumPorts-1:0] busy;
logic [NumPorts-1:0][55:0] mshr_addr; logic [ NumPorts-1:0][ 55:0] mshr_addr;
logic [NumPorts-1:0] mshr_addr_matches; logic [ NumPorts-1:0] mshr_addr_matches;
logic [NumPorts-1:0] mshr_index_matches; logic [ NumPorts-1:0] mshr_index_matches;
logic [63:0] critical_word; logic [ 63:0] critical_word;
logic critical_word_valid; logic critical_word_valid;
logic [NumPorts-1:0][$bits(miss_req_t)-1:0] miss_req; logic [ NumPorts-1:0][ $bits(miss_req_t)-1:0] miss_req;
logic [NumPorts-1:0] miss_gnt; logic [ NumPorts-1:0] miss_gnt;
logic [NumPorts-1:0] active_serving; logic [ NumPorts-1:0] active_serving;
logic [NumPorts-1:0] bypass_gnt; logic [ NumPorts-1:0] bypass_gnt;
logic [NumPorts-1:0] bypass_valid; logic [ NumPorts-1:0] bypass_valid;
logic [NumPorts-1:0][63:0] bypass_data; logic [ NumPorts-1:0][ 63:0] bypass_data;
// ------------------------------- // -------------------------------
// Arbiter <-> Datram, // Arbiter <-> Datram,
// ------------------------------- // -------------------------------
logic [DCACHE_SET_ASSOC-1:0] req_ram; logic [ DCACHE_SET_ASSOC-1:0] req_ram;
logic [DCACHE_INDEX_WIDTH-1:0] addr_ram; logic [DCACHE_INDEX_WIDTH-1:0] addr_ram;
logic we_ram; logic we_ram;
cache_line_t wdata_ram; cache_line_t wdata_ram;
cache_line_t [DCACHE_SET_ASSOC-1:0] rdata_ram; cache_line_t [ DCACHE_SET_ASSOC-1:0] rdata_ram;
cl_be_t be_ram; cl_be_t be_ram;
// ------------------ // ------------------
// Cache Controller // Cache Controller
// ------------------ // ------------------
generate generate
for (genvar i = 0; i < NumPorts; i++) begin : master_ports for (genvar i = 0; i < NumPorts; i++) begin : master_ports
cache_ctrl #( cache_ctrl #(
.CVA6Cfg ( CVA6Cfg ) .CVA6Cfg(CVA6Cfg)
) i_cache_ctrl ( ) i_cache_ctrl (
.bypass_i ( ~enable_i ), .bypass_i (~enable_i),
.busy_o ( busy [i] ), .busy_o (busy[i]),
// from core // from core
.req_port_i ( req_ports_i [i] ), .req_port_i(req_ports_i[i]),
.req_port_o ( req_ports_o [i] ), .req_port_o(req_ports_o[i]),
// to SRAM array // to SRAM array
.req_o ( req [i+1] ), .req_o (req[i+1]),
.addr_o ( addr [i+1] ), .addr_o (addr[i+1]),
.gnt_i ( gnt [i+1] ), .gnt_i (gnt[i+1]),
.data_i ( rdata ), .data_i (rdata),
.tag_o ( tag [i+1] ), .tag_o (tag[i+1]),
.data_o ( wdata [i+1] ), .data_o (wdata[i+1]),
.we_o ( we [i+1] ), .we_o (we[i+1]),
.be_o ( be [i+1] ), .be_o (be[i+1]),
.hit_way_i ( hit_way ), .hit_way_i (hit_way),
.miss_req_o ( miss_req [i] ), .miss_req_o (miss_req[i]),
.miss_gnt_i ( miss_gnt [i] ), .miss_gnt_i (miss_gnt[i]),
.active_serving_i ( active_serving [i] ), .active_serving_i (active_serving[i]),
.critical_word_i ( critical_word ), .critical_word_i (critical_word),
.critical_word_valid_i ( critical_word_valid ), .critical_word_valid_i(critical_word_valid),
.bypass_gnt_i ( bypass_gnt [i] ), .bypass_gnt_i (bypass_gnt[i]),
.bypass_valid_i ( bypass_valid [i] ), .bypass_valid_i (bypass_valid[i]),
.bypass_data_i ( bypass_data [i] ), .bypass_data_i (bypass_data[i]),
.mshr_addr_o ( mshr_addr [i] ), .mshr_addr_o (mshr_addr[i]),
.mshr_addr_matches_i ( mshr_addr_matches [i] ), .mshr_addr_matches_i (mshr_addr_matches[i]),
.mshr_index_matches_i ( mshr_index_matches[i] ), .mshr_index_matches_i(mshr_index_matches[i]),
.* .*
); );
end end
endgenerate endgenerate
// ------------------ // ------------------
// Miss Handling Unit // Miss Handling Unit
// ------------------ // ------------------
miss_handler #( miss_handler #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
.NR_PORTS ( NumPorts ), .NR_PORTS (NumPorts),
.axi_req_t ( axi_req_t ), .axi_req_t(axi_req_t),
.axi_rsp_t ( axi_rsp_t ) .axi_rsp_t(axi_rsp_t)
) i_miss_handler ( ) i_miss_handler (
.flush_i ( flush_i ), .flush_i (flush_i),
.busy_i ( |busy ), .busy_i (|busy),
// AMOs // AMOs
.amo_req_i ( amo_req_i ), .amo_req_i (amo_req_i),
.amo_resp_o ( amo_resp_o ), .amo_resp_o (amo_resp_o),
.miss_req_i ( miss_req ), .miss_req_i (miss_req),
.miss_gnt_o ( miss_gnt ), .miss_gnt_o (miss_gnt),
.bypass_gnt_o ( bypass_gnt ), .bypass_gnt_o (bypass_gnt),
.bypass_valid_o ( bypass_valid ), .bypass_valid_o (bypass_valid),
.bypass_data_o ( bypass_data ), .bypass_data_o (bypass_data),
.critical_word_o ( critical_word ), .critical_word_o (critical_word),
.critical_word_valid_o ( critical_word_valid ), .critical_word_valid_o(critical_word_valid),
.mshr_addr_i ( mshr_addr ), .mshr_addr_i (mshr_addr),
.mshr_addr_matches_o ( mshr_addr_matches ), .mshr_addr_matches_o (mshr_addr_matches),
.mshr_index_matches_o ( mshr_index_matches ), .mshr_index_matches_o (mshr_index_matches),
.active_serving_o ( active_serving ), .active_serving_o (active_serving),
.req_o ( req [0] ), .req_o (req[0]),
.addr_o ( addr [0] ), .addr_o (addr[0]),
.data_i ( rdata ), .data_i (rdata),
.be_o ( be [0] ), .be_o (be[0]),
.data_o ( wdata [0] ), .data_o (wdata[0]),
.we_o ( we [0] ), .we_o (we[0]),
.axi_bypass_o, .axi_bypass_o,
.axi_bypass_i, .axi_bypass_i,
.axi_data_o, .axi_data_o,
.axi_data_i, .axi_data_i,
.*
);
assign tag[0] = '0;
// --------------
// Memory Arrays
// --------------
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : sram_block
sram #(
.DATA_WIDTH(DCACHE_LINE_WIDTH),
.NUM_WORDS (DCACHE_NUM_WORDS)
) data_sram (
.req_i (req_ram[i]),
.rst_ni (rst_ni),
.we_i (we_ram),
.addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
.wuser_i('0),
.wdata_i(wdata_ram.data),
.be_i (be_ram.data),
.ruser_o(),
.rdata_o(rdata_ram[i].data),
.* .*
); );
assign tag[0] = '0;
// --------------
// Memory Arrays
// --------------
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : sram_block
sram #(
.DATA_WIDTH ( DCACHE_LINE_WIDTH ),
.NUM_WORDS ( DCACHE_NUM_WORDS )
) data_sram (
.req_i ( req_ram [i] ),
.rst_ni ( rst_ni ),
.we_i ( we_ram ),
.addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
.wuser_i ( '0 ),
.wdata_i ( wdata_ram.data ),
.be_i ( be_ram.data ),
.ruser_o ( ),
.rdata_o ( rdata_ram[i].data ),
.*
);
sram #(
.DATA_WIDTH ( DCACHE_TAG_WIDTH ),
.NUM_WORDS ( DCACHE_NUM_WORDS )
) tag_sram (
.req_i ( req_ram [i] ),
.rst_ni ( rst_ni ),
.we_i ( we_ram ),
.addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
.wuser_i ( '0 ),
.wdata_i ( wdata_ram.tag ),
.be_i ( be_ram.tag ),
.ruser_o ( ),
.rdata_o ( rdata_ram[i].tag ),
.*
);
end
// ----------------
// Valid/Dirty Regs
// ----------------
// align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals.
// note: if you have an SRAM that supports flat bit enables for your target technology,
// you can use it here to save the extra 4x overhead introduced by this workaround.
logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
assign dirty_wdata[8*i] = wdata_ram.dirty;
assign dirty_wdata[8*i+1] = wdata_ram.valid;
assign rdata_ram[i].dirty = dirty_rdata[8*i];
assign rdata_ram[i].valid = dirty_rdata[8*i+1];
end
sram #( sram #(
.USER_WIDTH ( 1 ), .DATA_WIDTH(DCACHE_TAG_WIDTH),
.DATA_WIDTH ( 4*DCACHE_DIRTY_WIDTH ), .NUM_WORDS (DCACHE_NUM_WORDS)
.NUM_WORDS ( DCACHE_NUM_WORDS ) ) tag_sram (
) valid_dirty_sram ( .req_i (req_ram[i]),
.clk_i ( clk_i ), .rst_ni (rst_ni),
.rst_ni ( rst_ni ), .we_i (we_ram),
.req_i ( |req_ram ), .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
.we_i ( we_ram ), .wuser_i('0),
.addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ), .wdata_i(wdata_ram.tag),
.wuser_i ( '0 ), .be_i (be_ram.tag),
.wdata_i ( dirty_wdata ), .ruser_o(),
.be_i ( be_ram.vldrty ), .rdata_o(rdata_ram[i].tag),
.ruser_o ( ),
.rdata_o ( dirty_rdata )
);
// ------------------------------------------------
// Tag Comparison and memory arbitration
// ------------------------------------------------
tag_cmp #(
.CVA6Cfg ( CVA6Cfg ),
.NR_PORTS ( NumPorts+1 ),
.ADDR_WIDTH ( DCACHE_INDEX_WIDTH ),
.DCACHE_SET_ASSOC ( DCACHE_SET_ASSOC )
) i_tag_cmp (
.req_i ( req ),
.gnt_o ( gnt ),
.addr_i ( addr ),
.wdata_i ( wdata ),
.we_i ( we ),
.be_i ( be ),
.rdata_o ( rdata ),
.tag_i ( tag ),
.hit_way_o ( hit_way ),
.req_o ( req_ram ),
.addr_o ( addr_ram ),
.wdata_o ( wdata_ram ),
.we_o ( we_ram ),
.be_o ( be_ram ),
.rdata_i ( rdata_ram ),
.* .*
); );
end
//pragma translate_off // ----------------
initial begin // Valid/Dirty Regs
assert (DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth inside {2, 4, 8, 16}) else $fatal(1, "Cache line size needs to be a power of two multiple of AxiDataWidth"); // ----------------
end
//pragma translate_on // align each valid/dirty bit pair to a byte boundary in order to leverage byte enable signals.
// note: if you have an SRAM that supports flat bit enables for your target technology,
// you can use it here to save the extra 4x overhead introduced by this workaround.
logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata;
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
assign dirty_wdata[8*i] = wdata_ram.dirty;
assign dirty_wdata[8*i+1] = wdata_ram.valid;
assign rdata_ram[i].dirty = dirty_rdata[8*i];
assign rdata_ram[i].valid = dirty_rdata[8*i+1];
end
sram #(
.USER_WIDTH(1),
.DATA_WIDTH(4 * DCACHE_DIRTY_WIDTH),
.NUM_WORDS (DCACHE_NUM_WORDS)
) valid_dirty_sram (
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (|req_ram),
.we_i (we_ram),
.addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]),
.wuser_i('0),
.wdata_i(dirty_wdata),
.be_i (be_ram.vldrty),
.ruser_o(),
.rdata_o(dirty_rdata)
);
// ------------------------------------------------
// Tag Comparison and memory arbitration
// ------------------------------------------------
tag_cmp #(
.CVA6Cfg (CVA6Cfg),
.NR_PORTS (NumPorts + 1),
.ADDR_WIDTH (DCACHE_INDEX_WIDTH),
.DCACHE_SET_ASSOC(DCACHE_SET_ASSOC)
) i_tag_cmp (
.req_i (req),
.gnt_o (gnt),
.addr_i (addr),
.wdata_i (wdata),
.we_i (we),
.be_i (be),
.rdata_o (rdata),
.tag_i (tag),
.hit_way_o(hit_way),
.req_o (req_ram),
.addr_o (addr_ram),
.wdata_o(wdata_ram),
.we_o (we_ram),
.be_o (be_ram),
.rdata_i(rdata_ram),
.*
);
//pragma translate_off
initial begin
assert (DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth inside {2, 4, 8, 16})
else $fatal(1, "Cache line size needs to be a power of two multiple of AxiDataWidth");
end
//pragma translate_on
endmodule endmodule

View file

@ -16,92 +16,91 @@
// checks for hit or miss on cache // checks for hit or miss on cache
// //
module tag_cmp #( module tag_cmp #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NR_PORTS = 3, parameter int unsigned NR_PORTS = 3,
parameter int unsigned ADDR_WIDTH = 64, parameter int unsigned ADDR_WIDTH = 64,
parameter type l_data_t = std_cache_pkg::cache_line_t, parameter type l_data_t = std_cache_pkg::cache_line_t,
parameter type l_be_t = std_cache_pkg::cl_be_t, parameter type l_be_t = std_cache_pkg::cl_be_t,
parameter int unsigned DCACHE_SET_ASSOC = 8 parameter int unsigned DCACHE_SET_ASSOC = 8
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input logic [NR_PORTS-1:0][DCACHE_SET_ASSOC-1:0] req_i, input logic [NR_PORTS-1:0][DCACHE_SET_ASSOC-1:0] req_i,
output logic [NR_PORTS-1:0] gnt_o, output logic [NR_PORTS-1:0] gnt_o,
input logic [NR_PORTS-1:0][ADDR_WIDTH-1:0] addr_i, input logic [NR_PORTS-1:0][ADDR_WIDTH-1:0] addr_i,
input l_data_t [NR_PORTS-1:0] wdata_i, input l_data_t [NR_PORTS-1:0] wdata_i,
input logic [NR_PORTS-1:0] we_i, input logic [NR_PORTS-1:0] we_i,
input l_be_t [NR_PORTS-1:0] be_i, input l_be_t [NR_PORTS-1:0] be_i,
output l_data_t [DCACHE_SET_ASSOC-1:0] rdata_o, output l_data_t [DCACHE_SET_ASSOC-1:0] rdata_o,
input logic [NR_PORTS-1:0][ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_i, // tag in - comes one cycle later input logic [NR_PORTS-1:0][ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_i, // tag in - comes one cycle later
output logic [DCACHE_SET_ASSOC-1:0] hit_way_o, // we've got a hit on the corresponding way output logic [DCACHE_SET_ASSOC-1:0] hit_way_o, // we've got a hit on the corresponding way
output logic [DCACHE_SET_ASSOC-1:0] req_o, output logic [DCACHE_SET_ASSOC-1:0] req_o,
output logic [ADDR_WIDTH-1:0] addr_o, output logic [ ADDR_WIDTH-1:0] addr_o,
output l_data_t wdata_o, output l_data_t wdata_o,
output logic we_o, output logic we_o,
output l_be_t be_o, output l_be_t be_o,
input l_data_t [DCACHE_SET_ASSOC-1:0] rdata_i input l_data_t [DCACHE_SET_ASSOC-1:0] rdata_i
); );
assign rdata_o = rdata_i; assign rdata_o = rdata_i;
// one hot encoded // one hot encoded
logic [NR_PORTS-1:0] id_d, id_q; logic [NR_PORTS-1:0] id_d, id_q;
logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] sel_tag; logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] sel_tag;
always_comb begin : tag_sel always_comb begin : tag_sel
sel_tag = '0; sel_tag = '0;
for (int unsigned i = 0; i < NR_PORTS; i++) for (int unsigned i = 0; i < NR_PORTS; i++) if (id_q[i]) sel_tag = tag_i[i];
if (id_q[i]) end
sel_tag = tag_i[i];
for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : tag_cmp
assign hit_way_o[j] = (sel_tag == rdata_i[j].tag) ? rdata_i[j].valid : 1'b0;
end
always_comb begin
gnt_o = '0;
id_d = '0;
wdata_o = '0;
req_o = '0;
addr_o = '0;
be_o = '0;
we_o = '0;
// Request Side
// priority select
for (int unsigned i = 0; i < NR_PORTS; i++) begin
req_o = req_i[i];
id_d = (1'b1 << i);
gnt_o[i] = 1'b1;
addr_o = addr_i[i];
be_o = be_i[i];
we_o = we_i[i];
wdata_o = wdata_i[i];
if (req_i[i]) break;
end end
for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : tag_cmp `ifndef SYNTHESIS
assign hit_way_o[j] = (sel_tag == rdata_i[j].tag) ? rdata_i[j].valid : 1'b0; `ifndef VERILATOR
// assert that cache only hits on one way
// this only needs to be checked one cycle after all ways have been requested
onehot :
assert property (@(posedge clk_i) disable iff (!rst_ni) &req_i |=> $onehot0(hit_way_o))
else begin
$fatal(1, "Hit should be one-hot encoded");
end end
`endif
`endif
end
always_comb begin always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
gnt_o = '0; id_q <= 0;
id_d = '0; end else begin
wdata_o = '0; id_q <= id_d;
req_o = '0;
addr_o = '0;
be_o = '0;
we_o = '0;
// Request Side
// priority select
for (int unsigned i = 0; i < NR_PORTS; i++) begin
req_o = req_i[i];
id_d = (1'b1 << i);
gnt_o[i] = 1'b1;
addr_o = addr_i[i];
be_o = be_i[i];
we_o = we_i[i];
wdata_o = wdata_i[i];
if (req_i[i])
break;
end
`ifndef SYNTHESIS
`ifndef VERILATOR
// assert that cache only hits on one way
// this only needs to be checked one cycle after all ways have been requested
onehot: assert property (
@(posedge clk_i) disable iff (!rst_ni) &req_i |=> $onehot0(hit_way_o))
else begin $fatal(1,"Hit should be one-hot encoded"); end
`endif
`endif
end
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
id_q <= 0;
end else begin
id_q <= id_d;
end
end end
end
endmodule endmodule

View file

@ -14,40 +14,43 @@
// //
module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #( module wt_axi_adapter
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter int unsigned ReqFifoDepth = 2, import wt_cache_pkg::*;
parameter int unsigned MetaFifoDepth = wt_cache_pkg::DCACHE_MAX_TX, #(
parameter type axi_req_t = logic, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter type axi_rsp_t = logic parameter int unsigned ReqFifoDepth = 2,
parameter int unsigned MetaFifoDepth = wt_cache_pkg::DCACHE_MAX_TX,
parameter type axi_req_t = logic,
parameter type axi_rsp_t = logic
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
// icache // icache
input logic icache_data_req_i, input logic icache_data_req_i,
output logic icache_data_ack_o, output logic icache_data_ack_o,
input icache_req_t icache_data_i, input icache_req_t icache_data_i,
// returning packets must be consumed immediately // returning packets must be consumed immediately
output logic icache_rtrn_vld_o, output logic icache_rtrn_vld_o,
output icache_rtrn_t icache_rtrn_o, output icache_rtrn_t icache_rtrn_o,
// dcache // dcache
input logic dcache_data_req_i, input logic dcache_data_req_i,
output logic dcache_data_ack_o, output logic dcache_data_ack_o,
input dcache_req_t dcache_data_i, input dcache_req_t dcache_data_i,
// returning packets must be consumed immediately // returning packets must be consumed immediately
output logic dcache_rtrn_vld_o, output logic dcache_rtrn_vld_o,
output dcache_rtrn_t dcache_rtrn_o, output dcache_rtrn_t dcache_rtrn_o,
// AXI port // AXI port
output axi_req_t axi_req_o, output axi_req_t axi_req_o,
input axi_rsp_t axi_resp_i, input axi_rsp_t axi_resp_i,
// Invalidations // Invalidations
input logic [63:0] inval_addr_i, input logic [63:0] inval_addr_i,
input logic inval_valid_i, input logic inval_valid_i,
output logic inval_ready_o output logic inval_ready_o
); );
// support up to 512bit cache lines // support up to 512bit cache lines
@ -65,7 +68,7 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
logic dcache_data_full, dcache_data_empty; logic dcache_data_full, dcache_data_empty;
logic [1:0] arb_req, arb_ack; logic [1:0] arb_req, arb_ack;
logic arb_idx, arb_gnt; logic arb_idx, arb_gnt;
logic axi_rd_req, axi_rd_gnt; logic axi_rd_req, axi_rd_gnt;
logic axi_wr_req, axi_wr_gnt; logic axi_wr_req, axi_wr_gnt;
@ -74,12 +77,13 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
logic [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr, axi_wr_addr; logic [CVA6Cfg.AxiAddrWidth-1:0] axi_rd_addr, axi_wr_addr;
logic [$clog2(AxiNumWords)-1:0] axi_rd_blen, axi_wr_blen; logic [$clog2(AxiNumWords)-1:0] axi_rd_blen, axi_wr_blen;
logic [2:0] axi_rd_size, axi_wr_size; logic [2:0] axi_rd_size, axi_wr_size;
logic [CVA6Cfg.AxiIdWidth-1:0] axi_rd_id_in, axi_wr_id_in, axi_rd_id_out, axi_wr_id_out, wr_id_out; logic [CVA6Cfg.AxiIdWidth-1:0]
axi_rd_id_in, axi_wr_id_in, axi_rd_id_out, axi_wr_id_out, wr_id_out;
logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0] axi_wr_data; logic [AxiNumWords-1:0][CVA6Cfg.AxiDataWidth-1:0] axi_wr_data;
logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0] axi_wr_user; logic [AxiNumWords-1:0][CVA6Cfg.AxiUserWidth-1:0] axi_wr_user;
logic [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data; logic [CVA6Cfg.AxiDataWidth-1:0] axi_rd_data;
logic [CVA6Cfg.AxiUserWidth-1:0] axi_rd_user; logic [CVA6Cfg.AxiUserWidth-1:0] axi_rd_user;
logic [AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] axi_wr_be; logic [AxiNumWords-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] axi_wr_be;
logic [5:0] axi_wr_atop; logic [5:0] axi_wr_atop;
logic invalidate; logic invalidate;
logic [$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] amo_off_d, amo_off_q; logic [$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] amo_off_d, amo_off_q;
@ -94,35 +98,33 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
logic dcache_rd_full, dcache_rd_empty; logic dcache_rd_full, dcache_rd_empty;
logic dcache_wr_full, dcache_wr_empty; logic dcache_wr_full, dcache_wr_empty;
assign icache_data_ack_o = icache_data_req_i & ~icache_data_full; assign icache_data_ack_o = icache_data_req_i & ~icache_data_full;
assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full; assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full;
// arbiter // arbiter
assign arb_req = {~(dcache_data_empty | assign arb_req = {
dcache_wr_full | ~(dcache_data_empty | dcache_wr_full | dcache_rd_full), ~(icache_data_empty | icache_rd_full)
dcache_rd_full), };
~(icache_data_empty |
icache_rd_full)};
assign arb_gnt = axi_rd_gnt | axi_wr_gnt; assign arb_gnt = axi_rd_gnt | axi_wr_gnt;
rr_arb_tree #( rr_arb_tree #(
.NumIn (2), .NumIn (2),
.DataWidth (1), .DataWidth(1),
.AxiVldRdy (1'b1), .AxiVldRdy(1'b1),
.LockIn (1'b1) .LockIn (1'b1)
) i_rr_arb_tree ( ) i_rr_arb_tree (
.clk_i (clk_i ), .clk_i (clk_i),
.rst_ni (rst_ni ), .rst_ni (rst_ni),
.flush_i('0 ), .flush_i('0),
.rr_i ('0 ), .rr_i ('0),
.req_i (arb_req ), .req_i (arb_req),
.gnt_o (arb_ack ), .gnt_o (arb_ack),
.data_i ('0 ), .data_i ('0),
.gnt_i (arb_gnt ), .gnt_i (arb_gnt),
.req_o ( ), .req_o (),
.data_o ( ), .data_o (),
.idx_o (arb_idx ) .idx_o (arb_idx)
); );
// request side // request side
@ -149,9 +151,9 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
axi_rd_blen = '0; axi_rd_blen = '0;
if (dcache_data.paddr[2] == 1'b0) begin if (dcache_data.paddr[2] == 1'b0) begin
axi_wr_user = {{64-CVA6Cfg.AxiUserWidth{1'b0}}, dcache_data.user}; axi_wr_user = {{64 - CVA6Cfg.AxiUserWidth{1'b0}}, dcache_data.user};
end else begin end else begin
axi_wr_user = {dcache_data.user, {64-CVA6Cfg.AxiUserWidth{1'b0}}}; axi_wr_user = {dcache_data.user, {64 - CVA6Cfg.AxiUserWidth{1'b0}}};
end end
// arbiter mux // arbiter mux
@ -159,45 +161,50 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
// Cast to AXI address width // Cast to AXI address width
axi_rd_addr = dcache_data.paddr; axi_rd_addr = dcache_data.paddr;
// If dcache_data.size MSB is set, we want to read as much as possible // If dcache_data.size MSB is set, we want to read as much as possible
axi_rd_size = dcache_data.size[2] ? $clog2(CVA6Cfg.AxiDataWidth/8) : dcache_data.size; axi_rd_size = dcache_data.size[2] ? $clog2(CVA6Cfg.AxiDataWidth / 8) : dcache_data.size;
if (dcache_data.size[2]) begin if (dcache_data.size[2]) begin
axi_rd_blen = ariane_pkg::DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1; axi_rd_blen = ariane_pkg::DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1;
end end
end else begin end else begin
// Cast to AXI address width // Cast to AXI address width
axi_rd_addr = icache_data.paddr; axi_rd_addr = icache_data.paddr;
axi_rd_size = $clog2(CVA6Cfg.AxiDataWidth/8); // always request max number of words in case of ifill axi_rd_size =
$clog2(CVA6Cfg.AxiDataWidth / 8); // always request max number of words in case of ifill
if (!icache_data.nc) begin if (!icache_data.nc) begin
axi_rd_blen = ariane_pkg::ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1; axi_rd_blen = ariane_pkg::ICACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1;
end end
end end
// signal that an invalidation message // signal that an invalidation message
// needs to be generated // needs to be generated
invalidate = 1'b0; invalidate = 1'b0;
// decode message type // decode message type
if (|arb_req) begin if (|arb_req) begin
if (arb_idx == 0) begin if (arb_idx == 0) begin
////////////////////////////////////// //////////////////////////////////////
// IMISS // IMISS
axi_rd_req = 1'b1; axi_rd_req = 1'b1;
////////////////////////////////////// //////////////////////////////////////
end else begin end else begin
unique case (dcache_data.rtype) unique case (dcache_data.rtype)
////////////////////////////////////// //////////////////////////////////////
wt_cache_pkg::DCACHE_LOAD_REQ: begin wt_cache_pkg::DCACHE_LOAD_REQ: begin
axi_rd_req = 1'b1; axi_rd_req = 1'b1;
end end
////////////////////////////////////// //////////////////////////////////////
wt_cache_pkg::DCACHE_STORE_REQ: begin wt_cache_pkg::DCACHE_STORE_REQ: begin
axi_wr_req = 1'b1; axi_wr_req = 1'b1;
axi_wr_be = '0; axi_wr_be = '0;
unique case(dcache_data.size[1:0]) unique case (dcache_data.size[1:0])
2'b00: axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1; // byte 2'b00:
2'b01: axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] +:2 ] = '1; // hword axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1; // byte
2'b10: axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] +:4 ] = '1; // word 2'b01:
default: axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] +:8 ] = '1; // dword axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:2] = '1; // hword
2'b10:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:4] = '1; // word
default:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:8] = '1; // dword
endcase endcase
end end
////////////////////////////////////// //////////////////////////////////////
@ -207,16 +214,20 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
// since we only keep one read tx in flight, and since // since we only keep one read tx in flight, and since
// the dcache drains all writes/reads before executing // the dcache drains all writes/reads before executing
// an atomic, this is safe. // an atomic, this is safe.
invalidate = arb_gnt; invalidate = arb_gnt;
axi_wr_req = 1'b1; axi_wr_req = 1'b1;
axi_wr_be = '0; axi_wr_be = '0;
unique case(dcache_data.size[1:0]) unique case (dcache_data.size[1:0])
2'b00: axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1; // byte 2'b00:
2'b01: axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] +:2 ] = '1; // hword axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]] = '1; // byte
2'b10: axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] +:4 ] = '1; // word 2'b01:
default: axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] +:8 ] = '1; // dword axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:2] = '1; // hword
2'b10:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:4] = '1; // word
default:
axi_wr_be[0][dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0]+:8] = '1; // dword
endcase endcase
amo_gen_r_d = 1'b1; amo_gen_r_d = 1'b1;
// need to use a separate ID here, so concat an additional bit // need to use a separate ID here, so concat an additional bit
axi_wr_id_in[1] = 1'b1; axi_wr_id_in[1] = 1'b1;
@ -226,206 +237,235 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
axi_rd_req = 1'b1; axi_rd_req = 1'b1;
axi_rd_id_in[1] = 1'b1; axi_rd_id_in[1] = 1'b1;
// tie to zero in this special case // tie to zero in this special case
axi_wr_req = 1'b0; axi_wr_req = 1'b0;
axi_wr_be = '0; axi_wr_be = '0;
end end
AMO_SC: begin AMO_SC: begin
axi_wr_lock = 1'b1; axi_wr_lock = 1'b1;
amo_gen_r_d = 1'b0; amo_gen_r_d = 1'b0;
// needed to properly encode success. store the result at offset within the returned // needed to properly encode success. store the result at offset within the returned
// AXI data word aligned with the requested word size. // AXI data word aligned with the requested word size.
amo_off_d = dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-1:0] & ~((1 << dcache_data.size[1:0]) - 1); amo_off_d = dcache_data.paddr[$clog2(CVA6Cfg.AxiDataWidth/8)-
1:0] & ~((1 << dcache_data.size[1:0]) - 1);
end end
// RISC-V atops have a load semantic // RISC-V atops have a load semantic
AMO_SWAP: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ATOMICSWAP}; AMO_SWAP:
AMO_ADD: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD}; axi_wr_atop = {
AMO_AND: begin axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ATOMICSWAP
};
AMO_ADD:
axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD};
AMO_AND: begin
// in this case we need to invert the data to get a "CLR" // in this case we need to invert the data to get a "CLR"
axi_wr_data = ~{(CVA6Cfg.AxiDataWidth/riscv::XLEN){dcache_data.data}}; axi_wr_data = ~{(CVA6Cfg.AxiDataWidth / riscv::XLEN) {dcache_data.data}};
axi_wr_user = ~{(CVA6Cfg.AxiDataWidth/riscv::XLEN){dcache_data.user}}; axi_wr_user = ~{(CVA6Cfg.AxiDataWidth / riscv::XLEN) {dcache_data.user}};
axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR}; axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR
};
end end
AMO_OR: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET}; AMO_OR:
AMO_XOR: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR}; axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET};
AMO_MAX: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX}; AMO_XOR:
AMO_MAXU: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX}; axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR};
AMO_MIN: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN}; AMO_MAX:
AMO_MINU: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN}; axi_wr_atop = {
default: ; // Do nothing axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX
};
AMO_MAXU:
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX
};
AMO_MIN:
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN
};
AMO_MINU:
axi_wr_atop = {
axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN
};
default: ; // Do nothing
endcase endcase
end end
default: ; // Do nothing default: ; // Do nothing
////////////////////////////////////// //////////////////////////////////////
endcase endcase
end end
end end
end end
// Request FIFO for the icache side: entries are of type icache_req_t, depth ReqFifoDepth.
// An entry (icache_data_i) is pushed whenever icache_data_ack_o is asserted and the
// output entry (icache_data) is popped when arbiter port 0 is acknowledged (arb_ack[0]).
// flush_i and testmode_i are tied to 0; usage_o is left unconnected.
fifo_v3 #( fifo_v3 #(
.dtype ( icache_req_t ), .dtype(icache_req_t),
.DEPTH ( ReqFifoDepth ) .DEPTH(ReqFifoDepth)
) i_icache_data_fifo ( ) i_icache_data_fifo (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i (1'b0),
.testmode_i ( 1'b0 ), .testmode_i(1'b0),
.full_o ( icache_data_full ), .full_o (icache_data_full),
.empty_o ( icache_data_empty ), .empty_o (icache_data_empty),
.usage_o ( ), .usage_o (),
.data_i ( icache_data_i ), .data_i (icache_data_i),
.push_i ( icache_data_ack_o ), .push_i (icache_data_ack_o),
.data_o ( icache_data ), .data_o (icache_data),
.pop_i ( arb_ack[0] ) .pop_i (arb_ack[0])
); );
// Request FIFO for the dcache side: entries are of type dcache_req_t, depth ReqFifoDepth.
// An entry (dcache_data_i) is pushed whenever dcache_data_ack_o is asserted and the
// output entry (dcache_data) is popped when arbiter port 1 is acknowledged (arb_ack[1]).
// flush_i and testmode_i are tied to 0; usage_o is left unconnected.
fifo_v3 #( fifo_v3 #(
.dtype ( dcache_req_t ), .dtype(dcache_req_t),
.DEPTH ( ReqFifoDepth ) .DEPTH(ReqFifoDepth)
) i_dcache_data_fifo ( ) i_dcache_data_fifo (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i (1'b0),
.testmode_i ( 1'b0 ), .testmode_i(1'b0),
.full_o ( dcache_data_full ), .full_o (dcache_data_full),
.empty_o ( dcache_data_empty ), .empty_o (dcache_data_empty),
.usage_o ( ), .usage_o (),
.data_i ( dcache_data_i ), .data_i (dcache_data_i),
.push_i ( dcache_data_ack_o ), .push_i (dcache_data_ack_o),
.data_o ( dcache_data ), .data_o (dcache_data),
.pop_i ( arb_ack[1] ) .pop_i (arb_ack[1])
); );
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// meta info feedback fifos // meta info feedback fifos
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// Return-path control signals: per-cache read-enable strobes and the
// next-state (_d) / registered (_q) return-valid flags.
logic icache_rtrn_rd_en, dcache_rtrn_rd_en; logic icache_rtrn_rd_en, dcache_rtrn_rd_en;
logic icache_rtrn_vld_d, icache_rtrn_vld_q, dcache_rtrn_vld_d, dcache_rtrn_vld_q; logic icache_rtrn_vld_d, icache_rtrn_vld_q, dcache_rtrn_vld_d, dcache_rtrn_vld_q;
// Meta FIFO holding the transaction id (icache_data.tid) of icache reads.
// An id is pushed when arbiter port 0 is acknowledged and the AXI read is granted
// (arb_ack[0] & axi_rd_gnt); it is popped when icache_rtrn_vld_d is set, and the
// output drives icache_rtrn_tid_d. Width is CACHE_ID_WIDTH, depth MetaFifoDepth.
fifo_v3 #( fifo_v3 #(
.DATA_WIDTH ( wt_cache_pkg::CACHE_ID_WIDTH ), .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH),
.DEPTH ( MetaFifoDepth ) .DEPTH (MetaFifoDepth)
) i_rd_icache_id ( ) i_rd_icache_id (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i (1'b0),
.testmode_i ( 1'b0 ), .testmode_i(1'b0),
.full_o ( icache_rd_full ), .full_o (icache_rd_full),
.empty_o ( icache_rd_empty ), .empty_o (icache_rd_empty),
.usage_o ( ), .usage_o (),
.data_i ( icache_data.tid ), .data_i (icache_data.tid),
.push_i ( arb_ack[0] & axi_rd_gnt ), .push_i (arb_ack[0] & axi_rd_gnt),
.data_o ( icache_rtrn_tid_d ), .data_o (icache_rtrn_tid_d),
.pop_i ( icache_rtrn_vld_d ) .pop_i (icache_rtrn_vld_d)
); );
// Meta FIFO holding the transaction id (dcache_data.tid) of dcache requests issued
// on the AXI read channel: pushed on arb_ack[1] & axi_rd_gnt, popped by dcache_rd_pop.
// The output (dcache_rtrn_rd_tid) is one of the two candidates for dcache_rtrn_tid_d.
fifo_v3 #( fifo_v3 #(
.DATA_WIDTH ( wt_cache_pkg::CACHE_ID_WIDTH ), .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH),
.DEPTH ( MetaFifoDepth ) .DEPTH (MetaFifoDepth)
) i_rd_dcache_id ( ) i_rd_dcache_id (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i (1'b0),
.testmode_i ( 1'b0 ), .testmode_i(1'b0),
.full_o ( dcache_rd_full ), .full_o (dcache_rd_full),
.empty_o ( dcache_rd_empty ), .empty_o (dcache_rd_empty),
.usage_o ( ), .usage_o (),
.data_i ( dcache_data.tid ), .data_i (dcache_data.tid),
.push_i ( arb_ack[1] & axi_rd_gnt ), .push_i (arb_ack[1] & axi_rd_gnt),
.data_o ( dcache_rtrn_rd_tid ), .data_o (dcache_rtrn_rd_tid),
.pop_i ( dcache_rd_pop ) .pop_i (dcache_rd_pop)
); );
// Meta FIFO holding the transaction id (dcache_data.tid) of dcache requests issued
// on the AXI write channel: pushed on arb_ack[1] & axi_wr_gnt, popped by dcache_wr_pop.
// The output (dcache_rtrn_wr_tid) is one of the two candidates for dcache_rtrn_tid_d.
fifo_v3 #( fifo_v3 #(
.DATA_WIDTH ( wt_cache_pkg::CACHE_ID_WIDTH ), .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH),
.DEPTH ( MetaFifoDepth ) .DEPTH (MetaFifoDepth)
) i_wr_dcache_id ( ) i_wr_dcache_id (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i (1'b0),
.testmode_i ( 1'b0 ), .testmode_i(1'b0),
.full_o ( dcache_wr_full ), .full_o (dcache_wr_full),
.empty_o ( dcache_wr_empty ), .empty_o (dcache_wr_empty),
.usage_o ( ), .usage_o (),
.data_i ( dcache_data.tid ), .data_i (dcache_data.tid),
.push_i ( arb_ack[1] & axi_wr_gnt ), .push_i (arb_ack[1] & axi_wr_gnt),
.data_o ( dcache_rtrn_wr_tid ), .data_o (dcache_rtrn_wr_tid),
.pop_i ( dcache_wr_pop ) .pop_i (dcache_wr_pop)
); );
// select correct tid to return // select correct tid to return
// The write-channel id wins whenever the write id FIFO is being popped this cycle;
// in every other case the read-channel id is forwarded.
assign dcache_rtrn_tid_d = (dcache_wr_pop) ? dcache_rtrn_wr_tid : dcache_rtrn_rd_tid; assign dcache_rtrn_tid_d = (dcache_wr_pop) ? dcache_rtrn_wr_tid : dcache_rtrn_rd_tid;
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// return path // return path
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// buffer write responses // buffer write responses
// Write-response buffer. axi_wr_rdy is deasserted while the FIFO is full, and a
// response is pushed only on a valid/ready handshake (axi_wr_valid & axi_wr_rdy).
// Each entry is {axi_wr_exokay, axi_wr_id_out}, hence DATA_WIDTH = AxiIdWidth + 1;
// the entry at the output is unpacked into {wr_exokay, wr_id_out} and removed by b_pop.
// The FIFO is instantiated with FALL_THROUGH = 1.
logic b_full, b_empty, b_push, b_pop; logic b_full, b_empty, b_push, b_pop;
assign axi_wr_rdy = ~b_full; assign axi_wr_rdy = ~b_full;
assign b_push = axi_wr_valid & axi_wr_rdy; assign b_push = axi_wr_valid & axi_wr_rdy;
fifo_v3 #( fifo_v3 #(
.DATA_WIDTH ( CVA6Cfg.AxiIdWidth + 1 ), .DATA_WIDTH (CVA6Cfg.AxiIdWidth + 1),
.DEPTH ( MetaFifoDepth ), .DEPTH (MetaFifoDepth),
.FALL_THROUGH ( 1'b1 ) .FALL_THROUGH(1'b1)
) i_b_fifo ( ) i_b_fifo (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i (1'b0),
.testmode_i ( 1'b0 ), .testmode_i(1'b0),
.full_o ( b_full ), .full_o (b_full),
.empty_o ( b_empty ), .empty_o (b_empty),
.usage_o ( ), .usage_o (),
.data_i ( {axi_wr_exokay, axi_wr_id_out} ), .data_i ({axi_wr_exokay, axi_wr_id_out}),
.push_i ( b_push ), .push_i (b_push),
.data_o ( {wr_exokay, wr_id_out} ), .data_o ({wr_exokay, wr_id_out}),
.pop_i ( b_pop ) .pop_i (b_pop)
); );
// buffer read responses in shift regs // buffer read responses in shift regs
// State for assembling read responses. The *_first_* flags are next-state (_d) /
// registered (_q) pairs. The shift arrays are packed 2-D vectors: one element of
// AxiDataWidth (or AxiUserWidth) bits for each AXI-width slice of a cache line
// (LINE_WIDTH / AxiDataWidth, resp. USER_LINE_WIDTH / AxiUserWidth elements).
logic icache_first_d, icache_first_q, dcache_first_d, dcache_first_q; logic icache_first_d, icache_first_q, dcache_first_d, dcache_first_q;
logic [ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0] icache_rd_shift_user_d, icache_rd_shift_user_q; logic [ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0]
logic [DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0] dcache_rd_shift_user_d, dcache_rd_shift_user_q; icache_rd_shift_user_d, icache_rd_shift_user_q;
logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] icache_rd_shift_d, icache_rd_shift_q; logic [DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0]
logic [DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] dcache_rd_shift_d, dcache_rd_shift_q; dcache_rd_shift_user_d, dcache_rd_shift_user_q;
logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0]
icache_rd_shift_d, icache_rd_shift_q;
logic [DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0]
dcache_rd_shift_d, dcache_rd_shift_q;
// Registered return type and invalidation descriptor for the dcache return path.
wt_cache_pkg::dcache_in_t dcache_rtrn_type_d, dcache_rtrn_type_q; wt_cache_pkg::dcache_in_t dcache_rtrn_type_d, dcache_rtrn_type_q;
wt_cache_pkg::dcache_inval_t dcache_rtrn_inv_d, dcache_rtrn_inv_q; wt_cache_pkg::dcache_inval_t dcache_rtrn_inv_d, dcache_rtrn_inv_q;
logic dcache_sc_rtrn, axi_rd_last; logic dcache_sc_rtrn, axi_rd_last;
always_comb begin : p_axi_rtrn_shift always_comb begin : p_axi_rtrn_shift
// output directly from regs // output directly from regs
icache_rtrn_o = '0; icache_rtrn_o = '0;
icache_rtrn_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK; icache_rtrn_o.rtype = wt_cache_pkg::ICACHE_IFILL_ACK;
icache_rtrn_o.tid = icache_rtrn_tid_q; icache_rtrn_o.tid = icache_rtrn_tid_q;
icache_rtrn_o.data = icache_rd_shift_q; icache_rtrn_o.data = icache_rd_shift_q;
icache_rtrn_o.user = icache_rd_shift_user_q; icache_rtrn_o.user = icache_rd_shift_user_q;
icache_rtrn_vld_o = icache_rtrn_vld_q; icache_rtrn_vld_o = icache_rtrn_vld_q;
dcache_rtrn_o = '0; dcache_rtrn_o = '0;
dcache_rtrn_o.rtype = dcache_rtrn_type_q; dcache_rtrn_o.rtype = dcache_rtrn_type_q;
dcache_rtrn_o.inv = dcache_rtrn_inv_q; dcache_rtrn_o.inv = dcache_rtrn_inv_q;
dcache_rtrn_o.tid = dcache_rtrn_tid_q; dcache_rtrn_o.tid = dcache_rtrn_tid_q;
dcache_rtrn_o.data = dcache_rd_shift_q; dcache_rtrn_o.data = dcache_rd_shift_q;
dcache_rtrn_o.user = dcache_rd_shift_user_q; dcache_rtrn_o.user = dcache_rd_shift_user_q;
dcache_rtrn_vld_o = dcache_rtrn_vld_q; dcache_rtrn_vld_o = dcache_rtrn_vld_q;
// read shift registers // read shift registers
icache_rd_shift_d = icache_rd_shift_q; icache_rd_shift_d = icache_rd_shift_q;
icache_rd_shift_user_d = icache_rd_shift_user_q; icache_rd_shift_user_d = icache_rd_shift_user_q;
dcache_rd_shift_d = dcache_rd_shift_q; dcache_rd_shift_d = dcache_rd_shift_q;
dcache_rd_shift_user_d = dcache_rd_shift_user_q; dcache_rd_shift_user_d = dcache_rd_shift_user_q;
icache_first_d = icache_first_q; icache_first_d = icache_first_q;
dcache_first_d = dcache_first_q; dcache_first_d = dcache_first_q;
if (icache_rtrn_rd_en) begin if (icache_rtrn_rd_en) begin
icache_first_d = axi_rd_last; icache_first_d = axi_rd_last;
if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin
icache_rd_shift_d = axi_rd_data; icache_rd_shift_d = axi_rd_data;
end else begin end else begin
icache_rd_shift_d = {axi_rd_data, icache_rd_shift_q[ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]}; icache_rd_shift_d = {
axi_rd_data, icache_rd_shift_q[ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]
};
end end
icache_rd_shift_user_d = {axi_rd_user, icache_rd_shift_user_q[ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1]}; icache_rd_shift_user_d = {
axi_rd_user, icache_rd_shift_user_q[ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1]
};
// if this is a single word transaction, we need to make sure that word is placed at offset 0 // if this is a single word transaction, we need to make sure that word is placed at offset 0
if (icache_first_q) begin if (icache_first_q) begin
icache_rd_shift_d[0] = axi_rd_data; icache_rd_shift_d[0] = axi_rd_data;
@ -434,13 +474,17 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
end end
if (dcache_rtrn_rd_en) begin if (dcache_rtrn_rd_en) begin
dcache_first_d = axi_rd_last; dcache_first_d = axi_rd_last;
if (DCACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin if (DCACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin
dcache_rd_shift_d = axi_rd_data; dcache_rd_shift_d = axi_rd_data;
end else begin end else begin
dcache_rd_shift_d = {axi_rd_data, dcache_rd_shift_q[DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]}; dcache_rd_shift_d = {
axi_rd_data, dcache_rd_shift_q[DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]
};
end end
dcache_rd_shift_user_d = {axi_rd_user, dcache_rd_shift_user_q[DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1]}; dcache_rd_shift_user_d = {
axi_rd_user, dcache_rd_shift_user_q[DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1]
};
// if this is a single word transaction, we need to make sure that word is placed at offset 0 // if this is a single word transaction, we need to make sure that word is placed at offset 0
if (dcache_first_q) begin if (dcache_first_q) begin
dcache_rd_shift_d[0] = axi_rd_data; dcache_rd_shift_d[0] = axi_rd_data;
@ -471,64 +515,64 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
icache_rtrn_vld_d = axi_rd_last; icache_rtrn_vld_d = axi_rd_last;
end end
dcache_rtrn_rd_en = 1'b0; dcache_rtrn_rd_en = 1'b0;
dcache_rtrn_vld_d = 1'b0; dcache_rtrn_vld_d = 1'b0;
dcache_rd_pop = 1'b0; dcache_rd_pop = 1'b0;
dcache_wr_pop = 1'b0; dcache_wr_pop = 1'b0;
dcache_rtrn_inv_d = '0; dcache_rtrn_inv_d = '0;
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_LOAD_ACK; dcache_rtrn_type_d = wt_cache_pkg::DCACHE_LOAD_ACK;
b_pop = 1'b0; b_pop = 1'b0;
dcache_sc_rtrn = 1'b0; dcache_sc_rtrn = 1'b0;
// External invalidation requests (from coprocessor). This is safe as // External invalidation requests (from coprocessor). This is safe as
// there are no other transactions when a coprocessor has pending stores. // there are no other transactions when a coprocessor has pending stores.
inval_ready_o = 1'b0; inval_ready_o = 1'b0;
if (inval_valid_i) begin if (inval_valid_i) begin
inval_ready_o = 1'b1; inval_ready_o = 1'b1;
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ; dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ;
dcache_rtrn_vld_d = 1'b1; dcache_rtrn_vld_d = 1'b1;
dcache_rtrn_inv_d.all = 1'b1; dcache_rtrn_inv_d.all = 1'b1;
dcache_rtrn_inv_d.idx = inval_addr_i[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; dcache_rtrn_inv_d.idx = inval_addr_i[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
////////////////////////////////////// //////////////////////////////////////
// dcache needs some special treatment // dcache needs some special treatment
// for arbitration and decoding of atomics // for arbitration and decoding of atomics
////////////////////////////////////// //////////////////////////////////////
// this is safe, there is no other read tx in flight than this atomic. // this is safe, there is no other read tx in flight than this atomic.
// note that this self invalidation is handled in this way due to the // note that this self invalidation is handled in this way due to the
// write-through cache architecture, which is aligned with the openpiton // write-through cache architecture, which is aligned with the openpiton
// cache subsystem. // cache subsystem.
end else if (invalidate) begin end else if (invalidate) begin
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ; dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ;
dcache_rtrn_vld_d = 1'b1; dcache_rtrn_vld_d = 1'b1;
dcache_rtrn_inv_d.all = 1'b1; dcache_rtrn_inv_d.all = 1'b1;
dcache_rtrn_inv_d.idx = dcache_data.paddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; dcache_rtrn_inv_d.idx = dcache_data.paddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
////////////////////////////////////// //////////////////////////////////////
// read responses // read responses
// note that in case of atomics, the dcache sequentializes requests and // note that in case of atomics, the dcache sequentializes requests and
// guarantees that there are no other pending transactions in flight // guarantees that there are no other pending transactions in flight
end else if (axi_rd_valid && axi_rd_id_out[0] && axi_rd_rdy) begin end else if (axi_rd_valid && axi_rd_id_out[0] && axi_rd_rdy) begin
dcache_rtrn_rd_en = 1'b1; dcache_rtrn_rd_en = 1'b1;
dcache_rtrn_vld_d = axi_rd_last; dcache_rtrn_vld_d = axi_rd_last;
// if this was an atomic op // if this was an atomic op
if (axi_rd_id_out[1]) begin if (axi_rd_id_out[1]) begin
dcache_rtrn_type_d = wt_cache_pkg::DCACHE_ATOMIC_ACK; dcache_rtrn_type_d = wt_cache_pkg::DCACHE_ATOMIC_ACK;
// check if transaction was issued over write channel and pop that ID // check if transaction was issued over write channel and pop that ID
if (!dcache_wr_empty) begin if (!dcache_wr_empty) begin
dcache_wr_pop = axi_rd_last; dcache_wr_pop = axi_rd_last;
// if this is not the case, there MUST be an id in the read channel (LR) // if this is not the case, there MUST be an id in the read channel (LR)
end else begin end else begin
dcache_rd_pop = axi_rd_last; dcache_rd_pop = axi_rd_last;
end end
end else begin end else begin
dcache_rd_pop = axi_rd_last; dcache_rd_pop = axi_rd_last;
end end
////////////////////////////////////// //////////////////////////////////////
// write responses, check b fifo // write responses, check b fifo
end else if (!b_empty) begin end else if (!b_empty) begin
b_pop = 1'b1; b_pop = 1'b1;
// this was an atomic // this was an atomic
if (wr_id_out[1]) begin if (wr_id_out[1]) begin
@ -537,9 +581,9 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
// silently discard b response if we already popped the fifo // silently discard b response if we already popped the fifo
// with a R beat (iff the amo transaction generated an R beat) // with a R beat (iff the amo transaction generated an R beat)
if (!amo_gen_r_q) begin if (!amo_gen_r_q) begin
dcache_rtrn_vld_d = 1'b1; dcache_rtrn_vld_d = 1'b1;
dcache_wr_pop = 1'b1; dcache_wr_pop = 1'b1;
dcache_sc_rtrn = 1'b1; dcache_sc_rtrn = 1'b1;
end end
end else begin end else begin
// regular response // regular response
@ -565,92 +609,92 @@ module wt_axi_adapter import ariane_pkg::*; import wt_cache_pkg::*; #(
// State registers of the adapter: every *_q signal takes its *_d next-state value
// on the rising edge of clk_i. Reset is asynchronous and active-low (negedge rst_ni).
always_ff @(posedge clk_i or negedge rst_ni) begin : p_rd_buf always_ff @(posedge clk_i or negedge rst_ni) begin : p_rd_buf
if (!rst_ni) begin if (!rst_ni) begin
// Reset values: the "first" flags come up set, the return type defaults to
// DCACHE_LOAD_ACK, and all remaining state is cleared.
icache_first_q <= 1'b1; icache_first_q <= 1'b1;
dcache_first_q <= 1'b1; dcache_first_q <= 1'b1;
icache_rd_shift_q <= '0; icache_rd_shift_q <= '0;
icache_rd_shift_user_q <= '0; icache_rd_shift_user_q <= '0;
dcache_rd_shift_q <= '0; dcache_rd_shift_q <= '0;
dcache_rd_shift_user_q <= '0; dcache_rd_shift_user_q <= '0;
icache_rtrn_vld_q <= '0; icache_rtrn_vld_q <= '0;
dcache_rtrn_vld_q <= '0; dcache_rtrn_vld_q <= '0;
icache_rtrn_tid_q <= '0; icache_rtrn_tid_q <= '0;
dcache_rtrn_tid_q <= '0; dcache_rtrn_tid_q <= '0;
dcache_rtrn_type_q <= wt_cache_pkg::DCACHE_LOAD_ACK; dcache_rtrn_type_q <= wt_cache_pkg::DCACHE_LOAD_ACK;
dcache_rtrn_inv_q <= '0; dcache_rtrn_inv_q <= '0;
amo_off_q <= '0; amo_off_q <= '0;
amo_gen_r_q <= 1'b0; amo_gen_r_q <= 1'b0;
end else begin end else begin
// Normal operation: unconditional _d -> _q update (no enable or flush input).
icache_first_q <= icache_first_d; icache_first_q <= icache_first_d;
dcache_first_q <= dcache_first_d; dcache_first_q <= dcache_first_d;
icache_rd_shift_q <= icache_rd_shift_d; icache_rd_shift_q <= icache_rd_shift_d;
icache_rd_shift_user_q <= icache_rd_shift_user_d; icache_rd_shift_user_q <= icache_rd_shift_user_d;
dcache_rd_shift_q <= dcache_rd_shift_d; dcache_rd_shift_q <= dcache_rd_shift_d;
dcache_rd_shift_user_q <= dcache_rd_shift_user_d; dcache_rd_shift_user_q <= dcache_rd_shift_user_d;
icache_rtrn_vld_q <= icache_rtrn_vld_d; icache_rtrn_vld_q <= icache_rtrn_vld_d;
dcache_rtrn_vld_q <= dcache_rtrn_vld_d; dcache_rtrn_vld_q <= dcache_rtrn_vld_d;
icache_rtrn_tid_q <= icache_rtrn_tid_d; icache_rtrn_tid_q <= icache_rtrn_tid_d;
dcache_rtrn_tid_q <= dcache_rtrn_tid_d; dcache_rtrn_tid_q <= dcache_rtrn_tid_d;
dcache_rtrn_type_q <= dcache_rtrn_type_d; dcache_rtrn_type_q <= dcache_rtrn_type_d;
dcache_rtrn_inv_q <= dcache_rtrn_inv_d; dcache_rtrn_inv_q <= dcache_rtrn_inv_d;
amo_off_q <= amo_off_d; amo_off_q <= amo_off_d;
amo_gen_r_q <= amo_gen_r_d; amo_gen_r_q <= amo_gen_r_d;
end end
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// axi protocol shim // axi protocol shim
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// Instantiates axi_shim, parameterized with CVA6Cfg, AxiNumWords and the AXI
// request/response struct types. The rd_* ports carry the adapter's read request
// (req/gnt, addr, blen, size, id, lock) and read response (valid/rdy, last, data,
// user, id, exokay); the wr_* ports carry the write request (req/gnt, addr, data,
// user, be, blen, size, id, lock, atop) and write response (valid/rdy, id, exokay).
// axi_req_o / axi_resp_i are the module's external AXI struct ports.
axi_shim #( axi_shim #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
.AxiNumWords ( AxiNumWords ), .AxiNumWords(AxiNumWords),
.axi_req_t ( axi_req_t ), .axi_req_t (axi_req_t),
.axi_rsp_t ( axi_rsp_t ) .axi_rsp_t (axi_rsp_t)
) i_axi_shim ( ) i_axi_shim (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.rd_req_i ( axi_rd_req ), .rd_req_i (axi_rd_req),
.rd_gnt_o ( axi_rd_gnt ), .rd_gnt_o (axi_rd_gnt),
.rd_addr_i ( axi_rd_addr ), .rd_addr_i (axi_rd_addr),
.rd_blen_i ( axi_rd_blen ), .rd_blen_i (axi_rd_blen),
.rd_size_i ( axi_rd_size ), .rd_size_i (axi_rd_size),
.rd_id_i ( axi_rd_id_in ), .rd_id_i (axi_rd_id_in),
.rd_rdy_i ( axi_rd_rdy ), .rd_rdy_i (axi_rd_rdy),
.rd_lock_i ( axi_rd_lock ), .rd_lock_i (axi_rd_lock),
.rd_last_o ( axi_rd_last ), .rd_last_o (axi_rd_last),
.rd_valid_o ( axi_rd_valid ), .rd_valid_o (axi_rd_valid),
.rd_data_o ( axi_rd_data ), .rd_data_o (axi_rd_data),
.rd_user_o ( axi_rd_user ), .rd_user_o (axi_rd_user),
.rd_id_o ( axi_rd_id_out ), .rd_id_o (axi_rd_id_out),
.rd_exokay_o ( axi_rd_exokay ), .rd_exokay_o(axi_rd_exokay),
.wr_req_i ( axi_wr_req ), .wr_req_i (axi_wr_req),
.wr_gnt_o ( axi_wr_gnt ), .wr_gnt_o (axi_wr_gnt),
.wr_addr_i ( axi_wr_addr ), .wr_addr_i (axi_wr_addr),
.wr_data_i ( axi_wr_data ), .wr_data_i (axi_wr_data),
.wr_user_i ( axi_wr_user ), .wr_user_i (axi_wr_user),
.wr_be_i ( axi_wr_be ), .wr_be_i (axi_wr_be),
.wr_blen_i ( axi_wr_blen ), .wr_blen_i (axi_wr_blen),
.wr_size_i ( axi_wr_size ), .wr_size_i (axi_wr_size),
.wr_id_i ( axi_wr_id_in ), .wr_id_i (axi_wr_id_in),
.wr_lock_i ( axi_wr_lock ), .wr_lock_i (axi_wr_lock),
.wr_atop_i ( axi_wr_atop ), .wr_atop_i (axi_wr_atop),
.wr_rdy_i ( axi_wr_rdy ), .wr_rdy_i (axi_wr_rdy),
.wr_valid_o ( axi_wr_valid ), .wr_valid_o (axi_wr_valid),
.wr_id_o ( axi_wr_id_out ), .wr_id_o (axi_wr_id_out),
.wr_exokay_o ( axi_wr_exokay ), .wr_exokay_o(axi_wr_exokay),
.axi_req_o ( axi_req_o ), .axi_req_o (axi_req_o),
.axi_resp_i ( axi_resp_i ) .axi_resp_i (axi_resp_i)
); );
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// assertions // assertions
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
//pragma translate_off //pragma translate_off
`ifndef VERILATOR `ifndef VERILATOR
`endif `endif
//pragma translate_on //pragma translate_on
endmodule // wt_axi_adapter endmodule // wt_axi_adapter

View file

@ -19,49 +19,52 @@
// L1.5 interface. // L1.5 interface.
module wt_cache_subsystem import ariane_pkg::*; import wt_cache_pkg::*; #( module wt_cache_subsystem
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter int unsigned NumPorts = 4, import wt_cache_pkg::*;
parameter type noc_req_t = logic, #(
parameter type noc_resp_t = logic parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NumPorts = 4,
parameter type noc_req_t = logic,
parameter type noc_resp_t = logic
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
// I$ // I$
input logic icache_en_i, // enable icache (or bypass e.g: in debug mode) input logic icache_en_i, // enable icache (or bypass e.g: in debug mode)
input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together
output logic icache_miss_o, // to performance counter output logic icache_miss_o, // to performance counter
// address translation requests // address translation requests
input icache_areq_t icache_areq_i, // to/from frontend input icache_areq_t icache_areq_i, // to/from frontend
output icache_arsp_t icache_areq_o, output icache_arsp_t icache_areq_o,
// data requests // data requests
input icache_dreq_t icache_dreq_i, // to/from frontend input icache_dreq_t icache_dreq_i, // to/from frontend
output icache_drsp_t icache_dreq_o, output icache_drsp_t icache_dreq_o,
// D$ // D$
// Cache management // Cache management
input logic dcache_enable_i, // from CSR input logic dcache_enable_i, // from CSR
input logic dcache_flush_i, // high until acknowledged input logic dcache_flush_i, // high until acknowledged
output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic dcache_miss_o, // we missed on a ld/st output logic dcache_miss_o, // we missed on a ld/st
// For Performance Counter // For Performance Counter
output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o,
// AMO interface // AMO interface
input amo_req_t dcache_amo_req_i, input amo_req_t dcache_amo_req_i,
output amo_resp_t dcache_amo_resp_o, output amo_resp_t dcache_amo_resp_o,
// Request ports // Request ports
input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // to/from LSU input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // to/from LSU
output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to/from LSU output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to/from LSU
// writebuffer status // writebuffer status
output logic wbuffer_empty_o, output logic wbuffer_empty_o,
output logic wbuffer_not_ni_o, output logic wbuffer_not_ni_o,
// memory side // memory side
output noc_req_t noc_req_o, output noc_req_t noc_req_o,
input noc_resp_t noc_resp_i, input noc_resp_t noc_resp_i,
// Invalidations // Invalidations
input logic [63:0] inval_addr_i, input logic [63:0] inval_addr_i,
input logic inval_valid_i, input logic inval_valid_i,
output logic inval_ready_o output logic inval_ready_o
// TODO: interrupt interface // TODO: interrupt interface
); );
logic icache_adapter_data_req, adapter_icache_data_ack, adapter_icache_rtrn_vld; logic icache_adapter_data_req, adapter_icache_data_ack, adapter_icache_rtrn_vld;
@ -74,24 +77,24 @@ module wt_cache_subsystem import ariane_pkg::*; import wt_cache_pkg::*; #(
wt_cache_pkg::dcache_rtrn_t adapter_dcache; wt_cache_pkg::dcache_rtrn_t adapter_dcache;
// Instruction cache instance. Control (flush/enable/miss), address-translation
// (areq_*) and data (dreq_*) ports are wired straight to the subsystem's icache_*
// ports; the mem_* ports connect to the memory adapter through the
// icache_adapter* / adapter_icache* signals.
cva6_icache #( cva6_icache #(
// use ID 0 for icache reads // use ID 0 for icache reads
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg(CVA6Cfg),
.RdTxId ( 0 ) .RdTxId (0)
) i_cva6_icache ( ) i_cva6_icache (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( icache_flush_i ), .flush_i (icache_flush_i),
.en_i ( icache_en_i ), .en_i (icache_en_i),
.miss_o ( icache_miss_o ), .miss_o (icache_miss_o),
.areq_i ( icache_areq_i ), .areq_i (icache_areq_i),
.areq_o ( icache_areq_o ), .areq_o (icache_areq_o),
.dreq_i ( icache_dreq_i ), .dreq_i (icache_dreq_i),
.dreq_o ( icache_dreq_o ), .dreq_o (icache_dreq_o),
.mem_rtrn_vld_i ( adapter_icache_rtrn_vld ), .mem_rtrn_vld_i(adapter_icache_rtrn_vld),
.mem_rtrn_i ( adapter_icache ), .mem_rtrn_i (adapter_icache),
.mem_data_req_o ( icache_adapter_data_req ), .mem_data_req_o(icache_adapter_data_req),
.mem_data_ack_i ( adapter_icache_data_ack ), .mem_data_ack_i(adapter_icache_data_ack),
.mem_data_o ( icache_adapter ) .mem_data_o (icache_adapter)
); );
@ -100,109 +103,131 @@ module wt_cache_subsystem import ariane_pkg::*; import wt_cache_pkg::*; #(
// they have equal prio and are RR arbitrated // they have equal prio and are RR arbitrated
// Port 2 is write only and goes into the merging write buffer // Port 2 is write only and goes into the merging write buffer
// Data cache instance. Cache management, write-buffer status, AMO and LSU request
// ports are wired straight to the subsystem's dcache_* / wbuffer_* ports; the mem_*
// ports connect to the memory adapter through the dcache_adapter* / adapter_dcache*
// signals.
wt_dcache #( wt_dcache #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
// use ID 1 for dcache reads and amos. note that the writebuffer // use ID 1 for dcache reads and amos. note that the writebuffer
// uses all IDs up to DCACHE_MAX_TX-1 for write transactions. // uses all IDs up to DCACHE_MAX_TX-1 for write transactions.
.RdAmoTxId ( 1 ) .RdAmoTxId(1)
) i_wt_dcache ( ) i_wt_dcache (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.enable_i ( dcache_enable_i ), .enable_i (dcache_enable_i),
.flush_i ( dcache_flush_i ), .flush_i (dcache_flush_i),
.flush_ack_o ( dcache_flush_ack_o ), .flush_ack_o (dcache_flush_ack_o),
.miss_o ( dcache_miss_o ), .miss_o (dcache_miss_o),
.wbuffer_empty_o ( wbuffer_empty_o ), .wbuffer_empty_o (wbuffer_empty_o),
.wbuffer_not_ni_o ( wbuffer_not_ni_o ), .wbuffer_not_ni_o(wbuffer_not_ni_o),
.amo_req_i ( dcache_amo_req_i ), .amo_req_i (dcache_amo_req_i),
.amo_resp_o ( dcache_amo_resp_o ), .amo_resp_o (dcache_amo_resp_o),
.req_ports_i ( dcache_req_ports_i ), .req_ports_i (dcache_req_ports_i),
.req_ports_o ( dcache_req_ports_o ), .req_ports_o (dcache_req_ports_o),
.miss_vld_bits_o ( miss_vld_bits_o ), .miss_vld_bits_o (miss_vld_bits_o),
.mem_rtrn_vld_i ( adapter_dcache_rtrn_vld ), .mem_rtrn_vld_i (adapter_dcache_rtrn_vld),
.mem_rtrn_i ( adapter_dcache ), .mem_rtrn_i (adapter_dcache),
.mem_data_req_o ( dcache_adapter_data_req ), .mem_data_req_o (dcache_adapter_data_req),
.mem_data_ack_i ( adapter_dcache_data_ack ), .mem_data_ack_i (adapter_dcache_data_ack),
.mem_data_o ( dcache_adapter ) .mem_data_o (dcache_adapter)
); );
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// memory plumbing, either use 64bit AXI port or native // memory plumbing, either use 64bit AXI port or native
// L15 cache interface (derived from OpenSPARC CCX). // L15 cache interface (derived from OpenSPARC CCX).
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// Memory adapter selection. With PITON_ARIANE defined, wt_l15_adapter is used and
// the noc ports are wired to its l15_req_o / l15_rtrn_i ports. Otherwise
// wt_axi_adapter is used: the noc types become its axi_req_t / axi_rsp_t parameters
// and it additionally receives the inval_* ports. In the L15 branch the inval_*
// ports are not connected to the adapter.
`ifdef PITON_ARIANE `ifdef PITON_ARIANE
wt_l15_adapter #( wt_l15_adapter #(
// NOTE(review): the single entry of this parameter list is followed by a trailing
// comma, which SystemVerilog named-parameter lists do not appear to allow --
// confirm that the PITON_ARIANE branch still compiles.
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg(CVA6Cfg),
) i_adapter ( ) i_adapter (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.icache_data_req_i ( icache_adapter_data_req ), .icache_data_req_i(icache_adapter_data_req),
.icache_data_ack_o ( adapter_icache_data_ack ), .icache_data_ack_o(adapter_icache_data_ack),
.icache_data_i ( icache_adapter ), .icache_data_i (icache_adapter),
.icache_rtrn_vld_o ( adapter_icache_rtrn_vld ), .icache_rtrn_vld_o(adapter_icache_rtrn_vld),
.icache_rtrn_o ( adapter_icache ), .icache_rtrn_o (adapter_icache),
.dcache_data_req_i ( dcache_adapter_data_req ), .dcache_data_req_i(dcache_adapter_data_req),
.dcache_data_ack_o ( adapter_dcache_data_ack ), .dcache_data_ack_o(adapter_dcache_data_ack),
.dcache_data_i ( dcache_adapter ), .dcache_data_i (dcache_adapter),
.dcache_rtrn_vld_o ( adapter_dcache_rtrn_vld ), .dcache_rtrn_vld_o(adapter_dcache_rtrn_vld),
.dcache_rtrn_o ( adapter_dcache ), .dcache_rtrn_o (adapter_dcache),
.l15_req_o ( noc_req_o ), .l15_req_o (noc_req_o),
.l15_rtrn_i ( noc_resp_i ) .l15_rtrn_i (noc_resp_i)
); );
`else `else
wt_axi_adapter #( wt_axi_adapter #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
.axi_req_t ( noc_req_t ), .axi_req_t(noc_req_t),
.axi_rsp_t ( noc_resp_t ) .axi_rsp_t(noc_resp_t)
) i_adapter ( ) i_adapter (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.icache_data_req_i ( icache_adapter_data_req ), .icache_data_req_i(icache_adapter_data_req),
.icache_data_ack_o ( adapter_icache_data_ack ), .icache_data_ack_o(adapter_icache_data_ack),
.icache_data_i ( icache_adapter ), .icache_data_i (icache_adapter),
.icache_rtrn_vld_o ( adapter_icache_rtrn_vld ), .icache_rtrn_vld_o(adapter_icache_rtrn_vld),
.icache_rtrn_o ( adapter_icache ), .icache_rtrn_o (adapter_icache),
.dcache_data_req_i ( dcache_adapter_data_req ), .dcache_data_req_i(dcache_adapter_data_req),
.dcache_data_ack_o ( adapter_dcache_data_ack ), .dcache_data_ack_o(adapter_dcache_data_ack),
.dcache_data_i ( dcache_adapter ), .dcache_data_i (dcache_adapter),
.dcache_rtrn_vld_o ( adapter_dcache_rtrn_vld ), .dcache_rtrn_vld_o(adapter_dcache_rtrn_vld),
.dcache_rtrn_o ( adapter_dcache ), .dcache_rtrn_o (adapter_dcache),
.axi_req_o ( noc_req_o ), .axi_req_o (noc_req_o),
.axi_resp_i ( noc_resp_i ), .axi_resp_i (noc_resp_i),
.inval_addr_i ( inval_addr_i ), .inval_addr_i (inval_addr_i),
.inval_valid_i ( inval_valid_i ), .inval_valid_i (inval_valid_i),
.inval_ready_o ( inval_ready_o ) .inval_ready_o (inval_ready_o)
); );
`endif `endif
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// assertions // assertions
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
//pragma translate_off //pragma translate_off
`ifndef VERILATOR `ifndef VERILATOR
a_invalid_instruction_fetch: assert property ( a_invalid_instruction_fetch :
assert property (
@(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX) @(posedge clk_i) disable iff (!rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
else $warning(1,"[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X", else
icache_dreq_o.vaddr, icache_dreq_o.data); $warning(
1,
"[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X",
icache_dreq_o.vaddr,
icache_dreq_o.data
);
for (genvar j=0; j<riscv::XLEN/8; j++) begin : gen_invalid_write_assertion for (genvar j = 0; j < riscv::XLEN / 8; j++) begin : gen_invalid_write_assertion
a_invalid_write_data: assert property ( a_invalid_write_data :
assert property (
@(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> dcache_req_ports_i[NumPorts-1].data_be[j] |-> (|dcache_req_ports_i[NumPorts-1].data_wdata[j*8+:8] !== 1'hX)) @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> dcache_req_ports_i[NumPorts-1].data_be[j] |-> (|dcache_req_ports_i[NumPorts-1].data_wdata[j*8+:8] !== 1'hX))
else $warning(1,"[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X, databe=%016X", else
{dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index}, dcache_req_ports_i[NumPorts-1].data_be, dcache_req_ports_i[NumPorts-1].data_wdata, dcache_req_ports_i[NumPorts-1].data_be & dcache_req_ports_i[NumPorts-1].data_wdata); $warning(
1,
"[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X, databe=%016X",
{
dcache_req_ports_i[NumPorts-1].address_tag, dcache_req_ports_i[NumPorts-1].address_index
},
dcache_req_ports_i[NumPorts-1].data_be,
dcache_req_ports_i[NumPorts-1].data_wdata,
dcache_req_ports_i[NumPorts-1].data_be & dcache_req_ports_i[NumPorts-1].data_wdata
);
end end
for (genvar j=0; j<NumPorts-1; j++) begin : gen_assertion for (genvar j = 0; j < NumPorts - 1; j++) begin : gen_assertion
a_invalid_read_data: assert property ( a_invalid_read_data :
assert property (
@(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX) @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_o[j].data_rvalid && ~dcache_req_ports_i[j].kill_req |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
else $warning(1,"[l1 dcache] reading invalid data on port %01d: data=%016X", else
j, dcache_req_ports_o[j].data_rdata); $warning(
1,
"[l1 dcache] reading invalid data on port %01d: data=%016X",
j,
dcache_req_ports_o[j].data_rdata
);
end end
`endif `endif
//pragma translate_on //pragma translate_on
endmodule // wt_cache_subsystem endmodule // wt_cache_subsystem

View file

@ -13,325 +13,329 @@
// Description: Write-Through Data cache that is compatible with openpiton. // Description: Write-Through Data cache that is compatible with openpiton.
// Top level of the write-through data cache. It only wires together four sub-blocks:
//   - i_wt_dcache_missunit : miss handling unit; the only block attached to the mem_* memory interface
//   - i_wt_dcache_ctrl     : one read controller per port 0 .. NumPorts-2 (generate loop gen_rd_ports)
//   - i_wt_dcache_wbuffer  : write buffer, attached to the last port (index NumPorts-1)
//   - i_wt_dcache_mem      : memory arrays, arbitration and tag comparison
// CVA6Cfg is forwarded unchanged to every sub-block.
module wt_dcache import ariane_pkg::*; import wt_cache_pkg::*; #( module wt_dcache
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter int unsigned NumPorts = 4, // number of miss ports import wt_cache_pkg::*;
// ID to be used for read and AMO transactions. #(
// note that the write buffer uses all IDs up to DCACHE_MAX_TX-1 for write transactions parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter logic [CACHE_ID_WIDTH-1:0] RdAmoTxId = 1 parameter int unsigned NumPorts = 4, // number of miss ports
// ID to be used for read and AMO transactions.
// note that the write buffer uses all IDs up to DCACHE_MAX_TX-1 for write transactions
parameter logic [CACHE_ID_WIDTH-1:0] RdAmoTxId = 1
// RdAmoTxId is handed to the miss unit as AmoTxId and to every read controller as RdTxId (see instances below).
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
// Cache management // Cache management
input logic enable_i, // from CSR input logic enable_i, // from CSR
input logic flush_i, // high until acknowledged input logic flush_i, // high until acknowledged
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic miss_o, // we missed on a ld/st output logic miss_o, // we missed on a ld/st
output logic wbuffer_empty_o, output logic wbuffer_empty_o,
output logic wbuffer_not_ni_o, output logic wbuffer_not_ni_o,
// AMO interface // AMO interface
input amo_req_t amo_req_i, input amo_req_t amo_req_i,
output amo_resp_t amo_resp_o, output amo_resp_t amo_resp_o,
// Request ports // Request ports
input dcache_req_i_t [NumPorts-1:0] req_ports_i, input dcache_req_i_t [NumPorts-1:0] req_ports_i,
output dcache_req_o_t [NumPorts-1:0] req_ports_o,
output dcache_req_o_t [NumPorts-1:0] req_ports_o, output dcache_req_o_t [NumPorts-1:0] req_ports_o,
// miss_vld_bits_o is driven per port by the read controllers / write buffer and is also
// looped back into the miss unit (miss_vld_bits_i) inside this module.
output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o,
// The mem_* ports below are connected only to the miss unit.
input logic mem_rtrn_vld_i, input logic mem_rtrn_vld_i,
input dcache_rtrn_t mem_rtrn_i, input dcache_rtrn_t mem_rtrn_i,
output logic mem_data_req_o, output logic mem_data_req_o,
input logic mem_data_ack_i, input logic mem_data_ack_i,
output dcache_req_t mem_data_o output dcache_req_t mem_data_o
); );
// miss unit <-> read controllers // miss unit <-> read controllers
logic cache_en; logic cache_en;
// miss unit <-> memory // miss unit <-> memory
logic wr_cl_vld; logic wr_cl_vld;
logic wr_cl_nc; logic wr_cl_nc;
logic [DCACHE_SET_ASSOC-1:0] wr_cl_we; logic [ DCACHE_SET_ASSOC-1:0] wr_cl_we;
logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag; logic [ DCACHE_TAG_WIDTH-1:0] wr_cl_tag;
logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx; logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx;
logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off; logic [ DCACHE_OFFSET_WIDTH-1:0] wr_cl_off;
logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data; logic [ DCACHE_LINE_WIDTH-1:0] wr_cl_data;
logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user; logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user;
logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be; logic [ DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be;
logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits; logic [ DCACHE_SET_ASSOC-1:0] wr_vld_bits;
// The single-word write signals that follow (wr_req .. wr_user) connect the write buffer
// (wr_*_o / wr_ack_i) to the memory arrays (wr_*_i / wr_ack_o), not the miss unit.
logic [DCACHE_SET_ASSOC-1:0] wr_req; logic [ DCACHE_SET_ASSOC-1:0] wr_req;
logic wr_ack; logic wr_ack;
logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx; logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_idx;
logic [DCACHE_OFFSET_WIDTH-1:0] wr_off; logic [ DCACHE_OFFSET_WIDTH-1:0] wr_off;
riscv::xlen_t wr_data; riscv::xlen_t wr_data;
logic [(riscv::XLEN/8)-1:0] wr_data_be; logic [ (riscv::XLEN/8)-1:0] wr_data_be;
logic [DCACHE_USER_WIDTH-1:0] wr_user; logic [ DCACHE_USER_WIDTH-1:0] wr_user;
// miss unit <-> controllers/wbuffer // miss unit <-> controllers/wbuffer
logic [NumPorts-1:0] miss_req; logic [ NumPorts-1:0] miss_req;
logic [NumPorts-1:0] miss_ack; logic [ NumPorts-1:0] miss_ack;
logic [NumPorts-1:0] miss_nc; logic [ NumPorts-1:0] miss_nc;
logic [NumPorts-1:0] miss_we; logic [ NumPorts-1:0] miss_we;
logic [NumPorts-1:0][riscv::XLEN-1:0] miss_wdata; logic [ NumPorts-1:0][ riscv::XLEN-1:0] miss_wdata;
logic [NumPorts-1:0][DCACHE_USER_WIDTH-1:0] miss_wuser; logic [ NumPorts-1:0][ DCACHE_USER_WIDTH-1:0] miss_wuser;
logic [NumPorts-1:0][riscv::PLEN-1:0] miss_paddr; logic [ NumPorts-1:0][ riscv::PLEN-1:0] miss_paddr;
logic [NumPorts-1:0][2:0] miss_size; logic [ NumPorts-1:0][ 2:0] miss_size;
logic [NumPorts-1:0][CACHE_ID_WIDTH-1:0] miss_id; logic [ NumPorts-1:0][ CACHE_ID_WIDTH-1:0] miss_id;
logic [NumPorts-1:0] miss_replay; logic [ NumPorts-1:0] miss_replay;
logic [NumPorts-1:0] miss_rtrn_vld; logic [ NumPorts-1:0] miss_rtrn_vld;
logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id; logic [ CACHE_ID_WIDTH-1:0] miss_rtrn_id;
// memory <-> read controllers/miss unit // memory <-> read controllers/miss unit
logic [NumPorts-1:0] rd_prio; logic [ NumPorts-1:0] rd_prio;
logic [NumPorts-1:0] rd_tag_only; logic [ NumPorts-1:0] rd_tag_only;
logic [NumPorts-1:0] rd_req; logic [ NumPorts-1:0] rd_req;
logic [NumPorts-1:0] rd_ack; logic [ NumPorts-1:0] rd_ack;
logic [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag; logic [ NumPorts-1:0][ DCACHE_TAG_WIDTH-1:0] rd_tag;
logic [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx; logic [ NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx;
logic [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off; logic [ NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off;
riscv::xlen_t rd_data; riscv::xlen_t rd_data;
logic [DCACHE_USER_WIDTH-1:0] rd_user; logic [ DCACHE_USER_WIDTH-1:0] rd_user;
logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits; logic [ DCACHE_SET_ASSOC-1:0] rd_vld_bits;
logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh; logic [ DCACHE_SET_ASSOC-1:0] rd_hit_oh;
// miss unit <-> wbuffer // miss unit <-> wbuffer
logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr; logic [ DCACHE_MAX_TX-1:0][ riscv::PLEN-1:0] tx_paddr;
logic [DCACHE_MAX_TX-1:0] tx_vld; logic [ DCACHE_MAX_TX-1:0] tx_vld;
// wbuffer <-> memory // wbuffer <-> memory
wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data; wbuffer_t [ DCACHE_WBUF_DEPTH-1:0] wbuffer_data;
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// miss handling unit // miss handling unit
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
wt_dcache_missunit #( wt_dcache_missunit #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
.AmoTxId ( RdAmoTxId ), .AmoTxId (RdAmoTxId),
.NumPorts ( NumPorts ) .NumPorts(NumPorts)
) i_wt_dcache_missunit ( ) i_wt_dcache_missunit (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.enable_i ( enable_i ), .enable_i (enable_i),
.flush_i ( flush_i ), .flush_i (flush_i),
.flush_ack_o ( flush_ack_o ), .flush_ack_o (flush_ack_o),
.miss_o ( miss_o ), .miss_o (miss_o),
// wbuffer_empty_o is the module output driven by the write buffer's empty_o; it is fed back here.
.wbuffer_empty_i ( wbuffer_empty_o ), .wbuffer_empty_i(wbuffer_empty_o),
.cache_en_o ( cache_en ), .cache_en_o (cache_en),
// amo interface // amo interface
.amo_req_i ( amo_req_i ), .amo_req_i (amo_req_i),
.amo_resp_o ( amo_resp_o ), .amo_resp_o (amo_resp_o),
// miss handling interface // miss handling interface
.miss_req_i ( miss_req ), .miss_req_i (miss_req),
.miss_ack_o ( miss_ack ), .miss_ack_o (miss_ack),
.miss_nc_i ( miss_nc ), .miss_nc_i (miss_nc),
.miss_we_i ( miss_we ), .miss_we_i (miss_we),
.miss_wdata_i ( miss_wdata ), .miss_wdata_i (miss_wdata),
.miss_wuser_i ( miss_wuser ), .miss_wuser_i (miss_wuser),
.miss_paddr_i ( miss_paddr ), .miss_paddr_i (miss_paddr),
// loop-back of the module output that the controllers / write buffer drive per port
.miss_vld_bits_i ( miss_vld_bits_o ), .miss_vld_bits_i(miss_vld_bits_o),
.miss_size_i ( miss_size ), .miss_size_i (miss_size),
.miss_id_i ( miss_id ), .miss_id_i (miss_id),
.miss_replay_o ( miss_replay ), .miss_replay_o (miss_replay),
.miss_rtrn_vld_o ( miss_rtrn_vld ), .miss_rtrn_vld_o(miss_rtrn_vld),
.miss_rtrn_id_o ( miss_rtrn_id ), .miss_rtrn_id_o (miss_rtrn_id),
// from writebuffer // from writebuffer
.tx_paddr_i ( tx_paddr ), .tx_paddr_i (tx_paddr),
.tx_vld_i ( tx_vld ), .tx_vld_i (tx_vld),
// cache memory interface // cache memory interface
.wr_cl_vld_o ( wr_cl_vld ), .wr_cl_vld_o (wr_cl_vld),
.wr_cl_nc_o ( wr_cl_nc ), .wr_cl_nc_o (wr_cl_nc),
.wr_cl_we_o ( wr_cl_we ), .wr_cl_we_o (wr_cl_we),
.wr_cl_tag_o ( wr_cl_tag ), .wr_cl_tag_o (wr_cl_tag),
.wr_cl_idx_o ( wr_cl_idx ), .wr_cl_idx_o (wr_cl_idx),
.wr_cl_off_o ( wr_cl_off ), .wr_cl_off_o (wr_cl_off),
.wr_cl_data_o ( wr_cl_data ), .wr_cl_data_o (wr_cl_data),
.wr_cl_user_o ( wr_cl_user ), .wr_cl_user_o (wr_cl_user),
.wr_cl_data_be_o ( wr_cl_data_be ), .wr_cl_data_be_o(wr_cl_data_be),
.wr_vld_bits_o ( wr_vld_bits ), .wr_vld_bits_o (wr_vld_bits),
// memory interface // memory interface
.mem_rtrn_vld_i ( mem_rtrn_vld_i ), .mem_rtrn_vld_i (mem_rtrn_vld_i),
.mem_rtrn_i ( mem_rtrn_i ), .mem_rtrn_i (mem_rtrn_i),
.mem_data_req_o ( mem_data_req_o ), .mem_data_req_o (mem_data_req_o),
.mem_data_ack_i ( mem_data_ack_i ), .mem_data_ack_i (mem_data_ack_i),
.mem_data_o ( mem_data_o ) .mem_data_o (mem_data_o)
); );
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// read controllers (LD unit and PTW/MMU) // read controllers (LD unit and PTW/MMU)
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// note: last read port is used by the write buffer // note: last read port is used by the write buffer
for(genvar k=0; k<NumPorts-1; k++) begin : gen_rd_ports for (genvar k = 0; k < NumPorts - 1; k++) begin : gen_rd_ports
// set these to high prio ports // set these to high prio ports
assign rd_prio[k] = 1'b1; assign rd_prio[k] = 1'b1;
wt_dcache_ctrl #( wt_dcache_ctrl #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg(CVA6Cfg),
.RdTxId ( RdAmoTxId ) .RdTxId (RdAmoTxId)
) i_wt_dcache_ctrl ( ) i_wt_dcache_ctrl (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.cache_en_i ( cache_en ), .cache_en_i (cache_en),
// reqs from core // reqs from core
.req_port_i ( req_ports_i [k] ), .req_port_i (req_ports_i[k]),
.req_port_o ( req_ports_o [k] ), .req_port_o (req_ports_o[k]),
// miss interface // miss interface
.miss_req_o ( miss_req [k] ), .miss_req_o (miss_req[k]),
.miss_ack_i ( miss_ack [k] ), .miss_ack_i (miss_ack[k]),
.miss_we_o ( miss_we [k] ), .miss_we_o (miss_we[k]),
.miss_wdata_o ( miss_wdata [k] ), .miss_wdata_o (miss_wdata[k]),
.miss_wuser_o ( miss_wuser [k] ), .miss_wuser_o (miss_wuser[k]),
.miss_vld_bits_o ( miss_vld_bits_o[k]), .miss_vld_bits_o(miss_vld_bits_o[k]),
.miss_paddr_o ( miss_paddr [k] ), .miss_paddr_o (miss_paddr[k]),
.miss_nc_o ( miss_nc [k] ), .miss_nc_o (miss_nc[k]),
.miss_size_o ( miss_size [k] ), .miss_size_o (miss_size[k]),
.miss_id_o ( miss_id [k] ), .miss_id_o (miss_id[k]),
.miss_replay_i ( miss_replay [k] ), .miss_replay_i (miss_replay[k]),
.miss_rtrn_vld_i ( miss_rtrn_vld [k] ), .miss_rtrn_vld_i(miss_rtrn_vld[k]),
// used to detect readout mux collisions // used to detect readout mux collisions
.wr_cl_vld_i ( wr_cl_vld ), .wr_cl_vld_i (wr_cl_vld),
// cache mem interface // cache mem interface
.rd_tag_o ( rd_tag [k] ), .rd_tag_o (rd_tag[k]),
.rd_idx_o ( rd_idx [k] ), .rd_idx_o (rd_idx[k]),
.rd_off_o ( rd_off [k] ), .rd_off_o (rd_off[k]),
.rd_req_o ( rd_req [k] ), .rd_req_o (rd_req[k]),
.rd_tag_only_o ( rd_tag_only [k] ), .rd_tag_only_o (rd_tag_only[k]),
.rd_ack_i ( rd_ack [k] ), .rd_ack_i (rd_ack[k]),
// the read-out signals below are not indexed by k: every port sees the same rd_data/rd_user/rd_vld_bits/rd_hit_oh
.rd_data_i ( rd_data ), .rd_data_i (rd_data),
.rd_user_i ( rd_user ), .rd_user_i (rd_user),
.rd_vld_bits_i ( rd_vld_bits ), .rd_vld_bits_i (rd_vld_bits),
.rd_hit_oh_i ( rd_hit_oh ) .rd_hit_oh_i (rd_hit_oh)
); );
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// store unit controller // store unit controller
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// set read port to low priority // set read port to low priority
assign rd_prio[NumPorts-1] = 1'b0; assign rd_prio[NumPorts-1] = 1'b0;
// The write buffer uses index NumPorts-1 of every per-port vector. Compared with the read
// controller instances above it has no miss_replay_i and no rd_user_i connection, and it
// additionally receives miss_rtrn_id; miss_replay[NumPorts-1] is therefore not consumed in this module.
wt_dcache_wbuffer #( wt_dcache_wbuffer #(
.CVA6Cfg ( CVA6Cfg ) .CVA6Cfg(CVA6Cfg)
) i_wt_dcache_wbuffer ( ) i_wt_dcache_wbuffer (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.empty_o ( wbuffer_empty_o ), .empty_o (wbuffer_empty_o),
.not_ni_o ( wbuffer_not_ni_o ), .not_ni_o (wbuffer_not_ni_o),
// TODO: fix this // TODO: fix this
.cache_en_i ( cache_en ), .cache_en_i (cache_en),
// .cache_en_i ( '0 ), // .cache_en_i ( '0 ),
// request ports from core (store unit) // request ports from core (store unit)
.req_port_i ( req_ports_i [NumPorts-1] ), .req_port_i (req_ports_i[NumPorts-1]),
.req_port_o ( req_ports_o [NumPorts-1] ), .req_port_o (req_ports_o[NumPorts-1]),
// miss unit interface // miss unit interface
.miss_req_o ( miss_req [NumPorts-1] ), .miss_req_o (miss_req[NumPorts-1]),
.miss_ack_i ( miss_ack [NumPorts-1] ), .miss_ack_i (miss_ack[NumPorts-1]),
.miss_we_o ( miss_we [NumPorts-1] ), .miss_we_o (miss_we[NumPorts-1]),
.miss_wdata_o ( miss_wdata [NumPorts-1] ), .miss_wdata_o (miss_wdata[NumPorts-1]),
.miss_wuser_o ( miss_wuser [NumPorts-1] ), .miss_wuser_o (miss_wuser[NumPorts-1]),
.miss_vld_bits_o ( miss_vld_bits_o[NumPorts-1] ), .miss_vld_bits_o(miss_vld_bits_o[NumPorts-1]),
.miss_paddr_o ( miss_paddr [NumPorts-1] ), .miss_paddr_o (miss_paddr[NumPorts-1]),
.miss_nc_o ( miss_nc [NumPorts-1] ), .miss_nc_o (miss_nc[NumPorts-1]),
.miss_size_o ( miss_size [NumPorts-1] ), .miss_size_o (miss_size[NumPorts-1]),
.miss_id_o ( miss_id [NumPorts-1] ), .miss_id_o (miss_id[NumPorts-1]),
.miss_rtrn_vld_i ( miss_rtrn_vld [NumPorts-1] ), .miss_rtrn_vld_i(miss_rtrn_vld[NumPorts-1]),
.miss_rtrn_id_i ( miss_rtrn_id ), .miss_rtrn_id_i (miss_rtrn_id),
// cache read interface // cache read interface
.rd_tag_o ( rd_tag [NumPorts-1] ), .rd_tag_o (rd_tag[NumPorts-1]),
.rd_idx_o ( rd_idx [NumPorts-1] ), .rd_idx_o (rd_idx[NumPorts-1]),
.rd_off_o ( rd_off [NumPorts-1] ), .rd_off_o (rd_off[NumPorts-1]),
.rd_req_o ( rd_req [NumPorts-1] ), .rd_req_o (rd_req[NumPorts-1]),
.rd_tag_only_o ( rd_tag_only [NumPorts-1] ), .rd_tag_only_o (rd_tag_only[NumPorts-1]),
.rd_ack_i ( rd_ack [NumPorts-1] ), .rd_ack_i (rd_ack[NumPorts-1]),
.rd_data_i ( rd_data ), .rd_data_i (rd_data),
.rd_vld_bits_i ( rd_vld_bits ), .rd_vld_bits_i (rd_vld_bits),
.rd_hit_oh_i ( rd_hit_oh ), .rd_hit_oh_i (rd_hit_oh),
// incoming invalidations/cache refills // incoming invalidations/cache refills
.wr_cl_vld_i ( wr_cl_vld ), .wr_cl_vld_i (wr_cl_vld),
.wr_cl_idx_i ( wr_cl_idx ), .wr_cl_idx_i (wr_cl_idx),
// single word write interface // single word write interface
.wr_req_o ( wr_req ), .wr_req_o (wr_req),
.wr_ack_i ( wr_ack ), .wr_ack_i (wr_ack),
.wr_idx_o ( wr_idx ), .wr_idx_o (wr_idx),
.wr_off_o ( wr_off ), .wr_off_o (wr_off),
.wr_data_o ( wr_data ), .wr_data_o (wr_data),
.wr_user_o ( wr_user ), .wr_user_o (wr_user),
.wr_data_be_o ( wr_data_be ), .wr_data_be_o (wr_data_be),
// write buffer forwarding // write buffer forwarding
.wbuffer_data_o ( wbuffer_data ), .wbuffer_data_o (wbuffer_data),
.tx_paddr_o ( tx_paddr ), .tx_paddr_o (tx_paddr),
.tx_vld_o ( tx_vld ) .tx_vld_o (tx_vld)
); );
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// memory arrays, arbitration and tag comparison // memory arrays, arbitration and tag comparison
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
wt_dcache_mem #( wt_dcache_mem #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
.NumPorts ( NumPorts ) .NumPorts(NumPorts)
) i_wt_dcache_mem ( ) i_wt_dcache_mem (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
// read ports // read ports
// rd_prio is 1 for ports 0 .. NumPorts-2 and 0 for the write buffer port (assigned above);
// all per-port vectors are passed whole.
.rd_prio_i ( rd_prio ), .rd_prio_i (rd_prio),
.rd_tag_i ( rd_tag ), .rd_tag_i (rd_tag),
.rd_idx_i ( rd_idx ), .rd_idx_i (rd_idx),
.rd_off_i ( rd_off ), .rd_off_i (rd_off),
.rd_req_i ( rd_req ), .rd_req_i (rd_req),
.rd_tag_only_i ( rd_tag_only ), .rd_tag_only_i (rd_tag_only),
.rd_ack_o ( rd_ack ), .rd_ack_o (rd_ack),
.rd_vld_bits_o ( rd_vld_bits ), .rd_vld_bits_o (rd_vld_bits),
.rd_hit_oh_o ( rd_hit_oh ), .rd_hit_oh_o (rd_hit_oh),
.rd_data_o ( rd_data ), .rd_data_o (rd_data),
.rd_user_o ( rd_user ), .rd_user_o (rd_user),
// cacheline write port // cacheline write port
.wr_cl_vld_i ( wr_cl_vld ), .wr_cl_vld_i (wr_cl_vld),
.wr_cl_nc_i ( wr_cl_nc ), .wr_cl_nc_i (wr_cl_nc),
.wr_cl_we_i ( wr_cl_we ), .wr_cl_we_i (wr_cl_we),
.wr_cl_tag_i ( wr_cl_tag ), .wr_cl_tag_i (wr_cl_tag),
.wr_cl_idx_i ( wr_cl_idx ), .wr_cl_idx_i (wr_cl_idx),
.wr_cl_off_i ( wr_cl_off ), .wr_cl_off_i (wr_cl_off),
.wr_cl_data_i ( wr_cl_data ), .wr_cl_data_i (wr_cl_data),
.wr_cl_user_i ( wr_cl_user ), .wr_cl_user_i (wr_cl_user),
.wr_cl_data_be_i ( wr_cl_data_be ), .wr_cl_data_be_i(wr_cl_data_be),
.wr_vld_bits_i ( wr_vld_bits ), .wr_vld_bits_i (wr_vld_bits),
// single word write port // single word write port
.wr_req_i ( wr_req ), .wr_req_i (wr_req),
.wr_ack_o ( wr_ack ), .wr_ack_o (wr_ack),
.wr_idx_i ( wr_idx ), .wr_idx_i (wr_idx),
.wr_off_i ( wr_off ), .wr_off_i (wr_off),
.wr_data_i ( wr_data ), .wr_data_i (wr_data),
.wr_user_i ( wr_user ), .wr_user_i (wr_user),
.wr_data_be_i ( wr_data_be ), .wr_data_be_i (wr_data_be),
// write buffer forwarding // write buffer forwarding
.wbuffer_data_i ( wbuffer_data ) .wbuffer_data_i (wbuffer_data)
); );
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// assertions // assertions
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// check for concurrency issues // check for concurrency issues
//pragma translate_off //pragma translate_off
`ifndef VERILATOR `ifndef VERILATOR
// Sampled on posedge clk_i and disabled while rst_ni is low: in any cycle where flush_i and
// flush_ack_o are both high, wbuffer_empty_o must be high as well; a violation ends simulation via $fatal.
flush: assert property ( flush :
assert property (
@(posedge clk_i) disable iff (!rst_ni) flush_i |-> flush_ack_o |-> wbuffer_empty_o) @(posedge clk_i) disable iff (!rst_ni) flush_i |-> flush_ack_o |-> wbuffer_empty_o)
else $fatal(1,"[l1 dcache] flushed cache implies flushed wbuffer"); else $fatal(1, "[l1 dcache] flushed cache implies flushed wbuffer");
// One-time parameter check at the start of simulation: DCACHE_INDEX_WIDTH must not exceed 12.
initial begin initial begin
// assert wrong parameterizations // assert wrong parameterizations
assert (DCACHE_INDEX_WIDTH<=12) assert (DCACHE_INDEX_WIDTH <= 12)
else $fatal(1,"[l1 dcache] cache index width can be maximum 12bit since VM uses 4kB pages"); else $fatal(1, "[l1 dcache] cache index width can be maximum 12bit since VM uses 4kB pages");
end end
`endif `endif
//pragma translate_on //pragma translate_on
endmodule // wt_dcache endmodule // wt_dcache

View file

@ -13,95 +13,110 @@
// Description: DCache controller for read port // Description: DCache controller for read port
module wt_dcache_ctrl import ariane_pkg::*; import wt_cache_pkg::*; #( module wt_dcache_ctrl
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter logic [CACHE_ID_WIDTH-1:0] RdTxId = 1 import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter logic [CACHE_ID_WIDTH-1:0] RdTxId = 1
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
input logic cache_en_i, input logic cache_en_i,
// core request ports // core request ports
input dcache_req_i_t req_port_i, input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o, output dcache_req_o_t req_port_o,
// interface to miss handler // interface to miss handler
output logic miss_req_o, output logic miss_req_o,
input logic miss_ack_i, input logic miss_ack_i,
output logic miss_we_o, // unused (set to 0) output logic miss_we_o, // unused (set to 0)
output riscv::xlen_t miss_wdata_o, // unused (set to 0) output riscv::xlen_t miss_wdata_o, // unused (set to 0)
output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o, // unused (set to 0) output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o, // unused (set to 0)
output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // valid bits at the missed index output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // valid bits at the missed index
output logic [riscv::PLEN-1:0] miss_paddr_o, output logic [riscv::PLEN-1:0] miss_paddr_o,
output logic miss_nc_o, // request to I/O space output logic miss_nc_o, // request to I/O space
output logic [2:0] miss_size_o, // 00: 1byte, 01: 2byte, 10: 4byte, 11: 8byte, 111: cacheline output logic [2:0] miss_size_o, // 00: 1byte, 01: 2byte, 10: 4byte, 11: 8byte, 111: cacheline
output logic [CACHE_ID_WIDTH-1:0] miss_id_o, // set to constant ID output logic [CACHE_ID_WIDTH-1:0] miss_id_o, // set to constant ID
input logic miss_replay_i, // request collided with pending miss - have to replay the request input logic miss_replay_i, // request collided with pending miss - have to replay the request
input logic miss_rtrn_vld_i, // signals that the miss has been served, asserted in the same cycle as when the data returns from memory input logic miss_rtrn_vld_i, // signals that the miss has been served, asserted in the same cycle as when the data returns from memory
// used to detect readout mux collisions // used to detect readout mux collisions
input logic wr_cl_vld_i, input logic wr_cl_vld_i,
// cache memory interface // cache memory interface
output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later
output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o, output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o, output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
output logic rd_req_o, // read the word at offset off_i[:3] in all ways output logic rd_req_o, // read the word at offset off_i[:3] in all ways
output logic rd_tag_only_o, // set to zero here output logic rd_tag_only_o, // set to zero here
input logic rd_ack_i, input logic rd_ack_i,
input riscv::xlen_t rd_data_i, input riscv::xlen_t rd_data_i,
input logic [DCACHE_USER_WIDTH-1:0] rd_user_i, input logic [DCACHE_USER_WIDTH-1:0] rd_user_i,
input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i, input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i,
input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i
); );
// controller FSM // controller FSM
typedef enum logic[2:0] {IDLE, READ, MISS_REQ, MISS_WAIT, KILL_MISS, KILL_MISS_ACK, REPLAY_REQ, REPLAY_READ} state_e; typedef enum logic [2:0] {
IDLE,
READ,
MISS_REQ,
MISS_WAIT,
KILL_MISS,
KILL_MISS_ACK,
REPLAY_REQ,
REPLAY_READ
} state_e;
state_e state_d, state_q; state_e state_d, state_q;
logic [DCACHE_TAG_WIDTH-1:0] address_tag_d, address_tag_q; logic [DCACHE_TAG_WIDTH-1:0] address_tag_d, address_tag_q;
logic [DCACHE_CL_IDX_WIDTH-1:0] address_idx_d, address_idx_q; logic [DCACHE_CL_IDX_WIDTH-1:0] address_idx_d, address_idx_q;
logic [DCACHE_OFFSET_WIDTH-1:0] address_off_d, address_off_q; logic [DCACHE_OFFSET_WIDTH-1:0] address_off_d, address_off_q;
logic [DCACHE_TID_WIDTH-1:0] id_d, id_q; logic [DCACHE_TID_WIDTH-1:0] id_d, id_q;
logic [DCACHE_SET_ASSOC-1:0] vld_data_d, vld_data_q; logic [DCACHE_SET_ASSOC-1:0] vld_data_d, vld_data_q;
logic save_tag, rd_req_d, rd_req_q, rd_ack_d, rd_ack_q; logic save_tag, rd_req_d, rd_req_q, rd_ack_d, rd_ack_q;
logic [1:0] data_size_d, data_size_q; logic [1:0] data_size_d, data_size_q;
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// misc // misc
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// map address to tag/idx/offset and save // map address to tag/idx/offset and save
assign vld_data_d = (rd_req_q) ? rd_vld_bits_i : vld_data_q; assign vld_data_d = (rd_req_q) ? rd_vld_bits_i : vld_data_q;
assign address_tag_d = (save_tag) ? req_port_i.address_tag : address_tag_q; assign address_tag_d = (save_tag) ? req_port_i.address_tag : address_tag_q;
assign address_idx_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] : address_idx_q; assign address_idx_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] : address_idx_q;
assign address_off_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_OFFSET_WIDTH-1:0] : address_off_q; assign address_off_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_OFFSET_WIDTH-1:0] : address_off_q;
assign id_d = (req_port_o.data_gnt) ? req_port_i.data_id : id_q; assign id_d = (req_port_o.data_gnt) ? req_port_i.data_id : id_q;
assign data_size_d = (req_port_o.data_gnt) ? req_port_i.data_size : data_size_q; assign data_size_d = (req_port_o.data_gnt) ? req_port_i.data_size : data_size_q;
assign rd_tag_o = address_tag_d; assign rd_tag_o = address_tag_d;
assign rd_idx_o = address_idx_d; assign rd_idx_o = address_idx_d;
assign rd_off_o = address_off_d; assign rd_off_o = address_off_d;
assign req_port_o.data_rdata = rd_data_i; assign req_port_o.data_rdata = rd_data_i;
assign req_port_o.data_ruser = rd_user_i; assign req_port_o.data_ruser = rd_user_i;
assign req_port_o.data_rid = id_q; assign req_port_o.data_rid = id_q;
// to miss unit // to miss unit
assign miss_vld_bits_o = vld_data_q; assign miss_vld_bits_o = vld_data_q;
assign miss_paddr_o = {address_tag_q, address_idx_q, address_off_q}; assign miss_paddr_o = {address_tag_q, address_idx_q, address_off_q};
assign miss_size_o = (miss_nc_o) ? data_size_q : 3'b111; assign miss_size_o = (miss_nc_o) ? data_size_q : 3'b111;
// noncacheable if request goes to I/O space, or if cache is disabled // noncacheable if request goes to I/O space, or if cache is disabled
assign miss_nc_o = (~cache_en_i) | (~config_pkg::is_inside_cacheable_regions(CVA6Cfg, {{{64-DCACHE_TAG_WIDTH-DCACHE_INDEX_WIDTH}{1'b0}}, address_tag_q, {DCACHE_INDEX_WIDTH{1'b0}}})); assign miss_nc_o = (~cache_en_i) | (~config_pkg::is_inside_cacheable_regions(
CVA6Cfg,
{{{64-DCACHE_TAG_WIDTH-DCACHE_INDEX_WIDTH}{1'b0}}, address_tag_q, {DCACHE_INDEX_WIDTH{1'b0}}}
));
assign miss_we_o = '0; assign miss_we_o = '0;
assign miss_wdata_o = '0; assign miss_wdata_o = '0;
assign miss_wuser_o = '0; assign miss_wuser_o = '0;
assign miss_id_o = RdTxId; assign miss_id_o = RdTxId;
assign rd_req_d = rd_req_o; assign rd_req_d = rd_req_o;
assign rd_ack_d = rd_ack_i; assign rd_ack_d = rd_ack_i;
assign rd_tag_only_o = '0; assign rd_tag_only_o = '0;
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// main control logic // main control logic
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
always_comb begin : p_fsm always_comb begin : p_fsm
// default assignment // default assignment
@ -114,168 +129,171 @@ module wt_dcache_ctrl import ariane_pkg::*; import wt_cache_pkg::*; #(
// interfaces // interfaces
unique case (state_q) unique case (state_q)
////////////////////////////////// //////////////////////////////////
// wait for an incoming request // wait for an incoming request
IDLE: begin IDLE: begin
if (req_port_i.data_req) begin if (req_port_i.data_req) begin
rd_req_o = 1'b1; rd_req_o = 1'b1;
// if read ack then ack the `req_port_o`, and goto `READ` state // if read ack then ack the `req_port_o`, and goto `READ` state
if (rd_ack_i) begin if (rd_ack_i) begin
state_d = READ;
req_port_o.data_gnt = 1'b1;
end
end
end
//////////////////////////////////
// check whether we have a hit
// in case the cache is disabled,
// or in case the address is NC, we
// reuse the miss mechanism to handle
// the request
READ, REPLAY_READ: begin
// speculatively request cache line
rd_req_o = 1'b1;
// kill -> go back to IDLE
if (req_port_i.kill_req) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
end else if (req_port_i.tag_valid | state_q == REPLAY_READ) begin
save_tag = (state_q != REPLAY_READ);
if (wr_cl_vld_i || !rd_ack_q) begin
state_d = REPLAY_REQ;
// we've got a hit
end else if ((|rd_hit_oh_i) && cache_en_i) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
// we can handle another request
if (rd_ack_i && req_port_i.data_req) begin
state_d = READ; state_d = READ;
req_port_o.data_gnt = 1'b1; req_port_o.data_gnt = 1'b1;
end end
end
end
//////////////////////////////////
// check whether we have a hit
// in case the cache is disabled,
// or in case the address is NC, we
// reuse the miss mechanism to handle
// the request
READ, REPLAY_READ: begin
// speculatively request cache line
rd_req_o = 1'b1;
// kill -> go back to IDLE
if(req_port_i.kill_req) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
end else if(req_port_i.tag_valid | state_q==REPLAY_READ) begin
save_tag = (state_q!=REPLAY_READ);
if(wr_cl_vld_i || !rd_ack_q) begin
state_d = REPLAY_REQ;
// we've got a hit
end else if((|rd_hit_oh_i) && cache_en_i) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
// we can handle another request
if (rd_ack_i && req_port_i.data_req) begin
state_d = READ;
req_port_o.data_gnt = 1'b1;
end
// we've got a miss // we've got a miss
end else begin end else begin
state_d = MISS_REQ; state_d = MISS_REQ;
end
end end
end end
////////////////////////////////// end
// issue request //////////////////////////////////
MISS_REQ: begin // issue request
miss_req_o = 1'b1; MISS_REQ: begin
miss_req_o = 1'b1;
if(req_port_i.kill_req) begin if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1; req_port_o.data_rvalid = 1'b1;
if(miss_ack_i) begin if (miss_ack_i) begin
state_d = KILL_MISS;
end else begin
state_d = KILL_MISS_ACK;
end
end else if(miss_replay_i) begin
state_d = REPLAY_REQ;
end else if(miss_ack_i) begin
state_d = MISS_WAIT;
end
end
//////////////////////////////////
// wait until the memory transaction
// returns.
MISS_WAIT: begin
if(req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
if(miss_rtrn_vld_i) begin
state_d = IDLE;
end else begin
state_d = KILL_MISS;
end
end else if(miss_rtrn_vld_i) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
end
end
//////////////////////////////////
// replay read request
REPLAY_REQ: begin
rd_req_o = 1'b1;
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
state_d = IDLE;
end else if(rd_ack_i) begin
state_d = REPLAY_READ;
end
end
//////////////////////////////////
KILL_MISS_ACK: begin
miss_req_o = 1'b1;
// in this case the miss handler did not issue
// a transaction and we can safely go to idle
if(miss_replay_i) begin
state_d = IDLE;
end else if(miss_ack_i) begin
state_d = KILL_MISS; state_d = KILL_MISS;
end else begin
state_d = KILL_MISS_ACK;
end end
end else if (miss_replay_i) begin
state_d = REPLAY_REQ;
end else if (miss_ack_i) begin
state_d = MISS_WAIT;
end end
////////////////////////////////// end
// killed miss, //////////////////////////////////
// wait until miss unit responds and // wait until the memory transaction
// go back to idle // returns.
KILL_MISS: begin MISS_WAIT: begin
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
if (miss_rtrn_vld_i) begin if (miss_rtrn_vld_i) begin
state_d = IDLE; state_d = IDLE;
end else begin
state_d = KILL_MISS;
end end
end else if (miss_rtrn_vld_i) begin
state_d = IDLE;
req_port_o.data_rvalid = 1'b1;
end end
default: begin end
// we should never get here //////////////////////////////////
// replay read request
REPLAY_REQ: begin
rd_req_o = 1'b1;
if (req_port_i.kill_req) begin
req_port_o.data_rvalid = 1'b1;
state_d = IDLE;
end else if (rd_ack_i) begin
state_d = REPLAY_READ;
end
end
//////////////////////////////////
KILL_MISS_ACK: begin
miss_req_o = 1'b1;
// in this case the miss handler did not issue
// a transaction and we can safely go to idle
if (miss_replay_i) begin
state_d = IDLE;
end else if (miss_ack_i) begin
state_d = KILL_MISS;
end
end
//////////////////////////////////
// killed miss,
// wait until miss unit responds and
// go back to idle
KILL_MISS: begin
if (miss_rtrn_vld_i) begin
state_d = IDLE; state_d = IDLE;
end end
endcase // state_q end
default: begin
// we should never get here
state_d = IDLE;
end
endcase // state_q
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// ff's // ff's
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if(!rst_ni) begin if (!rst_ni) begin
state_q <= IDLE; state_q <= IDLE;
address_tag_q <= '0; address_tag_q <= '0;
address_idx_q <= '0; address_idx_q <= '0;
address_off_q <= '0; address_off_q <= '0;
id_q <= '0; id_q <= '0;
vld_data_q <= '0; vld_data_q <= '0;
data_size_q <= '0; data_size_q <= '0;
rd_req_q <= '0; rd_req_q <= '0;
rd_ack_q <= '0; rd_ack_q <= '0;
end else begin end else begin
state_q <= state_d; state_q <= state_d;
address_tag_q <= address_tag_d; address_tag_q <= address_tag_d;
address_idx_q <= address_idx_d; address_idx_q <= address_idx_d;
address_off_q <= address_off_d; address_off_q <= address_off_d;
id_q <= id_d; id_q <= id_d;
vld_data_q <= vld_data_d; vld_data_q <= vld_data_d;
data_size_q <= data_size_d; data_size_q <= data_size_d;
rd_req_q <= rd_req_d; rd_req_q <= rd_req_d;
rd_ack_q <= rd_ack_d; rd_ack_q <= rd_ack_d;
end end
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// assertions // assertions
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
//pragma translate_off //pragma translate_off
`ifndef VERILATOR `ifndef VERILATOR
hot1: assert property ( hot1 :
@(posedge clk_i) disable iff (!rst_ni) (!rd_ack_i) |=> cache_en_i |-> $onehot0(rd_hit_oh_i)) assert property (@(posedge clk_i) disable iff (!rst_ni) (!rd_ack_i) |=> cache_en_i |-> $onehot0(
else $fatal(1,"[l1 dcache ctrl] rd_hit_oh_i signal must be hot1"); rd_hit_oh_i
))
else $fatal(1, "[l1 dcache ctrl] rd_hit_oh_i signal must be hot1");
initial begin initial begin
// assert wrong parameterizations // assert wrong parameterizations
assert (DCACHE_INDEX_WIDTH<=12) assert (DCACHE_INDEX_WIDTH <= 12)
else $fatal(1,"[l1 dcache ctrl] cache index width can be maximum 12bit since VM uses 4kB pages"); else
$fatal(1, "[l1 dcache ctrl] cache index width can be maximum 12bit since VM uses 4kB pages");
end end
`endif `endif
//pragma translate_on //pragma translate_on
endmodule // wt_dcache_ctrl endmodule // wt_dcache_ctrl

View file

@ -26,55 +26,57 @@
// low prio ports (rd_prio_i[port_nr] = 1'b0) // low prio ports (rd_prio_i[port_nr] = 1'b0)
module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #( module wt_dcache_mem
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter int unsigned NumPorts = 3 import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NumPorts = 3
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
// ports // ports
input logic [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag_i, // tag in - comes one cycle later input logic [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag_i, // tag in - comes one cycle later
input logic [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx_i, input logic [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx_i,
input logic [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off_i, input logic [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off_i,
input logic [NumPorts-1:0] rd_req_i, // read the word at offset off_i[:3] in all ways input logic [NumPorts-1:0] rd_req_i, // read the word at offset off_i[:3] in all ways
input logic [NumPorts-1:0] rd_tag_only_i, // only do a tag/valid lookup, no access to data arrays input logic [NumPorts-1:0] rd_tag_only_i, // only do a tag/valid lookup, no access to data arrays
input logic [NumPorts-1:0] rd_prio_i, // 0: low prio, 1: high prio input logic [NumPorts-1:0] rd_prio_i, // 0: low prio, 1: high prio
output logic [NumPorts-1:0] rd_ack_o, output logic [NumPorts-1:0] rd_ack_o,
output logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_o, output logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_o,
output logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_o, output logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_o,
output riscv::xlen_t rd_data_o, output riscv::xlen_t rd_data_o,
output logic [DCACHE_USER_WIDTH-1:0] rd_user_o, output logic [DCACHE_USER_WIDTH-1:0] rd_user_o,
// only available on port 0, uses address signals of port 0 // only available on port 0, uses address signals of port 0
input logic wr_cl_vld_i, input logic wr_cl_vld_i,
input logic wr_cl_nc_i, // noncacheable access input logic wr_cl_nc_i, // noncacheable access
input logic [DCACHE_SET_ASSOC-1:0] wr_cl_we_i, // writes a full cacheline input logic [ DCACHE_SET_ASSOC-1:0] wr_cl_we_i, // writes a full cacheline
input logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag_i, input logic [ DCACHE_TAG_WIDTH-1:0] wr_cl_tag_i,
input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i, input logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i,
input logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_i, input logic [ DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_i,
input logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_i, input logic [ DCACHE_LINE_WIDTH-1:0] wr_cl_data_i,
input logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_i, input logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_i,
input logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_i, input logic [ DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_i,
input logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits_i, input logic [ DCACHE_SET_ASSOC-1:0] wr_vld_bits_i,
// separate port for single word write, no tag access // separate port for single word write, no tag access
input logic [DCACHE_SET_ASSOC-1:0] wr_req_i, // write a single word to offset off_i[:3] input logic [DCACHE_SET_ASSOC-1:0] wr_req_i, // write a single word to offset off_i[:3]
output logic wr_ack_o, output logic wr_ack_o,
input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_i, input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_i,
input logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_i, input logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_i,
input riscv::xlen_t wr_data_i, input riscv::xlen_t wr_data_i,
input logic [DCACHE_USER_WIDTH-1:0] wr_user_i, input logic [DCACHE_USER_WIDTH-1:0] wr_user_i,
input logic [(riscv::XLEN/8)-1:0] wr_data_be_i, input logic [(riscv::XLEN/8)-1:0] wr_data_be_i,
// forwarded wbuffer // forwarded wbuffer
input wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_i input wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_i
); );
// functions // functions
function automatic logic [DCACHE_NUM_BANKS-1:0] dcache_cl_bin2oh ( function automatic logic [DCACHE_NUM_BANKS-1:0] dcache_cl_bin2oh(
input logic [DCACHE_NUM_BANKS_WIDTH-1:0] in input logic [DCACHE_NUM_BANKS_WIDTH-1:0] in);
);
logic [DCACHE_NUM_BANKS-1:0] out; logic [DCACHE_NUM_BANKS-1:0] out;
out = '0; out = '0;
out[in] = 1'b1; out[in] = 1'b1;
@ -83,44 +85,48 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
// number of bits needed to address AXI data. If AxiDataWidth equals XLEN this parameter // number of bits needed to address AXI data. If AxiDataWidth equals XLEN this parameter
// is not needed. Therefore, increment it by one to avoid reverse range select during elaboration. // is not needed. Therefore, increment it by one to avoid reverse range select during elaboration.
localparam AXI_OFFSET_WIDTH = CVA6Cfg.AxiDataWidth == riscv::XLEN ? $clog2(CVA6Cfg.AxiDataWidth/8)+1 : $clog2(CVA6Cfg.AxiDataWidth/8); localparam AXI_OFFSET_WIDTH = CVA6Cfg.AxiDataWidth == riscv::XLEN ? $clog2(
CVA6Cfg.AxiDataWidth / 8
) + 1 : $clog2(
CVA6Cfg.AxiDataWidth / 8
);
logic [DCACHE_NUM_BANKS-1:0] bank_req; logic [DCACHE_NUM_BANKS-1:0] bank_req;
logic [DCACHE_NUM_BANKS-1:0] bank_we; logic [DCACHE_NUM_BANKS-1:0] bank_we;
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][(riscv::XLEN/8)-1:0] bank_be; logic [DCACHE_NUM_BANKS-1:0][ DCACHE_SET_ASSOC-1:0][(riscv::XLEN/8)-1:0] bank_be;
logic [DCACHE_NUM_BANKS-1:0][DCACHE_CL_IDX_WIDTH-1:0] bank_idx; logic [DCACHE_NUM_BANKS-1:0][DCACHE_CL_IDX_WIDTH-1:0] bank_idx;
logic [DCACHE_CL_IDX_WIDTH-1:0] bank_idx_d, bank_idx_q; logic [DCACHE_CL_IDX_WIDTH-1:0] bank_idx_d, bank_idx_q;
logic [DCACHE_OFFSET_WIDTH-1:0] bank_off_d, bank_off_q; logic [DCACHE_OFFSET_WIDTH-1:0] bank_off_d, bank_off_q;
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_wdata; // logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_wdata; //
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_rdata; // logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_rdata; //
logic [DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] rdata_cl; // selected word from each cacheline logic [DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] rdata_cl; // selected word from each cacheline
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_wuser; // logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_wuser; //
logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_ruser; // logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_ruser; //
logic [DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] ruser_cl; // selected word from each cacheline logic [DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] ruser_cl; // selected word from each cacheline
logic [DCACHE_TAG_WIDTH-1:0] rd_tag; logic [DCACHE_TAG_WIDTH-1:0] rd_tag;
logic [DCACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs logic [DCACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs
logic vld_we; // valid bits write enable logic vld_we; // valid bits write enable
logic [DCACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write logic [DCACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write
logic [DCACHE_SET_ASSOC-1:0][DCACHE_TAG_WIDTH-1:0] tag_rdata; // these are the tags coming from the tagmem logic [DCACHE_SET_ASSOC-1:0][DCACHE_TAG_WIDTH-1:0] tag_rdata; // these are the tags coming from the tagmem
logic [DCACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit logic [DCACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit
logic [$clog2(NumPorts)-1:0] vld_sel_d, vld_sel_q; logic [$clog2(NumPorts)-1:0] vld_sel_d, vld_sel_q;
logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh; logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh;
logic [(riscv::XLEN/8)-1:0] wbuffer_be; logic [ (riscv::XLEN/8)-1:0] wbuffer_be;
riscv::xlen_t wbuffer_rdata, rdata; riscv::xlen_t wbuffer_rdata, rdata;
logic [DCACHE_USER_WIDTH-1:0] wbuffer_ruser, ruser; logic [DCACHE_USER_WIDTH-1:0] wbuffer_ruser, ruser;
logic [riscv::PLEN-1:0] wbuffer_cmp_addr; logic [riscv::PLEN-1:0] wbuffer_cmp_addr;
logic cmp_en_d, cmp_en_q; logic cmp_en_d, cmp_en_q;
logic rd_acked; logic rd_acked;
logic [NumPorts-1:0] bank_collision, rd_req_masked, rd_req_prio; logic [NumPorts-1:0] bank_collision, rd_req_masked, rd_req_prio;
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// arbiter // arbiter
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// Priority is highest for lowest read port index // Priority is highest for lowest read port index
// //
@ -130,8 +136,8 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
// [way0, w0] [way1, w0] .. [way0, w1] [way1, w1] .. // [way0, w0] [way1, w0] .. [way0, w1] [way1, w1] ..
// byte enable mapping // byte enable mapping
for (genvar k=0;k<DCACHE_NUM_BANKS;k++) begin : gen_bank for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_bank
for (genvar j=0;j<DCACHE_SET_ASSOC;j++) begin : gen_bank_way for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : gen_bank_way
assign bank_be[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_be_i[k*(riscv::XLEN/8) +: (riscv::XLEN/8)] : assign bank_be[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_be_i[k*(riscv::XLEN/8) +: (riscv::XLEN/8)] :
(wr_req_i[j] & wr_ack_o) ? wr_data_be_i : (wr_req_i[j] & wr_ack_o) ? wr_data_be_i :
'0; '0;
@ -142,12 +148,12 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
end end
end end
assign vld_wdata = wr_vld_bits_i; assign vld_wdata = wr_vld_bits_i;
assign vld_addr = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d]; assign vld_addr = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
assign rd_tag = rd_tag_i[vld_sel_q]; //delayed by one cycle assign rd_tag = rd_tag_i[vld_sel_q]; //delayed by one cycle
assign bank_off_d = (wr_cl_vld_i) ? wr_cl_off_i : rd_off_i[vld_sel_d]; assign bank_off_d = (wr_cl_vld_i) ? wr_cl_off_i : rd_off_i[vld_sel_d];
assign bank_idx_d = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d]; assign bank_idx_d = (wr_cl_vld_i) ? wr_cl_idx_i : rd_idx_i[vld_sel_d];
assign vld_req = (wr_cl_vld_i) ? wr_cl_we_i : (rd_acked) ? '1 : '0; assign vld_req = (wr_cl_vld_i) ? wr_cl_we_i : (rd_acked) ? '1 : '0;
// priority masking // priority masking
@ -157,20 +163,20 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
logic rd_req; logic rd_req;
rr_arb_tree #( rr_arb_tree #(
.NumIn (NumPorts), .NumIn (NumPorts),
.DataWidth (1) .DataWidth(1)
) i_rr_arb_tree ( ) i_rr_arb_tree (
.clk_i (clk_i ), .clk_i (clk_i),
.rst_ni (rst_ni ), .rst_ni (rst_ni),
.flush_i('0 ), .flush_i('0),
.rr_i ('0 ), .rr_i ('0),
.req_i (rd_req_masked ), .req_i (rd_req_masked),
.gnt_o (rd_ack_o ), .gnt_o (rd_ack_o),
.data_i ('0 ), .data_i ('0),
.gnt_i (~wr_cl_vld_i ), .gnt_i (~wr_cl_vld_i),
.req_o (rd_req ), .req_o (rd_req),
.data_o ( ), .data_o (),
.idx_o (vld_sel_d ) .idx_o (vld_sel_d)
); );
assign rd_acked = rd_req & ~wr_cl_vld_i; assign rd_acked = rd_req & ~wr_cl_vld_i;
@ -180,42 +186,43 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
bank_req = '0; bank_req = '0;
wr_ack_o = '0; wr_ack_o = '0;
bank_we = '0; bank_we = '0;
bank_idx = '{default:wr_idx_i}; bank_idx = '{default: wr_idx_i};
for(int k=0; k<NumPorts; k++) begin for (int k = 0; k < NumPorts; k++) begin
bank_collision[k] = rd_off_i[k][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES] == wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]; bank_collision[k] = rd_off_i[k][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES] == wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES];
end end
if(wr_cl_vld_i & |wr_cl_we_i) begin if (wr_cl_vld_i & |wr_cl_we_i) begin
bank_req = '1; bank_req = '1;
bank_we = '1; bank_we = '1;
bank_idx = '{default:wr_cl_idx_i}; bank_idx = '{default: wr_cl_idx_i};
end else begin end else begin
if(rd_acked) begin if (rd_acked) begin
if(!rd_tag_only_i[vld_sel_d]) begin if (!rd_tag_only_i[vld_sel_d]) begin
bank_req = dcache_cl_bin2oh(rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]); bank_req =
dcache_cl_bin2oh(rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
bank_idx[rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]] = rd_idx_i[vld_sel_d]; bank_idx[rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]] = rd_idx_i[vld_sel_d];
end end
end end
if(|wr_req_i) begin if (|wr_req_i) begin
if(rd_tag_only_i[vld_sel_d] || !(rd_ack_o[vld_sel_d] && bank_collision[vld_sel_d])) begin if (rd_tag_only_i[vld_sel_d] || !(rd_ack_o[vld_sel_d] && bank_collision[vld_sel_d])) begin
wr_ack_o = 1'b1; wr_ack_o = 1'b1;
bank_req |= dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]); bank_req |= dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
bank_we = dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]); bank_we = dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]);
end end
end end
end end
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// tag comparison, hit generation, readout muxes // tag comparison, hit generation, readout muxes
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_off; logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_off;
logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_nc_off; logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_nc_off;
logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] wbuffer_hit_idx; logic [ $clog2(DCACHE_WBUF_DEPTH)-1:0] wbuffer_hit_idx;
logic [$clog2(DCACHE_SET_ASSOC)-1:0] rd_hit_idx; logic [ $clog2(DCACHE_SET_ASSOC)-1:0] rd_hit_idx;
assign cmp_en_d = (|vld_req) & ~vld_we; assign cmp_en_d = (|vld_req) & ~vld_we;
@ -223,32 +230,32 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
assign wbuffer_cmp_addr = (wr_cl_vld_i) ? {wr_cl_tag_i, wr_cl_idx_i, wr_cl_off_i} : assign wbuffer_cmp_addr = (wr_cl_vld_i) ? {wr_cl_tag_i, wr_cl_idx_i, wr_cl_off_i} :
{rd_tag, bank_idx_q, bank_off_q}; {rd_tag, bank_idx_q, bank_off_q};
// hit generation // hit generation
for (genvar i=0;i<DCACHE_SET_ASSOC;i++) begin : gen_tag_cmpsel for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel
// tag comparison of ways >0 // tag comparison of ways >0
assign rd_hit_oh_o[i] = (rd_tag == tag_rdata[i]) & rd_vld_bits_o[i] & cmp_en_q; assign rd_hit_oh_o[i] = (rd_tag == tag_rdata[i]) & rd_vld_bits_o[i] & cmp_en_q;
// byte offset mux of ways >0 // byte offset mux of ways >0
assign rdata_cl[i] = bank_rdata[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i]; assign rdata_cl[i] = bank_rdata[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i];
assign ruser_cl[i] = bank_ruser[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i]; assign ruser_cl[i] = bank_ruser[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i];
end end
for(genvar k=0; k<DCACHE_WBUF_DEPTH; k++) begin : gen_wbuffer_hit for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_wbuffer_hit
assign wbuffer_hit_oh[k] = (|wbuffer_data_i[k].valid) & (wbuffer_data_i[k].wtag == (wbuffer_cmp_addr >> riscv::XLEN_ALIGN_BYTES)); assign wbuffer_hit_oh[k] = (|wbuffer_data_i[k].valid) & (wbuffer_data_i[k].wtag == (wbuffer_cmp_addr >> riscv::XLEN_ALIGN_BYTES));
end end
lzc #( lzc #(
.WIDTH ( DCACHE_WBUF_DEPTH ) .WIDTH(DCACHE_WBUF_DEPTH)
) i_lzc_wbuffer_hit ( ) i_lzc_wbuffer_hit (
.in_i ( wbuffer_hit_oh ), .in_i (wbuffer_hit_oh),
.cnt_o ( wbuffer_hit_idx ), .cnt_o (wbuffer_hit_idx),
.empty_o ( ) .empty_o()
); );
lzc #( lzc #(
.WIDTH ( DCACHE_SET_ASSOC ) .WIDTH(DCACHE_SET_ASSOC)
) i_lzc_rd_hit ( ) i_lzc_rd_hit (
.in_i ( rd_hit_oh_o ), .in_i (rd_hit_oh_o),
.cnt_o ( rd_hit_idx ), .cnt_o (rd_hit_idx),
.empty_o ( ) .empty_o()
); );
assign wbuffer_rdata = wbuffer_data_i[wbuffer_hit_idx].data; assign wbuffer_rdata = wbuffer_data_i[wbuffer_hit_idx].data;
@ -256,18 +263,18 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
assign wbuffer_be = (|wbuffer_hit_oh) ? wbuffer_data_i[wbuffer_hit_idx].valid : '0; assign wbuffer_be = (|wbuffer_hit_oh) ? wbuffer_data_i[wbuffer_hit_idx].valid : '0;
if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset
// In case of an uncached read, return the desired XLEN-bit segment of the most recent AXI read // In case of an uncached read, return the desired XLEN-bit segment of the most recent AXI read
assign wr_cl_off = (wr_cl_nc_i) ? (CVA6Cfg.AxiDataWidth == riscv::XLEN) ? '0 : assign wr_cl_off = (wr_cl_nc_i) ? (CVA6Cfg.AxiDataWidth == riscv::XLEN) ? '0 :
wr_cl_off_i[AXI_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES] : wr_cl_off_i[AXI_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES] :
wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]; wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES];
end else begin : gen_piton_offset end else begin : gen_piton_offset
assign wr_cl_off = wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:3]; assign wr_cl_off = wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:3];
end end
always_comb begin always_comb begin
if (wr_cl_vld_i) begin if (wr_cl_vld_i) begin
rdata = wr_cl_data_i[wr_cl_off*riscv::XLEN +: riscv::XLEN]; rdata = wr_cl_data_i[wr_cl_off*riscv::XLEN+:riscv::XLEN];
ruser = wr_cl_user_i[wr_cl_off*DCACHE_USER_WIDTH +: DCACHE_USER_WIDTH]; ruser = wr_cl_user_i[wr_cl_off*DCACHE_USER_WIDTH+:DCACHE_USER_WIDTH];
end else begin end else begin
rdata = rdata_cl[rd_hit_idx]; rdata = rdata_cl[rd_hit_idx];
ruser = ruser_cl[rd_hit_idx]; ruser = ruser_cl[rd_hit_idx];
@ -275,37 +282,37 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
end end
// overlay bytes that hit in the write buffer // overlay bytes that hit in the write buffer
for(genvar k=0; k<(riscv::XLEN/8); k++) begin : gen_rd_data for (genvar k = 0; k < (riscv::XLEN / 8); k++) begin : gen_rd_data
assign rd_data_o[8*k +: 8] = (wbuffer_be[k]) ? wbuffer_rdata[8*k +: 8] : rdata[8*k +: 8]; assign rd_data_o[8*k+:8] = (wbuffer_be[k]) ? wbuffer_rdata[8*k+:8] : rdata[8*k+:8];
end end
for(genvar k=0; k<DCACHE_USER_WIDTH/8; k++) begin : gen_rd_user for (genvar k = 0; k < DCACHE_USER_WIDTH / 8; k++) begin : gen_rd_user
assign rd_user_o[8*k +: 8] = (wbuffer_be[k]) ? wbuffer_ruser[8*k +: 8] : ruser[8*k +: 8]; assign rd_user_o[8*k+:8] = (wbuffer_be[k]) ? wbuffer_ruser[8*k+:8] : ruser[8*k+:8];
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// memory arrays and regs // memory arrays and regs
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
logic [DCACHE_TAG_WIDTH:0] vld_tag_rdata [DCACHE_SET_ASSOC-1:0]; logic [DCACHE_TAG_WIDTH:0] vld_tag_rdata[DCACHE_SET_ASSOC-1:0];
for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_data_banks for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_data_banks
// Data RAM // Data RAM
sram #( sram #(
.USER_WIDTH ( ariane_pkg::DCACHE_SET_ASSOC * DATA_USER_WIDTH ), .USER_WIDTH(ariane_pkg::DCACHE_SET_ASSOC * DATA_USER_WIDTH),
.DATA_WIDTH ( ariane_pkg::DCACHE_SET_ASSOC * riscv::XLEN ), .DATA_WIDTH(ariane_pkg::DCACHE_SET_ASSOC * riscv::XLEN),
.USER_EN ( ariane_pkg::DATA_USER_EN ), .USER_EN (ariane_pkg::DATA_USER_EN),
.NUM_WORDS ( wt_cache_pkg::DCACHE_NUM_WORDS ) .NUM_WORDS (wt_cache_pkg::DCACHE_NUM_WORDS)
) i_data_sram ( ) i_data_sram (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.req_i ( bank_req [k] ), .req_i (bank_req[k]),
.we_i ( bank_we [k] ), .we_i (bank_we[k]),
.addr_i ( bank_idx [k] ), .addr_i (bank_idx[k]),
.wuser_i ( bank_wuser [k] ), .wuser_i(bank_wuser[k]),
.wdata_i ( bank_wdata [k] ), .wdata_i(bank_wdata[k]),
.be_i ( bank_be [k] ), .be_i (bank_be[k]),
.ruser_o ( bank_ruser [k] ), .ruser_o(bank_ruser[k]),
.rdata_o ( bank_rdata [k] ) .rdata_o(bank_rdata[k])
); );
end end
@ -316,25 +323,25 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
// Tag RAM // Tag RAM
sram #( sram #(
// tag + valid bit // tag + valid bit
.DATA_WIDTH ( ariane_pkg::DCACHE_TAG_WIDTH + 1 ), .DATA_WIDTH(ariane_pkg::DCACHE_TAG_WIDTH + 1),
.NUM_WORDS ( wt_cache_pkg::DCACHE_NUM_WORDS ) .NUM_WORDS (wt_cache_pkg::DCACHE_NUM_WORDS)
) i_tag_sram ( ) i_tag_sram (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.req_i ( vld_req[i] ), .req_i (vld_req[i]),
.we_i ( vld_we ), .we_i (vld_we),
.addr_i ( vld_addr ), .addr_i (vld_addr),
.wuser_i ( '0 ), .wuser_i('0),
.wdata_i ( {vld_wdata[i], wr_cl_tag_i} ), .wdata_i({vld_wdata[i], wr_cl_tag_i}),
.be_i ( '1 ), .be_i ('1),
.ruser_o ( ), .ruser_o(),
.rdata_o ( vld_tag_rdata[i] ) .rdata_o(vld_tag_rdata[i])
); );
end end
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if(!rst_ni) begin if (!rst_ni) begin
bank_idx_q <= '0; bank_idx_q <= '0;
bank_off_q <= '0; bank_off_q <= '0;
vld_sel_q <= '0; vld_sel_q <= '0;
@ -342,56 +349,63 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
end else begin end else begin
bank_idx_q <= bank_idx_d; bank_idx_q <= bank_idx_d;
bank_off_q <= bank_off_d; bank_off_q <= bank_off_d;
vld_sel_q <= vld_sel_d ; vld_sel_q <= vld_sel_d;
cmp_en_q <= cmp_en_d; cmp_en_q <= cmp_en_d;
end end
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// assertions // assertions
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
//pragma translate_off //pragma translate_off
`ifndef VERILATOR `ifndef VERILATOR
initial begin initial begin
cach_line_width_axi: assert (DCACHE_LINE_WIDTH >= CVA6Cfg.AxiDataWidth) cach_line_width_axi :
else $fatal(1, "[l1 dcache] cache line size needs to be greater or equal AXI data width"); assert (DCACHE_LINE_WIDTH >= CVA6Cfg.AxiDataWidth)
else $fatal(1, "[l1 dcache] cache line size needs to be greater or equal AXI data width");
end end
initial begin initial begin
axi_xlen: assert (CVA6Cfg.AxiDataWidth >= riscv::XLEN) axi_xlen :
else $fatal(1, "[l1 dcache] AXI data width needs to be greater or equal XLEN"); assert (CVA6Cfg.AxiDataWidth >= riscv::XLEN)
else $fatal(1, "[l1 dcache] AXI data width needs to be greater or equal XLEN");
end end
initial begin initial begin
cach_line_width_xlen: assert (DCACHE_LINE_WIDTH > riscv::XLEN) cach_line_width_xlen :
else $fatal(1, "[l1 dcache] cache_line_size needs to be greater than XLEN"); assert (DCACHE_LINE_WIDTH > riscv::XLEN)
else $fatal(1, "[l1 dcache] cache_line_size needs to be greater than XLEN");
end end
hit_hot1: assert property ( hit_hot1 :
@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0(rd_hit_oh_o)) assert property (@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0(
else $fatal(1,"[l1 dcache] rd_hit_oh_o signal must be hot1"); rd_hit_oh_o
))
else $fatal(1, "[l1 dcache] rd_hit_oh_o signal must be hot1");
word_write_hot1: assert property ( word_write_hot1 :
@(posedge clk_i) disable iff (!rst_ni) wr_ack_o |-> $onehot0(wr_req_i)) assert property (@(posedge clk_i) disable iff (!rst_ni) wr_ack_o |-> $onehot0(wr_req_i))
else $fatal(1,"[l1 dcache] wr_req_i signal must be hot1"); else $fatal(1, "[l1 dcache] wr_req_i signal must be hot1");
wbuffer_hit_hot1: assert property ( wbuffer_hit_hot1 :
@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0(wbuffer_hit_oh)) assert property (@(posedge clk_i) disable iff (!rst_ni) &vld_req |-> !vld_we |=> $onehot0(
else $fatal(1,"[l1 dcache] wbuffer_hit_oh signal must be hot1"); wbuffer_hit_oh
))
else $fatal(1, "[l1 dcache] wbuffer_hit_oh signal must be hot1");
// this is only used for verification! // this is only used for verification!
logic vld_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0]; logic vld_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];
logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0]; logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];
logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] tag_write_duplicate_test; logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] tag_write_duplicate_test;
always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror
if(!rst_ni) begin if (!rst_ni) begin
vld_mirror <= '{default:'0}; vld_mirror <= '{default: '0};
tag_mirror <= '{default:'0}; tag_mirror <= '{default: '0};
end else begin end else begin
for (int i = 0; i < DCACHE_SET_ASSOC; i++) begin for (int i = 0; i < DCACHE_SET_ASSOC; i++) begin
if(vld_req[i] & vld_we) begin if (vld_req[i] & vld_we) begin
vld_mirror[vld_addr][i] <= vld_wdata[i]; vld_mirror[vld_addr][i] <= vld_wdata[i];
tag_mirror[vld_addr][i] <= wr_cl_tag_i; tag_mirror[vld_addr][i] <= wr_cl_tag_i;
end end
@ -400,14 +414,15 @@ module wt_dcache_mem import ariane_pkg::*; import wt_cache_pkg::*; #(
end end
for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_dubl_test for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_dubl_test
assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == wr_cl_tag_i) & vld_mirror[vld_addr][i] & (|vld_wdata); assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == wr_cl_tag_i) & vld_mirror[vld_addr][i] & (|vld_wdata);
end end
tag_write_duplicate: assert property ( tag_write_duplicate :
assert property (
@(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test)) @(posedge clk_i) disable iff (!rst_ni) |vld_req |-> vld_we |-> !(|tag_write_duplicate_test))
else $fatal(1,"[l1 dcache] cannot allocate a CL that is already present in the cache"); else $fatal(1, "[l1 dcache] cannot allocate a CL that is already present in the cache");
`endif `endif
//pragma translate_on //pragma translate_on
endmodule // wt_dcache_mem endmodule // wt_dcache_mem

View file

@ -14,66 +14,68 @@
// is that the port with the highest index issues writes instead of reads. // is that the port with the highest index issues writes instead of reads.
module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #( module wt_dcache_missunit
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter logic [CACHE_ID_WIDTH-1:0] AmoTxId = 1, // TX id to be used for AMOs import wt_cache_pkg::*;
parameter int unsigned NumPorts = 4 // number of miss ports #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter logic [CACHE_ID_WIDTH-1:0] AmoTxId = 1, // TX id to be used for AMOs
parameter int unsigned NumPorts = 4 // number of miss ports
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
// cache management, signals from/to core // cache management, signals from/to core
input logic enable_i, // from CSR input logic enable_i, // from CSR
input logic flush_i, // flush request, this waits for pending tx (write, read) to finish and will clear the cache input logic flush_i, // flush request, this waits for pending tx (write, read) to finish and will clear the cache
output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed output logic flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed
output logic miss_o, // we missed on a ld/st output logic miss_o, // we missed on a ld/st
// local cache management signals // local cache management signals
input logic wbuffer_empty_i, input logic wbuffer_empty_i,
output logic cache_en_o, // local cache enable signal output logic cache_en_o, // local cache enable signal
// AMO interface // AMO interface
input amo_req_t amo_req_i, input amo_req_t amo_req_i,
output amo_resp_t amo_resp_o, output amo_resp_t amo_resp_o,
// miss handling interface (ld, ptw, wbuffer) // miss handling interface (ld, ptw, wbuffer)
input logic [NumPorts-1:0] miss_req_i, input logic [NumPorts-1:0] miss_req_i,
output logic [NumPorts-1:0] miss_ack_o, output logic [NumPorts-1:0] miss_ack_o,
input logic [NumPorts-1:0] miss_nc_i, input logic [NumPorts-1:0] miss_nc_i,
input logic [NumPorts-1:0] miss_we_i, input logic [NumPorts-1:0] miss_we_i,
input logic [NumPorts-1:0][riscv::XLEN-1:0] miss_wdata_i, input logic [NumPorts-1:0][riscv::XLEN-1:0] miss_wdata_i,
input logic [NumPorts-1:0][DCACHE_USER_WIDTH-1:0] miss_wuser_i, input logic [NumPorts-1:0][DCACHE_USER_WIDTH-1:0] miss_wuser_i,
input logic [NumPorts-1:0][riscv::PLEN-1:0] miss_paddr_i, input logic [NumPorts-1:0][riscv::PLEN-1:0] miss_paddr_i,
input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_i, input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_i,
input logic [NumPorts-1:0][2:0] miss_size_i, input logic [NumPorts-1:0][2:0] miss_size_i,
input logic [NumPorts-1:0][CACHE_ID_WIDTH-1:0] miss_id_i, // used as transaction ID input logic [NumPorts-1:0][CACHE_ID_WIDTH-1:0] miss_id_i, // used as transaction ID
// signals that the request collided with a pending read // signals that the request collided with a pending read
output logic [NumPorts-1:0] miss_replay_o, output logic [NumPorts-1:0] miss_replay_o,
// signals response from memory // signals response from memory
output logic [NumPorts-1:0] miss_rtrn_vld_o, output logic [NumPorts-1:0] miss_rtrn_vld_o,
output logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_o, // only used for writes, set to zero for reads output logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_o, // only used for writes, set to zero for reads
// from writebuffer // from writebuffer
input logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_i, // used to check for address collisions with read operations input logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_i, // used to check for address collisions with read operations
input logic [DCACHE_MAX_TX-1:0] tx_vld_i, // used to check for address collisions with read operations input logic [DCACHE_MAX_TX-1:0] tx_vld_i, // used to check for address collisions with read operations
// write interface to cache memory // write interface to cache memory
output logic wr_cl_vld_o, // writes a full cacheline output logic wr_cl_vld_o, // writes a full cacheline
output logic wr_cl_nc_o, // non-cacheable flag of the MSHR entry (mshr_q.nc) output logic wr_cl_nc_o, // non-cacheable flag of the MSHR entry (mshr_q.nc)
output logic [DCACHE_SET_ASSOC-1:0] wr_cl_we_o, // writes a full cacheline output logic [DCACHE_SET_ASSOC-1:0] wr_cl_we_o, // writes a full cacheline
output logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag_o, output logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag_o,
output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_o, output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_o, output logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_o,
output logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_o, output logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_o,
output logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_o, output logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_o,
output logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_o, output logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_o,
output logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits_o, output logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits_o,
// memory interface // memory interface
input logic mem_rtrn_vld_i, input logic mem_rtrn_vld_i,
input dcache_rtrn_t mem_rtrn_i, input dcache_rtrn_t mem_rtrn_i,
output logic mem_data_req_o, output logic mem_data_req_o,
input logic mem_data_ack_i, input logic mem_data_ack_i,
output dcache_req_t mem_data_o output dcache_req_t mem_data_o
); );
// functions // functions
function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] dcache_way_bin2oh ( function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] dcache_way_bin2oh(
input logic [L1D_WAY_WIDTH-1:0] in input logic [L1D_WAY_WIDTH-1:0] in);
);
logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] out; logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] out;
out = '0; out = '0;
out[in] = 1'b1; out[in] = 1'b1;
@ -86,35 +88,41 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
// 010: word // 010: word
// 011: dword // 011: dword
// 111: DCACHE line // 111: DCACHE line
function automatic logic [riscv::PLEN-1:0] paddrSizeAlign( function automatic logic [riscv::PLEN-1:0] paddrSizeAlign(input logic [riscv::PLEN-1:0] paddr,
input logic [riscv::PLEN-1:0] paddr, input logic [2:0] size);
input logic [2:0] size
);
logic [riscv::PLEN-1:0] out; logic [riscv::PLEN-1:0] out;
out = paddr; out = paddr;
unique case (size) unique case (size)
3'b001: out[0:0] = '0; 3'b001: out[0:0] = '0;
3'b010: out[1:0] = '0; 3'b010: out[1:0] = '0;
3'b011: out[2:0] = '0; 3'b011: out[2:0] = '0;
3'b111: out[DCACHE_OFFSET_WIDTH-1:0] = '0; 3'b111: out[DCACHE_OFFSET_WIDTH-1:0] = '0;
default: ; default: ;
endcase endcase
return out; return out;
endfunction : paddrSizeAlign endfunction : paddrSizeAlign
// controller FSM // controller FSM
typedef enum logic[2:0] {IDLE, DRAIN, AMO, FLUSH, STORE_WAIT, LOAD_WAIT, AMO_WAIT} state_e; typedef enum logic [2:0] {
IDLE,
DRAIN,
AMO,
FLUSH,
STORE_WAIT,
LOAD_WAIT,
AMO_WAIT
} state_e;
state_e state_d, state_q; state_e state_d, state_q;
// MSHR for reads // MSHR for reads
typedef struct packed { typedef struct packed {
logic [riscv::PLEN-1:0] paddr ; logic [riscv::PLEN-1:0] paddr;
logic [2:0] size ; logic [2:0] size;
logic [DCACHE_SET_ASSOC-1:0] vld_bits; logic [DCACHE_SET_ASSOC-1:0] vld_bits;
logic [CACHE_ID_WIDTH-1:0] id ; logic [CACHE_ID_WIDTH-1:0] id;
logic nc ; logic nc;
logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way; logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way;
logic [$clog2(NumPorts)-1:0] miss_port_idx; logic [$clog2(NumPorts)-1:0] miss_port_idx;
} mshr_t; } mshr_t;
mshr_t mshr_d, mshr_q; mshr_t mshr_d, mshr_q;
@ -131,7 +139,7 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
logic amo_req_d, amo_req_q; logic amo_req_d, amo_req_q;
logic [63:0] amo_rtrn_mux; logic [63:0] amo_rtrn_mux;
riscv::xlen_t amo_data; riscv::xlen_t amo_data;
logic [63:0] amo_user; //DCACHE USER ? DATA_USER_WIDTH logic [63:0] amo_user; //DCACHE USER ? DATA_USER_WIDTH
logic [riscv::PLEN-1:0] tmp_paddr; logic [riscv::PLEN-1:0] tmp_paddr;
logic [$clog2(NumPorts)-1:0] miss_port_idx; logic [$clog2(NumPorts)-1:0] miss_port_idx;
logic [DCACHE_CL_IDX_WIDTH-1:0] cnt_d, cnt_q; logic [DCACHE_CL_IDX_WIDTH-1:0] cnt_d, cnt_q;
@ -144,25 +152,25 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
logic [NumPorts-1:0] mshr_rdrd_collision; logic [NumPorts-1:0] mshr_rdrd_collision;
logic tx_rdwr_collision, mshr_rdwr_collision; logic tx_rdwr_collision, mshr_rdwr_collision;
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// input arbitration and general control sigs // input arbitration and general control sigs
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
assign cache_en_o = enable_q; assign cache_en_o = enable_q;
assign cnt_d = (flush_en) ? cnt_q + 1 : '0; assign cnt_d = (flush_en) ? cnt_q + 1 : '0;
assign flush_done = (cnt_q == wt_cache_pkg::DCACHE_NUM_WORDS-1); assign flush_done = (cnt_q == wt_cache_pkg::DCACHE_NUM_WORDS - 1);
assign miss_req_masked_d = (lock_reqs) ? miss_req_masked_q : assign miss_req_masked_d = (lock_reqs) ? miss_req_masked_q :
(mask_reads) ? miss_we_i & miss_req_i : miss_req_i; (mask_reads) ? miss_we_i & miss_req_i : miss_req_i;
assign miss_is_write = miss_we_i[miss_port_idx]; assign miss_is_write = miss_we_i[miss_port_idx];
// read port arbiter // read port arbiter
lzc #( lzc #(
.WIDTH ( NumPorts ) .WIDTH(NumPorts)
) i_lzc_reqs ( ) i_lzc_reqs (
.in_i ( miss_req_masked_d ), .in_i (miss_req_masked_d),
.cnt_o ( miss_port_idx ), .cnt_o (miss_port_idx),
.empty_o ( ) .empty_o()
); );
always_comb begin : p_ack always_comb begin : p_ack
@ -172,49 +180,47 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
end end
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// MSHR and way replacement logic (only for read ops) // MSHR and way replacement logic (only for read ops)
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// find invalid cache line // find invalid cache line
lzc #( lzc #(
.WIDTH ( ariane_pkg::DCACHE_SET_ASSOC ) .WIDTH(ariane_pkg::DCACHE_SET_ASSOC)
) i_lzc_inv ( ) i_lzc_inv (
.in_i ( ~miss_vld_bits_i[miss_port_idx] ), .in_i (~miss_vld_bits_i[miss_port_idx]),
.cnt_o ( inv_way ), .cnt_o (inv_way),
.empty_o ( all_ways_valid ) .empty_o(all_ways_valid)
); );
// generate random cacheline index // generate random cacheline index
lfsr #( lfsr #(
.LfsrWidth ( 8 ), .LfsrWidth(8),
.OutWidth ( $clog2(ariane_pkg::DCACHE_SET_ASSOC)) .OutWidth ($clog2(ariane_pkg::DCACHE_SET_ASSOC))
) i_lfsr_inv ( ) i_lfsr_inv (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni(rst_ni),
.en_i ( update_lfsr ), .en_i (update_lfsr),
.out_o ( rnd_way ) .out_o (rnd_way)
); );
assign repl_way = (all_ways_valid) ? rnd_way : inv_way; assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
assign mshr_d.size = (mshr_allocate) ? miss_size_i [miss_port_idx] : mshr_q.size; assign mshr_d.size = (mshr_allocate) ? miss_size_i[miss_port_idx] : mshr_q.size;
assign mshr_d.paddr = (mshr_allocate) ? miss_paddr_i [miss_port_idx] : mshr_q.paddr; assign mshr_d.paddr = (mshr_allocate) ? miss_paddr_i[miss_port_idx] : mshr_q.paddr;
assign mshr_d.vld_bits = (mshr_allocate) ? miss_vld_bits_i[miss_port_idx] : mshr_q.vld_bits; assign mshr_d.vld_bits = (mshr_allocate) ? miss_vld_bits_i[miss_port_idx] : mshr_q.vld_bits;
assign mshr_d.id = (mshr_allocate) ? miss_id_i [miss_port_idx] : mshr_q.id; assign mshr_d.id = (mshr_allocate) ? miss_id_i[miss_port_idx] : mshr_q.id;
assign mshr_d.nc = (mshr_allocate) ? miss_nc_i [miss_port_idx] : mshr_q.nc; assign mshr_d.nc = (mshr_allocate) ? miss_nc_i[miss_port_idx] : mshr_q.nc;
assign mshr_d.repl_way = (mshr_allocate) ? repl_way : mshr_q.repl_way; assign mshr_d.repl_way = (mshr_allocate) ? repl_way : mshr_q.repl_way;
assign mshr_d.miss_port_idx = (mshr_allocate) ? miss_port_idx : mshr_q.miss_port_idx; assign mshr_d.miss_port_idx = (mshr_allocate) ? miss_port_idx : mshr_q.miss_port_idx;
// currently we only have one outstanding read TX, hence an incoming load clears the MSHR // currently we only have one outstanding read TX, hence an incoming load clears the MSHR
assign mshr_vld_d = (mshr_allocate) ? 1'b1 : assign mshr_vld_d = (mshr_allocate) ? 1'b1 : (load_ack) ? 1'b0 : mshr_vld_q;
(load_ack) ? 1'b0 :
mshr_vld_q;
assign miss_o = (mshr_allocate) ? ~miss_nc_i[miss_port_idx] : 1'b0; assign miss_o = (mshr_allocate) ? ~miss_nc_i[miss_port_idx] : 1'b0;
for(genvar k=0; k<NumPorts; k++) begin : gen_rdrd_collision for (genvar k = 0; k < NumPorts; k++) begin : gen_rdrd_collision
assign mshr_rdrd_collision[k] = (mshr_q.paddr[riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == miss_paddr_i[k][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && (mshr_vld_q | mshr_vld_q1); assign mshr_rdrd_collision[k] = (mshr_q.paddr[riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == miss_paddr_i[k][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && (mshr_vld_q | mshr_vld_q1);
assign mshr_rdrd_collision_d[k] = (!miss_req_i[k]) ? 1'b0 : mshr_rdrd_collision_q[k] | mshr_rdrd_collision[k]; assign mshr_rdrd_collision_d[k] = (!miss_req_i[k]) ? 1'b0 : mshr_rdrd_collision_q[k] | mshr_rdrd_collision[k];
end end
@ -226,25 +232,25 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
// read collides with inflight TX // read collides with inflight TX
always_comb begin : p_tx_coll always_comb begin : p_tx_coll
tx_rdwr_collision = 1'b0; tx_rdwr_collision = 1'b0;
for(int k=0; k<DCACHE_MAX_TX; k++) begin for (int k = 0; k < DCACHE_MAX_TX; k++) begin
tx_rdwr_collision |= (miss_paddr_i[miss_port_idx][riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == tx_paddr_i[k][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && tx_vld_i[k]; tx_rdwr_collision |= (miss_paddr_i[miss_port_idx][riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == tx_paddr_i[k][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && tx_vld_i[k];
end end
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// to memory // to memory
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// if size = 32bit word, select appropriate offset, replicate for openpiton... // if size = 32bit word, select appropriate offset, replicate for openpiton...
always_comb begin always_comb begin
if (riscv::IS_XLEN64) begin if (riscv::IS_XLEN64) begin
if (amo_req_i.size==2'b10) begin if (amo_req_i.size == 2'b10) begin
amo_data = {amo_req_i.operand_b[0 +: 32], amo_req_i.operand_b[0 +: 32]}; amo_data = {amo_req_i.operand_b[0+:32], amo_req_i.operand_b[0+:32]};
end else begin end else begin
amo_data = amo_req_i.operand_b; amo_data = amo_req_i.operand_b;
end end
end else begin end else begin
amo_data = amo_req_i.operand_b[0 +: 32]; amo_data = amo_req_i.operand_b[0+:32];
end end
if (ariane_pkg::DATA_USER_EN) begin if (ariane_pkg::DATA_USER_EN) begin
amo_user = amo_data; amo_user = amo_data;
@ -256,12 +262,14 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
// note: openpiton returns a full cacheline! // note: openpiton returns a full cacheline!
if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_rtrn_mux if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_rtrn_mux
if (CVA6Cfg.AxiDataWidth > 64) begin if (CVA6Cfg.AxiDataWidth > 64) begin
assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[$clog2(CVA6Cfg.AxiDataWidth/8)-1:3]*64 +: 64]; assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[$clog2(
CVA6Cfg.AxiDataWidth/8
)-1:3]*64+:64];
end else begin end else begin
assign amo_rtrn_mux = mem_rtrn_i.data[0 +: 64]; assign amo_rtrn_mux = mem_rtrn_i.data[0+:64];
end end
end else begin : gen_piton_rtrn_mux end else begin : gen_piton_rtrn_mux
assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[DCACHE_OFFSET_WIDTH-1:3]*64 +: 64]; assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[DCACHE_OFFSET_WIDTH-1:3]*64+:64];
end end
// always sign extend 32bit values // always sign extend 32bit values
@ -273,41 +281,41 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
assign amo_req_d = amo_req_i.req; assign amo_req_d = amo_req_i.req;
// outgoing memory requests (AMOs are always uncached) // outgoing memory requests (AMOs are always uncached)
assign mem_data_o.tid = (amo_sel) ? AmoTxId : miss_id_i[miss_port_idx]; assign mem_data_o.tid = (amo_sel) ? AmoTxId : miss_id_i[miss_port_idx];
assign mem_data_o.nc = (amo_sel) ? 1'b1 : miss_nc_i[miss_port_idx]; assign mem_data_o.nc = (amo_sel) ? 1'b1 : miss_nc_i[miss_port_idx];
assign mem_data_o.way = (amo_sel) ? '0 : repl_way; assign mem_data_o.way = (amo_sel) ? '0 : repl_way;
assign mem_data_o.data = (amo_sel) ? amo_data : miss_wdata_i[miss_port_idx]; assign mem_data_o.data = (amo_sel) ? amo_data : miss_wdata_i[miss_port_idx];
assign mem_data_o.user = (amo_sel) ? amo_user : miss_wuser_i[miss_port_idx]; assign mem_data_o.user = (amo_sel) ? amo_user : miss_wuser_i[miss_port_idx];
assign mem_data_o.size = (amo_sel) ? amo_req_i.size : miss_size_i [miss_port_idx]; assign mem_data_o.size = (amo_sel) ? amo_req_i.size : miss_size_i[miss_port_idx];
assign mem_data_o.amo_op = (amo_sel) ? amo_req_i.amo_op : AMO_NONE; assign mem_data_o.amo_op = (amo_sel) ? amo_req_i.amo_op : AMO_NONE;
assign tmp_paddr = (amo_sel) ? amo_req_i.operand_a[riscv::PLEN-1:0] : miss_paddr_i[miss_port_idx]; assign tmp_paddr = (amo_sel) ? amo_req_i.operand_a[riscv::PLEN-1:0] : miss_paddr_i[miss_port_idx];
assign mem_data_o.paddr = paddrSizeAlign(tmp_paddr, mem_data_o.size); assign mem_data_o.paddr = paddrSizeAlign(tmp_paddr, mem_data_o.size);
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// back-off mechanism for LR/SC completion guarantee // back-off mechanism for LR/SC completion guarantee
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
logic sc_fail, sc_pass, sc_backoff_over; logic sc_fail, sc_pass, sc_backoff_over;
exp_backoff #( exp_backoff #(
.Seed(3), .Seed (3),
.MaxExp(16) .MaxExp(16)
) i_exp_backoff ( ) i_exp_backoff (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.set_i ( sc_fail ), .set_i (sc_fail),
.clr_i ( sc_pass ), .clr_i (sc_pass),
.is_zero_o ( sc_backoff_over ) .is_zero_o(sc_backoff_over)
); );
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// responses from memory // responses from memory
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// keep track of pending stores // keep track of pending stores
logic store_sent; logic store_sent;
logic [$clog2(wt_cache_pkg::DCACHE_MAX_TX + 1)-1:0] stores_inflight_d, stores_inflight_q; logic [$clog2(wt_cache_pkg::DCACHE_MAX_TX + 1)-1:0] stores_inflight_d, stores_inflight_q;
assign store_sent = mem_data_req_o & mem_data_ack_i & (mem_data_o.rtype == DCACHE_STORE_REQ); assign store_sent = mem_data_req_o & mem_data_ack_i & (mem_data_o.rtype == DCACHE_STORE_REQ);
assign stores_inflight_d = (store_ack && store_sent) ? stores_inflight_q : assign stores_inflight_d = (store_ack && store_sent) ? stores_inflight_q :
(store_ack) ? stores_inflight_q - 1 : (store_ack) ? stores_inflight_q - 1 :
@ -323,7 +331,7 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
inv_vld_all = 1'b0; inv_vld_all = 1'b0;
sc_fail = 1'b0; sc_fail = 1'b0;
sc_pass = 1'b0; sc_pass = 1'b0;
miss_rtrn_vld_o ='0; miss_rtrn_vld_o = '0;
if (mem_rtrn_vld_i) begin if (mem_rtrn_vld_i) begin
unique case (mem_rtrn_i.rtype) unique case (mem_rtrn_i.rtype)
DCACHE_LOAD_ACK: begin DCACHE_LOAD_ACK: begin
@ -333,7 +341,7 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
end end
end end
DCACHE_STORE_ACK: begin DCACHE_STORE_ACK: begin
if (stores_inflight_q>0) begin if (stores_inflight_q > 0) begin
store_ack = 1'b1; store_ack = 1'b1;
miss_rtrn_vld_o[NumPorts-1] = 1'b1; miss_rtrn_vld_o[NumPorts-1] = 1'b1;
end end
@ -344,7 +352,7 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
// need to set SC backoff counter if // need to set SC backoff counter if
// this op failed // this op failed
if (amo_req_i.amo_op == AMO_SC) begin if (amo_req_i.amo_op == AMO_SC) begin
if (amo_resp_o.result>0) begin if (amo_resp_o.result > 0) begin
sc_fail = 1'b1; sc_fail = 1'b1;
end else begin end else begin
sc_pass = 1'b1; sc_pass = 1'b1;
@ -359,50 +367,49 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
// TODO: // TODO:
// DCACHE_INT_REQ: begin // DCACHE_INT_REQ: begin
// end // end
default : begin default: begin
end end
endcase endcase
end end
end end
// to write buffer // to write buffer
assign miss_rtrn_id_o = mem_rtrn_i.tid; assign miss_rtrn_id_o = mem_rtrn_i.tid;
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// writes to cache memory // writes to cache memory
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// cacheline write port // cacheline write port
assign wr_cl_nc_o = mshr_q.nc; assign wr_cl_nc_o = mshr_q.nc;
assign wr_cl_vld_o = load_ack | (| wr_cl_we_o); assign wr_cl_vld_o = load_ack | (|wr_cl_we_o);
assign wr_cl_we_o = (flush_en ) ? '1 : assign wr_cl_we_o = (flush_en) ? '1 : (inv_vld_all) ? '1 : (inv_vld) ? dcache_way_bin2oh(
(inv_vld_all) ? '1 : mem_rtrn_i.inv.way
(inv_vld ) ? dcache_way_bin2oh(mem_rtrn_i.inv.way) : ) : (cl_write_en) ? dcache_way_bin2oh(
(cl_write_en) ? dcache_way_bin2oh(mshr_q.repl_way) : mshr_q.repl_way
'0; ) : '0;
assign wr_vld_bits_o = (flush_en ) ? '0 : assign wr_vld_bits_o = (flush_en) ? '0 : (inv_vld) ? '0 : (cl_write_en) ? dcache_way_bin2oh(
(inv_vld ) ? '0 : mshr_q.repl_way
(cl_write_en) ? dcache_way_bin2oh(mshr_q.repl_way) : ) : '0;
'0;
assign wr_cl_idx_o = (flush_en) ? cnt_q : assign wr_cl_idx_o = (flush_en) ? cnt_q :
(inv_vld) ? mem_rtrn_i.inv.idx[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] : (inv_vld) ? mem_rtrn_i.inv.idx[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] :
mshr_q.paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH]; mshr_q.paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
assign wr_cl_tag_o = mshr_q.paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH]; assign wr_cl_tag_o = mshr_q.paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH];
assign wr_cl_off_o = mshr_q.paddr[DCACHE_OFFSET_WIDTH-1:0]; assign wr_cl_off_o = mshr_q.paddr[DCACHE_OFFSET_WIDTH-1:0];
assign wr_cl_data_o = mem_rtrn_i.data; assign wr_cl_data_o = mem_rtrn_i.data;
assign wr_cl_user_o = mem_rtrn_i.user; assign wr_cl_user_o = mem_rtrn_i.user;
assign wr_cl_data_be_o = (cl_write_en) ? '1 : '0;// we only write complete cachelines into the memory assign wr_cl_data_be_o = (cl_write_en) ? '1 : '0;// we only write complete cachelines into the memory
// only non-NC responses write to the cache // only non-NC responses write to the cache
assign cl_write_en = load_ack & ~mshr_q.nc; assign cl_write_en = load_ack & ~mshr_q.nc;
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// main control logic for generating tx // main control logic for generating tx
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
always_comb begin : p_fsm always_comb begin : p_fsm
// default assignment // default assignment
@ -434,39 +441,39 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
flush_ack_d = flush_i; flush_ack_d = flush_i;
state_d = FLUSH; state_d = FLUSH;
end else begin end else begin
state_d = DRAIN; state_d = DRAIN;
end end
end else if (amo_req_i.req) begin end else if (amo_req_i.req) begin
if (wbuffer_empty_i && !mshr_vld_q) begin if (wbuffer_empty_i && !mshr_vld_q) begin
state_d = AMO; state_d = AMO;
end else begin end else begin
state_d = DRAIN; state_d = DRAIN;
end end
// we've got a miss to handle // we've got a miss to handle
end else if (|miss_req_masked_d) begin end else if (|miss_req_masked_d) begin
// this is a write miss, just pass through (but check whether write collides with MSHR) // this is a write miss, just pass through (but check whether write collides with MSHR)
if (miss_is_write) begin if (miss_is_write) begin
// stall in case this write collides with the MSHR address // stall in case this write collides with the MSHR address
if (!mshr_rdwr_collision) begin if (!mshr_rdwr_collision) begin
mem_data_req_o = 1'b1; mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_STORE_REQ; mem_data_o.rtype = DCACHE_STORE_REQ;
if (!mem_data_ack_i) begin if (!mem_data_ack_i) begin
state_d = STORE_WAIT; state_d = STORE_WAIT;
end end
end end
// this is a read miss, can only allocate 1 MSHR // this is a read miss, can only allocate 1 MSHR
// in case of a load_ack we can accept a new miss, since the MSHR is being cleared // in case of a load_ack we can accept a new miss, since the MSHR is being cleared
end else if (!mshr_vld_q || load_ack) begin end else if (!mshr_vld_q || load_ack) begin
// replay the read request in case the address has collided with MSHR during the time the request was pending // replay the read request in case the address has collided with MSHR during the time the request was pending
// i.e., the cache state may have been updated in the meantime due to a refill at the same CL address // i.e., the cache state may have been updated in the meantime due to a refill at the same CL address
if (mshr_rdrd_collision_d[miss_port_idx]) begin if (mshr_rdrd_collision_d[miss_port_idx]) begin
miss_replay_o[miss_port_idx] = 1'b1; miss_replay_o[miss_port_idx] = 1'b1;
// stall in case this CL address overlaps with a write TX that is in flight // stall in case this CL address overlaps with a write TX that is in flight
end else if (!tx_rdwr_collision) begin end else if (!tx_rdwr_collision) begin
mem_data_req_o = 1'b1; mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_LOAD_REQ; mem_data_o.rtype = DCACHE_LOAD_REQ;
update_lfsr = all_ways_valid & mem_data_ack_i;// need to evict a random way update_lfsr = all_ways_valid & mem_data_ack_i; // need to evict a random way
mshr_allocate = mem_data_ack_i; mshr_allocate = mem_data_ack_i;
if (!mem_data_ack_i) begin if (!mem_data_ack_i) begin
state_d = LOAD_WAIT; state_d = LOAD_WAIT;
end end
@ -477,9 +484,9 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
////////////////////////////////// //////////////////////////////////
// wait until this request is acked // wait until this request is acked
STORE_WAIT: begin STORE_WAIT: begin
lock_reqs = 1'b1; lock_reqs = 1'b1;
mem_data_req_o = 1'b1; mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_STORE_REQ; mem_data_o.rtype = DCACHE_STORE_REQ;
if (mem_data_ack_i) begin if (mem_data_ack_i) begin
state_d = IDLE; state_d = IDLE;
end end
@ -487,11 +494,11 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
////////////////////////////////// //////////////////////////////////
// wait until this request is acked // wait until this request is acked
LOAD_WAIT: begin LOAD_WAIT: begin
lock_reqs = 1'b1; lock_reqs = 1'b1;
mem_data_req_o = 1'b1; mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_LOAD_REQ; mem_data_o.rtype = DCACHE_LOAD_REQ;
if (mem_data_ack_i) begin if (mem_data_ack_i) begin
update_lfsr = all_ways_valid;// need to evict a random way update_lfsr = all_ways_valid; // need to evict a random way
mshr_allocate = 1'b1; mshr_allocate = 1'b1;
state_d = IDLE; state_d = IDLE;
end end
@ -503,8 +510,8 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
mask_reads = 1'b1; mask_reads = 1'b1;
// these are writes, check whether they collide with MSHR // these are writes, check whether they collide with MSHR
if (|miss_req_masked_d && !mshr_rdwr_collision) begin if (|miss_req_masked_d && !mshr_rdwr_collision) begin
mem_data_req_o = 1'b1; mem_data_req_o = 1'b1;
mem_data_o.rtype = DCACHE_STORE_REQ; mem_data_o.rtype = DCACHE_STORE_REQ;
end end
if (wbuffer_empty_i && !mshr_vld_q) begin if (wbuffer_empty_i && !mshr_vld_q) begin
@ -515,7 +522,7 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
// flush the cache // flush the cache
FLUSH: begin FLUSH: begin
// internal flush signal // internal flush signal
flush_en = 1'b1; flush_en = 1'b1;
if (flush_done) begin if (flush_done) begin
state_d = IDLE; state_d = IDLE;
flush_ack_o = flush_ack_q; flush_ack_o = flush_ack_q;
@ -530,7 +537,7 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
amo_sel = 1'b1; amo_sel = 1'b1;
// if this is an LR, we need to consult the backoff counter // if this is an LR, we need to consult the backoff counter
if ((amo_req_i.amo_op != AMO_LR) || sc_backoff_over) begin if ((amo_req_i.amo_op != AMO_LR) || sc_backoff_over) begin
mem_data_req_o = 1'b1; mem_data_req_o = 1'b1;
if (mem_data_ack_i) begin if (mem_data_ack_i) begin
state_d = AMO_WAIT; state_d = AMO_WAIT;
end end
@ -550,66 +557,72 @@ module wt_dcache_missunit import ariane_pkg::*; import wt_cache_pkg::*; #(
// we should never get here // we should never get here
state_d = IDLE; state_d = IDLE;
end end
endcase // state_q endcase // state_q
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// ff's // ff's
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if (!rst_ni) begin if (!rst_ni) begin
state_q <= FLUSH; state_q <= FLUSH;
cnt_q <= '0; cnt_q <= '0;
enable_q <= '0; enable_q <= '0;
flush_ack_q <= '0; flush_ack_q <= '0;
mshr_vld_q <= '0; mshr_vld_q <= '0;
mshr_vld_q1 <= '0; mshr_vld_q1 <= '0;
mshr_q <= '0; mshr_q <= '0;
mshr_rdrd_collision_q <= '0; mshr_rdrd_collision_q <= '0;
miss_req_masked_q <= '0; miss_req_masked_q <= '0;
amo_req_q <= '0; amo_req_q <= '0;
stores_inflight_q <= '0; stores_inflight_q <= '0;
end else begin end else begin
state_q <= state_d; state_q <= state_d;
cnt_q <= cnt_d; cnt_q <= cnt_d;
enable_q <= enable_d; enable_q <= enable_d;
flush_ack_q <= flush_ack_d; flush_ack_q <= flush_ack_d;
mshr_vld_q <= mshr_vld_d; mshr_vld_q <= mshr_vld_d;
mshr_vld_q1 <= mshr_vld_q; mshr_vld_q1 <= mshr_vld_q;
mshr_q <= mshr_d; mshr_q <= mshr_d;
mshr_rdrd_collision_q <= mshr_rdrd_collision_d; mshr_rdrd_collision_q <= mshr_rdrd_collision_d;
miss_req_masked_q <= miss_req_masked_d; miss_req_masked_q <= miss_req_masked_d;
amo_req_q <= amo_req_d; amo_req_q <= amo_req_d;
stores_inflight_q <= stores_inflight_d; stores_inflight_q <= stores_inflight_d;
end
end end
end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// assertions // assertions
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
//pragma translate_off //pragma translate_off
`ifndef VERILATOR `ifndef VERILATOR
read_tid : assert property ( read_tid :
assert property (
@(posedge clk_i) disable iff (!rst_ni) mshr_vld_q |-> mem_rtrn_vld_i |-> load_ack |-> mem_rtrn_i.tid == mshr_q.id) @(posedge clk_i) disable iff (!rst_ni) mshr_vld_q |-> mem_rtrn_vld_i |-> load_ack |-> mem_rtrn_i.tid == mshr_q.id)
else $fatal(1,"[l1 dcache missunit] TID of load response doesn't match"); else $fatal(1, "[l1 dcache missunit] TID of load response doesn't match");
read_ports : assert property ( read_ports :
assert property (
@(posedge clk_i) disable iff (!rst_ni) |miss_req_i[NumPorts-2:0] |-> miss_we_i[NumPorts-2:0] == 0) @(posedge clk_i) disable iff (!rst_ni) |miss_req_i[NumPorts-2:0] |-> miss_we_i[NumPorts-2:0] == 0)
else $fatal(1,"[l1 dcache missunit] only last port can issue write requests"); else $fatal(1, "[l1 dcache missunit] only last port can issue write requests");
write_port : assert property ( write_port :
assert property (
@(posedge clk_i) disable iff (!rst_ni) miss_req_i[NumPorts-1] |-> miss_we_i[NumPorts-1]) @(posedge clk_i) disable iff (!rst_ni) miss_req_i[NumPorts-1] |-> miss_we_i[NumPorts-1])
else $fatal(1,"[l1 dcache missunit] last port can only issue write requests"); else $fatal(1, "[l1 dcache missunit] last port can only issue write requests");
initial begin initial begin
// assert wrong parameterizations // assert wrong parameterizations
assert (NumPorts>=2) assert (NumPorts >= 2)
else $fatal(1,"[l1 dcache missunit] at least two ports are required (one read port, one write port)"); else
end $fatal(
1, "[l1 dcache missunit] at least two ports are required (one read port, one write port)"
);
end
`endif `endif
//pragma translate_on //pragma translate_on
endmodule // wt_dcache_missunit endmodule // wt_dcache_missunit

View file

@ -49,69 +49,73 @@
// word has been evicted from the write buffer. // word has been evicted from the write buffer.
module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #( module wt_dcache_wbuffer
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
input logic cache_en_i, // writes are treated as NC if disabled input logic cache_en_i, // writes are treated as NC if disabled
output logic empty_o, // asserted if no data is present in write buffer output logic empty_o, // asserted if no data is present in write buffer
output logic not_ni_o, // asserted if no ni data is present in write buffer output logic not_ni_o, // asserted if no ni data is present in write buffer
// core request ports // core request ports
input dcache_req_i_t req_port_i, input dcache_req_i_t req_port_i,
output dcache_req_o_t req_port_o, output dcache_req_o_t req_port_o,
// interface to miss handler // interface to miss handler
input logic miss_ack_i, input logic miss_ack_i,
output logic [riscv::PLEN-1:0] miss_paddr_o, output logic [riscv::PLEN-1:0] miss_paddr_o,
output logic miss_req_o, output logic miss_req_o,
output logic miss_we_o, // always 1 here output logic miss_we_o, // always 1 here
output riscv::xlen_t miss_wdata_o, output riscv::xlen_t miss_wdata_o,
output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o, output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o,
output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // unused here (set to 0) output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // unused here (set to 0)
output logic miss_nc_o, // request to I/O space output logic miss_nc_o, // request to I/O space
output logic [2:0] miss_size_o, // output logic [2:0] miss_size_o, //
output logic [CACHE_ID_WIDTH-1:0] miss_id_o, // ID of this transaction (wbuffer uses all IDs from 0 to DCACHE_MAX_TX-1) output logic [CACHE_ID_WIDTH-1:0] miss_id_o, // ID of this transaction (wbuffer uses all IDs from 0 to DCACHE_MAX_TX-1)
// write responses from memory // write responses from memory
input logic miss_rtrn_vld_i, input logic miss_rtrn_vld_i,
input logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_i, // transaction ID to clear input logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_i, // transaction ID to clear
// cache read interface // cache read interface
output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later
output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o, output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o, output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o,
output logic rd_req_o, // read the word at offset off_i[:3] in all ways output logic rd_req_o, // read the word at offset off_i[:3] in all ways
output logic rd_tag_only_o, // set to 1 here as we do not have to read the data arrays output logic rd_tag_only_o, // set to 1 here as we do not have to read the data arrays
input logic rd_ack_i, input logic rd_ack_i,
input riscv::xlen_t rd_data_i, // unused input riscv::xlen_t rd_data_i, // unused
input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i, // unused input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i, // unused
input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i, input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i,
// cacheline writes // cacheline writes
input logic wr_cl_vld_i, input logic wr_cl_vld_i,
input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i, input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i,
// cache word write interface // cache word write interface
output logic [DCACHE_SET_ASSOC-1:0] wr_req_o, output logic [DCACHE_SET_ASSOC-1:0] wr_req_o,
input logic wr_ack_i, input logic wr_ack_i,
output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_o, output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_o,
output logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_o, output logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_o,
output riscv::xlen_t wr_data_o, output riscv::xlen_t wr_data_o,
output logic [(riscv::XLEN/8)-1:0] wr_data_be_o, output logic [(riscv::XLEN/8)-1:0] wr_data_be_o,
output logic [DCACHE_USER_WIDTH-1:0] wr_user_o, output logic [DCACHE_USER_WIDTH-1:0] wr_user_o,
// to forwarding logic and miss unit // to forwarding logic and miss unit
output wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_o, output wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_o,
output logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_o, // used to check for address collisions with read operations output logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_o, // used to check for address collisions with read operations
output logic [DCACHE_MAX_TX-1:0] tx_vld_o output logic [DCACHE_MAX_TX-1:0] tx_vld_o
); );
tx_stat_t [DCACHE_MAX_TX-1:0] tx_stat_d, tx_stat_q; tx_stat_t [DCACHE_MAX_TX-1:0] tx_stat_d, tx_stat_q;
wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_d, wbuffer_q; wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_d, wbuffer_q;
logic [DCACHE_WBUF_DEPTH-1:0] valid; logic [DCACHE_WBUF_DEPTH-1:0] valid;
logic [DCACHE_WBUF_DEPTH-1:0] dirty; logic [DCACHE_WBUF_DEPTH-1:0] dirty;
logic [DCACHE_WBUF_DEPTH-1:0] tocheck; logic [DCACHE_WBUF_DEPTH-1:0] tocheck;
logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh, inval_hit; logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh, inval_hit;
//logic [DCACHE_WBUF_DEPTH-1:0][7:0] bdirty; //logic [DCACHE_WBUF_DEPTH-1:0][7:0] bdirty;
logic [DCACHE_WBUF_DEPTH-1:0][(riscv::XLEN/8)-1:0] bdirty; logic [DCACHE_WBUF_DEPTH-1:0][(riscv::XLEN/8)-1:0] bdirty;
logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] next_ptr, dirty_ptr, hit_ptr, wr_ptr, check_ptr_d, check_ptr_q, check_ptr_q1, rtrn_ptr; logic [$clog2(DCACHE_WBUF_DEPTH)-1:0]
next_ptr, dirty_ptr, hit_ptr, wr_ptr, check_ptr_d, check_ptr_q, check_ptr_q1, rtrn_ptr;
logic [CACHE_ID_WIDTH-1:0] tx_id, rtrn_id; logic [CACHE_ID_WIDTH-1:0] tx_id, rtrn_id;
logic [riscv::XLEN_ALIGN_BYTES-1:0] bdirty_off; logic [riscv::XLEN_ALIGN_BYTES-1:0] bdirty_off;
@ -129,54 +133,66 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
logic wr_cl_vld_q, wr_cl_vld_d; logic wr_cl_vld_q, wr_cl_vld_d;
logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_q, wr_cl_idx_d; logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_q, wr_cl_idx_d;
logic [riscv::PLEN-1:0] debug_paddr [DCACHE_WBUF_DEPTH-1:0]; logic [riscv::PLEN-1:0] debug_paddr[DCACHE_WBUF_DEPTH-1:0];
wbuffer_t wbuffer_check_mux, wbuffer_dirty_mux; wbuffer_t wbuffer_check_mux, wbuffer_dirty_mux;
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// misc // misc
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] miss_tag; logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] miss_tag;
logic is_nc_miss; logic is_nc_miss;
logic is_ni; logic is_ni;
assign miss_tag = miss_paddr_o[ariane_pkg::DCACHE_INDEX_WIDTH+:ariane_pkg::DCACHE_TAG_WIDTH]; assign miss_tag = miss_paddr_o[ariane_pkg::DCACHE_INDEX_WIDTH+:ariane_pkg::DCACHE_TAG_WIDTH];
assign is_nc_miss = !config_pkg::is_inside_cacheable_regions(CVA6Cfg, {{64-DCACHE_TAG_WIDTH-DCACHE_INDEX_WIDTH{1'b0}}, miss_tag, {DCACHE_INDEX_WIDTH{1'b0}}}); assign is_nc_miss = !config_pkg::is_inside_cacheable_regions(
CVA6Cfg,
{
{64 - DCACHE_TAG_WIDTH - DCACHE_INDEX_WIDTH{1'b0}}, miss_tag, {DCACHE_INDEX_WIDTH{1'b0}}
}
);
assign miss_nc_o = !cache_en_i || is_nc_miss; assign miss_nc_o = !cache_en_i || is_nc_miss;
// Non-idempotent if request goes to NI region // Non-idempotent if request goes to NI region
assign is_ni = config_pkg::is_inside_nonidempotent_regions(CVA6Cfg, {{64-DCACHE_TAG_WIDTH-DCACHE_INDEX_WIDTH{1'b0}}, req_port_i.address_tag, {DCACHE_INDEX_WIDTH{1'b0}}}); assign is_ni = config_pkg::is_inside_nonidempotent_regions(
CVA6Cfg,
{
{64 - DCACHE_TAG_WIDTH - DCACHE_INDEX_WIDTH{1'b0}},
req_port_i.address_tag,
{DCACHE_INDEX_WIDTH{1'b0}}
}
);
assign miss_we_o = 1'b1; assign miss_we_o = 1'b1;
assign miss_vld_bits_o = '0; assign miss_vld_bits_o = '0;
assign wbuffer_data_o = wbuffer_q; assign wbuffer_data_o = wbuffer_q;
for (genvar k=0; k<DCACHE_MAX_TX;k++) begin : gen_tx_vld for (genvar k = 0; k < DCACHE_MAX_TX; k++) begin : gen_tx_vld
assign tx_vld_o[k] = tx_stat_q[k].vld; assign tx_vld_o[k] = tx_stat_q[k].vld;
assign tx_paddr_o[k] = wbuffer_q[tx_stat_q[k].ptr].wtag<<riscv::XLEN_ALIGN_BYTES; assign tx_paddr_o[k] = wbuffer_q[tx_stat_q[k].ptr].wtag << riscv::XLEN_ALIGN_BYTES;
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// openpiton does not understand byte enable sigs // openpiton does not understand byte enable sigs
// need to convert to the four cases: // need to convert to the four cases:
// 00: byte // 00: byte
// 01: halfword // 01: halfword
// 10: word // 10: word
// 11: dword // 11: dword
// non-contiguous writes need to be serialized! // non-contiguous writes need to be serialized!
// e.g. merged dwords with BE like this: 8'b01001100 // e.g. merged dwords with BE like this: 8'b01001100
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// get byte offset // get byte offset
lzc #( lzc #(
.WIDTH ( riscv::XLEN/8 ) .WIDTH(riscv::XLEN / 8)
) i_vld_bdirty ( ) i_vld_bdirty (
.in_i ( bdirty[dirty_ptr] ), .in_i (bdirty[dirty_ptr]),
.cnt_o ( bdirty_off ), .cnt_o (bdirty_off),
.empty_o ( ) .empty_o()
); );
// add the offset to the physical base address of this buffer entry // add the offset to the physical base address of this buffer entry
assign miss_paddr_o = {wbuffer_dirty_mux.wtag, bdirty_off}; assign miss_paddr_o = {wbuffer_dirty_mux.wtag, bdirty_off};
assign miss_id_o = tx_id; assign miss_id_o = tx_id;
// is there any dirty word to be transmitted, and is there a free TX slot? // is there any dirty word to be transmitted, and is there a free TX slot?
assign miss_req_o = (|dirty) && free_tx_slots; assign miss_req_o = (|dirty) && free_tx_slots;
@ -186,43 +202,51 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
// we have to split unaligned data into multiple transfers (see toSize64) // we have to split unaligned data into multiple transfers (see toSize64)
// e.g. if we have the following valid bytes: 0011_1001 -> TX0: 0000_0001, TX1: 0000_1000, TX2: 0011_0000 // e.g. if we have the following valid bytes: 0011_1001 -> TX0: 0000_0001, TX1: 0000_1000, TX2: 0011_0000
assign miss_size_o = riscv::IS_XLEN64 ? toSize64(bdirty[dirty_ptr]): assign miss_size_o = riscv::IS_XLEN64 ? toSize64(bdirty[dirty_ptr]) : toSize32(bdirty[dirty_ptr]);
toSize32(bdirty[dirty_ptr]);
// replicate transfers shorter than a dword // replicate transfers shorter than a dword
assign miss_wdata_o = riscv::IS_XLEN64 ? repData64(wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0]): assign miss_wdata_o = riscv::IS_XLEN64 ? repData64(
repData32(wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0]); wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0]
) : repData32(
wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0]
);
if (ariane_pkg::DATA_USER_EN) begin if (ariane_pkg::DATA_USER_EN) begin
assign miss_wuser_o = riscv::IS_XLEN64 ? repData64(wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0]): assign miss_wuser_o = riscv::IS_XLEN64 ? repData64(
repData32(wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0]); wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0]
) : repData32(
wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0]
);
end else begin end else begin
assign miss_wuser_o = '0; assign miss_wuser_o = '0;
end end
assign tx_be = riscv::IS_XLEN64 ? to_byte_enable8(bdirty_off, miss_size_o[1:0]): assign tx_be = riscv::IS_XLEN64 ? to_byte_enable8(
to_byte_enable4(bdirty_off, miss_size_o[1:0]); bdirty_off, miss_size_o[1:0]
) : to_byte_enable4(
bdirty_off, miss_size_o[1:0]
);
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// TX status registers and ID counters // TX status registers and ID counters
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// TODO: todo: make this fall through if timing permits it // TODO: todo: make this fall through if timing permits it
fifo_v3 #( fifo_v3 #(
.FALL_THROUGH ( 1'b0 ), .FALL_THROUGH(1'b0),
.DATA_WIDTH ( $clog2(DCACHE_MAX_TX) ), .DATA_WIDTH ($clog2(DCACHE_MAX_TX)),
.DEPTH ( DCACHE_MAX_TX ) .DEPTH (DCACHE_MAX_TX)
) i_rtrn_id_fifo ( ) i_rtrn_id_fifo (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i (1'b0),
.testmode_i ( 1'b0 ), .testmode_i(1'b0),
.full_o ( ), .full_o (),
.empty_o ( rtrn_empty ), .empty_o (rtrn_empty),
.usage_o ( ), .usage_o (),
.data_i ( miss_rtrn_id_i ), .data_i (miss_rtrn_id_i),
.push_i ( miss_rtrn_vld_i ), .push_i (miss_rtrn_vld_i),
.data_o ( rtrn_id ), .data_o (rtrn_id),
.pop_i ( evict ) .pop_i (evict)
); );
always_comb begin : p_tx_stat always_comb begin : p_tx_stat
@ -258,61 +282,61 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
// next word to lookup in the cache // next word to lookup in the cache
rr_arb_tree #( rr_arb_tree #(
.NumIn (DCACHE_MAX_TX), .NumIn (DCACHE_MAX_TX),
.LockIn (1'b1), .LockIn (1'b1),
.DataWidth (1) .DataWidth(1)
) i_tx_id_rr ( ) i_tx_id_rr (
.clk_i (clk_i ), .clk_i (clk_i),
.rst_ni (rst_ni ), .rst_ni (rst_ni),
.flush_i('0 ), .flush_i('0),
.rr_i ('0 ), .rr_i ('0),
.req_i (~tx_vld_o ), .req_i (~tx_vld_o),
.gnt_o ( ), .gnt_o (),
.data_i ('0 ), .data_i ('0),
.gnt_i (dirty_rd_en ), .gnt_i (dirty_rd_en),
.req_o ( ), .req_o (),
.data_o ( ), .data_o (),
.idx_o (tx_id ) .idx_o (tx_id)
); );
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// cache readout & update // cache readout & update
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
assign extract_tag = rd_paddr>>DCACHE_INDEX_WIDTH; assign extract_tag = rd_paddr >> DCACHE_INDEX_WIDTH;
assign rd_tag_d = extract_tag[DCACHE_TAG_WIDTH-1:0]; assign rd_tag_d = extract_tag[DCACHE_TAG_WIDTH-1:0];
// trigger TAG readout in cache // trigger TAG readout in cache
assign rd_tag_only_o = 1'b1; assign rd_tag_only_o = 1'b1;
assign rd_paddr = wbuffer_check_mux.wtag<<riscv::XLEN_ALIGN_BYTES; assign rd_paddr = wbuffer_check_mux.wtag << riscv::XLEN_ALIGN_BYTES;
assign rd_req_o = |tocheck; assign rd_req_o = |tocheck;
assign rd_tag_o = rd_tag_q;//delay by one cycle assign rd_tag_o = rd_tag_q; //delay by one cycle
assign rd_idx_o = rd_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH]; assign rd_idx_o = rd_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
assign rd_off_o = rd_paddr[DCACHE_OFFSET_WIDTH-1:0]; assign rd_off_o = rd_paddr[DCACHE_OFFSET_WIDTH-1:0];
assign check_en_d = rd_req_o & rd_ack_i; assign check_en_d = rd_req_o & rd_ack_i;
// cache update port // cache update port
assign rtrn_ptr = tx_stat_q[rtrn_id].ptr; assign rtrn_ptr = tx_stat_q[rtrn_id].ptr;
// if we wrote into a word while it was in-flight, we cannot write the dirty bytes to the cache // if we wrote into a word while it was in-flight, we cannot write the dirty bytes to the cache
// when the TX returns // when the TX returns
assign wr_data_be_o = tx_stat_q[rtrn_id].be & (~wbuffer_q[rtrn_ptr].dirty); assign wr_data_be_o = tx_stat_q[rtrn_id].be & (~wbuffer_q[rtrn_ptr].dirty);
assign wr_paddr = wbuffer_q[rtrn_ptr].wtag<<riscv::XLEN_ALIGN_BYTES; assign wr_paddr = wbuffer_q[rtrn_ptr].wtag << riscv::XLEN_ALIGN_BYTES;
assign wr_idx_o = wr_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH]; assign wr_idx_o = wr_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH];
assign wr_off_o = wr_paddr[DCACHE_OFFSET_WIDTH-1:0]; assign wr_off_o = wr_paddr[DCACHE_OFFSET_WIDTH-1:0];
assign wr_data_o = wbuffer_q[rtrn_ptr].data; assign wr_data_o = wbuffer_q[rtrn_ptr].data;
assign wr_user_o = wbuffer_q[rtrn_ptr].user; assign wr_user_o = wbuffer_q[rtrn_ptr].user;
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// readout of status bits, index calculation // readout of status bits, index calculation
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
logic [DCACHE_WBUF_DEPTH-1:0][DCACHE_CL_IDX_WIDTH-1:0] wtag_comp; logic [DCACHE_WBUF_DEPTH-1:0][DCACHE_CL_IDX_WIDTH-1:0] wtag_comp;
assign wr_cl_vld_d = wr_cl_vld_i; assign wr_cl_vld_d = wr_cl_vld_i;
assign wr_cl_idx_d = wr_cl_idx_i; assign wr_cl_idx_d = wr_cl_idx_i;
for (genvar k=0; k<DCACHE_WBUF_DEPTH; k++) begin : gen_flags for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_flags
// only for debug, will be pruned // only for debug, will be pruned
assign debug_paddr[k] = wbuffer_q[k].wtag << riscv::XLEN_ALIGN_BYTES; assign debug_paddr[k] = wbuffer_q[k].wtag << riscv::XLEN_ALIGN_BYTES;
@ -322,8 +346,8 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
assign bdirty[k] = (|wbuffer_q[k].txblock) ? '0 : wbuffer_q[k].dirty & wbuffer_q[k].valid; assign bdirty[k] = (|wbuffer_q[k].txblock) ? '0 : wbuffer_q[k].dirty & wbuffer_q[k].valid;
assign dirty[k] = |bdirty[k]; assign dirty[k] = |bdirty[k];
assign valid[k] = |wbuffer_q[k].valid; assign valid[k] = |wbuffer_q[k].valid;
assign wbuffer_hit_oh[k] = valid[k] & (wbuffer_q[k].wtag == {req_port_i.address_tag, req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]}); assign wbuffer_hit_oh[k] = valid[k] & (wbuffer_q[k].wtag == {req_port_i.address_tag, req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]});
// checks if an invalidation/cache refill hits a particular word // checks if an invalidation/cache refill hits a particular word
@ -334,70 +358,70 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
(wr_cl_vld_q & valid[k] & (wtag_comp[k] == wr_cl_idx_q)); (wr_cl_vld_q & valid[k] & (wtag_comp[k] == wr_cl_idx_q));
// these word have to be looked up in the cache // these word have to be looked up in the cache
assign tocheck[k] = (~wbuffer_q[k].checked) & valid[k]; assign tocheck[k] = (~wbuffer_q[k].checked) & valid[k];
end end
assign wr_ptr = (|wbuffer_hit_oh) ? hit_ptr : next_ptr; assign wr_ptr = (|wbuffer_hit_oh) ? hit_ptr : next_ptr;
assign rdy = (|wbuffer_hit_oh) | (~full); assign rdy = (|wbuffer_hit_oh) | (~full);
// next free entry in the buffer // next free entry in the buffer
lzc #( lzc #(
.WIDTH ( DCACHE_WBUF_DEPTH ) .WIDTH(DCACHE_WBUF_DEPTH)
) i_vld_lzc ( ) i_vld_lzc (
.in_i ( ~valid ), .in_i (~valid),
.cnt_o ( next_ptr ), .cnt_o (next_ptr),
.empty_o ( full ) .empty_o(full)
); );
// get index of hit // get index of hit
lzc #( lzc #(
.WIDTH ( DCACHE_WBUF_DEPTH ) .WIDTH(DCACHE_WBUF_DEPTH)
) i_hit_lzc ( ) i_hit_lzc (
.in_i ( wbuffer_hit_oh ), .in_i (wbuffer_hit_oh),
.cnt_o ( hit_ptr ), .cnt_o (hit_ptr),
.empty_o ( ) .empty_o()
); );
// next dirty word to serve // next dirty word to serve
rr_arb_tree #( rr_arb_tree #(
.NumIn ( DCACHE_WBUF_DEPTH ), .NumIn (DCACHE_WBUF_DEPTH),
.LockIn ( 1'b1 ), .LockIn (1'b1),
.DataType ( wbuffer_t ) .DataType(wbuffer_t)
) i_dirty_rr ( ) i_dirty_rr (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i( '0 ), .flush_i('0),
.rr_i ( '0 ), .rr_i ('0),
.req_i ( dirty ), .req_i (dirty),
.gnt_o ( ), .gnt_o (),
.data_i ( wbuffer_q ), .data_i (wbuffer_q),
.gnt_i ( dirty_rd_en ), .gnt_i (dirty_rd_en),
.req_o ( ), .req_o (),
.data_o ( wbuffer_dirty_mux ), .data_o (wbuffer_dirty_mux),
.idx_o ( dirty_ptr ) .idx_o (dirty_ptr)
); );
// next word to lookup in the cache // next word to lookup in the cache
rr_arb_tree #( rr_arb_tree #(
.NumIn ( DCACHE_WBUF_DEPTH ), .NumIn (DCACHE_WBUF_DEPTH),
.DataType ( wbuffer_t ) .DataType(wbuffer_t)
) i_clean_rr ( ) i_clean_rr (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i( '0 ), .flush_i('0),
.rr_i ( '0 ), .rr_i ('0),
.req_i ( tocheck ), .req_i (tocheck),
.gnt_o ( ), .gnt_o (),
.data_i ( wbuffer_q ), .data_i (wbuffer_q),
.gnt_i ( check_en_d ), .gnt_i (check_en_d),
.req_o ( ), .req_o (),
.data_o ( wbuffer_check_mux ), .data_o (wbuffer_check_mux),
.idx_o ( check_ptr_d ) .idx_o (check_ptr_d)
); );
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// update logic // update logic
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
assign req_port_o.data_rvalid = '0; assign req_port_o.data_rvalid = '0;
assign req_port_o.data_rdata = '0; assign req_port_o.data_rdata = '0;
@ -406,7 +430,7 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
assign rd_hit_oh_d = rd_hit_oh_i; assign rd_hit_oh_d = rd_hit_oh_i;
logic ni_inside,ni_conflict; logic ni_inside, ni_conflict;
assign ni_inside = |ni_pending_q; assign ni_inside = |ni_pending_q;
assign ni_conflict = is_ni && ni_inside; assign ni_conflict = is_ni && ni_inside;
assign not_ni_o = !ni_inside; assign not_ni_o = !ni_inside;
@ -424,13 +448,13 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
if (check_en_q1) begin if (check_en_q1) begin
if (|wbuffer_q[check_ptr_q1].valid) begin if (|wbuffer_q[check_ptr_q1].valid) begin
wbuffer_d[check_ptr_q1].checked = 1'b1; wbuffer_d[check_ptr_q1].checked = 1'b1;
wbuffer_d[check_ptr_q1].hit_oh = rd_hit_oh_q; wbuffer_d[check_ptr_q1].hit_oh = rd_hit_oh_q;
end end
end end
// if an invalidation or cache line refill comes in and hits on the write buffer, // if an invalidation or cache line refill comes in and hits on the write buffer,
// we have to discard our knowledge of the corresponding cacheline state // we have to discard our knowledge of the corresponding cacheline state
for (int k=0; k<DCACHE_WBUF_DEPTH; k++) begin for (int k = 0; k < DCACHE_WBUF_DEPTH; k++) begin
if (inval_hit[k]) begin if (inval_hit[k]) begin
wbuffer_d[k].checked = 1'b0; wbuffer_d[k].checked = 1'b0;
end end
@ -439,7 +463,7 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
// once TX write response came back, we can clear the TX block. if it was not dirty, we // once TX write response came back, we can clear the TX block. if it was not dirty, we
// can completely evict it - otherwise we have to leave it there for retransmission // can completely evict it - otherwise we have to leave it there for retransmission
if (evict) begin if (evict) begin
for (int k=0; k<(riscv::XLEN/8); k++) begin for (int k = 0; k < (riscv::XLEN / 8); k++) begin
if (tx_stat_q[rtrn_id].be[k]) begin if (tx_stat_q[rtrn_id].be[k]) begin
wbuffer_d[rtrn_ptr].txblock[k] = 1'b0; wbuffer_d[rtrn_ptr].txblock[k] = 1'b0;
if (!wbuffer_q[rtrn_ptr].dirty[k]) begin if (!wbuffer_q[rtrn_ptr].dirty[k]) begin
@ -461,7 +485,7 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
// mark bytes sent out to the memory system // mark bytes sent out to the memory system
if (miss_req_o && miss_ack_i) begin if (miss_req_o && miss_ack_i) begin
dirty_rd_en = 1'b1; dirty_rd_en = 1'b1;
for (int k=0; k<(riscv::XLEN/8); k++) begin for (int k = 0; k < (riscv::XLEN / 8); k++) begin
if (tx_be[k]) begin if (tx_be[k]) begin
wbuffer_d[dirty_ptr].dirty[k] = 1'b0; wbuffer_d[dirty_ptr].dirty[k] = 1'b0;
wbuffer_d[dirty_ptr].txblock[k] = 1'b1; wbuffer_d[dirty_ptr].txblock[k] = 1'b1;
@ -473,25 +497,28 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
if (req_port_i.data_req && rdy) begin if (req_port_i.data_req && rdy) begin
// in case we have an NI address, need to drain the buffer first // in case we have an NI address, need to drain the buffer first
// in case we are serving an NI address, we block until it is written to memory // in case we are serving an NI address, we block until it is written to memory
if (!ni_conflict) begin //empty of NI operations if (!ni_conflict) begin //empty of NI operations
wbuffer_wren = 1'b1; wbuffer_wren = 1'b1;
req_port_o.data_gnt = 1'b1; req_port_o.data_gnt = 1'b1;
ni_pending_d[wr_ptr] = is_ni; ni_pending_d[wr_ptr] = is_ni;
wbuffer_d[wr_ptr].checked = 1'b0; wbuffer_d[wr_ptr].checked = 1'b0;
wbuffer_d[wr_ptr].wtag = {req_port_i.address_tag, req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]}; wbuffer_d[wr_ptr].wtag = {
req_port_i.address_tag,
req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]
};
// mark bytes as dirty // mark bytes as dirty
for (int k=0; k<(riscv::XLEN/8); k++) begin for (int k = 0; k < (riscv::XLEN / 8); k++) begin
if (req_port_i.data_be[k]) begin if (req_port_i.data_be[k]) begin
wbuffer_d[wr_ptr].valid[k] = 1'b1; wbuffer_d[wr_ptr].valid[k] = 1'b1;
wbuffer_d[wr_ptr].dirty[k] = 1'b1; wbuffer_d[wr_ptr].dirty[k] = 1'b1;
wbuffer_d[wr_ptr].data[k*8 +: 8] = req_port_i.data_wdata[k*8 +: 8]; wbuffer_d[wr_ptr].data[k*8+:8] = req_port_i.data_wdata[k*8+:8];
if (ariane_pkg::DATA_USER_EN) begin if (ariane_pkg::DATA_USER_EN) begin
wbuffer_d[wr_ptr].user[k*8 +: 8] = req_port_i.data_wuser[k*8 +: 8]; wbuffer_d[wr_ptr].user[k*8+:8] = req_port_i.data_wuser[k*8+:8];
end else begin end else begin
wbuffer_d[wr_ptr].user[k*8 +: 8] = '0; wbuffer_d[wr_ptr].user[k*8+:8] = '0;
end end
end end
end end
@ -500,86 +527,96 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #(
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// ff's // ff's
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if (!rst_ni) begin if (!rst_ni) begin
wbuffer_q <= '{default: '0}; wbuffer_q <= '{default: '0};
tx_stat_q <= '{default: '0}; tx_stat_q <= '{default: '0};
ni_pending_q <= '0; ni_pending_q <= '0;
check_ptr_q <= '0; check_ptr_q <= '0;
check_ptr_q1 <= '0; check_ptr_q1 <= '0;
check_en_q <= '0; check_en_q <= '0;
check_en_q1 <= '0; check_en_q1 <= '0;
rd_tag_q <= '0; rd_tag_q <= '0;
rd_hit_oh_q <= '0; rd_hit_oh_q <= '0;
wr_cl_vld_q <= '0; wr_cl_vld_q <= '0;
wr_cl_idx_q <= '0; wr_cl_idx_q <= '0;
end else begin end else begin
wbuffer_q <= wbuffer_d; wbuffer_q <= wbuffer_d;
tx_stat_q <= tx_stat_d; tx_stat_q <= tx_stat_d;
ni_pending_q <= ni_pending_d; ni_pending_q <= ni_pending_d;
check_ptr_q <= check_ptr_d; check_ptr_q <= check_ptr_d;
check_ptr_q1 <= check_ptr_q; check_ptr_q1 <= check_ptr_q;
check_en_q <= check_en_d; check_en_q <= check_en_d;
check_en_q1 <= check_en_q; check_en_q1 <= check_en_q;
rd_tag_q <= rd_tag_d; rd_tag_q <= rd_tag_d;
rd_hit_oh_q <= rd_hit_oh_d; rd_hit_oh_q <= rd_hit_oh_d;
wr_cl_vld_q <= wr_cl_vld_d; wr_cl_vld_q <= wr_cl_vld_d;
wr_cl_idx_q <= wr_cl_idx_d; wr_cl_idx_q <= wr_cl_idx_d;
end end
end end
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// assertions // assertions
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
//pragma translate_off //pragma translate_off
`ifndef VERILATOR `ifndef VERILATOR
hot1: assert property ( hot1 :
@(posedge clk_i) disable iff (!rst_ni) req_port_i.data_req |-> $onehot0(wbuffer_hit_oh)) assert property (@(posedge clk_i) disable iff (!rst_ni) req_port_i.data_req |-> $onehot0(
else $fatal(1,"[l1 dcache wbuffer] wbuffer_hit_oh signal must be hot1"); wbuffer_hit_oh
))
else $fatal(1, "[l1 dcache wbuffer] wbuffer_hit_oh signal must be hot1");
tx_status: assert property ( tx_status :
assert property (
@(posedge clk_i) disable iff (!rst_ni) evict && miss_ack_i && miss_req_o |-> (tx_id != rtrn_id)) @(posedge clk_i) disable iff (!rst_ni) evict && miss_ack_i && miss_req_o |-> (tx_id != rtrn_id))
else $fatal(1,"[l1 dcache wbuffer] cannot allocate and clear same tx slot id in the same cycle"); else $fatal(1, "[l1 dcache wbuffer] cannot allocate and clear same tx slot id in the same cycle");
tx_valid0: assert property ( tx_valid0 :
@(posedge clk_i) disable iff (!rst_ni) evict |-> tx_stat_q[rtrn_id].vld) assert property (@(posedge clk_i) disable iff (!rst_ni) evict |-> tx_stat_q[rtrn_id].vld)
else $fatal(1,"[l1 dcache wbuffer] evicting invalid transaction slot"); else $fatal(1, "[l1 dcache wbuffer] evicting invalid transaction slot");
tx_valid1: assert property ( tx_valid1 :
@(posedge clk_i) disable iff (!rst_ni) evict |-> |wbuffer_q[rtrn_ptr].valid) assert property (@(posedge clk_i) disable iff (!rst_ni) evict |-> |wbuffer_q[rtrn_ptr].valid)
else $fatal(1,"[l1 dcache wbuffer] wbuffer entry corresponding to this transaction is invalid"); else $fatal(1, "[l1 dcache wbuffer] wbuffer entry corresponding to this transaction is invalid");
write_full: assert property ( write_full :
assert property (
@(posedge clk_i) disable iff (!rst_ni) req_port_i.data_req |-> req_port_o.data_gnt |-> ((!full) || (|wbuffer_hit_oh))) @(posedge clk_i) disable iff (!rst_ni) req_port_i.data_req |-> req_port_o.data_gnt |-> ((!full) || (|wbuffer_hit_oh)))
else $fatal(1,"[l1 dcache wbuffer] cannot write if full or no hit"); else $fatal(1, "[l1 dcache wbuffer] cannot write if full or no hit");
unused0: assert property ( unused0 :
@(posedge clk_i) disable iff (!rst_ni) !req_port_i.tag_valid) assert property (@(posedge clk_i) disable iff (!rst_ni) !req_port_i.tag_valid)
else $fatal(1,"[l1 dcache wbuffer] req_port_i.tag_valid should not be asserted"); else $fatal(1, "[l1 dcache wbuffer] req_port_i.tag_valid should not be asserted");
unused1: assert property ( unused1 :
@(posedge clk_i) disable iff (!rst_ni) !req_port_i.kill_req) assert property (@(posedge clk_i) disable iff (!rst_ni) !req_port_i.kill_req)
else $fatal(1,"[l1 dcache wbuffer] req_port_i.kill_req should not be asserted"); else $fatal(1, "[l1 dcache wbuffer] req_port_i.kill_req should not be asserted");
for (genvar k=0; k<DCACHE_WBUF_DEPTH; k++) begin : gen_assert1 for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_assert1
for (genvar j=0; j<(riscv::XLEN/8); j++) begin : gen_assert2 for (genvar j = 0; j < (riscv::XLEN / 8); j++) begin : gen_assert2
byteStates: assert property ( byteStates :
assert property (
@(posedge clk_i) disable iff (!rst_ni) {wbuffer_q[k].valid[j], wbuffer_q[k].dirty[j], wbuffer_q[k].txblock[j]} inside {3'b000, 3'b110, 3'b101, 3'b111} ) @(posedge clk_i) disable iff (!rst_ni) {wbuffer_q[k].valid[j], wbuffer_q[k].dirty[j], wbuffer_q[k].txblock[j]} inside {3'b000, 3'b110, 3'b101, 3'b111} )
else $fatal(1,"[l1 dcache wbuffer] byte %02d of wbuffer entry %02d has invalid state: valid=%01b, dirty=%01b, txblock=%01b", else
j,k, $fatal(
1,
"[l1 dcache wbuffer] byte %02d of wbuffer entry %02d has invalid state: valid=%01b, dirty=%01b, txblock=%01b",
j,
k,
wbuffer_q[k].valid[j], wbuffer_q[k].valid[j],
wbuffer_q[k].dirty[j], wbuffer_q[k].dirty[j],
wbuffer_q[k].txblock[j]); wbuffer_q[k].txblock[j]
);
end end
end end
`endif `endif
//pragma translate_on //pragma translate_on
endmodule // wt_dcache_wbuffer endmodule // wt_dcache_wbuffer

View file

@ -49,51 +49,54 @@
// //
module wt_l15_adapter import ariane_pkg::*; import wt_cache_pkg::*; #( module wt_l15_adapter
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty import ariane_pkg::*;
import wt_cache_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
// icache // icache
input logic icache_data_req_i, input logic icache_data_req_i,
output logic icache_data_ack_o, output logic icache_data_ack_o,
input icache_req_t icache_data_i, input icache_req_t icache_data_i,
// returning packets must be consumed immediately // returning packets must be consumed immediately
output logic icache_rtrn_vld_o, output logic icache_rtrn_vld_o,
output icache_rtrn_t icache_rtrn_o, output icache_rtrn_t icache_rtrn_o,
// dcache // dcache
input logic dcache_data_req_i, input logic dcache_data_req_i,
output logic dcache_data_ack_o, output logic dcache_data_ack_o,
input dcache_req_t dcache_data_i, input dcache_req_t dcache_data_i,
// returning packets must be consumed immediately // returning packets must be consumed immediately
output logic dcache_rtrn_vld_o, output logic dcache_rtrn_vld_o,
output dcache_rtrn_t dcache_rtrn_o, output dcache_rtrn_t dcache_rtrn_o,
// L15 // L15
output l15_req_t l15_req_o, output l15_req_t l15_req_o,
input l15_rtrn_t l15_rtrn_i input l15_rtrn_t l15_rtrn_i
); );
// request path // request path
icache_req_t icache_data; icache_req_t icache_data;
logic icache_data_full, icache_data_empty; logic icache_data_full, icache_data_empty;
dcache_req_t dcache_data; dcache_req_t dcache_data;
logic dcache_data_full, dcache_data_empty; logic dcache_data_full, dcache_data_empty;
logic [1:0] arb_req, arb_ack; logic [1:0] arb_req, arb_ack;
logic arb_idx; logic arb_idx;
// return path // return path
logic rtrn_fifo_empty, rtrn_fifo_full, rtrn_fifo_pop; logic rtrn_fifo_empty, rtrn_fifo_full, rtrn_fifo_pop;
l15_rtrn_t rtrn_fifo_data; l15_rtrn_t rtrn_fifo_data;
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// request path to L15 // request path to L15
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// relevant l15 signals // relevant l15 signals
// l15_req_t l15_req_o.l15_rqtype; // see below for encoding // l15_req_t l15_req_o.l15_rqtype; // see below for encoding
@ -108,58 +111,58 @@ l15_rtrn_t rtrn_fifo_data;
// logic [L15_TLB_CSM_WIDTH-1:0] l15_req_o.l15_csm_data; // logic [L15_TLB_CSM_WIDTH-1:0] l15_req_o.l15_csm_data;
assign icache_data_ack_o = icache_data_req_i & ~icache_data_full; assign icache_data_ack_o = icache_data_req_i & ~icache_data_full;
assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full; assign dcache_data_ack_o = dcache_data_req_i & ~dcache_data_full;
// data mux // data mux
assign l15_req_o.l15_nc = (arb_idx) ? dcache_data.nc : icache_data.nc; assign l15_req_o.l15_nc = (arb_idx) ? dcache_data.nc : icache_data.nc;
// icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not. // icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not.
assign l15_req_o.l15_size = (arb_idx) ? dcache_data.size : assign l15_req_o.l15_size = (arb_idx) ? dcache_data.size : (icache_data.nc) ? 3'b010 : 3'b111;
(icache_data.nc) ? 3'b010 : 3'b111; assign l15_req_o.l15_threadid = (arb_idx) ? dcache_data.tid : icache_data.tid;
assign l15_req_o.l15_threadid = (arb_idx) ? dcache_data.tid : icache_data.tid; assign l15_req_o.l15_prefetch = '0; // unused in openpiton
assign l15_req_o.l15_prefetch = '0; // unused in openpiton
assign l15_req_o.l15_invalidate_cacheline = '0; // unused by Ariane as L1 has no ECC at the moment assign l15_req_o.l15_invalidate_cacheline = '0; // unused by Ariane as L1 has no ECC at the moment
assign l15_req_o.l15_blockstore = '0; // unused in openpiton assign l15_req_o.l15_blockstore = '0; // unused in openpiton
assign l15_req_o.l15_blockinitstore = '0; // unused in openpiton assign l15_req_o.l15_blockinitstore = '0; // unused in openpiton
assign l15_req_o.l15_l1rplway = (arb_idx) ? dcache_data.way : icache_data.way; assign l15_req_o.l15_l1rplway = (arb_idx) ? dcache_data.way : icache_data.way;
assign l15_req_o.l15_address = (arb_idx) ? dcache_data.paddr : assign l15_req_o.l15_address = (arb_idx) ? dcache_data.paddr : icache_data.paddr;
icache_data.paddr;
assign l15_req_o.l15_data_next_entry = '0; // unused in Ariane (only used for CAS atomic requests) assign l15_req_o.l15_data_next_entry = '0; // unused in Ariane (only used for CAS atomic requests)
assign l15_req_o.l15_csm_data = '0; // unused in Ariane (only used for coherence domain restriction features) assign l15_req_o.l15_csm_data = '0; // unused in Ariane (only used for coherence domain restriction features)
assign l15_req_o.l15_amo_op = dcache_data.amo_op; assign l15_req_o.l15_amo_op = dcache_data.amo_op;
// openpiton is big endian // openpiton is big endian
if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_L15_BIG_ENDIAN) assign l15_req_o.l15_data = swendian64(dcache_data.data); if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_L15_BIG_ENDIAN)
else if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_L15_LITTLE_ENDIAN) assign l15_req_o.l15_data = dcache_data.data; assign l15_req_o.l15_data = swendian64(dcache_data.data);
else $fatal(1,"[wt_l15_adapter] Unsupported NOC type"); else if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_L15_LITTLE_ENDIAN)
assign l15_req_o.l15_data = dcache_data.data;
else $fatal(1, "[wt_l15_adapter] Unsupported NOC type");
// arbiter // arbiter
rrarbiter #( rrarbiter #(
.NUM_REQ(2), .NUM_REQ(2),
.LOCK_IN(1) .LOCK_IN(1)
) i_rrarbiter ( ) i_rrarbiter (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i( '0 ), .flush_i('0),
.en_i ( l15_rtrn_i.l15_ack ), .en_i (l15_rtrn_i.l15_ack),
.req_i ( arb_req ), .req_i (arb_req),
.ack_o ( arb_ack ), .ack_o (arb_ack),
.vld_o ( ), .vld_o (),
.idx_o ( arb_idx ) .idx_o (arb_idx)
); );
assign arb_req = {~dcache_data_empty, ~icache_data_empty}; assign arb_req = {~dcache_data_empty, ~icache_data_empty};
assign l15_req_o.l15_val = (|arb_req);// & ~header_ack_q; assign l15_req_o.l15_val = (|arb_req); // & ~header_ack_q;
// encode packet type // encode packet type
always_comb begin : p_req always_comb begin : p_req
l15_req_o.l15_rqtype = L15_LOAD_RQ; l15_req_o.l15_rqtype = L15_LOAD_RQ;
unique case (arb_idx) unique case (arb_idx)
0: begin// icache 0: begin // icache
l15_req_o.l15_rqtype = L15_IMISS_RQ; l15_req_o.l15_rqtype = L15_IMISS_RQ;
end end
1: begin 1: begin
@ -179,53 +182,53 @@ l15_rtrn_t rtrn_fifo_data;
default: begin default: begin
; ;
end end
endcase // dcache_data.rtype endcase // dcache_data.rtype
end end
default: begin default: begin
; ;
end end
endcase endcase
end // p_req end // p_req
fifo_v2 #( fifo_v2 #(
.dtype ( icache_req_t ), .dtype(icache_req_t),
.DEPTH ( ADAPTER_REQ_FIFO_DEPTH ) .DEPTH(ADAPTER_REQ_FIFO_DEPTH)
) i_icache_data_fifo ( ) i_icache_data_fifo (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i (1'b0),
.testmode_i ( 1'b0 ), .testmode_i (1'b0),
.full_o ( icache_data_full ), .full_o (icache_data_full),
.empty_o ( icache_data_empty ), .empty_o (icache_data_empty),
.alm_full_o ( ), .alm_full_o (),
.alm_empty_o ( ), .alm_empty_o(),
.data_i ( icache_data_i ), .data_i (icache_data_i),
.push_i ( icache_data_ack_o ), .push_i (icache_data_ack_o),
.data_o ( icache_data ), .data_o (icache_data),
.pop_i ( arb_ack[0] ) .pop_i (arb_ack[0])
); );
fifo_v2 #( fifo_v2 #(
.dtype ( dcache_req_t ), .dtype(dcache_req_t),
.DEPTH ( ADAPTER_REQ_FIFO_DEPTH ) .DEPTH(ADAPTER_REQ_FIFO_DEPTH)
) i_dcache_data_fifo ( ) i_dcache_data_fifo (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i (1'b0),
.testmode_i ( 1'b0 ), .testmode_i (1'b0),
.full_o ( dcache_data_full ), .full_o (dcache_data_full),
.empty_o ( dcache_data_empty ), .empty_o (dcache_data_empty),
.alm_full_o ( ), .alm_full_o (),
.alm_empty_o ( ), .alm_empty_o(),
.data_i ( dcache_data_i ), .data_i (dcache_data_i),
.push_i ( dcache_data_ack_o ), .push_i (dcache_data_ack_o),
.data_o ( dcache_data ), .data_o (dcache_data),
.pop_i ( arb_ack[1] ) .pop_i (arb_ack[1])
); );
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// return path from L15 // return path from L15
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// relevant l15 signals // relevant l15 signals
// l15_rtrn_i.l15_returntype; // see below for encoding // l15_rtrn_i.l15_returntype; // see below for encoding
@ -253,13 +256,13 @@ l15_rtrn_t rtrn_fifo_data;
dcache_rtrn_o.rtype = DCACHE_LOAD_ACK; dcache_rtrn_o.rtype = DCACHE_LOAD_ACK;
icache_rtrn_vld_o = 1'b0; icache_rtrn_vld_o = 1'b0;
dcache_rtrn_vld_o = 1'b0; dcache_rtrn_vld_o = 1'b0;
if(!rtrn_fifo_empty) begin if (!rtrn_fifo_empty) begin
unique case (rtrn_fifo_data.l15_returntype) unique case (rtrn_fifo_data.l15_returntype)
L15_LOAD_RET: begin L15_LOAD_RET: begin
dcache_rtrn_o.rtype = DCACHE_LOAD_ACK; dcache_rtrn_o.rtype = DCACHE_LOAD_ACK;
dcache_rtrn_vld_o = 1'b1; dcache_rtrn_vld_o = 1'b1;
end end
L15_ST_ACK: begin L15_ST_ACK: begin
dcache_rtrn_o.rtype = DCACHE_STORE_ACK; dcache_rtrn_o.rtype = DCACHE_STORE_ACK;
dcache_rtrn_vld_o = 1'b1; dcache_rtrn_vld_o = 1'b1;
end end
@ -282,107 +285,126 @@ l15_rtrn_t rtrn_fifo_data;
// dcache_rtrn_o.reqType = DCACHE_INT_ACK; // dcache_rtrn_o.reqType = DCACHE_INT_ACK;
// end // end
default: begin default: begin
; ;
end end
endcase // rtrn_fifo_data.l15_returntype endcase // rtrn_fifo_data.l15_returntype
end end
end end
// openpiton is big endian // openpiton is big endian
if (SwapEndianess) begin : gen_swap if (SwapEndianess) begin : gen_swap
assign dcache_rtrn_o.data = { swendian64(rtrn_fifo_data.l15_data_1), assign dcache_rtrn_o.data = {
swendian64(rtrn_fifo_data.l15_data_0) }; swendian64(rtrn_fifo_data.l15_data_1), swendian64(rtrn_fifo_data.l15_data_0)
};
assign icache_rtrn_o.data = { swendian64(rtrn_fifo_data.l15_data_3), assign icache_rtrn_o.data = {
swendian64(rtrn_fifo_data.l15_data_2), swendian64(rtrn_fifo_data.l15_data_3),
swendian64(rtrn_fifo_data.l15_data_1), swendian64(rtrn_fifo_data.l15_data_2),
swendian64(rtrn_fifo_data.l15_data_0) }; swendian64(rtrn_fifo_data.l15_data_1),
swendian64(rtrn_fifo_data.l15_data_0)
};
end else begin : gen_no_swap end else begin : gen_no_swap
assign dcache_rtrn_o.data = { rtrn_fifo_data.l15_data_1, assign dcache_rtrn_o.data = {rtrn_fifo_data.l15_data_1, rtrn_fifo_data.l15_data_0};
rtrn_fifo_data.l15_data_0 };
assign icache_rtrn_o.data = { rtrn_fifo_data.l15_data_3, assign icache_rtrn_o.data = {
rtrn_fifo_data.l15_data_2, rtrn_fifo_data.l15_data_3,
rtrn_fifo_data.l15_data_1, rtrn_fifo_data.l15_data_2,
rtrn_fifo_data.l15_data_0 }; rtrn_fifo_data.l15_data_1,
rtrn_fifo_data.l15_data_0
};
end end
// fifo signals // fifo signals
assign icache_rtrn_o.tid = rtrn_fifo_data.l15_threadid; assign icache_rtrn_o.tid = rtrn_fifo_data.l15_threadid;
assign dcache_rtrn_o.tid = rtrn_fifo_data.l15_threadid; assign dcache_rtrn_o.tid = rtrn_fifo_data.l15_threadid;
// invalidation signal mapping // invalidation signal mapping
assign icache_rtrn_o.inv.idx = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000}; assign icache_rtrn_o.inv.idx = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000};
assign icache_rtrn_o.inv.way = rtrn_fifo_data.l15_inval_way; assign icache_rtrn_o.inv.way = rtrn_fifo_data.l15_inval_way;
assign icache_rtrn_o.inv.vld = rtrn_fifo_data.l15_inval_icache_inval; assign icache_rtrn_o.inv.vld = rtrn_fifo_data.l15_inval_icache_inval;
assign icache_rtrn_o.inv.all = rtrn_fifo_data.l15_inval_icache_all_way; assign icache_rtrn_o.inv.all = rtrn_fifo_data.l15_inval_icache_all_way;
assign dcache_rtrn_o.inv.idx = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000}; assign dcache_rtrn_o.inv.idx = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000};
assign dcache_rtrn_o.inv.way = rtrn_fifo_data.l15_inval_way; assign dcache_rtrn_o.inv.way = rtrn_fifo_data.l15_inval_way;
assign dcache_rtrn_o.inv.vld = rtrn_fifo_data.l15_inval_dcache_inval; assign dcache_rtrn_o.inv.vld = rtrn_fifo_data.l15_inval_dcache_inval;
assign dcache_rtrn_o.inv.all = rtrn_fifo_data.l15_inval_dcache_all_way; assign dcache_rtrn_o.inv.all = rtrn_fifo_data.l15_inval_dcache_all_way;
fifo_v2 #( fifo_v2 #(
.dtype ( l15_rtrn_t ), .dtype(l15_rtrn_t),
.DEPTH ( ADAPTER_RTRN_FIFO_DEPTH ) .DEPTH(ADAPTER_RTRN_FIFO_DEPTH)
) i_rtrn_fifo ( ) i_rtrn_fifo (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i (1'b0),
.testmode_i ( 1'b0 ), .testmode_i (1'b0),
.full_o ( rtrn_fifo_full ), .full_o (rtrn_fifo_full),
.empty_o ( rtrn_fifo_empty ), .empty_o (rtrn_fifo_empty),
.alm_full_o ( ), .alm_full_o (),
.alm_empty_o ( ), .alm_empty_o(),
.data_i ( l15_rtrn_i ), .data_i (l15_rtrn_i),
.push_i ( l15_req_o.l15_req_ack ), .push_i (l15_req_o.l15_req_ack),
.data_o ( rtrn_fifo_data ), .data_o (rtrn_fifo_data),
.pop_i ( rtrn_fifo_pop ) .pop_i (rtrn_fifo_pop)
); );
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// assertions // assertions
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
//pragma translate_off //pragma translate_off
`ifndef VERILATOR `ifndef VERILATOR
invalidations: assert property ( invalidations :
assert property (
@(posedge clk_i) disable iff (!rst_ni) l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype == L15_EVICT_REQ |-> (l15_rtrn_i.l15_inval_icache_inval | @(posedge clk_i) disable iff (!rst_ni) l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype == L15_EVICT_REQ |-> (l15_rtrn_i.l15_inval_icache_inval |
l15_rtrn_i.l15_inval_dcache_inval | l15_rtrn_i.l15_inval_dcache_inval |
l15_rtrn_i.l15_inval_icache_all_way | l15_rtrn_i.l15_inval_icache_all_way |
l15_rtrn_i.l15_inval_dcache_all_way)) l15_rtrn_i.l15_inval_dcache_all_way))
else $fatal(1,"[l15_adapter] got invalidation package with zero invalidation flags"); else $fatal(1, "[l15_adapter] got invalidation package with zero invalidation flags");
blockstore_o: assert property ( blockstore_o :
assert property (
@(posedge clk_i) disable iff (!rst_ni) l15_req_o.l15_val |-> l15_req_o.l15_rqtype == L15_STORE_RQ |-> !(l15_req_o.l15_blockstore || l15_req_o.l15_blockinitstore)) @(posedge clk_i) disable iff (!rst_ni) l15_req_o.l15_val |-> l15_req_o.l15_rqtype == L15_STORE_RQ |-> !(l15_req_o.l15_blockstore || l15_req_o.l15_blockinitstore))
else $fatal(1,"[l15_adapter] blockstores are not supported (out)"); else $fatal(1, "[l15_adapter] blockstores are not supported (out)");
blockstore_i: assert property ( blockstore_i :
assert property (
@(posedge clk_i) disable iff (!rst_ni) l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype inside {L15_ST_ACK, L15_ST_ACK} |-> !l15_rtrn_i.l15_blockinitstore) @(posedge clk_i) disable iff (!rst_ni) l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype inside {L15_ST_ACK, L15_ST_ACK} |-> !l15_rtrn_i.l15_blockinitstore)
else $fatal(1,"[l15_adapter] blockstores are not supported (in)"); else $fatal(1, "[l15_adapter] blockstores are not supported (in)");
unsuported_rtrn_types: assert property ( unsuported_rtrn_types :
assert property (
@(posedge clk_i) disable iff (!rst_ni) (l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype inside {L15_LOAD_RET, L15_ST_ACK, L15_IFILL_RET, L15_EVICT_REQ, L15_CPX_RESTYPE_ATOMIC_RES})) @(posedge clk_i) disable iff (!rst_ni) (l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype inside {L15_LOAD_RET, L15_ST_ACK, L15_IFILL_RET, L15_EVICT_REQ, L15_CPX_RESTYPE_ATOMIC_RES}))
else $warning("[l15_adapter] return type %X04 is not (yet) supported by l15 adapter.", l15_rtrn_i.l15_returntype); else
$warning(
"[l15_adapter] return type %X04 is not (yet) supported by l15 adapter.",
l15_rtrn_i.l15_returntype
);
amo_type: assert property ( amo_type :
assert property (
@(posedge clk_i) disable iff (!rst_ni) (l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype inside {L15_CPX_RESTYPE_ATOMIC_RES} |-> l15_rtrn_i.l15_atomic )) @(posedge clk_i) disable iff (!rst_ni) (l15_rtrn_i.l15_val |-> l15_rtrn_i.l15_returntype inside {L15_CPX_RESTYPE_ATOMIC_RES} |-> l15_rtrn_i.l15_atomic ))
else $fatal(1,"[l15_adapter] l15_atomic must be asserted when the return type is an ATOMIC_RES"); else $fatal(1, "[l15_adapter] l15_atomic must be asserted when the return type is an ATOMIC_RES");
initial begin initial begin
// assert wrong parameterizations // assert wrong parameterizations
assert (L15_SET_ASSOC >= ICACHE_SET_ASSOC) assert (L15_SET_ASSOC >= ICACHE_SET_ASSOC)
else $fatal(1,"[l15_adapter] number of icache ways must be smaller or equal the number of L15 ways"); else
$fatal(
1, "[l15_adapter] number of icache ways must be smaller or equal the number of L15 ways"
);
// assert wrong parameterizations // assert wrong parameterizations
assert (L15_SET_ASSOC >= DCACHE_SET_ASSOC) assert (L15_SET_ASSOC >= DCACHE_SET_ASSOC)
else $fatal(1,"[l15_adapter] number of dcache ways must be smaller or equal the number of L15 ways"); else
$fatal(
1, "[l15_adapter] number of dcache ways must be smaller or equal the number of L15 ways"
);
// invalidation address returned by L1.5 is 16 bit // invalidation address returned by L1.5 is 16 bit
assert (16 >= DCACHE_INDEX_WIDTH && 16 >= ICACHE_INDEX_WIDTH) assert (16 >= DCACHE_INDEX_WIDTH && 16 >= ICACHE_INDEX_WIDTH)
else $fatal(1,"[l15_adapter] maximum number of index bits supported by L1.5 is 16"); else $fatal(1, "[l15_adapter] maximum number of index bits supported by L1.5 is 16");
end end
`endif `endif
//pragma translate_on //pragma translate_on
endmodule // wt_l15_adapter endmodule // wt_l15_adapter

View file

@ -13,282 +13,286 @@
// Description: Commits to the architectural state resulting from the scoreboard. // Description: Commits to the architectural state resulting from the scoreboard.
module commit_stage import ariane_pkg::*; #( module commit_stage
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
)( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input logic halt_i, // request to halt the core input logic halt_i, // request to halt the core
input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline
output exception_t exception_o, // take exception to controller output exception_t exception_o, // take exception to controller
output logic dirty_fp_state_o, // mark the F state as dirty output logic dirty_fp_state_o, // mark the F state as dirty
input logic single_step_i, // we are in single step debug mode input logic single_step_i, // we are in single step debug mode
// from scoreboard // from scoreboard
input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, // the instruction we want to commit input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, // the instruction we want to commit
output logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_o, // acknowledge that we are indeed committing output logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_o, // acknowledge that we are indeed committing
// to register file // to register file
output logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_o, // register file write address output logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_o, // register file write address
output logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_o, // register file write data output logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_o, // register file write data
output logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_o, // register file write enable output logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_o, // register file write enable
output logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_o, // floating point register enable output logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_o, // floating point register enable
// Atomic memory operations // Atomic memory operations
input amo_resp_t amo_resp_i, // result of AMO operation input amo_resp_t amo_resp_i, // result of AMO operation
// to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline) // to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline)
output logic [riscv::VLEN-1:0] pc_o, output logic [riscv::VLEN-1:0] pc_o,
// to/from CSR file // to/from CSR file
output fu_op csr_op_o, // decoded CSR operation output fu_op csr_op_o, // decoded CSR operation
output riscv::xlen_t csr_wdata_o, // data to write to CSR output riscv::xlen_t csr_wdata_o, // data to write to CSR
input riscv::xlen_t csr_rdata_i, // data to read from CSR input riscv::xlen_t csr_rdata_i, // data to read from CSR
input exception_t csr_exception_i, // exception or interrupt occurred in CSR stage (the same as commit) input exception_t csr_exception_i, // exception or interrupt occurred in CSR stage (the same as commit)
output logic csr_write_fflags_o, // write the fflags CSR output logic csr_write_fflags_o, // write the fflags CSR
// commit signals to ex // commit signals to ex
output logic commit_lsu_o, // commit the pending store output logic commit_lsu_o, // commit the pending store
input logic commit_lsu_ready_i, // commit buffer of LSU is ready input logic commit_lsu_ready_i, // commit buffer of LSU is ready
output logic [TRANS_ID_BITS-1:0] commit_tran_id_o, // transaction id of first commit port output logic [TRANS_ID_BITS-1:0] commit_tran_id_o, // transaction id of first commit port
output logic amo_valid_commit_o, // valid AMO in commit stage output logic amo_valid_commit_o, // valid AMO in commit stage
input logic no_st_pending_i, // there is no store pending input logic no_st_pending_i, // there is no store pending
output logic commit_csr_o, // commit the pending CSR instruction output logic commit_csr_o, // commit the pending CSR instruction
output logic fence_i_o, // flush I$ and pipeline output logic fence_i_o, // flush I$ and pipeline
output logic fence_o, // flush D$ and pipeline output logic fence_o, // flush D$ and pipeline
output logic flush_commit_o, // request a pipeline flush output logic flush_commit_o, // request a pipeline flush
output logic sfence_vma_o // flush TLBs and pipeline output logic sfence_vma_o // flush TLBs and pipeline
); );
// ila_0 i_ila_commit ( // ila_0 i_ila_commit (
// .clk(clk_i), // input wire clk // .clk(clk_i), // input wire clk
// .probe0(commit_instr_i[0].pc), // input wire [63:0] probe0 // .probe0(commit_instr_i[0].pc), // input wire [63:0] probe0
// .probe1(commit_instr_i[1].pc), // input wire [63:0] probe1 // .probe1(commit_instr_i[1].pc), // input wire [63:0] probe1
// .probe2(commit_instr_i[0].valid), // input wire [0:0] probe2 // .probe2(commit_instr_i[0].valid), // input wire [0:0] probe2
// .probe3(commit_instr_i[1].valid), // input wire [0:0] probe3 // .probe3(commit_instr_i[1].valid), // input wire [0:0] probe3
// .probe4(commit_ack_o[0]), // input wire [0:0] probe4 // .probe4(commit_ack_o[0]), // input wire [0:0] probe4
// .probe5(commit_ack_o[0]), // input wire [0:0] probe5 // .probe5(commit_ack_o[0]), // input wire [0:0] probe5
// .probe6(1'b0), // input wire [0:0] probe6 // .probe6(1'b0), // input wire [0:0] probe6
// .probe7(1'b0), // input wire [0:0] probe7 // .probe7(1'b0), // input wire [0:0] probe7
// .probe8(1'b0), // input wire [0:0] probe8 // .probe8(1'b0), // input wire [0:0] probe8
// .probe9(1'b0) // input wire [0:0] probe9 // .probe9(1'b0) // input wire [0:0] probe9
// ); // );
for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_waddr for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_waddr
assign waddr_o[i] = commit_instr_i[i].rd[4:0]; assign waddr_o[i] = commit_instr_i[i].rd[4:0];
end
assign pc_o = commit_instr_i[0].pc;
// Dirty the FP state if we are committing anything related to the FPU
always_comb begin : dirty_fp_state
dirty_fp_state_o = 1'b0;
for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
dirty_fp_state_o |= commit_ack_o[i] & (commit_instr_i[i].fu inside {FPU, FPU_VEC} || (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
commit_instr_i[i].op
)));
// Check if we issued a vector floating-point instruction to the accellerator
dirty_fp_state_o |= commit_instr_i[i].fu == ACCEL && commit_instr_i[i].vfp;
end end
end
assign pc_o = commit_instr_i[0].pc; assign commit_tran_id_o = commit_instr_i[0].trans_id;
// Dirty the FP state if we are committing anything related to the FPU
always_comb begin : dirty_fp_state logic instr_0_is_amo;
dirty_fp_state_o = 1'b0; assign instr_0_is_amo = is_amo(commit_instr_i[0].op);
for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin // -------------------
dirty_fp_state_o |= commit_ack_o[i] & (commit_instr_i[i].fu inside {FPU, FPU_VEC} || (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[i].op))); // Commit Instruction
// Check if we issued a vector floating-point instruction to the accellerator // -------------------
dirty_fp_state_o |= commit_instr_i[i].fu == ACCEL && commit_instr_i[i].vfp; // write register file or commit instruction in LSU or CSR Buffer
always_comb begin : commit
// default assignments
commit_ack_o[0] = 1'b0;
amo_valid_commit_o = 1'b0;
we_gpr_o[0] = 1'b0;
we_fpr_o = '{default: 1'b0};
commit_lsu_o = 1'b0;
commit_csr_o = 1'b0;
// amos will commit on port 0
wdata_o[0] = (amo_resp_i.ack) ? amo_resp_i.result[riscv::XLEN-1:0] : commit_instr_i[0].result;
csr_op_o = ADD; // this corresponds to a CSR NOP
csr_wdata_o = {riscv::XLEN{1'b0}};
fence_i_o = 1'b0;
fence_o = 1'b0;
sfence_vma_o = 1'b0;
csr_write_fflags_o = 1'b0;
flush_commit_o = 1'b0;
// we will not commit the instruction if we took an exception
// and we do not commit the instruction if we requested a halt
if (commit_instr_i[0].valid && !commit_instr_i[0].ex.valid && !halt_i) begin
// we can definitely write the register file
// if the instruction is not committing anything the destination
commit_ack_o[0] = 1'b1;
if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[0].op)) begin
we_fpr_o[0] = 1'b1;
end else begin
we_gpr_o[0] = 1'b1;
end
// check whether the instruction we retire was a store
if (commit_instr_i[0].fu == STORE && !instr_0_is_amo) begin
// check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store)
if (commit_lsu_ready_i) begin
commit_ack_o[0] = 1'b1;
commit_lsu_o = 1'b1;
// stall in case the store buffer is not able to accept anymore instructions
end else begin
commit_ack_o[0] = 1'b0;
end
end
// ---------
// FPU Flags
// ---------
if (CVA6Cfg.FpPresent) begin
if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin
// write the CSR with potential exception flags from retiring floating point instruction
csr_wdata_o = {{riscv::XLEN - 5{1'b0}}, commit_instr_i[0].ex.cause[4:0]};
csr_write_fflags_o = 1'b1;
commit_ack_o[0] = 1'b1;
end
end
// ---------
// CSR Logic
// ---------
// check whether the instruction we retire was a CSR instruction and it did not
// throw an exception
if (commit_instr_i[0].fu == CSR) begin
// write the CSR file
csr_op_o = commit_instr_i[0].op;
csr_wdata_o = commit_instr_i[0].result;
if (!csr_exception_i.valid) begin
commit_csr_o = 1'b1;
wdata_o[0] = csr_rdata_i;
commit_ack_o[0] = 1'b1;
end else begin
commit_ack_o[0] = 1'b0;
we_gpr_o[0] = 1'b0;
end
end
// ------------------
// SFENCE.VMA Logic
// ------------------
// sfence.vma is idempotent so we can safely re-execute it after returning
// from interrupt service routine
// check if this instruction was a SFENCE_VMA
if (commit_instr_i[0].op == SFENCE_VMA) begin
// no store pending so we can flush the TLBs and pipeline
sfence_vma_o = no_st_pending_i;
// wait for the store buffer to drain until flushing the pipeline
commit_ack_o[0] = no_st_pending_i;
end
// ------------------
// FENCE.I Logic
// ------------------
// fence.i is idempotent so we can safely re-execute it after returning
// from interrupt service routine
// Fence synchronizes data and instruction streams. That means that we need to flush the private icache
// and the private dcache. This is the most expensive instruction.
if (commit_instr_i[0].op == FENCE_I || (flush_dcache_i && commit_instr_i[0].fu != STORE)) begin
commit_ack_o[0] = no_st_pending_i;
// tell the controller to flush the I$
fence_i_o = no_st_pending_i;
end
// ------------------
// FENCE Logic
// ------------------
// fence is idempotent so we can safely re-execute it after returning
// from interrupt service routine
if (commit_instr_i[0].op == FENCE) begin
commit_ack_o[0] = no_st_pending_i;
// tell the controller to flush the D$
fence_o = no_st_pending_i;
end
// ------------------
// AMO
// ------------------
if (CVA6Cfg.RVA && instr_0_is_amo) begin
// AMO finished
commit_ack_o[0] = amo_resp_i.ack;
// flush the pipeline
flush_commit_o = amo_resp_i.ack;
amo_valid_commit_o = 1'b1;
we_gpr_o[0] = amo_resp_i.ack;
end end
end end
assign commit_tran_id_o = commit_instr_i[0].trans_id; if (CVA6Cfg.NrCommitPorts > 1) begin
logic instr_0_is_amo; commit_ack_o[1] = 1'b0;
assign instr_0_is_amo = is_amo(commit_instr_i[0].op); we_gpr_o[1] = 1'b0;
// ------------------- wdata_o[1] = commit_instr_i[1].result;
// Commit Instruction
// -------------------
// write register file or commit instruction in LSU or CSR Buffer
always_comb begin : commit
// default assignments
commit_ack_o[0] = 1'b0;
amo_valid_commit_o = 1'b0; // -----------------
// Commit Port 2
we_gpr_o[0] = 1'b0; // -----------------
we_fpr_o = '{default: 1'b0}; // check if the second instruction can be committed as well and the first wasn't a CSR instruction
commit_lsu_o = 1'b0; // also if we are in single step mode don't retire the second instruction
commit_csr_o = 1'b0; if (commit_ack_o[0] && commit_instr_i[1].valid
// amos will commit on port 0
wdata_o[0] = (amo_resp_i.ack) ? amo_resp_i.result[riscv::XLEN-1:0] : commit_instr_i[0].result;
csr_op_o = ADD; // this corresponds to a CSR NOP
csr_wdata_o = {riscv::XLEN{1'b0}};
fence_i_o = 1'b0;
fence_o = 1'b0;
sfence_vma_o = 1'b0;
csr_write_fflags_o = 1'b0;
flush_commit_o = 1'b0;
// we will not commit the instruction if we took an exception
// and we do not commit the instruction if we requested a halt
if (commit_instr_i[0].valid && !commit_instr_i[0].ex.valid && !halt_i) begin
// we can definitely write the register file
// if the instruction is not committing anything the destination
commit_ack_o[0] = 1'b1;
if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[0].op)) begin
we_fpr_o[0] = 1'b1;
end else begin
we_gpr_o[0] = 1'b1;
end
// check whether the instruction we retire was a store
if (commit_instr_i[0].fu == STORE && !instr_0_is_amo) begin
// check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store)
if (commit_lsu_ready_i) begin
commit_ack_o[0] = 1'b1;
commit_lsu_o = 1'b1;
// stall in case the store buffer is not able to accept anymore instructions
end else begin
commit_ack_o[0] = 1'b0;
end
end
// ---------
// FPU Flags
// ---------
if(CVA6Cfg.FpPresent) begin
if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin
// write the CSR with potential exception flags from retiring floating point instruction
csr_wdata_o = {{riscv::XLEN-5{1'b0}}, commit_instr_i[0].ex.cause[4:0]};
csr_write_fflags_o = 1'b1;
commit_ack_o[0] = 1'b1;
end
end
// ---------
// CSR Logic
// ---------
// check whether the instruction we retire was a CSR instruction and it did not
// throw an exception
if (commit_instr_i[0].fu == CSR) begin
// write the CSR file
csr_op_o = commit_instr_i[0].op;
csr_wdata_o = commit_instr_i[0].result;
if (!csr_exception_i.valid) begin
commit_csr_o = 1'b1;
wdata_o[0] = csr_rdata_i;
commit_ack_o[0] = 1'b1;
end else begin
commit_ack_o[0] = 1'b0;
we_gpr_o[0] = 1'b0;
end
end
// ------------------
// SFENCE.VMA Logic
// ------------------
// sfence.vma is idempotent so we can safely re-execute it after returning
// from interrupt service routine
// check if this instruction was a SFENCE_VMA
if (commit_instr_i[0].op == SFENCE_VMA) begin
// no store pending so we can flush the TLBs and pipeline
sfence_vma_o = no_st_pending_i;
// wait for the store buffer to drain until flushing the pipeline
commit_ack_o[0] = no_st_pending_i;
end
// ------------------
// FENCE.I Logic
// ------------------
// fence.i is idempotent so we can safely re-execute it after returning
// from interrupt service routine
// Fence synchronizes data and instruction streams. That means that we need to flush the private icache
// and the private dcache. This is the most expensive instruction.
if (commit_instr_i[0].op == FENCE_I || (flush_dcache_i && commit_instr_i[0].fu != STORE)) begin
commit_ack_o[0] = no_st_pending_i;
// tell the controller to flush the I$
fence_i_o = no_st_pending_i;
end
// ------------------
// FENCE Logic
// ------------------
// fence is idempotent so we can safely re-execute it after returning
// from interrupt service routine
if (commit_instr_i[0].op == FENCE) begin
commit_ack_o[0] = no_st_pending_i;
// tell the controller to flush the D$
fence_o = no_st_pending_i;
end
// ------------------
// AMO
// ------------------
if (CVA6Cfg.RVA && instr_0_is_amo) begin
// AMO finished
commit_ack_o[0] = amo_resp_i.ack;
// flush the pipeline
flush_commit_o = amo_resp_i.ack;
amo_valid_commit_o = 1'b1;
we_gpr_o[0] = amo_resp_i.ack;
end
end
if (CVA6Cfg.NrCommitPorts > 1) begin
commit_ack_o[1] = 1'b0;
we_gpr_o[1] = 1'b0;
wdata_o[1] = commit_instr_i[1].result;
// -----------------
// Commit Port 2
// -----------------
// check if the second instruction can be committed as well and the first wasn't a CSR instruction
// also if we are in single step mode don't retire the second instruction
if (commit_ack_o[0] && commit_instr_i[1].valid
&& !halt_i && !halt_i
&& !(commit_instr_i[0].fu inside {CSR}) && !(commit_instr_i[0].fu inside {CSR})
&& !flush_dcache_i && !flush_dcache_i
&& !instr_0_is_amo && !instr_0_is_amo
&& !single_step_i) begin && !single_step_i) begin
// only if the first instruction didn't throw an exception and this instruction won't throw an exception // only if the first instruction didn't throw an exception and this instruction won't throw an exception
// and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT, FPU or FPU_VEC // and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT, FPU or FPU_VEC
if (!exception_o.valid && !commit_instr_i[1].ex.valid if (!exception_o.valid && !commit_instr_i[1].ex.valid
&& (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU, FPU_VEC})) begin && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU, FPU_VEC})) begin
if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[1].op)) if (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_i[1].op)) we_fpr_o[1] = 1'b1;
we_fpr_o[1] = 1'b1; else we_gpr_o[1] = 1'b1;
else
we_gpr_o[1] = 1'b1;
commit_ack_o[1] = 1'b1; commit_ack_o[1] = 1'b1;
// additionally check if we are retiring an FPU instruction because we need to make sure that we write all // additionally check if we are retiring an FPU instruction because we need to make sure that we write all
// exception flags // exception flags
if (CVA6Cfg.FpPresent && commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin if (CVA6Cfg.FpPresent && commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin
if (csr_write_fflags_o) if (csr_write_fflags_o)
csr_wdata_o = {{riscv::XLEN-5{1'b0}}, (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])}; csr_wdata_o = {
else {riscv::XLEN - 5{1'b0}},
csr_wdata_o = {{riscv::XLEN-5{1'b0}}, commit_instr_i[1].ex.cause[4:0]}; (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])
};
else csr_wdata_o = {{riscv::XLEN - 5{1'b0}}, commit_instr_i[1].ex.cause[4:0]};
csr_write_fflags_o = 1'b1; csr_write_fflags_o = 1'b1;
end end
end
end
end end
end
end end
end
// ----------------------------- // -----------------------------
// Exception & Interrupt Logic // Exception & Interrupt Logic
// ----------------------------- // -----------------------------
// here we know for sure that we are taking the exception // here we know for sure that we are taking the exception
always_comb begin : exception_handling always_comb begin : exception_handling
// Multiple simultaneous interrupts and traps at the same privilege level are handled in the following decreasing // Multiple simultaneous interrupts and traps at the same privilege level are handled in the following decreasing
// priority order: external interrupts, software interrupts, timer interrupts, then finally any synchronous traps. (1.10 p.30) // priority order: external interrupts, software interrupts, timer interrupts, then finally any synchronous traps. (1.10 p.30)
// interrupts are correctly prioritized in the CSR reg file, exceptions are prioritized here // interrupts are correctly prioritized in the CSR reg file, exceptions are prioritized here
exception_o.valid = 1'b0; exception_o.valid = 1'b0;
exception_o.cause = '0; exception_o.cause = '0;
exception_o.tval = '0; exception_o.tval = '0;
// we need a valid instruction in the commit stage // we need a valid instruction in the commit stage
if (commit_instr_i[0].valid) begin if (commit_instr_i[0].valid) begin
// ------------------------ // ------------------------
// check for CSR exception // check for CSR exception
// ------------------------ // ------------------------
if (csr_exception_i.valid) begin if (csr_exception_i.valid) begin
exception_o = csr_exception_i; exception_o = csr_exception_i;
// if no earlier exception happened the commit instruction will still contain // if no earlier exception happened the commit instruction will still contain
// the instruction bits from the ID stage. If a earlier exception happened we don't care // the instruction bits from the ID stage. If a earlier exception happened we don't care
// as we will overwrite it anyway in the next IF bl // as we will overwrite it anyway in the next IF bl
exception_o.tval = commit_instr_i[0].ex.tval; exception_o.tval = commit_instr_i[0].ex.tval;
end end
// ------------------------ // ------------------------
// Earlier Exceptions // Earlier Exceptions
// ------------------------ // ------------------------
// but we give precedence to exceptions which happened earlier e.g.: instruction page // but we give precedence to exceptions which happened earlier e.g.: instruction page
// faults for example // faults for example
if (commit_instr_i[0].ex.valid) begin if (commit_instr_i[0].ex.valid) begin
exception_o = commit_instr_i[0].ex; exception_o = commit_instr_i[0].ex;
end end
end
// Don't take any exceptions iff:
// - If we halted the processor
if (halt_i) begin
exception_o.valid = 1'b0;
end
end end
// Don't take any exceptions iff:
// - If we halted the processor
if (halt_i) begin
exception_o.valid = 1'b0;
end
end
endmodule endmodule

File diff suppressed because it is too large Load diff

View file

@ -13,174 +13,176 @@
// Description: Flush controller // Description: Flush controller
module controller import ariane_pkg::*; #( module controller
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
output logic set_pc_commit_o, // Set PC om PC Gen output logic set_pc_commit_o, // Set PC om PC Gen
output logic flush_if_o, // Flush the IF stage output logic flush_if_o, // Flush the IF stage
output logic flush_unissued_instr_o, // Flush un-issued instructions of the scoreboard output logic flush_unissued_instr_o, // Flush un-issued instructions of the scoreboard
output logic flush_id_o, // Flush ID stage output logic flush_id_o, // Flush ID stage
output logic flush_ex_o, // Flush EX stage output logic flush_ex_o, // Flush EX stage
output logic flush_bp_o, // Flush branch predictors output logic flush_bp_o, // Flush branch predictors
output logic flush_icache_o, // Flush ICache output logic flush_icache_o, // Flush ICache
output logic flush_dcache_o, // Flush DCache output logic flush_dcache_o, // Flush DCache
input logic flush_dcache_ack_i, // Acknowledge the whole DCache Flush input logic flush_dcache_ack_i, // Acknowledge the whole DCache Flush
output logic flush_tlb_o, // Flush TLBs output logic flush_tlb_o, // Flush TLBs
input logic halt_csr_i, // Halt request from CSR (WFI instruction) input logic halt_csr_i, // Halt request from CSR (WFI instruction)
input logic halt_acc_i, // Halt request from accelerator dispatcher input logic halt_acc_i, // Halt request from accelerator dispatcher
output logic halt_o, // Halt signal to commit stage output logic halt_o, // Halt signal to commit stage
input logic eret_i, // Return from exception input logic eret_i, // Return from exception
input logic ex_valid_i, // We got an exception, flush the pipeline input logic ex_valid_i, // We got an exception, flush the pipeline
input logic set_debug_pc_i, // set the debug pc from CSR input logic set_debug_pc_i, // set the debug pc from CSR
input bp_resolve_t resolved_branch_i, // We got a resolved branch, check if we need to flush the front-end input bp_resolve_t resolved_branch_i, // We got a resolved branch, check if we need to flush the front-end
input logic flush_csr_i, // We got an instruction which altered the CSR, flush the pipeline input logic flush_csr_i, // We got an instruction which altered the CSR, flush the pipeline
input logic fence_i_i, // fence.i in input logic fence_i_i, // fence.i in
input logic fence_i, // fence in input logic fence_i, // fence in
input logic sfence_vma_i, // We got an instruction to flush the TLBs and pipeline input logic sfence_vma_i, // We got an instruction to flush the TLBs and pipeline
input logic flush_commit_i, // Flush request from commit stage input logic flush_commit_i, // Flush request from commit stage
input logic flush_acc_i // Flush request from accelerator input logic flush_acc_i // Flush request from accelerator
); );
// active fence - high if we are currently flushing the dcache // active fence - high if we are currently flushing the dcache
logic fence_active_d, fence_active_q; logic fence_active_d, fence_active_q;
logic flush_dcache; logic flush_dcache;
// ------------
// Flush CTRL
// ------------
always_comb begin : flush_ctrl
fence_active_d = fence_active_q;
set_pc_commit_o = 1'b0;
flush_if_o = 1'b0;
flush_unissued_instr_o = 1'b0;
flush_id_o = 1'b0;
flush_ex_o = 1'b0;
flush_dcache = 1'b0;
flush_icache_o = 1'b0;
flush_tlb_o = 1'b0;
flush_bp_o = 1'b0;
// ------------ // ------------
// Flush CTRL // Mis-predict
// ------------ // ------------
always_comb begin : flush_ctrl // flush on mispredict
fence_active_d = fence_active_q; if (resolved_branch_i.is_mispredict) begin
set_pc_commit_o = 1'b0; // flush only un-issued instructions
flush_if_o = 1'b0; flush_unissued_instr_o = 1'b1;
flush_unissued_instr_o = 1'b0; // and if stage
flush_id_o = 1'b0; flush_if_o = 1'b1;
flush_ex_o = 1'b0; end
flush_dcache = 1'b0;
flush_icache_o = 1'b0;
flush_tlb_o = 1'b0;
flush_bp_o = 1'b0;
// ------------
// Mis-predict
// ------------
// flush on mispredict
if (resolved_branch_i.is_mispredict) begin
// flush only un-issued instructions
flush_unissued_instr_o = 1'b1;
// and if stage
flush_if_o = 1'b1;
end
// --------------------------------- // ---------------------------------
// FENCE // FENCE
// --------------------------------- // ---------------------------------
if (fence_i) begin if (fence_i) begin
// this can be seen as a CSR instruction with side-effect // this can be seen as a CSR instruction with side-effect
set_pc_commit_o = 1'b1; set_pc_commit_o = 1'b1;
flush_if_o = 1'b1; flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1; flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1; flush_id_o = 1'b1;
flush_ex_o = 1'b1; flush_ex_o = 1'b1;
// this is not needed in the case since we // this is not needed in the case since we
// have a write-through cache in this case // have a write-through cache in this case
if (DCACHE_TYPE == int'(config_pkg::WB)) begin
flush_dcache = 1'b1;
fence_active_d = 1'b1;
end
end
// ---------------------------------
// FENCE.I
// ---------------------------------
if (fence_i_i) begin
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
flush_icache_o = 1'b1;
// this is not needed in the case since we
// have a write-through cache in this case
if (DCACHE_TYPE == int'(config_pkg::WB)) begin
flush_dcache = 1'b1;
fence_active_d = 1'b1;
end
end
// this is not needed in the case since we
// have a write-through cache in this case
if (DCACHE_TYPE == int'(config_pkg::WB)) begin if (DCACHE_TYPE == int'(config_pkg::WB)) begin
// wait for the acknowledge here flush_dcache = 1'b1;
if (flush_dcache_ack_i && fence_active_q) begin fence_active_d = 1'b1;
fence_active_d = 1'b0;
// keep the flush dcache signal high as long as we didn't get the acknowledge from the cache
end else if (fence_active_q) begin
flush_dcache = 1'b1;
end
end end
// ---------------------------------
// SFENCE.VMA
// ---------------------------------
if (sfence_vma_i) begin
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
flush_tlb_o = 1'b1;
end
// Set PC to commit stage and flush pipeline
if (flush_csr_i || flush_commit_i || flush_acc_i) begin
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
end
// ---------------------------------
// 1. Exception
// 2. Return from exception
// ---------------------------------
if (ex_valid_i || eret_i || set_debug_pc_i) begin
// don't flush pcgen as we want to take the exception: Flush PCGen is not a flush signal
// for the PC Gen stage but instead tells it to take the PC we gave it
set_pc_commit_o = 1'b0;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
// this potentially reduces performance, but is needed
// to suppress speculative fetches to virtual memory from
// machine mode. TODO: remove when PMA checkers have been
// added to the system
flush_bp_o = 1'b1;
end
end end
// ---------------------- // ---------------------------------
// Halt Logic // FENCE.I
// ---------------------- // ---------------------------------
always_comb begin if (fence_i_i) begin
// halt the core if the fence is active set_pc_commit_o = 1'b1;
halt_o = halt_csr_i || halt_acc_i || fence_active_q; flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
flush_icache_o = 1'b1;
// this is not needed in the case since we
// have a write-through cache in this case
if (DCACHE_TYPE == int'(config_pkg::WB)) begin
flush_dcache = 1'b1;
fence_active_d = 1'b1;
end
end end
// ---------------------- // this is not needed in the case since we
// Registers // have a write-through cache in this case
// ---------------------- if (DCACHE_TYPE == int'(config_pkg::WB)) begin
always_ff @(posedge clk_i or negedge rst_ni) begin // wait for the acknowledge here
if (~rst_ni) begin if (flush_dcache_ack_i && fence_active_q) begin
fence_active_q <= 1'b0; fence_active_d = 1'b0;
flush_dcache_o <= 1'b0; // keep the flush dcache signal high as long as we didn't get the acknowledge from the cache
end else begin end else if (fence_active_q) begin
fence_active_q <= fence_active_d; flush_dcache = 1'b1;
// register on the flush signal, this signal might be critical end
flush_dcache_o <= flush_dcache;
end
end end
// ---------------------------------
// SFENCE.VMA
// ---------------------------------
if (sfence_vma_i) begin
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
flush_tlb_o = 1'b1;
end
// Set PC to commit stage and flush pipeline
if (flush_csr_i || flush_commit_i || flush_acc_i) begin
set_pc_commit_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
end
// ---------------------------------
// 1. Exception
// 2. Return from exception
// ---------------------------------
if (ex_valid_i || eret_i || set_debug_pc_i) begin
// don't flush pcgen as we want to take the exception: Flush PCGen is not a flush signal
// for the PC Gen stage but instead tells it to take the PC we gave it
set_pc_commit_o = 1'b0;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
// this potentially reduces performance, but is needed
// to suppress speculative fetches to virtual memory from
// machine mode. TODO: remove when PMA checkers have been
// added to the system
flush_bp_o = 1'b1;
end
end
// ----------------------
// Halt Logic
// ----------------------
always_comb begin
// halt the core if the fence is active
halt_o = halt_csr_i || halt_acc_i || fence_active_q;
end
// ----------------------
// Registers
// ----------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
fence_active_q <= 1'b0;
flush_dcache_o <= 1'b0;
end else begin
fence_active_q <= fence_active_d;
// register on the flush signal, this signal might be critical
flush_dcache_o <= flush_dcache;
end
end
endmodule endmodule

View file

@ -14,62 +14,63 @@
// to the scoreboard. // to the scoreboard.
module csr_buffer import ariane_pkg::*; #( module csr_buffer
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
input logic flush_i, input logic flush_i,
input fu_data_t fu_data_i, input fu_data_t fu_data_i,
output logic csr_ready_o, // FU is ready e.g. not busy output logic csr_ready_o, // FU is ready e.g. not busy
input logic csr_valid_i, // Input is valid input logic csr_valid_i, // Input is valid
output riscv::xlen_t csr_result_o, output riscv::xlen_t csr_result_o,
input logic csr_commit_i, // commit the pending CSR OP input logic csr_commit_i, // commit the pending CSR OP
// to CSR file // to CSR file
output logic [11:0] csr_addr_o // CSR address to commit stage output logic [11:0] csr_addr_o // CSR address to commit stage
); );
// this is a single entry store buffer for the address of the CSR // this is a single entry store buffer for the address of the CSR
// which we are going to need in the commit stage // which we are going to need in the commit stage
struct packed { struct packed {
logic [11:0] csr_address; logic [11:0] csr_address;
logic valid; logic valid;
} csr_reg_n, csr_reg_q; }
csr_reg_n, csr_reg_q;
// control logic, scoreboard signals // control logic, scoreboard signals
assign csr_result_o = fu_data_i.operand_a; assign csr_result_o = fu_data_i.operand_a;
assign csr_addr_o = csr_reg_q.csr_address; assign csr_addr_o = csr_reg_q.csr_address;
// write logic // write logic
always_comb begin : write always_comb begin : write
csr_reg_n = csr_reg_q; csr_reg_n = csr_reg_q;
// by default we are ready // by default we are ready
csr_ready_o = 1'b1; csr_ready_o = 1'b1;
// if we have a valid uncomiited csr req or are just getting one WITHOUT a commit in, we are not ready // if we have a valid uncomiited csr req or are just getting one WITHOUT a commit in, we are not ready
if ((csr_reg_q.valid || csr_valid_i) && ~csr_commit_i) if ((csr_reg_q.valid || csr_valid_i) && ~csr_commit_i) csr_ready_o = 1'b0;
csr_ready_o = 1'b0; // if we got a valid from the scoreboard
// if we got a valid from the scoreboard // store the CSR address
// store the CSR address if (csr_valid_i) begin
if (csr_valid_i) begin csr_reg_n.csr_address = fu_data_i.operand_b[11:0];
csr_reg_n.csr_address = fu_data_i.operand_b[11:0]; csr_reg_n.valid = 1'b1;
csr_reg_n.valid = 1'b1;
end
// if we get a commit and no new valid instruction -> clear the valid bit
if (csr_commit_i && ~csr_valid_i) begin
csr_reg_n.valid = 1'b0;
end
// clear the buffer if we flushed
if (flush_i)
csr_reg_n.valid = 1'b0;
end end
// sequential process // if we get a commit and no new valid instruction -> clear the valid bit
always_ff @(posedge clk_i or negedge rst_ni) begin if (csr_commit_i && ~csr_valid_i) begin
if(~rst_ni) begin csr_reg_n.valid = 1'b0;
csr_reg_q <= '{default: 0};
end else begin
csr_reg_q <= csr_reg_n;
end
end end
// clear the buffer if we flushed
if (flush_i) csr_reg_n.valid = 1'b0;
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
csr_reg_q <= '{default: 0};
end else begin
csr_reg_q <= csr_reg_n;
end
end
endmodule endmodule

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -7,14 +7,16 @@
// Module stub for the cva6_accel_first_pass_decoder. Replace this with your accelerator's // Module stub for the cva6_accel_first_pass_decoder. Replace this with your accelerator's
// first pass decoder. // first pass decoder.
module cva6_accel_first_pass_decoder import ariane_pkg::*; ( module cva6_accel_first_pass_decoder
input logic [31:0] instruction_i, // instruction from IF import ariane_pkg::*;
input riscv::xs_t fs_i, // floating point extension status (
input riscv::xs_t vs_i, // vector extension status input logic [31:0] instruction_i, // instruction from IF
output logic is_accel_o, // is an accelerator instruction input riscv::xs_t fs_i, // floating point extension status
output scoreboard_entry_t instruction_o, // predecoded instruction input riscv::xs_t vs_i, // vector extension status
output logic illegal_instr_o, // is an illegal instruction output logic is_accel_o, // is an accelerator instruction
output logic is_control_flow_instr_o // is a control flow instruction output scoreboard_entry_t instruction_o, // predecoded instruction
output logic illegal_instr_o, // is an illegal instruction
output logic is_control_flow_instr_o // is a control flow instruction
); );
assign is_accel_o = 1'b0; assign is_accel_o = 1'b0;

View file

@ -9,51 +9,53 @@
// Example coprocessor adds rs1,rs2(,rs3) together and gives back the result to the CPU via the CoreV-X-Interface. // Example coprocessor adds rs1,rs2(,rs3) together and gives back the result to the CPU via the CoreV-X-Interface.
// Coprocessor delays the sending of the result depending on result least significant bits. // Coprocessor delays the sending of the result depending on result least significant bits.
module cvxif_example_coprocessor import cvxif_pkg::*; module cvxif_example_coprocessor
import cvxif_instr_pkg::*;( import cvxif_pkg::*;
input logic clk_i, // Clock import cvxif_instr_pkg::*;
input logic rst_ni, // Asynchronous reset active low (
input cvxif_req_t cvxif_req_i, input logic clk_i, // Clock
output cvxif_resp_t cvxif_resp_o input logic rst_ni, // Asynchronous reset active low
input cvxif_req_t cvxif_req_i,
output cvxif_resp_t cvxif_resp_o
); );
//Compressed interface //Compressed interface
logic x_compressed_valid_i; logic x_compressed_valid_i;
logic x_compressed_ready_o; logic x_compressed_ready_o;
x_compressed_req_t x_compressed_req_i; x_compressed_req_t x_compressed_req_i;
x_compressed_resp_t x_compressed_resp_o; x_compressed_resp_t x_compressed_resp_o;
//Issue interface //Issue interface
logic x_issue_valid_i; logic x_issue_valid_i;
logic x_issue_ready_o; logic x_issue_ready_o;
x_issue_req_t x_issue_req_i; x_issue_req_t x_issue_req_i;
x_issue_resp_t x_issue_resp_o; x_issue_resp_t x_issue_resp_o;
//Commit interface //Commit interface
logic x_commit_valid_i; logic x_commit_valid_i;
x_commit_t x_commit_i; x_commit_t x_commit_i;
//Memory interface //Memory interface
logic x_mem_valid_o; logic x_mem_valid_o;
logic x_mem_ready_i; logic x_mem_ready_i;
x_mem_req_t x_mem_req_o; x_mem_req_t x_mem_req_o;
x_mem_resp_t x_mem_resp_i; x_mem_resp_t x_mem_resp_i;
//Memory result interface //Memory result interface
logic x_mem_result_valid_i; logic x_mem_result_valid_i;
x_mem_result_t x_mem_result_i; x_mem_result_t x_mem_result_i;
//Result interface //Result interface
logic x_result_valid_o; logic x_result_valid_o;
logic x_result_ready_i; logic x_result_ready_i;
x_result_t x_result_o; x_result_t x_result_o;
assign x_compressed_valid_i = cvxif_req_i.x_compressed_valid; assign x_compressed_valid_i = cvxif_req_i.x_compressed_valid;
assign x_compressed_req_i = cvxif_req_i.x_compressed_req; assign x_compressed_req_i = cvxif_req_i.x_compressed_req;
assign x_issue_valid_i = cvxif_req_i.x_issue_valid; assign x_issue_valid_i = cvxif_req_i.x_issue_valid;
assign x_issue_req_i = cvxif_req_i.x_issue_req; assign x_issue_req_i = cvxif_req_i.x_issue_req;
assign x_commit_valid_i = cvxif_req_i.x_commit_valid; assign x_commit_valid_i = cvxif_req_i.x_commit_valid;
assign x_commit_i = cvxif_req_i.x_commit; assign x_commit_i = cvxif_req_i.x_commit;
assign x_mem_ready_i = cvxif_req_i.x_mem_ready; assign x_mem_ready_i = cvxif_req_i.x_mem_ready;
assign x_mem_resp_i = cvxif_req_i.x_mem_resp; assign x_mem_resp_i = cvxif_req_i.x_mem_resp;
assign x_mem_result_valid_i = cvxif_req_i.x_mem_result_valid; assign x_mem_result_valid_i = cvxif_req_i.x_mem_result_valid;
assign x_mem_result_i = cvxif_req_i.x_mem_result; assign x_mem_result_i = cvxif_req_i.x_mem_result;
assign x_result_ready_i = cvxif_req_i.x_result_ready; assign x_result_ready_i = cvxif_req_i.x_result_ready;
assign cvxif_resp_o.x_compressed_ready = x_compressed_ready_o; assign cvxif_resp_o.x_compressed_ready = x_compressed_ready_o;
assign cvxif_resp_o.x_compressed_resp = x_compressed_resp_o; assign cvxif_resp_o.x_compressed_resp = x_compressed_resp_o;
@ -65,17 +67,17 @@ module cvxif_example_coprocessor import cvxif_pkg::*;
assign cvxif_resp_o.x_result = x_result_o; assign cvxif_resp_o.x_result = x_result_o;
//Compressed interface //Compressed interface
assign x_compressed_ready_o = '0; assign x_compressed_ready_o = '0;
assign x_compressed_resp_o.instr = '0; assign x_compressed_resp_o.instr = '0;
assign x_compressed_resp_o.accept = '0; assign x_compressed_resp_o.accept = '0;
instr_decoder #( instr_decoder #(
.NbInstr ( cvxif_instr_pkg::NbInstr ), .NbInstr (cvxif_instr_pkg::NbInstr),
.CoproInstr ( cvxif_instr_pkg::CoproInstr ) .CoproInstr(cvxif_instr_pkg::CoproInstr)
) instr_decoder_i ( ) instr_decoder_i (
.clk_i ( clk_i ), .clk_i (clk_i),
.x_issue_req_i ( x_issue_req_i ), .x_issue_req_i (x_issue_req_i),
.x_issue_resp_o ( x_issue_resp_o ) .x_issue_resp_o(x_issue_resp_o)
); );
typedef struct packed { typedef struct packed {
@ -86,20 +88,20 @@ module cvxif_example_coprocessor import cvxif_pkg::*;
logic fifo_full, fifo_empty; logic fifo_full, fifo_empty;
logic x_issue_ready_q; logic x_issue_ready_q;
logic instr_push, instr_pop; logic instr_push, instr_pop;
x_issue_t req_i; x_issue_t req_i;
x_issue_t req_o; x_issue_t req_o;
assign instr_push = x_issue_resp_o.accept ? 1 : 0 ; assign instr_push = x_issue_resp_o.accept ? 1 : 0;
assign instr_pop = (x_commit_i.x_commit_kill && x_commit_valid_i) || x_result_valid_o; assign instr_pop = (x_commit_i.x_commit_kill && x_commit_valid_i) || x_result_valid_o;
assign x_issue_ready_q = ~fifo_full; // if something is in the fifo, the instruction is being processed assign x_issue_ready_q = ~fifo_full; // if something is in the fifo, the instruction is being processed
// so we can't receive anything else // so we can't receive anything else
assign req_i.req = x_issue_req_i; assign req_i.req = x_issue_req_i;
assign req_i.resp = x_issue_resp_o; assign req_i.resp = x_issue_resp_o;
always_ff @(posedge clk_i or negedge rst_ni) begin : regs always_ff @(posedge clk_i or negedge rst_ni) begin : regs
if(!rst_ni) begin if (!rst_ni) begin
x_issue_ready_o <= 1; x_issue_ready_o <= 1;
end else begin end else begin
x_issue_ready_o <= x_issue_ready_q; x_issue_ready_o <= x_issue_ready_q;
@ -107,47 +109,47 @@ module cvxif_example_coprocessor import cvxif_pkg::*;
end end
fifo_v3 #( fifo_v3 #(
.FALL_THROUGH ( 1 ), //data_o ready and pop in the same cycle .FALL_THROUGH(1), //data_o ready and pop in the same cycle
.DATA_WIDTH ( 64 ), .DATA_WIDTH (64),
.DEPTH ( 8 ), .DEPTH (8),
.dtype ( x_issue_t ) .dtype (x_issue_t)
) fifo_commit_i ( ) fifo_commit_i (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i (1'b0),
.testmode_i ( 1'b0 ), .testmode_i(1'b0),
.full_o ( fifo_full ), .full_o (fifo_full),
.empty_o ( fifo_empty ), .empty_o (fifo_empty),
.usage_o ( ), .usage_o (),
.data_i ( req_i ), .data_i (req_i),
.push_i ( instr_push ), .push_i (instr_push),
.data_o ( req_o ), .data_o (req_o),
.pop_i ( instr_pop ) .pop_i (instr_pop)
); );
logic [3:0] c; logic [3:0] c;
counter #( counter #(
.WIDTH(4) .WIDTH(4)
) counter_i( ) counter_i (
.clk_i ( clk_i), .clk_i (clk_i),
.rst_ni ( rst_ni), .rst_ni (rst_ni),
.clear_i ( ~x_commit_i.x_commit_kill && x_commit_valid_i ), .clear_i (~x_commit_i.x_commit_kill && x_commit_valid_i),
.en_i ( 1'b1 ), .en_i (1'b1),
.load_i ( ), .load_i (),
.down_i ( ), .down_i (),
.d_i ( ), .d_i (),
.q_o ( c ), .q_o (c),
.overflow_o ( ) .overflow_o()
); );
always_comb begin always_comb begin
x_result_o.data = req_o.req.rs[0] + req_o.req.rs[1] + ( X_NUM_RS == 3 ? req_o.req.rs[2] : 0); x_result_o.data = req_o.req.rs[0] + req_o.req.rs[1] + (X_NUM_RS == 3 ? req_o.req.rs[2] : 0);
x_result_valid_o = (c == x_result_o.data[3:0]) && ~fifo_empty ? 1 : 0; x_result_valid_o = (c == x_result_o.data[3:0]) && ~fifo_empty ? 1 : 0;
x_result_o.id = req_o.req.id; x_result_o.id = req_o.req.id;
x_result_o.rd = req_o.req.instr[11:7]; x_result_o.rd = req_o.req.instr[11:7];
x_result_o.we = req_o.resp.writeback & x_result_valid_o; x_result_o.we = req_o.resp.writeback & x_result_valid_o;
x_result_o.exc = 0; x_result_o.exc = 0;
x_result_o.exccode = 0; x_result_o.exccode = 0;
end end
endmodule endmodule

View file

@ -10,38 +10,38 @@
package cvxif_instr_pkg; package cvxif_instr_pkg;
typedef struct packed { typedef struct packed {
logic [31:0] instr; logic [31:0] instr;
logic [31:0] mask; logic [31:0] mask;
cvxif_pkg::x_issue_resp_t resp; cvxif_pkg::x_issue_resp_t resp;
} copro_issue_resp_t; } copro_issue_resp_t;
// 2 Possible RISCV instructions for Coprocessor // 2 Possible RISCV instructions for Coprocessor
parameter int unsigned NbInstr = 2; parameter int unsigned NbInstr = 2;
parameter copro_issue_resp_t CoproInstr[NbInstr] = '{ parameter copro_issue_resp_t CoproInstr[NbInstr] = '{
'{ '{
instr: 32'b 00000_00_00000_00000_0_00_00000_0101011, // custom1 opcode instr: 32'b00000_00_00000_00000_0_00_00000_0101011, // custom1 opcode
mask: 32'b 00000_00_00000_00000_0_00_00000_1111111, mask: 32'b00000_00_00000_00000_0_00_00000_1111111,
resp : '{ resp : '{
accept : 1'b1, accept : 1'b1,
writeback : 1'b0, writeback : 1'b0,
dualwrite : 1'b0, dualwrite : 1'b0,
dualread : 1'b0, dualread : 1'b0,
loadstore : 1'b0, loadstore : 1'b0,
exc : 1'b0 exc : 1'b0
}
},
'{
instr: 32'b00000_00_00000_00000_0_00_00000_1011011, // custom2 opcode
mask: 32'b00000_00_00000_00000_0_00_00000_1111111,
resp : '{
accept : 1'b1,
writeback : 1'b1,
dualwrite : 1'b0,
dualread : 1'b0,
loadstore : 1'b0,
exc : 1'b0
}
} }
},
'{
instr: 32'b 00000_00_00000_00000_0_00_00000_1011011, // custom2 opcode
mask: 32'b 00000_00_00000_00000_0_00_00000_1111111,
resp : '{
accept : 1'b1,
writeback : 1'b1,
dualwrite : 1'b0,
dualread : 1'b0,
loadstore : 1'b0,
exc : 1'b0
}
}
}; };
endpackage endpackage

View file

@ -7,42 +7,43 @@
// //
// Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com) // Original Author: Guillaume Chauvon (guillaume.chauvon@thalesgroup.com)
module instr_decoder import cvxif_pkg::*; #( module instr_decoder
parameter int NbInstr = 1, import cvxif_pkg::*;
parameter cvxif_instr_pkg::copro_issue_resp_t CoproInstr[NbInstr] = {0} #(
) parameter int NbInstr = 1,
( parameter cvxif_instr_pkg::copro_issue_resp_t CoproInstr[NbInstr] = {0}
input logic clk_i, ) (
input x_issue_req_t x_issue_req_i, input logic clk_i,
output x_issue_resp_t x_issue_resp_o input x_issue_req_t x_issue_req_i,
output x_issue_resp_t x_issue_resp_o
); );
logic [NbInstr-1:0] sel; logic [NbInstr-1:0] sel;
for (genvar i = 0; i < NbInstr; i++) begin : gen_predecoder_selector for (genvar i = 0; i < NbInstr; i++) begin : gen_predecoder_selector
assign sel[i] = assign sel[i] = ((CoproInstr[i].mask & x_issue_req_i.instr) == CoproInstr[i].instr);
((CoproInstr[i].mask & x_issue_req_i.instr) == CoproInstr[i].instr);
end end
always_comb begin always_comb begin
x_issue_resp_o.accept = '0; x_issue_resp_o.accept = '0;
x_issue_resp_o.writeback = '0; x_issue_resp_o.writeback = '0;
x_issue_resp_o.dualwrite = '0; x_issue_resp_o.dualwrite = '0;
x_issue_resp_o.dualread = '0; x_issue_resp_o.dualread = '0;
x_issue_resp_o.loadstore = '0; x_issue_resp_o.loadstore = '0;
x_issue_resp_o.exc = '0; x_issue_resp_o.exc = '0;
for (int unsigned i = 0; i < NbInstr; i++) begin for (int unsigned i = 0; i < NbInstr; i++) begin
if (sel[i]) begin if (sel[i]) begin
x_issue_resp_o.accept = CoproInstr[i].resp.accept; x_issue_resp_o.accept = CoproInstr[i].resp.accept;
x_issue_resp_o.writeback = CoproInstr[i].resp.writeback; x_issue_resp_o.writeback = CoproInstr[i].resp.writeback;
x_issue_resp_o.dualwrite = CoproInstr[i].resp.dualwrite; x_issue_resp_o.dualwrite = CoproInstr[i].resp.dualwrite;
x_issue_resp_o.dualread = CoproInstr[i].resp.dualread; x_issue_resp_o.dualread = CoproInstr[i].resp.dualread;
x_issue_resp_o.loadstore = CoproInstr[i].resp.loadstore; x_issue_resp_o.loadstore = CoproInstr[i].resp.loadstore;
x_issue_resp_o.exc = CoproInstr[i].resp.exc; x_issue_resp_o.exc = CoproInstr[i].resp.exc;
end end
end end
end end
assert property( @(posedge clk_i) $onehot0(sel)) else $warning("This offloaded instruction is valid for multiple coprocessor instructions !"); assert property (@(posedge clk_i) $onehot0(sel))
else $warning("This offloaded instruction is valid for multiple coprocessor instructions !");
endmodule endmodule

View file

@ -10,93 +10,95 @@
// Functional Unit for the logic of the CoreV-X-Interface // Functional Unit for the logic of the CoreV-X-Interface
module cvxif_fu import ariane_pkg::*; #( module cvxif_fu
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input fu_data_t fu_data_i, input fu_data_t fu_data_i,
input riscv::priv_lvl_t priv_lvl_i, input riscv::priv_lvl_t priv_lvl_i,
//from issue //from issue
input logic x_valid_i, input logic x_valid_i,
output logic x_ready_o, output logic x_ready_o,
input logic [31:0] x_off_instr_i, input logic [ 31:0] x_off_instr_i,
//to writeback //to writeback
output logic [TRANS_ID_BITS-1:0] x_trans_id_o, output logic [TRANS_ID_BITS-1:0] x_trans_id_o,
output exception_t x_exception_o, output exception_t x_exception_o,
output riscv::xlen_t x_result_o, output riscv::xlen_t x_result_o,
output logic x_valid_o, output logic x_valid_o,
output logic x_we_o, output logic x_we_o,
//to coprocessor //to coprocessor
output cvxif_pkg::cvxif_req_t cvxif_req_o, output cvxif_pkg::cvxif_req_t cvxif_req_o,
input cvxif_pkg::cvxif_resp_t cvxif_resp_i input cvxif_pkg::cvxif_resp_t cvxif_resp_i
); );
logic illegal_n, illegal_q; logic illegal_n, illegal_q;
logic [TRANS_ID_BITS-1:0] illegal_id_n, illegal_id_q; logic [TRANS_ID_BITS-1:0] illegal_id_n, illegal_id_q;
logic [31:0] illegal_instr_n, illegal_instr_q; logic [31:0] illegal_instr_n, illegal_instr_q;
always_comb begin always_comb begin
cvxif_req_o = '0; cvxif_req_o = '0;
cvxif_req_o.x_result_ready = 1'b1; cvxif_req_o.x_result_ready = 1'b1;
x_ready_o = cvxif_resp_i.x_issue_ready; x_ready_o = cvxif_resp_i.x_issue_ready;
if (x_valid_i) begin if (x_valid_i) begin
cvxif_req_o.x_issue_valid = x_valid_i; cvxif_req_o.x_issue_valid = x_valid_i;
cvxif_req_o.x_issue_req.instr = x_off_instr_i; cvxif_req_o.x_issue_req.instr = x_off_instr_i;
cvxif_req_o.x_issue_req.mode = priv_lvl_i; cvxif_req_o.x_issue_req.mode = priv_lvl_i;
cvxif_req_o.x_issue_req.id = fu_data_i.trans_id; cvxif_req_o.x_issue_req.id = fu_data_i.trans_id;
cvxif_req_o.x_issue_req.rs[0] = fu_data_i.operand_a; cvxif_req_o.x_issue_req.rs[0] = fu_data_i.operand_a;
cvxif_req_o.x_issue_req.rs[1] = fu_data_i.operand_b; cvxif_req_o.x_issue_req.rs[1] = fu_data_i.operand_b;
if (cvxif_pkg::X_NUM_RS == 3) begin if (cvxif_pkg::X_NUM_RS == 3) begin
cvxif_req_o.x_issue_req.rs[2] = fu_data_i.imm; cvxif_req_o.x_issue_req.rs[2] = fu_data_i.imm;
end end
cvxif_req_o.x_issue_req.rs_valid = cvxif_pkg::X_NUM_RS == 3 ? 3'b111 : 2'b11; cvxif_req_o.x_issue_req.rs_valid = cvxif_pkg::X_NUM_RS == 3 ? 3'b111 : 2'b11;
cvxif_req_o.x_commit_valid = x_valid_i; cvxif_req_o.x_commit_valid = x_valid_i;
cvxif_req_o.x_commit.id = fu_data_i.trans_id; cvxif_req_o.x_commit.id = fu_data_i.trans_id;
cvxif_req_o.x_commit.x_commit_kill = 1'b0; cvxif_req_o.x_commit.x_commit_kill = 1'b0;
end
end
always_comb begin
illegal_n = illegal_q;
illegal_id_n = illegal_id_q;
illegal_instr_n = illegal_instr_q;
if (~cvxif_resp_i.x_issue_resp.accept && cvxif_req_o.x_issue_valid && cvxif_resp_i.x_issue_ready && ~illegal_n) begin
illegal_n = 1'b1;
illegal_id_n = cvxif_req_o.x_issue_req.id;
illegal_instr_n = cvxif_req_o.x_issue_req.instr;
end
x_valid_o = cvxif_resp_i.x_result_valid; //Read result only when CVXIF is enabled
x_trans_id_o = x_valid_o ? cvxif_resp_i.x_result.id : '0;
x_result_o = x_valid_o ? cvxif_resp_i.x_result.data : '0;
x_exception_o.cause = x_valid_o ? {{(riscv::XLEN-6){1'b0}}, cvxif_resp_i.x_result.exccode} : '0;
x_exception_o.valid = x_valid_o ? cvxif_resp_i.x_result.exc : '0;
x_exception_o.tval = '0;
x_we_o = x_valid_o ? cvxif_resp_i.x_result.we : '0;
if (illegal_n) begin
if (~x_valid_o) begin
x_trans_id_o = illegal_id_n;
x_result_o = '0;
x_valid_o = 1'b1;
x_exception_o.cause = riscv::ILLEGAL_INSTR;
x_exception_o.valid = 1'b1;
x_exception_o.tval = illegal_instr_n;
x_we_o = '0;
illegal_n = '0; // Reset flag for illegal instr. illegal_id and illegal instr values are a don't care, no need to reset it.
end end
end end
end
always_comb begin always_ff @(posedge clk_i, negedge rst_ni) begin
illegal_n = illegal_q; if (~rst_ni) begin
illegal_id_n = illegal_id_q; illegal_q <= 1'b0;
illegal_instr_n = illegal_instr_q; illegal_id_q <= '0;
if (~cvxif_resp_i.x_issue_resp.accept && cvxif_req_o.x_issue_valid && cvxif_resp_i.x_issue_ready && ~illegal_n) begin illegal_instr_q <= '0;
illegal_n = 1'b1; end else begin
illegal_id_n = cvxif_req_o.x_issue_req.id; illegal_q <= illegal_n;
illegal_instr_n = cvxif_req_o.x_issue_req.instr; illegal_id_q <= illegal_id_n;
end illegal_instr_q <= illegal_instr_n;
x_valid_o = cvxif_resp_i.x_result_valid; //Read result only when CVXIF is enabled
x_trans_id_o = x_valid_o ? cvxif_resp_i.x_result.id : '0;
x_result_o = x_valid_o ? cvxif_resp_i.x_result.data : '0;
x_exception_o.cause = x_valid_o ? {{(riscv::XLEN-6){1'b0}}, cvxif_resp_i.x_result.exccode} : '0;
x_exception_o.valid = x_valid_o ? cvxif_resp_i.x_result.exc : '0;
x_exception_o.tval = '0;
x_we_o = x_valid_o ? cvxif_resp_i.x_result.we : '0;
if (illegal_n) begin
if (~x_valid_o) begin
x_trans_id_o = illegal_id_n;
x_result_o = '0;
x_valid_o = 1'b1;
x_exception_o.cause = riscv::ILLEGAL_INSTR;
x_exception_o.valid = 1'b1;
x_exception_o.tval = illegal_instr_n;
x_we_o = '0;
illegal_n = '0; // Reset flag for illegal instr. illegal_id and illegal instr values are a don't care, no need to reset it.
end
end
end
always_ff @(posedge clk_i, negedge rst_ni) begin
if (~rst_ni) begin
illegal_q <= 1'b0;
illegal_id_q <= '0;
illegal_instr_q <= '0;
end else begin
illegal_q <= illegal_n;
illegal_id_q <= illegal_id_n;
illegal_instr_q <= illegal_instr_n;
end
end end
end
endmodule endmodule

File diff suppressed because it is too large Load diff

View file

@ -14,398 +14,400 @@
// Description: Instantiation of all functional units residing in the execute stage // Description: Instantiation of all functional units residing in the execute stage
module ex_stage import ariane_pkg::*; #( module ex_stage
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned ASID_WIDTH = 1 parameter int unsigned ASID_WIDTH = 1
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
input logic flush_i, input logic flush_i,
input logic debug_mode_i, input logic debug_mode_i,
input logic [riscv::VLEN-1:0] rs1_forwarding_i, input logic [riscv::VLEN-1:0] rs1_forwarding_i,
input logic [riscv::VLEN-1:0] rs2_forwarding_i, input logic [riscv::VLEN-1:0] rs2_forwarding_i,
input fu_data_t fu_data_i, input fu_data_t fu_data_i,
input logic [riscv::VLEN-1:0] pc_i, // PC of current instruction input logic [riscv::VLEN-1:0] pc_i, // PC of current instruction
input logic is_compressed_instr_i, // we need to know if this was a compressed instruction input logic is_compressed_instr_i, // we need to know if this was a compressed instruction
// in order to calculate the next PC on a mis-predict // in order to calculate the next PC on a mis-predict
// Fixed latency unit(s) // Fixed latency unit(s)
output riscv::xlen_t flu_result_o, output riscv::xlen_t flu_result_o,
output logic [TRANS_ID_BITS-1:0] flu_trans_id_o, // ID of scoreboard entry at which to write back output logic [TRANS_ID_BITS-1:0] flu_trans_id_o, // ID of scoreboard entry at which to write back
output exception_t flu_exception_o, output exception_t flu_exception_o,
output logic flu_ready_o, // FLU is ready output logic flu_ready_o, // FLU is ready
output logic flu_valid_o, // FLU result is valid output logic flu_valid_o, // FLU result is valid
// Branches and Jumps // Branches and Jumps
// ALU 1 // ALU 1
input logic alu_valid_i, // Output is valid input logic alu_valid_i, // Output is valid
// Branch Unit // Branch Unit
input logic branch_valid_i, // we are using the branch unit input logic branch_valid_i, // we are using the branch unit
input branchpredict_sbe_t branch_predict_i, input branchpredict_sbe_t branch_predict_i,
output bp_resolve_t resolved_branch_o, // the branch engine uses the write back from the ALU output bp_resolve_t resolved_branch_o, // the branch engine uses the write back from the ALU
output logic resolve_branch_o, // to ID signaling that we resolved the branch output logic resolve_branch_o, // to ID signaling that we resolved the branch
// CSR // CSR
input logic csr_valid_i, input logic csr_valid_i,
output logic [11:0] csr_addr_o, output logic [11:0] csr_addr_o,
input logic csr_commit_i, input logic csr_commit_i,
// MULT // MULT
input logic mult_valid_i, // Output is valid input logic mult_valid_i, // Output is valid
// LSU // LSU
output logic lsu_ready_o, // FU is ready output logic lsu_ready_o, // FU is ready
input logic lsu_valid_i, // Input is valid input logic lsu_valid_i, // Input is valid
output logic load_valid_o, output logic load_valid_o,
output riscv::xlen_t load_result_o, output riscv::xlen_t load_result_o,
output logic [TRANS_ID_BITS-1:0] load_trans_id_o, output logic [TRANS_ID_BITS-1:0] load_trans_id_o,
output exception_t load_exception_o, output exception_t load_exception_o,
output logic store_valid_o, output logic store_valid_o,
output riscv::xlen_t store_result_o, output riscv::xlen_t store_result_o,
output logic [TRANS_ID_BITS-1:0] store_trans_id_o, output logic [TRANS_ID_BITS-1:0] store_trans_id_o,
output exception_t store_exception_o, output exception_t store_exception_o,
input logic lsu_commit_i, input logic lsu_commit_i,
output logic lsu_commit_ready_o, // commit queue is ready to accept another commit request output logic lsu_commit_ready_o, // commit queue is ready to accept another commit request
input logic [TRANS_ID_BITS-1:0] commit_tran_id_i, input logic [TRANS_ID_BITS-1:0] commit_tran_id_i,
input logic stall_st_pending_i, input logic stall_st_pending_i,
output logic no_st_pending_o, output logic no_st_pending_o,
input logic amo_valid_commit_i, input logic amo_valid_commit_i,
// FPU // FPU
output logic fpu_ready_o, // FU is ready output logic fpu_ready_o, // FU is ready
input logic fpu_valid_i, // Output is valid input logic fpu_valid_i, // Output is valid
input logic [1:0] fpu_fmt_i, // FP format input logic [1:0] fpu_fmt_i, // FP format
input logic [2:0] fpu_rm_i, // FP rm input logic [2:0] fpu_rm_i, // FP rm
input logic [2:0] fpu_frm_i, // FP frm csr input logic [2:0] fpu_frm_i, // FP frm csr
input logic [6:0] fpu_prec_i, // FP precision control input logic [6:0] fpu_prec_i, // FP precision control
output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o, output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o,
output riscv::xlen_t fpu_result_o, output riscv::xlen_t fpu_result_o,
output logic fpu_valid_o, output logic fpu_valid_o,
output exception_t fpu_exception_o, output exception_t fpu_exception_o,
// CoreV-X-Interface // CoreV-X-Interface
input logic x_valid_i, input logic x_valid_i,
output logic x_ready_o, output logic x_ready_o,
input logic [31:0] x_off_instr_i, input logic [31:0] x_off_instr_i,
output logic [TRANS_ID_BITS-1:0] x_trans_id_o, output logic [TRANS_ID_BITS-1:0] x_trans_id_o,
output exception_t x_exception_o, output exception_t x_exception_o,
output riscv::xlen_t x_result_o, output riscv::xlen_t x_result_o,
output logic x_valid_o, output logic x_valid_o,
output logic x_we_o, output logic x_we_o,
output cvxif_pkg::cvxif_req_t cvxif_req_o, output cvxif_pkg::cvxif_req_t cvxif_req_o,
input cvxif_pkg::cvxif_resp_t cvxif_resp_i, input cvxif_pkg::cvxif_resp_t cvxif_resp_i,
input logic acc_valid_i, // Output is valid input logic acc_valid_i, // Output is valid
// Memory Management // Memory Management
input logic enable_translation_i, input logic enable_translation_i,
input logic en_ld_st_translation_i, input logic en_ld_st_translation_i,
input logic flush_tlb_i, input logic flush_tlb_i,
input riscv::priv_lvl_t priv_lvl_i, input riscv::priv_lvl_t priv_lvl_i,
input riscv::priv_lvl_t ld_st_priv_lvl_i, input riscv::priv_lvl_t ld_st_priv_lvl_i,
input logic sum_i, input logic sum_i,
input logic mxr_i, input logic mxr_i,
input logic [riscv::PPNW-1:0] satp_ppn_i, input logic [riscv::PPNW-1:0] satp_ppn_i,
input logic [ASID_WIDTH-1:0] asid_i, input logic [ ASID_WIDTH-1:0] asid_i,
// icache translation requests // icache translation requests
input icache_arsp_t icache_areq_i, input icache_arsp_t icache_areq_i,
output icache_areq_t icache_areq_o, output icache_areq_t icache_areq_o,
// interface to dcache // interface to dcache
input dcache_req_o_t [2:0] dcache_req_ports_i, input dcache_req_o_t [2:0] dcache_req_ports_i,
output dcache_req_i_t [2:0] dcache_req_ports_o, output dcache_req_i_t [2:0] dcache_req_ports_o,
input logic dcache_wbuffer_empty_i, input logic dcache_wbuffer_empty_i,
input logic dcache_wbuffer_not_ni_i, input logic dcache_wbuffer_not_ni_i,
output amo_req_t amo_req_o, // request to cache subsytem output amo_req_t amo_req_o, // request to cache subsytem
input amo_resp_t amo_resp_i, // response from cache subsystem input amo_resp_t amo_resp_i, // response from cache subsystem
// Performance counters // Performance counters
output logic itlb_miss_o, output logic itlb_miss_o,
output logic dtlb_miss_o, output logic dtlb_miss_o,
// PMPs // PMPs
input riscv::pmpcfg_t [15:0] pmpcfg_i, input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic[15:0][riscv::PLEN-3:0] pmpaddr_i, input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
// RVFI // RVFI
output [riscv::VLEN-1:0] lsu_addr_o, output [ riscv::VLEN-1:0] lsu_addr_o,
output [riscv::PLEN-1:0] mem_paddr_o, output [ riscv::PLEN-1:0] mem_paddr_o,
output [(riscv::XLEN/8)-1:0] lsu_rmask_o, output [ (riscv::XLEN/8)-1:0] lsu_rmask_o,
output [(riscv::XLEN/8)-1:0] lsu_wmask_o, output [ (riscv::XLEN/8)-1:0] lsu_wmask_o,
output [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_o output [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_o
); );
// ------------------------- // -------------------------
// Fixed Latency Units // Fixed Latency Units
// ------------------------- // -------------------------
// all fixed latency units share a single issue port and a sing write // all fixed latency units share a single issue port and a sing write
// port into the scoreboard. At the moment those are: // port into the scoreboard. At the moment those are:
// 1. ALU - all operations are single cycle // 1. ALU - all operations are single cycle
// 2. Branch unit: operation is single cycle, the ALU is needed // 2. Branch unit: operation is single cycle, the ALU is needed
// for comparison // for comparison
// 3. CSR: This is a small buffer which saves the address of the CSR. // 3. CSR: This is a small buffer which saves the address of the CSR.
// The value is then re-fetched once the instruction retires. The buffer // The value is then re-fetched once the instruction retires. The buffer
// is only a single entry deep, hence this operation will block all // is only a single entry deep, hence this operation will block all
// other operations once this buffer is full. This should not be a major // other operations once this buffer is full. This should not be a major
// concern though as CSRs are infrequent. // concern though as CSRs are infrequent.
// 4. Multiplier/Divider: The multiplier has a fixed latency of 1 cycle. // 4. Multiplier/Divider: The multiplier has a fixed latency of 1 cycle.
// The issue logic will take care of not issuing // The issue logic will take care of not issuing
// another instruction if it will collide on the // another instruction if it will collide on the
// output port. Divisions are arbitrary in length // output port. Divisions are arbitrary in length
// they will simply block the issue of all other // they will simply block the issue of all other
// instructions. // instructions.
logic current_instruction_is_sfence_vma; logic current_instruction_is_sfence_vma;
// These two register store the rs1 and rs2 parameters in case of `SFENCE_VMA` // These two register store the rs1 and rs2 parameters in case of `SFENCE_VMA`
// instruction to be used for TLB flush in the next clock cycle. // instruction to be used for TLB flush in the next clock cycle.
logic [ASID_WIDTH-1:0] asid_to_be_flushed; logic [ASID_WIDTH-1:0] asid_to_be_flushed;
logic [riscv::VLEN-1:0] vaddr_to_be_flushed; logic [riscv::VLEN-1:0] vaddr_to_be_flushed;
// from ALU to branch unit // from ALU to branch unit
logic alu_branch_res; // branch comparison result logic alu_branch_res; // branch comparison result
riscv::xlen_t alu_result, csr_result, mult_result; riscv::xlen_t alu_result, csr_result, mult_result;
logic [riscv::VLEN-1:0] branch_result; logic [riscv::VLEN-1:0] branch_result;
logic csr_ready, mult_ready; logic csr_ready, mult_ready;
logic [TRANS_ID_BITS-1:0] mult_trans_id; logic [TRANS_ID_BITS-1:0] mult_trans_id;
logic mult_valid; logic mult_valid;
// 1. ALU (combinatorial) // 1. ALU (combinatorial)
// data silence operation // data silence operation
fu_data_t alu_data; fu_data_t alu_data;
assign alu_data = (alu_valid_i | branch_valid_i) ? fu_data_i : '0; assign alu_data = (alu_valid_i | branch_valid_i) ? fu_data_i : '0;
alu #( alu #(
.CVA6Cfg ( CVA6Cfg ) .CVA6Cfg(CVA6Cfg)
) alu_i ( ) alu_i (
.clk_i,
.rst_ni,
.fu_data_i (alu_data),
.result_o (alu_result),
.alu_branch_res_o(alu_branch_res)
);
// 2. Branch Unit (combinatorial)
// we don't silence the branch unit as this is already critical and we do
// not want to add another layer of logic
branch_unit #(
.CVA6Cfg(CVA6Cfg)
) branch_unit_i (
.clk_i,
.rst_ni,
.debug_mode_i,
.fu_data_i,
.pc_i,
.is_compressed_instr_i,
// any functional unit is valid, check that there is no accidental mis-predict
.fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i || acc_valid_i ) ,
.branch_valid_i,
.branch_comp_res_i(alu_branch_res),
.branch_result_o(branch_result),
.branch_predict_i,
.resolved_branch_o,
.resolve_branch_o,
.branch_exception_o(flu_exception_o)
);
// 3. CSR (sequential)
csr_buffer #(
.CVA6Cfg(CVA6Cfg)
) csr_buffer_i (
.clk_i,
.rst_ni,
.flush_i,
.fu_data_i,
.csr_valid_i,
.csr_ready_o (csr_ready),
.csr_result_o(csr_result),
.csr_commit_i,
.csr_addr_o
);
assign flu_valid_o = alu_valid_i | branch_valid_i | csr_valid_i | mult_valid;
// result MUX
always_comb begin
// Branch result as default case
flu_result_o = {{riscv::XLEN - riscv::VLEN{1'b0}}, branch_result};
flu_trans_id_o = fu_data_i.trans_id;
// ALU result
if (alu_valid_i) begin
flu_result_o = alu_result;
// CSR result
end else if (csr_valid_i) begin
flu_result_o = csr_result;
end else if (mult_valid) begin
flu_result_o = mult_result;
flu_trans_id_o = mult_trans_id;
end
end
// ready flags for FLU
always_comb begin
flu_ready_o = csr_ready & mult_ready;
end
// 4. Multiplication (Sequential)
fu_data_t mult_data;
// input silencing of multiplier
assign mult_data = mult_valid_i ? fu_data_i : '0;
mult #(
.CVA6Cfg(CVA6Cfg)
) i_mult (
.clk_i,
.rst_ni,
.flush_i,
.mult_valid_i,
.fu_data_i (mult_data),
.result_o (mult_result),
.mult_valid_o (mult_valid),
.mult_ready_o (mult_ready),
.mult_trans_id_o(mult_trans_id)
);
// ----------------
// FPU
// ----------------
generate
if (CVA6Cfg.FpPresent) begin : fpu_gen
fu_data_t fpu_data;
assign fpu_data = fpu_valid_i ? fu_data_i : '0;
fpu_wrap #(
.CVA6Cfg(CVA6Cfg)
) fpu_i (
.clk_i,
.rst_ni,
.flush_i,
.fpu_valid_i,
.fpu_ready_o,
.fu_data_i(fpu_data),
.fpu_fmt_i,
.fpu_rm_i,
.fpu_frm_i,
.fpu_prec_i,
.fpu_trans_id_o,
.result_o (fpu_result_o),
.fpu_valid_o,
.fpu_exception_o
);
end else begin : no_fpu_gen
assign fpu_ready_o = '0;
assign fpu_trans_id_o = '0;
assign fpu_result_o = '0;
assign fpu_valid_o = '0;
assign fpu_exception_o = '0;
end
endgenerate
// ----------------
// Load-Store Unit
// ----------------
fu_data_t lsu_data;
assign lsu_data = lsu_valid_i ? fu_data_i : '0;
load_store_unit #(
.CVA6Cfg (CVA6Cfg),
.ASID_WIDTH(ASID_WIDTH)
) lsu_i (
.clk_i,
.rst_ni,
.flush_i,
.stall_st_pending_i,
.no_st_pending_o,
.fu_data_i (lsu_data),
.lsu_ready_o,
.lsu_valid_i,
.load_trans_id_o,
.load_result_o,
.load_valid_o,
.load_exception_o,
.store_trans_id_o,
.store_result_o,
.store_valid_o,
.store_exception_o,
.commit_i (lsu_commit_i),
.commit_ready_o (lsu_commit_ready_o),
.commit_tran_id_i,
.enable_translation_i,
.en_ld_st_translation_i,
.icache_areq_i,
.icache_areq_o,
.priv_lvl_i,
.ld_st_priv_lvl_i,
.sum_i,
.mxr_i,
.satp_ppn_i,
.asid_i,
.asid_to_be_flushed_i (asid_to_be_flushed),
.vaddr_to_be_flushed_i(vaddr_to_be_flushed),
.flush_tlb_i,
.itlb_miss_o,
.dtlb_miss_o,
.dcache_req_ports_i,
.dcache_req_ports_o,
.dcache_wbuffer_empty_i,
.dcache_wbuffer_not_ni_i,
.amo_valid_commit_i,
.amo_req_o,
.amo_resp_i,
.pmpcfg_i,
.pmpaddr_i,
.lsu_addr_o,
.mem_paddr_o,
.lsu_rmask_o,
.lsu_wmask_o,
.lsu_addr_trans_id_o
);
if (CVA6Cfg.CvxifEn) begin : gen_cvxif
fu_data_t cvxif_data;
assign cvxif_data = x_valid_i ? fu_data_i : '0;
cvxif_fu #(
.CVA6Cfg(CVA6Cfg)
) cvxif_fu_i (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.fu_data_i ( alu_data ),
.result_o ( alu_result ),
.alu_branch_res_o ( alu_branch_res )
);
// 2. Branch Unit (combinatorial)
// we don't silence the branch unit as this is already critical and we do
// not want to add another layer of logic
branch_unit #(
.CVA6Cfg ( CVA6Cfg )
) branch_unit_i (
.clk_i,
.rst_ni,
.debug_mode_i,
.fu_data_i, .fu_data_i,
.pc_i, .priv_lvl_i(ld_st_priv_lvl_i),
.is_compressed_instr_i, .x_valid_i,
// any functional unit is valid, check that there is no accidental mis-predict .x_ready_o,
.fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i || acc_valid_i ) , .x_off_instr_i,
.branch_valid_i, .x_trans_id_o,
.branch_comp_res_i ( alu_branch_res ), .x_exception_o,
.branch_result_o ( branch_result ), .x_result_o,
.branch_predict_i, .x_valid_o,
.resolved_branch_o, .x_we_o,
.resolve_branch_o, .cvxif_req_o,
.branch_exception_o ( flu_exception_o ) .cvxif_resp_i
); );
end else begin : gen_no_cvxif
assign cvxif_req_o = '0;
assign x_trans_id_o = '0;
assign x_exception_o = '0;
assign x_result_o = '0;
assign x_valid_o = '0;
end
// 3. CSR (sequential) always_ff @(posedge clk_i or negedge rst_ni) begin
csr_buffer #( if (~rst_ni) begin
.CVA6Cfg ( CVA6Cfg ) current_instruction_is_sfence_vma <= 1'b0;
) csr_buffer_i ( end else begin
.clk_i, if (flush_i) begin
.rst_ni, current_instruction_is_sfence_vma <= 1'b0;
.flush_i, end else if ((fu_data_i.operation == SFENCE_VMA) && csr_valid_i) begin
.fu_data_i, current_instruction_is_sfence_vma <= 1'b1;
.csr_valid_i,
.csr_ready_o ( csr_ready ),
.csr_result_o ( csr_result ),
.csr_commit_i,
.csr_addr_o
);
assign flu_valid_o = alu_valid_i | branch_valid_i | csr_valid_i | mult_valid;
// result MUX
always_comb begin
// Branch result as default case
flu_result_o = {{riscv::XLEN-riscv::VLEN{1'b0}}, branch_result};
flu_trans_id_o = fu_data_i.trans_id;
// ALU result
if (alu_valid_i) begin
flu_result_o = alu_result;
// CSR result
end else if (csr_valid_i) begin
flu_result_o = csr_result;
end else if (mult_valid) begin
flu_result_o = mult_result;
flu_trans_id_o = mult_trans_id;
end
end
// ready flags for FLU
always_comb begin
flu_ready_o = csr_ready & mult_ready;
end
// 4. Multiplication (Sequential)
fu_data_t mult_data;
// input silencing of multiplier
assign mult_data = mult_valid_i ? fu_data_i : '0;
mult #(
.CVA6Cfg ( CVA6Cfg )
) i_mult (
.clk_i,
.rst_ni,
.flush_i,
.mult_valid_i,
.fu_data_i ( mult_data ),
.result_o ( mult_result ),
.mult_valid_o ( mult_valid ),
.mult_ready_o ( mult_ready ),
.mult_trans_id_o ( mult_trans_id )
);
// ----------------
// FPU
// ----------------
generate
if (CVA6Cfg.FpPresent) begin : fpu_gen
fu_data_t fpu_data;
assign fpu_data = fpu_valid_i ? fu_data_i : '0;
fpu_wrap #(
.CVA6Cfg ( CVA6Cfg )
) fpu_i (
.clk_i,
.rst_ni,
.flush_i,
.fpu_valid_i,
.fpu_ready_o,
.fu_data_i ( fpu_data ),
.fpu_fmt_i,
.fpu_rm_i,
.fpu_frm_i,
.fpu_prec_i,
.fpu_trans_id_o,
.result_o ( fpu_result_o ),
.fpu_valid_o,
.fpu_exception_o
);
end else begin : no_fpu_gen
assign fpu_ready_o = '0;
assign fpu_trans_id_o = '0;
assign fpu_result_o = '0;
assign fpu_valid_o = '0;
assign fpu_exception_o = '0;
end
endgenerate
// ----------------
// Load-Store Unit
// ----------------
fu_data_t lsu_data;
assign lsu_data = lsu_valid_i ? fu_data_i : '0;
load_store_unit #(
.CVA6Cfg ( CVA6Cfg ),
.ASID_WIDTH ( ASID_WIDTH )
) lsu_i (
.clk_i,
.rst_ni,
.flush_i,
.stall_st_pending_i,
.no_st_pending_o,
.fu_data_i ( lsu_data ),
.lsu_ready_o,
.lsu_valid_i,
.load_trans_id_o,
.load_result_o,
.load_valid_o,
.load_exception_o,
.store_trans_id_o,
.store_result_o,
.store_valid_o,
.store_exception_o,
.commit_i ( lsu_commit_i ),
.commit_ready_o ( lsu_commit_ready_o ),
.commit_tran_id_i,
.enable_translation_i,
.en_ld_st_translation_i,
.icache_areq_i,
.icache_areq_o,
.priv_lvl_i,
.ld_st_priv_lvl_i,
.sum_i,
.mxr_i,
.satp_ppn_i,
.asid_i,
.asid_to_be_flushed_i (asid_to_be_flushed),
.vaddr_to_be_flushed_i (vaddr_to_be_flushed),
.flush_tlb_i,
.itlb_miss_o,
.dtlb_miss_o,
.dcache_req_ports_i,
.dcache_req_ports_o,
.dcache_wbuffer_empty_i,
.dcache_wbuffer_not_ni_i,
.amo_valid_commit_i,
.amo_req_o,
.amo_resp_i,
.pmpcfg_i,
.pmpaddr_i,
.lsu_addr_o,
.mem_paddr_o,
.lsu_rmask_o,
.lsu_wmask_o,
.lsu_addr_trans_id_o
);
if (CVA6Cfg.CvxifEn) begin : gen_cvxif
fu_data_t cvxif_data;
assign cvxif_data = x_valid_i ? fu_data_i : '0;
cvxif_fu #(
.CVA6Cfg ( CVA6Cfg )
) cvxif_fu_i (
.clk_i,
.rst_ni,
.fu_data_i,
.priv_lvl_i (ld_st_priv_lvl_i),
.x_valid_i,
.x_ready_o,
.x_off_instr_i,
.x_trans_id_o,
.x_exception_o,
.x_result_o,
.x_valid_o,
.x_we_o,
.cvxif_req_o,
.cvxif_resp_i
);
end else begin : gen_no_cvxif
assign cvxif_req_o = '0;
assign x_trans_id_o = '0;
assign x_exception_o = '0;
assign x_result_o = '0;
assign x_valid_o = '0;
end
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
current_instruction_is_sfence_vma <= 1'b0;
end else begin
if (flush_i) begin
current_instruction_is_sfence_vma <= 1'b0;
end else if ((fu_data_i.operation == SFENCE_VMA) && csr_valid_i) begin
current_instruction_is_sfence_vma <= 1'b1;
end
end end
end
end end
// This process stores the rs1 and rs2 parameters of a SFENCE_VMA instruction. // This process stores the rs1 and rs2 parameters of a SFENCE_VMA instruction.
always_ff @(posedge clk_i or negedge rst_ni) begin always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin if (~rst_ni) begin
asid_to_be_flushed <= '0; asid_to_be_flushed <= '0;
vaddr_to_be_flushed <= '0; vaddr_to_be_flushed <= '0;
// if the current instruction in EX_STAGE is a sfence.vma, in the next cycle no writes will happen // if the current instruction in EX_STAGE is a sfence.vma, in the next cycle no writes will happen
end else if ((~current_instruction_is_sfence_vma) && (~((fu_data_i.operation == SFENCE_VMA) && csr_valid_i))) begin end else if ((~current_instruction_is_sfence_vma) && (~((fu_data_i.operation == SFENCE_VMA) && csr_valid_i))) begin
vaddr_to_be_flushed <= rs1_forwarding_i; vaddr_to_be_flushed <= rs1_forwarding_i;
asid_to_be_flushed <= rs2_forwarding_i[ASID_WIDTH-1:0]; asid_to_be_flushed <= rs2_forwarding_i[ASID_WIDTH-1:0];
end
end end
end
endmodule endmodule

View file

@ -13,29 +13,35 @@
// Description: Wrapper for the floating-point unit // Description: Wrapper for the floating-point unit
module fpu_wrap import ariane_pkg::*; #( module fpu_wrap
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input logic flush_i, input logic flush_i,
input logic fpu_valid_i, input logic fpu_valid_i,
output logic fpu_ready_o, output logic fpu_ready_o,
input fu_data_t fu_data_i, input fu_data_t fu_data_i,
input logic [1:0] fpu_fmt_i, input logic [ 1:0] fpu_fmt_i,
input logic [2:0] fpu_rm_i, input logic [ 2:0] fpu_rm_i,
input logic [2:0] fpu_frm_i, input logic [ 2:0] fpu_frm_i,
input logic [6:0] fpu_prec_i, input logic [ 6:0] fpu_prec_i,
output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o, output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o,
output logic [CVA6Cfg.FLen-1:0] result_o, output logic [ CVA6Cfg.FLen-1:0] result_o,
output logic fpu_valid_o, output logic fpu_valid_o,
output exception_t fpu_exception_o output exception_t fpu_exception_o
); );
// this is a workaround // this is a workaround
// otherwise compilation might issue an error if FLEN=0 // otherwise compilation might issue an error if FLEN=0
enum logic {READY, STALL} state_q, state_d; enum logic {
READY,
STALL
}
state_q, state_d;
if (CVA6Cfg.FpPresent) begin : fpu_gen if (CVA6Cfg.FpPresent) begin : fpu_gen
logic [CVA6Cfg.FLen-1:0] operand_a_i; logic [CVA6Cfg.FLen-1:0] operand_a_i;
logic [CVA6Cfg.FLen-1:0] operand_b_i; logic [CVA6Cfg.FLen-1:0] operand_b_i;
@ -47,50 +53,60 @@ module fpu_wrap import ariane_pkg::*; #(
//----------------------------------- //-----------------------------------
// FPnew config from FPnew package // FPnew config from FPnew package
//----------------------------------- //-----------------------------------
localparam OPBITS = fpnew_pkg::OP_BITS; localparam OPBITS = fpnew_pkg::OP_BITS;
localparam FMTBITS = $clog2(fpnew_pkg::NUM_FP_FORMATS); localparam FMTBITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
localparam IFMTBITS = $clog2(fpnew_pkg::NUM_INT_FORMATS); localparam IFMTBITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);
// Features (enabled formats, vectors etc.) // Features (enabled formats, vectors etc.)
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{ localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
Width: unsigned'(riscv::XLEN), // parameterized using XLEN Width: unsigned'(riscv::XLEN), // parameterized using XLEN
EnableVectors: CVA6Cfg.XFVec, EnableVectors: CVA6Cfg.XFVec,
EnableNanBox: 1'b1, EnableNanBox: 1'b1,
FpFmtMask: {CVA6Cfg.RVF, CVA6Cfg.RVD, CVA6Cfg.XF16, CVA6Cfg.XF8, CVA6Cfg.XF16ALT}, FpFmtMask: {CVA6Cfg.RVF, CVA6Cfg.RVD, CVA6Cfg.XF16, CVA6Cfg.XF8, CVA6Cfg.XF16ALT},
IntFmtMask: {CVA6Cfg.XFVec && CVA6Cfg.XF8, CVA6Cfg.XFVec && (CVA6Cfg.XF16 || CVA6Cfg.XF16ALT), 1'b1, 1'b1} IntFmtMask: {
CVA6Cfg.XFVec && CVA6Cfg.XF8,
CVA6Cfg.XFVec && (CVA6Cfg.XF16 || CVA6Cfg.XF16ALT),
1'b1,
1'b1
}
}; };
// Implementation (number of registers etc) // Implementation (number of registers etc)
localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{ localparam fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{
PipeRegs: '{// FP32, FP64, FP16, FP8, FP16alt PipeRegs: '{ // FP32, FP64, FP16, FP8, FP16alt
'{unsigned'(LAT_COMP_FP32 ), '{
unsigned'(LAT_COMP_FP64 ), unsigned'(LAT_COMP_FP32),
unsigned'(LAT_COMP_FP16 ), unsigned'(LAT_COMP_FP64),
unsigned'(LAT_COMP_FP8 ), unsigned'(LAT_COMP_FP16),
unsigned'(LAT_COMP_FP16ALT)}, // ADDMUL unsigned'(LAT_COMP_FP8),
'{default: unsigned'(LAT_DIVSQRT)}, // DIVSQRT unsigned'(LAT_COMP_FP16ALT)
'{default: unsigned'(LAT_NONCOMP)}, // NONCOMP }, // ADDMUL
'{default: unsigned'(LAT_CONV)}}, // CONV '{default: unsigned'(LAT_DIVSQRT)}, // DIVSQRT
UnitTypes: '{'{default: fpnew_pkg::PARALLEL}, // ADDMUL '{default: unsigned'(LAT_NONCOMP)}, // NONCOMP
'{default: fpnew_pkg::MERGED}, // DIVSQRT '{default: unsigned'(LAT_CONV)}
'{default: fpnew_pkg::PARALLEL}, // NONCOMP }, // CONV
'{default: fpnew_pkg::MERGED}}, // CONV UnitTypes: '{
PipeConfig: fpnew_pkg::DISTRIBUTED '{default: fpnew_pkg::PARALLEL}, // ADDMUL
'{default: fpnew_pkg::MERGED}, // DIVSQRT
'{default: fpnew_pkg::PARALLEL}, // NONCOMP
'{default: fpnew_pkg::MERGED}
}, // CONV
PipeConfig: fpnew_pkg::DISTRIBUTED
}; };
//------------------------------------------------- //-------------------------------------------------
// Inputs to the FPU and protocol inversion buffer // Inputs to the FPU and protocol inversion buffer
//------------------------------------------------- //-------------------------------------------------
logic [CVA6Cfg.FLen-1:0] operand_a_d, operand_a_q, operand_a; logic [CVA6Cfg.FLen-1:0] operand_a_d, operand_a_q, operand_a;
logic [CVA6Cfg.FLen-1:0] operand_b_d, operand_b_q, operand_b; logic [CVA6Cfg.FLen-1:0] operand_b_d, operand_b_q, operand_b;
logic [CVA6Cfg.FLen-1:0] operand_c_d, operand_c_q, operand_c; logic [CVA6Cfg.FLen-1:0] operand_c_d, operand_c_q, operand_c;
logic [OPBITS-1:0] fpu_op_d, fpu_op_q, fpu_op; logic [OPBITS-1:0] fpu_op_d, fpu_op_q, fpu_op;
logic fpu_op_mod_d, fpu_op_mod_q, fpu_op_mod; logic fpu_op_mod_d, fpu_op_mod_q, fpu_op_mod;
logic [FMTBITS-1:0] fpu_srcfmt_d, fpu_srcfmt_q, fpu_srcfmt; logic [FMTBITS-1:0] fpu_srcfmt_d, fpu_srcfmt_q, fpu_srcfmt;
logic [FMTBITS-1:0] fpu_dstfmt_d, fpu_dstfmt_q, fpu_dstfmt; logic [FMTBITS-1:0] fpu_dstfmt_d, fpu_dstfmt_q, fpu_dstfmt;
logic [IFMTBITS-1:0] fpu_ifmt_d, fpu_ifmt_q, fpu_ifmt; logic [IFMTBITS-1:0] fpu_ifmt_d, fpu_ifmt_q, fpu_ifmt;
logic [2:0] fpu_rm_d, fpu_rm_q, fpu_rm; logic [2:0] fpu_rm_d, fpu_rm_q, fpu_rm;
logic fpu_vec_op_d, fpu_vec_op_q, fpu_vec_op; logic fpu_vec_op_d, fpu_vec_op_q, fpu_vec_op;
logic [TRANS_ID_BITS-1:0] fpu_tag_d, fpu_tag_q, fpu_tag; logic [TRANS_ID_BITS-1:0] fpu_tag_d, fpu_tag_q, fpu_tag;
@ -109,45 +125,41 @@ module fpu_wrap import ariane_pkg::*; #(
always_comb begin : input_translation always_comb begin : input_translation
automatic logic vec_replication; // control honoring of replication flag automatic logic vec_replication; // control honoring of replication flag
automatic logic replicate_c; // replicate operand C instead of B (for ADD/SUB) automatic logic replicate_c; // replicate operand C instead of B (for ADD/SUB)
automatic logic check_ah; // Decide for AH from RM field encoding automatic logic check_ah; // Decide for AH from RM field encoding
// Default Values // Default Values
operand_a_d = operand_a_i; operand_a_d = operand_a_i;
operand_b_d = operand_b_i; // immediates come through this port unless used as operand operand_b_d = operand_b_i; // immediates come through this port unless used as operand
operand_c_d = operand_c_i; // immediates come through this port unless used as operand operand_c_d = operand_c_i; // immediates come through this port unless used as operand
fpu_op_d = fpnew_pkg::SGNJ; // sign injection by default fpu_op_d = fpnew_pkg::SGNJ; // sign injection by default
fpu_op_mod_d = 1'b0; fpu_op_mod_d = 1'b0;
fpu_dstfmt_d = fpnew_pkg::FP32; fpu_dstfmt_d = fpnew_pkg::FP32;
fpu_ifmt_d = fpnew_pkg::INT32; fpu_ifmt_d = fpnew_pkg::INT32;
fpu_rm_d = fpu_rm_i; fpu_rm_d = fpu_rm_i;
fpu_vec_op_d = fu_data_i.fu == FPU_VEC; fpu_vec_op_d = fu_data_i.fu == FPU_VEC;
fpu_tag_d = fu_data_i.trans_id; fpu_tag_d = fu_data_i.trans_id;
vec_replication = fpu_rm_i[0]; // replication bit is sent via rm field vec_replication = fpu_rm_i[0]; // replication bit is sent via rm field
replicate_c = 1'b0; replicate_c = 1'b0;
check_ah = 1'b0; // whether set scalar AH encoding from MSB of rm_i check_ah = 1'b0; // whether set scalar AH encoding from MSB of rm_i
// Scalar Rounding Modes - some ops encode inside RM but use smaller range // Scalar Rounding Modes - some ops encode inside RM but use smaller range
if (!(fpu_rm_i inside {[3'b000:3'b100]})) if (!(fpu_rm_i inside {[3'b000 : 3'b100]})) fpu_rm_d = fpu_frm_i;
fpu_rm_d = fpu_frm_i;
// Vectorial ops always consult FRM // Vectorial ops always consult FRM
if (fpu_vec_op_d) if (fpu_vec_op_d) fpu_rm_d = fpu_frm_i;
fpu_rm_d = fpu_frm_i;
// Formats // Formats
unique case (fpu_fmt_i) unique case (fpu_fmt_i)
// FP32 // FP32
2'b00: fpu_dstfmt_d = fpnew_pkg::FP32; 2'b00: fpu_dstfmt_d = fpnew_pkg::FP32;
// FP64 or FP16ALT (vectorial) // FP64 or FP16ALT (vectorial)
2'b01: fpu_dstfmt_d = fpu_vec_op_d ? fpnew_pkg::FP16ALT : fpnew_pkg::FP64; 2'b01: fpu_dstfmt_d = fpu_vec_op_d ? fpnew_pkg::FP16ALT : fpnew_pkg::FP64;
// FP16 or FP16ALT (scalar) // FP16 or FP16ALT (scalar)
2'b10: begin 2'b10: begin
if (!fpu_vec_op_d && fpu_rm_i==3'b101) if (!fpu_vec_op_d && fpu_rm_i == 3'b101) fpu_dstfmt_d = fpnew_pkg::FP16ALT;
fpu_dstfmt_d = fpnew_pkg::FP16ALT; else fpu_dstfmt_d = fpnew_pkg::FP16;
else
fpu_dstfmt_d = fpnew_pkg::FP16;
end end
// FP8 // FP8
default: fpu_dstfmt_d = fpnew_pkg::FP8; default: fpu_dstfmt_d = fpnew_pkg::FP8;
@ -167,29 +179,29 @@ module fpu_wrap import ariane_pkg::*; #(
FSUB: begin FSUB: begin
fpu_op_d = fpnew_pkg::ADD; fpu_op_d = fpnew_pkg::ADD;
fpu_op_mod_d = 1'b1; fpu_op_mod_d = 1'b1;
replicate_c = 1'b1; // second operand is in C replicate_c = 1'b1; // second operand is in C
end end
// Multiplication // Multiplication
FMUL: fpu_op_d = fpnew_pkg::MUL; FMUL: fpu_op_d = fpnew_pkg::MUL;
// Division // Division
FDIV: fpu_op_d = fpnew_pkg::DIV; FDIV: fpu_op_d = fpnew_pkg::DIV;
// Min/Max - OP is encoded in rm (000-001) // Min/Max - OP is encoded in rm (000-001)
FMIN_MAX: begin FMIN_MAX: begin
fpu_op_d = fpnew_pkg::MINMAX; fpu_op_d = fpnew_pkg::MINMAX;
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
check_ah = 1'b1; // AH has RM MSB encoding check_ah = 1'b1; // AH has RM MSB encoding
end end
// Square Root // Square Root
FSQRT: fpu_op_d = fpnew_pkg::SQRT; FSQRT: fpu_op_d = fpnew_pkg::SQRT;
// Fused Multiply Add // Fused Multiply Add
FMADD: fpu_op_d = fpnew_pkg::FMADD; FMADD: fpu_op_d = fpnew_pkg::FMADD;
// Fused Multiply Subtract is modified FMADD // Fused Multiply Subtract is modified FMADD
FMSUB: begin FMSUB: begin
fpu_op_d = fpnew_pkg::FMADD; fpu_op_d = fpnew_pkg::FMADD;
fpu_op_mod_d = 1'b1; fpu_op_mod_d = 1'b1;
end end
// Fused Negated Multiply Subtract // Fused Negated Multiply Subtract
FNMSUB: fpu_op_d = fpnew_pkg::FNMSUB; FNMSUB: fpu_op_d = fpnew_pkg::FNMSUB;
// Fused Negated Multiply Add is modified FNMSUB // Fused Negated Multiply Add is modified FNMSUB
FNMADD: begin FNMADD: begin
fpu_op_d = fpnew_pkg::FNMSUB; fpu_op_d = fpnew_pkg::FNMSUB;
@ -197,24 +209,21 @@ module fpu_wrap import ariane_pkg::*; #(
end end
// Float to Int Cast - Op encoded in lowest two imm bits or rm // Float to Int Cast - Op encoded in lowest two imm bits or rm
FCVT_F2I: begin FCVT_F2I: begin
fpu_op_d = fpnew_pkg::F2I; fpu_op_d = fpnew_pkg::F2I;
// Vectorial Ops encoded in R bit // Vectorial Ops encoded in R bit
if (fpu_vec_op_d) begin if (fpu_vec_op_d) begin
fpu_op_mod_d = fpu_rm_i[0]; fpu_op_mod_d = fpu_rm_i[0];
vec_replication = 1'b0; // no replication, R bit used for op vec_replication = 1'b0; // no replication, R bit used for op
unique case (fpu_fmt_i) unique case (fpu_fmt_i)
2'b00: fpu_ifmt_d = fpnew_pkg::INT32; 2'b00: fpu_ifmt_d = fpnew_pkg::INT32;
2'b01, 2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
2'b11: fpu_ifmt_d = fpnew_pkg::INT8; 2'b11: fpu_ifmt_d = fpnew_pkg::INT8;
endcase endcase
// Scalar casts encoded in imm // Scalar casts encoded in imm
end else begin end else begin
fpu_op_mod_d = operand_c_i[0]; fpu_op_mod_d = operand_c_i[0];
if (operand_c_i[1]) if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64;
fpu_ifmt_d = fpnew_pkg::INT64; else fpu_ifmt_d = fpnew_pkg::INT32;
else
fpu_ifmt_d = fpnew_pkg::INT32;
end end
end end
// Int to Float Cast - Op encoded in lowest two imm bits or rm // Int to Float Cast - Op encoded in lowest two imm bits or rm
@ -222,21 +231,18 @@ module fpu_wrap import ariane_pkg::*; #(
fpu_op_d = fpnew_pkg::I2F; fpu_op_d = fpnew_pkg::I2F;
// Vectorial Ops encoded in R bit // Vectorial Ops encoded in R bit
if (fpu_vec_op_d) begin if (fpu_vec_op_d) begin
fpu_op_mod_d = fpu_rm_i[0]; fpu_op_mod_d = fpu_rm_i[0];
vec_replication = 1'b0; // no replication, R bit used for op vec_replication = 1'b0; // no replication, R bit used for op
unique case (fpu_fmt_i) unique case (fpu_fmt_i)
2'b00: fpu_ifmt_d = fpnew_pkg::INT32; 2'b00: fpu_ifmt_d = fpnew_pkg::INT32;
2'b01, 2'b01, 2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
2'b10: fpu_ifmt_d = fpnew_pkg::INT16;
2'b11: fpu_ifmt_d = fpnew_pkg::INT8; 2'b11: fpu_ifmt_d = fpnew_pkg::INT8;
endcase endcase
// Scalar casts encoded in imm // Scalar casts encoded in imm
end else begin end else begin
fpu_op_mod_d = operand_c_i[0]; fpu_op_mod_d = operand_c_i[0];
if (operand_c_i[1]) if (operand_c_i[1]) fpu_ifmt_d = fpnew_pkg::INT64;
fpu_ifmt_d = fpnew_pkg::INT64; else fpu_ifmt_d = fpnew_pkg::INT32;
else
fpu_ifmt_d = fpnew_pkg::INT32;
end end
end end
// Float to Float Cast - Source format encoded in lowest two/three imm bits // Float to Float Cast - Source format encoded in lowest two/three imm bits
@ -244,171 +250,173 @@ module fpu_wrap import ariane_pkg::*; #(
fpu_op_d = fpnew_pkg::F2F; fpu_op_d = fpnew_pkg::F2F;
// Vectorial ops encoded in lowest two imm bits // Vectorial ops encoded in lowest two imm bits
if (fpu_vec_op_d) begin if (fpu_vec_op_d) begin
vec_replication = 1'b0; // no replication for casts (not needed) vec_replication = 1'b0; // no replication for casts (not needed)
unique case (operand_c_i[1:0]) unique case (operand_c_i[1:0])
2'b00: fpu_srcfmt_d = fpnew_pkg::FP32; 2'b00: fpu_srcfmt_d = fpnew_pkg::FP32;
2'b01: fpu_srcfmt_d = fpnew_pkg::FP16ALT; 2'b01: fpu_srcfmt_d = fpnew_pkg::FP16ALT;
2'b10: fpu_srcfmt_d = fpnew_pkg::FP16; 2'b10: fpu_srcfmt_d = fpnew_pkg::FP16;
2'b11: fpu_srcfmt_d = fpnew_pkg::FP8; 2'b11: fpu_srcfmt_d = fpnew_pkg::FP8;
endcase endcase
// Scalar ops encoded in lowest three imm bits // Scalar ops encoded in lowest three imm bits
end else begin end else begin
unique case (operand_c_i[2:0]) unique case (operand_c_i[2:0])
3'b000: fpu_srcfmt_d = fpnew_pkg::FP32; 3'b000: fpu_srcfmt_d = fpnew_pkg::FP32;
3'b001: fpu_srcfmt_d = fpnew_pkg::FP64; 3'b001: fpu_srcfmt_d = fpnew_pkg::FP64;
3'b010: fpu_srcfmt_d = fpnew_pkg::FP16; 3'b010: fpu_srcfmt_d = fpnew_pkg::FP16;
3'b110: fpu_srcfmt_d = fpnew_pkg::FP16ALT; 3'b110: fpu_srcfmt_d = fpnew_pkg::FP16ALT;
3'b011: fpu_srcfmt_d = fpnew_pkg::FP8; 3'b011: fpu_srcfmt_d = fpnew_pkg::FP8;
default: ; // Do nothing default: ; // Do nothing
endcase endcase
end end
end end
// Scalar Sign Injection - op encoded in rm (000-010) // Scalar Sign Injection - op encoded in rm (000-010)
FSGNJ: begin FSGNJ: begin
fpu_op_d = fpnew_pkg::SGNJ; fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
check_ah = 1'b1; // AH has RM MSB encoding check_ah = 1'b1; // AH has RM MSB encoding
end end
// Move from FPR to GPR - mapped to SGNJ-passthrough since no recoding // Move from FPR to GPR - mapped to SGNJ-passthrough since no recoding
FMV_F2X: begin FMV_F2X: begin
fpu_op_d = fpnew_pkg::SGNJ; fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b011; // passthrough without checking nan-box fpu_rm_d = 3'b011; // passthrough without checking nan-box
fpu_op_mod_d = 1'b1; // no NaN-Boxing fpu_op_mod_d = 1'b1; // no NaN-Boxing
check_ah = 1'b1; // AH has RM MSB encoding check_ah = 1'b1; // AH has RM MSB encoding
vec_replication = 1'b0; // no replication, we set second operand vec_replication = 1'b0; // no replication, we set second operand
end end
// Move from GPR to FPR - mapped to NOP since no recoding // Move from GPR to FPR - mapped to NOP since no recoding
FMV_X2F: begin FMV_X2F: begin
fpu_op_d = fpnew_pkg::SGNJ; fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b011; // passthrough without checking nan-box fpu_rm_d = 3'b011; // passthrough without checking nan-box
check_ah = 1'b1; // AH has RM MSB encoding check_ah = 1'b1; // AH has RM MSB encoding
vec_replication = 1'b0; // no replication, we set second operand vec_replication = 1'b0; // no replication, we set second operand
end end
// Scalar Comparisons - op encoded in rm (000-010) // Scalar Comparisons - op encoded in rm (000-010)
FCMP: begin FCMP: begin
fpu_op_d = fpnew_pkg::CMP; fpu_op_d = fpnew_pkg::CMP;
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit
check_ah = 1'b1; // AH has RM MSB encoding check_ah = 1'b1; // AH has RM MSB encoding
end end
// Classification // Classification
FCLASS: begin FCLASS: begin
fpu_op_d = fpnew_pkg::CLASSIFY; fpu_op_d = fpnew_pkg::CLASSIFY;
fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit - CLASS doesn't care anyways fpu_rm_d = {
check_ah = 1'b1; // AH has RM MSB encoding 1'b0, fpu_rm_i[1:0]
}; // mask out AH encoding bit - CLASS doesn't care anyways
check_ah = 1'b1; // AH has RM MSB encoding
end end
// Vectorial Minimum - set up scalar encoding in rm // Vectorial Minimum - set up scalar encoding in rm
VFMIN: begin VFMIN: begin
fpu_op_d = fpnew_pkg::MINMAX; fpu_op_d = fpnew_pkg::MINMAX;
fpu_rm_d = 3'b000; // min fpu_rm_d = 3'b000; // min
end end
// Vectorial Maximum - set up scalar encoding in rm // Vectorial Maximum - set up scalar encoding in rm
VFMAX: begin VFMAX: begin
fpu_op_d = fpnew_pkg::MINMAX; fpu_op_d = fpnew_pkg::MINMAX;
fpu_rm_d = 3'b001; // max fpu_rm_d = 3'b001; // max
end end
// Vectorial Sign Injection - set up scalar encoding in rm // Vectorial Sign Injection - set up scalar encoding in rm
VFSGNJ: begin VFSGNJ: begin
fpu_op_d = fpnew_pkg::SGNJ; fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b000; // sgnj fpu_rm_d = 3'b000; // sgnj
end end
// Vectorial Negated Sign Injection - set up scalar encoding in rm // Vectorial Negated Sign Injection - set up scalar encoding in rm
VFSGNJN: begin VFSGNJN: begin
fpu_op_d = fpnew_pkg::SGNJ; fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b001; // sgnjn fpu_rm_d = 3'b001; // sgnjn
end end
// Vectorial Xored Sign Injection - set up scalar encoding in rm // Vectorial Xored Sign Injection - set up scalar encoding in rm
VFSGNJX: begin VFSGNJX: begin
fpu_op_d = fpnew_pkg::SGNJ; fpu_op_d = fpnew_pkg::SGNJ;
fpu_rm_d = 3'b010; // sgnjx fpu_rm_d = 3'b010; // sgnjx
end end
// Vectorial Equals - set up scalar encoding in rm // Vectorial Equals - set up scalar encoding in rm
VFEQ: begin VFEQ: begin
fpu_op_d = fpnew_pkg::CMP; fpu_op_d = fpnew_pkg::CMP;
fpu_rm_d = 3'b010; // eq fpu_rm_d = 3'b010; // eq
end end
// Vectorial Not Equals - set up scalar encoding in rm // Vectorial Not Equals - set up scalar encoding in rm
VFNE: begin VFNE: begin
fpu_op_d = fpnew_pkg::CMP; fpu_op_d = fpnew_pkg::CMP;
fpu_op_mod_d = 1'b1; // invert output fpu_op_mod_d = 1'b1; // invert output
fpu_rm_d = 3'b010; // eq fpu_rm_d = 3'b010; // eq
end end
// Vectorial Less Than - set up scalar encoding in rm // Vectorial Less Than - set up scalar encoding in rm
VFLT: begin VFLT: begin
fpu_op_d = fpnew_pkg::CMP; fpu_op_d = fpnew_pkg::CMP;
fpu_rm_d = 3'b001; // lt fpu_rm_d = 3'b001; // lt
end end
// Vectorial Greater or Equal - set up scalar encoding in rm // Vectorial Greater or Equal - set up scalar encoding in rm
VFGE: begin VFGE: begin
fpu_op_d = fpnew_pkg::CMP; fpu_op_d = fpnew_pkg::CMP;
fpu_op_mod_d = 1'b1; // invert output fpu_op_mod_d = 1'b1; // invert output
fpu_rm_d = 3'b001; // lt fpu_rm_d = 3'b001; // lt
end end
// Vectorial Less or Equal - set up scalar encoding in rm // Vectorial Less or Equal - set up scalar encoding in rm
VFLE: begin VFLE: begin
fpu_op_d = fpnew_pkg::CMP; fpu_op_d = fpnew_pkg::CMP;
fpu_rm_d = 3'b000; // le fpu_rm_d = 3'b000; // le
end end
// Vectorial Greater Than - set up scalar encoding in rm // Vectorial Greater Than - set up scalar encoding in rm
VFGT: begin VFGT: begin
fpu_op_d = fpnew_pkg::CMP; fpu_op_d = fpnew_pkg::CMP;
fpu_op_mod_d = 1'b1; // invert output fpu_op_mod_d = 1'b1; // invert output
fpu_rm_d = 3'b000; // le fpu_rm_d = 3'b000; // le
end end
// Vectorial Convert-and-Pack from FP32, lower 4 entries // Vectorial Convert-and-Pack from FP32, lower 4 entries
VFCPKAB_S: begin VFCPKAB_S: begin
fpu_op_d = fpnew_pkg::CPKAB; fpu_op_d = fpnew_pkg::CPKAB;
fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP32; // Cast from FP32 fpu_srcfmt_d = fpnew_pkg::FP32; // Cast from FP32
end end
// Vectorial Convert-and-Pack from FP32, upper 4 entries // Vectorial Convert-and-Pack from FP32, upper 4 entries
VFCPKCD_S: begin VFCPKCD_S: begin
fpu_op_d = fpnew_pkg::CPKCD; fpu_op_d = fpnew_pkg::CPKCD;
fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP32; // Cast from FP32 fpu_srcfmt_d = fpnew_pkg::FP32; // Cast from FP32
end end
// Vectorial Convert-and-Pack from FP64, lower 4 entries // Vectorial Convert-and-Pack from FP64, lower 4 entries
VFCPKAB_D: begin VFCPKAB_D: begin
fpu_op_d = fpnew_pkg::CPKAB; fpu_op_d = fpnew_pkg::CPKAB;
fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP64; // Cast from FP64 fpu_srcfmt_d = fpnew_pkg::FP64; // Cast from FP64
end end
// Vectorial Convert-and-Pack from FP64, upper 4 entries // Vectorial Convert-and-Pack from FP64, upper 4 entries
VFCPKCD_D: begin VFCPKCD_D: begin
fpu_op_d = fpnew_pkg::CPKCD; fpu_op_d = fpnew_pkg::CPKCD;
fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit
vec_replication = 1'b0; // no replication, R bit used for op vec_replication = 1'b0; // no replication, R bit used for op
fpu_srcfmt_d = fpnew_pkg::FP64; // Cast from FP64 fpu_srcfmt_d = fpnew_pkg::FP64; // Cast from FP64
end end
// No changes per default // No changes per default
default: ; //nothing default: ; //nothing
endcase endcase
// Scalar AH encoding fixing // Scalar AH encoding fixing
if (!fpu_vec_op_d && check_ah) if (!fpu_vec_op_d && check_ah) if (fpu_rm_i[2]) fpu_dstfmt_d = fpnew_pkg::FP16ALT;
if (fpu_rm_i[2])
fpu_dstfmt_d = fpnew_pkg::FP16ALT;
// Replication // Replication
if (fpu_vec_op_d && vec_replication) begin if (fpu_vec_op_d && vec_replication) begin
if (replicate_c) begin if (replicate_c) begin
unique case (fpu_dstfmt_d) unique case (fpu_dstfmt_d)
fpnew_pkg::FP32: operand_c_d = CVA6Cfg.RVD ? {2{operand_c_i[31:0]}} : operand_c_i; fpnew_pkg::FP32: operand_c_d = CVA6Cfg.RVD ? {2{operand_c_i[31:0]}} : operand_c_i;
fpnew_pkg::FP16, fpnew_pkg::FP16, fpnew_pkg::FP16ALT:
fpnew_pkg::FP16ALT: operand_c_d = CVA6Cfg.RVD ? {4{operand_c_i[15:0]}} : {2{operand_c_i[15:0]}}; operand_c_d = CVA6Cfg.RVD ? {4{operand_c_i[15:0]}} : {2{operand_c_i[15:0]}};
fpnew_pkg::FP8: operand_c_d = CVA6Cfg.RVD ? {8{operand_c_i[7:0]}} : {4{operand_c_i[7:0]}}; fpnew_pkg::FP8:
default: ; // Do nothing operand_c_d = CVA6Cfg.RVD ? {8{operand_c_i[7:0]}} : {4{operand_c_i[7:0]}};
endcase // fpu_dstfmt_d default: ; // Do nothing
endcase // fpu_dstfmt_d
end else begin end else begin
unique case (fpu_dstfmt_d) unique case (fpu_dstfmt_d)
fpnew_pkg::FP32: operand_b_d = CVA6Cfg.RVD ? {2{operand_b_i[31:0]}} : operand_b_i; fpnew_pkg::FP32: operand_b_d = CVA6Cfg.RVD ? {2{operand_b_i[31:0]}} : operand_b_i;
fpnew_pkg::FP16, fpnew_pkg::FP16, fpnew_pkg::FP16ALT:
fpnew_pkg::FP16ALT: operand_b_d = CVA6Cfg.RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}}; operand_b_d = CVA6Cfg.RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}};
fpnew_pkg::FP8: operand_b_d = CVA6Cfg.RVD ? {8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}}; fpnew_pkg::FP8:
default: ; // Do nothing operand_b_d = CVA6Cfg.RVD ? {8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}};
endcase // fpu_dstfmt_d default: ; // Do nothing
endcase // fpu_dstfmt_d
end end
end end
end end
@ -422,31 +430,31 @@ module fpu_wrap import ariane_pkg::*; #(
// Default Values // Default Values
fpu_ready_o = 1'b0; fpu_ready_o = 1'b0;
fpu_in_valid = 1'b0; fpu_in_valid = 1'b0;
hold_inputs = 1'b0; // hold register disabled hold_inputs = 1'b0; // hold register disabled
use_hold = 1'b0; // inputs go directly to unit use_hold = 1'b0; // inputs go directly to unit
state_d = state_q; // stay in the same state state_d = state_q; // stay in the same state
// FSM // FSM
unique case (state_q) unique case (state_q)
// Default state, ready for instructions // Default state, ready for instructions
READY: begin READY: begin
fpu_ready_o = 1'b1; // Act as if FPU ready fpu_ready_o = 1'b1; // Act as if FPU ready
fpu_in_valid = fpu_valid_i; // Forward input valid to FPU fpu_in_valid = fpu_valid_i; // Forward input valid to FPU
// There is a transaction but the FPU can't handle it // There is a transaction but the FPU can't handle it
if (fpu_valid_i & ~fpu_in_ready) begin if (fpu_valid_i & ~fpu_in_ready) begin
fpu_ready_o = 1'b0; // No token given to Issue fpu_ready_o = 1'b0; // No token given to Issue
hold_inputs = 1'b1; // save inputs to the holding register hold_inputs = 1'b1; // save inputs to the holding register
state_d = STALL; // stall future incoming requests state_d = STALL; // stall future incoming requests
end end
end end
// We're stalling the upstream (ready=0) // We're stalling the upstream (ready=0)
STALL: begin STALL: begin
fpu_in_valid = 1'b1; // we have data for the FPU fpu_in_valid = 1'b1; // we have data for the FPU
use_hold = 1'b1; // the data comes from the hold reg use_hold = 1'b1; // the data comes from the hold reg
// Wait until it's consumed // Wait until it's consumed
if (fpu_in_ready) begin if (fpu_in_ready) begin
fpu_ready_o = 1'b1; // Give a token to issue fpu_ready_o = 1'b1; // Give a token to issue
state_d = READY; // accept future requests state_d = READY; // accept future requests
end end
end end
// Default: emit default values // Default: emit default values
@ -462,50 +470,50 @@ module fpu_wrap import ariane_pkg::*; #(
// Buffer register and FSM state holding // Buffer register and FSM state holding
always_ff @(posedge clk_i or negedge rst_ni) begin : fp_hold_reg always_ff @(posedge clk_i or negedge rst_ni) begin : fp_hold_reg
if(~rst_ni) begin if (~rst_ni) begin
state_q <= READY; state_q <= READY;
operand_a_q <= '0; operand_a_q <= '0;
operand_b_q <= '0; operand_b_q <= '0;
operand_c_q <= '0; operand_c_q <= '0;
fpu_op_q <= '0; fpu_op_q <= '0;
fpu_op_mod_q <= '0; fpu_op_mod_q <= '0;
fpu_srcfmt_q <= '0; fpu_srcfmt_q <= '0;
fpu_dstfmt_q <= '0; fpu_dstfmt_q <= '0;
fpu_ifmt_q <= '0; fpu_ifmt_q <= '0;
fpu_rm_q <= '0; fpu_rm_q <= '0;
fpu_vec_op_q <= '0; fpu_vec_op_q <= '0;
fpu_tag_q <= '0; fpu_tag_q <= '0;
end else begin end else begin
state_q <= state_d; state_q <= state_d;
// Hold register is [TRIGGERED] by FSM // Hold register is [TRIGGERED] by FSM
if (hold_inputs) begin if (hold_inputs) begin
operand_a_q <= operand_a_d; operand_a_q <= operand_a_d;
operand_b_q <= operand_b_d; operand_b_q <= operand_b_d;
operand_c_q <= operand_c_d; operand_c_q <= operand_c_d;
fpu_op_q <= fpu_op_d; fpu_op_q <= fpu_op_d;
fpu_op_mod_q <= fpu_op_mod_d; fpu_op_mod_q <= fpu_op_mod_d;
fpu_srcfmt_q <= fpu_srcfmt_d; fpu_srcfmt_q <= fpu_srcfmt_d;
fpu_dstfmt_q <= fpu_dstfmt_d; fpu_dstfmt_q <= fpu_dstfmt_d;
fpu_ifmt_q <= fpu_ifmt_d; fpu_ifmt_q <= fpu_ifmt_d;
fpu_rm_q <= fpu_rm_d; fpu_rm_q <= fpu_rm_d;
fpu_vec_op_q <= fpu_vec_op_d; fpu_vec_op_q <= fpu_vec_op_d;
fpu_tag_q <= fpu_tag_d; fpu_tag_q <= fpu_tag_d;
end end
end end
end end
// Select FPU input data: from register if valid data in register, else directly from input // Select FPU input data: from register if valid data in register, else directly from input
assign operand_a = use_hold ? operand_a_q : operand_a_d; assign operand_a = use_hold ? operand_a_q : operand_a_d;
assign operand_b = use_hold ? operand_b_q : operand_b_d; assign operand_b = use_hold ? operand_b_q : operand_b_d;
assign operand_c = use_hold ? operand_c_q : operand_c_d; assign operand_c = use_hold ? operand_c_q : operand_c_d;
assign fpu_op = use_hold ? fpu_op_q : fpu_op_d; assign fpu_op = use_hold ? fpu_op_q : fpu_op_d;
assign fpu_op_mod = use_hold ? fpu_op_mod_q : fpu_op_mod_d; assign fpu_op_mod = use_hold ? fpu_op_mod_q : fpu_op_mod_d;
assign fpu_srcfmt = use_hold ? fpu_srcfmt_q : fpu_srcfmt_d; assign fpu_srcfmt = use_hold ? fpu_srcfmt_q : fpu_srcfmt_d;
assign fpu_dstfmt = use_hold ? fpu_dstfmt_q : fpu_dstfmt_d; assign fpu_dstfmt = use_hold ? fpu_dstfmt_q : fpu_dstfmt_d;
assign fpu_ifmt = use_hold ? fpu_ifmt_q : fpu_ifmt_d; assign fpu_ifmt = use_hold ? fpu_ifmt_q : fpu_ifmt_d;
assign fpu_rm = use_hold ? fpu_rm_q : fpu_rm_d; assign fpu_rm = use_hold ? fpu_rm_q : fpu_rm_d;
assign fpu_vec_op = use_hold ? fpu_vec_op_q : fpu_vec_op_d; assign fpu_vec_op = use_hold ? fpu_vec_op_q : fpu_vec_op_d;
assign fpu_tag = use_hold ? fpu_tag_q : fpu_tag_d; assign fpu_tag = use_hold ? fpu_tag_q : fpu_tag_d;
// Consolidate operands // Consolidate operands
logic [2:0][CVA6Cfg.FLen-1:0] fpu_operands; logic [2:0][CVA6Cfg.FLen-1:0] fpu_operands;
@ -519,31 +527,31 @@ module fpu_wrap import ariane_pkg::*; #(
//--------------- //---------------
fpnew_top #( fpnew_top #(
.Features ( FPU_FEATURES ), .Features (FPU_FEATURES),
.Implementation ( FPU_IMPLEMENTATION ), .Implementation(FPU_IMPLEMENTATION),
.TagType ( logic [TRANS_ID_BITS-1:0] ) .TagType (logic [TRANS_ID_BITS-1:0])
) i_fpnew_bulk ( ) i_fpnew_bulk (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.operands_i ( fpu_operands ), .operands_i (fpu_operands),
.rnd_mode_i ( fpnew_pkg::roundmode_e'(fpu_rm) ), .rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rm)),
.op_i ( fpnew_pkg::operation_e'(fpu_op) ), .op_i (fpnew_pkg::operation_e'(fpu_op)),
.op_mod_i ( fpu_op_mod ), .op_mod_i (fpu_op_mod),
.src_fmt_i ( fpnew_pkg::fp_format_e'(fpu_srcfmt) ), .src_fmt_i (fpnew_pkg::fp_format_e'(fpu_srcfmt)),
.dst_fmt_i ( fpnew_pkg::fp_format_e'(fpu_dstfmt) ), .dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dstfmt)),
.int_fmt_i ( fpnew_pkg::int_format_e'(fpu_ifmt) ), .int_fmt_i (fpnew_pkg::int_format_e'(fpu_ifmt)),
.vectorial_op_i ( fpu_vec_op ), .vectorial_op_i(fpu_vec_op),
.tag_i ( fpu_tag ), .tag_i (fpu_tag),
.simd_mask_i ( 1'b1 ), .simd_mask_i (1'b1),
.in_valid_i ( fpu_in_valid ), .in_valid_i (fpu_in_valid),
.in_ready_o ( fpu_in_ready ), .in_ready_o (fpu_in_ready),
.flush_i, .flush_i,
.result_o, .result_o,
.status_o ( fpu_status ), .status_o (fpu_status),
.tag_o ( fpu_trans_id_o ), .tag_o (fpu_trans_id_o),
.out_valid_o ( fpu_out_valid ), .out_valid_o (fpu_out_valid),
.out_ready_i ( fpu_out_ready ), .out_ready_i (fpu_out_ready),
.busy_o ( /* unused */ ) .busy_o ( /* unused */)
); );
// Pack status flag into exception cause, tval ignored in wb, exception is always invalid // Pack status flag into exception cause, tval ignored in wb, exception is always invalid

View file

@ -21,195 +21,195 @@
module bht #( module bht #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NR_ENTRIES = 1024 parameter int unsigned NR_ENTRIES = 1024
)( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input logic flush_i, input logic flush_i,
input logic debug_mode_i, input logic debug_mode_i,
input logic [riscv::VLEN-1:0] vpc_i, input logic [ riscv::VLEN-1:0] vpc_i,
input ariane_pkg::bht_update_t bht_update_i, input ariane_pkg::bht_update_t bht_update_i,
// we potentially need INSTR_PER_FETCH predictions/cycle // we potentially need INSTR_PER_FETCH predictions/cycle
output ariane_pkg::bht_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht_prediction_o output ariane_pkg::bht_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht_prediction_o
); );
// the last bit is always zero, we don't need it for indexing // the last bit is always zero, we don't need it for indexing
localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2; localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2;
// re-shape the branch history table // re-shape the branch history table
localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH; localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
// number of bits needed to index the row // number of bits needed to index the row
localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH); localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1; localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1;
// number of bits we should use for prediction // number of bits we should use for prediction
localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS; localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
// we are not interested in all bits of the address // we are not interested in all bits of the address
unread i_unread (.d_i(|vpc_i)); unread i_unread (.d_i(|vpc_i));
struct packed { struct packed {
logic valid; logic valid;
logic [1:0] saturation_counter; logic [1:0] saturation_counter;
} bht_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0], bht_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0]; }
bht_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
bht_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];
logic [$clog2(NR_ROWS)-1:0] index, update_pc; logic [$clog2(NR_ROWS)-1:0] index, update_pc;
logic [ROW_INDEX_BITS-1:0] update_row_index; logic [ROW_INDEX_BITS-1:0] update_row_index;
assign index = vpc_i[PREDICTION_BITS - 1:ROW_ADDR_BITS + OFFSET]; assign index = vpc_i[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
assign update_pc = bht_update_i.pc[PREDICTION_BITS - 1:ROW_ADDR_BITS + OFFSET]; assign update_pc = bht_update_i.pc[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
if (CVA6Cfg.RVC) begin : gen_update_row_index if (CVA6Cfg.RVC) begin : gen_update_row_index
assign update_row_index = bht_update_i.pc[ROW_ADDR_BITS + OFFSET - 1:OFFSET]; assign update_row_index = bht_update_i.pc[ROW_ADDR_BITS+OFFSET-1:OFFSET];
end else begin end else begin
assign update_row_index = '0; assign update_row_index = '0;
end
if (!ariane_pkg::FPGA_EN) begin : gen_asic_bht // ASIC TARGET
logic [1:0] saturation_counter;
// prediction assignment
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_output
assign bht_prediction_o[i].valid = bht_q[index][i].valid;
assign bht_prediction_o[i].taken = bht_q[index][i].saturation_counter[1] == 1'b1;
end end
if (!ariane_pkg::FPGA_EN) begin : gen_asic_bht // ASIC TARGET always_comb begin : update_bht
bht_d = bht_q;
saturation_counter = bht_q[update_pc][update_row_index].saturation_counter;
logic [1:0] saturation_counter; if (bht_update_i.valid && !debug_mode_i) begin
// prediction assignment bht_d[update_pc][update_row_index].valid = 1'b1;
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_output
assign bht_prediction_o[i].valid = bht_q[index][i].valid;
assign bht_prediction_o[i].taken = bht_q[index][i].saturation_counter[1] == 1'b1;
end
always_comb begin : update_bht if (saturation_counter == 2'b11) begin
bht_d = bht_q; // we can safely decrease it
saturation_counter = bht_q[update_pc][update_row_index].saturation_counter; if (!bht_update_i.taken)
bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1;
if (bht_update_i.valid && !debug_mode_i) begin
bht_d[update_pc][update_row_index].valid = 1'b1;
if (saturation_counter == 2'b11) begin
// we can safely decrease it
if (!bht_update_i.taken)
bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1;
// then check if it saturated in the negative regime e.g.: branch not taken // then check if it saturated in the negative regime e.g.: branch not taken
end else if (saturation_counter == 2'b00) begin end else if (saturation_counter == 2'b00) begin
// we can safely increase it // we can safely increase it
if (bht_update_i.taken) if (bht_update_i.taken)
bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1; bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1;
end else begin // otherwise we are not in any boundaries and can decrease or increase it end else begin // otherwise we are not in any boundaries and can decrease or increase it
if (bht_update_i.taken) if (bht_update_i.taken)
bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1; bht_d[update_pc][update_row_index].saturation_counter = saturation_counter + 1;
else else bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1;
bht_d[update_pc][update_row_index].saturation_counter = saturation_counter - 1;
end
end end
end end
end
always_ff @(posedge clk_i or negedge rst_ni) begin always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin if (!rst_ni) begin
for (int unsigned i = 0; i < NR_ROWS; i++) begin for (int unsigned i = 0; i < NR_ROWS; i++) begin
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
bht_q[i][j] <= '0;
end
end
end else begin
// evict all entries
if (flush_i) begin
for (int i = 0; i < NR_ROWS; i++) begin
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
bht_q[i][j] <= '0; bht_q[i][j].valid <= 1'b0;
bht_q[i][j].saturation_counter <= 2'b10;
end end
end end
end else begin end else begin
// evict all entries bht_q <= bht_d;
if (flush_i) begin
for (int i = 0; i < NR_ROWS; i++) begin
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
bht_q[i][j].valid <= 1'b0;
bht_q[i][j].saturation_counter <= 2'b10;
end
end
end else begin
bht_q <= bht_d;
end
end end
end end
end else begin : gen_fpga_bht //FPGA TARGETS
// number of bits par word in the bram
localparam BRAM_WORD_BITS = $bits(ariane_pkg::bht_t);
logic [ROW_INDEX_BITS-1:0] row_index;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] bht_ram_we;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_0;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_1;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_write_address;
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_wdata;
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_0;
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_1;
ariane_pkg::bht_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht;
ariane_pkg::bht_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht_updated;
if (CVA6Cfg.RVC) begin : gen_row_index
assign row_index = vpc_i[ROW_ADDR_BITS + OFFSET - 1:OFFSET];
end else begin
assign row_index = '0;
end
// -------------------------
// prediction assignment & update Branch History Table
// -------------------------
always_comb begin : prediction_update_bht
bht_ram_we = '0;
bht_ram_read_address_0 = '0;
bht_ram_read_address_1 = '0;
bht_ram_write_address = '0;
bht_ram_wdata ='0;
bht_updated = '0;
bht = '0;
for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
if (row_index == i) begin
bht_ram_read_address_0[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] = index;
bht_prediction_o[i].valid = bht_ram_rdata_0[i*BRAM_WORD_BITS+2] ;
bht_prediction_o[i].taken = bht_ram_rdata_0[i*BRAM_WORD_BITS+1] ;
end
end
if (bht_update_i.valid && !debug_mode_i) begin
for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
if (update_row_index == i) begin
bht_ram_read_address_1[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] = update_pc;
bht[i].saturation_counter = bht_ram_rdata_1[i*BRAM_WORD_BITS +: 2];
if (bht[i].saturation_counter == 2'b11) begin
// we can safely decrease it
if (!bht_update_i.taken)
bht_updated[i].saturation_counter = bht[i].saturation_counter - 1;
else
bht_updated[i].saturation_counter = 2'b11;
// then check if it saturated in the negative regime e.g.: branch not taken
end else if (bht[i].saturation_counter == 2'b00) begin
// we can safely increase it
if (bht_update_i.taken)
bht_updated[i].saturation_counter = bht[i].saturation_counter + 1;
else
bht_updated[i].saturation_counter = 2'b00;
end else begin // otherwise we are not in any boundaries and can decrease or increase it
if (bht_update_i.taken)
bht_updated[i].saturation_counter = bht[i].saturation_counter + 1;
else
bht_updated[i].saturation_counter = bht[i].saturation_counter - 1;
end
bht_updated[i].valid = 1'b1;
bht_ram_we[i] = 1'b1;
bht_ram_write_address[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] = update_pc;
//bht_ram_wdata[(i+1)*BRAM_WORD_BITS-1] = 1'b1; //valid
bht_ram_wdata[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] = {bht_updated[i].valid , bht_updated[i].saturation_counter};
end
end
end
end
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_ram
AsyncThreePortRam #(
.ADDR_WIDTH($clog2(NR_ROWS)),
.DATA_DEPTH (NR_ROWS),
.DATA_WIDTH(BRAM_WORD_BITS)
) i_bht_ram (
.Clk_CI ( clk_i ),
.WrEn_SI ( bht_ram_we[i] ),
.WrAddr_DI ( bht_ram_write_address[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] ),
.WrData_DI ( bht_ram_wdata[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] ),
.RdAddr_DI_0 ( bht_ram_read_address_0[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] ),
.RdAddr_DI_1 ( bht_ram_read_address_1[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] ),
.RdData_DO_0 ( bht_ram_rdata_0[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] ),
.RdData_DO_1 ( bht_ram_rdata_1[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] )
);
end
end end
end else begin : gen_fpga_bht //FPGA TARGETS
// number of bits par word in the bram
localparam BRAM_WORD_BITS = $bits(ariane_pkg::bht_t);
logic [ ROW_INDEX_BITS-1:0] row_index;
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] bht_ram_we;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_0;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_1;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_write_address;
logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_wdata;
logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_0;
logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_1;
ariane_pkg::bht_t [ ariane_pkg::INSTR_PER_FETCH-1:0] bht;
ariane_pkg::bht_t [ ariane_pkg::INSTR_PER_FETCH-1:0] bht_updated;
if (CVA6Cfg.RVC) begin : gen_row_index
assign row_index = vpc_i[ROW_ADDR_BITS+OFFSET-1:OFFSET];
end else begin
assign row_index = '0;
end
// -------------------------
// prediction assignment & update Branch History Table
// -------------------------
always_comb begin : prediction_update_bht
bht_ram_we = '0;
bht_ram_read_address_0 = '0;
bht_ram_read_address_1 = '0;
bht_ram_write_address = '0;
bht_ram_wdata = '0;
bht_updated = '0;
bht = '0;
for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
if (row_index == i) begin
bht_ram_read_address_0[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = index;
bht_prediction_o[i].valid = bht_ram_rdata_0[i*BRAM_WORD_BITS+2];
bht_prediction_o[i].taken = bht_ram_rdata_0[i*BRAM_WORD_BITS+1];
end
end
if (bht_update_i.valid && !debug_mode_i) begin
for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
if (update_row_index == i) begin
bht_ram_read_address_1[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
bht[i].saturation_counter = bht_ram_rdata_1[i*BRAM_WORD_BITS+:2];
if (bht[i].saturation_counter == 2'b11) begin
// we can safely decrease it
if (!bht_update_i.taken)
bht_updated[i].saturation_counter = bht[i].saturation_counter - 1;
else bht_updated[i].saturation_counter = 2'b11;
// then check if it saturated in the negative regime e.g.: branch not taken
end else if (bht[i].saturation_counter == 2'b00) begin
// we can safely increase it
if (bht_update_i.taken)
bht_updated[i].saturation_counter = bht[i].saturation_counter + 1;
else bht_updated[i].saturation_counter = 2'b00;
end else begin // otherwise we are not in any boundaries and can decrease or increase it
if (bht_update_i.taken)
bht_updated[i].saturation_counter = bht[i].saturation_counter + 1;
else bht_updated[i].saturation_counter = bht[i].saturation_counter - 1;
end
bht_updated[i].valid = 1'b1;
bht_ram_we[i] = 1'b1;
bht_ram_write_address[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
//bht_ram_wdata[(i+1)*BRAM_WORD_BITS-1] = 1'b1; //valid
bht_ram_wdata[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = {
bht_updated[i].valid, bht_updated[i].saturation_counter
};
end
end
end
end
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_ram
AsyncThreePortRam #(
.ADDR_WIDTH($clog2(NR_ROWS)),
.DATA_DEPTH(NR_ROWS),
.DATA_WIDTH(BRAM_WORD_BITS)
) i_bht_ram (
.Clk_CI (clk_i),
.WrEn_SI (bht_ram_we[i]),
.WrAddr_DI (bht_ram_write_address[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
.WrData_DI (bht_ram_wdata[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
.RdAddr_DI_0(bht_ram_read_address_0[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
.RdAddr_DI_1(bht_ram_read_address_1[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
.RdData_DO_0(bht_ram_rdata_0[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
.RdData_DO_1(bht_ram_rdata_1[i*BRAM_WORD_BITS+:BRAM_WORD_BITS])
);
end
end
endmodule endmodule

View file

@ -28,156 +28,158 @@
module btb #( module btb #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int NR_ENTRIES = 8 parameter int NR_ENTRIES = 8
)( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush the btb input logic flush_i, // flush the btb
input logic debug_mode_i, input logic debug_mode_i,
input logic [riscv::VLEN-1:0] vpc_i, // virtual PC from IF stage input logic [riscv::VLEN-1:0] vpc_i, // virtual PC from IF stage
input ariane_pkg::btb_update_t btb_update_i, // update btb with this information input ariane_pkg::btb_update_t btb_update_i, // update btb with this information
output ariane_pkg::btb_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] btb_prediction_o // prediction from btb output ariane_pkg::btb_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] btb_prediction_o // prediction from btb
); );
// the last bit is always zero, we don't need it for indexing // the last bit is always zero, we don't need it for indexing
localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2; localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 1 : 2;
// re-shape the branch history table // re-shape the branch history table
localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH; localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH;
// number of bits needed to index the row // number of bits needed to index the row
localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH); localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH);
localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1; localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1;
// number of bits we should use for prediction // number of bits we should use for prediction
localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS; localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS;
// prevent aliasing to degrade performance // prevent aliasing to degrade performance
localparam ANTIALIAS_BITS = 8; localparam ANTIALIAS_BITS = 8;
// number of bits par word in the bram // number of bits par word in the bram
localparam BRAM_WORD_BITS = $bits(ariane_pkg::btb_prediction_t); localparam BRAM_WORD_BITS = $bits(ariane_pkg::btb_prediction_t);
// we are not interested in all bits of the address // we are not interested in all bits of the address
unread i_unread (.d_i(|vpc_i)); unread i_unread (.d_i(|vpc_i));
logic [$clog2(NR_ROWS)-1:0] index, update_pc; logic [$clog2(NR_ROWS)-1:0] index, update_pc;
logic [ROW_INDEX_BITS-1:0] update_row_index; logic [ROW_INDEX_BITS-1:0] update_row_index;
assign index = vpc_i[PREDICTION_BITS - 1:ROW_ADDR_BITS + OFFSET]; assign index = vpc_i[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
assign update_pc = btb_update_i.pc[PREDICTION_BITS - 1:ROW_ADDR_BITS + OFFSET]; assign update_pc = btb_update_i.pc[PREDICTION_BITS-1:ROW_ADDR_BITS+OFFSET];
if (CVA6Cfg.RVC) begin : gen_update_row_index if (CVA6Cfg.RVC) begin : gen_update_row_index
assign update_row_index = btb_update_i.pc[ROW_ADDR_BITS + OFFSET - 1:OFFSET]; assign update_row_index = btb_update_i.pc[ROW_ADDR_BITS+OFFSET-1:OFFSET];
end else begin end else begin
assign update_row_index = '0; assign update_row_index = '0;
end
if (ariane_pkg::FPGA_EN) begin : gen_fpga_btb //FPGA TARGETS
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_prediction;
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_prediction;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_prediction;
logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_prediction;
logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_rdata_prediction;
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_update;
logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_update;
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_update;
logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_update;
// output matching prediction
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
assign btb_ram_csel_prediction[i] = 1'b1;
assign btb_ram_we_prediction[i] = 1'b0;
assign btb_ram_wdata_prediction = '0;
assign btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = index;
assign btb_prediction_o[i] = btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS];
end end
if (ariane_pkg::FPGA_EN) begin : gen_fpga_btb //FPGA TARGETS // -------------------------
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_prediction; // Update Branch Prediction
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_prediction; // -------------------------
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_prediction; // update on a mis-predict
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_prediction; always_comb begin : update_branch_predict
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_rdata_prediction; btb_ram_csel_update = '0;
btb_ram_we_update = '0;
btb_ram_addr_update = '0;
btb_ram_wdata_update = '0;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_update; if (btb_update_i.valid && !debug_mode_i) begin
logic [ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_update; for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_update; if (update_row_index == i) begin
logic [ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_update; btb_ram_csel_update[i] = 1'b1;
btb_ram_we_update[i] = 1'b1;
// output matching prediction btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc;
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS] = {
assign btb_ram_csel_prediction[i] = 1'b1; 1'b1, btb_update_i.target_address
assign btb_ram_we_prediction[i] = 1'b0; };
assign btb_ram_wdata_prediction = '0;
assign btb_ram_addr_prediction[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] = index;
assign btb_prediction_o[i] = btb_ram_rdata_prediction[i*BRAM_WORD_BITS +: BRAM_WORD_BITS];
end
// -------------------------
// Update Branch Prediction
// -------------------------
// update on a mis-predict
always_comb begin : update_branch_predict
btb_ram_csel_update = '0;
btb_ram_we_update = '0;
btb_ram_addr_update = '0;
btb_ram_wdata_update = '0;
if (btb_update_i.valid && !debug_mode_i) begin
for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin
if (update_row_index == i) begin
btb_ram_csel_update[i] = 1'b1;
btb_ram_we_update[i] = 1'b1;
btb_ram_addr_update[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] = update_pc;
btb_ram_wdata_update[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] = {1'b1 , btb_update_i.target_address};
end
end end
end end
end end
end
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_ram for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_ram
SyncDpRam #( SyncDpRam #(
.ADDR_WIDTH($clog2(NR_ROWS)), .ADDR_WIDTH($clog2(NR_ROWS)),
.DATA_DEPTH(NR_ROWS), .DATA_DEPTH(NR_ROWS),
.DATA_WIDTH(BRAM_WORD_BITS), .DATA_WIDTH(BRAM_WORD_BITS),
.OUT_REGS (0), .OUT_REGS (0),
.SIM_INIT (1) .SIM_INIT (1)
) i_btb_ram ( ) i_btb_ram (
.Clk_CI ( clk_i ), .Clk_CI (clk_i),
.Rst_RBI ( rst_ni ), .Rst_RBI (rst_ni),
//---------------------------- //----------------------------
.CSelA_SI ( btb_ram_csel_update[i] ), .CSelA_SI (btb_ram_csel_update[i]),
.WrEnA_SI ( btb_ram_we_update[i] ), .WrEnA_SI (btb_ram_we_update[i]),
.AddrA_DI ( btb_ram_addr_update[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] ), .AddrA_DI (btb_ram_addr_update[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
.WrDataA_DI ( btb_ram_wdata_update[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] ), .WrDataA_DI(btb_ram_wdata_update[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
.RdDataA_DO ( ), .RdDataA_DO(),
//----------------------------- //-----------------------------
.CSelB_SI ( btb_ram_csel_prediction[i] ), .CSelB_SI (btb_ram_csel_prediction[i]),
.WrEnB_SI ( btb_ram_we_prediction[i] ), .WrEnB_SI (btb_ram_we_prediction[i]),
.AddrB_DI ( btb_ram_addr_prediction[i*$clog2(NR_ROWS) +: $clog2(NR_ROWS)] ), .AddrB_DI (btb_ram_addr_prediction[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)]),
.WrDataB_DI ( btb_ram_wdata_prediction[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] ), .WrDataB_DI(btb_ram_wdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS]),
.RdDataB_DO ( btb_ram_rdata_prediction[i*BRAM_WORD_BITS +: BRAM_WORD_BITS] ) .RdDataB_DO(btb_ram_rdata_prediction[i*BRAM_WORD_BITS+:BRAM_WORD_BITS])
); );
end
end else begin : gen_asic_btb // ASIC TARGET
// typedef for all branch target entries
// we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects
ariane_pkg::btb_prediction_t
btb_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0],
btb_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0];
// output matching prediction
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output
assign btb_prediction_o[i] = btb_q[index][i]; // workaround
end
// -------------------------
// Update Branch Prediction
// -------------------------
// update on a mis-predict
always_comb begin : update_branch_predict
btb_d = btb_q;
if (btb_update_i.valid && !debug_mode_i) begin
btb_d[update_pc][update_row_index].valid = 1'b1;
// the target address is simply updated
btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address;
end end
end
end else begin : gen_asic_btb // ASIC TARGET // sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
// typedef for all branch target entries if (!rst_ni) begin
// we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects // Bias the branches to be taken upon first arrival
ariane_pkg::btb_prediction_t btb_d [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0], for (int i = 0; i < NR_ROWS; i++) btb_q[i] <= '{default: 0};
btb_q [NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0]; end else begin
// evict all entries
// output matching prediction if (flush_i) begin
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output for (int i = 0; i < NR_ROWS; i++) begin
assign btb_prediction_o[i] = btb_q[index][i]; // workaround for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
end btb_q[i][j].valid <= 1'b0;
// -------------------------
// Update Branch Prediction
// -------------------------
// update on a mis-predict
always_comb begin : update_branch_predict
btb_d = btb_q;
if (btb_update_i.valid && !debug_mode_i) begin
btb_d[update_pc][update_row_index].valid = 1'b1;
// the target address is simply updated
btb_d[update_pc][update_row_index].target_address = btb_update_i.target_address;
end
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin
// Bias the branches to be taken upon first arrival
for (int i = 0; i < NR_ROWS; i++)
btb_q[i] <= '{default: 0};
end else begin
// evict all entries
if (flush_i) begin
for (int i = 0; i < NR_ROWS; i++) begin
for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin
btb_q[i][j].valid <= 1'b0;
end
end end
end else begin
btb_q <= btb_d;
end end
end else begin
btb_q <= btb_d;
end end
end end
end end
end
endmodule endmodule

View file

@ -15,492 +15,502 @@
// This module interfaces with the instruction cache, handles control // This module interfaces with the instruction cache, handles control
// change request from the back-end and does branch prediction. // change request from the back-end and does branch prediction.
module frontend import ariane_pkg::*; #( module frontend
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush request for PCGEN input logic flush_i, // flush request for PCGEN
input logic flush_bp_i, // flush branch prediction input logic flush_bp_i, // flush branch prediction
input logic halt_i, // halt commit stage input logic halt_i, // halt commit stage
input logic debug_mode_i, input logic debug_mode_i,
// global input // global input
input logic [riscv::VLEN-1:0] boot_addr_i, input logic [riscv::VLEN-1:0] boot_addr_i,
// Set a new PC // Set a new PC
// mispredict // mispredict
input bp_resolve_t resolved_branch_i, // from controller signaling a branch_predict -> update BTB input bp_resolve_t resolved_branch_i, // from controller signaling a branch_predict -> update BTB
// from commit, when flushing the whole pipeline // from commit, when flushing the whole pipeline
input logic set_pc_commit_i, // Take the PC from commit stage input logic set_pc_commit_i, // Take the PC from commit stage
input logic [riscv::VLEN-1:0] pc_commit_i, // PC of instruction in commit stage input logic [riscv::VLEN-1:0] pc_commit_i, // PC of instruction in commit stage
// CSR input // CSR input
input logic [riscv::VLEN-1:0] epc_i, // exception PC which we need to return to input logic [riscv::VLEN-1:0] epc_i, // exception PC which we need to return to
input logic eret_i, // return from exception input logic eret_i, // return from exception
input logic [riscv::VLEN-1:0] trap_vector_base_i, // base of trap vector input logic [riscv::VLEN-1:0] trap_vector_base_i, // base of trap vector
input logic ex_valid_i, // exception is valid - from commit input logic ex_valid_i, // exception is valid - from commit
input logic set_debug_pc_i, // jump to debug address input logic set_debug_pc_i, // jump to debug address
// Instruction Fetch // Instruction Fetch
output icache_dreq_t icache_dreq_o, output icache_dreq_t icache_dreq_o,
input icache_drsp_t icache_dreq_i, input icache_drsp_t icache_dreq_i,
// instruction output port -> to processor back-end // instruction output port -> to processor back-end
output fetch_entry_t fetch_entry_o, // fetch entry containing all relevant data for the ID stage output fetch_entry_t fetch_entry_o, // fetch entry containing all relevant data for the ID stage
output logic fetch_entry_valid_o, // instruction in IF is valid output logic fetch_entry_valid_o, // instruction in IF is valid
input logic fetch_entry_ready_i // ID acknowledged this instruction input logic fetch_entry_ready_i // ID acknowledged this instruction
); );
// Instruction Cache Registers, from I$ // Instruction Cache Registers, from I$
logic [FETCH_WIDTH-1:0] icache_data_q; logic [ FETCH_WIDTH-1:0] icache_data_q;
logic icache_valid_q; logic icache_valid_q;
ariane_pkg::frontend_exception_t icache_ex_valid_q; ariane_pkg::frontend_exception_t icache_ex_valid_q;
logic [riscv::VLEN-1:0] icache_vaddr_q; logic [ riscv::VLEN-1:0] icache_vaddr_q;
logic instr_queue_ready; logic instr_queue_ready;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_consumed; logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_consumed;
// upper-most branch-prediction from last cycle // upper-most branch-prediction from last cycle
btb_prediction_t btb_q; btb_prediction_t btb_q;
bht_prediction_t bht_q; bht_prediction_t bht_q;
// instruction fetch is ready // instruction fetch is ready
logic if_ready; logic if_ready;
logic [riscv::VLEN-1:0] npc_d, npc_q; // next PC logic [riscv::VLEN-1:0] npc_d, npc_q; // next PC
// indicates whether we come out of reset (then we need to load boot_addr_i) // indicates whether we come out of reset (then we need to load boot_addr_i)
logic npc_rst_load_q; logic npc_rst_load_q;
logic replay; logic replay;
logic [riscv::VLEN-1:0] replay_addr; logic [ riscv::VLEN-1:0] replay_addr;
// shift amount // shift amount
logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] shamt; logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] shamt;
// address will always be 16 bit aligned, make this explicit here // address will always be 16 bit aligned, make this explicit here
if (CVA6Cfg.RVC) begin : gen_shamt if (CVA6Cfg.RVC) begin : gen_shamt
assign shamt = icache_dreq_i.vaddr[$clog2(ariane_pkg::INSTR_PER_FETCH):1]; assign shamt = icache_dreq_i.vaddr[$clog2(ariane_pkg::INSTR_PER_FETCH):1];
end else begin end else begin
assign shamt = 1'b0; assign shamt = 1'b0;
end
// -----------------------
// Ctrl Flow Speculation
// -----------------------
// RVI ctrl flow prediction
logic [INSTR_PER_FETCH-1:0] rvi_return, rvi_call, rvi_branch, rvi_jalr, rvi_jump;
logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvi_imm;
// RVC branching
logic [INSTR_PER_FETCH-1:0] rvc_branch, rvc_jump, rvc_jr, rvc_return, rvc_jalr, rvc_call;
logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvc_imm;
// re-aligned instruction and address (coming from cache - combinationally)
logic [INSTR_PER_FETCH-1:0][ 31:0] instr;
logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr;
logic [INSTR_PER_FETCH-1:0] instruction_valid;
// BHT, BTB and RAS prediction
bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction;
btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction;
bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction_shifted;
btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction_shifted;
ras_t ras_predict;
logic [ riscv::VLEN-1:0] vpc_btb;
// branch-predict update
logic is_mispredict;
logic ras_push, ras_pop;
logic [ riscv::VLEN-1:0] ras_update;
// Instruction FIFO
logic [ riscv::VLEN-1:0] predict_address;
cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvi_cf;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvc_cf;
logic serving_unaligned;
// Re-align instructions
instr_realign #(
.CVA6Cfg(CVA6Cfg)
) i_instr_realign (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (icache_dreq_o.kill_s2),
.valid_i (icache_valid_q),
.serving_unaligned_o(serving_unaligned),
.address_i (icache_vaddr_q),
.data_i (icache_data_q),
.valid_o (instruction_valid),
.addr_o (addr),
.instr_o (instr)
);
// --------------------
// Branch Prediction
// --------------------
// select the right branch prediction result
// in case we are serving an unaligned instruction in instr[0] we need to take
// the prediction we saved from the previous fetch
if (CVA6Cfg.RVC) begin : gen_btb_prediction_shifted
assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][$clog2(
INSTR_PER_FETCH
):1]];
assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][$clog2(
INSTR_PER_FETCH
):1]];
// for all other predictions we can use the generated address to index
// into the branch prediction data structures
for (genvar i = 1; i < INSTR_PER_FETCH; i++) begin : gen_prediction_address
assign bht_prediction_shifted[i] = bht_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
assign btb_prediction_shifted[i] = btb_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
end end
end else begin
assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][1]];
assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][1]];
end
;
// ----------------------- // for the return address stack it doens't matter as we have the
// Ctrl Flow Speculation // address of the call/return already
// ----------------------- logic bp_valid;
// RVI ctrl flow prediction
logic [INSTR_PER_FETCH-1:0] rvi_return, rvi_call, rvi_branch,
rvi_jalr, rvi_jump;
logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvi_imm;
// RVC branching
logic [INSTR_PER_FETCH-1:0] rvc_branch, rvc_jump, rvc_jr, rvc_return,
rvc_jalr, rvc_call;
logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvc_imm;
// re-aligned instruction and address (coming from cache - combinationally)
logic [INSTR_PER_FETCH-1:0][31:0] instr;
logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr;
logic [INSTR_PER_FETCH-1:0] instruction_valid;
// BHT, BTB and RAS prediction
bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction;
btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction;
bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction_shifted;
btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction_shifted;
ras_t ras_predict;
logic [riscv::VLEN-1:0] vpc_btb;
// branch-predict update logic [INSTR_PER_FETCH-1:0] is_branch;
logic is_mispredict; logic [INSTR_PER_FETCH-1:0] is_call;
logic ras_push, ras_pop; logic [INSTR_PER_FETCH-1:0] is_jump;
logic [riscv::VLEN-1:0] ras_update; logic [INSTR_PER_FETCH-1:0] is_return;
logic [INSTR_PER_FETCH-1:0] is_jalr;
// Instruction FIFO for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin
logic [riscv::VLEN-1:0] predict_address; // branch history table -> BHT
cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type; assign is_branch[i] = instruction_valid[i] & (rvi_branch[i] | rvc_branch[i]);
logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvi_cf; // function calls -> RAS
logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvc_cf; assign is_call[i] = instruction_valid[i] & (rvi_call[i] | rvc_call[i]);
// function return -> RAS
assign is_return[i] = instruction_valid[i] & (rvi_return[i] | rvc_return[i]);
// unconditional jumps with known target -> immediately resolved
assign is_jump[i] = instruction_valid[i] & (rvi_jump[i] | rvc_jump[i]);
// unconditional jumps with unknown target -> BTB
assign is_jalr[i] = instruction_valid[i] & ~is_return[i] & (rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]);
end
logic serving_unaligned; // taken/not taken
// Re-align instructions always_comb begin
instr_realign #( taken_rvi_cf = '0;
.CVA6Cfg ( CVA6Cfg ) taken_rvc_cf = '0;
) i_instr_realign ( predict_address = '0;
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( icache_dreq_o.kill_s2 ),
.valid_i ( icache_valid_q ),
.serving_unaligned_o ( serving_unaligned ),
.address_i ( icache_vaddr_q ),
.data_i ( icache_data_q ),
.valid_o ( instruction_valid ),
.addr_o ( addr ),
.instr_o ( instr )
);
// -------------------- for (int i = 0; i < INSTR_PER_FETCH; i++) cf_type[i] = ariane_pkg::NoCF;
// Branch Prediction
// --------------------
// select the right branch prediction result
// in case we are serving an unaligned instruction in instr[0] we need to take
// the prediction we saved from the previous fetch
if (CVA6Cfg.RVC) begin : gen_btb_prediction_shifted
assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][$clog2(INSTR_PER_FETCH):1]];
assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][$clog2(INSTR_PER_FETCH):1]];
// for all other predictions we can use the generated address to index ras_push = 1'b0;
// into the branch prediction data structures ras_pop = 1'b0;
for (genvar i = 1; i < INSTR_PER_FETCH; i++) begin : gen_prediction_address ras_update = '0;
assign bht_prediction_shifted[i] = bht_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
assign btb_prediction_shifted[i] = btb_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]];
end
end else begin
assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][1]];
assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][1]];
end;
// for the return address stack it doesn't matter as we have the // lowermost prediction gets precedence
// address of the call/return already for (int i = INSTR_PER_FETCH - 1; i >= 0; i--) begin
logic bp_valid; unique case ({
is_branch[i], is_return[i], is_jump[i], is_jalr[i]
logic [INSTR_PER_FETCH-1:0] is_branch; })
logic [INSTR_PER_FETCH-1:0] is_call; 4'b0000: ; // regular instruction e.g.: no branch
logic [INSTR_PER_FETCH-1:0] is_jump; // unconditional jump to register, we need the BTB to resolve this
logic [INSTR_PER_FETCH-1:0] is_return; 4'b0001: begin
logic [INSTR_PER_FETCH-1:0] is_jalr; ras_pop = 1'b0;
ras_push = 1'b0;
for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin if (btb_prediction_shifted[i].valid) begin
// branch history table -> BHT predict_address = btb_prediction_shifted[i].target_address;
assign is_branch[i] = instruction_valid[i] & (rvi_branch[i] | rvc_branch[i]); cf_type[i] = ariane_pkg::JumpR;
// function calls -> RAS
assign is_call[i] = instruction_valid[i] & (rvi_call[i] | rvc_call[i]);
// function return -> RAS
assign is_return[i] = instruction_valid[i] & (rvi_return[i] | rvc_return[i]);
// unconditional jumps with known target -> immediately resolved
assign is_jump[i] = instruction_valid[i] & (rvi_jump[i] | rvc_jump[i]);
// unconditional jumps with unknown target -> BTB
assign is_jalr[i] = instruction_valid[i] & ~is_return[i] & (rvi_jalr[i] | rvc_jalr[i] | rvc_jr[i]);
end
// taken/not taken
always_comb begin
taken_rvi_cf = '0;
taken_rvc_cf = '0;
predict_address = '0;
for (int i = 0; i < INSTR_PER_FETCH; i++) cf_type[i] = ariane_pkg::NoCF;
ras_push = 1'b0;
ras_pop = 1'b0;
ras_update = '0;
// lowermost prediction gets precedence
for (int i = INSTR_PER_FETCH - 1; i >= 0 ; i--) begin
unique case ({is_branch[i], is_return[i], is_jump[i], is_jalr[i]})
4'b0000:; // regular instruction e.g.: no branch
// unconditional jump to register, we need the BTB to resolve this
4'b0001: begin
ras_pop = 1'b0;
ras_push = 1'b0;
if (btb_prediction_shifted[i].valid) begin
predict_address = btb_prediction_shifted[i].target_address;
cf_type[i] = ariane_pkg::JumpR;
end
end end
// its an unconditional jump to an immediate end
4'b0010: begin // its an unconditional jump to an immediate
ras_pop = 1'b0; 4'b0010: begin
ras_push = 1'b0; ras_pop = 1'b0;
taken_rvi_cf[i] = rvi_jump[i]; ras_push = 1'b0;
taken_rvc_cf[i] = rvc_jump[i]; taken_rvi_cf[i] = rvi_jump[i];
cf_type[i] = ariane_pkg::Jump; taken_rvc_cf[i] = rvc_jump[i];
end cf_type[i] = ariane_pkg::Jump;
// return end
4'b0100: begin // return
// make sure to only alter the RAS if we actually consumed the instruction 4'b0100: begin
ras_pop = ras_predict.valid & instr_queue_consumed[i]; // make sure to only alter the RAS if we actually consumed the instruction
ras_push = 1'b0; ras_pop = ras_predict.valid & instr_queue_consumed[i];
predict_address = ras_predict.ra; ras_push = 1'b0;
cf_type[i] = ariane_pkg::Return; predict_address = ras_predict.ra;
end cf_type[i] = ariane_pkg::Return;
// branch prediction end
4'b1000: begin // branch prediction
ras_pop = 1'b0; 4'b1000: begin
ras_push = 1'b0; ras_pop = 1'b0;
// if we have a valid dynamic prediction use it ras_push = 1'b0;
if (bht_prediction_shifted[i].valid) begin // if we have a valid dynamic prediction use it
taken_rvi_cf[i] = rvi_branch[i] & bht_prediction_shifted[i].taken; if (bht_prediction_shifted[i].valid) begin
taken_rvc_cf[i] = rvc_branch[i] & bht_prediction_shifted[i].taken; taken_rvi_cf[i] = rvi_branch[i] & bht_prediction_shifted[i].taken;
taken_rvc_cf[i] = rvc_branch[i] & bht_prediction_shifted[i].taken;
// otherwise default to static prediction // otherwise default to static prediction
end else begin end else begin
// set if immediate is negative - static prediction // set if immediate is negative - static prediction
taken_rvi_cf[i] = rvi_branch[i] & rvi_imm[i][riscv::VLEN-1]; taken_rvi_cf[i] = rvi_branch[i] & rvi_imm[i][riscv::VLEN-1];
taken_rvc_cf[i] = rvc_branch[i] & rvc_imm[i][riscv::VLEN-1]; taken_rvc_cf[i] = rvc_branch[i] & rvc_imm[i][riscv::VLEN-1];
end
if (taken_rvi_cf[i] || taken_rvc_cf[i]) begin
cf_type[i] = ariane_pkg::Branch;
end
end end
default:; if (taken_rvi_cf[i] || taken_rvc_cf[i]) begin
// default: $error("Decoded more than one control flow"); cf_type[i] = ariane_pkg::Branch;
endcase
// if this instruction, in addition, is a call, save the resulting address
// but only if we actually consumed the address
if (is_call[i]) begin
ras_push = instr_queue_consumed[i];
ras_update = addr[i] + (rvc_call[i] ? 2 : 4);
end
// calculate the jump target address
if (taken_rvc_cf[i] || taken_rvi_cf[i]) begin
predict_address = addr[i] + (taken_rvc_cf[i] ? rvc_imm[i] : rvi_imm[i]);
end end
end
default: ;
// default: $error("Decoded more than one control flow");
endcase
// if this instruction, in addition, is a call, save the resulting address
// but only if we actually consumed the address
if (is_call[i]) begin
ras_push = instr_queue_consumed[i];
ras_update = addr[i] + (rvc_call[i] ? 2 : 4);
end
// calculate the jump target address
if (taken_rvc_cf[i] || taken_rvi_cf[i]) begin
predict_address = addr[i] + (taken_rvc_cf[i] ? rvc_imm[i] : rvi_imm[i]);
end end
end end
// or reduce struct end
always_comb begin // or reduce struct
bp_valid = 1'b0; always_comb begin
// BP cannot be valid if we have a return instruction and the RAS is not giving a valid address bp_valid = 1'b0;
// Check that we encountered a control flow and that for a return the RAS // BP cannot be valid if we have a return instruction and the RAS is not giving a valid address
// contains a valid prediction. // Check that we encountered a control flow and that for a return the RAS
for (int i = 0; i < INSTR_PER_FETCH; i++) bp_valid |= ((cf_type[i] != NoCF & cf_type[i] != Return) | ((cf_type[i] == Return) & ras_predict.valid)); // contains a valid prediction.
end for (int i = 0; i < INSTR_PER_FETCH; i++)
assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict; bp_valid |= ((cf_type[i] != NoCF & cf_type[i] != Return) | ((cf_type[i] == Return) & ras_predict.valid));
end
assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict;
// Cache interface // Cache interface
assign icache_dreq_o.req = instr_queue_ready; assign icache_dreq_o.req = instr_queue_ready;
assign if_ready = icache_dreq_i.ready & instr_queue_ready; assign if_ready = icache_dreq_i.ready & instr_queue_ready;
// We need to flush the cache pipeline if: // We need to flush the cache pipeline if:
// 1. We mispredicted // 1. We mispredicted
// 2. Want to flush the whole processor front-end // 2. Want to flush the whole processor front-end
// 3. Need to replay an instruction because the fetch-fifo was full // 3. Need to replay an instruction because the fetch-fifo was full
assign icache_dreq_o.kill_s1 = is_mispredict | flush_i | replay; assign icache_dreq_o.kill_s1 = is_mispredict | flush_i | replay;
// if we have a valid branch-prediction we need to only kill the last cache request // if we have a valid branch-prediction we need to only kill the last cache request
// also if we killed the first stage we also need to kill the second stage (inclusive flush) // also if we killed the first stage we also need to kill the second stage (inclusive flush)
assign icache_dreq_o.kill_s2 = icache_dreq_o.kill_s1 | bp_valid; assign icache_dreq_o.kill_s2 = icache_dreq_o.kill_s1 | bp_valid;
// Update Control Flow Predictions // Update Control Flow Predictions
bht_update_t bht_update; bht_update_t bht_update;
btb_update_t btb_update; btb_update_t btb_update;
// assert on branch, deassert when resolved // assert on branch, deassert when resolved
logic speculative_q,speculative_d; logic speculative_q, speculative_d;
assign speculative_d = (speculative_q && !resolved_branch_i.valid || |is_branch || |is_return || |is_jalr) && !flush_i; assign speculative_d = (speculative_q && !resolved_branch_i.valid || |is_branch || |is_return || |is_jalr) && !flush_i;
assign icache_dreq_o.spec = speculative_d; assign icache_dreq_o.spec = speculative_d;
assign bht_update.valid = resolved_branch_i.valid assign bht_update.valid = resolved_branch_i.valid
& (resolved_branch_i.cf_type == ariane_pkg::Branch); & (resolved_branch_i.cf_type == ariane_pkg::Branch);
assign bht_update.pc = resolved_branch_i.pc; assign bht_update.pc = resolved_branch_i.pc;
assign bht_update.taken = resolved_branch_i.is_taken; assign bht_update.taken = resolved_branch_i.is_taken;
// only update mispredicted branches e.g. no returns from the RAS // only update mispredicted branches e.g. no returns from the RAS
assign btb_update.valid = resolved_branch_i.valid assign btb_update.valid = resolved_branch_i.valid
& resolved_branch_i.is_mispredict & resolved_branch_i.is_mispredict
& (resolved_branch_i.cf_type == ariane_pkg::JumpR); & (resolved_branch_i.cf_type == ariane_pkg::JumpR);
assign btb_update.pc = resolved_branch_i.pc; assign btb_update.pc = resolved_branch_i.pc;
assign btb_update.target_address = resolved_branch_i.target_address; assign btb_update.target_address = resolved_branch_i.target_address;
// ------------------- // -------------------
// Next PC // Next PC
// ------------------- // -------------------
// next PC (NPC) can come from (in order of precedence): // next PC (NPC) can come from (in order of precedence):
// 0. Default assignment/replay instruction // 0. Default assignment/replay instruction
// 1. Branch Predict taken // 1. Branch Predict taken
// 2. Control flow change request (misprediction) // 2. Control flow change request (misprediction)
// 3. Return from environment call // 3. Return from environment call
// 4. Exception/Interrupt // 4. Exception/Interrupt
// 5. Pipeline Flush because of CSR side effects // 5. Pipeline Flush because of CSR side effects
// Mis-predict handling is a little bit different // Mis-predict handling is a little bit different
// select PC a.k.a PC Gen // select PC a.k.a PC Gen
always_comb begin : npc_select always_comb begin : npc_select
automatic logic [riscv::VLEN-1:0] fetch_address; automatic logic [riscv::VLEN-1:0] fetch_address;
// check whether we come out of reset // check whether we come out of reset
// this is a workaround. some tools have issues // this is a workaround. some tools have issues
// having boot_addr_i in the asynchronous // having boot_addr_i in the asynchronous
// reset assignment to npc_q, even though // reset assignment to npc_q, even though
// boot_addr_i will be assigned a constant // boot_addr_i will be assigned a constant
// on the top-level. // on the top-level.
if (npc_rst_load_q) begin if (npc_rst_load_q) begin
npc_d = boot_addr_i; npc_d = boot_addr_i;
fetch_address = boot_addr_i; fetch_address = boot_addr_i;
end else begin end else begin
fetch_address = npc_q; fetch_address = npc_q;
// keep stable by default // keep stable by default
npc_d = npc_q; npc_d = npc_q;
end
// 0. Branch Prediction
if (bp_valid) begin
fetch_address = predict_address;
npc_d = predict_address;
end
// 1. Default assignment
if (if_ready) begin
npc_d = {fetch_address[riscv::VLEN-1:2], 2'b0} + 'h4;
end
// 2. Replay instruction fetch
if (replay) begin
npc_d = replay_addr;
end
// 3. Control flow change request
if (is_mispredict) begin
npc_d = resolved_branch_i.target_address;
end
// 4. Return from environment call
if (eret_i) begin
npc_d = epc_i;
end
// 5. Exception/Interrupt
if (ex_valid_i) begin
npc_d = trap_vector_base_i;
end
// 6. Pipeline Flush because of CSR side effects
// On a pipeline flush start fetching from the next address
// of the instruction in the commit stage
// we either came here from a flush request of a CSR instruction or AMO,
// so as CSR or AMO instructions do not exist in a compressed form
// we can unconditionally do PC + 4 here
// or if the commit stage is halted, just take the current pc of the
// instruction in the commit stage
// TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage
if (set_pc_commit_i) begin
npc_d = pc_commit_i + (halt_i ? '0 : {{riscv::VLEN-3{1'b0}}, 3'b100});
end
// 7. Debug
// enter debug on a hard-coded base-address
if (set_debug_pc_i) npc_d = CVA6Cfg.DmBaseAddress[riscv::VLEN-1:0] + CVA6Cfg.HaltAddress[riscv::VLEN-1:0];
icache_dreq_o.vaddr = fetch_address;
end end
// 0. Branch Prediction
if (bp_valid) begin
fetch_address = predict_address;
npc_d = predict_address;
end
// 1. Default assignment
if (if_ready) begin
npc_d = {fetch_address[riscv::VLEN-1:2], 2'b0} + 'h4;
end
// 2. Replay instruction fetch
if (replay) begin
npc_d = replay_addr;
end
// 3. Control flow change request
if (is_mispredict) begin
npc_d = resolved_branch_i.target_address;
end
// 4. Return from environment call
if (eret_i) begin
npc_d = epc_i;
end
// 5. Exception/Interrupt
if (ex_valid_i) begin
npc_d = trap_vector_base_i;
end
// 6. Pipeline Flush because of CSR side effects
// On a pipeline flush start fetching from the next address
// of the instruction in the commit stage
// we either came here from a flush request of a CSR instruction or AMO,
// so as CSR or AMO instructions do not exist in a compressed form
// we can unconditionally do PC + 4 here
// or if the commit stage is halted, just take the current pc of the
// instruction in the commit stage
// TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage
if (set_pc_commit_i) begin
npc_d = pc_commit_i + (halt_i ? '0 : {{riscv::VLEN - 3{1'b0}}, 3'b100});
end
// 7. Debug
// enter debug on a hard-coded base-address
if (set_debug_pc_i)
npc_d = CVA6Cfg.DmBaseAddress[riscv::VLEN-1:0] + CVA6Cfg.HaltAddress[riscv::VLEN-1:0];
icache_dreq_o.vaddr = fetch_address;
end
logic [FETCH_WIDTH-1:0] icache_data; logic [FETCH_WIDTH-1:0] icache_data;
// re-align the cache line // re-align the cache line
assign icache_data = icache_dreq_i.data >> {shamt, 4'b0}; assign icache_data = icache_dreq_i.data >> {shamt, 4'b0};
always_ff @(posedge clk_i or negedge rst_ni) begin always_ff @(posedge clk_i or negedge rst_ni) begin
if (!rst_ni) begin if (!rst_ni) begin
npc_rst_load_q <= 1'b1; npc_rst_load_q <= 1'b1;
npc_q <= '0; npc_q <= '0;
speculative_q <= '0; speculative_q <= '0;
icache_data_q <= '0; icache_data_q <= '0;
icache_valid_q <= 1'b0; icache_valid_q <= 1'b0;
icache_vaddr_q <= 'b0; icache_vaddr_q <= 'b0;
icache_ex_valid_q <= ariane_pkg::FE_NONE; icache_ex_valid_q <= ariane_pkg::FE_NONE;
btb_q <= '0; btb_q <= '0;
bht_q <= '0; bht_q <= '0;
end else begin end else begin
npc_rst_load_q <= 1'b0; npc_rst_load_q <= 1'b0;
npc_q <= npc_d; npc_q <= npc_d;
speculative_q <= speculative_d; speculative_q <= speculative_d;
icache_valid_q <= icache_dreq_i.valid; icache_valid_q <= icache_dreq_i.valid;
if (icache_dreq_i.valid) begin if (icache_dreq_i.valid) begin
icache_data_q <= icache_data; icache_data_q <= icache_data;
icache_vaddr_q <= icache_dreq_i.vaddr; icache_vaddr_q <= icache_dreq_i.vaddr;
// Map the only three exceptions which can occur in the frontend to a two bit enum // Map the only three exceptions which can occur in the frontend to a two bit enum
if (icache_dreq_i.ex.cause == riscv::INSTR_PAGE_FAULT) begin if (icache_dreq_i.ex.cause == riscv::INSTR_PAGE_FAULT) begin
icache_ex_valid_q <= ariane_pkg::FE_INSTR_PAGE_FAULT; icache_ex_valid_q <= ariane_pkg::FE_INSTR_PAGE_FAULT;
end else if (icache_dreq_i.ex.cause == riscv::INSTR_ACCESS_FAULT) begin end else if (icache_dreq_i.ex.cause == riscv::INSTR_ACCESS_FAULT) begin
icache_ex_valid_q <= ariane_pkg::FE_INSTR_ACCESS_FAULT; icache_ex_valid_q <= ariane_pkg::FE_INSTR_ACCESS_FAULT;
end else begin end else begin
icache_ex_valid_q <= ariane_pkg::FE_NONE; icache_ex_valid_q <= ariane_pkg::FE_NONE;
end
// save the uppermost prediction
btb_q <= btb_prediction[INSTR_PER_FETCH-1];
bht_q <= bht_prediction[INSTR_PER_FETCH-1];
end end
// save the uppermost prediction
btb_q <= btb_prediction[INSTR_PER_FETCH-1];
bht_q <= bht_prediction[INSTR_PER_FETCH-1];
end end
end end
end
if (CVA6Cfg.RASDepth == 0) begin if (CVA6Cfg.RASDepth == 0) begin
assign ras_predict = '0; assign ras_predict = '0;
end else begin : ras_gen end else begin : ras_gen
ras #( ras #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg(CVA6Cfg),
.DEPTH ( CVA6Cfg.RASDepth ) .DEPTH (CVA6Cfg.RASDepth)
) i_ras ( ) i_ras (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.flush_i( flush_bp_i ), .flush_i(flush_bp_i),
.push_i ( ras_push ), .push_i (ras_push),
.pop_i ( ras_pop ), .pop_i (ras_pop),
.data_i ( ras_update ), .data_i (ras_update),
.data_o ( ras_predict ) .data_o (ras_predict)
);
end
//For FPGA, BTB is implemented in read synchronous BRAM
//while for ASIC, BTB is implemented in D flip-flop
//and can be read at the same cycle.
assign vpc_btb = (ariane_pkg::FPGA_EN) ? icache_dreq_i.vaddr : icache_vaddr_q;
if (CVA6Cfg.BTBEntries == 0) begin
assign btb_prediction = '0;
end else begin : btb_gen
btb #(
.CVA6Cfg ( CVA6Cfg ),
.NR_ENTRIES ( CVA6Cfg.BTBEntries )
) i_btb (
.clk_i,
.rst_ni,
.flush_i ( flush_bp_i ),
.debug_mode_i,
.vpc_i ( vpc_btb ),
.btb_update_i ( btb_update ),
.btb_prediction_o ( btb_prediction )
);
end
if (CVA6Cfg.BHTEntries == 0) begin
assign bht_prediction = '0;
end else begin : bht_gen
bht #(
.CVA6Cfg ( CVA6Cfg ),
.NR_ENTRIES ( CVA6Cfg.BHTEntries )
) i_bht (
.clk_i,
.rst_ni,
.flush_i ( flush_bp_i ),
.debug_mode_i,
.vpc_i ( icache_vaddr_q ),
.bht_update_i ( bht_update ),
.bht_prediction_o ( bht_prediction )
);
end
// we need to inspect up to INSTR_PER_FETCH instructions for branches
// and jumps
for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin : gen_instr_scan
instr_scan #(
.CVA6Cfg ( CVA6Cfg )
) i_instr_scan (
.instr_i ( instr[i] ),
.rvi_return_o ( rvi_return[i] ),
.rvi_call_o ( rvi_call[i] ),
.rvi_branch_o ( rvi_branch[i] ),
.rvi_jalr_o ( rvi_jalr[i] ),
.rvi_jump_o ( rvi_jump[i] ),
.rvi_imm_o ( rvi_imm[i] ),
.rvc_branch_o ( rvc_branch[i] ),
.rvc_jump_o ( rvc_jump[i] ),
.rvc_jr_o ( rvc_jr[i] ),
.rvc_return_o ( rvc_return[i] ),
.rvc_jalr_o ( rvc_jalr[i] ),
.rvc_call_o ( rvc_call[i] ),
.rvc_imm_o ( rvc_imm[i] )
);
end
instr_queue #(
.CVA6Cfg ( CVA6Cfg )
) i_instr_queue (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.flush_i ( flush_i ),
.instr_i ( instr ), // from re-aligner
.addr_i ( addr ), // from re-aligner
.exception_i ( icache_ex_valid_q ), // from I$
.exception_addr_i ( icache_vaddr_q ),
.predict_address_i ( predict_address ),
.cf_type_i ( cf_type ),
.valid_i ( instruction_valid ), // from re-aligner
.consumed_o ( instr_queue_consumed ),
.ready_o ( instr_queue_ready ),
.replay_o ( replay ),
.replay_addr_o ( replay_addr ),
.fetch_entry_o ( fetch_entry_o ), // to back-end
.fetch_entry_valid_o ( fetch_entry_valid_o ), // to back-end
.fetch_entry_ready_i ( fetch_entry_ready_i ) // to back-end
); );
end
// pragma translate_off //For FPGA, BTB is implemented in read synchronous BRAM
`ifndef VERILATOR //while for ASIC, BTB is implemented in D flip-flop
initial begin //and can be read at the same cycle.
assert (FETCH_WIDTH == 32 || FETCH_WIDTH == 64) else $fatal(1, "[frontend] fetch width != not supported"); assign vpc_btb = (ariane_pkg::FPGA_EN) ? icache_dreq_i.vaddr : icache_vaddr_q;
end
`endif if (CVA6Cfg.BTBEntries == 0) begin
// pragma translate_on assign btb_prediction = '0;
end else begin : btb_gen
btb #(
.CVA6Cfg (CVA6Cfg),
.NR_ENTRIES(CVA6Cfg.BTBEntries)
) i_btb (
.clk_i,
.rst_ni,
.flush_i (flush_bp_i),
.debug_mode_i,
.vpc_i (vpc_btb),
.btb_update_i (btb_update),
.btb_prediction_o(btb_prediction)
);
end
if (CVA6Cfg.BHTEntries == 0) begin
assign bht_prediction = '0;
end else begin : bht_gen
bht #(
.CVA6Cfg (CVA6Cfg),
.NR_ENTRIES(CVA6Cfg.BHTEntries)
) i_bht (
.clk_i,
.rst_ni,
.flush_i (flush_bp_i),
.debug_mode_i,
.vpc_i (icache_vaddr_q),
.bht_update_i (bht_update),
.bht_prediction_o(bht_prediction)
);
end
// we need to inspect up to INSTR_PER_FETCH instructions for branches
// and jumps
for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin : gen_instr_scan
instr_scan #(
.CVA6Cfg(CVA6Cfg)
) i_instr_scan (
.instr_i (instr[i]),
.rvi_return_o(rvi_return[i]),
.rvi_call_o (rvi_call[i]),
.rvi_branch_o(rvi_branch[i]),
.rvi_jalr_o (rvi_jalr[i]),
.rvi_jump_o (rvi_jump[i]),
.rvi_imm_o (rvi_imm[i]),
.rvc_branch_o(rvc_branch[i]),
.rvc_jump_o (rvc_jump[i]),
.rvc_jr_o (rvc_jr[i]),
.rvc_return_o(rvc_return[i]),
.rvc_jalr_o (rvc_jalr[i]),
.rvc_call_o (rvc_call[i]),
.rvc_imm_o (rvc_imm[i])
);
end
instr_queue #(
.CVA6Cfg(CVA6Cfg)
) i_instr_queue (
.clk_i (clk_i),
.rst_ni (rst_ni),
.flush_i (flush_i),
.instr_i (instr), // from re-aligner
.addr_i (addr), // from re-aligner
.exception_i (icache_ex_valid_q), // from I$
.exception_addr_i (icache_vaddr_q),
.predict_address_i (predict_address),
.cf_type_i (cf_type),
.valid_i (instruction_valid), // from re-aligner
.consumed_o (instr_queue_consumed),
.ready_o (instr_queue_ready),
.replay_o (replay),
.replay_addr_o (replay_addr),
.fetch_entry_o (fetch_entry_o), // to back-end
.fetch_entry_valid_o(fetch_entry_valid_o), // to back-end
.fetch_entry_ready_i(fetch_entry_ready_i) // to back-end
);
// pragma translate_off
`ifndef VERILATOR
initial begin
assert (FETCH_WIDTH == 32 || FETCH_WIDTH == 64)
else $fatal(1, "[frontend] fetch width != not supported");
end
`endif
// pragma translate_on
endmodule endmodule

View file

@ -43,64 +43,67 @@
// the replay mechanism gets more complicated as it can be that a 32 bit instruction // the replay mechanism gets more complicated as it can be that a 32 bit instruction
// can not be pushed at once. // can not be pushed at once.
module instr_queue import ariane_pkg::*; #( module instr_queue
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input logic flush_i, input logic flush_i,
input logic [ariane_pkg::INSTR_PER_FETCH-1:0][31:0] instr_i, input logic [ariane_pkg::INSTR_PER_FETCH-1:0][31:0] instr_i,
input logic [ariane_pkg::INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_i, input logic [ariane_pkg::INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_i,
input logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid_i, input logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid_i,
output logic ready_o, output logic ready_o,
output logic [ariane_pkg::INSTR_PER_FETCH-1:0] consumed_o, output logic [ariane_pkg::INSTR_PER_FETCH-1:0] consumed_o,
// we've encountered an exception, at this point the only possible exceptions are page-table faults // we've encountered an exception, at this point the only possible exceptions are page-table faults
input ariane_pkg::frontend_exception_t exception_i, input ariane_pkg::frontend_exception_t exception_i,
input logic [riscv::VLEN-1:0] exception_addr_i, input logic [riscv::VLEN-1:0] exception_addr_i,
// branch predict // branch predict
input logic [riscv::VLEN-1:0] predict_address_i, input logic [riscv::VLEN-1:0] predict_address_i,
input ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type_i, input ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type_i,
// replay instruction because one of the FIFO was already full // replay instruction because one of the FIFO was already full
output logic replay_o, output logic replay_o,
output logic [riscv::VLEN-1:0] replay_addr_o, // address at which to replay this instruction output logic [riscv::VLEN-1:0] replay_addr_o, // address at which to replay this instruction
// to processor backend // to processor backend
output ariane_pkg::fetch_entry_t fetch_entry_o, output ariane_pkg::fetch_entry_t fetch_entry_o,
output logic fetch_entry_valid_o, output logic fetch_entry_valid_o,
input logic fetch_entry_ready_i input logic fetch_entry_ready_i
); );
typedef struct packed { typedef struct packed {
logic [31:0] instr; // instruction word logic [31:0] instr; // instruction word
ariane_pkg::cf_t cf; // branch was taken ariane_pkg::cf_t cf; // branch was taken
ariane_pkg::frontend_exception_t ex; // exception happened ariane_pkg::frontend_exception_t ex; // exception happened
logic [riscv::VLEN-1:0] ex_vaddr; // lower VLEN bits of tval for exception logic [riscv::VLEN-1:0] ex_vaddr; // lower VLEN bits of tval for exception
} instr_data_t; } instr_data_t;
logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] branch_index; logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] branch_index;
// instruction queues // instruction queues
logic [ariane_pkg::INSTR_PER_FETCH-1:0] logic [ariane_pkg::INSTR_PER_FETCH-1:0][$clog2(
[$clog2(ariane_pkg::FETCH_FIFO_DEPTH)-1:0] instr_queue_usage; ariane_pkg::FETCH_FIFO_DEPTH
instr_data_t [ariane_pkg::INSTR_PER_FETCH-1:0] instr_data_in, instr_data_out; )-1:0] instr_queue_usage;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] push_instr, push_instr_fifo; instr_data_t [ariane_pkg::INSTR_PER_FETCH-1:0] instr_data_in, instr_data_out;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] pop_instr; logic [ariane_pkg::INSTR_PER_FETCH-1:0] push_instr, push_instr_fifo;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_full; logic [ ariane_pkg::INSTR_PER_FETCH-1:0] pop_instr;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_empty; logic [ ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_full;
logic instr_overflow; logic [ ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_empty;
logic instr_overflow;
// address queue // address queue
logic [$clog2(ariane_pkg::FETCH_FIFO_DEPTH)-1:0] address_queue_usage; logic [$clog2(ariane_pkg::FETCH_FIFO_DEPTH)-1:0] address_queue_usage;
logic [riscv::VLEN-1:0] address_out; logic [ riscv::VLEN-1:0] address_out;
logic pop_address; logic pop_address;
logic push_address; logic push_address;
logic full_address; logic full_address;
logic empty_address; logic empty_address;
logic address_overflow; logic address_overflow;
// input stream counter // input stream counter
logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] idx_is_d, idx_is_q; logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] idx_is_d, idx_is_q;
// Registers // Registers
// output FIFO select, one-hot // output FIFO select, one-hot
logic [ariane_pkg::INSTR_PER_FETCH-1:0] idx_ds_d, idx_ds_q; logic [ariane_pkg::INSTR_PER_FETCH-1:0] idx_ds_d, idx_ds_q;
logic [riscv::VLEN-1:0] pc_d, pc_q; // current PC logic [riscv::VLEN-1:0] pc_d, pc_q; // current PC
logic reset_address_d, reset_address_q; // we need to re-set the address because of a flush logic reset_address_d, reset_address_q; // we need to re-set the address because of a flush
logic [ariane_pkg::INSTR_PER_FETCH*2-2:0] branch_mask_extended; logic [ariane_pkg::INSTR_PER_FETCH*2-2:0] branch_mask_extended;
logic [ariane_pkg::INSTR_PER_FETCH-1:0] branch_mask; logic [ariane_pkg::INSTR_PER_FETCH-1:0] branch_mask;
@ -129,12 +132,12 @@ module instr_queue import ariane_pkg::*; #(
// calculate a branch mask, e.g.: get the first taken branch // calculate a branch mask, e.g.: get the first taken branch
lzc #( lzc #(
.WIDTH ( ariane_pkg::INSTR_PER_FETCH ), .WIDTH(ariane_pkg::INSTR_PER_FETCH),
.MODE ( 0 ) // count trailing zeros .MODE (0) // count trailing zeros
) i_lzc_branch_index ( ) i_lzc_branch_index (
.in_i ( taken ), // we want to count trailing zeros .in_i (taken), // we want to count trailing zeros
.cnt_o ( branch_index ), // first branch on branch_index .cnt_o (branch_index), // first branch on branch_index
.empty_o ( branch_empty ) .empty_o(branch_empty)
); );
@ -154,10 +157,10 @@ module instr_queue import ariane_pkg::*; #(
assign consumed_o = consumed_extended[ariane_pkg::INSTR_PER_FETCH-1:0]; assign consumed_o = consumed_extended[ariane_pkg::INSTR_PER_FETCH-1:0];
// count the numbers of valid instructions we've pushed from this package // count the numbers of valid instructions we've pushed from this package
popcount #( popcount #(
.INPUT_WIDTH ( ariane_pkg::INSTR_PER_FETCH ) .INPUT_WIDTH(ariane_pkg::INSTR_PER_FETCH)
) i_popcount ( ) i_popcount (
.data_i ( push_instr_fifo ), .data_i (push_instr_fifo),
.popcount_o ( popcount ) .popcount_o(popcount)
); );
assign shamt = popcount[$bits(shamt)-1:0]; assign shamt = popcount[$bits(shamt)-1:0];
@ -168,7 +171,7 @@ module instr_queue import ariane_pkg::*; #(
// Input interface // Input interface
// ---------------------- // ----------------------
// rotate left by the current position // rotate left by the current position
assign fifo_pos_extended = { valid, valid } << idx_is_q; assign fifo_pos_extended = {valid, valid} << idx_is_q;
// we just care about the upper bits // we just care about the upper bits
assign fifo_pos = fifo_pos_extended[ariane_pkg::INSTR_PER_FETCH*2-1:ariane_pkg::INSTR_PER_FETCH]; assign fifo_pos = fifo_pos_extended[ariane_pkg::INSTR_PER_FETCH*2-1:ariane_pkg::INSTR_PER_FETCH];
// the fifo_position signal can directly be used to guide the push signal of each FIFO // the fifo_position signal can directly be used to guide the push signal of each FIFO
@ -178,17 +181,17 @@ module instr_queue import ariane_pkg::*; #(
// duplicate the entries for easier selection e.g.: 3 2 1 0 3 2 1 0 // duplicate the entries for easier selection e.g.: 3 2 1 0 3 2 1 0
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_duplicate_instr_input for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_duplicate_instr_input
assign instr[i] = instr_i[i]; assign instr[i] = instr_i[i];
assign instr[i + ariane_pkg::INSTR_PER_FETCH] = instr_i[i]; assign instr[i+ariane_pkg::INSTR_PER_FETCH] = instr_i[i];
assign cf[i] = cf_type_i[i]; assign cf[i] = cf_type_i[i];
assign cf[i + ariane_pkg::INSTR_PER_FETCH] = cf_type_i[i]; assign cf[i+ariane_pkg::INSTR_PER_FETCH] = cf_type_i[i];
end end
// shift the inputs // shift the inputs
for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_fifo_input_select for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_fifo_input_select
/* verilator lint_off WIDTH */ /* verilator lint_off WIDTH */
assign instr_data_in[i].instr = instr[i + idx_is_q]; assign instr_data_in[i].instr = instr[i+idx_is_q];
assign instr_data_in[i].cf = cf[i + idx_is_q]; assign instr_data_in[i].cf = cf[i+idx_is_q];
assign instr_data_in[i].ex = exception_i; // exceptions hold for the whole fetch packet assign instr_data_in[i].ex = exception_i; // exceptions hold for the whole fetch packet
assign instr_data_in[i].ex_vaddr = exception_addr_i; assign instr_data_in[i].ex_vaddr = exception_addr_i;
/* verilator lint_on WIDTH */ /* verilator lint_on WIDTH */
end end
@ -217,7 +220,7 @@ module instr_queue import ariane_pkg::*; #(
/* verilator lint_off WIDTH */ /* verilator lint_off WIDTH */
assign instr_data_in[0].instr = instr_i[0]; assign instr_data_in[0].instr = instr_i[0];
assign instr_data_in[0].cf = cf_type_i[0]; assign instr_data_in[0].cf = cf_type_i[0];
assign instr_data_in[0].ex = exception_i; // exceptions hold for the whole fetch packet assign instr_data_in[0].ex = exception_i; // exceptions hold for the whole fetch packet
assign instr_data_in[0].ex_vaddr = exception_addr_i; assign instr_data_in[0].ex_vaddr = exception_addr_i;
/* verilator lint_on WIDTH */ /* verilator lint_on WIDTH */
end end
@ -235,7 +238,7 @@ module instr_queue import ariane_pkg::*; #(
end else begin : gen_instr_overflow_fifo_without_C end else begin : gen_instr_overflow_fifo_without_C
assign instr_overflow_fifo = instr_queue_full & valid_i; assign instr_overflow_fifo = instr_queue_full & valid_i;
end end
assign instr_overflow = |instr_overflow_fifo; // at least one instruction overflowed assign instr_overflow = |instr_overflow_fifo; // at least one instruction overflowed
assign address_overflow = full_address & push_address; assign address_overflow = full_address & push_address;
assign replay_o = instr_overflow | address_overflow; assign replay_o = instr_overflow | address_overflow;
@ -279,14 +282,18 @@ module instr_queue import ariane_pkg::*; #(
end end
fetch_entry_o.instruction = instr_data_out[i].instr; fetch_entry_o.instruction = instr_data_out[i].instr;
fetch_entry_o.ex.valid = instr_data_out[i].ex != ariane_pkg::FE_NONE; fetch_entry_o.ex.valid = instr_data_out[i].ex != ariane_pkg::FE_NONE;
fetch_entry_o.ex.tval = {{(riscv::XLEN-riscv::VLEN){1'b0}}, instr_data_out[i].ex_vaddr}; fetch_entry_o.ex.tval = {
{(riscv::XLEN - riscv::VLEN) {1'b0}}, instr_data_out[i].ex_vaddr
};
fetch_entry_o.branch_predict.cf = instr_data_out[i].cf; fetch_entry_o.branch_predict.cf = instr_data_out[i].cf;
pop_instr[i] = fetch_entry_valid_o & fetch_entry_ready_i; pop_instr[i] = fetch_entry_valid_o & fetch_entry_ready_i;
end end
end end
// rotate the pointer left // rotate the pointer left
if (fetch_entry_ready_i) begin if (fetch_entry_ready_i) begin
idx_ds_d = {idx_ds_q[ariane_pkg::INSTR_PER_FETCH-2:0], idx_ds_q[ariane_pkg::INSTR_PER_FETCH-1]}; idx_ds_d = {
idx_ds_q[ariane_pkg::INSTR_PER_FETCH-2:0], idx_ds_q[ariane_pkg::INSTR_PER_FETCH-1]
};
end end
end end
end else begin : gen_downstream_itf_without_c end else begin : gen_downstream_itf_without_c
@ -302,7 +309,7 @@ module instr_queue import ariane_pkg::*; #(
end else begin end else begin
fetch_entry_o.ex.cause = riscv::INSTR_PAGE_FAULT; fetch_entry_o.ex.cause = riscv::INSTR_PAGE_FAULT;
end end
fetch_entry_o.ex.tval = {{64-riscv::VLEN{1'b0}}, instr_data_out[0].ex_vaddr}; fetch_entry_o.ex.tval = {{64 - riscv::VLEN{1'b0}}, instr_data_out[0].ex_vaddr};
fetch_entry_o.branch_predict.predict_address = address_out; fetch_entry_o.branch_predict.predict_address = address_out;
fetch_entry_o.branch_predict.cf = instr_data_out[0].cf; fetch_entry_o.branch_predict.cf = instr_data_out[0].cf;
@ -326,15 +333,15 @@ module instr_queue import ariane_pkg::*; #(
// TODO(zarubaf): This needs to change for a dual issue implementation // TODO(zarubaf): This needs to change for a dual issue implementation
// advance the PC // advance the PC
if (ariane_pkg::RVC == 1'b1) begin : gen_pc_with_c_extension if (ariane_pkg::RVC == 1'b1) begin : gen_pc_with_c_extension
pc_d = pc_q + ((fetch_entry_o.instruction[1:0] != 2'b11) ? 'd2 : 'd4); pc_d = pc_q + ((fetch_entry_o.instruction[1:0] != 2'b11) ? 'd2 : 'd4);
end else begin : gen_pc_without_c_extension end else begin : gen_pc_without_c_extension
pc_d = pc_q + 'd4; pc_d = pc_q + 'd4;
end end
end end
if (pop_address) pc_d = address_out; if (pop_address) pc_d = address_out;
// we previously flushed so we need to reset the address // we previously flushed so we need to reset the address
if (valid_i[0] && reset_address_q) begin if (valid_i[0] && reset_address_q) begin
// this is the base of the first instruction // this is the base of the first instruction
pc_d = addr_i[0]; pc_d = addr_i[0];
@ -347,20 +354,20 @@ module instr_queue import ariane_pkg::*; #(
// Make sure we don't save any instructions if we couldn't save the address // Make sure we don't save any instructions if we couldn't save the address
assign push_instr_fifo[i] = push_instr[i] & ~address_overflow; assign push_instr_fifo[i] = push_instr[i] & ~address_overflow;
fifo_v3 #( fifo_v3 #(
.DEPTH ( ariane_pkg::FETCH_FIFO_DEPTH ), .DEPTH(ariane_pkg::FETCH_FIFO_DEPTH),
.dtype ( instr_data_t ) .dtype(instr_data_t)
) i_fifo_instr_data ( ) i_fifo_instr_data (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( flush_i ), .flush_i (flush_i),
.testmode_i ( 1'b0 ), .testmode_i(1'b0),
.full_o ( instr_queue_full[i] ), .full_o (instr_queue_full[i]),
.empty_o ( instr_queue_empty[i] ), .empty_o (instr_queue_empty[i]),
.usage_o ( instr_queue_usage[i] ), .usage_o (instr_queue_usage[i]),
.data_i ( instr_data_in[i] ), .data_i (instr_data_in[i]),
.push_i ( push_instr_fifo[i] ), .push_i (push_instr_fifo[i]),
.data_o ( instr_data_out[i] ), .data_o (instr_data_out[i]),
.pop_i ( pop_instr[i] ) .pop_i (pop_instr[i])
); );
end end
// or reduce and check whether we are retiring a taken branch (might be that the corresponding) // or reduce and check whether we are retiring a taken branch (might be that the corresponding)
@ -374,26 +381,26 @@ module instr_queue import ariane_pkg::*; #(
end end
fifo_v3 #( fifo_v3 #(
.DEPTH ( ariane_pkg::FETCH_FIFO_DEPTH ), // TODO(zarubaf): Fork out to separate param .DEPTH (ariane_pkg::FETCH_FIFO_DEPTH), // TODO(zarubaf): Fork out to separate param
.DATA_WIDTH ( riscv::VLEN ) .DATA_WIDTH(riscv::VLEN)
) i_fifo_address ( ) i_fifo_address (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( flush_i ), .flush_i (flush_i),
.testmode_i ( 1'b0 ), .testmode_i(1'b0),
.full_o ( full_address ), .full_o (full_address),
.empty_o ( empty_address ), .empty_o (empty_address),
.usage_o ( address_queue_usage ), .usage_o (address_queue_usage),
.data_i ( predict_address_i ), .data_i (predict_address_i),
.push_i ( push_address & ~full_address ), .push_i (push_address & ~full_address),
.data_o ( address_out ), .data_o (address_out),
.pop_i ( pop_address ) .pop_i (pop_address)
); );
unread i_unread_address_fifo (.d_i(|{empty_address, address_queue_usage})); unread i_unread_address_fifo (.d_i(|{empty_address, address_queue_usage}));
unread i_unread_branch_mask (.d_i(|branch_mask_extended)); unread i_unread_branch_mask (.d_i(|branch_mask_extended));
unread i_unread_lzc (.d_i(|{branch_empty})); unread i_unread_lzc (.d_i(|{branch_empty}));
unread i_unread_fifo_pos (.d_i(|fifo_pos_extended)); // we don't care about the lower signals unread i_unread_fifo_pos (.d_i(|fifo_pos_extended)); // we don't care about the lower signals
unread i_unread_instr_fifo (.d_i(|instr_queue_usage)); unread i_unread_instr_fifo (.d_i(|instr_queue_usage));
if (ariane_pkg::RVC) begin : gen_pc_q_with_c if (ariane_pkg::RVC) begin : gen_pc_q_with_c
@ -413,8 +420,8 @@ module instr_queue import ariane_pkg::*; #(
idx_is_q <= '0; idx_is_q <= '0;
reset_address_q <= 1'b1; reset_address_q <= 1'b1;
end else begin end else begin
idx_ds_q <= idx_ds_d; idx_ds_q <= idx_ds_d;
idx_is_q <= idx_is_d; idx_is_q <= idx_is_d;
end end
end end
end end
@ -436,14 +443,17 @@ module instr_queue import ariane_pkg::*; #(
end end
// pragma translate_off // pragma translate_off
`ifndef VERILATOR `ifndef VERILATOR
replay_address_fifo: assert property ( replay_address_fifo :
@(posedge clk_i) disable iff (!rst_ni) replay_o |-> !i_fifo_address.push_i assert property (@(posedge clk_i) disable iff (!rst_ni) replay_o |-> !i_fifo_address.push_i)
) else $fatal(1,"[instr_queue] Pushing address although replay asserted"); else $fatal(1, "[instr_queue] Pushing address although replay asserted");
output_select_onehot: assert property ( output_select_onehot :
@(posedge clk_i) $onehot0(idx_ds_q) assert property (@(posedge clk_i) $onehot0(idx_ds_q))
) else begin $error("Output select should be one-hot encoded"); $stop(); end else begin
`endif $error("Output select should be one-hot encoded");
$stop();
end
`endif
// pragma translate_on // pragma translate_on
endmodule endmodule

View file

@ -18,62 +18,66 @@
module instr_scan #( module instr_scan #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic [31:0] instr_i, // expect aligned instruction, compressed or not input logic [ 31:0] instr_i, // expect aligned instruction, compressed or not
output logic rvi_return_o, output logic rvi_return_o,
output logic rvi_call_o, output logic rvi_call_o,
output logic rvi_branch_o, output logic rvi_branch_o,
output logic rvi_jalr_o, output logic rvi_jalr_o,
output logic rvi_jump_o, output logic rvi_jump_o,
output logic [riscv::VLEN-1:0] rvi_imm_o, output logic [riscv::VLEN-1:0] rvi_imm_o,
output logic rvc_branch_o, output logic rvc_branch_o,
output logic rvc_jump_o, output logic rvc_jump_o,
output logic rvc_jr_o, output logic rvc_jr_o,
output logic rvc_return_o, output logic rvc_return_o,
output logic rvc_jalr_o, output logic rvc_jalr_o,
output logic rvc_call_o, output logic rvc_call_o,
output logic [riscv::VLEN-1:0] rvc_imm_o output logic [riscv::VLEN-1:0] rvc_imm_o
); );
logic is_rvc; logic is_rvc;
assign is_rvc = (instr_i[1:0] != 2'b11); assign is_rvc = (instr_i[1:0] != 2'b11);
logic rv32_rvc_jal; logic rv32_rvc_jal;
assign rv32_rvc_jal = (riscv::XLEN == 32) & ((instr_i[15:13] == riscv::OpcodeC1Jal) & is_rvc & (instr_i[1:0] == riscv::OpcodeC1)); assign rv32_rvc_jal = (riscv::XLEN == 32) & ((instr_i[15:13] == riscv::OpcodeC1Jal) & is_rvc & (instr_i[1:0] == riscv::OpcodeC1));
logic is_xret; logic is_xret;
assign is_xret = logic'(instr_i[31:30] == 2'b00) & logic'(instr_i[28:0] == 29'b10000001000000000000001110011); assign is_xret = logic'(instr_i[31:30] == 2'b00) & logic'(instr_i[28:0] == 29'b10000001000000000000001110011);
// check that rs1 is either x1 or x5 and that rd is not rs1 // check that rs1 is either x1 or x5 and that rd is not rs1
assign rvi_return_o = rvi_jalr_o & ((instr_i[19:15] == 5'd1) | instr_i[19:15] == 5'd5) assign rvi_return_o = rvi_jalr_o & ((instr_i[19:15] == 5'd1) | instr_i[19:15] == 5'd5)
& (instr_i[19:15] != instr_i[11:7]); & (instr_i[19:15] != instr_i[11:7]);
// Opcode is JAL[R] and destination register is either x1 or x5 // Opcode is JAL[R] and destination register is either x1 or x5
assign rvi_call_o = (rvi_jalr_o | rvi_jump_o) & ((instr_i[11:7] == 5'd1) | instr_i[11:7] == 5'd5); assign rvi_call_o = (rvi_jalr_o | rvi_jump_o) & ((instr_i[11:7] == 5'd1) | instr_i[11:7] == 5'd5);
// differentiates between JAL and BRANCH opcode, JALR comes from BHT // differentiates between JAL and BRANCH opcode, JALR comes from BHT
assign rvi_imm_o = is_xret ? '0 : (instr_i[3]) ? ariane_pkg::uj_imm(instr_i) : ariane_pkg::sb_imm(instr_i); assign rvi_imm_o = is_xret ? '0 : (instr_i[3]) ? ariane_pkg::uj_imm(
assign rvi_branch_o = (instr_i[6:0] == riscv::OpcodeBranch); instr_i
assign rvi_jalr_o = (instr_i[6:0] == riscv::OpcodeJalr); ) : ariane_pkg::sb_imm(
assign rvi_jump_o = logic'(instr_i[6:0] == riscv::OpcodeJal) | is_xret; instr_i
);
assign rvi_branch_o = (instr_i[6:0] == riscv::OpcodeBranch);
assign rvi_jalr_o = (instr_i[6:0] == riscv::OpcodeJalr);
assign rvi_jump_o = logic'(instr_i[6:0] == riscv::OpcodeJal) | is_xret;
// opcode JAL // opcode JAL
assign rvc_jump_o = ((instr_i[15:13] == riscv::OpcodeC1J) & is_rvc & (instr_i[1:0] == riscv::OpcodeC1)) | rv32_rvc_jal; assign rvc_jump_o = ((instr_i[15:13] == riscv::OpcodeC1J) & is_rvc & (instr_i[1:0] == riscv::OpcodeC1)) | rv32_rvc_jal;
// always links to register 0 // always links to register 0
logic is_jal_r; logic is_jal_r;
assign is_jal_r = (instr_i[15:13] == riscv::OpcodeC2JalrMvAdd) assign is_jal_r = (instr_i[15:13] == riscv::OpcodeC2JalrMvAdd)
& (instr_i[6:2] == 5'b00000) & (instr_i[6:2] == 5'b00000)
& (instr_i[1:0] == riscv::OpcodeC2) & (instr_i[1:0] == riscv::OpcodeC2)
& is_rvc; & is_rvc;
assign rvc_jr_o = is_jal_r & ~instr_i[12]; assign rvc_jr_o = is_jal_r & ~instr_i[12];
// always links to register 1 e.g.: it is a jump // always links to register 1 e.g.: it is a jump
assign rvc_jalr_o = is_jal_r & instr_i[12]; assign rvc_jalr_o = is_jal_r & instr_i[12];
assign rvc_call_o = rvc_jalr_o | rv32_rvc_jal; assign rvc_call_o = rvc_jalr_o | rv32_rvc_jal;
assign rvc_branch_o = ((instr_i[15:13] == riscv::OpcodeC1Beqz) | (instr_i[15:13] == riscv::OpcodeC1Bnez)) assign rvc_branch_o = ((instr_i[15:13] == riscv::OpcodeC1Beqz) | (instr_i[15:13] == riscv::OpcodeC1Bnez))
& (instr_i[1:0] == riscv::OpcodeC1) & (instr_i[1:0] == riscv::OpcodeC1)
& is_rvc; & is_rvc;
// check that rs1 is x1 or x5 // check that rs1 is x1 or x5
assign rvc_return_o = ((instr_i[11:7] == 5'd1) | (instr_i[11:7] == 5'd5)) & rvc_jr_o ; assign rvc_return_o = ((instr_i[11:7] == 5'd1) | (instr_i[11:7] == 5'd5)) & rvc_jr_o;
// differentiates between JAL and BRANCH opcode, JALR comes from BHT // differentiates between JAL and BRANCH opcode, JALR comes from BHT
assign rvc_imm_o = (instr_i[14]) ? {{56+riscv::VLEN-64{instr_i[12]}}, instr_i[6:5], instr_i[2], instr_i[11:10], instr_i[4:3], 1'b0} assign rvc_imm_o = (instr_i[14]) ? {{56+riscv::VLEN-64{instr_i[12]}}, instr_i[6:5], instr_i[2], instr_i[11:10], instr_i[4:3], 1'b0}
: {{53+riscv::VLEN-64{instr_i[12]}}, instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], 1'b0}; : {{53+riscv::VLEN-64{instr_i[12]}}, instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], 1'b0};
endmodule endmodule

View file

@ -17,55 +17,55 @@
module ras #( module ras #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned DEPTH = 2 parameter int unsigned DEPTH = 2
)( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input logic flush_i, input logic flush_i,
input logic push_i, input logic push_i,
input logic pop_i, input logic pop_i,
input logic [riscv::VLEN-1:0] data_i, input logic [riscv::VLEN-1:0] data_i,
output ariane_pkg::ras_t data_o output ariane_pkg::ras_t data_o
); );
ariane_pkg::ras_t [DEPTH-1:0] stack_d, stack_q; ariane_pkg::ras_t [DEPTH-1:0] stack_d, stack_q;
assign data_o = stack_q[0]; assign data_o = stack_q[0];
always_comb begin always_comb begin
stack_d = stack_q; stack_d = stack_q;
// push on the stack // push on the stack
if (push_i) begin if (push_i) begin
stack_d[0].ra = data_i; stack_d[0].ra = data_i;
// mark the new return address as valid // mark the new return address as valid
stack_d[0].valid = 1'b1; stack_d[0].valid = 1'b1;
stack_d[DEPTH-1:1] = stack_q[DEPTH-2:0]; stack_d[DEPTH-1:1] = stack_q[DEPTH-2:0];
end
if (pop_i) begin
stack_d[DEPTH-2:0] = stack_q[DEPTH-1:1];
// we popped the value so invalidate the end of the stack
stack_d[DEPTH-1].valid = 1'b0;
stack_d[DEPTH-1].ra = 'b0;
end
// leave everything untouched and just push the latest value to the
// top of the stack
if (pop_i && push_i) begin
stack_d = stack_q;
stack_d[0].ra = data_i;
stack_d[0].valid = 1'b1;
end
if (flush_i) begin
stack_d = '0;
end
end end
always_ff @(posedge clk_i or negedge rst_ni) begin if (pop_i) begin
if (~rst_ni) begin stack_d[DEPTH-2:0] = stack_q[DEPTH-1:1];
stack_q <= '0; // we popped the value so invalidate the end of the stack
end else begin stack_d[DEPTH-1].valid = 1'b0;
stack_q <= stack_d; stack_d[DEPTH-1].ra = 'b0;
end
end end
// leave everything untouched and just push the latest value to the
// top of the stack
if (pop_i && push_i) begin
stack_d = stack_q;
stack_d[0].ra = data_i;
stack_d[0].valid = 1'b1;
end
if (flush_i) begin
stack_d = '0;
end
end
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
stack_q <= '0;
end else begin
stack_q <= stack_d;
end
end
endmodule endmodule

View file

@ -16,127 +16,125 @@
module id_stage #( module id_stage #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input logic flush_i, input logic flush_i,
input logic debug_req_i, input logic debug_req_i,
// from IF // from IF
input ariane_pkg::fetch_entry_t fetch_entry_i, input ariane_pkg::fetch_entry_t fetch_entry_i,
input logic fetch_entry_valid_i, input logic fetch_entry_valid_i,
output logic fetch_entry_ready_o, // acknowledge the instruction (fetch entry) output logic fetch_entry_ready_o, // acknowledge the instruction (fetch entry)
// to ID // to ID
output ariane_pkg::scoreboard_entry_t issue_entry_o, // a decoded instruction output ariane_pkg::scoreboard_entry_t issue_entry_o, // a decoded instruction
output logic issue_entry_valid_o, // issue entry is valid output logic issue_entry_valid_o, // issue entry is valid
output logic is_ctrl_flow_o, // the instruction we issue is a ctrl flow instructions output logic is_ctrl_flow_o, // the instruction we issue is a ctrl flow instructions
input logic issue_instr_ack_i, // issue stage acknowledged sampling of instructions input logic issue_instr_ack_i, // issue stage acknowledged sampling of instructions
// from CSR file // from CSR file
input riscv::priv_lvl_t priv_lvl_i, // current privilege level input riscv::priv_lvl_t priv_lvl_i, // current privilege level
input riscv::xs_t fs_i, // floating point extension status input riscv::xs_t fs_i, // floating point extension status
input logic [2:0] frm_i, // floating-point dynamic rounding mode input logic [2:0] frm_i, // floating-point dynamic rounding mode
input riscv::xs_t vs_i, // vector extension status input riscv::xs_t vs_i, // vector extension status
input logic [1:0] irq_i, input logic [1:0] irq_i,
input ariane_pkg::irq_ctrl_t irq_ctrl_i, input ariane_pkg::irq_ctrl_t irq_ctrl_i,
input logic debug_mode_i, // we are in debug mode input logic debug_mode_i, // we are in debug mode
input logic tvm_i, input logic tvm_i,
input logic tw_i, input logic tw_i,
input logic tsr_i input logic tsr_i
); );
// ID/ISSUE register stage // ID/ISSUE register stage
typedef struct packed { typedef struct packed {
logic valid; logic valid;
ariane_pkg::scoreboard_entry_t sbe; ariane_pkg::scoreboard_entry_t sbe;
logic is_ctrl_flow; logic is_ctrl_flow;
} issue_struct_t; } issue_struct_t;
issue_struct_t issue_n, issue_q; issue_struct_t issue_n, issue_q;
logic is_control_flow_instr; logic is_control_flow_instr;
ariane_pkg::scoreboard_entry_t decoded_instruction; ariane_pkg::scoreboard_entry_t decoded_instruction;
logic is_illegal; logic is_illegal;
logic [31:0] instruction; logic [31:0] instruction;
logic is_compressed; logic is_compressed;
if (CVA6Cfg.RVC) begin if (CVA6Cfg.RVC) begin
// ---------------------------------------------------------
// 1. Check if they are compressed and expand in case they are
// ---------------------------------------------------------
compressed_decoder #(
.CVA6Cfg ( CVA6Cfg )
) compressed_decoder_i (
.instr_i ( fetch_entry_i.instruction ),
.instr_o ( instruction ),
.illegal_instr_o ( is_illegal ),
.is_compressed_o ( is_compressed )
);
end else begin
assign instruction = fetch_entry_i.instruction;
assign is_illegal = '0;
assign is_compressed = '0;
end
// --------------------------------------------------------- // ---------------------------------------------------------
// 2. Decode and emit instruction to issue stage // 1. Check if they are compressed and expand in case they are
// --------------------------------------------------------- // ---------------------------------------------------------
decoder #( compressed_decoder #(
.CVA6Cfg ( CVA6Cfg ) .CVA6Cfg(CVA6Cfg)
) decoder_i ( ) compressed_decoder_i (
.debug_req_i, .instr_i (fetch_entry_i.instruction),
.irq_ctrl_i, .instr_o (instruction),
.irq_i, .illegal_instr_o(is_illegal),
.pc_i ( fetch_entry_i.address ), .is_compressed_o(is_compressed)
.is_compressed_i ( is_compressed ),
.is_illegal_i ( is_illegal ),
.instruction_i ( instruction ),
.compressed_instr_i ( fetch_entry_i.instruction[15:0] ),
.branch_predict_i ( fetch_entry_i.branch_predict ),
.ex_i ( fetch_entry_i.ex ),
.priv_lvl_i ( priv_lvl_i ),
.debug_mode_i ( debug_mode_i ),
.fs_i,
.frm_i,
.vs_i,
.tvm_i,
.tw_i,
.tsr_i,
.instruction_o ( decoded_instruction ),
.is_control_flow_instr_o ( is_control_flow_instr )
); );
end else begin
assign instruction = fetch_entry_i.instruction;
assign is_illegal = '0;
assign is_compressed = '0;
end
// ---------------------------------------------------------
// 2. Decode and emit instruction to issue stage
// ---------------------------------------------------------
decoder #(
.CVA6Cfg(CVA6Cfg)
) decoder_i (
.debug_req_i,
.irq_ctrl_i,
.irq_i,
.pc_i (fetch_entry_i.address),
.is_compressed_i (is_compressed),
.is_illegal_i (is_illegal),
.instruction_i (instruction),
.compressed_instr_i (fetch_entry_i.instruction[15:0]),
.branch_predict_i (fetch_entry_i.branch_predict),
.ex_i (fetch_entry_i.ex),
.priv_lvl_i (priv_lvl_i),
.debug_mode_i (debug_mode_i),
.fs_i,
.frm_i,
.vs_i,
.tvm_i,
.tw_i,
.tsr_i,
.instruction_o (decoded_instruction),
.is_control_flow_instr_o(is_control_flow_instr)
);
// ------------------ // ------------------
// Pipeline Register // Pipeline Register
// ------------------ // ------------------
assign issue_entry_o = issue_q.sbe; assign issue_entry_o = issue_q.sbe;
assign issue_entry_valid_o = issue_q.valid; assign issue_entry_valid_o = issue_q.valid;
assign is_ctrl_flow_o = issue_q.is_ctrl_flow; assign is_ctrl_flow_o = issue_q.is_ctrl_flow;
always_comb begin always_comb begin
issue_n = issue_q; issue_n = issue_q;
fetch_entry_ready_o = 1'b0; fetch_entry_ready_o = 1'b0;
// Clear the valid flag if issue has acknowledged the instruction // Clear the valid flag if issue has acknowledged the instruction
if (issue_instr_ack_i) if (issue_instr_ack_i) issue_n.valid = 1'b0;
issue_n.valid = 1'b0;
// if we have a space in the register and the fetch is valid, go get it // if we have a space in the register and the fetch is valid, go get it
// or the issue stage is currently acknowledging an instruction, which means that we will have space // or the issue stage is currently acknowledging an instruction, which means that we will have space
// for a new instruction // for a new instruction
if ((!issue_q.valid || issue_instr_ack_i) && fetch_entry_valid_i) begin if ((!issue_q.valid || issue_instr_ack_i) && fetch_entry_valid_i) begin
fetch_entry_ready_o = 1'b1; fetch_entry_ready_o = 1'b1;
issue_n = '{1'b1, decoded_instruction, is_control_flow_instr}; issue_n = '{1'b1, decoded_instruction, is_control_flow_instr};
end
// invalidate the pipeline register on a flush
if (flush_i)
issue_n.valid = 1'b0;
end end
// -------------------------
// Registers (ID <-> Issue) // invalidate the pipeline register on a flush
// ------------------------- if (flush_i) issue_n.valid = 1'b0;
always_ff @(posedge clk_i or negedge rst_ni) begin end
if(~rst_ni) begin // -------------------------
issue_q <= '0; // Registers (ID <-> Issue)
end else begin // -------------------------
issue_q <= issue_n; always_ff @(posedge clk_i or negedge rst_ni) begin
end if (~rst_ni) begin
issue_q <= '0;
end else begin
issue_q <= issue_n;
end end
end
endmodule endmodule

File diff suppressed because it is too large Load diff

View file

@ -9,161 +9,161 @@
package config_pkg; package config_pkg;
// --------------- // ---------------
// Global Config // Global Config
// --------------- // ---------------
localparam int unsigned ILEN = 32; localparam int unsigned ILEN = 32;
localparam int unsigned NRET = 1; localparam int unsigned NRET = 1;
/// The NoC type is a top-level parameter, hence we need a bit more /// The NoC type is a top-level parameter, hence we need a bit more
/// information on what protocol those type parameters are supporting. /// information on what protocol those type parameters are supporting.
/// Currently two values are supported: /// Currently two values are supported:
typedef enum { typedef enum {
/// The "classic" AXI4 protocol. /// The "classic" AXI4 protocol.
NOC_TYPE_AXI4_ATOP, NOC_TYPE_AXI4_ATOP,
/// In the OpenPiton setting the WT cache is connected to the L15. /// In the OpenPiton setting the WT cache is connected to the L15.
NOC_TYPE_L15_BIG_ENDIAN, NOC_TYPE_L15_BIG_ENDIAN,
NOC_TYPE_L15_LITTLE_ENDIAN NOC_TYPE_L15_LITTLE_ENDIAN
} noc_type_e; } noc_type_e;
/// Cache type parameter /// Cache type parameter
typedef enum logic [1:0] { typedef enum logic [1:0] {
WB = 0, WB = 0,
WT = 1, WT = 1,
HPDCACHE = 2 HPDCACHE = 2
} cache_type_t ; } cache_type_t;
localparam NrMaxRules = 16; localparam NrMaxRules = 16;
typedef struct packed { typedef struct packed {
/// Number of commit ports, i.e., maximum number of instructions that the /// Number of commit ports, i.e., maximum number of instructions that the
/// core can retire per cycle. It can be beneficial to have more commit /// core can retire per cycle. It can be beneficial to have more commit
/// ports than issue ports, for the scoreboard to empty out in case one /// ports than issue ports, for the scoreboard to empty out in case one
/// instruction stalls a little longer. /// instruction stalls a little longer.
int unsigned NrCommitPorts; int unsigned NrCommitPorts;
/// AXI parameters. /// AXI parameters.
int unsigned AxiAddrWidth; int unsigned AxiAddrWidth;
int unsigned AxiDataWidth; int unsigned AxiDataWidth;
int unsigned AxiIdWidth; int unsigned AxiIdWidth;
int unsigned AxiUserWidth; int unsigned AxiUserWidth;
int unsigned NrLoadBufEntries; int unsigned NrLoadBufEntries;
bit FpuEn; bit FpuEn;
bit XF16; bit XF16;
bit XF16ALT; bit XF16ALT;
bit XF8; bit XF8;
bit RVA; bit RVA;
bit RVV; bit RVV;
bit RVC; bit RVC;
bit RVZCB; bit RVZCB;
bit XFVec; bit XFVec;
bit CvxifEn; bit CvxifEn;
bit ZiCondExtEn; bit ZiCondExtEn;
// Calculated // Calculated
bit RVF; bit RVF;
bit RVD; bit RVD;
bit FpPresent; bit FpPresent;
bit NSX; bit NSX;
int unsigned FLen; int unsigned FLen;
bit RVFVec; bit RVFVec;
bit XF16Vec; bit XF16Vec;
bit XF16ALTVec; bit XF16ALTVec;
bit XF8Vec; bit XF8Vec;
int unsigned NrRgprPorts; int unsigned NrRgprPorts;
int unsigned NrWbPorts; int unsigned NrWbPorts;
bit EnableAccelerator; bit EnableAccelerator;
// Debug Module // Debug Module
// address to which a hart should jump when it was requested to halt // address to which a hart should jump when it was requested to halt
logic [63:0] HaltAddress; logic [63:0] HaltAddress;
logic [63:0] ExceptionAddress; logic [63:0] ExceptionAddress;
/// Return address stack depth, good values are around 2 to 4. /// Return address stack depth, good values are around 2 to 4.
int unsigned RASDepth; int unsigned RASDepth;
/// Branch target buffer entries. /// Branch target buffer entries.
int unsigned BTBEntries; int unsigned BTBEntries;
/// Branch history (2-bit saturation counter) size, to keep track of /// Branch history (2-bit saturation counter) size, to keep track of
/// branch outcomes. /// branch outcomes.
int unsigned BHTEntries; int unsigned BHTEntries;
/// Offset of the debug module. /// Offset of the debug module.
logic [63:0] DmBaseAddress; logic [63:0] DmBaseAddress;
/// Number of PMP entries. /// Number of PMP entries.
int unsigned NrPMPEntries; int unsigned NrPMPEntries;
/// Set to the bus type in use. /// Set to the bus type in use.
noc_type_e NOCType; noc_type_e NOCType;
/// Physical Memory Attributes (PMAs) /// Physical Memory Attributes (PMAs)
/// Number of non idempotent rules. /// Number of non idempotent rules.
int unsigned NrNonIdempotentRules; int unsigned NrNonIdempotentRules;
/// Base which needs to match. /// Base which needs to match.
logic [NrMaxRules-1:0][63:0] NonIdempotentAddrBase; logic [NrMaxRules-1:0][63:0] NonIdempotentAddrBase;
/// Bit mask which bits to consider when matching the rule. /// Bit mask which bits to consider when matching the rule.
logic [NrMaxRules-1:0][63:0] NonIdempotentLength; logic [NrMaxRules-1:0][63:0] NonIdempotentLength;
/// Number of regions which have execute property. /// Number of regions which have execute property.
int unsigned NrExecuteRegionRules; int unsigned NrExecuteRegionRules;
/// Base which needs to match. /// Base which needs to match.
logic [NrMaxRules-1:0][63:0] ExecuteRegionAddrBase; logic [NrMaxRules-1:0][63:0] ExecuteRegionAddrBase;
/// Bit mask which bits to consider when matching the rule. /// Bit mask which bits to consider when matching the rule.
logic [NrMaxRules-1:0][63:0] ExecuteRegionLength; logic [NrMaxRules-1:0][63:0] ExecuteRegionLength;
/// Number of regions which have cached property. /// Number of regions which have cached property.
int unsigned NrCachedRegionRules; int unsigned NrCachedRegionRules;
/// Base which needs to match. /// Base which needs to match.
logic [NrMaxRules-1:0][63:0] CachedRegionAddrBase; logic [NrMaxRules-1:0][63:0] CachedRegionAddrBase;
/// Bit mask which bits to consider when matching the rule. /// Bit mask which bits to consider when matching the rule.
logic [NrMaxRules-1:0][63:0] CachedRegionLength; logic [NrMaxRules-1:0][63:0] CachedRegionLength;
} cva6_cfg_t; } cva6_cfg_t;
/// Empty configuration to sanity check proper parameter passing. Whenever
/// you develop a module that resides within the core, assign this constant.
localparam cva6_cfg_t cva6_cfg_empty = '0;
/// Utility function being called to check parameters. Not all values make /// Empty configuration to sanity check proper parameter passing. Whenever
/// sense for all parameters, here is the place to sanity check them. /// you develop a module that resides within the core, assign this constant.
function automatic void check_cfg (cva6_cfg_t Cfg); localparam cva6_cfg_t cva6_cfg_empty = '0;
// pragma translate_off
`ifndef VERILATOR
assert(Cfg.RASDepth > 0);
assert(2**$clog2(Cfg.BTBEntries) == Cfg.BTBEntries);
assert(2**$clog2(Cfg.BHTEntries) == Cfg.BHTEntries);
assert(Cfg.NrNonIdempotentRules <= NrMaxRules);
assert(Cfg.NrExecuteRegionRules <= NrMaxRules);
assert(Cfg.NrCachedRegionRules <= NrMaxRules);
assert(Cfg.NrPMPEntries <= 16);
`endif
// pragma translate_on
endfunction
function automatic logic range_check(logic[63:0] base, logic[63:0] len, logic[63:0] address);
// if len is a power of two, and base is properly aligned, this check could be simplified
// Extend base by one bit to prevent an overflow.
return (address >= base) && (({1'b0, address}) < (65'(base)+len));
endfunction : range_check
function automatic logic is_inside_nonidempotent_regions (cva6_cfg_t Cfg, logic[63:0] address); /// Utility function being called to check parameters. Not all values make
logic[NrMaxRules-1:0] pass; /// sense for all parameters, here is the place to sanity check them.
pass = '0; function automatic void check_cfg(cva6_cfg_t Cfg);
for (int unsigned k = 0; k < Cfg.NrNonIdempotentRules; k++) begin // pragma translate_off
pass[k] = range_check(Cfg.NonIdempotentAddrBase[k], Cfg.NonIdempotentLength[k], address); `ifndef VERILATOR
end assert (Cfg.RASDepth > 0);
return |pass; assert (2 ** $clog2(Cfg.BTBEntries) == Cfg.BTBEntries);
endfunction : is_inside_nonidempotent_regions assert (2 ** $clog2(Cfg.BHTEntries) == Cfg.BHTEntries);
assert (Cfg.NrNonIdempotentRules <= NrMaxRules);
assert (Cfg.NrExecuteRegionRules <= NrMaxRules);
assert (Cfg.NrCachedRegionRules <= NrMaxRules);
assert (Cfg.NrPMPEntries <= 16);
`endif
// pragma translate_on
endfunction
function automatic logic is_inside_execute_regions (cva6_cfg_t Cfg, logic[63:0] address); function automatic logic range_check(logic [63:0] base, logic [63:0] len, logic [63:0] address);
// if we don't specify any region we assume everything is accessible // if len is a power of two, and base is properly aligned, this check could be simplified
logic[NrMaxRules-1:0] pass; // Extend base by one bit to prevent an overflow.
pass = '0; return (address >= base) && (({1'b0, address}) < (65'(base) + len));
for (int unsigned k = 0; k < Cfg.NrExecuteRegionRules; k++) begin endfunction : range_check
pass[k] = range_check(Cfg.ExecuteRegionAddrBase[k], Cfg.ExecuteRegionLength[k], address);
end
return |pass;
endfunction : is_inside_execute_regions
function automatic logic is_inside_cacheable_regions (cva6_cfg_t Cfg, logic[63:0] address);
automatic logic[NrMaxRules-1:0] pass; function automatic logic is_inside_nonidempotent_regions(cva6_cfg_t Cfg, logic [63:0] address);
pass = '0; logic [NrMaxRules-1:0] pass;
for (int unsigned k = 0; k < Cfg.NrCachedRegionRules; k++) begin pass = '0;
pass[k] = range_check(Cfg.CachedRegionAddrBase[k], Cfg.CachedRegionLength[k], address); for (int unsigned k = 0; k < Cfg.NrNonIdempotentRules; k++) begin
end pass[k] = range_check(Cfg.NonIdempotentAddrBase[k], Cfg.NonIdempotentLength[k], address);
return |pass; end
endfunction : is_inside_cacheable_regions return |pass;
endfunction : is_inside_nonidempotent_regions
function automatic logic is_inside_execute_regions(cva6_cfg_t Cfg, logic [63:0] address);
// if we don't specify any region we assume everything is accessible
logic [NrMaxRules-1:0] pass;
pass = '0;
for (int unsigned k = 0; k < Cfg.NrExecuteRegionRules; k++) begin
pass[k] = range_check(Cfg.ExecuteRegionAddrBase[k], Cfg.ExecuteRegionLength[k], address);
end
return |pass;
endfunction : is_inside_execute_regions
function automatic logic is_inside_cacheable_regions(cva6_cfg_t Cfg, logic [63:0] address);
automatic logic [NrMaxRules-1:0] pass;
pass = '0;
for (int unsigned k = 0; k < Cfg.NrCachedRegionRules; k++) begin
pass[k] = range_check(Cfg.CachedRegionAddrBase[k], Cfg.CachedRegionLength[k], address);
end
return |pass;
endfunction : is_inside_cacheable_regions
endpackage endpackage

View file

@ -10,69 +10,69 @@
package cva6_config_pkg; package cva6_config_pkg;
localparam CVA6ConfigXlen = 32; localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigFpuEn = 0; localparam CVA6ConfigFpuEn = 0;
localparam CVA6ConfigF16En = 0; localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0; localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0; localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0; localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 1; localparam CVA6ConfigCvxifEn = 1;
localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 1; localparam CVA6ConfigZcbExtEn = 1;
localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 1; localparam CVA6ConfigBExtEn = 1;
localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 1; localparam CVA6ConfigZiCondExtEn = 1;
localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64; localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64; localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0; localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen; localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0; localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384; localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4; localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128; localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768; localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8; localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128; localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1; localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2; localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8; localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 1; localparam CVA6ConfigNrCommitPorts = 1;
localparam CVA6ConfigNrScoreboardEntries = 4; localparam CVA6ConfigNrScoreboardEntries = 4;
localparam CVA6ConfigFPGAEn = 0; localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1; localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0; localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2; localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 2; localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2; localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigRASDepth = 0; localparam CVA6ConfigRASDepth = 0;
localparam CVA6ConfigBTBEntries = 0; localparam CVA6ConfigBTBEntries = 0;
localparam CVA6ConfigBHTEntries = 0; localparam CVA6ConfigBHTEntries = 0;
localparam CVA6ConfigNrPMPEntries = 8; localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 0; localparam CVA6ConfigPerfCounterEn = 0;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT; localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 1; localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1; localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{ localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -91,7 +91,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn), CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn), ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended // Extended
RVF: bit'(0), RVF:
bit'(
0
),
RVD: bit'(0), RVD: bit'(0),
FpPresent: bit'(0), FpPresent: bit'(0),
NSX: bit'(0), NSX: bit'(0),
@ -105,24 +108,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0), EnableAccelerator: bit'(0),
HaltAddress: 64'h800, HaltAddress: 64'h800,
ExceptionAddress: 64'h808, ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth), RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries), BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries), BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0, DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries), NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP, NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region // idempotent region
NrNonIdempotentRules: unsigned'(2), NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}), NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}), NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3), NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module // DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}), ExecuteRegionAddrBase:
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}), 1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region // cached region
NrCachedRegionRules: unsigned'(1), NrCachedRegionRules:
CachedRegionAddrBase: 1024'({64'h8000_0000}), unsigned'(
CachedRegionLength: 1024'({64'h40000000}) 1
}; ),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage endpackage

View file

@ -9,69 +9,69 @@
package cva6_config_pkg; package cva6_config_pkg;
localparam CVA6ConfigXlen = 32; localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigFpuEn = 0; localparam CVA6ConfigFpuEn = 0;
localparam CVA6ConfigF16En = 0; localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0; localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0; localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0; localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 1; localparam CVA6ConfigCvxifEn = 1;
localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 1; localparam CVA6ConfigZcbExtEn = 1;
localparam CVA6ConfigAExtEn = 0; localparam CVA6ConfigAExtEn = 0;
localparam CVA6ConfigBExtEn = 1; localparam CVA6ConfigBExtEn = 1;
localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0; localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64; localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64; localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0; localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen; localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0; localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384; localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4; localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128; localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768; localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8; localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128; localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1; localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2; localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 2; localparam CVA6ConfigWtDcacheWbufDepth = 2;
localparam CVA6ConfigNrCommitPorts = 1; localparam CVA6ConfigNrCommitPorts = 1;
localparam CVA6ConfigNrScoreboardEntries = 4; localparam CVA6ConfigNrScoreboardEntries = 4;
localparam CVA6ConfigFPGAEn = 0; localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1; localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0; localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2; localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 2; localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2; localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigRASDepth = 0; localparam CVA6ConfigRASDepth = 0;
localparam CVA6ConfigBTBEntries = 0; localparam CVA6ConfigBTBEntries = 0;
localparam CVA6ConfigBHTEntries = 0; localparam CVA6ConfigBHTEntries = 0;
localparam CVA6ConfigNrPMPEntries = 8; localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 0; localparam CVA6ConfigPerfCounterEn = 0;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT; localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 0; localparam CVA6ConfigMmuPresent = 0;
localparam CVA6ConfigRvfiTrace = 1; localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{ localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -90,7 +90,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn), CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn), ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended // Extended
RVF: bit'(0), RVF:
bit'(
0
),
RVD: bit'(0), RVD: bit'(0),
FpPresent: bit'(0), FpPresent: bit'(0),
NSX: bit'(0), NSX: bit'(0),
@ -104,24 +107,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0), EnableAccelerator: bit'(0),
HaltAddress: 64'h800, HaltAddress: 64'h800,
ExceptionAddress: 64'h808, ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth), RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries), BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries), BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0, DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries), NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP, NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region // idempotent region
NrNonIdempotentRules: unsigned'(2), NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}), NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}), NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3), NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module // DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}), ExecuteRegionAddrBase:
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}), 1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region // cached region
NrCachedRegionRules: unsigned'(1), NrCachedRegionRules:
CachedRegionAddrBase: 1024'({64'h8000_0000}), unsigned'(
CachedRegionLength: 1024'({64'h40000000}) 1
}; ),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage endpackage

View file

@ -10,69 +10,69 @@
package cva6_config_pkg; package cva6_config_pkg;
localparam CVA6ConfigXlen = 32; localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigFpuEn = 0; localparam CVA6ConfigFpuEn = 0;
localparam CVA6ConfigF16En = 0; localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0; localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0; localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0; localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 0; localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 0; localparam CVA6ConfigCExtEn = 0;
localparam CVA6ConfigZcbExtEn = 0; localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0; localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0; localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64; localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64; localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0; localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen; localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0; localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 8192; localparam CVA6ConfigIcacheByteSize = 8192;
localparam CVA6ConfigIcacheSetAssoc = 2; localparam CVA6ConfigIcacheSetAssoc = 2;
localparam CVA6ConfigIcacheLineWidth = 128; localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 8192; localparam CVA6ConfigDcacheByteSize = 8192;
localparam CVA6ConfigDcacheSetAssoc = 2; localparam CVA6ConfigDcacheSetAssoc = 2;
localparam CVA6ConfigDcacheLineWidth = 128; localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1; localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2; localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 2; localparam CVA6ConfigWtDcacheWbufDepth = 2;
localparam CVA6ConfigNrCommitPorts = 1; localparam CVA6ConfigNrCommitPorts = 1;
localparam CVA6ConfigNrScoreboardEntries = 4; localparam CVA6ConfigNrScoreboardEntries = 4;
localparam CVA6ConfigFPGAEn = 1; localparam CVA6ConfigFPGAEn = 1;
localparam CVA6ConfigNrLoadPipeRegs = 1; localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0; localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2; localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 2; localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2; localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigRASDepth = 2; localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32; localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128; localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 0; localparam CVA6ConfigNrPMPEntries = 0;
localparam CVA6ConfigPerfCounterEn = 0; localparam CVA6ConfigPerfCounterEn = 0;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT; localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 1; localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1; localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{ localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -91,7 +91,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn), CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn), ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended // Extended
RVF: bit'(0), RVF:
bit'(
0
),
RVD: bit'(0), RVD: bit'(0),
FpPresent: bit'(0), FpPresent: bit'(0),
NSX: bit'(0), NSX: bit'(0),
@ -105,24 +108,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0), EnableAccelerator: bit'(0),
HaltAddress: 64'h800, HaltAddress: 64'h800,
ExceptionAddress: 64'h808, ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth), RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries), BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries), BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0, DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries), NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP, NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region // idempotent region
NrNonIdempotentRules: unsigned'(2), NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}), NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}), NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3), NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module // DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}), ExecuteRegionAddrBase:
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}), 1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region // cached region
NrCachedRegionRules: unsigned'(1), NrCachedRegionRules:
CachedRegionAddrBase: 1024'({64'h8000_0000}), unsigned'(
CachedRegionLength: 1024'({64'h40000000}) 1
}; ),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage endpackage

View file

@ -10,69 +10,69 @@
package cva6_config_pkg; package cva6_config_pkg;
localparam CVA6ConfigXlen = 32; localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigFpuEn = 0; localparam CVA6ConfigFpuEn = 0;
localparam CVA6ConfigF16En = 0; localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0; localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0; localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0; localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 0; localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0; localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0; localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0; localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64; localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64; localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0; localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen; localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0; localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384; localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4; localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128; localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768; localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8; localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128; localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1; localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2; localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8; localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 2; localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8; localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigNrLoadBufEntries = 2; localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigFPGAEn = 0; localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1; localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0; localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigInstrTlbEntries = 2; localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2; localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigRASDepth = 2; localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32; localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128; localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8; localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1; localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT; localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 1; localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1; localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{ localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -91,7 +91,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn), CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn), ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended // Extended
RVF: bit'(0), RVF:
bit'(
0
),
RVD: bit'(0), RVD: bit'(0),
FpPresent: bit'(0), FpPresent: bit'(0),
NSX: bit'(0), NSX: bit'(0),
@ -105,23 +108,32 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0), EnableAccelerator: bit'(0),
HaltAddress: 64'h800, HaltAddress: 64'h800,
ExceptionAddress: 64'h808, ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth), RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries), BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries), BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0, DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries), NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP, NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region // idempotent region
NrNonIdempotentRules: unsigned'(2), NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}), NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}), NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3), NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module // DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}), ExecuteRegionAddrBase:
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}), 1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region // cached region
NrCachedRegionRules: unsigned'(1), NrCachedRegionRules:
CachedRegionAddrBase: 1024'({64'h8000_0000}), unsigned'(
CachedRegionLength: 1024'({64'h40000000}) 1
}; ),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage endpackage

View file

@ -10,69 +10,69 @@
package cva6_config_pkg; package cva6_config_pkg;
localparam CVA6ConfigXlen = 32; localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigFpuEn = 0; localparam CVA6ConfigFpuEn = 0;
localparam CVA6ConfigF16En = 0; localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0; localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0; localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0; localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 0; localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0; localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0; localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0; localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64; localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64; localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0; localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen; localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0; localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384; localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4; localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128; localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768; localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8; localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128; localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1; localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2; localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8; localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 2; localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8; localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0; localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1; localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0; localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2; localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 2; localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2; localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigRASDepth = 2; localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32; localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128; localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8; localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1; localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT; localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 1; localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1; localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{ localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -91,7 +91,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn), CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn), ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended // Extended
RVF: bit'(0), RVF:
bit'(
0
),
RVD: bit'(0), RVD: bit'(0),
FpPresent: bit'(0), FpPresent: bit'(0),
NSX: bit'(0), NSX: bit'(0),
@ -105,24 +108,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0), EnableAccelerator: bit'(0),
HaltAddress: 64'h800, HaltAddress: 64'h800,
ExceptionAddress: 64'h808, ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth), RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries), BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries), BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0, DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries), NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP, NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region // idempotent region
NrNonIdempotentRules: unsigned'(2), NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}), NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}), NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3), NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module // DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}), ExecuteRegionAddrBase:
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}), 1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region // cached region
NrCachedRegionRules: unsigned'(1), NrCachedRegionRules:
CachedRegionAddrBase: 1024'({64'h8000_0000}), unsigned'(
CachedRegionLength: 1024'({64'h40000000}) 1
}; ),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage endpackage

View file

@ -10,69 +10,69 @@
package cva6_config_pkg; package cva6_config_pkg;
localparam CVA6ConfigXlen = 32; localparam CVA6ConfigXlen = 32;
localparam CVA6ConfigFpuEn = 1; localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0; localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0; localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0; localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0; localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 0; localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0; localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0; localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0; localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64; localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64; localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0; localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen; localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0; localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384; localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4; localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128; localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768; localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8; localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128; localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1; localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2; localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8; localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 2; localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8; localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0; localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1; localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0; localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2; localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 2; localparam CVA6ConfigInstrTlbEntries = 2;
localparam CVA6ConfigDataTlbEntries = 2; localparam CVA6ConfigDataTlbEntries = 2;
localparam CVA6ConfigRASDepth = 2; localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32; localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128; localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8; localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1; localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WB; localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WB;
localparam CVA6ConfigMmuPresent = 1; localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1; localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{ localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -91,7 +91,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn), CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn), ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended // Extended
RVF: bit'(0), RVF:
bit'(
0
),
RVD: bit'(0), RVD: bit'(0),
FpPresent: bit'(0), FpPresent: bit'(0),
NSX: bit'(0), NSX: bit'(0),
@ -105,24 +108,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0), EnableAccelerator: bit'(0),
HaltAddress: 64'h800, HaltAddress: 64'h800,
ExceptionAddress: 64'h808, ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth), RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries), BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries), BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0, DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries), NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP, NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region // idempotent region
NrNonIdempotentRules: unsigned'(2), NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}), NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}), NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3), NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module // DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}), ExecuteRegionAddrBase:
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}), 1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region // cached region
NrCachedRegionRules: unsigned'(1), NrCachedRegionRules:
CachedRegionAddrBase: 1024'({64'h8000_0000}), unsigned'(
CachedRegionLength: 1024'({64'h40000000}) 1
}; ),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage endpackage

View file

@ -10,68 +10,68 @@
package cva6_config_pkg; package cva6_config_pkg;
localparam CVA6ConfigXlen = 64; localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigFpuEn = 1; localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 1; localparam CVA6ConfigF16En = 1;
localparam CVA6ConfigF16AltEn = 0; localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0; localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0; localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 0; localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0; localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0; localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 1; localparam CVA6ConfigVExtEn = 1;
localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64; localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64; localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0; localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen; localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0; localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384; localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4; localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128; localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768; localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8; localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128; localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1; localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2; localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8; localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 2; localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8; localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0; localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1; localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0; localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2; localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 16; localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16; localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigRASDepth = 2; localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32; localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128; localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8; localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1; localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT; localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 1; localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1; localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{ localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -90,7 +90,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn), CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn), ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended // Extended
RVF: bit'(0), RVF:
bit'(
0
),
RVD: bit'(0), RVD: bit'(0),
FpPresent: bit'(0), FpPresent: bit'(0),
NSX: bit'(0), NSX: bit'(0),
@ -104,24 +107,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0), EnableAccelerator: bit'(0),
HaltAddress: 64'h800, HaltAddress: 64'h800,
ExceptionAddress: 64'h808, ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth), RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries), BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries), BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0, DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries), NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_L15_BIG_ENDIAN, NOCType: config_pkg::NOC_TYPE_L15_BIG_ENDIAN,
// idempotent region // idempotent region
NrNonIdempotentRules: unsigned'(2), NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}), NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}), NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3), NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module // DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}), ExecuteRegionAddrBase:
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}), 1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region // cached region
NrCachedRegionRules: unsigned'(1), NrCachedRegionRules:
CachedRegionAddrBase: 1024'({64'h8000_0000}), unsigned'(
CachedRegionLength: 1024'({64'h40000000}) 1
}; ),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage endpackage

View file

@ -10,69 +10,69 @@
package cva6_config_pkg; package cva6_config_pkg;
localparam CVA6ConfigXlen = 64; localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigFpuEn = 1; localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0; localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0; localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0; localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0; localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 1; localparam CVA6ConfigCvxifEn = 1;
localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 1; localparam CVA6ConfigZcbExtEn = 1;
localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 1; localparam CVA6ConfigBExtEn = 1;
localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 1; localparam CVA6ConfigZiCondExtEn = 1;
localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64; localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64; localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0; localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen; localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0; localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384; localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4; localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128; localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768; localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8; localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128; localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1; localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2; localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8; localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 2; localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8; localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0; localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1; localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0; localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2; localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 16; localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16; localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigRASDepth = 2; localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32; localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128; localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8; localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1; localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT; localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 1; localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1; localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{ localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -91,7 +91,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn), CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn), ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended // Extended
RVF: bit'(0), RVF:
bit'(
0
),
RVD: bit'(0), RVD: bit'(0),
FpPresent: bit'(0), FpPresent: bit'(0),
NSX: bit'(0), NSX: bit'(0),
@ -105,24 +108,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0), EnableAccelerator: bit'(0),
HaltAddress: 64'h800, HaltAddress: 64'h800,
ExceptionAddress: 64'h808, ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth), RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries), BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries), BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0, DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries), NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP, NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region // idempotent region
NrNonIdempotentRules: unsigned'(2), NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}), NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}), NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3), NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module // DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}), ExecuteRegionAddrBase:
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}), 1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region // cached region
NrCachedRegionRules: unsigned'(1), NrCachedRegionRules:
CachedRegionAddrBase: 1024'({64'h8000_0000}), unsigned'(
CachedRegionLength: 1024'({64'h40000000}) 1
}; ),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage endpackage

View file

@ -17,69 +17,69 @@
package cva6_config_pkg; package cva6_config_pkg;
localparam CVA6ConfigXlen = 64; localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigFpuEn = 1; localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0; localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0; localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0; localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0; localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 1; localparam CVA6ConfigCvxifEn = 1;
localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 1; localparam CVA6ConfigZcbExtEn = 1;
localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 1; localparam CVA6ConfigBExtEn = 1;
localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 1; localparam CVA6ConfigZiCondExtEn = 1;
localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64; localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64; localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0; localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen; localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0; localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384; localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4; localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128; localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768; localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8; localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128; localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 3; localparam CVA6ConfigDcacheIdWidth = 3;
localparam CVA6ConfigMemTidWidth = CVA6ConfigAxiIdWidth; localparam CVA6ConfigMemTidWidth = CVA6ConfigAxiIdWidth;
localparam CVA6ConfigWtDcacheWbufDepth = 8; localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 2; localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8; localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0; localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1; localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0; localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 8; localparam CVA6ConfigNrLoadBufEntries = 8;
localparam CVA6ConfigInstrTlbEntries = 16; localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16; localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigRASDepth = 2; localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32; localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128; localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8; localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1; localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::HPDCACHE; localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::HPDCACHE;
localparam CVA6ConfigMmuPresent = 1; localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1; localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{ localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -98,7 +98,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn), CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn), ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended // Extended
RVF: bit'(0), RVF:
bit'(
0
),
RVD: bit'(0), RVD: bit'(0),
FpPresent: bit'(0), FpPresent: bit'(0),
NSX: bit'(0), NSX: bit'(0),
@ -119,17 +122,26 @@ package cva6_config_pkg;
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries), NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP, NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region // idempotent region
NrNonIdempotentRules: unsigned'(2), NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}), NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}), NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3), NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module // DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}), ExecuteRegionAddrBase:
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}), 1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region // cached region
NrCachedRegionRules: unsigned'(1), NrCachedRegionRules:
CachedRegionAddrBase: 1024'({64'h8000_0000}), unsigned'(
CachedRegionLength: 1024'({64'h40000000}) 1
}; ),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage endpackage

View file

@ -10,69 +10,69 @@
package cva6_config_pkg; package cva6_config_pkg;
localparam CVA6ConfigXlen = 64; localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigFpuEn = 1; localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0; localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0; localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0; localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0; localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 0; localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0; localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0; localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 0; localparam CVA6ConfigZiCondExtEn = 0;
localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64; localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64; localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0; localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen; localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0; localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384; localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4; localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128; localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768; localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8; localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128; localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1; localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2; localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8; localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 2; localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8; localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0; localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1; localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0; localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2; localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 16; localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16; localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigRASDepth = 2; localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32; localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128; localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8; localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1; localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT; localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 1; localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1; localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{ localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -91,7 +91,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn), CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn), ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended // Extended
RVF: bit'(0), RVF:
bit'(
0
),
RVD: bit'(0), RVD: bit'(0),
FpPresent: bit'(0), FpPresent: bit'(0),
NSX: bit'(0), NSX: bit'(0),
@ -105,24 +108,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0), EnableAccelerator: bit'(0),
HaltAddress: 64'h800, HaltAddress: 64'h800,
ExceptionAddress: 64'h808, ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth), RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries), BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries), BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0, DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries), NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_L15_BIG_ENDIAN, NOCType: config_pkg::NOC_TYPE_L15_BIG_ENDIAN,
// idempotent region // idempotent region
NrNonIdempotentRules: unsigned'(2), NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}), NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}), NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3), NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module // DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}), ExecuteRegionAddrBase:
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}), 1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region // cached region
NrCachedRegionRules: unsigned'(1), NrCachedRegionRules:
CachedRegionAddrBase: 1024'({64'h8000_0000}), unsigned'(
CachedRegionLength: 1024'({64'h40000000}) 1
}; ),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage endpackage

View file

@ -10,69 +10,69 @@
package cva6_config_pkg; package cva6_config_pkg;
localparam CVA6ConfigXlen = 64; localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigFpuEn = 1; localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0; localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0; localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0; localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0; localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 1; localparam CVA6ConfigCvxifEn = 1;
localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 1; localparam CVA6ConfigZcbExtEn = 1;
localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 1; localparam CVA6ConfigBExtEn = 1;
localparam CVA6ConfigVExtEn = 0; localparam CVA6ConfigVExtEn = 0;
localparam CVA6ConfigZiCondExtEn = 1; localparam CVA6ConfigZiCondExtEn = 1;
localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64; localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64; localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0; localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen; localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0; localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384; localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4; localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128; localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 32768; localparam CVA6ConfigDcacheByteSize = 32768;
localparam CVA6ConfigDcacheSetAssoc = 8; localparam CVA6ConfigDcacheSetAssoc = 8;
localparam CVA6ConfigDcacheLineWidth = 128; localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1; localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2; localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8; localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 2; localparam CVA6ConfigNrCommitPorts = 2;
localparam CVA6ConfigNrScoreboardEntries = 8; localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0; localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1; localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0; localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2; localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 16; localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16; localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigRASDepth = 2; localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32; localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128; localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8; localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1; localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WB; localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WB;
localparam CVA6ConfigMmuPresent = 1; localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1; localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{ localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -91,7 +91,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn), CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn), ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn),
// Extended // Extended
RVF: bit'(0), RVF:
bit'(
0
),
RVD: bit'(0), RVD: bit'(0),
FpPresent: bit'(0), FpPresent: bit'(0),
NSX: bit'(0), NSX: bit'(0),
@ -105,24 +108,33 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0), EnableAccelerator: bit'(0),
HaltAddress: 64'h800, HaltAddress: 64'h800,
ExceptionAddress: 64'h808, ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth), RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries), BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries), BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0, DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries), NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP, NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region // idempotent region
NrNonIdempotentRules: unsigned'(2), NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}), NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}), NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3), NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module // DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}), ExecuteRegionAddrBase:
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}), 1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region // cached region
NrCachedRegionRules: unsigned'(1), NrCachedRegionRules:
CachedRegionAddrBase: 1024'({64'h8000_0000}), unsigned'(
CachedRegionLength: 1024'({64'h40000000}) 1
}; ),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage endpackage

View file

@ -10,68 +10,68 @@
package cva6_config_pkg; package cva6_config_pkg;
localparam CVA6ConfigXlen = 64; localparam CVA6ConfigXlen = 64;
localparam CVA6ConfigFpuEn = 1; localparam CVA6ConfigFpuEn = 1;
localparam CVA6ConfigF16En = 0; localparam CVA6ConfigF16En = 0;
localparam CVA6ConfigF16AltEn = 0; localparam CVA6ConfigF16AltEn = 0;
localparam CVA6ConfigF8En = 0; localparam CVA6ConfigF8En = 0;
localparam CVA6ConfigFVecEn = 0; localparam CVA6ConfigFVecEn = 0;
localparam CVA6ConfigCvxifEn = 0; localparam CVA6ConfigCvxifEn = 0;
localparam CVA6ConfigCExtEn = 1; localparam CVA6ConfigCExtEn = 1;
localparam CVA6ConfigZcbExtEn = 0; localparam CVA6ConfigZcbExtEn = 0;
localparam CVA6ConfigAExtEn = 1; localparam CVA6ConfigAExtEn = 1;
localparam CVA6ConfigBExtEn = 0; localparam CVA6ConfigBExtEn = 0;
localparam CVA6ConfigVExtEn = 1; localparam CVA6ConfigVExtEn = 1;
localparam CVA6ConfigAxiIdWidth = 4; localparam CVA6ConfigAxiIdWidth = 4;
localparam CVA6ConfigAxiAddrWidth = 64; localparam CVA6ConfigAxiAddrWidth = 64;
localparam CVA6ConfigAxiDataWidth = 64; localparam CVA6ConfigAxiDataWidth = 64;
localparam CVA6ConfigFetchUserEn = 0; localparam CVA6ConfigFetchUserEn = 0;
localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen; localparam CVA6ConfigFetchUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigDataUserEn = 0; localparam CVA6ConfigDataUserEn = 0;
localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen; localparam CVA6ConfigDataUserWidth = CVA6ConfigXlen;
localparam CVA6ConfigIcacheByteSize = 16384; localparam CVA6ConfigIcacheByteSize = 16384;
localparam CVA6ConfigIcacheSetAssoc = 4; localparam CVA6ConfigIcacheSetAssoc = 4;
localparam CVA6ConfigIcacheLineWidth = 128; localparam CVA6ConfigIcacheLineWidth = 128;
localparam CVA6ConfigDcacheByteSize = 16384; localparam CVA6ConfigDcacheByteSize = 16384;
localparam CVA6ConfigDcacheSetAssoc = 4; localparam CVA6ConfigDcacheSetAssoc = 4;
localparam CVA6ConfigDcacheLineWidth = 128; localparam CVA6ConfigDcacheLineWidth = 128;
localparam CVA6ConfigDcacheIdWidth = 1; localparam CVA6ConfigDcacheIdWidth = 1;
localparam CVA6ConfigMemTidWidth = 2; localparam CVA6ConfigMemTidWidth = 2;
localparam CVA6ConfigWtDcacheWbufDepth = 8; localparam CVA6ConfigWtDcacheWbufDepth = 8;
localparam CVA6ConfigNrCommitPorts = 1; localparam CVA6ConfigNrCommitPorts = 1;
localparam CVA6ConfigNrScoreboardEntries = 8; localparam CVA6ConfigNrScoreboardEntries = 8;
localparam CVA6ConfigFPGAEn = 0; localparam CVA6ConfigFPGAEn = 0;
localparam CVA6ConfigNrLoadPipeRegs = 1; localparam CVA6ConfigNrLoadPipeRegs = 1;
localparam CVA6ConfigNrStorePipeRegs = 0; localparam CVA6ConfigNrStorePipeRegs = 0;
localparam CVA6ConfigNrLoadBufEntries = 2; localparam CVA6ConfigNrLoadBufEntries = 2;
localparam CVA6ConfigInstrTlbEntries = 16; localparam CVA6ConfigInstrTlbEntries = 16;
localparam CVA6ConfigDataTlbEntries = 16; localparam CVA6ConfigDataTlbEntries = 16;
localparam CVA6ConfigRASDepth = 2; localparam CVA6ConfigRASDepth = 2;
localparam CVA6ConfigBTBEntries = 32; localparam CVA6ConfigBTBEntries = 32;
localparam CVA6ConfigBHTEntries = 128; localparam CVA6ConfigBHTEntries = 128;
localparam CVA6ConfigNrPMPEntries = 8; localparam CVA6ConfigNrPMPEntries = 8;
localparam CVA6ConfigPerfCounterEn = 1; localparam CVA6ConfigPerfCounterEn = 1;
localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT; localparam config_pkg::cache_type_t CVA6ConfigDcacheType = config_pkg::WT;
localparam CVA6ConfigMmuPresent = 1; localparam CVA6ConfigMmuPresent = 1;
localparam CVA6ConfigRvfiTrace = 1; localparam CVA6ConfigRvfiTrace = 1;
localparam config_pkg::cva6_cfg_t cva6_cfg = '{ localparam config_pkg::cva6_cfg_t cva6_cfg = '{
NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts),
AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth),
AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth),
@ -90,7 +90,10 @@ package cva6_config_pkg;
CvxifEn: bit'(CVA6ConfigCvxifEn), CvxifEn: bit'(CVA6ConfigCvxifEn),
ZiCondExtEn: bit'(0), ZiCondExtEn: bit'(0),
// Extended // Extended
RVF: bit'(0), RVF:
bit'(
0
),
RVD: bit'(0), RVD: bit'(0),
FpPresent: bit'(0), FpPresent: bit'(0),
NSX: bit'(0), NSX: bit'(0),
@ -104,23 +107,32 @@ package cva6_config_pkg;
EnableAccelerator: bit'(0), EnableAccelerator: bit'(0),
HaltAddress: 64'h800, HaltAddress: 64'h800,
ExceptionAddress: 64'h808, ExceptionAddress: 64'h808,
RASDepth: unsigned'(CVA6ConfigRASDepth), RASDepth: unsigned'(CVA6ConfigRASDepth),
BTBEntries: unsigned'(CVA6ConfigBTBEntries), BTBEntries: unsigned'(CVA6ConfigBTBEntries),
BHTEntries: unsigned'(CVA6ConfigBHTEntries), BHTEntries: unsigned'(CVA6ConfigBHTEntries),
DmBaseAddress: 64'h0, DmBaseAddress: 64'h0,
NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries), NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries),
NOCType: config_pkg::NOC_TYPE_AXI4_ATOP, NOCType: config_pkg::NOC_TYPE_AXI4_ATOP,
// idempotent region // idempotent region
NrNonIdempotentRules: unsigned'(2), NrNonIdempotentRules:
unsigned'(
2
),
NonIdempotentAddrBase: 1024'({64'b0, 64'b0}), NonIdempotentAddrBase: 1024'({64'b0, 64'b0}),
NonIdempotentLength: 1024'({64'b0, 64'b0}), NonIdempotentLength: 1024'({64'b0, 64'b0}),
NrExecuteRegionRules: unsigned'(3), NrExecuteRegionRules: unsigned'(3),
// DRAM, Boot ROM, Debug Module // DRAM, Boot ROM, Debug Module
ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}), ExecuteRegionAddrBase:
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}), 1024'(
{64'h8000_0000, 64'h1_0000, 64'h0}
),
ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}),
// cached region // cached region
NrCachedRegionRules: unsigned'(1), NrCachedRegionRules:
CachedRegionAddrBase: 1024'({64'h8000_0000}), unsigned'(
CachedRegionLength: 1024'({64'h40000000}) 1
}; ),
CachedRegionAddrBase: 1024'({64'h8000_0000}),
CachedRegionLength: 1024'({64'h40000000})
};
endpackage endpackage

View file

@ -14,110 +14,108 @@
// needed. // needed.
package hpdcache_params_pkg; package hpdcache_params_pkg;
// Imports from the CVA6 configuration package // Imports from the CVA6 configuration package
// {{{ // {{{
import cva6_config_pkg::CVA6ConfigXlen; import cva6_config_pkg::CVA6ConfigXlen;
import cva6_config_pkg::CVA6ConfigDcacheByteSize; import cva6_config_pkg::CVA6ConfigDcacheByteSize;
import cva6_config_pkg::CVA6ConfigDcacheSetAssoc; import cva6_config_pkg::CVA6ConfigDcacheSetAssoc;
import cva6_config_pkg::CVA6ConfigDcacheLineWidth; import cva6_config_pkg::CVA6ConfigDcacheLineWidth;
import cva6_config_pkg::CVA6ConfigDcacheIdWidth; import cva6_config_pkg::CVA6ConfigDcacheIdWidth;
import cva6_config_pkg::CVA6ConfigWtDcacheWbufDepth; import cva6_config_pkg::CVA6ConfigWtDcacheWbufDepth;
import cva6_config_pkg::CVA6ConfigNrLoadBufEntries; import cva6_config_pkg::CVA6ConfigNrLoadBufEntries;
// }}} // }}}
// Definition of constants used only in this file // Definition of constants used only in this file
// {{{ // {{{
localparam int unsigned __BYTES_PER_WAY = localparam int unsigned __BYTES_PER_WAY = CVA6ConfigDcacheByteSize / CVA6ConfigDcacheSetAssoc;
CVA6ConfigDcacheByteSize/CVA6ConfigDcacheSetAssoc;
localparam int unsigned __BYTES_PER_CACHELINE = localparam int unsigned __BYTES_PER_CACHELINE = CVA6ConfigDcacheLineWidth / 8;
CVA6ConfigDcacheLineWidth/8; // }}}
// }}}
// Definition of global constants for the HPDcache data and directory // Definition of global constants for the HPDcache data and directory
// {{{ // {{{
// HPDcache physical address width (in bits) // HPDcache physical address width (in bits)
localparam int unsigned PARAM_PA_WIDTH = riscv::PLEN; localparam int unsigned PARAM_PA_WIDTH = riscv::PLEN;
// HPDcache number of sets // HPDcache number of sets
localparam int unsigned PARAM_SETS = __BYTES_PER_WAY/__BYTES_PER_CACHELINE; localparam int unsigned PARAM_SETS = __BYTES_PER_WAY / __BYTES_PER_CACHELINE;
// HPDcache number of ways // HPDcache number of ways
localparam int unsigned PARAM_WAYS = CVA6ConfigDcacheSetAssoc; localparam int unsigned PARAM_WAYS = CVA6ConfigDcacheSetAssoc;
// HPDcache word width (bits) // HPDcache word width (bits)
localparam int unsigned PARAM_WORD_WIDTH = CVA6ConfigXlen; localparam int unsigned PARAM_WORD_WIDTH = CVA6ConfigXlen;
// HPDcache cache-line width (bits) // HPDcache cache-line width (bits)
localparam int unsigned PARAM_CL_WORDS = CVA6ConfigDcacheLineWidth/PARAM_WORD_WIDTH; localparam int unsigned PARAM_CL_WORDS = CVA6ConfigDcacheLineWidth / PARAM_WORD_WIDTH;
// HPDcache number of words in the request data channels (request and response) // HPDcache number of words in the request data channels (request and response)
localparam int unsigned PARAM_REQ_WORDS = 1; localparam int unsigned PARAM_REQ_WORDS = 1;
// HPDcache request transaction ID width (bits) // HPDcache request transaction ID width (bits)
localparam int unsigned PARAM_REQ_TRANS_ID_WIDTH = CVA6ConfigDcacheIdWidth; localparam int unsigned PARAM_REQ_TRANS_ID_WIDTH = CVA6ConfigDcacheIdWidth;
// HPDcache request source ID width (bits) // HPDcache request source ID width (bits)
localparam int unsigned PARAM_REQ_SRC_ID_WIDTH = 3; localparam int unsigned PARAM_REQ_SRC_ID_WIDTH = 3;
// }}} // }}}
// Definition of constants and types for HPDcache data memory // Definition of constants and types for HPDcache data memory
// {{{ // {{{
localparam int unsigned PARAM_DATA_WAYS_PER_RAM_WORD = 128/PARAM_WORD_WIDTH; localparam int unsigned PARAM_DATA_WAYS_PER_RAM_WORD = 128 / PARAM_WORD_WIDTH;
localparam int unsigned PARAM_DATA_SETS_PER_RAM = PARAM_SETS; localparam int unsigned PARAM_DATA_SETS_PER_RAM = PARAM_SETS;
// HPDcache DATA RAM macros whether implements: // HPDcache DATA RAM macros whether implements:
// - Write byte enable (1'b1) // - Write byte enable (1'b1)
// - Write bit mask (1'b0) // - Write bit mask (1'b0)
localparam bit PARAM_DATA_RAM_WBYTEENABLE = 1'b1; localparam bit PARAM_DATA_RAM_WBYTEENABLE = 1'b1;
// Define the number of memory contiguous words that can be accessed // Define the number of memory contiguous words that can be accessed
// simultaneously from the cache. // simultaneously from the cache.
// - This limits the maximum width for the data channel from requesters // - This limits the maximum width for the data channel from requesters
// - This impacts the refill latency (more ACCESS_WORDS -> less REFILL LATENCY) // - This impacts the refill latency (more ACCESS_WORDS -> less REFILL LATENCY)
localparam int unsigned PARAM_ACCESS_WORDS = PARAM_CL_WORDS/2; localparam int unsigned PARAM_ACCESS_WORDS = PARAM_CL_WORDS / 2;
// }}} // }}}
// Definition of constants and types for the Miss Status Holding Register (MSHR) // Definition of constants and types for the Miss Status Holding Register (MSHR)
// {{{ // {{{
// HPDcache MSHR number of sets // HPDcache MSHR number of sets
localparam int unsigned PARAM_MSHR_SETS = 2; localparam int unsigned PARAM_MSHR_SETS = 2;
// HPDcache MSHR number of ways // HPDcache MSHR number of ways
localparam int unsigned PARAM_MSHR_WAYS = (CVA6ConfigNrLoadBufEntries > 4) ? 4 : 2; localparam int unsigned PARAM_MSHR_WAYS = (CVA6ConfigNrLoadBufEntries > 4) ? 4 : 2;
// HPDcache MSHR number of ways in the same SRAM word // HPDcache MSHR number of ways in the same SRAM word
localparam int unsigned PARAM_MSHR_WAYS_PER_RAM_WORD = PARAM_MSHR_WAYS > 1 ? 2 : 1; localparam int unsigned PARAM_MSHR_WAYS_PER_RAM_WORD = PARAM_MSHR_WAYS > 1 ? 2 : 1;
// HPDcache MSHR number of sets in the same SRAM // HPDcache MSHR number of sets in the same SRAM
localparam int unsigned PARAM_MSHR_SETS_PER_RAM = PARAM_MSHR_SETS; localparam int unsigned PARAM_MSHR_SETS_PER_RAM = PARAM_MSHR_SETS;
// HPDcache MSHR RAM whether implements: // HPDcache MSHR RAM whether implements:
// - Write byte enable (1'b1) // - Write byte enable (1'b1)
// - Write bit mask (1'b0) // - Write bit mask (1'b0)
localparam bit PARAM_MSHR_RAM_WBYTEENABLE = 1'b1; localparam bit PARAM_MSHR_RAM_WBYTEENABLE = 1'b1;
// HPDcache MSHR whether uses FFs or SRAM // HPDcache MSHR whether uses FFs or SRAM
localparam bit PARAM_MSHR_USE_REGBANK = (PARAM_MSHR_SETS*PARAM_MSHR_WAYS) <= 16; localparam bit PARAM_MSHR_USE_REGBANK = (PARAM_MSHR_SETS * PARAM_MSHR_WAYS) <= 16;
// }}} // }}}
// Definition of constants and types for the Write Buffer (WBUF) // Definition of constants and types for the Write Buffer (WBUF)
// {{{ // {{{
// HPDcache Write-Buffer number of entries in the directory // HPDcache Write-Buffer number of entries in the directory
localparam int unsigned PARAM_WBUF_DIR_ENTRIES = CVA6ConfigWtDcacheWbufDepth; localparam int unsigned PARAM_WBUF_DIR_ENTRIES = CVA6ConfigWtDcacheWbufDepth;
// HPDcache Write-Buffer number of entries in the data buffer // HPDcache Write-Buffer number of entries in the data buffer
localparam int unsigned PARAM_WBUF_DATA_ENTRIES = CVA6ConfigWtDcacheWbufDepth; localparam int unsigned PARAM_WBUF_DATA_ENTRIES = CVA6ConfigWtDcacheWbufDepth;
// HPDcache Write-Buffer number of words per entry // HPDcache Write-Buffer number of words per entry
localparam int unsigned PARAM_WBUF_WORDS = PARAM_REQ_WORDS; localparam int unsigned PARAM_WBUF_WORDS = PARAM_REQ_WORDS;
// HPDcache Write-Buffer threshold counter width (in bits) // HPDcache Write-Buffer threshold counter width (in bits)
localparam int unsigned PARAM_WBUF_TIMECNT_WIDTH = 3; localparam int unsigned PARAM_WBUF_TIMECNT_WIDTH = 3;
// }}} // }}}
// Definition of constants and types for the Replay Table (RTAB) // Definition of constants and types for the Replay Table (RTAB)
// {{{ // {{{
localparam int PARAM_RTAB_ENTRIES = 4; localparam int PARAM_RTAB_ENTRIES = 4;
// }}} // }}}
endpackage endpackage

View file

@ -11,100 +11,100 @@
package cvxif_pkg; package cvxif_pkg;
localparam X_DATAWIDTH = riscv::XLEN; localparam X_DATAWIDTH = riscv::XLEN;
localparam X_NUM_RS = ariane_pkg::NR_RGPR_PORTS; //2 or 3 localparam X_NUM_RS = ariane_pkg::NR_RGPR_PORTS; //2 or 3
localparam X_ID_WIDTH = ariane_pkg::TRANS_ID_BITS; localparam X_ID_WIDTH = ariane_pkg::TRANS_ID_BITS;
localparam X_MEM_WIDTH = 64; localparam X_MEM_WIDTH = 64;
localparam X_RFR_WIDTH = riscv::XLEN; localparam X_RFR_WIDTH = riscv::XLEN;
localparam X_RFW_WIDTH = riscv::XLEN; localparam X_RFW_WIDTH = riscv::XLEN;
typedef struct packed { typedef struct packed {
logic [15:0] instr; logic [15:0] instr;
logic [1:0] mode; logic [1:0] mode;
logic [X_ID_WIDTH-1:0] id; logic [X_ID_WIDTH-1:0] id;
} x_compressed_req_t; } x_compressed_req_t;
typedef struct packed { typedef struct packed {
logic [31:0] instr; logic [31:0] instr;
logic accept; logic accept;
} x_compressed_resp_t; } x_compressed_resp_t;
typedef struct packed { typedef struct packed {
logic [31:0] instr; logic [31:0] instr;
logic [1:0] mode; logic [1:0] mode;
logic [X_ID_WIDTH-1:0] id; logic [X_ID_WIDTH-1:0] id;
logic [X_NUM_RS-1:0][X_RFR_WIDTH-1:0] rs; logic [X_NUM_RS-1:0][X_RFR_WIDTH-1:0] rs;
logic [X_NUM_RS-1:0] rs_valid; logic [X_NUM_RS-1:0] rs_valid;
} x_issue_req_t; } x_issue_req_t;
typedef struct packed { typedef struct packed {
logic accept; logic accept;
logic writeback; logic writeback;
logic dualwrite; logic dualwrite;
logic dualread; logic dualread;
logic loadstore; logic loadstore;
logic exc; logic exc;
} x_issue_resp_t; } x_issue_resp_t;
typedef struct packed { typedef struct packed {
logic [X_ID_WIDTH-1:0] id; logic [X_ID_WIDTH-1:0] id;
logic x_commit_kill; logic x_commit_kill;
} x_commit_t; } x_commit_t;
typedef struct packed { typedef struct packed {
logic [X_ID_WIDTH-1:0] id; logic [X_ID_WIDTH-1:0] id;
logic [31:0] addr; logic [31:0] addr;
logic [1:0] mode; logic [1:0] mode;
logic we; logic we;
logic [1:0] size; logic [1:0] size;
logic [X_MEM_WIDTH-1:0] wdata; logic [X_MEM_WIDTH-1:0] wdata;
logic last; logic last;
logic spec; logic spec;
} x_mem_req_t; } x_mem_req_t;
typedef struct packed { typedef struct packed {
logic exc; logic exc;
logic [5:0] exccode; logic [5:0] exccode;
} x_mem_resp_t; } x_mem_resp_t;
typedef struct packed { typedef struct packed {
logic [X_ID_WIDTH-1:0] id; logic [X_ID_WIDTH-1:0] id;
logic [X_MEM_WIDTH-1:0] rdata; logic [X_MEM_WIDTH-1:0] rdata;
logic err; logic err;
} x_mem_result_t ; } x_mem_result_t;
typedef struct packed { typedef struct packed {
logic [X_ID_WIDTH-1:0] id; logic [X_ID_WIDTH-1:0] id;
logic [X_RFW_WIDTH-1:0] data; logic [X_RFW_WIDTH-1:0] data;
logic [4:0] rd; logic [4:0] rd;
logic we; logic we;
logic exc; logic exc;
logic [5:0] exccode; logic [5:0] exccode;
} x_result_t ; } x_result_t;
typedef struct packed { typedef struct packed {
logic x_compressed_valid; logic x_compressed_valid;
x_compressed_req_t x_compressed_req; x_compressed_req_t x_compressed_req;
logic x_issue_valid; logic x_issue_valid;
x_issue_req_t x_issue_req; x_issue_req_t x_issue_req;
logic x_commit_valid; logic x_commit_valid;
x_commit_t x_commit; x_commit_t x_commit;
logic x_mem_ready; logic x_mem_ready;
x_mem_resp_t x_mem_resp; x_mem_resp_t x_mem_resp;
logic x_mem_result_valid; logic x_mem_result_valid;
x_mem_result_t x_mem_result; x_mem_result_t x_mem_result;
logic x_result_ready; logic x_result_ready;
} cvxif_req_t; } cvxif_req_t;
typedef struct packed { typedef struct packed {
logic x_compressed_ready; logic x_compressed_ready;
x_compressed_resp_t x_compressed_resp; x_compressed_resp_t x_compressed_resp;
logic x_issue_ready; logic x_issue_ready;
x_issue_resp_t x_issue_resp; x_issue_resp_t x_issue_resp;
logic x_mem_valid; logic x_mem_valid;
x_mem_req_t x_mem_req; x_mem_req_t x_mem_req;
logic x_result_valid; logic x_result_valid;
x_result_t x_result; x_result_t x_result;
} cvxif_resp_t; } cvxif_resp_t;
endpackage endpackage

View file

@ -15,188 +15,188 @@
`ifndef VERILATOR `ifndef VERILATOR
package instr_tracer_pkg; package instr_tracer_pkg;
parameter INSTR_NOP = 32'h00_00_00_13; parameter INSTR_NOP = 32'h00_00_00_13;
parameter INSTR_LUI = { 25'b?, riscv::OpcodeLui }; parameter INSTR_LUI = {25'b?, riscv::OpcodeLui};
parameter INSTR_AUIPC = { 25'b?, riscv::OpcodeAuipc }; parameter INSTR_AUIPC = {25'b?, riscv::OpcodeAuipc};
parameter INSTR_JAL = { 25'b?, riscv::OpcodeJal }; parameter INSTR_JAL = {25'b?, riscv::OpcodeJal};
parameter INSTR_JALR = { 17'b?, 3'b000, 5'b?, riscv::OpcodeJalr }; parameter INSTR_JALR = {17'b?, 3'b000, 5'b?, riscv::OpcodeJalr};
// BRANCH // BRANCH
parameter INSTR_BEQZ = { 7'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeBranch }; parameter INSTR_BEQZ = {7'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BEQ = { 7'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeBranch }; parameter INSTR_BEQ = {7'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BNEZ = { 7'b?, 5'b0, 5'b?, 3'b001, 5'b?, riscv::OpcodeBranch }; parameter INSTR_BNEZ = {7'b?, 5'b0, 5'b?, 3'b001, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BNE = { 7'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeBranch }; parameter INSTR_BNE = {7'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BLTZ = { 7'b?, 5'b0, 5'b?, 3'b100, 5'b?, riscv::OpcodeBranch }; parameter INSTR_BLTZ = {7'b?, 5'b0, 5'b?, 3'b100, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BLT = { 7'b?, 5'b?, 5'b?, 3'b100, 5'b?, riscv::OpcodeBranch }; parameter INSTR_BLT = {7'b?, 5'b?, 5'b?, 3'b100, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BGEZ = { 7'b?, 5'b0, 5'b?, 3'b101, 5'b?, riscv::OpcodeBranch }; parameter INSTR_BGEZ = {7'b?, 5'b0, 5'b?, 3'b101, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BGE = { 7'b?, 5'b?, 5'b?, 3'b101, 5'b?, riscv::OpcodeBranch }; parameter INSTR_BGE = {7'b?, 5'b?, 5'b?, 3'b101, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BLTU = { 7'b?, 5'b?, 5'b?, 3'b110, 5'b?, riscv::OpcodeBranch }; parameter INSTR_BLTU = {7'b?, 5'b?, 5'b?, 3'b110, 5'b?, riscv::OpcodeBranch};
parameter INSTR_BGEU = { 7'b?, 5'b?, 5'b?, 3'b111, 5'b?, riscv::OpcodeBranch }; parameter INSTR_BGEU = {7'b?, 5'b?, 5'b?, 3'b111, 5'b?, riscv::OpcodeBranch};
// OP-IMM // OP-IMM
parameter INSTR_LI = { 12'b?, 5'b0, 3'b000, 5'b?, riscv::OpcodeOpImm }; parameter INSTR_LI = {12'b?, 5'b0, 3'b000, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_ADDI = { 17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm }; parameter INSTR_ADDI = {17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_SLTI = { 17'b?, 3'b010, 5'b?, riscv::OpcodeOpImm }; parameter INSTR_SLTI = {17'b?, 3'b010, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_SLTIU = { 17'b?, 3'b011, 5'b?, riscv::OpcodeOpImm }; parameter INSTR_SLTIU = {17'b?, 3'b011, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_XORI = { 17'b?, 3'b100, 5'b?, riscv::OpcodeOpImm }; parameter INSTR_XORI = {17'b?, 3'b100, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_ORI = { 17'b?, 3'b110, 5'b?, riscv::OpcodeOpImm }; parameter INSTR_ORI = {17'b?, 3'b110, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_ANDI = { 17'b?, 3'b111, 5'b?, riscv::OpcodeOpImm }; parameter INSTR_ANDI = {17'b?, 3'b111, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_SLLI = { 6'b000000, 11'b?, 3'b001, 5'b?, riscv::OpcodeOpImm }; parameter INSTR_SLLI = {6'b000000, 11'b?, 3'b001, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_SRLI = { 6'b000000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm }; parameter INSTR_SRLI = {6'b000000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm};
parameter INSTR_SRAI = { 6'b010000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm }; parameter INSTR_SRAI = {6'b010000, 11'b?, 3'b101, 5'b?, riscv::OpcodeOpImm};
// OP-IMM-32 // OP-IMM-32
parameter INSTR_ADDIW = { 17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm32 }; parameter INSTR_ADDIW = {17'b?, 3'b000, 5'b?, riscv::OpcodeOpImm32};
parameter INSTR_SLLIW = { 7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOpImm32 }; parameter INSTR_SLLIW = {7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOpImm32};
parameter INSTR_SRLIW = { 7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32 }; parameter INSTR_SRLIW = {7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32};
parameter INSTR_SRAIW = { 7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32 }; parameter INSTR_SRAIW = {7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOpImm32};
// OP // OP
parameter INSTR_ADD = { 7'b0000000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp }; parameter INSTR_ADD = {7'b0000000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp};
parameter INSTR_SUB = { 7'b0100000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp }; parameter INSTR_SUB = {7'b0100000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp};
parameter INSTR_SLL = { 7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOp }; parameter INSTR_SLL = {7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOp};
parameter INSTR_SLT = { 7'b0000000, 10'b?, 3'b010, 5'b?, riscv::OpcodeOp }; parameter INSTR_SLT = {7'b0000000, 10'b?, 3'b010, 5'b?, riscv::OpcodeOp};
parameter INSTR_SLTU = { 7'b0000000, 10'b?, 3'b011, 5'b?, riscv::OpcodeOp }; parameter INSTR_SLTU = {7'b0000000, 10'b?, 3'b011, 5'b?, riscv::OpcodeOp};
parameter INSTR_XOR = { 7'b0000000, 10'b?, 3'b100, 5'b?, riscv::OpcodeOp }; parameter INSTR_XOR = {7'b0000000, 10'b?, 3'b100, 5'b?, riscv::OpcodeOp};
parameter INSTR_SRL = { 7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp }; parameter INSTR_SRL = {7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp};
parameter INSTR_SRA = { 7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp }; parameter INSTR_SRA = {7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp};
parameter INSTR_OR = { 7'b0000000, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp }; parameter INSTR_OR = {7'b0000000, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp};
parameter INSTR_AND = { 7'b0000000, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp }; parameter INSTR_AND = {7'b0000000, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp};
parameter INSTR_MUL = { 7'b0000001, 10'b?, 3'b???, 5'b?, riscv::OpcodeOp }; parameter INSTR_MUL = {7'b0000001, 10'b?, 3'b???, 5'b?, riscv::OpcodeOp};
// OP32 // OP32
parameter INSTR_ADDW = { 7'b0000000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp32 }; parameter INSTR_ADDW = {7'b0000000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp32};
parameter INSTR_SUBW = { 7'b0100000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp32 }; parameter INSTR_SUBW = {7'b0100000, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp32};
parameter INSTR_SLLW = { 7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOp32 }; parameter INSTR_SLLW = {7'b0000000, 10'b?, 3'b001, 5'b?, riscv::OpcodeOp32};
parameter INSTR_SRLW = { 7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32 }; parameter INSTR_SRLW = {7'b0000000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32};
parameter INSTR_SRAW = { 7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32 }; parameter INSTR_SRAW = {7'b0100000, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp32};
parameter INSTR_MULW = { 7'b0000001, 10'b?, 3'b???, 5'b?, riscv::OpcodeOp32 }; parameter INSTR_MULW = {7'b0000001, 10'b?, 3'b???, 5'b?, riscv::OpcodeOp32};
// MISC-MEM // MISC-MEM
parameter INSTR_FENCE = { 4'b0, 8'b?, 13'b0, riscv::OpcodeMiscMem }; parameter INSTR_FENCE = {4'b0, 8'b?, 13'b0, riscv::OpcodeMiscMem};
parameter INSTR_FENCEI = { 17'b0, 3'b001, 5'b0, riscv::OpcodeMiscMem }; parameter INSTR_FENCEI = {17'b0, 3'b001, 5'b0, riscv::OpcodeMiscMem};
// SYSTEM // SYSTEM
parameter INSTR_CSRW = { 12'b?, 5'b?, 3'b001, 5'b0, riscv::OpcodeSystem }; parameter INSTR_CSRW = {12'b?, 5'b?, 3'b001, 5'b0, riscv::OpcodeSystem};
parameter INSTR_CSRRW = { 12'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeSystem }; parameter INSTR_CSRRW = {12'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeSystem};
parameter INSTR_CSRR = { 12'b?, 5'b0, 3'b010, 5'b?, riscv::OpcodeSystem }; parameter INSTR_CSRR = {12'b?, 5'b0, 3'b010, 5'b?, riscv::OpcodeSystem};
parameter INSTR_CSRRS = { 12'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeSystem }; parameter INSTR_CSRRS = {12'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeSystem};
parameter INSTR_CSRS = { 12'b?, 5'b?, 3'b010, 5'b0, riscv::OpcodeSystem }; parameter INSTR_CSRS = {12'b?, 5'b?, 3'b010, 5'b0, riscv::OpcodeSystem};
parameter INSTR_CSRRC = { 12'b?, 5'b?, 3'b011, 5'b?, riscv::OpcodeSystem }; parameter INSTR_CSRRC = {12'b?, 5'b?, 3'b011, 5'b?, riscv::OpcodeSystem};
parameter INSTR_CSRC = { 12'b?, 5'b?, 3'b011, 5'b0, riscv::OpcodeSystem }; parameter INSTR_CSRC = {12'b?, 5'b?, 3'b011, 5'b0, riscv::OpcodeSystem};
parameter INSTR_CSRWI = { 17'b?, 3'b101, 5'b0, riscv::OpcodeSystem }; parameter INSTR_CSRWI = {17'b?, 3'b101, 5'b0, riscv::OpcodeSystem};
parameter INSTR_CSRRWI = { 17'b?, 3'b101, 5'b?, riscv::OpcodeSystem }; parameter INSTR_CSRRWI = {17'b?, 3'b101, 5'b?, riscv::OpcodeSystem};
parameter INSTR_CSRSI = { 17'b?, 3'b110, 5'b0, riscv::OpcodeSystem }; parameter INSTR_CSRSI = {17'b?, 3'b110, 5'b0, riscv::OpcodeSystem};
parameter INSTR_CSRRSI = { 17'b?, 3'b110, 5'b?, riscv::OpcodeSystem }; parameter INSTR_CSRRSI = {17'b?, 3'b110, 5'b?, riscv::OpcodeSystem};
parameter INSTR_CSRCI = { 17'b?, 3'b111, 5'b0, riscv::OpcodeSystem }; parameter INSTR_CSRCI = {17'b?, 3'b111, 5'b0, riscv::OpcodeSystem};
parameter INSTR_CSRRCI = { 17'b?, 3'b111, 5'b?, riscv::OpcodeSystem }; parameter INSTR_CSRRCI = {17'b?, 3'b111, 5'b?, riscv::OpcodeSystem};
parameter INSTR_ECALL = { 12'b000000000000, 13'b0, riscv::OpcodeSystem }; parameter INSTR_ECALL = {12'b000000000000, 13'b0, riscv::OpcodeSystem};
parameter INSTR_EBREAK = { 12'b000000000001, 13'b0, riscv::OpcodeSystem }; parameter INSTR_EBREAK = {12'b000000000001, 13'b0, riscv::OpcodeSystem};
parameter INSTR_MRET = { 12'b001100000010, 13'b0, riscv::OpcodeSystem }; parameter INSTR_MRET = {12'b001100000010, 13'b0, riscv::OpcodeSystem};
parameter INSTR_SRET = { 12'b000100000010, 13'b0, riscv::OpcodeSystem }; parameter INSTR_SRET = {12'b000100000010, 13'b0, riscv::OpcodeSystem};
parameter INSTR_DRET = { 12'b011110110010, 13'b0, riscv::OpcodeSystem }; parameter INSTR_DRET = {12'b011110110010, 13'b0, riscv::OpcodeSystem};
parameter INSTR_WFI = { 12'b000100000101, 13'b0, riscv::OpcodeSystem }; parameter INSTR_WFI = {12'b000100000101, 13'b0, riscv::OpcodeSystem};
parameter INSTR_SFENCE = { 12'b0001001?????, 13'b?, riscv::OpcodeSystem }; parameter INSTR_SFENCE = {12'b0001001?????, 13'b?, riscv::OpcodeSystem};
// RV32M // RV32M
parameter INSTR_PMUL = { 7'b0000001, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp }; parameter INSTR_PMUL = {7'b0000001, 10'b?, 3'b000, 5'b?, riscv::OpcodeOp};
parameter INSTR_DIV = { 7'b0000001, 10'b?, 3'b100, 5'b?, riscv::OpcodeOp }; parameter INSTR_DIV = {7'b0000001, 10'b?, 3'b100, 5'b?, riscv::OpcodeOp};
parameter INSTR_DIVU = { 7'b0000001, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp }; parameter INSTR_DIVU = {7'b0000001, 10'b?, 3'b101, 5'b?, riscv::OpcodeOp};
parameter INSTR_REM = { 7'b0000001, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp }; parameter INSTR_REM = {7'b0000001, 10'b?, 3'b110, 5'b?, riscv::OpcodeOp};
parameter INSTR_REMU = { 7'b0000001, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp }; parameter INSTR_REMU = {7'b0000001, 10'b?, 3'b111, 5'b?, riscv::OpcodeOp};
// RVFD // RVFD
parameter INSTR_FMADD = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMadd}; parameter INSTR_FMADD = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMadd};
parameter INSTR_FMSUB = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMsub}; parameter INSTR_FMSUB = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeMsub};
parameter INSTR_FNSMSUB = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmsub}; parameter INSTR_FNSMSUB = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmsub};
parameter INSTR_FNMADD = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmadd}; parameter INSTR_FNMADD = {5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeNmadd};
parameter INSTR_FADD = { 5'b00000, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FADD = {5'b00000, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSUB = { 5'b00001, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FSUB = {5'b00001, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMUL = { 5'b00010, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FMUL = {5'b00010, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FDIV = { 5'b00011, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FDIV = {5'b00011, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSQRT = { 5'b01011, 2'b?, 5'b0, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FSQRT = {5'b01011, 2'b?, 5'b0, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSGNJ = { 5'b00100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FSGNJ = {5'b00100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSGNJN = { 5'b00100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FSGNJN = {5'b00100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FSGNJX = { 5'b00100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FSGNJX = {5'b00100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMIN = { 5'b00101, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FMIN = {5'b00101, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMAX = { 5'b00101, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FMAX = {5'b00101, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FLE = { 5'b10100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FLE = {5'b10100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FLT = { 5'b10100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FLT = {5'b10100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FEQ = { 5'b10100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FEQ = {5'b10100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FCVT_F2F = { 5'b01000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FCVT_F2F = {5'b01000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMV_F2X = { 5'b11100, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FMV_F2X = {5'b11100, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FCLASS = { 5'b11100, 2'b?, 5'b0, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FCLASS = {5'b11100, 2'b?, 5'b0, 5'b?, 3'b001, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FMV_X2F = { 5'b11110, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FMV_X2F = {5'b11110, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FCVT_F2I = { 5'b11000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FCVT_F2I = {5'b11000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
parameter INSTR_FCVT_I2F = { 5'b11010, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp}; parameter INSTR_FCVT_I2F = {5'b11010, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, riscv::OpcodeOpFp};
// A // A
parameter INSTR_AMO = {25'b?, riscv::OpcodeAmo }; parameter INSTR_AMO = {25'b?, riscv::OpcodeAmo};
// Load/Stores // Load/Stores
parameter [31:0] LB = 32'b?????????????????000?????0000011; parameter [31:0] LB = 32'b?????????????????000?????0000011;
parameter [31:0] LH = 32'b?????????????????001?????0000011; parameter [31:0] LH = 32'b?????????????????001?????0000011;
parameter [31:0] LW = 32'b?????????????????010?????0000011; parameter [31:0] LW = 32'b?????????????????010?????0000011;
parameter [31:0] LD = 32'b?????????????????011?????0000011; parameter [31:0] LD = 32'b?????????????????011?????0000011;
parameter [31:0] LBU = 32'b?????????????????100?????0000011; parameter [31:0] LBU = 32'b?????????????????100?????0000011;
parameter [31:0] LHU = 32'b?????????????????101?????0000011; parameter [31:0] LHU = 32'b?????????????????101?????0000011;
parameter [31:0] LWU = 32'b?????????????????110?????0000011; parameter [31:0] LWU = 32'b?????????????????110?????0000011;
parameter [31:0] FLW = 32'b?????????????????010?????0000111; parameter [31:0] FLW = 32'b?????????????????010?????0000111;
parameter [31:0] FLD = 32'b?????????????????011?????0000111; parameter [31:0] FLD = 32'b?????????????????011?????0000111;
parameter [31:0] FLQ = 32'b?????????????????100?????0000111; parameter [31:0] FLQ = 32'b?????????????????100?????0000111;
parameter [31:0] SB = 32'b?????????????????000?????0100011; parameter [31:0] SB = 32'b?????????????????000?????0100011;
parameter [31:0] SH = 32'b?????????????????001?????0100011; parameter [31:0] SH = 32'b?????????????????001?????0100011;
parameter [31:0] SW = 32'b?????????????????010?????0100011; parameter [31:0] SW = 32'b?????????????????010?????0100011;
parameter [31:0] SD = 32'b?????????????????011?????0100011; parameter [31:0] SD = 32'b?????????????????011?????0100011;
parameter [31:0] FSW = 32'b?????????????????010?????0100111; parameter [31:0] FSW = 32'b?????????????????010?????0100111;
parameter [31:0] FSD = 32'b?????????????????011?????0100111; parameter [31:0] FSD = 32'b?????????????????011?????0100111;
parameter [31:0] FSQ = 32'b?????????????????100?????0100111; parameter [31:0] FSQ = 32'b?????????????????100?????0100111;
parameter [31:0] C_ADDI4SPN = 32'b????????????????000???????????00; parameter [31:0] C_ADDI4SPN = 32'b????????????????000???????????00;
parameter [31:0] C_FLD = 32'b????????????????001???????????00; parameter [31:0] C_FLD = 32'b????????????????001???????????00;
parameter [31:0] C_LW = 32'b????????????????010???????????00; parameter [31:0] C_LW = 32'b????????????????010???????????00;
parameter [31:0] C_FLW = 32'b????????????????011???????????00; parameter [31:0] C_FLW = 32'b????????????????011???????????00;
parameter [31:0] C_FSD = 32'b????????????????101???????????00; parameter [31:0] C_FSD = 32'b????????????????101???????????00;
parameter [31:0] C_SW = 32'b????????????????110???????????00; parameter [31:0] C_SW = 32'b????????????????110???????????00;
parameter [31:0] C_FSW = 32'b????????????????111???????????00; parameter [31:0] C_FSW = 32'b????????????????111???????????00;
parameter [31:0] C_ADDI = 32'b????????????????000???????????01; parameter [31:0] C_ADDI = 32'b????????????????000???????????01;
parameter [31:0] C_JAL = 32'b????????????????001???????????01; parameter [31:0] C_JAL = 32'b????????????????001???????????01;
parameter [31:0] C_LI = 32'b????????????????010???????????01; parameter [31:0] C_LI = 32'b????????????????010???????????01;
parameter [31:0] C_LUI = 32'b????????????????011???????????01; parameter [31:0] C_LUI = 32'b????????????????011???????????01;
parameter [31:0] C_SRLI = 32'b????????????????100?00????????01; parameter [31:0] C_SRLI = 32'b????????????????100?00????????01;
parameter [31:0] C_SRAI = 32'b????????????????100?01????????01; parameter [31:0] C_SRAI = 32'b????????????????100?01????????01;
parameter [31:0] C_ANDI = 32'b????????????????100?10????????01; parameter [31:0] C_ANDI = 32'b????????????????100?10????????01;
parameter [31:0] C_SUB = 32'b????????????????100011???00???01; parameter [31:0] C_SUB = 32'b????????????????100011???00???01;
parameter [31:0] C_XOR = 32'b????????????????100011???01???01; parameter [31:0] C_XOR = 32'b????????????????100011???01???01;
parameter [31:0] C_OR = 32'b????????????????100011???10???01; parameter [31:0] C_OR = 32'b????????????????100011???10???01;
parameter [31:0] C_AND = 32'b????????????????100011???11???01; parameter [31:0] C_AND = 32'b????????????????100011???11???01;
parameter [31:0] C_SUBW = 32'b????????????????100111???00???01; parameter [31:0] C_SUBW = 32'b????????????????100111???00???01;
parameter [31:0] C_ADDW = 32'b????????????????100111???01???01; parameter [31:0] C_ADDW = 32'b????????????????100111???01???01;
parameter [31:0] C_J = 32'b????????????????101???????????01; parameter [31:0] C_J = 32'b????????????????101???????????01;
parameter [31:0] C_BEQZ = 32'b????????????????110???????????01; parameter [31:0] C_BEQZ = 32'b????????????????110???????????01;
parameter [31:0] C_BNEZ = 32'b????????????????111???????????01; parameter [31:0] C_BNEZ = 32'b????????????????111???????????01;
parameter [31:0] C_SLLI = 32'b????????????????000???????????10; parameter [31:0] C_SLLI = 32'b????????????????000???????????10;
parameter [31:0] C_FLDSP = 32'b????????????????001???????????10; parameter [31:0] C_FLDSP = 32'b????????????????001???????????10;
parameter [31:0] C_LWSP = 32'b????????????????010???????????10; parameter [31:0] C_LWSP = 32'b????????????????010???????????10;
parameter [31:0] C_FLWSP = 32'b????????????????011???????????10; parameter [31:0] C_FLWSP = 32'b????????????????011???????????10;
parameter [31:0] C_MV = 32'b????????????????1000??????????10; parameter [31:0] C_MV = 32'b????????????????1000??????????10;
parameter [31:0] C_ADD = 32'b????????????????1001??????????10; parameter [31:0] C_ADD = 32'b????????????????1001??????????10;
parameter [31:0] C_FSDSP = 32'b????????????????101???????????10; parameter [31:0] C_FSDSP = 32'b????????????????101???????????10;
parameter [31:0] C_SWSP = 32'b????????????????110???????????10; parameter [31:0] C_SWSP = 32'b????????????????110???????????10;
parameter [31:0] C_FSWSP = 32'b????????????????111???????????10; parameter [31:0] C_FSWSP = 32'b????????????????111???????????10;
parameter [31:0] C_NOP = 32'b????????????????0000000000000001; parameter [31:0] C_NOP = 32'b????????????????0000000000000001;
parameter [31:0] C_ADDI16SP = 32'b????????????????011?00010?????01; parameter [31:0] C_ADDI16SP = 32'b????????????????011?00010?????01;
parameter [31:0] C_JR = 32'b????????????????1000?????0000010; parameter [31:0] C_JR = 32'b????????????????1000?????0000010;
parameter [31:0] C_JALR = 32'b????????????????1001?????0000010; parameter [31:0] C_JALR = 32'b????????????????1001?????0000010;
parameter [31:0] C_EBREAK = 32'b????????????????1001000000000010; parameter [31:0] C_EBREAK = 32'b????????????????1001000000000010;
parameter [31:0] C_LD = 32'b????????????????011???????????00; parameter [31:0] C_LD = 32'b????????????????011???????????00;
parameter [31:0] C_SD = 32'b????????????????111???????????00; parameter [31:0] C_SD = 32'b????????????????111???????????00;
parameter [31:0] C_ADDIW = 32'b????????????????001???????????01; parameter [31:0] C_ADDIW = 32'b????????????????001???????????01;
parameter [31:0] C_LDSP = 32'b????????????????011???????????10; parameter [31:0] C_LDSP = 32'b????????????????011???????????10;
parameter [31:0] C_SDSP = 32'b????????????????111???????????10; parameter [31:0] C_SDSP = 32'b????????????????111???????????10;
endpackage endpackage
`endif `endif

File diff suppressed because it is too large Load diff

View file

@ -17,84 +17,81 @@
package std_cache_pkg; package std_cache_pkg;
// Calculated parameter // Calculated parameter
localparam DCACHE_BYTE_OFFSET = $clog2(ariane_pkg::DCACHE_LINE_WIDTH/8); localparam DCACHE_BYTE_OFFSET = $clog2(ariane_pkg::DCACHE_LINE_WIDTH / 8);
localparam DCACHE_NUM_WORDS = 2**(ariane_pkg::DCACHE_INDEX_WIDTH-DCACHE_BYTE_OFFSET); localparam DCACHE_NUM_WORDS = 2 ** (ariane_pkg::DCACHE_INDEX_WIDTH - DCACHE_BYTE_OFFSET);
localparam DCACHE_DIRTY_WIDTH = ariane_pkg::DCACHE_SET_ASSOC*2; localparam DCACHE_DIRTY_WIDTH = ariane_pkg::DCACHE_SET_ASSOC * 2;
// localparam DECISION_BIT = 30; // bit on which to decide whether the request is cache-able or not // localparam DECISION_BIT = 30; // bit on which to decide whether the request is cache-able or not
typedef struct packed { typedef struct packed {
logic [1:0] id; // id for which we handle the miss logic [1:0] id; // id for which we handle the miss
logic valid; logic valid;
logic we; logic we;
logic [55:0] addr; logic [55:0] addr;
logic [7:0][7:0] wdata; logic [7:0][7:0] wdata;
logic [7:0] be; logic [7:0] be;
} mshr_t; } mshr_t;
typedef struct packed { typedef struct packed {
logic valid; logic valid;
logic [63:0] addr; logic [63:0] addr;
logic [7:0] be; logic [7:0] be;
logic [1:0] size; logic [1:0] size;
logic we; logic we;
logic [63:0] wdata; logic [63:0] wdata;
logic bypass; logic bypass;
} miss_req_t; } miss_req_t;
typedef struct packed { typedef struct packed {
logic req; logic req;
ariane_pkg::ad_req_t reqtype; ariane_pkg::ad_req_t reqtype;
ariane_pkg::amo_t amo; ariane_pkg::amo_t amo;
logic [3:0] id; logic [3:0] id;
logic [63:0] addr; logic [63:0] addr;
logic [63:0] wdata; logic [63:0] wdata;
logic we; logic we;
logic [7:0] be; logic [7:0] be;
logic [1:0] size; logic [1:0] size;
} bypass_req_t; } bypass_req_t;
typedef struct packed { typedef struct packed {
logic gnt; logic gnt;
logic valid; logic valid;
logic [63:0] rdata; logic [63:0] rdata;
} bypass_rsp_t; } bypass_rsp_t;
typedef struct packed { typedef struct packed {
logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag; // tag array logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag; // tag array
logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // data array logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // data array
logic valid; // state array logic valid; // state array
logic dirty; // state array logic dirty; // state array
} cache_line_t; } cache_line_t;
// cache line byte enable // cache line byte enable
typedef struct packed { typedef struct packed {
logic [(ariane_pkg::DCACHE_TAG_WIDTH+7)/8-1:0] tag; // byte enable into tag array logic [(ariane_pkg::DCACHE_TAG_WIDTH+7)/8-1:0] tag; // byte enable into tag array
logic [(ariane_pkg::DCACHE_LINE_WIDTH+7)/8-1:0] data; // byte enable into data array logic [(ariane_pkg::DCACHE_LINE_WIDTH+7)/8-1:0] data; // byte enable into data array
logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] vldrty; // bit enable into state array (valid for a pair of dirty/valid bits) logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] vldrty; // bit enable into state array (valid for a pair of dirty/valid bits)
} cl_be_t; } cl_be_t;
// convert one hot to bin for -> needed for cache replacement // convert one hot to bin for -> needed for cache replacement
function automatic logic [$clog2(ariane_pkg::DCACHE_SET_ASSOC)-1:0] one_hot_to_bin ( function automatic logic [$clog2(ariane_pkg::DCACHE_SET_ASSOC)-1:0] one_hot_to_bin(
input logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] in input logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] in);
); for (int unsigned i = 0; i < ariane_pkg::DCACHE_SET_ASSOC; i++) begin
for (int unsigned i = 0; i < ariane_pkg::DCACHE_SET_ASSOC; i++) begin if (in[i]) return i;
if (in[i]) end
return i; endfunction
end // get the first bit set, returns one hot value
endfunction function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] get_victim_cl(
// get the first bit set, returns one hot value input logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] valid_dirty);
function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] get_victim_cl ( // one-hot return vector
input logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] valid_dirty logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] oh = '0;
); for (int unsigned i = 0; i < ariane_pkg::DCACHE_SET_ASSOC; i++) begin
// one-hot return vector if (valid_dirty[i]) begin
logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] oh = '0; oh[i] = 1'b1;
for (int unsigned i = 0; i < ariane_pkg::DCACHE_SET_ASSOC; i++) begin return oh;
if (valid_dirty[i]) begin end
oh[i] = 1'b1; end
return oh; endfunction
end
end
endfunction
endpackage : std_cache_pkg endpackage : std_cache_pkg

View file

@ -16,8 +16,8 @@
// configuration in case Ariane is // configuration in case Ariane is
// instantiated in OpenPiton // instantiated in OpenPiton
`ifdef PITON_ARIANE `ifdef PITON_ARIANE
`include "l15.tmp.h" `include "l15.tmp.h"
`include "define.tmp.h" `include "define.tmp.h"
`endif `endif
package wt_cache_pkg; package wt_cache_pkg;
@ -27,56 +27,56 @@ package wt_cache_pkg;
`ifdef PITON_ARIANE `ifdef PITON_ARIANE
`ifndef CONFIG_L15_ASSOCIATIVITY `ifndef CONFIG_L15_ASSOCIATIVITY
`define CONFIG_L15_ASSOCIATIVITY 4 `define CONFIG_L15_ASSOCIATIVITY 4
`endif `endif
`ifndef TLB_CSM_WIDTH `ifndef TLB_CSM_WIDTH
`define TLB_CSM_WIDTH 33 `define TLB_CSM_WIDTH 33
`endif `endif
localparam L15_SET_ASSOC = `CONFIG_L15_ASSOCIATIVITY; localparam L15_SET_ASSOC = `CONFIG_L15_ASSOCIATIVITY;
localparam L15_TLB_CSM_WIDTH = `TLB_CSM_WIDTH; localparam L15_TLB_CSM_WIDTH = `TLB_CSM_WIDTH;
`else `else
localparam L15_SET_ASSOC = ariane_pkg::DCACHE_SET_ASSOC;// align with dcache for compatibility with the standard Ariane setup localparam L15_SET_ASSOC = ariane_pkg::DCACHE_SET_ASSOC;// align with dcache for compatibility with the standard Ariane setup
localparam L15_TLB_CSM_WIDTH = 33; localparam L15_TLB_CSM_WIDTH = 33;
`endif `endif
localparam L15_TID_WIDTH = ariane_pkg::MEM_TID_WIDTH; localparam L15_TID_WIDTH = ariane_pkg::MEM_TID_WIDTH;
localparam L15_WAY_WIDTH = $clog2(L15_SET_ASSOC); localparam L15_WAY_WIDTH = $clog2(L15_SET_ASSOC);
localparam L1I_WAY_WIDTH = $clog2(ariane_pkg::ICACHE_SET_ASSOC); localparam L1I_WAY_WIDTH = $clog2(ariane_pkg::ICACHE_SET_ASSOC);
localparam L1D_WAY_WIDTH = $clog2(ariane_pkg::DCACHE_SET_ASSOC); localparam L1D_WAY_WIDTH = $clog2(ariane_pkg::DCACHE_SET_ASSOC);
// FIFO depths of L15 adapter // FIFO depths of L15 adapter
localparam ADAPTER_REQ_FIFO_DEPTH = 2; localparam ADAPTER_REQ_FIFO_DEPTH = 2;
localparam ADAPTER_RTRN_FIFO_DEPTH = 2; localparam ADAPTER_RTRN_FIFO_DEPTH = 2;
// Calculated parameter // Calculated parameter
localparam ICACHE_OFFSET_WIDTH = $clog2(ariane_pkg::ICACHE_LINE_WIDTH/8); localparam ICACHE_OFFSET_WIDTH = $clog2(ariane_pkg::ICACHE_LINE_WIDTH / 8);
localparam ICACHE_NUM_WORDS = 2**(ariane_pkg::ICACHE_INDEX_WIDTH-ICACHE_OFFSET_WIDTH); localparam ICACHE_NUM_WORDS = 2 ** (ariane_pkg::ICACHE_INDEX_WIDTH - ICACHE_OFFSET_WIDTH);
localparam ICACHE_CL_IDX_WIDTH = $clog2(ICACHE_NUM_WORDS);// excluding byte offset localparam ICACHE_CL_IDX_WIDTH = $clog2(ICACHE_NUM_WORDS); // excluding byte offset
localparam DCACHE_OFFSET_WIDTH = $clog2(ariane_pkg::DCACHE_LINE_WIDTH/8); localparam DCACHE_OFFSET_WIDTH = $clog2(ariane_pkg::DCACHE_LINE_WIDTH / 8);
localparam DCACHE_NUM_WORDS = 2**(ariane_pkg::DCACHE_INDEX_WIDTH-DCACHE_OFFSET_WIDTH); localparam DCACHE_NUM_WORDS = 2 ** (ariane_pkg::DCACHE_INDEX_WIDTH - DCACHE_OFFSET_WIDTH);
localparam DCACHE_CL_IDX_WIDTH = $clog2(DCACHE_NUM_WORDS);// excluding byte offset localparam DCACHE_CL_IDX_WIDTH = $clog2(DCACHE_NUM_WORDS); // excluding byte offset
localparam DCACHE_NUM_BANKS = ariane_pkg::DCACHE_LINE_WIDTH/riscv::XLEN; localparam DCACHE_NUM_BANKS = ariane_pkg::DCACHE_LINE_WIDTH / riscv::XLEN;
localparam DCACHE_NUM_BANKS_WIDTH = $clog2(DCACHE_NUM_BANKS); localparam DCACHE_NUM_BANKS_WIDTH = $clog2(DCACHE_NUM_BANKS);
// write buffer parameterization // write buffer parameterization
localparam DCACHE_WBUF_DEPTH = ariane_pkg::WT_DCACHE_WBUF_DEPTH; localparam DCACHE_WBUF_DEPTH = ariane_pkg::WT_DCACHE_WBUF_DEPTH;
localparam DCACHE_MAX_TX = 2**L15_TID_WIDTH; localparam DCACHE_MAX_TX = 2 ** L15_TID_WIDTH;
localparam CACHE_ID_WIDTH = L15_TID_WIDTH; localparam CACHE_ID_WIDTH = L15_TID_WIDTH;
typedef struct packed { typedef struct packed {
logic [ariane_pkg::DCACHE_TAG_WIDTH+(ariane_pkg::DCACHE_INDEX_WIDTH-riscv::XLEN_ALIGN_BYTES)-1:0] wtag; logic [ariane_pkg::DCACHE_TAG_WIDTH+(ariane_pkg::DCACHE_INDEX_WIDTH-riscv::XLEN_ALIGN_BYTES)-1:0] wtag;
riscv::xlen_t data; riscv::xlen_t data;
logic [ariane_pkg::DCACHE_USER_WIDTH-1:0] user; logic [ariane_pkg::DCACHE_USER_WIDTH-1:0] user;
logic [(riscv::XLEN/8)-1:0] dirty; // byte is dirty logic [(riscv::XLEN/8)-1:0] dirty; // byte is dirty
logic [(riscv::XLEN/8)-1:0] valid; // byte is valid logic [(riscv::XLEN/8)-1:0] valid; // byte is valid
logic [(riscv::XLEN/8)-1:0] txblock; // byte is part of transaction in-flight logic [(riscv::XLEN/8)-1:0] txblock; // byte is part of transaction in-flight
logic checked; // if cache state of this word has been checked logic checked; // if cache state of this word has been checked
logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] hit_oh; // valid way in the cache logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] hit_oh; // valid way in the cache
} wbuffer_t; } wbuffer_t;
// TX status registers are indexed with the transaction ID // TX status registers are indexed with the transaction ID
@ -95,263 +95,245 @@ package wt_cache_pkg;
DCACHE_LOAD_REQ, DCACHE_LOAD_REQ,
DCACHE_ATOMIC_REQ, DCACHE_ATOMIC_REQ,
DCACHE_INT_REQ DCACHE_INT_REQ
} dcache_out_t; } dcache_out_t;
typedef enum logic [2:0] { typedef enum logic [2:0] {
DCACHE_INV_REQ, // no ack from the core required DCACHE_INV_REQ, // no ack from the core required
DCACHE_STORE_ACK,// note: this may contain an invalidation vector, too DCACHE_STORE_ACK, // note: this may contain an invalidation vector, too
DCACHE_LOAD_ACK, DCACHE_LOAD_ACK,
DCACHE_ATOMIC_ACK, DCACHE_ATOMIC_ACK,
DCACHE_INT_ACK DCACHE_INT_ACK
} dcache_in_t; } dcache_in_t;
typedef enum logic [0:0] { typedef enum logic [0:0] {
ICACHE_INV_REQ, // no ack from the core required ICACHE_INV_REQ, // no ack from the core required
ICACHE_IFILL_ACK ICACHE_IFILL_ACK
} icache_in_t; } icache_in_t;
// icache interface // icache interface
typedef struct packed { typedef struct packed {
logic vld; // invalidate only affected way logic vld; // invalidate only affected way
logic all; // invalidate all ways logic all; // invalidate all ways
logic [ariane_pkg::ICACHE_INDEX_WIDTH-1:0] idx; // physical address to invalidate logic [ariane_pkg::ICACHE_INDEX_WIDTH-1:0] idx; // physical address to invalidate
logic [L15_WAY_WIDTH-1:0] way; // way to invalidate logic [L15_WAY_WIDTH-1:0] way; // way to invalidate
} icache_inval_t; } icache_inval_t;
typedef struct packed { typedef struct packed {
logic [$clog2(ariane_pkg::ICACHE_SET_ASSOC)-1:0] way; // way to replace logic [$clog2(ariane_pkg::ICACHE_SET_ASSOC)-1:0] way; // way to replace
logic [riscv::PLEN-1:0] paddr; // physical address logic [riscv::PLEN-1:0] paddr; // physical address
logic nc; // noncacheable logic nc; // noncacheable
logic [CACHE_ID_WIDTH-1:0] tid; // threadi id (used as transaction id in Ariane) logic [CACHE_ID_WIDTH-1:0] tid; // threadi id (used as transaction id in Ariane)
} icache_req_t; } icache_req_t;
typedef struct packed { typedef struct packed {
icache_in_t rtype; // see definitions above icache_in_t rtype; // see definitions above
logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0] data; // full cache line width logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0] data; // full cache line width
logic [ariane_pkg::ICACHE_USER_LINE_WIDTH-1:0] user; // user bits logic [ariane_pkg::ICACHE_USER_LINE_WIDTH-1:0] user; // user bits
icache_inval_t inv; // invalidation vector icache_inval_t inv; // invalidation vector
logic [CACHE_ID_WIDTH-1:0] tid; // threadi id (used as transaction id in Ariane) logic [CACHE_ID_WIDTH-1:0] tid; // threadi id (used as transaction id in Ariane)
} icache_rtrn_t; } icache_rtrn_t;
// dcache interface // dcache interface
typedef struct packed { typedef struct packed {
logic vld; // invalidate only affected way logic vld; // invalidate only affected way
logic all; // invalidate all ways logic all; // invalidate all ways
logic [ariane_pkg::DCACHE_INDEX_WIDTH-1:0] idx; // physical address to invalidate logic [ariane_pkg::DCACHE_INDEX_WIDTH-1:0] idx; // physical address to invalidate
logic [L15_WAY_WIDTH-1:0] way; // way to invalidate logic [L15_WAY_WIDTH-1:0] way; // way to invalidate
} dcache_inval_t; } dcache_inval_t;
typedef struct packed { typedef struct packed {
dcache_out_t rtype; // see definitions above dcache_out_t rtype; // see definitions above
logic [2:0] size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte) logic [2:0] size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte)
logic [L1D_WAY_WIDTH-1:0] way; // way to replace logic [L1D_WAY_WIDTH-1:0] way; // way to replace
logic [riscv::PLEN-1:0] paddr; // physical address logic [riscv::PLEN-1:0] paddr; // physical address
riscv::xlen_t data; // word width of processor (no block stores at the moment) riscv::xlen_t data; // word width of processor (no block stores at the moment)
logic [ariane_pkg::DATA_USER_WIDTH-1:0] user; // user width of processor (no block stores at the moment) logic [ariane_pkg::DATA_USER_WIDTH-1:0] user; // user width of processor (no block stores at the moment)
logic nc; // noncacheable logic nc; // noncacheable
logic [CACHE_ID_WIDTH-1:0] tid; // threadi id (used as transaction id in Ariane) logic [CACHE_ID_WIDTH-1:0] tid; // threadi id (used as transaction id in Ariane)
ariane_pkg::amo_t amo_op; // amo opcode ariane_pkg::amo_t amo_op; // amo opcode
} dcache_req_t; } dcache_req_t;
typedef struct packed { typedef struct packed {
dcache_in_t rtype; // see definitions above dcache_in_t rtype; // see definitions above
logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // full cache line width logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // full cache line width
logic [ariane_pkg::DCACHE_USER_LINE_WIDTH-1:0] user; // user bits logic [ariane_pkg::DCACHE_USER_LINE_WIDTH-1:0] user; // user bits
dcache_inval_t inv; // invalidation vector dcache_inval_t inv; // invalidation vector
logic [CACHE_ID_WIDTH-1:0] tid; // threadi id (used as transaction id in Ariane) logic [CACHE_ID_WIDTH-1:0] tid; // threadi id (used as transaction id in Ariane)
} dcache_rtrn_t; } dcache_rtrn_t;
// taken from iop.h in openpiton // taken from iop.h in openpiton
// to l1.5 (only marked subset is used) // to l1.5 (only marked subset is used)
typedef enum logic [4:0] { typedef enum logic [4:0] {
L15_LOAD_RQ = 5'b00000, // load request L15_LOAD_RQ = 5'b00000, // load request
L15_IMISS_RQ = 5'b10000, // instruction fill request L15_IMISS_RQ = 5'b10000, // instruction fill request
L15_STORE_RQ = 5'b00001, // store request L15_STORE_RQ = 5'b00001, // store request
L15_ATOMIC_RQ = 5'b00110, // atomic op L15_ATOMIC_RQ = 5'b00110, // atomic op
//L15_CAS1_RQ = 5'b00010, // compare and swap1 packet (OpenSparc atomics) //L15_CAS1_RQ = 5'b00010, // compare and swap1 packet (OpenSparc atomics)
//L15_CAS2_RQ = 5'b00011, // compare and swap2 packet (OpenSparc atomics) //L15_CAS2_RQ = 5'b00011, // compare and swap2 packet (OpenSparc atomics)
//L15_SWAP_RQ = 5'b00110, // swap packet (OpenSparc atomics) //L15_SWAP_RQ = 5'b00110, // swap packet (OpenSparc atomics)
L15_STRLOAD_RQ = 5'b00100, // unused L15_STRLOAD_RQ = 5'b00100, // unused
L15_STRST_RQ = 5'b00101, // unused L15_STRST_RQ = 5'b00101, // unused
L15_STQ_RQ = 5'b00111, // unused L15_STQ_RQ = 5'b00111, // unused
L15_INT_RQ = 5'b01001, // interrupt request L15_INT_RQ = 5'b01001, // interrupt request
L15_FWD_RQ = 5'b01101, // unused L15_FWD_RQ = 5'b01101, // unused
L15_FWD_RPY = 5'b01110, // unused L15_FWD_RPY = 5'b01110, // unused
L15_RSVD_RQ = 5'b11111 // unused L15_RSVD_RQ = 5'b11111 // unused
} l15_reqtypes_t; } l15_reqtypes_t;
// Return (response) types received from the L1.5.
// Only a subset is used; encodings that are not used are marked "unused".
// Some OpenPiton encodings are not unique (they share 4'b0011) and are therefore
// left commented out.
typedef enum logic [3:0] {
  L15_LOAD_RET               = 4'b0000,  // load packet
  // L15_INV_RET             = 4'b0011,  // invalidate packet, not unique...
  L15_ST_ACK                 = 4'b0100,  // store ack packet
  //L15_AT_ACK               = 4'b0011,  // unused, not unique...
  L15_INT_RET                = 4'b0111,  // interrupt packet
  L15_TEST_RET               = 4'b0101,  // unused
  L15_FP_RET                 = 4'b1000,  // unused
  L15_IFILL_RET              = 4'b0001,  // instruction fill packet
  L15_EVICT_REQ              = 4'b0011,  // eviction request
  L15_ERR_RET                = 4'b1100,  // unused
  L15_STRLOAD_RET            = 4'b0010,  // unused
  L15_STRST_ACK              = 4'b0110,  // unused
  L15_FWD_RQ_RET             = 4'b1010,  // unused
  L15_FWD_RPY_RET            = 4'b1011,  // unused
  L15_RSVD_RET               = 4'b1111,  // unused
  L15_CPX_RESTYPE_ATOMIC_RES = 4'b1110   // custom type for atomic responses
} l15_rtrntypes_t;
// Request bundle driven towards the L1.5.
// Field order defines the packed bit layout and must not be changed.
typedef struct packed {
  logic                         l15_val;                   // valid signal, asserted with request
  logic                         l15_req_ack;               // ack for response
  l15_reqtypes_t                l15_rqtype;                // see l15_reqtypes_t for encoding
  logic                         l15_nc;                    // non-cacheable bit
  logic [2:0]                   l15_size;                  // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte)
  logic [L15_TID_WIDTH-1:0]     l15_threadid;              // currently 0 or 1
  logic                         l15_prefetch;              // unused in openpiton
  logic                         l15_invalidate_cacheline;  // unused by Ariane as L1 has no ECC at the moment
  logic                         l15_blockstore;            // unused in openpiton
  logic                         l15_blockinitstore;        // unused in openpiton
  logic [L15_WAY_WIDTH-1:0]     l15_l1rplway;              // way to replace
  logic [39:0]                  l15_address;               // physical address
  logic [63:0]                  l15_data;                  // word to write
  logic [63:0]                  l15_data_next_entry;       // unused in Ariane (only used for CAS atomic requests)
  logic [L15_TLB_CSM_WIDTH-1:0] l15_csm_data;              // unused in Ariane
  logic [3:0]                   l15_amo_op;                // atomic operation type
} l15_req_t;
// Return bundle received from the L1.5.
// Field order defines the packed bit layout and must not be changed.
typedef struct packed {
  logic                     l15_ack;                   // ack for request struct
  // NOTE(review): the original comment for l15_header_ack was identical to the one for
  // l15_ack; presumably this acknowledges the request header only -- confirm.
  logic                     l15_header_ack;            // ack for request struct
  logic                     l15_val;                   // valid signal for return struct
  l15_rtrntypes_t           l15_returntype;            // see l15_rtrntypes_t for encoding
  logic                     l15_l2miss;                // unused in Ariane
  logic [1:0]               l15_error;                 // unused in openpiton
  logic                     l15_noncacheable;          // non-cacheable bit
  logic                     l15_atomic;                // asserted in load return and store ack packets of atomic tx
  logic [L15_TID_WIDTH-1:0] l15_threadid;              // used as transaction ID
  logic                     l15_prefetch;              // unused in openpiton
  logic                     l15_f4b;                   // 4byte instruction fill from I/O space (nc).
  logic [63:0]              l15_data_0;                // used for both caches
  logic [63:0]              l15_data_1;                // used for both caches
  logic [63:0]              l15_data_2;                // currently only used for I$
  logic [63:0]              l15_data_3;                // currently only used for I$
  logic                     l15_inval_icache_all_way;  // invalidate all ways
  logic                     l15_inval_dcache_all_way;  // unused in openpiton
  logic [15:4]              l15_inval_address_15_4;    // invalidate selected cacheline
  logic                     l15_cross_invalidate;      // unused in openpiton
  logic [L15_WAY_WIDTH-1:0] l15_cross_invalidate_way;  // unused in openpiton
  logic                     l15_inval_dcache_inval;    // invalidate selected cacheline and way
  logic                     l15_inval_icache_inval;    // unused in openpiton
  logic [L15_WAY_WIDTH-1:0] l15_inval_way;             // way to invalidate
  logic                     l15_blockinitstore;        // unused in openpiton
} l15_rtrn_t;
// Swap the endianness of a 64-bit word.
// Byte i of the input (i = 0 is the least significant byte) is placed at byte 7-i
// of the result.
function automatic logic [63:0] swendian64(input logic [63:0] in);
  automatic logic [63:0] out;
  for (int k = 0; k < 64; k += 8) begin
    // destination byte starts at bit k, source byte ends at bit 63-k
    out[k+:8] = in[63-k-:8];
  end
  return out;
endfunction : swendian64
// Count the number of set bits in a 64-bit vector.
// NOTE: the result is only 6 bits wide, so it cannot represent 64; an all-ones
// input wraps around and returns 0. Inputs with at most 63 set bits are exact.
function automatic logic [5:0] popcnt64(input logic [63:0] in);
  logic [5:0] cnt = 0;
  foreach (in[k]) begin
    // each bit is zero-extended to 6 bits before being accumulated
    cnt += 6'(in[k]);
  end
  return cnt;
endfunction : popcnt64
// Build an 8-bit byte-enable mask for an access within a 64-bit word.
//   offset: index of the first enabled byte
//   size:   2'b00 = byte, 2'b01 = halfword, 2'b10 = word, otherwise doubleword
// For doubleword accesses all bytes are enabled and offset is ignored.
function automatic logic [7:0] to_byte_enable8(input logic [2:0] offset, input logic [1:0] size);
  logic [7:0] be;
  be = '0;
  unique case (size)
    2'b00:   be[offset] = '1;  // byte
    2'b01:   be[offset+:2] = '1;  // hword
    2'b10:   be[offset+:4] = '1;  // word
    default: be = '1;  // dword
  endcase  // size
  return be;
endfunction : to_byte_enable8
// Build a 4-bit byte-enable mask for an access within a 32-bit word.
//   offset: index of the first enabled byte
//   size:   2'b00 = byte, 2'b01 = halfword, otherwise word
// For word accesses all bytes are enabled and offset is ignored.
function automatic logic [3:0] to_byte_enable4(input logic [1:0] offset, input logic [1:0] size);
  logic [3:0] be;
  be = '0;
  unique case (size)
    2'b00:   be[offset] = '1;  // byte
    2'b01:   be[offset+:2] = '1;  // hword
    default: be = '1;  // word
  endcase  // size
  return be;
endfunction : to_byte_enable4
// OpenPiton requires the data to be replicated across the 64-bit word for
// accesses smaller than a doubleword.
//   data:   source word
//   offset: byte offset of the element to replicate inside data
//   size:   2'b00 = byte, 2'b01 = halfword, 2'b10 = word, otherwise doubleword
// Returns the selected element copied into every lane of the result; a
// doubleword access returns data unchanged.
function automatic logic [63:0] repData64(input logic [63:0] data, input logic [2:0] offset,
                                          input logic [1:0] size);
  logic [63:0] out;
  unique case (size)
    2'b00:   for (int k = 0; k < 8; k++) out[k*8+:8] = data[offset*8+:8];  // byte
    2'b01:   for (int k = 0; k < 4; k++) out[k*16+:16] = data[offset*8+:16];  // hword
    2'b10:   for (int k = 0; k < 2; k++) out[k*32+:32] = data[offset*8+:32];  // word
    default: out = data;  // dword
  endcase  // size
  return out;
endfunction : repData64
// 32-bit counterpart of repData64: replicate the addressed element across the word.
//   data:   source word
//   offset: byte offset of the element to replicate inside data
//   size:   2'b00 = byte, 2'b01 = halfword, otherwise word
// A word access returns data unchanged.
function automatic logic [31:0] repData32(input logic [31:0] data, input logic [1:0] offset,
                                          input logic [1:0] size);
  logic [31:0] out;
  unique case (size)
    2'b00:   for (int k = 0; k < 4; k++) out[k*8+:8] = data[offset*8+:8];  // byte
    2'b01:   for (int k = 0; k < 2; k++) out[k*16+:16] = data[offset*8+:16];  // hword
    default: out = data;  // word
  endcase  // size
  return out;
endfunction : repData32
// Derive the transfer size from an 8-bit byte-enable mask.
// Note: this is OpenPiton specific. Unaligned words cannot be transmitted,
// hence any mask that is not a naturally aligned dword/word/hword defaults to
// "individual bytes", which have to be transmitted one after the other.
// Returns 2'b11 = dword, 2'b10 = word, 2'b01 = hword, 2'b00 = individual bytes.
function automatic logic [1:0] toSize64(input logic [7:0] be);
  logic [1:0] size;
  unique case (be)
    8'b1111_1111:                                           size = 2'b11;  // dword
    8'b0000_1111, 8'b1111_0000:                             size = 2'b10;  // word
    8'b1100_0000, 8'b0011_0000, 8'b0000_1100, 8'b0000_0011: size = 2'b01;  // hword
    default:                                                size = 2'b00;  // individual bytes
  endcase  // be
  return size;
endfunction : toSize64
// Derive the transfer size from a 4-bit byte-enable mask (32-bit counterpart of toSize64).
// Any mask that is not a full word or a naturally aligned halfword maps to
// "individual bytes".
// Returns 2'b10 = word, 2'b01 = hword, 2'b00 = individual bytes.
function automatic logic [1:0] toSize32(input logic [3:0] be);
  logic [1:0] size;
  unique case (be)
    4'b1111:          size = 2'b10;  // word
    4'b1100, 4'b0011: size = 2'b01;  // hword
    default:          size = 2'b00;  // individual bytes
  endcase  // be
  return size;
endfunction : toSize32

View file

@ -20,340 +20,342 @@
// instruction e.g. a branch. // instruction e.g. a branch.
// Instruction re-aligner.
// Splits one fetch word (data_i, starting at address_i) into up to INSTR_PER_FETCH
// instructions. A 32-bit instruction whose upper half lies in the next fetch word
// is handled by storing its lower 16 bits (and its address) in a register and
// completing it when the next fetch word arrives; serving_unaligned_o tells the
// consumer that such a half instruction is pending.
// Only FETCH_WIDTH == 32 is fully supported; the 64-bit variant raises $error at
// elaboration/simulation start and is known to be incomplete.
module instr_realign
  import ariane_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) (
    input logic clk_i,
    input logic rst_ni,
    input logic flush_i,
    input logic valid_i,
    output logic serving_unaligned_o,  // we have an unaligned instruction in [0]
    input logic [riscv::VLEN-1:0] address_i,
    input logic [FETCH_WIDTH-1:0] data_i,
    output logic [INSTR_PER_FETCH-1:0] valid_o,
    output logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_o,
    output logic [INSTR_PER_FETCH-1:0][31:0] instr_o
);
  // as a maximum we support a fetch width of 64-bit, hence there can be 4 compressed instructions
  logic [3:0] instr_is_compressed;

  for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin
    // an instruction is compressed if its two LSBs are != 2'b11
    assign instr_is_compressed[i] = ~&data_i[i*16+:2];
  end

  // save the unaligned part of the instruction to this ff
  logic [15:0] unaligned_instr_d, unaligned_instr_q;
  // the last instruction was unaligned
  logic unaligned_d, unaligned_q;
  // register to save the unaligned address
  logic [riscv::VLEN-1:0] unaligned_address_d, unaligned_address_q;
  // we have an unaligned instruction
  assign serving_unaligned_o = unaligned_q;

  // Instruction re-alignment
  if (FETCH_WIDTH == 32) begin : realign_bp_32
    always_comb begin : re_align
      unaligned_d = unaligned_q;
      unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10};
      unaligned_instr_d = data_i[31:16];

      valid_o[0] = valid_i;
      instr_o[0] = (unaligned_q) ? {data_i[15:0], unaligned_instr_q} : data_i[31:0];
      addr_o[0] = (unaligned_q) ? unaligned_address_q : address_i;

      valid_o[1] = 1'b0;
      instr_o[1] = '0;
      addr_o[1] = {address_i[riscv::VLEN-1:2], 2'b10};

      // this instruction is compressed or the last instruction was unaligned
      if (instr_is_compressed[0] || unaligned_q) begin
        // check if this instruction is still unaligned, e.g.: it is not compressed
        // if it's compressed re-set the unaligned flag
        // for 32 bit we can simply check the next instruction and whether it is compressed or not
        // if it is compressed the next fetch will contain an aligned instruction
        // is instruction 1 also compressed
        // yes? -> no problem, no -> we've got an unaligned instruction
        if (instr_is_compressed[1]) begin
          unaligned_d = 1'b0;
          valid_o[1] = valid_i;
          instr_o[1] = {16'b0, data_i[31:16]};
        end else begin
          // save the upper bits for next cycle
          unaligned_d = 1'b1;
          unaligned_instr_d = data_i[31:16];
          unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10};
        end
      end  // else -> normal fetch

      // we started to fetch on a unaligned boundary with a whole instruction -> wait until we've
      // received the next instruction
      if (valid_i && address_i[1]) begin
        // the instruction is not compressed so we can't do anything in this cycle
        if (!instr_is_compressed[0]) begin
          valid_o = '0;
          unaligned_d = 1'b1;
          unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10};
          unaligned_instr_d = data_i[15:0];
          // the instruction is compressed: only slot 0 is reported as valid
          // (1'b1 is zero-extended, so valid_o[1] is cleared)
        end else begin
          valid_o = 1'b1;
        end
      end
    end
    // TODO(zarubaf): Fix 64 bit FETCH_WIDTH, maybe generalize to arbitrary fetch width
  end else if (FETCH_WIDTH == 64) begin : realign_bp_64
    initial begin
      $error("Not propperly implemented");
    end
    always_comb begin : re_align
      unaligned_d = unaligned_q;
      unaligned_address_d = unaligned_address_q;
      unaligned_instr_d = unaligned_instr_q;

      valid_o = '0;
      valid_o[0] = valid_i;
      instr_o[0] = data_i[31:0];
      addr_o[0] = address_i;

      instr_o[1] = '0;
      addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b010};

      instr_o[2] = {16'b0, data_i[47:32]};
      addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b100};

      instr_o[3] = {16'b0, data_i[63:48]};
      addr_o[3] = {address_i[riscv::VLEN-1:3], 3'b110};

      // last instruction was unaligned
      if (unaligned_q) begin
        instr_o[0] = {data_i[15:0], unaligned_instr_q};
        addr_o[0] = unaligned_address_q;
        // for 64 bit there exist the following options:
        //     64      32      0
        //     | 3 | 2 | 1 | 0 | <- instruction slot
        // |   I   |   I   | U | -> again unaligned
        // | * | C |   I   | U | -> aligned
        // | * |   I   | C | U | -> aligned
        // |   I   | C | C | U | -> again unaligned
        // | * | C | C | C | U | -> aligned
        // Legend: C = compressed, I = 32 bit instruction, U = unaligned upper half
        //         * = don't care
        if (instr_is_compressed[1]) begin
          instr_o[1] = {16'b0, data_i[31:16]};
          valid_o[1] = valid_i;

          if (instr_is_compressed[2]) begin
            if (instr_is_compressed[3]) begin
              // NOTE(review): valid_o[2] is not asserted here although slot 2 is
              // compressed as well -- confirm when completing the 64-bit path.
              unaligned_d = 1'b0;
              valid_o[3] = valid_i;
            end else begin
              // continues to be unaligned
            end
          end else begin
            unaligned_d = 1'b0;
            instr_o[2] = data_i[63:32];
            valid_o[2] = valid_i;
          end
          // instruction 1 is not compressed
        end else begin
          instr_o[1] = data_i[47:16];
          valid_o[1] = valid_i;
          addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110};
          if (instr_is_compressed[2]) begin
            unaligned_d = 1'b0;
            instr_o[2] = {16'b0, data_i[63:48]};
            valid_o[2] = valid_i;
          end else begin
            // continues to be unaligned
          end
        end
      end else if (instr_is_compressed[0]) begin  // instruction zero is RVC
        //     64     32       0
        //     | 3 | 2 | 1 | 0 | <- instruction slot
        // |   I   |   I   | C | -> again unaligned
        // | * | C |   I   | C | -> aligned
        // | * |   I   | C | C | -> aligned
        // |   I   | C | C | C | -> again unaligned
        // | * | C | C | C | C | -> aligned
        if (instr_is_compressed[1]) begin
          instr_o[1] = {16'b0, data_i[31:16]};
          valid_o[1] = valid_i;

          if (instr_is_compressed[2]) begin
            valid_o[2] = valid_i;
            if (instr_is_compressed[3]) begin
              valid_o[3] = valid_i;
            end else begin
              // this instruction is unaligned
              unaligned_d = 1'b1;
              unaligned_instr_d = data_i[63:48];
              unaligned_address_d = addr_o[3];
            end
          end else begin
            instr_o[2] = data_i[63:32];
            valid_o[2] = valid_i;
          end
          // instruction 1 is not compressed -> check slot 3
        end else begin
          instr_o[1] = data_i[47:16];
          valid_o[1] = valid_i;
          addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110};
          if (instr_is_compressed[3]) begin
            instr_o[2] = data_i[63:48];
            valid_o[2] = valid_i;
          end else begin
            unaligned_d = 1'b1;
            unaligned_instr_d = data_i[63:48];
            unaligned_address_d = addr_o[2];
          end
        end

        // Full instruction in slot zero
        //     64     32       0
        //     | 3 | 2 | 1 | 0 | <- instruction slot
        // |   I   | C |   I   |
        // | * | C | C |   I   |
        // | * |   I   |   I   |
      end else begin
        addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100};

        if (instr_is_compressed[2]) begin
          instr_o[1] = {16'b0, data_i[47:32]};
          valid_o[1] = valid_i;
          addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110};
          if (instr_is_compressed[3]) begin
            // | * | C | C |   I   |
            valid_o[2] = valid_i;
            // the compressed instruction in slot 3 is the third output instruction;
            // (previously these bits were written to addr_o[2], clobbering the address
            // set just above and leaving instr_o[2] pointing at slot 2)
            instr_o[2] = {16'b0, data_i[63:48]};
          end else begin
            // this instruction is unaligned
            unaligned_d = 1'b1;
            unaligned_instr_d = data_i[63:48];
            unaligned_address_d = addr_o[2];
          end
        end else begin
          // two regular instructions back-to-back
          instr_o[1] = data_i[63:32];
          valid_o[1] = valid_i;
        end
      end

      // --------------------------
      // Unaligned fetch
      // --------------------------
      // Address was not 64 bit aligned
      case (address_i[2:1])
        // this means the previous instruction was either compressed or unaligned
        // in any case we don't care
        2'b01: begin
          //     64     32       0
          //     | 3 | 2 | 1 | 0 | <- instruction slot
          // |   I   |   I   | x  -> again unaligned
          // | * | C |   I   | x  -> aligned
          // | * |   I   | C | x  -> aligned
          // |   I   | C | C | x  -> again unaligned
          // | * | C | C | C | x  -> aligned
          addr_o[0] = {address_i[riscv::VLEN-1:3], 3'b010};

          if (instr_is_compressed[1]) begin
            instr_o[0] = {16'b0, data_i[31:16]};
            valid_o[0] = valid_i;

            if (instr_is_compressed[2]) begin
              valid_o[1] = valid_i;
              instr_o[1] = {16'b0, data_i[47:32]};
              addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100};
              if (instr_is_compressed[3]) begin
                instr_o[2] = {16'b0, data_i[63:48]};
                addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110};
                valid_o[2] = valid_i;
              end else begin
                // this instruction is unaligned
                unaligned_d = 1'b1;
                unaligned_instr_d = data_i[63:48];
                unaligned_address_d = addr_o[3];
              end
            end else begin
              instr_o[1] = data_i[63:32];
              addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100};
              valid_o[1] = valid_i;
            end
            // instruction 1 is not compressed -> check slot 3
          end else begin
            instr_o[0] = data_i[47:16];
            valid_o[0] = valid_i;
            addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b110};
            if (instr_is_compressed[3]) begin
              instr_o[1] = data_i[63:48];
              valid_o[1] = valid_i;
            end else begin
              unaligned_d = 1'b1;
              unaligned_instr_d = data_i[63:48];
              unaligned_address_d = addr_o[1];
            end
          end
        end
        2'b10: begin
          valid_o = '0;
          //     64     32       0
          //     | 3 | 2 | 1 | 0 | <- instruction slot
          // |   I   | C |   *   | <- unaligned
          //    | C  | C |   *   | <- aligned
          //    |    I   |   *   | <- aligned
          if (instr_is_compressed[2]) begin
            valid_o[0] = valid_i;
            instr_o[0] = data_i[47:32];

            // second instruction is also compressed
            if (instr_is_compressed[3]) begin
              valid_o[1] = valid_i;
              instr_o[1] = data_i[63:48];
              // regular instruction -> unaligned
            end else begin
              unaligned_d = 1'b1;
              unaligned_address_d = {address_i[riscv::VLEN-1:3], 3'b110};
              unaligned_instr_d = data_i[63:48];
            end
            // instruction is a regular instruction
          end else begin
            valid_o[0] = valid_i;
            instr_o[0] = data_i[63:32];
            addr_o[0] = address_i;
          end
        end
        // we started to fetch on a unaligned boundary with a whole instruction -> wait until we've
        // received the next instruction
        2'b11: begin
          valid_o = '0;
          if (!instr_is_compressed[3]) begin
            unaligned_d = 1'b1;
            unaligned_address_d = {address_i[riscv::VLEN-1:3], 3'b110};
            unaligned_instr_d = data_i[63:48];
          end else begin
            valid_o[3] = valid_i;
          end
        end
      endcase
    end
  end

  // State registers: the pending half instruction and its address are captured
  // on every valid fetch; the "unaligned" flag is additionally cleared on flush.
  always_ff @(posedge clk_i or negedge rst_ni) begin
    if (~rst_ni) begin
      unaligned_q         <= 1'b0;
      unaligned_address_q <= '0;
      unaligned_instr_q   <= '0;
    end else begin
      if (valid_i) begin
        unaligned_address_q <= unaligned_address_d;
        unaligned_instr_q   <= unaligned_instr_d;
      end

      if (flush_i) begin
        unaligned_q <= 1'b0;
      end else if (valid_i) begin
        unaligned_q <= unaligned_d;
      end
    end
  end
endmodule

File diff suppressed because it is too large Load diff

View file

@ -14,196 +14,198 @@
// in a scoreboard like data-structure. // in a scoreboard like data-structure.
// Issue stage wrapper.
//
// Instantiates the scoreboard (SB) and the issue/read-operands unit (IRO) and wires
// them to each other and to the ports of this module. Apart from four continuous
// assignments (forwarding truncation and the accelerator issue handshake) this
// module contains no logic of its own.
//
// Internal signal naming: <name>_<src>_<dst>, with sb = scoreboard and
// iro = issue_read_operands.
module issue_stage
  import ariane_pkg::*;
#(
    parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
    parameter bit IsRVFI = bit'(0),
    parameter int unsigned NR_ENTRIES = 8
) (
    input logic clk_i,  // Clock
    input logic rst_ni,  // Asynchronous reset active low

    output logic sb_full_o,
    input  logic flush_unissued_instr_i,
    input  logic flush_i,
    input  logic stall_i,  // Stall issue stage
    // decoded instruction handshake (original comment here read "from ISSUE")
    input  scoreboard_entry_t decoded_instr_i,
    input  logic decoded_instr_valid_i,
    input  logic is_ctrl_flow_i,
    output logic decoded_instr_ack_o,
    // to EX
    output [riscv::VLEN-1:0] rs1_forwarding_o,  // unregistered version of fu_data_o.operand_a
    output [riscv::VLEN-1:0] rs2_forwarding_o,  // unregistered version of the second fu_data_o operand
    output fu_data_t fu_data_o,
    output logic [riscv::VLEN-1:0] pc_o,
    output logic is_compressed_instr_o,
    input  logic flu_ready_i,
    output logic alu_valid_o,
    // ex just resolved our predicted branch, we are ready to accept new requests
    input  logic resolve_branch_i,

    input  logic lsu_ready_i,
    output logic lsu_valid_o,
    // branch prediction
    output logic branch_valid_o,  // use branch prediction unit
    output branchpredict_sbe_t branch_predict_o,  // Branch predict Out

    output logic mult_valid_o,

    input  logic fpu_ready_i,
    output logic fpu_valid_o,
    output logic [1:0] fpu_fmt_o,  // FP fmt field from instr.
    output logic [2:0] fpu_rm_o,  // FP rm field from instr.

    output logic csr_valid_o,

    // CVXIF issue interface
    output logic x_issue_valid_o,
    input  logic x_issue_ready_i,
    output logic [31:0] x_off_instr_o,

    // to accelerator dispatcher
    output scoreboard_entry_t issue_instr_o,
    output logic issue_instr_hs_o,

    // write back port
    input logic [CVA6Cfg.NrWbPorts-1:0][TRANS_ID_BITS-1:0] trans_id_i,
    input bp_resolve_t resolved_branch_i,
    input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i,
    // exception from execute stage or CVXIF offloaded instruction
    input exception_t [CVA6Cfg.NrWbPorts-1:0] ex_ex_i,
    input logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_i,
    input logic x_we_i,

    // commit port
    input  logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_i,
    input  logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_i,
    input  logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_i,
    input  logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_i,

    output scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o,
    input  logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,

    output logic stall_issue_o,  // Used in Performance Counters

    // RVFI
    input [riscv::VLEN-1:0] lsu_addr_i,
    input [(riscv::XLEN/8)-1:0] lsu_rmask_i,
    input [(riscv::XLEN/8)-1:0] lsu_wmask_i,
    input [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_i
);
  // ---------------------------------------------------
  // Scoreboard (SB) <-> Issue and Read Operands (IRO)
  // ---------------------------------------------------
  // Width of the third source operand: XLEN when the configuration has three
  // GPR read ports, otherwise the floating-point register length.
  typedef logic [(CVA6Cfg.NrRgprPorts == 3 ? riscv::XLEN : CVA6Cfg.FLen)-1:0] rs3_len_t;

  // Clobber lists, one entry per architectural register (SB -> IRO).
  fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_sb_iro;
  fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_sb_iro;

  // Operand lookup: IRO sends a register address, SB answers with data + valid.
  logic [REG_ADDR_SIZE-1:0] rs1_iro_sb;
  riscv::xlen_t rs1_sb_iro;
  logic rs1_valid_sb_iro;

  // NOTE(review): rs2_valid_iro_sb and rs3_valid_iro_sb are driven by the
  // scoreboard (rs2_valid_o / rs3_valid_o) and consumed by IRO, i.e. they travel
  // SB -> IRO despite the "_iro_sb" suffix. Names are kept as-is.
  logic [REG_ADDR_SIZE-1:0] rs2_iro_sb;
  riscv::xlen_t rs2_sb_iro;
  logic rs2_valid_iro_sb;

  logic [REG_ADDR_SIZE-1:0] rs3_iro_sb;
  rs3_len_t rs3_sb_iro;
  logic rs3_valid_iro_sb;

  // Instruction handed from SB to IRO, with valid/ack handshake.
  scoreboard_entry_t issue_instr_sb_iro;
  logic issue_instr_valid_sb_iro;
  logic issue_ack_iro_sb;

  // Full-width forwarding values produced by IRO and also fed to the SB.
  riscv::xlen_t rs1_forwarding_xlen;
  riscv::xlen_t rs2_forwarding_xlen;

  // Only the low VLEN bits of the forwarding values leave this module.
  assign rs1_forwarding_o = rs1_forwarding_xlen[riscv::VLEN-1:0];
  assign rs2_forwarding_o = rs2_forwarding_xlen[riscv::VLEN-1:0];

  // Accelerator dispatcher view of the issue port: the instruction itself and a
  // one-bit "handshake happened" flag (valid AND ack).
  assign issue_instr_o = issue_instr_sb_iro;
  assign issue_instr_hs_o = issue_instr_valid_sb_iro & issue_ack_iro_sb;

  // ---------------------------------------------------------
  // 2. Manage instructions in a scoreboard
  // ---------------------------------------------------------
  scoreboard #(
      .CVA6Cfg   (CVA6Cfg),
      .IsRVFI    (IsRVFI),
      .rs3_len_t (rs3_len_t),
      .NR_ENTRIES(NR_ENTRIES)
  ) i_scoreboard (
      .sb_full_o            (sb_full_o),
      .unresolved_branch_i  (1'b0),  // tied off to constant zero
      .rd_clobber_gpr_o     (rd_clobber_gpr_sb_iro),
      .rd_clobber_fpr_o     (rd_clobber_fpr_sb_iro),
      .rs1_i                (rs1_iro_sb),
      .rs1_o                (rs1_sb_iro),
      .rs1_valid_o          (rs1_valid_sb_iro),
      .rs2_i                (rs2_iro_sb),
      .rs2_o                (rs2_sb_iro),
      .rs2_valid_o          (rs2_valid_iro_sb),
      .rs3_i                (rs3_iro_sb),
      .rs3_o                (rs3_sb_iro),
      .rs3_valid_o          (rs3_valid_iro_sb),

      .decoded_instr_i      (decoded_instr_i),
      .decoded_instr_valid_i(decoded_instr_valid_i),
      .decoded_instr_ack_o  (decoded_instr_ack_o),
      .issue_instr_o        (issue_instr_sb_iro),
      .issue_instr_valid_o  (issue_instr_valid_sb_iro),
      .issue_ack_i          (issue_ack_iro_sb),

      .resolved_branch_i    (resolved_branch_i),
      .trans_id_i           (trans_id_i),
      .wbdata_i             (wbdata_i),
      .ex_i                 (ex_ex_i),
      .lsu_addr_i           (lsu_addr_i),
      .lsu_rmask_i          (lsu_rmask_i),
      .lsu_wmask_i          (lsu_wmask_i),
      .lsu_addr_trans_id_i  (lsu_addr_trans_id_i),
      .rs1_forwarding_i     (rs1_forwarding_xlen),
      .rs2_forwarding_i     (rs2_forwarding_xlen),
      // every remaining scoreboard port is bound implicitly to the identically
      // named signal in this scope
      .*
  );

  // ---------------------------------------------------------
  // 3. Issue instruction and read operand, also commit
  // ---------------------------------------------------------
  issue_read_operands #(
      .CVA6Cfg  (CVA6Cfg),
      .rs3_len_t(rs3_len_t)
  ) i_issue_read_operands (
      // IRO is flushed by flush_unissued_instr_i, not by this module's flush_i
      .flush_i            (flush_unissued_instr_i),
      .issue_instr_i      (issue_instr_sb_iro),
      .issue_instr_valid_i(issue_instr_valid_sb_iro),
      .issue_ack_o        (issue_ack_iro_sb),
      .fu_data_o          (fu_data_o),
      .flu_ready_i        (flu_ready_i),
      .rs1_o              (rs1_iro_sb),
      .rs1_i              (rs1_sb_iro),
      .rs1_valid_i        (rs1_valid_sb_iro),
      .rs2_o              (rs2_iro_sb),
      .rs2_i              (rs2_sb_iro),
      .rs2_valid_i        (rs2_valid_iro_sb),
      .rs3_o              (rs3_iro_sb),
      .rs3_i              (rs3_sb_iro),
      .rs3_valid_i        (rs3_valid_iro_sb),
      .rd_clobber_gpr_i   (rd_clobber_gpr_sb_iro),
      .rd_clobber_fpr_i   (rd_clobber_fpr_sb_iro),
      .alu_valid_o        (alu_valid_o),
      .branch_valid_o     (branch_valid_o),
      .csr_valid_o        (csr_valid_o),
      // the CVXIF ports of IRO map onto this module's x_* issue interface
      .cvxif_valid_o      (x_issue_valid_o),
      .cvxif_ready_i      (x_issue_ready_i),
      .cvxif_off_instr_o  (x_off_instr_o),
      .mult_valid_o       (mult_valid_o),
      .rs1_forwarding_o   (rs1_forwarding_xlen),
      .rs2_forwarding_o   (rs2_forwarding_xlen),
      .stall_issue_o      (stall_issue_o),
      // every remaining IRO port is bound implicitly to the identically named
      // signal in this scope
      .*
  );

endmodule

View file

@ -13,484 +13,486 @@
// Description: Load Store Unit, handles address calculation and memory interface signals // Description: Load Store Unit, handles address calculation and memory interface signals
module load_store_unit import ariane_pkg::*; #( module load_store_unit
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned ASID_WIDTH = 1 parameter int unsigned ASID_WIDTH = 1
)( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input logic flush_i, input logic flush_i,
input logic stall_st_pending_i, input logic stall_st_pending_i,
output logic no_st_pending_o, output logic no_st_pending_o,
input logic amo_valid_commit_i, input logic amo_valid_commit_i,
input fu_data_t fu_data_i, input fu_data_t fu_data_i,
output logic lsu_ready_o, // FU is ready e.g. not busy output logic lsu_ready_o, // FU is ready e.g. not busy
input logic lsu_valid_i, // Input is valid input logic lsu_valid_i, // Input is valid
output logic [TRANS_ID_BITS-1:0] load_trans_id_o, // ID of scoreboard entry at which to write back output logic [TRANS_ID_BITS-1:0] load_trans_id_o, // ID of scoreboard entry at which to write back
output riscv::xlen_t load_result_o, output riscv::xlen_t load_result_o,
output logic load_valid_o, output logic load_valid_o,
output exception_t load_exception_o, // to WB, signal exception status LD exception output exception_t load_exception_o, // to WB, signal exception status LD exception
output logic [TRANS_ID_BITS-1:0] store_trans_id_o, // ID of scoreboard entry at which to write back output logic [TRANS_ID_BITS-1:0] store_trans_id_o, // ID of scoreboard entry at which to write back
output riscv::xlen_t store_result_o, output riscv::xlen_t store_result_o,
output logic store_valid_o, output logic store_valid_o,
output exception_t store_exception_o, // to WB, signal exception status ST exception output exception_t store_exception_o, // to WB, signal exception status ST exception
input logic commit_i, // commit the pending store input logic commit_i, // commit the pending store
output logic commit_ready_o, // commit queue is ready to accept another commit request output logic commit_ready_o, // commit queue is ready to accept another commit request
input logic [TRANS_ID_BITS-1:0] commit_tran_id_i, input logic [TRANS_ID_BITS-1:0] commit_tran_id_i,
input logic enable_translation_i, // enable virtual memory translation input logic enable_translation_i, // enable virtual memory translation
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
// icache translation requests // icache translation requests
input icache_arsp_t icache_areq_i, input icache_arsp_t icache_areq_i,
output icache_areq_t icache_areq_o, output icache_areq_t icache_areq_o,
input riscv::priv_lvl_t priv_lvl_i, // From CSR register file input riscv::priv_lvl_t priv_lvl_i, // From CSR register file
input riscv::priv_lvl_t ld_st_priv_lvl_i, // From CSR register file input riscv::priv_lvl_t ld_st_priv_lvl_i, // From CSR register file
input logic sum_i, // From CSR register file input logic sum_i, // From CSR register file
input logic mxr_i, // From CSR register file input logic mxr_i, // From CSR register file
input logic [riscv::PPNW-1:0] satp_ppn_i, // From CSR register file input logic [riscv::PPNW-1:0] satp_ppn_i, // From CSR register file
input logic [ASID_WIDTH-1:0] asid_i, // From CSR register file input logic [ ASID_WIDTH-1:0] asid_i, // From CSR register file
input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i, input logic [ ASID_WIDTH-1:0] asid_to_be_flushed_i,
input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
input logic flush_tlb_i, input logic flush_tlb_i,
// Performance counters // Performance counters
output logic itlb_miss_o, output logic itlb_miss_o,
output logic dtlb_miss_o, output logic dtlb_miss_o,
// interface to dcache // interface to dcache
input dcache_req_o_t [2:0] dcache_req_ports_i, input dcache_req_o_t [ 2:0] dcache_req_ports_i,
output dcache_req_i_t [2:0] dcache_req_ports_o, output dcache_req_i_t [ 2:0] dcache_req_ports_o,
input logic dcache_wbuffer_empty_i, input logic dcache_wbuffer_empty_i,
input logic dcache_wbuffer_not_ni_i, input logic dcache_wbuffer_not_ni_i,
// AMO interface // AMO interface
output amo_req_t amo_req_o, output amo_req_t amo_req_o,
input amo_resp_t amo_resp_i, input amo_resp_t amo_resp_i,
// PMP // PMP
input riscv::pmpcfg_t [15:0] pmpcfg_i, input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i, input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
//RVFI //RVFI
output [riscv::VLEN-1:0] lsu_addr_o, output [ riscv::VLEN-1:0] lsu_addr_o,
output [riscv::PLEN-1:0] mem_paddr_o, output [ riscv::PLEN-1:0] mem_paddr_o,
output [(riscv::XLEN/8)-1:0] lsu_rmask_o, output [ (riscv::XLEN/8)-1:0] lsu_rmask_o,
output [(riscv::XLEN/8)-1:0] lsu_wmask_o, output [ (riscv::XLEN/8)-1:0] lsu_wmask_o,
output [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_o output [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_o
); );
// data is misaligned // data is misaligned
logic data_misaligned; logic data_misaligned;
// -------------------------------------- // --------------------------------------
// 1st register stage - (stall registers) // 1st register stage - (stall registers)
// -------------------------------------- // --------------------------------------
// those are the signals which are always correct // those are the signals which are always correct
// e.g.: they keep the value in the stall case // e.g.: they keep the value in the stall case
lsu_ctrl_t lsu_ctrl; lsu_ctrl_t lsu_ctrl;
logic pop_st; logic pop_st;
logic pop_ld; logic pop_ld;
// ------------------------------ // ------------------------------
// Address Generation Unit (AGU) // Address Generation Unit (AGU)
// ------------------------------ // ------------------------------
// virtual address as calculated by the AGU in the first cycle // virtual address as calculated by the AGU in the first cycle
logic [riscv::VLEN-1:0] vaddr_i; logic [ riscv::VLEN-1:0] vaddr_i;
riscv::xlen_t vaddr_xlen; riscv::xlen_t vaddr_xlen;
logic overflow; logic overflow;
logic [(riscv::XLEN/8)-1:0] be_i; logic [(riscv::XLEN/8)-1:0] be_i;
assign vaddr_xlen = $unsigned($signed(fu_data_i.imm) + $signed(fu_data_i.operand_a)); assign vaddr_xlen = $unsigned($signed(fu_data_i.imm) + $signed(fu_data_i.operand_a));
assign vaddr_i = vaddr_xlen[riscv::VLEN-1:0]; assign vaddr_i = vaddr_xlen[riscv::VLEN-1:0];
// we work with SV39 or SV32, so if VM is enabled, check that all bits [XLEN-1:38] or [XLEN-1:31] are equal // we work with SV39 or SV32, so if VM is enabled, check that all bits [XLEN-1:38] or [XLEN-1:31] are equal
assign overflow = !((&vaddr_xlen[riscv::XLEN-1:riscv::SV-1]) == 1'b1 || (|vaddr_xlen[riscv::XLEN-1:riscv::SV-1]) == 1'b0); assign overflow = !((&vaddr_xlen[riscv::XLEN-1:riscv::SV-1]) == 1'b1 || (|vaddr_xlen[riscv::XLEN-1:riscv::SV-1]) == 1'b0);
logic st_valid_i; logic st_valid_i;
logic ld_valid_i; logic ld_valid_i;
logic ld_translation_req; logic ld_translation_req;
logic st_translation_req; logic st_translation_req;
logic [riscv::VLEN-1:0] ld_vaddr; logic [riscv::VLEN-1:0] ld_vaddr;
logic [riscv::VLEN-1:0] st_vaddr; logic [riscv::VLEN-1:0] st_vaddr;
logic translation_req; logic translation_req;
logic translation_valid; logic translation_valid;
logic [riscv::VLEN-1:0] mmu_vaddr; logic [riscv::VLEN-1:0] mmu_vaddr;
logic [riscv::PLEN-1:0] mmu_paddr, mmu_vaddr_plen, fetch_vaddr_plen; logic [riscv::PLEN-1:0] mmu_paddr, mmu_vaddr_plen, fetch_vaddr_plen;
exception_t mmu_exception; exception_t mmu_exception;
logic dtlb_hit; logic dtlb_hit;
logic [riscv::PPNW-1:0] dtlb_ppn; logic [ riscv::PPNW-1:0] dtlb_ppn;
logic ld_valid; logic ld_valid;
logic [TRANS_ID_BITS-1:0] ld_trans_id; logic [TRANS_ID_BITS-1:0] ld_trans_id;
riscv::xlen_t ld_result; riscv::xlen_t ld_result;
logic st_valid; logic st_valid;
logic [TRANS_ID_BITS-1:0] st_trans_id; logic [TRANS_ID_BITS-1:0] st_trans_id;
riscv::xlen_t st_result; riscv::xlen_t st_result;
logic [11:0] page_offset; logic [ 11:0] page_offset;
logic page_offset_matches; logic page_offset_matches;
exception_t misaligned_exception; exception_t misaligned_exception;
exception_t ld_ex; exception_t ld_ex;
exception_t st_ex; exception_t st_ex;
// ------------------- // -------------------
// MMU e.g.: TLBs/PTW // MMU e.g.: TLBs/PTW
// ------------------- // -------------------
if (MMU_PRESENT && (riscv::XLEN == 64)) begin : gen_mmu_sv39 if (MMU_PRESENT && (riscv::XLEN == 64)) begin : gen_mmu_sv39
mmu #( mmu #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
.INSTR_TLB_ENTRIES ( ariane_pkg::INSTR_TLB_ENTRIES ), .INSTR_TLB_ENTRIES(ariane_pkg::INSTR_TLB_ENTRIES),
.DATA_TLB_ENTRIES ( ariane_pkg::DATA_TLB_ENTRIES ), .DATA_TLB_ENTRIES (ariane_pkg::DATA_TLB_ENTRIES),
.ASID_WIDTH ( ASID_WIDTH ) .ASID_WIDTH (ASID_WIDTH)
) i_cva6_mmu ( ) i_cva6_mmu (
// misaligned bypass // misaligned bypass
.misaligned_ex_i ( misaligned_exception ), .misaligned_ex_i(misaligned_exception),
.lsu_is_store_i ( st_translation_req ), .lsu_is_store_i (st_translation_req),
.lsu_req_i ( translation_req ), .lsu_req_i (translation_req),
.lsu_vaddr_i ( mmu_vaddr ), .lsu_vaddr_i (mmu_vaddr),
.lsu_valid_o ( translation_valid ), .lsu_valid_o (translation_valid),
.lsu_paddr_o ( mmu_paddr ), .lsu_paddr_o (mmu_paddr),
.lsu_exception_o ( mmu_exception ), .lsu_exception_o(mmu_exception),
.lsu_dtlb_hit_o ( dtlb_hit ), // send in the same cycle as the request .lsu_dtlb_hit_o (dtlb_hit), // send in the same cycle as the request
.lsu_dtlb_ppn_o ( dtlb_ppn ), // send in the same cycle as the request .lsu_dtlb_ppn_o (dtlb_ppn), // send in the same cycle as the request
// connecting PTW to D$ IF // connecting PTW to D$ IF
.req_port_i ( dcache_req_ports_i [0] ), .req_port_i (dcache_req_ports_i[0]),
.req_port_o ( dcache_req_ports_o [0] ), .req_port_o (dcache_req_ports_o[0]),
// icache address translation requests // icache address translation requests
.icache_areq_i ( icache_areq_i ), .icache_areq_i (icache_areq_i),
.asid_to_be_flushed_i, .asid_to_be_flushed_i,
.vaddr_to_be_flushed_i, .vaddr_to_be_flushed_i,
.icache_areq_o ( icache_areq_o ), .icache_areq_o (icache_areq_o),
.pmpcfg_i, .pmpcfg_i,
.pmpaddr_i, .pmpaddr_i,
.*
);
end else if (MMU_PRESENT && (riscv::XLEN == 32)) begin : gen_mmu_sv32
cva6_mmu_sv32 #(
.CVA6Cfg ( CVA6Cfg ),
.INSTR_TLB_ENTRIES ( ariane_pkg::INSTR_TLB_ENTRIES ),
.DATA_TLB_ENTRIES ( ariane_pkg::DATA_TLB_ENTRIES ),
.ASID_WIDTH ( ASID_WIDTH )
) i_cva6_mmu (
// misaligned bypass
.misaligned_ex_i ( misaligned_exception ),
.lsu_is_store_i ( st_translation_req ),
.lsu_req_i ( translation_req ),
.lsu_vaddr_i ( mmu_vaddr ),
.lsu_valid_o ( translation_valid ),
.lsu_paddr_o ( mmu_paddr ),
.lsu_exception_o ( mmu_exception ),
.lsu_dtlb_hit_o ( dtlb_hit ), // send in the same cycle as the request
.lsu_dtlb_ppn_o ( dtlb_ppn ), // send in the same cycle as the request
// connecting PTW to D$ IF
.req_port_i ( dcache_req_ports_i [0] ),
.req_port_o ( dcache_req_ports_o [0] ),
// icache address translation requests
.icache_areq_i ( icache_areq_i ),
.asid_to_be_flushed_i,
.vaddr_to_be_flushed_i,
.icache_areq_o ( icache_areq_o ),
.pmpcfg_i,
.pmpaddr_i,
.*
);
end else begin : gen_no_mmu
if (riscv::VLEN > riscv::PLEN) begin
assign mmu_vaddr_plen = mmu_vaddr[riscv::PLEN-1:0];
assign fetch_vaddr_plen = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0];
end else begin
assign mmu_vaddr_plen = {{{riscv::PLEN-riscv::VLEN}{1'b0}}, mmu_vaddr};
assign fetch_vaddr_plen = {{{riscv::PLEN-riscv::VLEN}{1'b0}}, icache_areq_i.fetch_vaddr};
end
assign icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
assign icache_areq_o.fetch_paddr = fetch_vaddr_plen;
assign icache_areq_o.fetch_exception = '0;
assign dcache_req_ports_o[0].address_index = '0;
assign dcache_req_ports_o[0].address_tag = '0;
assign dcache_req_ports_o[0].data_wdata = '0;
assign dcache_req_ports_o[0].data_req = 1'b0;
assign dcache_req_ports_o[0].data_be = '1;
assign dcache_req_ports_o[0].data_size = 2'b11;
assign dcache_req_ports_o[0].data_we = 1'b0;
assign dcache_req_ports_o[0].kill_req = '0;
assign dcache_req_ports_o[0].tag_valid = 1'b0;
assign itlb_miss_o = 1'b0;
assign dtlb_miss_o = 1'b0;
assign dtlb_ppn = mmu_vaddr_plen[riscv::PLEN-1:12];
assign dtlb_hit = 1'b1;
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
mmu_paddr <= '0;
translation_valid <= '0;
mmu_exception <= '0;
end else begin
mmu_paddr <= mmu_vaddr_plen;
translation_valid <= translation_req;
mmu_exception <= misaligned_exception;
end
end
end
logic store_buffer_empty;
// ------------------
// Store Unit
// ------------------
store_unit #(
.CVA6Cfg ( CVA6Cfg )
) i_store_unit (
.clk_i,
.rst_ni,
.flush_i,
.stall_st_pending_i,
.no_st_pending_o,
.store_buffer_empty_o ( store_buffer_empty ),
.valid_i ( st_valid_i ),
.lsu_ctrl_i ( lsu_ctrl ),
.pop_st_o ( pop_st ),
.commit_i,
.commit_ready_o,
.amo_valid_commit_i,
.valid_o ( st_valid ),
.trans_id_o ( st_trans_id ),
.result_o ( st_result ),
.ex_o ( st_ex ),
// MMU port
.translation_req_o ( st_translation_req ),
.vaddr_o ( st_vaddr ),
.mem_paddr_o ( mem_paddr_o ),
.paddr_i ( mmu_paddr ),
.ex_i ( mmu_exception ),
.dtlb_hit_i ( dtlb_hit ),
// Load Unit
.page_offset_i ( page_offset ),
.page_offset_matches_o ( page_offset_matches ),
// AMOs
.amo_req_o,
.amo_resp_i,
// to memory arbiter
.req_port_i ( dcache_req_ports_i [2] ),
.req_port_o ( dcache_req_ports_o [2] )
);
// ------------------
// Load Unit
// ------------------
load_unit #(
.CVA6Cfg ( CVA6Cfg )
) i_load_unit (
.valid_i ( ld_valid_i ),
.lsu_ctrl_i ( lsu_ctrl ),
.pop_ld_o ( pop_ld ),
.valid_o ( ld_valid ),
.trans_id_o ( ld_trans_id ),
.result_o ( ld_result ),
.ex_o ( ld_ex ),
// MMU port
.translation_req_o ( ld_translation_req ),
.vaddr_o ( ld_vaddr ),
.paddr_i ( mmu_paddr ),
.ex_i ( mmu_exception ),
.dtlb_hit_i ( dtlb_hit ),
.dtlb_ppn_i ( dtlb_ppn ),
// to store unit
.page_offset_o ( page_offset ),
.page_offset_matches_i ( page_offset_matches ),
.store_buffer_empty_i ( store_buffer_empty ),
// to memory arbiter
.req_port_i ( dcache_req_ports_i [1] ),
.req_port_o ( dcache_req_ports_o [1] ),
.dcache_wbuffer_not_ni_i,
.commit_tran_id_i,
.* .*
); );
end else if (MMU_PRESENT && (riscv::XLEN == 32)) begin : gen_mmu_sv32
// ---------------------------- cva6_mmu_sv32 #(
// Output Pipeline Register .CVA6Cfg (CVA6Cfg),
// ---------------------------- .INSTR_TLB_ENTRIES(ariane_pkg::INSTR_TLB_ENTRIES),
.DATA_TLB_ENTRIES (ariane_pkg::DATA_TLB_ENTRIES),
// amount of pipeline registers inserted for load/store return path .ASID_WIDTH (ASID_WIDTH)
// can be tuned to trade-off IPC vs. cycle time ) i_cva6_mmu (
// misaligned bypass
shift_reg #( .misaligned_ex_i(misaligned_exception),
.dtype ( logic[$bits(ld_valid) + $bits(ld_trans_id) + $bits(ld_result) + $bits(ld_ex) - 1: 0]), .lsu_is_store_i (st_translation_req),
.Depth ( cva6_config_pkg::CVA6ConfigNrLoadPipeRegs ) .lsu_req_i (translation_req),
) i_pipe_reg_load ( .lsu_vaddr_i (mmu_vaddr),
.clk_i, .lsu_valid_o (translation_valid),
.rst_ni, .lsu_paddr_o (mmu_paddr),
.d_i ( {ld_valid, ld_trans_id, ld_result, ld_ex} ), .lsu_exception_o(mmu_exception),
.d_o ( {load_valid_o, load_trans_id_o, load_result_o, load_exception_o} ) .lsu_dtlb_hit_o (dtlb_hit), // send in the same cycle as the request
.lsu_dtlb_ppn_o (dtlb_ppn), // send in the same cycle as the request
// connecting PTW to D$ IF
.req_port_i (dcache_req_ports_i[0]),
.req_port_o (dcache_req_ports_o[0]),
// icache address translation requests
.icache_areq_i (icache_areq_i),
.asid_to_be_flushed_i,
.vaddr_to_be_flushed_i,
.icache_areq_o (icache_areq_o),
.pmpcfg_i,
.pmpaddr_i,
.*
); );
end else begin : gen_no_mmu
shift_reg #( if (riscv::VLEN > riscv::PLEN) begin
.dtype ( logic[$bits(st_valid) + $bits(st_trans_id) + $bits(st_result) + $bits(st_ex) - 1: 0]), assign mmu_vaddr_plen = mmu_vaddr[riscv::PLEN-1:0];
.Depth ( cva6_config_pkg::CVA6ConfigNrStorePipeRegs ) assign fetch_vaddr_plen = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0];
) i_pipe_reg_store ( end else begin
.clk_i, assign mmu_vaddr_plen = {{{riscv::PLEN - riscv::VLEN} {1'b0}}, mmu_vaddr};
.rst_ni, assign fetch_vaddr_plen = {{{riscv::PLEN - riscv::VLEN} {1'b0}}, icache_areq_i.fetch_vaddr};
.d_i ( {st_valid, st_trans_id, st_result, st_ex} ),
.d_o ( {store_valid_o, store_trans_id_o, store_result_o, store_exception_o} )
);
// determine whether this is a load or store
always_comb begin : which_op
ld_valid_i = 1'b0;
st_valid_i = 1'b0;
translation_req = 1'b0;
mmu_vaddr = {riscv::VLEN{1'b0}};
// check the operation to activate the right functional unit accordingly
unique case (lsu_ctrl.fu)
// all loads go here
LOAD: begin
ld_valid_i = lsu_ctrl.valid;
translation_req = ld_translation_req;
mmu_vaddr = ld_vaddr;
end
// all stores go here
STORE: begin
st_valid_i = lsu_ctrl.valid;
translation_req = st_translation_req;
mmu_vaddr = st_vaddr;
end
// not relevant for the LSU
default: ;
endcase
end end
assign icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
assign icache_areq_o.fetch_paddr = fetch_vaddr_plen;
assign icache_areq_o.fetch_exception = '0;
// --------------- assign dcache_req_ports_o[0].address_index = '0;
// Byte Enable assign dcache_req_ports_o[0].address_tag = '0;
// --------------- assign dcache_req_ports_o[0].data_wdata = '0;
// we can generate the byte enable from the virtual address since the last assign dcache_req_ports_o[0].data_req = 1'b0;
// 12 bit are the same anyway assign dcache_req_ports_o[0].data_be = '1;
// and we can always generate the byte enable from the address at hand assign dcache_req_ports_o[0].data_size = 2'b11;
assign be_i = riscv::IS_XLEN64 ? be_gen(vaddr_i[2:0], extract_transfer_size(fu_data_i.operation)): assign dcache_req_ports_o[0].data_we = 1'b0;
be_gen_32(vaddr_i[1:0], extract_transfer_size(fu_data_i.operation)); assign dcache_req_ports_o[0].kill_req = '0;
assign dcache_req_ports_o[0].tag_valid = 1'b0;
// ------------------------ assign itlb_miss_o = 1'b0;
// Misaligned Exception assign dtlb_miss_o = 1'b0;
// ------------------------ assign dtlb_ppn = mmu_vaddr_plen[riscv::PLEN-1:12];
// we can detect a misaligned exception immediately assign dtlb_hit = 1'b1;
// the misaligned exception is passed to the functional unit via the MMU, which in case
// can augment the exception if other memory related exceptions like a page fault or access errors
always_comb begin : data_misaligned_detection
misaligned_exception = { always_ff @(posedge clk_i or negedge rst_ni) begin
{riscv::XLEN{1'b0}}, if (~rst_ni) begin
{riscv::XLEN{1'b0}}, mmu_paddr <= '0;
1'b0 translation_valid <= '0;
}; mmu_exception <= '0;
end else begin
mmu_paddr <= mmu_vaddr_plen;
translation_valid <= translation_req;
mmu_exception <= misaligned_exception;
end
end
end
data_misaligned = 1'b0;
if (lsu_ctrl.valid) begin logic store_buffer_empty;
case (lsu_ctrl.operation) // ------------------
// double word // Store Unit
LD, SD, FLD, FSD, // ------------------
store_unit #(
.CVA6Cfg(CVA6Cfg)
) i_store_unit (
.clk_i,
.rst_ni,
.flush_i,
.stall_st_pending_i,
.no_st_pending_o,
.store_buffer_empty_o(store_buffer_empty),
.valid_i (st_valid_i),
.lsu_ctrl_i(lsu_ctrl),
.pop_st_o (pop_st),
.commit_i,
.commit_ready_o,
.amo_valid_commit_i,
.valid_o (st_valid),
.trans_id_o (st_trans_id),
.result_o (st_result),
.ex_o (st_ex),
// MMU port
.translation_req_o (st_translation_req),
.vaddr_o (st_vaddr),
.mem_paddr_o (mem_paddr_o),
.paddr_i (mmu_paddr),
.ex_i (mmu_exception),
.dtlb_hit_i (dtlb_hit),
// Load Unit
.page_offset_i (page_offset),
.page_offset_matches_o(page_offset_matches),
// AMOs
.amo_req_o,
.amo_resp_i,
// to memory arbiter
.req_port_i (dcache_req_ports_i[2]),
.req_port_o (dcache_req_ports_o[2])
);
// ------------------
// Load Unit
// ------------------
load_unit #(
.CVA6Cfg(CVA6Cfg)
) i_load_unit (
.valid_i (ld_valid_i),
.lsu_ctrl_i(lsu_ctrl),
.pop_ld_o (pop_ld),
.valid_o (ld_valid),
.trans_id_o (ld_trans_id),
.result_o (ld_result),
.ex_o (ld_ex),
// MMU port
.translation_req_o (ld_translation_req),
.vaddr_o (ld_vaddr),
.paddr_i (mmu_paddr),
.ex_i (mmu_exception),
.dtlb_hit_i (dtlb_hit),
.dtlb_ppn_i (dtlb_ppn),
// to store unit
.page_offset_o (page_offset),
.page_offset_matches_i(page_offset_matches),
.store_buffer_empty_i (store_buffer_empty),
// to memory arbiter
.req_port_i (dcache_req_ports_i[1]),
.req_port_o (dcache_req_ports_o[1]),
.dcache_wbuffer_not_ni_i,
.commit_tran_id_i,
.*
);
// ----------------------------
// Output Pipeline Register
// ----------------------------
// amount of pipeline registers inserted for load/store return path
// can be tuned to trade-off IPC vs. cycle time
// Load return path: {valid, trans_id, result, exception} are packed into one
// vector, delayed by CVA6ConfigNrLoadPipeRegs stages and unpacked in the same
// field order on the output side.
shift_reg #(
  .dtype(logic [$bits(ld_valid) + $bits(ld_trans_id) + $bits(ld_result) + $bits(ld_ex) - 1:0]),
  .Depth(cva6_config_pkg::CVA6ConfigNrLoadPipeRegs)
) i_pipe_reg_load (
  .clk_i (clk_i),
  .rst_ni(rst_ni),
  .d_i   ({ld_valid, ld_trans_id, ld_result, ld_ex}),
  .d_o   ({load_valid_o, load_trans_id_o, load_result_o, load_exception_o})
);
// Store return path: {valid, trans_id, result, exception} are packed into one
// vector, delayed by CVA6ConfigNrStorePipeRegs stages and unpacked in the same
// field order on the output side.
shift_reg #(
  .dtype(logic [$bits(st_valid) + $bits(st_trans_id) + $bits(st_result) + $bits(st_ex) - 1:0]),
  .Depth(cva6_config_pkg::CVA6ConfigNrStorePipeRegs)
) i_pipe_reg_store (
  .clk_i (clk_i),
  .rst_ni(rst_ni),
  .d_i   ({st_valid, st_trans_id, st_result, st_ex}),
  .d_o   ({store_valid_o, store_trans_id_o, store_result_o, store_exception_o})
);
// determine whether this is a load or store
always_comb begin : which_op
  // Defaults: no unit is selected and nothing is sent to the MMU.
  mmu_vaddr       = '0;
  translation_req = 1'b0;
  st_valid_i      = 1'b0;
  ld_valid_i      = 1'b0;

  // Route the request, and the MMU inputs of the matching unit, according to
  // the functional unit recorded in the control word.
  unique case (lsu_ctrl.fu)
    // stores
    STORE: begin
      mmu_vaddr       = st_vaddr;
      translation_req = st_translation_req;
      st_valid_i      = lsu_ctrl.valid;
    end
    // loads
    LOAD: begin
      mmu_vaddr       = ld_vaddr;
      translation_req = ld_translation_req;
      ld_valid_i      = lsu_ctrl.valid;
    end
    // any other functional unit is not handled by the LSU: keep the defaults
    default: ;
  endcase
end
// ---------------
// Byte Enable
// ---------------
// The byte enable can be generated from the virtual address, since its
// lowest 12 bits are not changed by translation; it is therefore always
// derived directly from the address at hand.
// 64-bit configurations use address bits [2:0] with be_gen, all other
// configurations use address bits [1:0] with be_gen_32.
assign be_i = riscv::IS_XLEN64 ? be_gen(
vaddr_i[2:0], extract_transfer_size(fu_data_i.operation)
) : be_gen_32(
vaddr_i[1:0], extract_transfer_size(fu_data_i.operation)
);
// ------------------------
// Misaligned Exception
// ------------------------
// A misaligned access can be detected immediately.
// The misaligned exception is passed to the functional unit via the MMU, which in turn
// can replace it if another memory-related exception, such as a page fault or an access fault, occurs.
always_comb begin : data_misaligned_detection
misaligned_exception = {{riscv::XLEN{1'b0}}, {riscv::XLEN{1'b0}}, 1'b0};
data_misaligned = 1'b0;
if (lsu_ctrl.valid) begin
case (lsu_ctrl.operation)
// double word
LD, SD, FLD, FSD,
AMO_LRD, AMO_SCD, AMO_LRD, AMO_SCD,
AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD, AMO_SWAPD, AMO_ADDD, AMO_ANDD, AMO_ORD,
AMO_XORD, AMO_MAXD, AMO_MAXDU, AMO_MIND, AMO_XORD, AMO_MAXD, AMO_MAXDU, AMO_MIND,
AMO_MINDU: begin AMO_MINDU: begin
if (lsu_ctrl.vaddr[2:0] != 3'b000) begin if (lsu_ctrl.vaddr[2:0] != 3'b000) begin
data_misaligned = 1'b1; data_misaligned = 1'b1;
end end
end end
// word // word
LW, LWU, SW, FLW, FSW, LW, LWU, SW, FLW, FSW,
AMO_LRW, AMO_SCW, AMO_LRW, AMO_SCW,
AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW, AMO_SWAPW, AMO_ADDW, AMO_ANDW, AMO_ORW,
AMO_XORW, AMO_MAXW, AMO_MAXWU, AMO_MINW, AMO_XORW, AMO_MAXW, AMO_MAXWU, AMO_MINW,
AMO_MINWU: begin AMO_MINWU: begin
if (lsu_ctrl.vaddr[1:0] != 2'b00) begin if (lsu_ctrl.vaddr[1:0] != 2'b00) begin
data_misaligned = 1'b1; data_misaligned = 1'b1;
end end
end
// half word
LH, LHU, SH, FLH, FSH: begin
if (lsu_ctrl.vaddr[0] != 1'b0) begin
data_misaligned = 1'b1;
end
end
// byte -> is always aligned
default:;
endcase
end end
// half word
if (data_misaligned) begin LH, LHU, SH, FLH, FSH: begin
if (lsu_ctrl.vaddr[0] != 1'b0) begin
if (lsu_ctrl.fu == LOAD) begin data_misaligned = 1'b1;
misaligned_exception = { end
riscv::LD_ADDR_MISALIGNED,
{{riscv::XLEN-riscv::VLEN{1'b0}},lsu_ctrl.vaddr},
1'b1
};
end else if (lsu_ctrl.fu == STORE) begin
misaligned_exception = {
riscv::ST_ADDR_MISALIGNED,
{{riscv::XLEN-riscv::VLEN{1'b0}},lsu_ctrl.vaddr},
1'b1
};
end
end
if (ariane_pkg::MMU_PRESENT && en_ld_st_translation_i && lsu_ctrl.overflow) begin
if (lsu_ctrl.fu == LOAD) begin
misaligned_exception = {
riscv::LD_ACCESS_FAULT,
{{riscv::XLEN-riscv::VLEN{1'b0}},lsu_ctrl.vaddr},
1'b1
};
end else if (lsu_ctrl.fu == STORE) begin
misaligned_exception = {
riscv::ST_ACCESS_FAULT,
{{riscv::XLEN-riscv::VLEN{1'b0}},lsu_ctrl.vaddr},
1'b1
};
end
end end
// byte -> is always aligned
default: ;
endcase
end end
// ------------------ if (data_misaligned) begin
// LSU Control
// ------------------
// new data arrives here
lsu_ctrl_t lsu_req_i;
assign lsu_req_i = {lsu_valid_i, vaddr_i, overflow, fu_data_i.operand_b, be_i, fu_data_i.fu, fu_data_i.operation, fu_data_i.trans_id}; if (lsu_ctrl.fu == LOAD) begin
misaligned_exception = {
riscv::LD_ADDR_MISALIGNED, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1
};
lsu_bypass #( end else if (lsu_ctrl.fu == STORE) begin
.CVA6Cfg ( CVA6Cfg ) misaligned_exception = {
) lsu_bypass_i ( riscv::ST_ADDR_MISALIGNED, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1
.lsu_req_i ( lsu_req_i ), };
.lsu_req_valid_i ( lsu_valid_i ), end
.pop_ld_i ( pop_ld ), end
.pop_st_i ( pop_st ),
.lsu_ctrl_o ( lsu_ctrl ), if (ariane_pkg::MMU_PRESENT && en_ld_st_translation_i && lsu_ctrl.overflow) begin
.ready_o ( lsu_ready_o ),
.*
);
assign lsu_addr_o = lsu_ctrl.vaddr; if (lsu_ctrl.fu == LOAD) begin
assign lsu_rmask_o = lsu_ctrl.fu == LOAD ? lsu_ctrl.be : '0; misaligned_exception = {
assign lsu_wmask_o = lsu_ctrl.fu == STORE ? lsu_ctrl.be : '0; riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1
assign lsu_addr_trans_id_o = lsu_ctrl.trans_id; };
end else if (lsu_ctrl.fu == STORE) begin
misaligned_exception = {
riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1
};
end
end
end
// ------------------
// LSU Control
// ------------------
// new data arrives here
// Request record that is handed to lsu_bypass_i.
lsu_ctrl_t lsu_req_i;
// Positional packing: the elements are listed without member names, so their
// order has to follow the member order of lsu_ctrl_t (declared outside this
// view - TODO confirm before reordering anything here).
assign lsu_req_i = {
lsu_valid_i,
vaddr_i,
overflow,
fu_data_i.operand_b,
be_i,
fu_data_i.fu,
fu_data_i.operation,
fu_data_i.trans_id
};
// Bypass stage between the incoming request (lsu_req_i) and the control word
// (lsu_ctrl) that the load and store units consume.
lsu_bypass #(
.CVA6Cfg(CVA6Cfg)
) lsu_bypass_i (
.lsu_req_i (lsu_req_i),
.lsu_req_valid_i(lsu_valid_i),
// pop strobes driven by the load unit and the store unit
.pop_ld_i (pop_ld),
.pop_st_i (pop_st),
.lsu_ctrl_o(lsu_ctrl),
.ready_o (lsu_ready_o),
// remaining ports are bound by name to same-named signals of this module
// (the lsu_bypass port list is not visible here)
.*
);
// Export address, byte masks and transaction id of the current control word.
// The read mask is only populated for loads and the write mask only for
// stores; otherwise the mask is all zeros.
assign lsu_addr_o          = lsu_ctrl.vaddr;
assign lsu_addr_trans_id_o = lsu_ctrl.trans_id;
assign lsu_rmask_o         = (lsu_ctrl.fu == LOAD) ? lsu_ctrl.be : '0;
assign lsu_wmask_o         = (lsu_ctrl.fu == STORE) ? lsu_ctrl.be : '0;
endmodule endmodule

View file

@ -18,411 +18,420 @@
// Modification: add support for multiple outstanding load operations // Modification: add support for multiple outstanding load operations
// to the data cache // to the data cache
module load_unit import ariane_pkg::*; #( module load_unit
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
input logic flush_i, input logic flush_i,
// load unit input port // load unit input port
input logic valid_i, input logic valid_i,
input lsu_ctrl_t lsu_ctrl_i, input lsu_ctrl_t lsu_ctrl_i,
output logic pop_ld_o, output logic pop_ld_o,
// load unit output port // load unit output port
output logic valid_o, output logic valid_o,
output logic [TRANS_ID_BITS-1:0] trans_id_o, output logic [TRANS_ID_BITS-1:0] trans_id_o,
output riscv::xlen_t result_o, output riscv::xlen_t result_o,
output exception_t ex_o, output exception_t ex_o,
// MMU -> Address Translation // MMU -> Address Translation
output logic translation_req_o, // request address translation output logic translation_req_o, // request address translation
output logic [riscv::VLEN-1:0] vaddr_o, // virtual address out output logic [riscv::VLEN-1:0] vaddr_o, // virtual address out
input logic [riscv::PLEN-1:0] paddr_i, // physical address in input logic [riscv::PLEN-1:0] paddr_i, // physical address in
input exception_t ex_i, // exception which may has happened earlier. for example: mis-aligned exception input exception_t ex_i, // exception which may has happened earlier. for example: mis-aligned exception
input logic dtlb_hit_i, // hit on the dtlb, send in the same cycle as the request input logic dtlb_hit_i, // hit on the dtlb, send in the same cycle as the request
input logic [riscv::PPNW-1:0] dtlb_ppn_i, // ppn on the dtlb, send in the same cycle as the request input logic [riscv::PPNW-1:0] dtlb_ppn_i, // ppn on the dtlb, send in the same cycle as the request
// address checker // address checker
output logic [11:0] page_offset_o, output logic [11:0] page_offset_o,
input logic page_offset_matches_i, input logic page_offset_matches_i,
input logic store_buffer_empty_i, // the entire store-buffer is empty input logic store_buffer_empty_i, // the entire store-buffer is empty
input logic [TRANS_ID_BITS-1:0] commit_tran_id_i, input logic [TRANS_ID_BITS-1:0] commit_tran_id_i,
// D$ interface // D$ interface
input dcache_req_o_t req_port_i, input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o, output dcache_req_i_t req_port_o,
input logic dcache_wbuffer_not_ni_i input logic dcache_wbuffer_not_ni_i
); );
enum logic [3:0] { IDLE, WAIT_GNT, SEND_TAG, WAIT_PAGE_OFFSET, enum logic [3:0] {
ABORT_TRANSACTION, ABORT_TRANSACTION_NI, WAIT_TRANSLATION, WAIT_FLUSH, IDLE,
WAIT_WB_EMPTY WAIT_GNT,
} state_d, state_q; SEND_TAG,
WAIT_PAGE_OFFSET,
ABORT_TRANSACTION,
ABORT_TRANSACTION_NI,
WAIT_TRANSLATION,
WAIT_FLUSH,
WAIT_WB_EMPTY
}
state_d, state_q;
// in order to decouple the response interface from the request interface, // in order to decouple the response interface from the request interface,
// we need a buffer which can hold all inflight memory load requests // we need a buffer which can hold all inflight memory load requests
typedef struct packed { typedef struct packed {
logic [TRANS_ID_BITS-1:0] trans_id; // scoreboard identifier logic [TRANS_ID_BITS-1:0] trans_id; // scoreboard identifier
logic [riscv::XLEN_ALIGN_BYTES-1:0] address_offset; // least significant bits of the address logic [riscv::XLEN_ALIGN_BYTES-1:0] address_offset; // least significant bits of the address
fu_op operation; // type of load fu_op operation; // type of load
} ldbuf_t; } ldbuf_t;
// to support a throughput of one load per cycle, if the number of entries // to support a throughput of one load per cycle, if the number of entries
// of the load buffer is 1, implement a fall-through mode. This however // of the load buffer is 1, implement a fall-through mode. This however
// adds a combinational path between the request and response interfaces // adds a combinational path between the request and response interfaces
// towards the cache. // towards the cache.
localparam logic LDBUF_FALLTHROUGH = (CVA6Cfg.NrLoadBufEntries == 1); localparam logic LDBUF_FALLTHROUGH = (CVA6Cfg.NrLoadBufEntries == 1);
localparam int unsigned REQ_ID_BITS = CVA6Cfg.NrLoadBufEntries > 1 ? localparam int unsigned REQ_ID_BITS = CVA6Cfg.NrLoadBufEntries > 1 ? $clog2(
$clog2(CVA6Cfg.NrLoadBufEntries) : 1; CVA6Cfg.NrLoadBufEntries
) : 1;
typedef logic [REQ_ID_BITS-1:0] ldbuf_id_t; typedef logic [REQ_ID_BITS-1:0] ldbuf_id_t;
logic [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_valid_q, ldbuf_valid_d; logic [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_valid_q, ldbuf_valid_d;
logic [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_flushed_q, ldbuf_flushed_d; logic [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_flushed_q, ldbuf_flushed_d;
ldbuf_t [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_q; ldbuf_t [CVA6Cfg.NrLoadBufEntries-1:0] ldbuf_q;
logic ldbuf_empty, ldbuf_full; logic ldbuf_empty, ldbuf_full;
ldbuf_id_t ldbuf_free_index; ldbuf_id_t ldbuf_free_index;
logic ldbuf_w; logic ldbuf_w;
ldbuf_t ldbuf_wdata; ldbuf_t ldbuf_wdata;
ldbuf_id_t ldbuf_windex; ldbuf_id_t ldbuf_windex;
logic ldbuf_r; logic ldbuf_r;
ldbuf_t ldbuf_rdata; ldbuf_t ldbuf_rdata;
ldbuf_id_t ldbuf_rindex; ldbuf_id_t ldbuf_rindex;
ldbuf_id_t ldbuf_last_id_q; ldbuf_id_t ldbuf_last_id_q;
assign ldbuf_full = &ldbuf_valid_q; assign ldbuf_full = &ldbuf_valid_q;
// //
// buffer of outstanding loads // buffer of outstanding loads
// write in the first available slot // write in the first available slot
generate generate
if (CVA6Cfg.NrLoadBufEntries > 1) begin : ldbuf_free_index_multi_gen if (CVA6Cfg.NrLoadBufEntries > 1) begin : ldbuf_free_index_multi_gen
lzc #( lzc #(
.WIDTH (CVA6Cfg.NrLoadBufEntries), .WIDTH(CVA6Cfg.NrLoadBufEntries),
.MODE (1'b0) // Count leading zeros .MODE (1'b0) // Count leading zeros
) lzc_windex_i ( ) lzc_windex_i (
.in_i (~ldbuf_valid_q), .in_i (~ldbuf_valid_q),
.cnt_o (ldbuf_free_index), .cnt_o (ldbuf_free_index),
.empty_o (ldbuf_empty) .empty_o(ldbuf_empty)
); );
end else begin : ldbuf_free_index_single_gen end else begin : ldbuf_free_index_single_gen
assign ldbuf_free_index = 1'b0; assign ldbuf_free_index = 1'b0;
end
endgenerate
assign ldbuf_windex = (LDBUF_FALLTHROUGH && ldbuf_r) ? ldbuf_rindex : ldbuf_free_index;
always_comb
begin : ldbuf_comb
ldbuf_flushed_d = ldbuf_flushed_q;
ldbuf_valid_d = ldbuf_valid_q;
// In case of flush, raise the flushed flag in all slots.
if (flush_i) begin
ldbuf_flushed_d = '1;
end
// Free read entry (in the case of fall-through mode, free the entry
// only if there is no pending load)
if (ldbuf_r && (!LDBUF_FALLTHROUGH || !ldbuf_w)) begin
ldbuf_valid_d[ldbuf_rindex] = 1'b0;
end
// Track a new outstanding operation in the load buffer
if (ldbuf_w) begin
ldbuf_flushed_d[ldbuf_windex] = 1'b0;
ldbuf_valid_d[ldbuf_windex] = 1'b1;
end
end end
endgenerate
always_ff @(posedge clk_i or negedge rst_ni) assign ldbuf_windex = (LDBUF_FALLTHROUGH && ldbuf_r) ? ldbuf_rindex : ldbuf_free_index;
begin : ldbuf_ff
if (!rst_ni) begin always_comb begin : ldbuf_comb
ldbuf_flushed_q <= '0; ldbuf_flushed_d = ldbuf_flushed_q;
ldbuf_valid_q <= '0; ldbuf_valid_d = ldbuf_valid_q;
ldbuf_last_id_q <= '0;
ldbuf_q <= '0; // In case of flush, raise the flushed flag in all slots.
end else begin if (flush_i) begin
ldbuf_flushed_q <= ldbuf_flushed_d; ldbuf_flushed_d = '1;
ldbuf_valid_q <= ldbuf_valid_d;
if (ldbuf_w) begin
ldbuf_last_id_q <= ldbuf_windex;
ldbuf_q[ldbuf_windex] <= ldbuf_wdata;
end
end
end end
// Free read entry (in the case of fall-through mode, free the entry
// only if there is no pending load)
if (ldbuf_r && (!LDBUF_FALLTHROUGH || !ldbuf_w)) begin
ldbuf_valid_d[ldbuf_rindex] = 1'b0;
end
// Track a new outstanding operation in the load buffer
if (ldbuf_w) begin
ldbuf_flushed_d[ldbuf_windex] = 1'b0;
ldbuf_valid_d[ldbuf_windex] = 1'b1;
end
end
// page offset is defined as the lower 12 bits, feed through for address checker always_ff @(posedge clk_i or negedge rst_ni) begin : ldbuf_ff
assign page_offset_o = lsu_ctrl_i.vaddr[11:0]; if (!rst_ni) begin
// feed-through the virtual address for VA translation ldbuf_flushed_q <= '0;
assign vaddr_o = lsu_ctrl_i.vaddr; ldbuf_valid_q <= '0;
// this is a read-only interface so set the write enable to 0 ldbuf_last_id_q <= '0;
assign req_port_o.data_we = 1'b0; ldbuf_q <= '0;
assign req_port_o.data_wdata = '0; end else begin
// compose the load buffer write data, control is handled in the FSM ldbuf_flushed_q <= ldbuf_flushed_d;
assign ldbuf_wdata = {lsu_ctrl_i.trans_id, lsu_ctrl_i.vaddr[riscv::XLEN_ALIGN_BYTES-1:0], lsu_ctrl_i.operation}; ldbuf_valid_q <= ldbuf_valid_d;
// output address if (ldbuf_w) begin
// we can now output the lower 12 bit as the index to the cache ldbuf_last_id_q <= ldbuf_windex;
assign req_port_o.address_index = lsu_ctrl_i.vaddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; ldbuf_q[ldbuf_windex] <= ldbuf_wdata;
// translation from last cycle, again: control is handled in the FSM end
assign req_port_o.address_tag = paddr_i[ariane_pkg::DCACHE_TAG_WIDTH + end
end
// page offset is defined as the lower 12 bits, feed through for address checker
assign page_offset_o = lsu_ctrl_i.vaddr[11:0];
// feed-through the virtual address for VA translation
assign vaddr_o = lsu_ctrl_i.vaddr;
// this is a read-only interface so set the write enable to 0
assign req_port_o.data_we = 1'b0;
assign req_port_o.data_wdata = '0;
// compose the load buffer write data, control is handled in the FSM
assign ldbuf_wdata = {
lsu_ctrl_i.trans_id, lsu_ctrl_i.vaddr[riscv::XLEN_ALIGN_BYTES-1:0], lsu_ctrl_i.operation
};
// output address
// we can now output the lower 12 bit as the index to the cache
assign req_port_o.address_index = lsu_ctrl_i.vaddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
// translation from last cycle, again: control is handled in the FSM
assign req_port_o.address_tag = paddr_i[ariane_pkg::DCACHE_TAG_WIDTH +
ariane_pkg::DCACHE_INDEX_WIDTH-1 : ariane_pkg::DCACHE_INDEX_WIDTH-1 :
ariane_pkg::DCACHE_INDEX_WIDTH]; ariane_pkg::DCACHE_INDEX_WIDTH];
// request id = index of the load buffer's entry // request id = index of the load buffer's entry
assign req_port_o.data_id = ldbuf_windex; assign req_port_o.data_id = ldbuf_windex;
// directly forward exception fields (valid bit is set below) // directly forward exception fields (valid bit is set below)
assign ex_o.cause = ex_i.cause; assign ex_o.cause = ex_i.cause;
assign ex_o.tval = ex_i.tval; assign ex_o.tval = ex_i.tval;
// Check that NI operations follow the necessary conditions // Check that NI operations follow the necessary conditions
logic paddr_ni; logic paddr_ni;
logic not_commit_time; logic not_commit_time;
logic inflight_stores; logic inflight_stores;
logic stall_ni; logic stall_ni;
assign paddr_ni = config_pkg::is_inside_nonidempotent_regions(CVA6Cfg, {dtlb_ppn_i,12'd0}); assign paddr_ni = config_pkg::is_inside_nonidempotent_regions(CVA6Cfg, {dtlb_ppn_i, 12'd0});
assign not_commit_time = commit_tran_id_i != lsu_ctrl_i.trans_id; assign not_commit_time = commit_tran_id_i != lsu_ctrl_i.trans_id;
assign inflight_stores = (!dcache_wbuffer_not_ni_i || !store_buffer_empty_i); assign inflight_stores = (!dcache_wbuffer_not_ni_i || !store_buffer_empty_i);
assign stall_ni = (inflight_stores || not_commit_time) && paddr_ni; assign stall_ni = (inflight_stores || not_commit_time) && paddr_ni;
// --------------- // ---------------
// Load Control // Load Control
// --------------- // ---------------
always_comb begin : load_control always_comb begin : load_control
automatic logic accept_req; automatic logic accept_req;
// default assignments // default assignments
state_d = state_q; state_d = state_q;
translation_req_o = 1'b0; translation_req_o = 1'b0;
req_port_o.data_req = 1'b0; req_port_o.data_req = 1'b0;
// tag control // tag control
req_port_o.kill_req = 1'b0; req_port_o.kill_req = 1'b0;
req_port_o.tag_valid = 1'b0; req_port_o.tag_valid = 1'b0;
req_port_o.data_be = lsu_ctrl_i.be; req_port_o.data_be = lsu_ctrl_i.be;
req_port_o.data_size = extract_transfer_size(lsu_ctrl_i.operation); req_port_o.data_size = extract_transfer_size(lsu_ctrl_i.operation);
pop_ld_o = 1'b0; pop_ld_o = 1'b0;
// In IDLE and SEND_TAG states, this unit can accept a new load request // In IDLE and SEND_TAG states, this unit can accept a new load request
// when the load buffer is not full or if there is a response and the // when the load buffer is not full or if there is a response and the
// load buffer is in fall-through mode // load buffer is in fall-through mode
accept_req = (valid_i && (!ldbuf_full || (LDBUF_FALLTHROUGH && ldbuf_r))); accept_req = (valid_i && (!ldbuf_full || (LDBUF_FALLTHROUGH && ldbuf_r)));
case (state_q) case (state_q)
IDLE: begin IDLE: begin
if (accept_req) begin if (accept_req) begin
// start the translation process even though we do not know if the addresses match // start the translation process even though we do not know if the addresses match
// this should ease timing // this should ease timing
translation_req_o = 1'b1; translation_req_o = 1'b1;
// check if the page offset matches with a store, if it does then stall and wait // check if the page offset matches with a store, if it does then stall and wait
if (!page_offset_matches_i) begin if (!page_offset_matches_i) begin
// make a load request to memory // make a load request to memory
req_port_o.data_req = 1'b1; req_port_o.data_req = 1'b1;
// we got no data grant so wait for the grant before sending the tag // we got no data grant so wait for the grant before sending the tag
if (!req_port_i.data_gnt) begin if (!req_port_i.data_gnt) begin
state_d = WAIT_GNT; state_d = WAIT_GNT;
end else begin end else begin
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = ABORT_TRANSACTION; state_d = ABORT_TRANSACTION;
end else begin end else begin
if (!stall_ni) begin if (!stall_ni) begin
// we got a grant and a hit on the DTLB so we can send the tag in the next cycle // we got a grant and a hit on the DTLB so we can send the tag in the next cycle
state_d = SEND_TAG; state_d = SEND_TAG;
pop_ld_o = 1'b1; pop_ld_o = 1'b1;
// translation valid but this is to NC and the WB is not yet empty. // translation valid but this is to NC and the WB is not yet empty.
end else begin end else begin
state_d = ABORT_TRANSACTION_NI; state_d = ABORT_TRANSACTION_NI;
end
end
end
end else begin
// wait for the store buffer to train and the page offset to not match anymore
state_d = WAIT_PAGE_OFFSET;
end
end end
end
end end
end else begin
// wait here for the page offset to not match anymore // wait for the store buffer to train and the page offset to not match anymore
WAIT_PAGE_OFFSET: begin state_d = WAIT_PAGE_OFFSET;
// we make a new request as soon as the page offset does not match anymore end
if (!page_offset_matches_i) begin
state_d = WAIT_GNT;
end
end
// abort the previous request - free the D$ arbiter
// we are here because of a TLB miss, we need to abort the current request and give way for the
// PTW walker to satisfy the TLB miss
ABORT_TRANSACTION, ABORT_TRANSACTION_NI: begin
req_port_o.kill_req = 1'b1;
req_port_o.tag_valid = 1'b1;
// either re-do the request or wait until the WB is empty (depending on where we came from).
state_d = (state_q == ABORT_TRANSACTION_NI) ? WAIT_WB_EMPTY : WAIT_TRANSLATION;
end
// Wait until the write-back buffer is empty in the data cache.
WAIT_WB_EMPTY: begin
// the write buffer is empty, so lets go and re-do the translation.
if (dcache_wbuffer_not_ni_i) state_d = WAIT_TRANSLATION;
end
WAIT_TRANSLATION: begin
translation_req_o = 1'b1;
// we've got a hit and we can continue with the request process
if (dtlb_hit_i)
state_d = WAIT_GNT;
// we got an exception
if (ex_i.valid) begin
// the next state will be the idle state
state_d = IDLE;
// pop load - but only if we are not getting an rvalid in here - otherwise we will over-write an incoming transaction
pop_ld_o = ~req_port_i.data_rvalid;
end
end
WAIT_GNT: begin
// keep the translation request up
translation_req_o = 1'b1;
// keep the request up
req_port_o.data_req = 1'b1;
// we finally got a data grant
if (req_port_i.data_gnt) begin
// so we send the tag in the next cycle
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = ABORT_TRANSACTION;
end else begin
if (!stall_ni) begin
// we got a grant and a hit on the DTLB so we can send the tag in the next cycle
state_d = SEND_TAG;
pop_ld_o = 1'b1;
// translation valid but this is to NC and the WB is not yet empty.
end else begin
state_d = ABORT_TRANSACTION_NI;
end
end
end
// otherwise we keep waiting on our grant
end
// we know for sure that the tag we want to send is valid
SEND_TAG: begin
req_port_o.tag_valid = 1'b1;
state_d = IDLE;
if (accept_req) begin
// start the translation process even though we do not know if the addresses match
// this should ease timing
translation_req_o = 1'b1;
// check if the page offset matches with a store, if it does stall and wait
if (!page_offset_matches_i) begin
// make a load request to memory
req_port_o.data_req = 1'b1;
// we got no data grant so wait for the grant before sending the tag
if (!req_port_i.data_gnt) begin
state_d = WAIT_GNT;
end else begin
// we got a grant so we can send the tag in the next cycle
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = ABORT_TRANSACTION;
end else begin
if (!stall_ni) begin
// we got a grant and a hit on the DTLB so we can send the tag in the next cycle
state_d = SEND_TAG;
pop_ld_o = 1'b1;
// translation valid but this is to NC and the WB is not yet empty.
end else begin
state_d = ABORT_TRANSACTION_NI;
end
end
end
end else begin
// wait for the store buffer to train and the page offset to not match anymore
state_d = WAIT_PAGE_OFFSET;
end
end
// ----------
// Exception
// ----------
// if we got an exception we need to kill the request immediately
if (ex_i.valid) begin
req_port_o.kill_req = 1'b1;
end
end
WAIT_FLUSH: begin
// the D$ arbiter will take care of presenting this to the memory only in case we
// have an outstanding request
req_port_o.kill_req = 1'b1;
req_port_o.tag_valid = 1'b1;
// we've killed the current request so we can go back to idle
state_d = IDLE;
end
default: state_d = IDLE;
endcase
// if we just flushed and the queue is not empty, or we are getting an rvalid this cycle, wait in an extra stage
if (flush_i) begin
state_d = WAIT_FLUSH;
end end
end
// wait here for the page offset to not match anymore
WAIT_PAGE_OFFSET: begin
// we make a new request as soon as the page offset does not match anymore
if (!page_offset_matches_i) begin
state_d = WAIT_GNT;
end
end
// abort the previous request - free the D$ arbiter
// we are here because of a TLB miss, we need to abort the current request and give way for the
// PTW walker to satisfy the TLB miss
ABORT_TRANSACTION, ABORT_TRANSACTION_NI: begin
req_port_o.kill_req = 1'b1;
req_port_o.tag_valid = 1'b1;
// either re-do the request or wait until the WB is empty (depending on where we came from).
state_d = (state_q == ABORT_TRANSACTION_NI) ? WAIT_WB_EMPTY : WAIT_TRANSLATION;
end
// Wait until the write-back buffer is empty in the data cache.
WAIT_WB_EMPTY: begin
// the write buffer is empty, so lets go and re-do the translation.
if (dcache_wbuffer_not_ni_i) state_d = WAIT_TRANSLATION;
end
WAIT_TRANSLATION: begin
translation_req_o = 1'b1;
// we've got a hit and we can continue with the request process
if (dtlb_hit_i) state_d = WAIT_GNT;
// we got an exception
if (ex_i.valid) begin
// the next state will be the idle state
state_d = IDLE;
// pop load - but only if we are not getting an rvalid in here - otherwise we will over-write an incoming transaction
pop_ld_o = ~req_port_i.data_rvalid;
end
end
WAIT_GNT: begin
// keep the translation request up
translation_req_o = 1'b1;
// keep the request up
req_port_o.data_req = 1'b1;
// we finally got a data grant
if (req_port_i.data_gnt) begin
// so we send the tag in the next cycle
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = ABORT_TRANSACTION;
end else begin
if (!stall_ni) begin
// we got a grant and a hit on the DTLB so we can send the tag in the next cycle
state_d = SEND_TAG;
pop_ld_o = 1'b1;
// translation valid but this is to NC and the WB is not yet empty.
end else begin
state_d = ABORT_TRANSACTION_NI;
end
end
end
// otherwise we keep waiting on our grant
end
// we know for sure that the tag we want to send is valid
SEND_TAG: begin
req_port_o.tag_valid = 1'b1;
state_d = IDLE;
if (accept_req) begin
// start the translation process even though we do not know if the addresses match
// this should ease timing
translation_req_o = 1'b1;
// check if the page offset matches with a store, if it does stall and wait
if (!page_offset_matches_i) begin
// make a load request to memory
req_port_o.data_req = 1'b1;
// we got no data grant so wait for the grant before sending the tag
if (!req_port_i.data_gnt) begin
state_d = WAIT_GNT;
end else begin
// we got a grant so we can send the tag in the next cycle
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = ABORT_TRANSACTION;
end else begin
if (!stall_ni) begin
// we got a grant and a hit on the DTLB so we can send the tag in the next cycle
state_d = SEND_TAG;
pop_ld_o = 1'b1;
// translation valid but this is to NC and the WB is not yet empty.
end else begin
state_d = ABORT_TRANSACTION_NI;
end
end
end
end else begin
// wait for the store buffer to train and the page offset to not match anymore
state_d = WAIT_PAGE_OFFSET;
end
end
// ----------
// Exception
// ----------
// if we got an exception we need to kill the request immediately
if (ex_i.valid) begin
req_port_o.kill_req = 1'b1;
end
end
WAIT_FLUSH: begin
// the D$ arbiter will take care of presenting this to the memory only in case we
// have an outstanding request
req_port_o.kill_req = 1'b1;
req_port_o.tag_valid = 1'b1;
// we've killed the current request so we can go back to idle
state_d = IDLE;
end
default: state_d = IDLE;
endcase
// if we just flushed and the queue is not empty, or we are getting an rvalid this cycle, wait in an extra stage
if (flush_i) begin
state_d = WAIT_FLUSH;
end
end
// track the load data for later usage
assign ldbuf_w = req_port_o.data_req & req_port_i.data_gnt;
// ---------------
// Retire Load
// ---------------
// Index of the load-buffer entry being retired: taken from the response id of
// the cache when the buffer has more than one entry, constant 0 otherwise.
assign ldbuf_rindex = (CVA6Cfg.NrLoadBufEntries > 1) ? ldbuf_id_t'(req_port_i.data_rid) : 1'b0;
// Stored metadata of that entry.
assign ldbuf_rdata = ldbuf_q[ldbuf_rindex];
// decoupled rvalid process
always_comb begin : rvalid_output
// read the pending load buffer
ldbuf_r = req_port_i.data_rvalid;
trans_id_o = ldbuf_q[ldbuf_rindex].trans_id;
valid_o = 1'b0;
ex_o.valid = 1'b0;
// we got an rvalid and its corresponding request was not flushed
if (req_port_i.data_rvalid && !ldbuf_flushed_q[ldbuf_rindex]) begin
// if the response corresponds to the last request, check that we are not killing it
if ((ldbuf_last_id_q != ldbuf_rindex) || !req_port_o.kill_req) valid_o = 1'b1;
// the output is also valid if we got an exception. An exception arrives one cycle after
// dtlb_hit_i is asserted, i.e. when we are in SEND_TAG. Otherwise, the exception
// corresponds to the next request that is already being translated (see below).
if (ex_i.valid && (state_q == SEND_TAG)) begin
valid_o = 1'b1;
ex_o.valid = 1'b1;
end
end end
// track the load data for later usage // an exception occurred during translation
assign ldbuf_w = req_port_o.data_req & req_port_i.data_gnt; // exceptions can retire out-of-order -> but we need to give priority to non-excepting load and stores
// so we simply check if we got an rvalid if so we prioritize it by not retiring the exception - we simply go for another
// --------------- // round in the load FSM
// Retire Load if ((state_q == WAIT_TRANSLATION) && !req_port_i.data_rvalid && ex_i.valid && valid_i) begin
// --------------- trans_id_o = lsu_ctrl_i.trans_id;
assign ldbuf_rindex = (CVA6Cfg.NrLoadBufEntries > 1) ? ldbuf_id_t'(req_port_i.data_rid) : 1'b0, valid_o = 1'b1;
ldbuf_rdata = ldbuf_q[ldbuf_rindex]; ex_o.valid = 1'b1;
// decoupled rvalid process
always_comb begin : rvalid_output
// read the pending load buffer
ldbuf_r = req_port_i.data_rvalid;
trans_id_o = ldbuf_q[ldbuf_rindex].trans_id;
valid_o = 1'b0;
ex_o.valid = 1'b0;
// we got an rvalid and its corresponding request was not flushed
if (req_port_i.data_rvalid && !ldbuf_flushed_q[ldbuf_rindex]) begin
// if the response corresponds to the last request, check that we are not killing it
if((ldbuf_last_id_q != ldbuf_rindex) || !req_port_o.kill_req)
valid_o = 1'b1;
// the output is also valid if we got an exception. An exception arrives one cycle after
// dtlb_hit_i is asserted, i.e. when we are in SEND_TAG. Otherwise, the exception
// corresponds to the next request that is already being translated (see below).
if (ex_i.valid && (state_q == SEND_TAG)) begin
valid_o = 1'b1;
ex_o.valid = 1'b1;
end
end
// an exception occurred during translation
// exceptions can retire out-of-order -> but we need to give priority to non-excepting load and stores
// so we simply check if we got an rvalid if so we prioritize it by not retiring the exception - we simply go for another
// round in the load FSM
if ((state_q == WAIT_TRANSLATION) && !req_port_i.data_rvalid && ex_i.valid && valid_i) begin
trans_id_o = lsu_ctrl_i.trans_id;
valid_o = 1'b1;
ex_o.valid = 1'b1;
end
end end
end
// latch physical address for the tag cycle (one cycle after applying the index) // latch physical address for the tag cycle (one cycle after applying the index)
always_ff @(posedge clk_i or negedge rst_ni) begin always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin if (~rst_ni) begin
state_q <= IDLE; state_q <= IDLE;
end else begin end else begin
state_q <= state_d; state_q <= state_d;
end
end end
end
// --------------- // ---------------
// Sign Extend // Sign Extend
// --------------- // ---------------
riscv::xlen_t shifted_data; riscv::xlen_t shifted_data;
// realign as needed // realign as needed
assign shifted_data = req_port_i.data_rdata >> {ldbuf_rdata.address_offset, 3'b000}; assign shifted_data = req_port_i.data_rdata >> {ldbuf_rdata.address_offset, 3'b000};
/* // result mux (leaner code, but more logic stages. /* // result mux (leaner code, but more logic stages.
// can be used instead of the code below (in between //result mux fast) if timing is not so critical) // can be used instead of the code below (in between //result mux fast) if timing is not so critical)
always_comb begin always_comb begin
unique case (ldbuf_rdata.operation) unique case (ldbuf_rdata.operation)
@ -436,74 +445,81 @@ module load_unit import ariane_pkg::*; #(
endcase endcase
end */ end */
// result mux fast // result mux fast
logic [(riscv::XLEN/8)-1:0] rdata_sign_bits; logic [ (riscv::XLEN/8)-1:0] rdata_sign_bits;
logic [riscv::XLEN_ALIGN_BYTES-1:0] rdata_offset; logic [riscv::XLEN_ALIGN_BYTES-1:0] rdata_offset;
logic rdata_sign_bit, rdata_is_signed, rdata_is_fp_signed; logic rdata_sign_bit, rdata_is_signed, rdata_is_fp_signed;
// prepare these signals for faster selection in the next cycle // prepare these signals for faster selection in the next cycle
assign rdata_is_signed = ldbuf_rdata.operation inside {ariane_pkg::LW, ariane_pkg::LH, ariane_pkg::LB}; assign rdata_is_signed = ldbuf_rdata.operation inside {ariane_pkg::LW, ariane_pkg::LH, ariane_pkg::LB};
assign rdata_is_fp_signed = ldbuf_rdata.operation inside {ariane_pkg::FLW, ariane_pkg::FLH, ariane_pkg::FLB}; assign rdata_is_fp_signed = ldbuf_rdata.operation inside {ariane_pkg::FLW, ariane_pkg::FLH, ariane_pkg::FLB};
assign rdata_offset = ((ldbuf_rdata.operation inside {ariane_pkg::LW, ariane_pkg::FLW}) & riscv::IS_XLEN64) ? ldbuf_rdata.address_offset + 3 : assign rdata_offset = ((ldbuf_rdata.operation inside {ariane_pkg::LW, ariane_pkg::FLW}) & riscv::IS_XLEN64) ? ldbuf_rdata.address_offset + 3 :
( ldbuf_rdata.operation inside {ariane_pkg::LH, ariane_pkg::FLH}) ? ldbuf_rdata.address_offset + 1 : ( ldbuf_rdata.operation inside {ariane_pkg::LH, ariane_pkg::FLH}) ? ldbuf_rdata.address_offset + 1 :
ldbuf_rdata.address_offset; ldbuf_rdata.address_offset;
for (genvar i = 0; i < (riscv::XLEN/8); i++) begin : gen_sign_bits for (genvar i = 0; i < (riscv::XLEN / 8); i++) begin : gen_sign_bits
assign rdata_sign_bits[i] = req_port_i.data_rdata[(i+1)*8-1]; assign rdata_sign_bits[i] = req_port_i.data_rdata[(i+1)*8-1];
end end
// select correct sign bit in parallel to result shifter above // select correct sign bit in parallel to result shifter above
// pull to 0 if unsigned // pull to 0 if unsigned
assign rdata_sign_bit = rdata_is_signed & rdata_sign_bits[rdata_offset] | rdata_is_fp_signed; assign rdata_sign_bit = rdata_is_signed & rdata_sign_bits[rdata_offset] | rdata_is_fp_signed;
// result mux // result mux
always_comb begin always_comb begin
unique case (ldbuf_rdata.operation) unique case (ldbuf_rdata.operation)
ariane_pkg::LW, ariane_pkg::LWU: result_o = {{riscv::XLEN-32{rdata_sign_bit}}, shifted_data[31:0]}; ariane_pkg::LW, ariane_pkg::LWU:
ariane_pkg::LH, ariane_pkg::LHU: result_o = {{riscv::XLEN-32+16{rdata_sign_bit}}, shifted_data[15:0]}; result_o = {{riscv::XLEN - 32{rdata_sign_bit}}, shifted_data[31:0]};
ariane_pkg::LB, ariane_pkg::LBU: result_o = {{riscv::XLEN-32+24{rdata_sign_bit}}, shifted_data[7:0]}; ariane_pkg::LH, ariane_pkg::LHU:
ariane_pkg::FLW: begin result_o = {{riscv::XLEN - 32 + 16{rdata_sign_bit}}, shifted_data[15:0]};
if(CVA6Cfg.FpPresent) begin ariane_pkg::LB, ariane_pkg::LBU:
result_o = {{riscv::XLEN-32{rdata_sign_bit}}, shifted_data[31:0]}; result_o = {{riscv::XLEN - 32 + 24{rdata_sign_bit}}, shifted_data[7:0]};
end ariane_pkg::FLW: begin
end if (CVA6Cfg.FpPresent) begin
ariane_pkg::FLH: begin result_o = {{riscv::XLEN - 32{rdata_sign_bit}}, shifted_data[31:0]};
if(CVA6Cfg.FpPresent) begin end
result_o = {{riscv::XLEN-32+16{rdata_sign_bit}}, shifted_data[15:0]}; end
end ariane_pkg::FLH: begin
end if (CVA6Cfg.FpPresent) begin
ariane_pkg::FLB: begin result_o = {{riscv::XLEN - 32 + 16{rdata_sign_bit}}, shifted_data[15:0]};
if(CVA6Cfg.FpPresent) begin end
result_o = {{riscv::XLEN-32+24{rdata_sign_bit}}, shifted_data[7:0]}; end
end ariane_pkg::FLB: begin
end if (CVA6Cfg.FpPresent) begin
result_o = {{riscv::XLEN - 32 + 24{rdata_sign_bit}}, shifted_data[7:0]};
end
end
default: result_o = shifted_data[riscv::XLEN-1:0]; default: result_o = shifted_data[riscv::XLEN-1:0];
endcase endcase
end end
// end result mux fast // end result mux fast
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// assertions // assertions
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
//pragma translate_off //pragma translate_off
`ifndef VERILATOR `ifndef VERILATOR
initial assert (ariane_pkg::DCACHE_TID_WIDTH >= REQ_ID_BITS) else initial
$fatal(1, "CVA6ConfigDcacheIdWidth parameter is not wide enough to encode pending loads"); assert (ariane_pkg::DCACHE_TID_WIDTH >= REQ_ID_BITS)
// check invalid offsets, but only issue a warning as these conditions actually trigger a load address misaligned exception else $fatal(1, "CVA6ConfigDcacheIdWidth parameter is not wide enough to encode pending loads");
addr_offset0: assert property (@(posedge clk_i) disable iff (~rst_ni) // check invalid offsets, but only issue a warning as these conditions actually trigger a load address misaligned exception
ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LW, ariane_pkg::LWU}) |-> ldbuf_wdata.address_offset < 5) else addr_offset0 :
$fatal(1, "invalid address offset used with {LW, LWU}"); assert property (@(posedge clk_i) disable iff (~rst_ni)
addr_offset1: assert property (@(posedge clk_i) disable iff (~rst_ni) ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LW, ariane_pkg::LWU}) |-> ldbuf_wdata.address_offset < 5)
ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LH, ariane_pkg::LHU}) |-> ldbuf_wdata.address_offset < 7) else else $fatal(1, "invalid address offset used with {LW, LWU}");
$fatal(1, "invalid address offset used with {LH, LHU}"); addr_offset1 :
addr_offset2: assert property (@(posedge clk_i) disable iff (~rst_ni) assert property (@(posedge clk_i) disable iff (~rst_ni)
ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LB, ariane_pkg::LBU}) |-> ldbuf_wdata.address_offset < 8) else ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LH, ariane_pkg::LHU}) |-> ldbuf_wdata.address_offset < 7)
$fatal(1, "invalid address offset used with {LB, LBU}"); else $fatal(1, "invalid address offset used with {LH, LHU}");
addr_offset2 :
assert property (@(posedge clk_i) disable iff (~rst_ni)
ldbuf_w |-> (ldbuf_wdata.operation inside {ariane_pkg::LB, ariane_pkg::LBU}) |-> ldbuf_wdata.address_offset < 8)
else $fatal(1, "invalid address offset used with {LB, LBU}");
`endif `endif
//pragma translate_on //pragma translate_on
endmodule endmodule

View file

@ -23,99 +23,100 @@
// the LSU control should sample it and store it for later application to the units. It does so, by storing it in a // the LSU control should sample it and store it for later application to the units. It does so, by storing it in a
// two element FIFO. This is necessary as we only know very late in the cycle whether the load/store will succeed (address check, // two element FIFO. This is necessary as we only know very late in the cycle whether the load/store will succeed (address check,
// TLB hit mainly). So we better unconditionally allow another request to arrive and store this request in case we need to. // TLB hit mainly). So we better unconditionally allow another request to arrive and store this request in case we need to.
module lsu_bypass import ariane_pkg::*; #( module lsu_bypass
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input logic flush_i, input logic flush_i,
input lsu_ctrl_t lsu_req_i, input lsu_ctrl_t lsu_req_i,
input logic lsu_req_valid_i, input logic lsu_req_valid_i,
input logic pop_ld_i, input logic pop_ld_i,
input logic pop_st_i, input logic pop_st_i,
output lsu_ctrl_t lsu_ctrl_o, output lsu_ctrl_t lsu_ctrl_o,
output logic ready_o output logic ready_o
); );
lsu_ctrl_t [1:0] mem_n, mem_q; lsu_ctrl_t [1:0] mem_n, mem_q;
logic read_pointer_n, read_pointer_q; logic read_pointer_n, read_pointer_q;
logic write_pointer_n, write_pointer_q; logic write_pointer_n, write_pointer_q;
logic [1:0] status_cnt_n, status_cnt_q; logic [1:0] status_cnt_n, status_cnt_q;
logic empty; logic empty;
assign empty = (status_cnt_q == 0); assign empty = (status_cnt_q == 0);
assign ready_o = empty; assign ready_o = empty;
always_comb begin always_comb begin
automatic logic [1:0] status_cnt; automatic logic [1:0] status_cnt;
automatic logic write_pointer; automatic logic write_pointer;
automatic logic read_pointer; automatic logic read_pointer;
status_cnt = status_cnt_q; status_cnt = status_cnt_q;
write_pointer = write_pointer_q; write_pointer = write_pointer_q;
read_pointer = read_pointer_q; read_pointer = read_pointer_q;
mem_n = mem_q; mem_n = mem_q;
// we've got a valid LSU request // we've got a valid LSU request
if (lsu_req_valid_i) begin if (lsu_req_valid_i) begin
mem_n[write_pointer_q] = lsu_req_i; mem_n[write_pointer_q] = lsu_req_i;
write_pointer++; write_pointer++;
status_cnt++; status_cnt++;
end
if (pop_ld_i) begin
// invalidate the result
mem_n[read_pointer_q].valid = 1'b0;
read_pointer++;
status_cnt--;
end
if (pop_st_i) begin
// invalidate the result
mem_n[read_pointer_q].valid = 1'b0;
read_pointer++;
status_cnt--;
end
if (pop_st_i && pop_ld_i)
mem_n = '0;
if (flush_i) begin
status_cnt = '0;
write_pointer = '0;
read_pointer = '0;
mem_n = '0;
end
// default assignments
read_pointer_n = read_pointer;
write_pointer_n = write_pointer;
status_cnt_n = status_cnt;
end end
// output assignment if (pop_ld_i) begin
always_comb begin : output_assignments // invalidate the result
if (empty) begin mem_n[read_pointer_q].valid = 1'b0;
lsu_ctrl_o = lsu_req_i; read_pointer++;
end else begin status_cnt--;
lsu_ctrl_o = mem_q[read_pointer_q];
end
end end
// registers if (pop_st_i) begin
always_ff @(posedge clk_i or negedge rst_ni) begin // invalidate the result
if (~rst_ni) begin mem_n[read_pointer_q].valid = 1'b0;
mem_q <= '0; read_pointer++;
status_cnt_q <= '0; status_cnt--;
write_pointer_q <= '0;
read_pointer_q <= '0;
end else begin
mem_q <= mem_n;
status_cnt_q <= status_cnt_n;
write_pointer_q <= write_pointer_n;
read_pointer_q <= read_pointer_n;
end
end end
if (pop_st_i && pop_ld_i) mem_n = '0;
if (flush_i) begin
status_cnt = '0;
write_pointer = '0;
read_pointer = '0;
mem_n = '0;
end
// default assignments
read_pointer_n = read_pointer;
write_pointer_n = write_pointer;
status_cnt_n = status_cnt;
end
// output assignment
always_comb begin : output_assignments
if (empty) begin
lsu_ctrl_o = lsu_req_i;
end else begin
lsu_ctrl_o = mem_q[read_pointer_q];
end
end
// registers
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
mem_q <= '0;
status_cnt_q <= '0;
write_pointer_q <= '0;
read_pointer_q <= '0;
end else begin
mem_q <= mem_n;
status_cnt_q <= status_cnt_n;
write_pointer_q <= write_pointer_n;
read_pointer_q <= read_pointer_n;
end
end
endmodule endmodule

View file

@ -26,495 +26,538 @@
// 2020-02-17 0.1 S.Jacq MMU Sv32 for CV32A6 // 2020-02-17 0.1 S.Jacq MMU Sv32 for CV32A6
// =========================================================================== // // =========================================================================== //
module cva6_mmu_sv32 import ariane_pkg::*; #( module cva6_mmu_sv32
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter int unsigned INSTR_TLB_ENTRIES = 2, #(
parameter int unsigned DATA_TLB_ENTRIES = 2, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned ASID_WIDTH = 1 parameter int unsigned INSTR_TLB_ENTRIES = 2,
parameter int unsigned DATA_TLB_ENTRIES = 2,
parameter int unsigned ASID_WIDTH = 1
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input logic flush_i, input logic flush_i,
input logic enable_translation_i, input logic enable_translation_i,
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
// IF interface // IF interface
input icache_arsp_t icache_areq_i, input icache_arsp_t icache_areq_i,
output icache_areq_t icache_areq_o, output icache_areq_t icache_areq_o,
// LSU interface // LSU interface
// this is a more minimalistic interface because the actual addressing logic is handled // this is a more minimalistic interface because the actual addressing logic is handled
// in the LSU as we distinguish load and stores, what we do here is simple address translation // in the LSU as we distinguish load and stores, what we do here is simple address translation
input exception_t misaligned_ex_i, input exception_t misaligned_ex_i,
input logic lsu_req_i, // request address translation input logic lsu_req_i, // request address translation
input logic [riscv::VLEN-1:0] lsu_vaddr_i, // virtual address in input logic [riscv::VLEN-1:0] lsu_vaddr_i, // virtual address in
input logic lsu_is_store_i, // the translation is requested by a store input logic lsu_is_store_i, // the translation is requested by a store
// if we need to walk the page table we can't grant in the same cycle // if we need to walk the page table we can't grant in the same cycle
// Cycle 0 // Cycle 0
output logic lsu_dtlb_hit_o, // sent in the same cycle as the request if translation hits in the DTLB output logic lsu_dtlb_hit_o, // sent in the same cycle as the request if translation hits in the DTLB
output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o, // ppn (send same cycle as hit) output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o, // ppn (send same cycle as hit)
// Cycle 1 // Cycle 1
output logic lsu_valid_o, // translation is valid output logic lsu_valid_o, // translation is valid
output logic [riscv::PLEN-1:0] lsu_paddr_o, // translated address output logic [riscv::PLEN-1:0] lsu_paddr_o, // translated address
output exception_t lsu_exception_o, // address translation threw an exception output exception_t lsu_exception_o, // address translation threw an exception
// General control signals // General control signals
input riscv::priv_lvl_t priv_lvl_i, input riscv::priv_lvl_t priv_lvl_i,
input riscv::priv_lvl_t ld_st_priv_lvl_i, input riscv::priv_lvl_t ld_st_priv_lvl_i,
input logic sum_i, input logic sum_i,
input logic mxr_i, input logic mxr_i,
// input logic flag_mprv_i, // input logic flag_mprv_i,
input logic [riscv::PPNW-1:0] satp_ppn_i, input logic [riscv::PPNW-1:0] satp_ppn_i,
input logic [ASID_WIDTH-1:0] asid_i, input logic [ASID_WIDTH-1:0] asid_i,
input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i, input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,
input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
input logic flush_tlb_i, input logic flush_tlb_i,
// Performance counters // Performance counters
output logic itlb_miss_o, output logic itlb_miss_o,
output logic dtlb_miss_o, output logic dtlb_miss_o,
// PTW memory interface // PTW memory interface
input dcache_req_o_t req_port_i, input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o, output dcache_req_i_t req_port_o,
// PMP // PMP
input riscv::pmpcfg_t [15:0] pmpcfg_i, input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i input logic [15:0][riscv::PLEN-3:0] pmpaddr_i
); );
logic iaccess_err; // insufficient privilege to access this instruction page logic iaccess_err; // insufficient privilege to access this instruction page
logic daccess_err; // insufficient privilege to access this data page logic daccess_err; // insufficient privilege to access this data page
logic ptw_active; // PTW is currently walking a page table logic ptw_active; // PTW is currently walking a page table
logic walking_instr; // PTW is walking because of an ITLB miss logic walking_instr; // PTW is walking because of an ITLB miss
logic ptw_error; // PTW threw an exception logic ptw_error; // PTW threw an exception
logic ptw_access_exception; // PTW threw an access exception (PMPs) logic ptw_access_exception; // PTW threw an access exception (PMPs)
logic [riscv::PLEN-1:0] ptw_bad_paddr; // PTW PMP exception bad physical addr logic [riscv::PLEN-1:0] ptw_bad_paddr; // PTW PMP exception bad physical addr
logic [riscv::VLEN-1:0] update_vaddr; logic [riscv::VLEN-1:0] update_vaddr;
tlb_update_sv32_t update_itlb, update_dtlb, update_shared_tlb; tlb_update_sv32_t update_itlb, update_dtlb, update_shared_tlb;
logic itlb_lu_access; logic itlb_lu_access;
riscv::pte_sv32_t itlb_content; riscv::pte_sv32_t itlb_content;
logic itlb_is_4M; logic itlb_is_4M;
logic itlb_lu_hit; logic itlb_lu_hit;
logic dtlb_lu_access; logic dtlb_lu_access;
riscv::pte_sv32_t dtlb_content; riscv::pte_sv32_t dtlb_content;
logic dtlb_is_4M; logic dtlb_is_4M;
logic dtlb_lu_hit; logic dtlb_lu_hit;
logic shared_tlb_access; logic shared_tlb_access;
logic [riscv::VLEN-1:0] shared_tlb_vaddr; logic [riscv::VLEN-1:0] shared_tlb_vaddr;
logic shared_tlb_hit; logic shared_tlb_hit;
logic itlb_req; logic itlb_req;
// Assignments // Assignments
assign itlb_lu_access = icache_areq_i.fetch_req; assign itlb_lu_access = icache_areq_i.fetch_req;
assign dtlb_lu_access = lsu_req_i; assign dtlb_lu_access = lsu_req_i;
cva6_tlb_sv32 #( cva6_tlb_sv32 #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
.TLB_ENTRIES ( INSTR_TLB_ENTRIES ), .TLB_ENTRIES(INSTR_TLB_ENTRIES),
.ASID_WIDTH ( ASID_WIDTH ) .ASID_WIDTH (ASID_WIDTH)
) i_itlb ( ) i_itlb (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( flush_tlb_i ), .flush_i(flush_tlb_i),
.update_i ( update_itlb ), .update_i(update_itlb),
.lu_access_i ( itlb_lu_access ), .lu_access_i (itlb_lu_access),
.lu_asid_i ( asid_i ), .lu_asid_i (asid_i),
.asid_to_be_flushed_i ( asid_to_be_flushed_i ), .asid_to_be_flushed_i (asid_to_be_flushed_i),
.vaddr_to_be_flushed_i ( vaddr_to_be_flushed_i ), .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
.lu_vaddr_i ( icache_areq_i.fetch_vaddr ), .lu_vaddr_i (icache_areq_i.fetch_vaddr),
.lu_content_o ( itlb_content ), .lu_content_o (itlb_content),
.lu_is_4M_o ( itlb_is_4M ), .lu_is_4M_o(itlb_is_4M),
.lu_hit_o ( itlb_lu_hit ) .lu_hit_o (itlb_lu_hit)
); );
cva6_tlb_sv32 #( cva6_tlb_sv32 #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
.TLB_ENTRIES ( DATA_TLB_ENTRIES ), .TLB_ENTRIES(DATA_TLB_ENTRIES),
.ASID_WIDTH ( ASID_WIDTH ) .ASID_WIDTH (ASID_WIDTH)
) i_dtlb ( ) i_dtlb (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( flush_tlb_i ), .flush_i(flush_tlb_i),
.update_i ( update_dtlb ), .update_i(update_dtlb),
.lu_access_i ( dtlb_lu_access ), .lu_access_i (dtlb_lu_access),
.lu_asid_i ( asid_i ), .lu_asid_i (asid_i),
.asid_to_be_flushed_i ( asid_to_be_flushed_i ), .asid_to_be_flushed_i (asid_to_be_flushed_i),
.vaddr_to_be_flushed_i ( vaddr_to_be_flushed_i ), .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
.lu_vaddr_i ( lsu_vaddr_i ), .lu_vaddr_i (lsu_vaddr_i),
.lu_content_o ( dtlb_content ), .lu_content_o (dtlb_content),
.lu_is_4M_o ( dtlb_is_4M ), .lu_is_4M_o(dtlb_is_4M),
.lu_hit_o ( dtlb_lu_hit ) .lu_hit_o (dtlb_lu_hit)
); );
cva6_shared_tlb_sv32 #( cva6_shared_tlb_sv32 #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
.SHARED_TLB_DEPTH ( 64 ), .SHARED_TLB_DEPTH(64),
.SHARED_TLB_WAYS ( 2 ), .SHARED_TLB_WAYS (2),
.ASID_WIDTH ( ASID_WIDTH ) .ASID_WIDTH (ASID_WIDTH)
) i_shared_tlb ( ) i_shared_tlb (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( flush_tlb_i ), .flush_i(flush_tlb_i),
.enable_translation_i ( enable_translation_i ), .enable_translation_i (enable_translation_i),
.en_ld_st_translation_i ( en_ld_st_translation_i), .en_ld_st_translation_i(en_ld_st_translation_i),
.asid_i (asid_i ), .asid_i (asid_i),
// from TLBs // from TLBs
// did we miss? // did we miss?
.itlb_access_i ( itlb_lu_access ), .itlb_access_i(itlb_lu_access),
.itlb_hit_i ( itlb_lu_hit ), .itlb_hit_i (itlb_lu_hit),
.itlb_vaddr_i ( icache_areq_i.fetch_vaddr ), .itlb_vaddr_i (icache_areq_i.fetch_vaddr),
.dtlb_access_i ( dtlb_lu_access ), .dtlb_access_i(dtlb_lu_access),
.dtlb_hit_i ( dtlb_lu_hit ), .dtlb_hit_i (dtlb_lu_hit),
.dtlb_vaddr_i ( lsu_vaddr_i ), .dtlb_vaddr_i (lsu_vaddr_i),
// to TLBs, update logic // to TLBs, update logic
.itlb_update_o ( update_itlb ), .itlb_update_o(update_itlb),
.dtlb_update_o ( update_dtlb ), .dtlb_update_o(update_dtlb),
// Performance counters // Performance counters
.itlb_miss_o (itlb_miss_o ), .itlb_miss_o(itlb_miss_o),
.dtlb_miss_o (dtlb_miss_o ), .dtlb_miss_o(dtlb_miss_o),
.shared_tlb_access_o ( shared_tlb_access ), .shared_tlb_access_o(shared_tlb_access),
.shared_tlb_hit_o ( shared_tlb_hit ), .shared_tlb_hit_o (shared_tlb_hit),
.shared_tlb_vaddr_o ( shared_tlb_vaddr ), .shared_tlb_vaddr_o (shared_tlb_vaddr),
.itlb_req_o ( itlb_req ), .itlb_req_o (itlb_req),
// to update shared tlb // to update shared tlb
.shared_tlb_update_i (update_shared_tlb ) .shared_tlb_update_i(update_shared_tlb)
); );
cva6_ptw_sv32 #( cva6_ptw_sv32 #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
.ASID_WIDTH ( ASID_WIDTH ) .ASID_WIDTH(ASID_WIDTH)
) i_ptw ( ) i_ptw (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( flush_i ), .flush_i(flush_i),
.ptw_active_o ( ptw_active ), .ptw_active_o (ptw_active),
.walking_instr_o ( walking_instr ), .walking_instr_o (walking_instr),
.ptw_error_o ( ptw_error ), .ptw_error_o (ptw_error),
.ptw_access_exception_o ( ptw_access_exception ), .ptw_access_exception_o(ptw_access_exception),
.lsu_is_store_i ( lsu_is_store_i ), .lsu_is_store_i(lsu_is_store_i),
// PTW memory interface // PTW memory interface
.req_port_i ( req_port_i ), .req_port_i (req_port_i),
.req_port_o ( req_port_o ), .req_port_o (req_port_o),
// to Shared TLB, update logic // to Shared TLB, update logic
.shared_tlb_update_o ( update_shared_tlb ), .shared_tlb_update_o(update_shared_tlb),
.update_vaddr_o ( update_vaddr ), .update_vaddr_o(update_vaddr),
.asid_i ( asid_i ), .asid_i(asid_i),
// from shared TLB // from shared TLB
// did we miss? // did we miss?
.shared_tlb_access_i ( shared_tlb_access ), .shared_tlb_access_i(shared_tlb_access),
.shared_tlb_hit_i ( shared_tlb_hit ), .shared_tlb_hit_i (shared_tlb_hit),
.shared_tlb_vaddr_i ( shared_tlb_vaddr ), .shared_tlb_vaddr_i (shared_tlb_vaddr),
.itlb_req_i ( itlb_req ), .itlb_req_i(itlb_req),
// from CSR file // from CSR file
.satp_ppn_i ( satp_ppn_i ), // ppn from satp .satp_ppn_i(satp_ppn_i), // ppn from satp
.mxr_i ( mxr_i ), .mxr_i (mxr_i),
// Performance counters // Performance counters
.shared_tlb_miss_o ( ), //open for now .shared_tlb_miss_o(), //open for now
// PMP // PMP
.pmpcfg_i ( pmpcfg_i ), .pmpcfg_i (pmpcfg_i),
.pmpaddr_i ( pmpaddr_i ), .pmpaddr_i (pmpaddr_i),
.bad_paddr_o ( ptw_bad_paddr ) .bad_paddr_o(ptw_bad_paddr)
); );
// ila_1 i_ila_1 ( // ila_1 i_ila_1 (
// .clk(clk_i), // input wire clk // .clk(clk_i), // input wire clk
// .probe0({req_port_o.address_tag, req_port_o.address_index}), // .probe0({req_port_o.address_tag, req_port_o.address_index}),
// .probe1(req_port_o.data_req), // input wire [63:0] probe1 // .probe1(req_port_o.data_req), // input wire [63:0] probe1
// .probe2(req_port_i.data_gnt), // input wire [0:0] probe2 // .probe2(req_port_i.data_gnt), // input wire [0:0] probe2
// .probe3(req_port_i.data_rdata), // input wire [0:0] probe3 // .probe3(req_port_i.data_rdata), // input wire [0:0] probe3
// .probe4(req_port_i.data_rvalid), // input wire [0:0] probe4 // .probe4(req_port_i.data_rvalid), // input wire [0:0] probe4
// .probe5(ptw_error), // input wire [1:0] probe5 // .probe5(ptw_error), // input wire [1:0] probe5
// .probe6(update_vaddr), // input wire [0:0] probe6 // .probe6(update_vaddr), // input wire [0:0] probe6
// .probe7(update_itlb.valid), // input wire [0:0] probe7 // .probe7(update_itlb.valid), // input wire [0:0] probe7
// .probe8(update_dtlb.valid), // input wire [0:0] probe8 // .probe8(update_dtlb.valid), // input wire [0:0] probe8
// .probe9(dtlb_lu_access), // input wire [0:0] probe9 // .probe9(dtlb_lu_access), // input wire [0:0] probe9
// .probe10(lsu_vaddr_i), // input wire [0:0] probe10 // .probe10(lsu_vaddr_i), // input wire [0:0] probe10
// .probe11(dtlb_lu_hit), // input wire [0:0] probe11 // .probe11(dtlb_lu_hit), // input wire [0:0] probe11
// .probe12(itlb_lu_access), // input wire [0:0] probe12 // .probe12(itlb_lu_access), // input wire [0:0] probe12
// .probe13(icache_areq_i.fetch_vaddr), // input wire [0:0] probe13 // .probe13(icache_areq_i.fetch_vaddr), // input wire [0:0] probe13
// .probe14(itlb_lu_hit) // input wire [0:0] probe13 // .probe14(itlb_lu_hit) // input wire [0:0] probe13
// ); // );
//----------------------- //-----------------------
// Instruction Interface // Instruction Interface
//----------------------- //-----------------------
logic match_any_execute_region; logic match_any_execute_region;
logic pmp_instr_allow; logic pmp_instr_allow;
// The instruction interface is a simple request response interface // The instruction interface is a simple request response interface
always_comb begin : instr_interface always_comb begin : instr_interface
// MMU disabled: just pass through // MMU disabled: just pass through
icache_areq_o.fetch_valid = icache_areq_i.fetch_req; icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
if (riscv::PLEN > riscv::VLEN) if (riscv::PLEN > riscv::VLEN)
icache_areq_o.fetch_paddr = {{riscv::PLEN-riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr};// play through in case we disabled address translation icache_areq_o.fetch_paddr = {
else {riscv::PLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr
icache_areq_o.fetch_paddr = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0];// play through in case we disabled address translation }; // play through in case we disabled address translation
// two potential exception sources: else
// 1. HPTW threw an exception -> signal with a page fault exception icache_areq_o.fetch_paddr = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0];// play through in case we disabled address translation
// 2. We got an access error because of insufficient permissions -> throw an access exception // two potential exception sources:
icache_areq_o.fetch_exception = '0; // 1. HPTW threw an exception -> signal with a page fault exception
// Check whether we are allowed to access this memory region from a fetch perspective // 2. We got an access error because of insufficient permissions -> throw an access exception
iaccess_err = icache_areq_i.fetch_req && (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content.u) icache_areq_o.fetch_exception = '0;
// Check whether we are allowed to access this memory region from a fetch perspective
iaccess_err = icache_areq_i.fetch_req && (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content.u)
|| ((priv_lvl_i == riscv::PRIV_LVL_S) && itlb_content.u)); || ((priv_lvl_i == riscv::PRIV_LVL_S) && itlb_content.u));
// MMU enabled: address from TLB, request delayed until hit. Error when TLB // MMU enabled: address from TLB, request delayed until hit. Error when TLB
// hit and no access right or TLB hit and translated address not valid (e.g. // hit and no access right or TLB hit and translated address not valid (e.g.
// AXI decode error), or when PTW performs walk due to ITLB miss and raises // AXI decode error), or when PTW performs walk due to ITLB miss and raises
// an error. // an error.
if (enable_translation_i) begin if (enable_translation_i) begin
// we work with SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal // we work with SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal
if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin
icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1}; icache_areq_o.fetch_exception = {
end riscv::INSTR_ACCESS_FAULT,
{{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
1'b1
};
end
icache_areq_o.fetch_valid = 1'b0; icache_areq_o.fetch_valid = 1'b0;
// 4K page // 4K page
icache_areq_o.fetch_paddr = {itlb_content.ppn, icache_areq_i.fetch_vaddr[11:0]}; icache_areq_o.fetch_paddr = {itlb_content.ppn, icache_areq_i.fetch_vaddr[11:0]};
// Mega page // Mega page
if (itlb_is_4M) begin if (itlb_is_4M) begin
icache_areq_o.fetch_paddr[21:12] = icache_areq_i.fetch_vaddr[21:12]; icache_areq_o.fetch_paddr[21:12] = icache_areq_i.fetch_vaddr[21:12];
end end
// --------- // ---------
// ITLB Hit // ITLB Hit
// -------- // --------
// if we hit the ITLB output the request signal immediately // if we hit the ITLB output the request signal immediately
if (itlb_lu_hit) begin if (itlb_lu_hit) begin
icache_areq_o.fetch_valid = icache_areq_i.fetch_req; icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
// we got an access error // we got an access error
if (iaccess_err) begin if (iaccess_err) begin
// throw a page fault // throw a page fault
icache_areq_o.fetch_exception = {riscv::INSTR_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1};//to check on wave --> not connected icache_areq_o.fetch_exception = {
end else if (!pmp_instr_allow) begin riscv::INSTR_PAGE_FAULT,
icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, icache_areq_i.fetch_vaddr, 1'b1};//to check on wave --> not connected {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
end 1'b1
end else }; //to check on wave --> not connected
// --------- end else if (!pmp_instr_allow) begin
// ITLB Miss icache_areq_o.fetch_exception = {
// --------- riscv::INSTR_ACCESS_FAULT, icache_areq_i.fetch_vaddr, 1'b1
// watch out for exceptions happening during walking the page table }; //to check on wave --> not connected
if (ptw_active && walking_instr) begin
icache_areq_o.fetch_valid = ptw_error | ptw_access_exception;
if (ptw_error) icache_areq_o.fetch_exception = {riscv::INSTR_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, update_vaddr}, 1'b1};//to check on wave
// TODO(moschn,zarubaf): What should the value of tval be in this case?
else icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1};//to check on wave --> not connected
end
end
// if it didn't match any execute region throw an `Instruction Access Fault`
// or: if we are not translating, check PMPs immediately on the paddr
if (!match_any_execute_region || (!enable_translation_i && !pmp_instr_allow)) begin
icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, icache_areq_o.fetch_paddr[riscv::PLEN-1:2], 1'b1};//to check on wave --> not connected
end end
end else
// ---------
// ITLB Miss
// ---------
// watch out for exceptions happening during walking the page table
if (ptw_active && walking_instr) begin
icache_areq_o.fetch_valid = ptw_error | ptw_access_exception;
if (ptw_error)
icache_areq_o.fetch_exception = {
riscv::INSTR_PAGE_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1
}; //to check on wave
// TODO(moschn,zarubaf): What should the value of tval be in this case?
else
icache_areq_o.fetch_exception = {
riscv::INSTR_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1
}; //to check on wave --> not connected
end
end end
// if it didn't match any execute region throw an `Instruction Access Fault`
// or: if we are not translating, check PMPs immediately on the paddr
if (!match_any_execute_region || (!enable_translation_i && !pmp_instr_allow)) begin
icache_areq_o.fetch_exception = {
riscv::INSTR_ACCESS_FAULT, icache_areq_o.fetch_paddr[riscv::PLEN-1:2], 1'b1
}; //to check on wave --> not connected
end
end
// check for execute flag on memory // check for execute flag on memory
assign match_any_execute_region = config_pkg::is_inside_execute_regions(CVA6Cfg, {{64-riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr}); assign match_any_execute_region = config_pkg::is_inside_execute_regions(
CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr}
);
// Instruction fetch // Instruction fetch
pmp #( pmp #(
.PLEN ( riscv::PLEN ), .PLEN (riscv::PLEN),
.PMP_LEN ( riscv::PLEN - 2 ), .PMP_LEN (riscv::PLEN - 2),
.NR_ENTRIES ( CVA6Cfg.NrPMPEntries ) .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
) i_pmp_if ( ) i_pmp_if (
.addr_i ( icache_areq_o.fetch_paddr ), .addr_i (icache_areq_o.fetch_paddr),
.priv_lvl_i, .priv_lvl_i,
// we will always execute on the instruction fetch port // we will always execute on the instruction fetch port
.access_type_i ( riscv::ACCESS_EXEC ), .access_type_i(riscv::ACCESS_EXEC),
// Configuration // Configuration
.conf_addr_i ( pmpaddr_i ), .conf_addr_i (pmpaddr_i),
.conf_i ( pmpcfg_i ), .conf_i (pmpcfg_i),
.allow_o ( pmp_instr_allow ) .allow_o (pmp_instr_allow)
); );
//----------------------- //-----------------------
// Data Interface // Data Interface
//----------------------- //-----------------------
logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q; logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q;
riscv::pte_sv32_t dtlb_pte_n, dtlb_pte_q; riscv::pte_sv32_t dtlb_pte_n, dtlb_pte_q;
exception_t misaligned_ex_n, misaligned_ex_q; exception_t misaligned_ex_n, misaligned_ex_q;
logic lsu_req_n, lsu_req_q; logic lsu_req_n, lsu_req_q;
logic lsu_is_store_n, lsu_is_store_q; logic lsu_is_store_n, lsu_is_store_q;
logic dtlb_hit_n, dtlb_hit_q; logic dtlb_hit_n, dtlb_hit_q;
logic dtlb_is_4M_n, dtlb_is_4M_q; logic dtlb_is_4M_n, dtlb_is_4M_q;
// check if we need to do translation or if we are always ready (e.g.: we are not translating anything) // check if we need to do translation or if we are always ready (e.g.: we are not translating anything)
assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? dtlb_lu_hit : 1'b1; assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? dtlb_lu_hit : 1'b1;
// Wires to PMP checks // Wires to PMP checks
riscv::pmp_access_t pmp_access_type; riscv::pmp_access_t pmp_access_type;
logic pmp_data_allow; logic pmp_data_allow;
localparam PPNWMin = (riscv::PPNW-1 > 29) ? 29 : riscv::PPNW-1; localparam PPNWMin = (riscv::PPNW - 1 > 29) ? 29 : riscv::PPNW - 1;
// The data interface is simpler and only consists of a request/response interface // The data interface is simpler and only consists of a request/response interface
always_comb begin : data_interface always_comb begin : data_interface
// save request and DTLB response // save request and DTLB response
lsu_vaddr_n = lsu_vaddr_i; lsu_vaddr_n = lsu_vaddr_i;
lsu_req_n = lsu_req_i; lsu_req_n = lsu_req_i;
misaligned_ex_n = misaligned_ex_i; misaligned_ex_n = misaligned_ex_i;
dtlb_pte_n = dtlb_content; dtlb_pte_n = dtlb_content;
dtlb_hit_n = dtlb_lu_hit; dtlb_hit_n = dtlb_lu_hit;
lsu_is_store_n = lsu_is_store_i; lsu_is_store_n = lsu_is_store_i;
dtlb_is_4M_n = dtlb_is_4M; dtlb_is_4M_n = dtlb_is_4M;
if (riscv::PLEN > riscv::VLEN) begin if (riscv::PLEN > riscv::VLEN) begin
lsu_paddr_o = {{riscv::PLEN-riscv::VLEN{1'b0}}, lsu_vaddr_q}; lsu_paddr_o = {{riscv::PLEN - riscv::VLEN{1'b0}}, lsu_vaddr_q};
lsu_dtlb_ppn_o = {{riscv::PLEN-riscv::VLEN{1'b0}},lsu_vaddr_n[riscv::VLEN-1:12]}; lsu_dtlb_ppn_o = {{riscv::PLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n[riscv::VLEN-1:12]};
end else begin
lsu_paddr_o = lsu_vaddr_q[riscv::PLEN-1:0];
lsu_dtlb_ppn_o = lsu_vaddr_n[riscv::PPNW-1:0];
end
lsu_valid_o = lsu_req_q;
lsu_exception_o = misaligned_ex_q;
pmp_access_type = lsu_is_store_q ? riscv::ACCESS_WRITE : riscv::ACCESS_READ;
// mute misaligned exceptions if there is no request otherwise they will throw accidental exceptions
misaligned_ex_n.valid = misaligned_ex_i.valid & lsu_req_i;
// Check if the User flag is set, then we may only access it in supervisor mode
// if SUM is enabled
daccess_err = (ld_st_priv_lvl_i == riscv::PRIV_LVL_S && !sum_i && dtlb_pte_q.u) || // SUM is not set and we are trying to access a user page in supervisor mode
(ld_st_priv_lvl_i == riscv::PRIV_LVL_U && !dtlb_pte_q.u); // this is not a user page but we are in user mode and trying to access it
// translation is enabled and no misaligned exception occurred
if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin
lsu_valid_o = 1'b0;
// 4K page
lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]};
lsu_dtlb_ppn_o = dtlb_content.ppn;
// Mega page
if (dtlb_is_4M_q) begin
lsu_paddr_o[21:12] = lsu_vaddr_q[21:12];
lsu_dtlb_ppn_o[21:12] = lsu_vaddr_n[21:12];
end
// ---------
// DTLB Hit
// --------
if (dtlb_hit_q && lsu_req_q) begin
lsu_valid_o = 1'b1;
// exception priority:
// PAGE_FAULTS have higher priority than ACCESS_FAULTS
// virtual memory based exceptions are PAGE_FAULTS
// physical memory based exceptions are ACCESS_FAULTS (PMA/PMP)
// this is a store
if (lsu_is_store_q) begin
// check if the page is write-able and we are not violating privileges
// also check if the dirty flag is set
if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin
lsu_exception_o = {
riscv::STORE_PAGE_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
1'b1
}; //to check on wave
// Check if any PMPs are violated
end else if (!pmp_data_allow) begin
lsu_exception_o = {
riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1
}; //only 32 bits on 34b of lsu_paddr_o are returned.
end
// this is a load
end else begin end else begin
lsu_paddr_o = lsu_vaddr_q[riscv::PLEN-1:0]; // check for sufficient access privileges - throw a page fault if necessary
lsu_dtlb_ppn_o = lsu_vaddr_n[riscv::PPNW-1:0]; if (daccess_err) begin
lsu_exception_o = {
riscv::LOAD_PAGE_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
1'b1
};
// Check if any PMPs are violated
end else if (!pmp_data_allow) begin
lsu_exception_o = {
riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1
}; //only 32 bits on 34b of lsu_paddr_o are returned.
end
end end
lsu_valid_o = lsu_req_q; end else
lsu_exception_o = misaligned_ex_q;
pmp_access_type = lsu_is_store_q ? riscv::ACCESS_WRITE : riscv::ACCESS_READ;
// mute misaligned exceptions if there is no request otherwise they will throw accidental exceptions // ---------
misaligned_ex_n.valid = misaligned_ex_i.valid & lsu_req_i; // DTLB Miss
// ---------
// Check if the User flag is set, then we may only access it in supervisor mode // watch out for exceptions
// if SUM is enabled if (ptw_active && !walking_instr) begin
daccess_err = (ld_st_priv_lvl_i == riscv::PRIV_LVL_S && !sum_i && dtlb_pte_q.u) || // SUM is not set and we are trying to access a user page in supervisor mode // page table walker threw an exception
(ld_st_priv_lvl_i == riscv::PRIV_LVL_U && !dtlb_pte_q.u); // this is not a user page but we are in user mode and trying to access it if (ptw_error) begin
// translation is enabled and no misaligned exception occurred // an error makes the translation valid
if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin lsu_valid_o = 1'b1;
lsu_valid_o = 1'b0; // the page table walker can only throw page faults
// 4K page if (lsu_is_store_q) begin
lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]}; lsu_exception_o = {
lsu_dtlb_ppn_o = dtlb_content.ppn; riscv::STORE_PAGE_FAULT,
// Mega page {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr},
if (dtlb_is_4M_q) begin 1'b1
lsu_paddr_o[21:12] = lsu_vaddr_q[21:12]; };
lsu_dtlb_ppn_o[21:12] = lsu_vaddr_n[21:12]; end else begin
end lsu_exception_o = {
// --------- riscv::LOAD_PAGE_FAULT,
// DTLB Hit {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr},
// -------- 1'b1
if (dtlb_hit_q && lsu_req_q) begin };
lsu_valid_o = 1'b1; end
// exception priority:
// PAGE_FAULTS have higher priority than ACCESS_FAULTS
// virtual memory based exceptions are PAGE_FAULTS
// physical memory based exceptions are ACCESS_FAULTS (PMA/PMP)
// this is a store
if (lsu_is_store_q) begin
// check if the page is write-able and we are not violating privileges
// also check if the dirty flag is set
if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin
lsu_exception_o = {riscv::STORE_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},lsu_vaddr_q}, 1'b1}; //to check on wave
// Check if any PMPs are violated
end else if (!pmp_data_allow) begin
lsu_exception_o = {riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1}; //only 32 bits on 34b of lsu_paddr_o are returned.
end
// this is a load
end else begin
// check for sufficient access privileges - throw a page fault if necessary
if (daccess_err) begin
lsu_exception_o = {riscv::LOAD_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},lsu_vaddr_q}, 1'b1};
// Check if any PMPs are violated
end else if (!pmp_data_allow) begin
lsu_exception_o = {riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1}; //only 32 bits on 34b of lsu_paddr_o are returned.
end
end
end else
// ---------
// DTLB Miss
// ---------
// watch out for exceptions
if (ptw_active && !walking_instr) begin
// page table walker threw an exception
if (ptw_error) begin
// an error makes the translation valid
lsu_valid_o = 1'b1;
// the page table walker can only throw page faults
if (lsu_is_store_q) begin
lsu_exception_o = {riscv::STORE_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},update_vaddr}, 1'b1};
end else begin
lsu_exception_o = {riscv::LOAD_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},update_vaddr}, 1'b1};
end
end
if (ptw_access_exception) begin
// an error makes the translation valid
lsu_valid_o = 1'b1;
// a PMP violation during the page table walk is reported as an access fault
lsu_exception_o = {riscv::LD_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1};
end
end
end end
// If translation is not enabled, check the paddr immediately against PMPs
else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin if (ptw_access_exception) begin
if (lsu_is_store_q) begin // an error makes the translation valid
lsu_exception_o = {riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1}; lsu_valid_o = 1'b1;
end else begin // a PMP violation during the page table walk is reported as an access fault
lsu_exception_o = {riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1}; lsu_exception_o = {riscv::LD_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1};
end
end end
end
end // If translation is not enabled, check the paddr immediately against PMPs
else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin
if (lsu_is_store_q) begin
lsu_exception_o = {riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1};
end else begin
lsu_exception_o = {riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1};
end
end end
end
// Load/store PMP check // Load/store PMP check
pmp #( pmp #(
.PLEN ( riscv::PLEN ), .PLEN (riscv::PLEN),
.PMP_LEN ( riscv::PLEN - 2 ), .PMP_LEN (riscv::PLEN - 2),
.NR_ENTRIES ( CVA6Cfg.NrPMPEntries ) .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
) i_pmp_data ( ) i_pmp_data (
.addr_i ( lsu_paddr_o ), .addr_i (lsu_paddr_o),
.priv_lvl_i ( ld_st_priv_lvl_i ), .priv_lvl_i (ld_st_priv_lvl_i),
.access_type_i ( pmp_access_type ), .access_type_i(pmp_access_type),
// Configuration // Configuration
.conf_addr_i ( pmpaddr_i ), .conf_addr_i (pmpaddr_i),
.conf_i ( pmpcfg_i ), .conf_i (pmpcfg_i),
.allow_o ( pmp_data_allow ) .allow_o (pmp_data_allow)
); );
// ---------- // ----------
// Registers // Registers
// ---------- // ----------
always_ff @(posedge clk_i or negedge rst_ni) begin always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin if (~rst_ni) begin
lsu_vaddr_q <= '0; lsu_vaddr_q <= '0;
lsu_req_q <= '0; lsu_req_q <= '0;
misaligned_ex_q <= '0; misaligned_ex_q <= '0;
dtlb_pte_q <= '0; dtlb_pte_q <= '0;
dtlb_hit_q <= '0; dtlb_hit_q <= '0;
lsu_is_store_q <= '0; lsu_is_store_q <= '0;
dtlb_is_4M_q <= '0; dtlb_is_4M_q <= '0;
end else begin end else begin
lsu_vaddr_q <= lsu_vaddr_n; lsu_vaddr_q <= lsu_vaddr_n;
lsu_req_q <= lsu_req_n; lsu_req_q <= lsu_req_n;
misaligned_ex_q <= misaligned_ex_n; misaligned_ex_q <= misaligned_ex_n;
dtlb_pte_q <= dtlb_pte_n; dtlb_pte_q <= dtlb_pte_n;
dtlb_hit_q <= dtlb_hit_n; dtlb_hit_q <= dtlb_hit_n;
lsu_is_store_q <= lsu_is_store_n; lsu_is_store_q <= lsu_is_store_n;
dtlb_is_4M_q <= dtlb_is_4M_n; dtlb_is_4M_q <= dtlb_is_4M_n;
end
end end
end
endmodule endmodule

View file

@ -26,374 +26,375 @@
/* verilator lint_off WIDTH */ /* verilator lint_off WIDTH */
module cva6_ptw_sv32 import ariane_pkg::*; #( module cva6_ptw_sv32
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter int ASID_WIDTH = 1 #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int ASID_WIDTH = 1
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush everything, we need to do this because input logic flush_i, // flush everything, we need to do this because
// actually everything we do is speculative at this stage // actually everything we do is speculative at this stage
// e.g.: there could be a CSR instruction that changes everything // e.g.: there could be a CSR instruction that changes everything
output logic ptw_active_o, output logic ptw_active_o,
output logic walking_instr_o, // set when walking for TLB output logic walking_instr_o, // set when walking for TLB
output logic ptw_error_o, // set when an error occurred output logic ptw_error_o, // set when an error occurred
output logic ptw_access_exception_o, // set when a PMP access exception occurred output logic ptw_access_exception_o, // set when a PMP access exception occurred
input logic lsu_is_store_i, // this translation was triggered by a store input logic lsu_is_store_i, // this translation was triggered by a store
// PTW memory interface // PTW memory interface
input dcache_req_o_t req_port_i, input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o, output dcache_req_i_t req_port_o,
// to Shared TLB, update logic // to Shared TLB, update logic
output tlb_update_sv32_t shared_tlb_update_o, output tlb_update_sv32_t shared_tlb_update_o,
output logic [riscv::VLEN-1:0] update_vaddr_o, output logic [riscv::VLEN-1:0] update_vaddr_o,
input logic [ASID_WIDTH-1:0] asid_i, input logic [ASID_WIDTH-1:0] asid_i,
// from shared TLB // from shared TLB
input logic shared_tlb_access_i, input logic shared_tlb_access_i,
input logic shared_tlb_hit_i, input logic shared_tlb_hit_i,
input logic [riscv::VLEN-1:0] shared_tlb_vaddr_i, input logic [riscv::VLEN-1:0] shared_tlb_vaddr_i,
input logic itlb_req_i, input logic itlb_req_i,
// from CSR file // from CSR file
input logic [riscv::PPNW-1:0] satp_ppn_i, // ppn from satp input logic [riscv::PPNW-1:0] satp_ppn_i, // ppn from satp
input logic mxr_i, input logic mxr_i,
// Performance counters // Performance counters
output logic shared_tlb_miss_o, output logic shared_tlb_miss_o,
// PMP // PMP
input riscv::pmpcfg_t [15:0] pmpcfg_i, input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i, input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
output logic [riscv::PLEN-1:0] bad_paddr_o output logic [riscv::PLEN-1:0] bad_paddr_o
); );
// input registers
logic data_rvalid_q;
riscv::xlen_t data_rdata_q;
riscv::pte_sv32_t pte;
assign pte = riscv::pte_sv32_t'(data_rdata_q);
enum logic [2:0] {
IDLE,
WAIT_GRANT,
PTE_LOOKUP,
WAIT_RVALID,
PROPAGATE_ERROR,
PROPAGATE_ACCESS_ERROR,
LATENCY
}
state_q, state_d;
// SV32 defines two levels of page tables
enum logic {
LVL1,
LVL2
}
ptw_lvl_q, ptw_lvl_n;
// is this an instruction page table walk?
logic is_instr_ptw_q, is_instr_ptw_n;
logic global_mapping_q, global_mapping_n;
// latched tag signal
logic tag_valid_n, tag_valid_q;
// register the ASID
logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n;
// register the VPN we need to walk, SV32 defines a 32 bit virtual address
logic [riscv::VLEN-1:0] vaddr_q, vaddr_n;
// 4 byte aligned physical pointer
logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n;
// Assignments
assign update_vaddr_o = vaddr_q;
assign ptw_active_o = (state_q != IDLE);
//assign walking_instr_o = is_instr_ptw_q;
assign walking_instr_o = is_instr_ptw_q;
// directly output the correct physical address
assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0];
assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH];
// we are never going to kill this request
assign req_port_o.kill_req = '0;
// we are never going to write with the HPTW
assign req_port_o.data_wdata = '0;
// we only issue one single request at a time
assign req_port_o.data_id = '0;
// -----------
// Shared TLB Update
// -----------
assign shared_tlb_update_o.vpn = vaddr_q[riscv::SV-1:12];
// update the correct page table level
assign shared_tlb_update_o.is_4M = (ptw_lvl_q == LVL1);
// output the correct ASID
assign shared_tlb_update_o.asid = tlb_update_asid_q;
// set the global mapping bit
assign shared_tlb_update_o.content = pte | (global_mapping_q << 5);
assign req_port_o.tag_valid = tag_valid_q;
logic allow_access;
assign bad_paddr_o = ptw_access_exception_o ? ptw_pptr_q : 'b0;
pmp #(
.CVA6Cfg (CVA6Cfg),
.PLEN (riscv::PLEN),
.PMP_LEN (riscv::PLEN - 2),
.NR_ENTRIES(CVA6Cfg.NrPMPEntries)
) i_pmp_ptw (
.addr_i (ptw_pptr_q),
// PTW access are always checked as if in S-Mode...
.priv_lvl_i (riscv::PRIV_LVL_S),
// ...and they are always loads
.access_type_i(riscv::ACCESS_READ),
// Configuration
.conf_addr_i (pmpaddr_i),
.conf_i (pmpcfg_i),
.allow_o (allow_access)
);
assign req_port_o.data_be = be_gen_32(req_port_o.address_index[1:0], req_port_o.data_size);
//-------------------
// Page table walker
//-------------------
// A virtual address va is translated into a physical address pa as follows:
// 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv32,
// PAGESIZE=2^12 and LEVELS=2.)
// 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. (For
// Sv32, PTESIZE=4.)
// 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an access
// exception.
// 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to step 5.
// Otherwise, this PTE is a pointer to the next level of the page table.
// Let i=i-1. If i < 0, stop and raise an access exception. Otherwise, let
// a = pte.ppn × PAGESIZE and go to step 2.
// 5. A leaf PTE has been found. Determine if the requested memory access
// is allowed by the pte.r, pte.w, and pte.x bits. If not, stop and
// raise an access exception. Otherwise, the translation is successful.
// Set pte.a to 1, and, if the memory access is a store, set pte.d to 1.
// The translated physical address is given as follows:
// - pa.pgoff = va.pgoff.
// - If i > 0, then this is a superpage translation and
// pa.ppn[i-1:0] = va.vpn[i-1:0].
// - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
always_comb begin : ptw
// default assignments
// PTW memory interface
tag_valid_n = 1'b0;
req_port_o.data_req = 1'b0;
req_port_o.data_size = 2'b10;
req_port_o.data_we = 1'b0;
ptw_error_o = 1'b0;
ptw_access_exception_o = 1'b0;
shared_tlb_update_o.valid = 1'b0;
is_instr_ptw_n = is_instr_ptw_q;
ptw_lvl_n = ptw_lvl_q;
ptw_pptr_n = ptw_pptr_q;
state_d = state_q;
global_mapping_n = global_mapping_q;
// input registers // input registers
logic data_rvalid_q; tlb_update_asid_n = tlb_update_asid_q;
riscv::xlen_t data_rdata_q; vaddr_n = vaddr_q;
riscv::pte_sv32_t pte; shared_tlb_miss_o = 1'b0;
assign pte = riscv::pte_sv32_t'(data_rdata_q);
case (state_q)
enum logic[2:0] { IDLE: begin
IDLE, // by default we start with the top-most page table
WAIT_GRANT, ptw_lvl_n = LVL1;
PTE_LOOKUP, global_mapping_n = 1'b0;
WAIT_RVALID, is_instr_ptw_n = 1'b0;
PROPAGATE_ERROR, // if we got a Shared TLB miss
PROPAGATE_ACCESS_ERROR, if (shared_tlb_access_i & ~shared_tlb_hit_i) begin
LATENCY ptw_pptr_n = {
} state_q, state_d; satp_ppn_i, shared_tlb_vaddr_i[riscv::SV-1:22], 2'b0
}; // SATP.PPN * PAGESIZE + VPN*PTESIZE = SATP.PPN * 2^(12) + VPN*4
is_instr_ptw_n = itlb_req_i;
tlb_update_asid_n = asid_i;
vaddr_n = shared_tlb_vaddr_i;
state_d = WAIT_GRANT;
shared_tlb_miss_o = 1'b1;
end
end
// SV32 defines two levels of page tables WAIT_GRANT: begin
enum logic { // send a request out
LVL1, LVL2 req_port_o.data_req = 1'b1;
} ptw_lvl_q, ptw_lvl_n; // wait for the WAIT_GRANT
if (req_port_i.data_gnt) begin
// send the tag valid signal one cycle later
tag_valid_n = 1'b1;
state_d = PTE_LOOKUP;
end
end
// is this an instruction page table walk? PTE_LOOKUP: begin
logic is_instr_ptw_q, is_instr_ptw_n; // we wait for the valid signal
logic global_mapping_q, global_mapping_n; if (data_rvalid_q) begin
// latched tag signal
logic tag_valid_n, tag_valid_q;
// register the ASID
logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n;
// register the VPN we need to walk, SV32 defines a 32 bit virtual address
logic [riscv::VLEN-1:0] vaddr_q, vaddr_n;
// 4 byte aligned physical pointer
logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n;
// Assignments // check if the global mapping bit is set
assign update_vaddr_o = vaddr_q; if (pte.g) global_mapping_n = 1'b1;
assign ptw_active_o = (state_q != IDLE); // -------------
//assign walking_instr_o = is_instr_ptw_q; // Invalid PTE
assign walking_instr_o = is_instr_ptw_q; // -------------
// directly output the correct physical address // If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception.
assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0]; if (!pte.v || (!pte.r && pte.w)) state_d = PROPAGATE_ERROR;
assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH]; // -----------
// we are never going to kill this request // Valid PTE
assign req_port_o.kill_req = '0; // -----------
// we are never going to write with the HPTW else begin
assign req_port_o.data_wdata = '0; //state_d = IDLE;
// we only issue one single request at a time state_d = LATENCY;
assign req_port_o.data_id = '0; // it is a valid PTE
// if pte.r = 1 or pte.x = 1 it is a valid PTE
if (pte.r || pte.x) begin
// Valid translation found (either 4M or 4K entry)
if (is_instr_ptw_q) begin
// ------------
// Update ITLB
// ------------
// If page is not executable, we can directly raise an error. This
// doesn't put a useless entry into the TLB. The same idea applies
// to the access flag since we let the access flag be managed by SW.
if (!pte.x || !pte.a) state_d = PROPAGATE_ERROR;
else shared_tlb_update_o.valid = 1'b1;
// ----------- end else begin
// Shared TLB Update // ------------
// ----------- // Update DTLB
assign shared_tlb_update_o.vpn = vaddr_q[riscv::SV-1:12]; // ------------
// update the correct page table level // Check if the access flag has been set, otherwise throw a page-fault
assign shared_tlb_update_o.is_4M = (ptw_lvl_q == LVL1); // and let the software handle those bits.
// output the correct ASID // If page is not readable (there are no write-only pages)
assign shared_tlb_update_o.asid = tlb_update_asid_q; // we can directly raise an error. This doesn't put a useless
// set the global mapping bit // entry into the TLB.
assign shared_tlb_update_o.content = pte | (global_mapping_q << 5); if (pte.a && (pte.r || (pte.x && mxr_i))) begin
shared_tlb_update_o.valid = 1'b1;
end else begin
assign req_port_o.tag_valid = tag_valid_q; state_d = PROPAGATE_ERROR;
logic allow_access;
assign bad_paddr_o = ptw_access_exception_o ? ptw_pptr_q : 'b0;
pmp #(
.CVA6Cfg ( CVA6Cfg ),
.PLEN ( riscv::PLEN ),
.PMP_LEN ( riscv::PLEN - 2 ),
.NR_ENTRIES ( CVA6Cfg.NrPMPEntries )
) i_pmp_ptw (
.addr_i ( ptw_pptr_q ),
// PTW access are always checked as if in S-Mode...
.priv_lvl_i ( riscv::PRIV_LVL_S ),
// ...and they are always loads
.access_type_i ( riscv::ACCESS_READ ),
// Configuration
.conf_addr_i ( pmpaddr_i ),
.conf_i ( pmpcfg_i ),
.allow_o ( allow_access )
);
assign req_port_o.data_be = be_gen_32(req_port_o.address_index[1:0],req_port_o.data_size );
//-------------------
// Page table walker
//-------------------
// A virtual address va is translated into a physical address pa as follows:
// 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv32,
// PAGESIZE=2^12 and LEVELS=2.)
// 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. (For
// Sv32, PTESIZE=4.)
// 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an access
// exception.
// 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to step 5.
// Otherwise, this PTE is a pointer to the next level of the page table.
// Let i=i-1. If i < 0, stop and raise an access exception. Otherwise, let
// a = pte.ppn × PAGESIZE and go to step 2.
// 5. A leaf PTE has been found. Determine if the requested memory access
// is allowed by the pte.r, pte.w, and pte.x bits. If not, stop and
// raise an access exception. Otherwise, the translation is successful.
// Set pte.a to 1, and, if the memory access is a store, set pte.d to 1.
// The translated physical address is given as follows:
// - pa.pgoff = va.pgoff.
// - If i > 0, then this is a superpage translation and
// pa.ppn[i-1:0] = va.vpn[i-1:0].
// - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
always_comb begin : ptw
// default assignments
// PTW memory interface
tag_valid_n = 1'b0;
req_port_o.data_req = 1'b0;
req_port_o.data_size = 2'b10;
req_port_o.data_we = 1'b0;
ptw_error_o = 1'b0;
ptw_access_exception_o = 1'b0;
shared_tlb_update_o.valid = 1'b0;
is_instr_ptw_n = is_instr_ptw_q;
ptw_lvl_n = ptw_lvl_q;
ptw_pptr_n = ptw_pptr_q;
state_d = state_q;
global_mapping_n = global_mapping_q;
// input registers
tlb_update_asid_n = tlb_update_asid_q;
vaddr_n = vaddr_q;
shared_tlb_miss_o = 1'b0;
case (state_q)
IDLE: begin
// by default we start with the top-most page table
ptw_lvl_n = LVL1;
global_mapping_n = 1'b0;
is_instr_ptw_n = 1'b0;
// if we got a Shared TLB miss
if (shared_tlb_access_i & ~shared_tlb_hit_i) begin
ptw_pptr_n = {satp_ppn_i, shared_tlb_vaddr_i[riscv::SV-1:22], 2'b0}; // SATP.PPN * PAGESIZE + VPN*PTESIZE = SATP.PPN * 2^(12) + VPN*4
is_instr_ptw_n = itlb_req_i;
tlb_update_asid_n = asid_i;
vaddr_n = shared_tlb_vaddr_i;
state_d = WAIT_GRANT;
shared_tlb_miss_o = 1'b1;
end end
end // Request is a store: perform some additional checks
// If the request was a store and the page is not write-able, raise an error
WAIT_GRANT: begin // the same applies if the dirty flag is not set
// send a request out if (lsu_is_store_i && (!pte.w || !pte.d)) begin
req_port_o.data_req = 1'b1; shared_tlb_update_o.valid = 1'b0;
// wait for the WAIT_GRANT state_d = PROPAGATE_ERROR;
if (req_port_i.data_gnt) begin
// send the tag valid signal one cycle later
tag_valid_n = 1'b1;
state_d = PTE_LOOKUP;
end end
end
// check if the ppn is correctly aligned:
// 6. If i > 0 and pa.ppn[i-1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault
// exception.
if (ptw_lvl_q == LVL1 && pte.ppn[9:0] != '0) begin
state_d = PROPAGATE_ERROR;
shared_tlb_update_o.valid = 1'b0;
end
// this is a pointer to the next TLB level
end else begin
// pointer to next level of page table
if (ptw_lvl_q == LVL1) begin
// we are in the second level now
ptw_lvl_n = LVL2;
ptw_pptr_n = {pte.ppn, vaddr_q[21:12], 2'b0};
end
state_d = WAIT_GRANT;
if (ptw_lvl_q == LVL2) begin
// Should already be the last level page table => Error
ptw_lvl_n = LVL2;
state_d = PROPAGATE_ERROR;
end
end end
end
PTE_LOOKUP: begin // Check if this access was actually allowed from a PMP perspective
// we wait for the valid signal if (!allow_access) begin
if (data_rvalid_q) begin shared_tlb_update_o.valid = 1'b0;
// we have to return the failed address in bad_addr
ptw_pptr_n = ptw_pptr_q;
state_d = PROPAGATE_ACCESS_ERROR;
end
end
// we've got a data WAIT_GRANT so tell the cache that the tag is valid
end
// Propagate error to MMU/LSU
PROPAGATE_ERROR: begin
state_d = LATENCY;
ptw_error_o = 1'b1;
end
PROPAGATE_ACCESS_ERROR: begin
state_d = LATENCY;
ptw_access_exception_o = 1'b1;
end
// wait for the rvalid before going back to IDLE
WAIT_RVALID: begin
if (data_rvalid_q) state_d = IDLE;
end
LATENCY: begin
state_d = IDLE;
end
default: begin
state_d = IDLE;
end
endcase
// check if the global mapping bit is set // -------
if (pte.g) // Flush
global_mapping_n = 1'b1; // -------
// should we have flushed before we got an rvalid, wait for it until going back to IDLE
// ------------- if (flush_i) begin
// Invalid PTE // on a flush check whether we are
// ------------- // 1. in the PTE Lookup check whether we still need to wait for an rvalid
// If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception. // 2. waiting for a grant, if so: wait for it
if (!pte.v || (!pte.r && pte.w)) // if not, go back to idle
state_d = PROPAGATE_ERROR; if (((state_q inside {PTE_LOOKUP, WAIT_RVALID}) && !data_rvalid_q) ||
// -----------
// Valid PTE
// -----------
else begin
//state_d = IDLE;
state_d = LATENCY;
// it is a valid PTE
// if pte.r = 1 or pte.x = 1 it is a valid PTE
if (pte.r || pte.x) begin
// Valid translation found (either 4M or 4K entry)
if (is_instr_ptw_q) begin
// ------------
// Update ITLB
// ------------
// If page is not executable, we can directly raise an error. This
// doesn't put a useless entry into the TLB. The same idea applies
// to the access flag since we let the access flag be managed by SW.
if (!pte.x || !pte.a)
state_d = PROPAGATE_ERROR;
else
shared_tlb_update_o.valid = 1'b1;
end else begin
// ------------
// Update DTLB
// ------------
// Check if the access flag has been set, otherwise throw a page-fault
// and let the software handle those bits.
// If page is not readable (there are no write-only pages)
// we can directly raise an error. This doesn't put a useless
// entry into the TLB.
if (pte.a && (pte.r || (pte.x && mxr_i))) begin
shared_tlb_update_o.valid = 1'b1;
end else begin
state_d = PROPAGATE_ERROR;
end
// Request is a store: perform some additional checks
// If the request was a store and the page is not write-able, raise an error
// the same applies if the dirty flag is not set
if (lsu_is_store_i && (!pte.w || !pte.d)) begin
shared_tlb_update_o.valid = 1'b0;
state_d = PROPAGATE_ERROR;
end
end
// check if the ppn is correctly aligned:
// 6. If i > 0 and pa.ppn[i-1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault
// exception.
if (ptw_lvl_q == LVL1 && pte.ppn[9:0] != '0) begin
state_d = PROPAGATE_ERROR;
shared_tlb_update_o.valid = 1'b0;
end
// this is a pointer to the next TLB level
end else begin
// pointer to next level of page table
if (ptw_lvl_q == LVL1) begin
// we are in the second level now
ptw_lvl_n = LVL2;
ptw_pptr_n = {pte.ppn, vaddr_q[21:12], 2'b0};
end
state_d = WAIT_GRANT;
if (ptw_lvl_q == LVL2) begin
// Should already be the last level page table => Error
ptw_lvl_n = LVL2;
state_d = PROPAGATE_ERROR;
end
end
end
// Check if this access was actually allowed from a PMP perspective
if (!allow_access) begin
shared_tlb_update_o.valid = 1'b0;
// we have to return the failed address in bad_addr
ptw_pptr_n = ptw_pptr_q;
state_d = PROPAGATE_ACCESS_ERROR;
end
end
// we've got a data WAIT_GRANT so tell the cache that the tag is valid
end
// Propagate error to MMU/LSU
PROPAGATE_ERROR: begin
state_d = LATENCY;
ptw_error_o = 1'b1;
end
PROPAGATE_ACCESS_ERROR: begin
state_d = LATENCY;
ptw_access_exception_o = 1'b1;
end
// wait for the rvalid before going back to IDLE
WAIT_RVALID: begin
if (data_rvalid_q)
state_d = IDLE;
end
LATENCY: begin
state_d = IDLE;
end
default: begin
state_d = IDLE;
end
endcase
// -------
// Flush
// -------
// should we have flushed before we got an rvalid, wait for it until going back to IDLE
if (flush_i) begin
// on a flush check whether we are
// 1. in the PTE Lookup check whether we still need to wait for an rvalid
// 2. waiting for a grant, if so: wait for it
// if not, go back to idle
if (((state_q inside {PTE_LOOKUP, WAIT_RVALID}) && !data_rvalid_q) ||
((state_q == WAIT_GRANT) && req_port_i.data_gnt)) ((state_q == WAIT_GRANT) && req_port_i.data_gnt))
state_d = WAIT_RVALID; state_d = WAIT_RVALID;
else else state_d = LATENCY;
state_d = LATENCY;
end
end end
end
// sequential process // sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin if (~rst_ni) begin
state_q <= IDLE; state_q <= IDLE;
is_instr_ptw_q <= 1'b0; is_instr_ptw_q <= 1'b0;
ptw_lvl_q <= LVL1; ptw_lvl_q <= LVL1;
tag_valid_q <= 1'b0; tag_valid_q <= 1'b0;
tlb_update_asid_q <= '0; tlb_update_asid_q <= '0;
vaddr_q <= '0; vaddr_q <= '0;
ptw_pptr_q <= '0; ptw_pptr_q <= '0;
global_mapping_q <= 1'b0; global_mapping_q <= 1'b0;
data_rdata_q <= '0; data_rdata_q <= '0;
data_rvalid_q <= 1'b0; data_rvalid_q <= 1'b0;
end else begin end else begin
state_q <= state_d; state_q <= state_d;
ptw_pptr_q <= ptw_pptr_n; ptw_pptr_q <= ptw_pptr_n;
is_instr_ptw_q <= is_instr_ptw_n; is_instr_ptw_q <= is_instr_ptw_n;
ptw_lvl_q <= ptw_lvl_n; ptw_lvl_q <= ptw_lvl_n;
tag_valid_q <= tag_valid_n; tag_valid_q <= tag_valid_n;
tlb_update_asid_q <= tlb_update_asid_n; tlb_update_asid_q <= tlb_update_asid_n;
vaddr_q <= vaddr_n; vaddr_q <= vaddr_n;
global_mapping_q <= global_mapping_n; global_mapping_q <= global_mapping_n;
data_rdata_q <= req_port_i.data_rdata; data_rdata_q <= req_port_i.data_rdata;
data_rvalid_q <= req_port_i.data_rvalid; data_rvalid_q <= req_port_i.data_rvalid;
end
end end
end
endmodule endmodule
/* verilator lint_on WIDTH */ /* verilator lint_on WIDTH */

View file

@ -17,348 +17,351 @@
/* verilator lint_off WIDTH */ /* verilator lint_off WIDTH */
module cva6_shared_tlb_sv32 import ariane_pkg::*; #( module cva6_shared_tlb_sv32
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter int SHARED_TLB_DEPTH = 64, #(
parameter int SHARED_TLB_WAYS = 2, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int ASID_WIDTH = 1 parameter int SHARED_TLB_DEPTH = 64,
parameter int SHARED_TLB_WAYS = 2,
parameter int ASID_WIDTH = 1
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
input logic flush_i, input logic flush_i,
input logic enable_translation_i, // CSRs indicate to enable SV32 input logic enable_translation_i, // CSRs indicate to enable SV32
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
input logic [ASID_WIDTH-1:0] asid_i, input logic [ASID_WIDTH-1:0] asid_i,
// from TLBs // from TLBs
// did we miss? // did we miss?
input logic itlb_access_i, input logic itlb_access_i,
input logic itlb_hit_i, input logic itlb_hit_i,
input logic [riscv::VLEN-1:0] itlb_vaddr_i, input logic [riscv::VLEN-1:0] itlb_vaddr_i,
input logic dtlb_access_i, input logic dtlb_access_i,
input logic dtlb_hit_i, input logic dtlb_hit_i,
input logic [riscv::VLEN-1:0] dtlb_vaddr_i, input logic [riscv::VLEN-1:0] dtlb_vaddr_i,
// to TLBs, update logic // to TLBs, update logic
output tlb_update_sv32_t itlb_update_o, output tlb_update_sv32_t itlb_update_o,
output tlb_update_sv32_t dtlb_update_o, output tlb_update_sv32_t dtlb_update_o,
// Performance counters // Performance counters
output logic itlb_miss_o, output logic itlb_miss_o,
output logic dtlb_miss_o, output logic dtlb_miss_o,
output logic shared_tlb_access_o, output logic shared_tlb_access_o,
output logic shared_tlb_hit_o, output logic shared_tlb_hit_o,
output logic [riscv::VLEN-1:0] shared_tlb_vaddr_o, output logic [riscv::VLEN-1:0] shared_tlb_vaddr_o,
output logic itlb_req_o, output logic itlb_req_o,
// Update shared TLB in case of miss // Update shared TLB in case of miss
input tlb_update_sv32_t shared_tlb_update_i input tlb_update_sv32_t shared_tlb_update_i
); );
function logic [SHARED_TLB_WAYS-1:0] shared_tlb_way_bin2oh ( input logic [$clog2(SHARED_TLB_WAYS)-1:0] in); function logic [SHARED_TLB_WAYS-1:0] shared_tlb_way_bin2oh(input logic [$clog2(SHARED_TLB_WAYS
logic [SHARED_TLB_WAYS-1:0] out; )-1:0] in);
out = '0; logic [SHARED_TLB_WAYS-1:0] out;
out[in] = 1'b1; out = '0;
return out; out[in] = 1'b1;
endfunction return out;
endfunction
typedef struct packed { typedef struct packed {
logic [8:0] asid; //9 bits wide logic [8:0] asid; //9 bits wide
logic [9:0] vpn1; //10 bits wide logic [9:0] vpn1; //10 bits wide
logic [9:0] vpn0; //10 bits wide logic [9:0] vpn0; //10 bits wide
logic is_4M; logic is_4M;
} shared_tag_t; } shared_tag_t;
shared_tag_t shared_tag_wr; shared_tag_t shared_tag_wr;
shared_tag_t [SHARED_TLB_WAYS-1:0] shared_tag_rd; shared_tag_t [SHARED_TLB_WAYS-1:0] shared_tag_rd;
logic [SHARED_TLB_DEPTH-1:0][SHARED_TLB_WAYS-1:0] shared_tag_valid_q, shared_tag_valid_d ; logic [SHARED_TLB_DEPTH-1:0][SHARED_TLB_WAYS-1:0] shared_tag_valid_q, shared_tag_valid_d;
logic [SHARED_TLB_WAYS-1:0] shared_tag_valid; logic [ SHARED_TLB_WAYS-1:0] shared_tag_valid;
logic [SHARED_TLB_WAYS-1:0] tag_wr_en; logic [ SHARED_TLB_WAYS-1:0] tag_wr_en;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_wr_addr; logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_wr_addr;
logic [$bits(shared_tag_t)-1:0] tag_wr_data; logic [ $bits(shared_tag_t)-1:0] tag_wr_data;
logic [SHARED_TLB_WAYS-1:0] tag_rd_en; logic [ SHARED_TLB_WAYS-1:0] tag_rd_en;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_rd_addr; logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_rd_addr;
logic [$bits(shared_tag_t)-1:0] tag_rd_data [SHARED_TLB_WAYS-1:0]; logic [ $bits(shared_tag_t)-1:0] tag_rd_data [SHARED_TLB_WAYS-1:0];
logic [SHARED_TLB_WAYS-1:0] tag_req; logic [ SHARED_TLB_WAYS-1:0] tag_req;
logic [SHARED_TLB_WAYS-1:0] tag_we; logic [ SHARED_TLB_WAYS-1:0] tag_we;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_addr; logic [$clog2(SHARED_TLB_DEPTH)-1:0] tag_addr;
logic [SHARED_TLB_WAYS-1:0] pte_wr_en; logic [ SHARED_TLB_WAYS-1:0] pte_wr_en;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_wr_addr; logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_wr_addr;
logic [$bits(riscv::pte_sv32_t)-1:0] pte_wr_data; logic [$bits(riscv::pte_sv32_t)-1:0] pte_wr_data;
logic [SHARED_TLB_WAYS-1:0] pte_rd_en; logic [ SHARED_TLB_WAYS-1:0] pte_rd_en;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_rd_addr; logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_rd_addr;
logic [$bits(riscv::pte_sv32_t)-1:0] pte_rd_data [SHARED_TLB_WAYS-1:0]; logic [$bits(riscv::pte_sv32_t)-1:0] pte_rd_data [SHARED_TLB_WAYS-1:0];
logic [SHARED_TLB_WAYS-1:0] pte_req; logic [ SHARED_TLB_WAYS-1:0] pte_req;
logic [SHARED_TLB_WAYS-1:0] pte_we; logic [ SHARED_TLB_WAYS-1:0] pte_we;
logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_addr; logic [$clog2(SHARED_TLB_DEPTH)-1:0] pte_addr;
logic [9:0] vpn0_d, vpn1_d, vpn0_q, vpn1_q; logic [9:0] vpn0_d, vpn1_d, vpn0_q, vpn1_q;
riscv::pte_sv32_t [SHARED_TLB_WAYS-1:0] pte; riscv::pte_sv32_t [SHARED_TLB_WAYS-1:0] pte;
logic [riscv::VLEN-1-12:0] itlb_vpn_q; logic [riscv::VLEN-1-12:0] itlb_vpn_q;
logic [riscv::VLEN-1-12:0] dtlb_vpn_q; logic [riscv::VLEN-1-12:0] dtlb_vpn_q;
logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_d; logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_d;
logic shared_tlb_access_q, shared_tlb_access_d; logic shared_tlb_access_q, shared_tlb_access_d;
logic shared_tlb_hit_d; logic shared_tlb_hit_d;
logic [riscv::VLEN-1:0] shared_tlb_vaddr_q, shared_tlb_vaddr_d; logic [riscv::VLEN-1:0] shared_tlb_vaddr_q, shared_tlb_vaddr_d;
logic itlb_req_d, itlb_req_q; logic itlb_req_d, itlb_req_q;
logic dtlb_req_d, dtlb_req_q; logic dtlb_req_d, dtlb_req_q;
// replacement strategy // replacement strategy
logic [SHARED_TLB_WAYS-1:0] way_valid; logic [SHARED_TLB_WAYS-1:0] way_valid;
logic update_lfsr; // shift the LFSR logic update_lfsr; // shift the LFSR
logic [$clog2(SHARED_TLB_WAYS)-1:0] inv_way; // first non-valid encountered logic [$clog2(SHARED_TLB_WAYS)-1:0] inv_way; // first non-valid encountered
logic [$clog2(SHARED_TLB_WAYS)-1:0] rnd_way; // random index for replacement logic [$clog2(SHARED_TLB_WAYS)-1:0] rnd_way; // random index for replacement
logic [$clog2(SHARED_TLB_WAYS)-1:0] repl_way; // way to replace logic [$clog2(SHARED_TLB_WAYS)-1:0] repl_way; // way to replace
logic [SHARED_TLB_WAYS-1:0] repl_way_oh_d; // way to replace (onehot) logic [SHARED_TLB_WAYS-1:0] repl_way_oh_d; // way to replace (onehot)
logic all_ways_valid; // we need to switch repl strategy since all are valid logic all_ways_valid; // we need to switch repl strategy since all are valid
assign shared_tlb_access_o = shared_tlb_access_q; assign shared_tlb_access_o = shared_tlb_access_q;
assign shared_tlb_hit_o = shared_tlb_hit_d; assign shared_tlb_hit_o = shared_tlb_hit_d;
assign shared_tlb_vaddr_o = shared_tlb_vaddr_q; assign shared_tlb_vaddr_o = shared_tlb_vaddr_q;
assign itlb_req_o = itlb_req_q; assign itlb_req_o = itlb_req_q;
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
// tag comparison, hit generation // tag comparison, hit generation
/////////////////////////////////////////////////////// ///////////////////////////////////////////////////////
always_comb begin : itlb_dtlb_miss always_comb begin : itlb_dtlb_miss
itlb_miss_o = 1'b0; itlb_miss_o = 1'b0;
dtlb_miss_o = 1'b0; dtlb_miss_o = 1'b0;
vpn0_d = vpn0_q; vpn0_d = vpn0_q;
vpn1_d = vpn1_q; vpn1_d = vpn1_q;
tag_rd_en = '0; tag_rd_en = '0;
pte_rd_en = '0; pte_rd_en = '0;
itlb_req_d = 1'b0; itlb_req_d = 1'b0;
dtlb_req_d = 1'b0; dtlb_req_d = 1'b0;
tlb_update_asid_d = tlb_update_asid_q; tlb_update_asid_d = tlb_update_asid_q;
shared_tlb_access_d = '0; shared_tlb_access_d = '0;
shared_tlb_vaddr_d = shared_tlb_vaddr_q; shared_tlb_vaddr_d = shared_tlb_vaddr_q;
tag_rd_addr = '0; tag_rd_addr = '0;
pte_rd_addr = '0; pte_rd_addr = '0;
// if we got an ITLB miss // if we got an ITLB miss
if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin
tag_rd_en = '1; tag_rd_en = '1;
tag_rd_addr = itlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)]; tag_rd_addr = itlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)];
pte_rd_en = '1; pte_rd_en = '1;
pte_rd_addr = itlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)]; pte_rd_addr = itlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)];
vpn0_d = itlb_vaddr_i[21:12]; vpn0_d = itlb_vaddr_i[21:12];
vpn1_d = itlb_vaddr_i[31:22]; vpn1_d = itlb_vaddr_i[31:22];
itlb_miss_o = 1'b1; itlb_miss_o = 1'b1;
itlb_req_d = 1'b1; itlb_req_d = 1'b1;
tlb_update_asid_d = asid_i; tlb_update_asid_d = asid_i;
shared_tlb_access_d = 1'b1; shared_tlb_access_d = 1'b1;
shared_tlb_vaddr_d = itlb_vaddr_i; shared_tlb_vaddr_d = itlb_vaddr_i;
// we got an DTLB miss // we got an DTLB miss
end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin
tag_rd_en = '1; tag_rd_en = '1;
tag_rd_addr = dtlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)]; tag_rd_addr = dtlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)];
pte_rd_en = '1; pte_rd_en = '1;
pte_rd_addr = dtlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)]; pte_rd_addr = dtlb_vaddr_i[12+:$clog2(SHARED_TLB_DEPTH)];
vpn0_d = dtlb_vaddr_i[21:12]; vpn0_d = dtlb_vaddr_i[21:12];
vpn1_d = dtlb_vaddr_i[31:22]; vpn1_d = dtlb_vaddr_i[31:22];
dtlb_miss_o = 1'b1; dtlb_miss_o = 1'b1;
dtlb_req_d = 1'b1; dtlb_req_d = 1'b1;
tlb_update_asid_d = asid_i; tlb_update_asid_d = asid_i;
shared_tlb_access_d = 1'b1; shared_tlb_access_d = 1'b1;
shared_tlb_vaddr_d = dtlb_vaddr_i; shared_tlb_vaddr_d = dtlb_vaddr_i;
end
end //itlb_dtlb_miss
always_comb begin : tag_comparison
shared_tlb_hit_d = 1'b0;
dtlb_update_o = '0;
itlb_update_o = '0;
//number of ways
for (int unsigned i = 0; i < SHARED_TLB_WAYS; i++) begin
if (shared_tag_valid[i] && ((tlb_update_asid_q == shared_tag_rd[i].asid) || pte[i].g) && vpn1_q == shared_tag_rd[i].vpn1) begin
if (shared_tag_rd[i].is_4M || vpn0_q == shared_tag_rd[i].vpn0) begin
shared_tlb_hit_d = 1'b1;
if (itlb_req_q) begin
itlb_update_o.valid = 1'b1;
itlb_update_o.vpn = itlb_vpn_q;
itlb_update_o.is_4M = shared_tag_rd[i].is_4M;
itlb_update_o.asid = tlb_update_asid_q;
itlb_update_o.content = pte[i];
end else if (dtlb_req_q) begin
dtlb_update_o.valid = 1'b1;
dtlb_update_o.vpn = dtlb_vpn_q;
dtlb_update_o.is_4M = shared_tag_rd[i].is_4M;
dtlb_update_o.asid = tlb_update_asid_q;
dtlb_update_o.content = pte[i];
end
end end
end //itlb_dtlb_miss
// Tag comparison and hit generation for the shared TLB.
// Compares the tags read from every way against the registered request
// (tlb_update_asid_q, vpn1_q, vpn0_q) and, on a match, drives the update
// port of the TLB whose request was registered.
always_comb begin : tag_comparison
// defaults: no hit, both update ports cleared
shared_tlb_hit_d = 1'b0;
dtlb_update_o = '0;
itlb_update_o = '0;
//number of ways
for (int unsigned i = 0; i < SHARED_TLB_WAYS; i++) begin
// first-level match: the way must be valid, VPN1 must match, and the ASID
// must match unless the stored PTE has its global bit (g) set
if (shared_tag_valid[i] && ((tlb_update_asid_q == shared_tag_rd[i].asid) || pte[i].g) && vpn1_q == shared_tag_rd[i].vpn1) begin
// entries flagged is_4M hit on VPN1 alone; all others must match VPN0 as well
if (shared_tag_rd[i].is_4M || vpn0_q == shared_tag_rd[i].vpn0) begin
shared_tlb_hit_d = 1'b1;
// forward the matching entry; a registered ITLB request is checked before a DTLB one
if (itlb_req_q) begin
itlb_update_o.valid = 1'b1;
itlb_update_o.vpn = itlb_vpn_q;
itlb_update_o.is_4M = shared_tag_rd[i].is_4M;
itlb_update_o.asid = tlb_update_asid_q;
itlb_update_o.content = pte[i];
end else if (dtlb_req_q) begin
dtlb_update_o.valid = 1'b1;
dtlb_update_o.vpn = dtlb_vpn_q;
dtlb_update_o.is_4M = shared_tag_rd[i].is_4M;
dtlb_update_o.asid = tlb_update_asid_q;
dtlb_update_o.content = pte[i];
end
end
end
end
end //tag_comparison
// sequential process
// Registers of the shared TLB lookup path. All of them are cleared by the
// asynchronous, active-low reset rst_ni and otherwise updated on every
// rising edge of clk_i.
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
itlb_vpn_q <= '0;
dtlb_vpn_q <= '0;
tlb_update_asid_q <= '0;
shared_tlb_access_q <= '0;
shared_tlb_vaddr_q <= '0;
shared_tag_valid_q <= '0;
vpn0_q <= '0;
vpn1_q <= '0;
itlb_req_q <= '0;
dtlb_req_q <= '0;
shared_tag_valid <= '0;
end else begin
// the VPN of both incoming virtual addresses (bits [riscv::SV-1:12]) is captured unconditionally
itlb_vpn_q <= itlb_vaddr_i[riscv::SV-1:12];
dtlb_vpn_q <= dtlb_vaddr_i[riscv::SV-1:12];
tlb_update_asid_q <= tlb_update_asid_d;
shared_tlb_access_q <= shared_tlb_access_d;
shared_tlb_vaddr_q <= shared_tlb_vaddr_d;
shared_tag_valid_q <= shared_tag_valid_d;
vpn0_q <= vpn0_d;
vpn1_q <= vpn1_d;
itlb_req_q <= itlb_req_d;
dtlb_req_q <= dtlb_req_d;
// valid bits of the set addressed by the current read, registered for the tag comparison
// NOTE(review): presumably this delay lines the valid bits up with the RAM read data -- confirm against the sram model
shared_tag_valid <= shared_tag_valid_q[tag_rd_addr];
end
end
end //tag_comparison
// ------------------ // sequential process
// Update and Flush always_ff @(posedge clk_i or negedge rst_ni) begin
// ------------------ if (~rst_ni) begin
always_comb begin : update_flush itlb_vpn_q <= '0;
shared_tag_valid_d = shared_tag_valid_q; dtlb_vpn_q <= '0;
tag_wr_en = '0; tlb_update_asid_q <= '0;
pte_wr_en = '0; shared_tlb_access_q <= '0;
shared_tlb_vaddr_q <= '0;
if (flush_i) begin shared_tag_valid_q <= '0;
shared_tag_valid_d = '0; vpn0_q <= '0;
end else if (shared_tlb_update_i.valid) begin vpn1_q <= '0;
for (int unsigned i = 0; i < SHARED_TLB_WAYS; i++) begin itlb_req_q <= '0;
if (repl_way_oh_d[i]) begin dtlb_req_q <= '0;
shared_tag_valid_d[shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]][i] = 1'b1; shared_tag_valid <= '0;
tag_wr_en[i] = 1'b1; end else begin
pte_wr_en[i] = 1'b1; itlb_vpn_q <= itlb_vaddr_i[riscv::SV-1:12];
end dtlb_vpn_q <= dtlb_vaddr_i[riscv::SV-1:12];
end tlb_update_asid_q <= tlb_update_asid_d;
end shared_tlb_access_q <= shared_tlb_access_d;
end //update_flush shared_tlb_vaddr_q <= shared_tlb_vaddr_d;
shared_tag_valid_q <= shared_tag_valid_d;
assign shared_tag_wr.asid = shared_tlb_update_i.asid; vpn0_q <= vpn0_d;
assign shared_tag_wr.vpn1 = shared_tlb_update_i.vpn[19:10]; vpn1_q <= vpn1_d;
assign shared_tag_wr.vpn0 = shared_tlb_update_i.vpn[9:0]; itlb_req_q <= itlb_req_d;
assign shared_tag_wr.is_4M = shared_tlb_update_i.is_4M; dtlb_req_q <= dtlb_req_d;
shared_tag_valid <= shared_tag_valid_q[tag_rd_addr];
assign tag_wr_addr = shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0];
assign tag_wr_data = shared_tag_wr;
assign pte_wr_addr = shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0];
assign pte_wr_data = shared_tlb_update_i.content;
assign way_valid = shared_tag_valid_q[shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]];
assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
assign update_lfsr = shared_tlb_update_i.valid & all_ways_valid;
assign repl_way_oh_d = (shared_tlb_update_i.valid) ? shared_tlb_way_bin2oh(repl_way) : '0;
lzc #(
.WIDTH ( SHARED_TLB_WAYS )
) i_lzc (
.in_i ( ~way_valid ),
.cnt_o ( inv_way ),
.empty_o ( all_ways_valid )
);
lfsr #(
.LfsrWidth ( 8 ),
.OutWidth ( $clog2(SHARED_TLB_WAYS))
) i_lfsr (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.en_i ( update_lfsr ),
.out_o ( rnd_way )
);
///////////////////////////////////////////////////////
// memory arrays and regs
///////////////////////////////////////////////////////
assign tag_req = tag_wr_en | tag_rd_en;
assign tag_we = tag_wr_en;
assign tag_addr = tag_wr_en ? tag_wr_addr : tag_rd_addr;
assign pte_req = pte_wr_en | pte_rd_en;
assign pte_we = pte_wr_en;
assign pte_addr = pte_wr_en ? pte_wr_addr : pte_rd_addr;
for (genvar i = 0; i < SHARED_TLB_WAYS; i++) begin : gen_sram
// Tag RAM
sram #(
.DATA_WIDTH ( $bits(shared_tag_t) ),
.NUM_WORDS ( SHARED_TLB_DEPTH )
) tag_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( tag_req[i] ),
.we_i ( tag_we[i] ),
.addr_i ( tag_addr ),
.wuser_i ( '0 ),
.wdata_i ( tag_wr_data ),
.be_i ( '1 ),
.ruser_o ( ),
.rdata_o ( tag_rd_data[i] )
);
assign shared_tag_rd[i] = shared_tag_t'(tag_rd_data[i]);
// PTE RAM
sram #(
.DATA_WIDTH ( $bits(riscv::pte_sv32_t) ),
.NUM_WORDS ( SHARED_TLB_DEPTH )
) pte_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( pte_req[i] ),
.we_i ( pte_we[i] ),
.addr_i ( pte_addr ),
.wuser_i ( '0 ),
.wdata_i ( pte_wr_data ),
.be_i ( '1 ),
.ruser_o ( ),
.rdata_o ( pte_rd_data[i] )
);
assign pte[i] = riscv::pte_sv32_t'(pte_rd_data[i]);
end end
end
// ------------------
// Update and Flush
// ------------------
// Computes the next state of the valid-bit array and the per-way write
// enables of the tag and PTE RAMs.
always_comb begin : update_flush
// defaults: keep the valid bits, write nothing
shared_tag_valid_d = shared_tag_valid_q;
tag_wr_en = '0;
pte_wr_en = '0;
// a flush takes priority over an update: every valid bit is cleared and no RAM is written
if (flush_i) begin
shared_tag_valid_d = '0;
end else if (shared_tlb_update_i.valid) begin
for (int unsigned i = 0; i < SHARED_TLB_WAYS; i++) begin
// only the way selected by the one-hot replacement vector is written
if (repl_way_oh_d[i]) begin
// the set index is the low $clog2(SHARED_TLB_DEPTH) bits of the update VPN
shared_tag_valid_d[shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]][i] = 1'b1;
tag_wr_en[i] = 1'b1;
pte_wr_en[i] = 1'b1;
end
end
end
end //update_flush
// Tag written on an update: ASID, the VPN split into vpn1 (bits [19:10]) and
// vpn0 (bits [9:0]), and the is_4M flag, all taken from the update request.
assign shared_tag_wr.asid = shared_tlb_update_i.asid;
assign shared_tag_wr.vpn1 = shared_tlb_update_i.vpn[19:10];
assign shared_tag_wr.vpn0 = shared_tlb_update_i.vpn[9:0];
assign shared_tag_wr.is_4M = shared_tlb_update_i.is_4M;
// Tag and PTE RAMs are written at the same index: the low
// $clog2(SHARED_TLB_DEPTH) bits of the update VPN.
assign tag_wr_addr = shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0];
assign tag_wr_data = shared_tag_wr;
assign pte_wr_addr = shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0];
assign pte_wr_data = shared_tlb_update_i.content;
// Replacement: look at the valid bits of the set being updated.
assign way_valid = shared_tag_valid_q[shared_tlb_update_i.vpn[$clog2(SHARED_TLB_DEPTH)-1:0]];
// use the random way only when every way of the set is valid, otherwise the way reported as not valid
assign repl_way = (all_ways_valid) ? rnd_way : inv_way;
// the LFSR advances only when a random replacement is actually used
assign update_lfsr = shared_tlb_update_i.valid & all_ways_valid;
// one-hot version of the chosen way; all zeros when there is no update
assign repl_way_oh_d = (shared_tlb_update_i.valid) ? shared_tlb_way_bin2oh(repl_way) : '0;
// Finds a way that is not valid in the set being updated. The input is the
// inverted valid vector, so a set bit marks a free way.
// NOTE(review): lzc is defined outside this file; per the declarations of
// inv_way and all_ways_valid in this module, cnt_o is the first non-valid way
// and empty_o signals that no way is free -- confirm against the lzc module.
lzc #(
.WIDTH(SHARED_TLB_WAYS)
) i_lzc (
.in_i (~way_valid),
.cnt_o (inv_way),
.empty_o(all_ways_valid)
);
// Source of the random replacement index rnd_way: an 8-bit LFSR whose output
// is $clog2(SHARED_TLB_WAYS) bits wide. It is enabled by update_lfsr only.
// NOTE(review): lfsr is defined outside this file; its polynomial and reset
// value are not visible here.
lfsr #(
.LfsrWidth(8),
.OutWidth ($clog2(SHARED_TLB_WAYS))
) i_lfsr (
.clk_i (clk_i),
.rst_ni(rst_ni),
.en_i (update_lfsr),
.out_o (rnd_way)
);
///////////////////////////////////////////////////////
// memory arrays and regs
///////////////////////////////////////////////////////
// Per-way request and write-enable vectors for the tag RAMs: a way is
// requested when it is either written or read.
assign tag_req = tag_wr_en | tag_rd_en;
assign tag_we = tag_wr_en;
// tag_wr_en is a per-way vector, so the write address is selected as soon as
// any way is being written; otherwise the read address is used
assign tag_addr = tag_wr_en ? tag_wr_addr : tag_rd_addr;
// Same scheme for the PTE RAMs.
assign pte_req = pte_wr_en | pte_rd_en;
assign pte_we = pte_wr_en;
assign pte_addr = pte_wr_en ? pte_wr_addr : pte_rd_addr;
// One tag RAM and one PTE RAM per way, each SHARED_TLB_DEPTH words deep.
// All ways share the address and write data; request and write enable are
// per way. Byte enables are tied to all ones, the write user bits to zero,
// and the read user bits are left unconnected.
// NOTE(review): sram is defined outside this file; its read latency is not visible here.
for (genvar i = 0; i < SHARED_TLB_WAYS; i++) begin : gen_sram
// Tag RAM
sram #(
.DATA_WIDTH($bits(shared_tag_t)),
.NUM_WORDS (SHARED_TLB_DEPTH)
) tag_sram (
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (tag_req[i]),
.we_i (tag_we[i]),
.addr_i (tag_addr),
.wuser_i('0),
.wdata_i(tag_wr_data),
.be_i ('1),
.ruser_o(),
.rdata_o(tag_rd_data[i])
);
// reinterpret the raw read word as a tag struct
assign shared_tag_rd[i] = shared_tag_t'(tag_rd_data[i]);
// PTE RAM
sram #(
.DATA_WIDTH($bits(riscv::pte_sv32_t)),
.NUM_WORDS (SHARED_TLB_DEPTH)
) pte_sram (
.clk_i (clk_i),
.rst_ni (rst_ni),
.req_i (pte_req[i]),
.we_i (pte_we[i]),
.addr_i (pte_addr),
.wuser_i('0),
.wdata_i(pte_wr_data),
.be_i ('1),
.ruser_o(),
.rdata_o(pte_rd_data[i])
);
// reinterpret the raw read word as an Sv32 page table entry
assign pte[i] = riscv::pte_sv32_t'(pte_rd_data[i]);
end
endmodule endmodule
/* verilator lint_on WIDTH */ /* verilator lint_on WIDTH */

View file

@ -24,134 +24,136 @@
// 2020-02-17 0.1 S.Jacq TLB Sv32 for CV32A6 // 2020-02-17 0.1 S.Jacq TLB Sv32 for CV32A6
// =========================================================================== // // =========================================================================== //
module cva6_tlb_sv32 import ariane_pkg::*; #( module cva6_tlb_sv32
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter int unsigned TLB_ENTRIES = 4, #(
parameter int unsigned ASID_WIDTH = 1 parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
)( parameter int unsigned TLB_ENTRIES = 4,
input logic clk_i, // Clock parameter int unsigned ASID_WIDTH = 1
input logic rst_ni, // Asynchronous reset active low ) (
input logic flush_i, // Flush signal input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // Flush signal
// Update TLB // Update TLB
input tlb_update_sv32_t update_i, input tlb_update_sv32_t update_i,
// Lookup signals // Lookup signals
input logic lu_access_i, input logic lu_access_i,
input logic [ASID_WIDTH-1:0] lu_asid_i, input logic [ASID_WIDTH-1:0] lu_asid_i,
input logic [riscv::VLEN-1:0] lu_vaddr_i, input logic [riscv::VLEN-1:0] lu_vaddr_i,
output riscv::pte_sv32_t lu_content_o, output riscv::pte_sv32_t lu_content_o,
input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i, input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,
input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
output logic lu_is_4M_o, output logic lu_is_4M_o,
output logic lu_hit_o output logic lu_hit_o
); );
// Sv32 defines two levels of page tables // Sv32 defines two levels of page tables
struct packed { struct packed {
logic [8:0] asid; //9 bits wide logic [8:0] asid; //9 bits wide
logic [9:0] vpn1; //10 bits wide logic [9:0] vpn1; //10 bits wide
logic [9:0] vpn0; //10 bits wide logic [9:0] vpn0; //10 bits wide
logic is_4M; logic is_4M;
logic valid; logic valid;
} [TLB_ENTRIES-1:0] tags_q, tags_n; } [TLB_ENTRIES-1:0]
tags_q, tags_n;
riscv::pte_sv32_t [TLB_ENTRIES-1:0] content_q, content_n; riscv::pte_sv32_t [TLB_ENTRIES-1:0] content_q, content_n;
logic [9:0] vpn0, vpn1; logic [9:0] vpn0, vpn1;
logic [TLB_ENTRIES-1:0] lu_hit; // to replacement logic logic [TLB_ENTRIES-1:0] lu_hit; // to replacement logic
logic [TLB_ENTRIES-1:0] replace_en; // replace the following entry, set by replacement strategy logic [TLB_ENTRIES-1:0] replace_en; // replace the following entry, set by replacement strategy
//------------- //-------------
// Translation // Translation
//------------- //-------------
always_comb begin : translation always_comb begin : translation
vpn0 = lu_vaddr_i[21:12]; vpn0 = lu_vaddr_i[21:12];
vpn1 = lu_vaddr_i[31:22]; vpn1 = lu_vaddr_i[31:22];
// default assignment // default assignment
lu_hit = '{default: 0}; lu_hit = '{default: 0};
lu_hit_o = 1'b0; lu_hit_o = 1'b0;
lu_content_o = '{default: 0}; lu_content_o = '{default: 0};
lu_is_4M_o = 1'b0; lu_is_4M_o = 1'b0;
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
// first level match, this may be a mega page, check the ASID flags as well // first level match, this may be a mega page, check the ASID flags as well
// if the entry is associated to a global address, don't match the ASID (ASID is don't care) // if the entry is associated to a global address, don't match the ASID (ASID is don't care)
if (tags_q[i].valid && ((lu_asid_i == tags_q[i].asid[ASID_WIDTH-1:0]) || content_q[i].g) && vpn1 == tags_q[i].vpn1) begin if (tags_q[i].valid && ((lu_asid_i == tags_q[i].asid[ASID_WIDTH-1:0]) || content_q[i].g) && vpn1 == tags_q[i].vpn1) begin
if (tags_q[i].is_4M || vpn0 == tags_q[i].vpn0) begin if (tags_q[i].is_4M || vpn0 == tags_q[i].vpn0) begin
lu_is_4M_o = tags_q[i].is_4M; lu_is_4M_o = tags_q[i].is_4M;
lu_content_o = content_q[i]; lu_content_o = content_q[i];
lu_hit_o = 1'b1; lu_hit_o = 1'b1;
lu_hit[i] = 1'b1; lu_hit[i] = 1'b1;
end
end
end end
end
end end
end
logic asid_to_be_flushed_is0; // indicates that the ASID provided by SFENCE.VMA (rs2)is 0, active high logic asid_to_be_flushed_is0; // indicates that the ASID provided by SFENCE.VMA (rs2)is 0, active high
logic vaddr_to_be_flushed_is0; // indicates that the VADDR provided by SFENCE.VMA (rs1)is 0, active high logic vaddr_to_be_flushed_is0; // indicates that the VADDR provided by SFENCE.VMA (rs1)is 0, active high
logic [TLB_ENTRIES-1:0] vaddr_vpn0_match; logic [TLB_ENTRIES-1:0] vaddr_vpn0_match;
logic [TLB_ENTRIES-1:0] vaddr_vpn1_match; logic [TLB_ENTRIES-1:0] vaddr_vpn1_match;
assign asid_to_be_flushed_is0 = ~(|asid_to_be_flushed_i); assign asid_to_be_flushed_is0 = ~(|asid_to_be_flushed_i);
assign vaddr_to_be_flushed_is0 = ~(|vaddr_to_be_flushed_i); assign vaddr_to_be_flushed_is0 = ~(|vaddr_to_be_flushed_i);
// ------------------ // ------------------
// Update and Flush // Update and Flush
// ------------------ // ------------------
always_comb begin : update_flush always_comb begin : update_flush
tags_n = tags_q; tags_n = tags_q;
content_n = content_q; content_n = content_q;
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
vaddr_vpn0_match[i] = (vaddr_to_be_flushed_i[21:12] == tags_q[i].vpn0); vaddr_vpn0_match[i] = (vaddr_to_be_flushed_i[21:12] == tags_q[i].vpn0);
vaddr_vpn1_match[i] = (vaddr_to_be_flushed_i[31:22] == tags_q[i].vpn1); vaddr_vpn1_match[i] = (vaddr_to_be_flushed_i[31:22] == tags_q[i].vpn1);
if (flush_i) begin if (flush_i) begin
// invalidate logic // invalidate logic
// flush everything if ASID is 0 and vaddr is 0 ("SFENCE.VMA x0 x0" case) // flush everything if ASID is 0 and vaddr is 0 ("SFENCE.VMA x0 x0" case)
if (asid_to_be_flushed_is0 && vaddr_to_be_flushed_is0 ) if (asid_to_be_flushed_is0 && vaddr_to_be_flushed_is0) tags_n[i].valid = 1'b0;
tags_n[i].valid = 1'b0; // flush vaddr in all addressing space ("SFENCE.VMA vaddr x0" case), it should happen only for leaf pages
// flush vaddr in all addressing space ("SFENCE.VMA vaddr x0" case), it should happen only for leaf pages else if (asid_to_be_flushed_is0 && ( (vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M) ) && (~vaddr_to_be_flushed_is0))
else if (asid_to_be_flushed_is0 && ( (vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M) ) && (~vaddr_to_be_flushed_is0)) tags_n[i].valid = 1'b0;
tags_n[i].valid = 1'b0; // the entry is flushed if it's not global and asid and vaddr both matches with the entry to be flushed ("SFENCE.VMA vaddr asid" case)
// the entry is flushed if it's not global and asid and vaddr both matches with the entry to be flushed ("SFENCE.VMA vaddr asid" case) else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M)) && (asid_to_be_flushed_i == tags_q[i].asid[ASID_WIDTH-1:0]) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0))
else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M)) && (asid_to_be_flushed_i == tags_q[i].asid[ASID_WIDTH-1:0]) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0)) tags_n[i].valid = 1'b0;
tags_n[i].valid = 1'b0; // the entry is flushed if it's not global, and the asid matches and vaddr is 0. ("SFENCE.VMA 0 asid" case)
// the entry is flushed if it's not global, and the asid matches and vaddr is 0. ("SFENCE.VMA 0 asid" case) else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid[ASID_WIDTH-1:0]) && (!asid_to_be_flushed_is0))
else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid[ASID_WIDTH-1:0]) && (!asid_to_be_flushed_is0)) tags_n[i].valid = 1'b0;
tags_n[i].valid = 1'b0; // normal replacement
// normal replacement end else if (update_i.valid & replace_en[i]) begin
end else if (update_i.valid & replace_en[i]) begin // update tag array
// update tag array tags_n[i] = '{
tags_n[i] = '{ asid: update_i.asid,
asid: update_i.asid, vpn1: update_i.vpn[19:10],
vpn1: update_i.vpn [19:10], vpn0: update_i.vpn[9:0],
vpn0: update_i.vpn [9:0], is_4M: update_i.is_4M,
is_4M: update_i.is_4M, valid: 1'b1
valid: 1'b1 };
}; // and content as well
// and content as well content_n[i] = update_i.content;
content_n[i] = update_i.content; end
end
end
end end
end
// ----------------------------------------------- // -----------------------------------------------
// PLRU - Pseudo Least Recently Used Replacement // PLRU - Pseudo Least Recently Used Replacement
// ----------------------------------------------- // -----------------------------------------------
logic[2*(TLB_ENTRIES-1)-1:0] plru_tree_q, plru_tree_n; logic [2*(TLB_ENTRIES-1)-1:0] plru_tree_q, plru_tree_n;
logic en; logic en;
int unsigned idx_base, shift, new_index; int unsigned idx_base, shift, new_index;
always_comb begin : plru_replacement always_comb begin : plru_replacement
plru_tree_n = plru_tree_q; plru_tree_n = plru_tree_q;
en = '0; en = '0;
idx_base = '0; idx_base = '0;
shift = '0; shift = '0;
new_index = '0; new_index = '0;
// The PLRU-tree indexing: // The PLRU-tree indexing:
// lvl0 0 // lvl0 0
// / \ // / \
// / \ // / \
// lvl1 1 2 // lvl1 1 2
// / \ / \ // / \ / \
@ -172,94 +174,108 @@ module cva6_tlb_sv32 import ariane_pkg::*; #(
// lu_hit[0]: plru_tree_n[0, 1, 3] = {0, 0, 0}; // lu_hit[0]: plru_tree_n[0, 1, 3] = {0, 0, 0};
// default: begin /* No hit */ end // default: begin /* No hit */ end
// endcase // endcase
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin for (
// we got a hit so update the pointer as it was least recently used int unsigned i = 0; i < TLB_ENTRIES; i++
if (lu_hit[i] & lu_access_i) begin ) begin
// Set the nodes to the values we would expect // we got a hit so update the pointer as it was least recently used
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin if (lu_hit[i] & lu_access_i) begin
idx_base = $unsigned((2**lvl)-1); // Set the nodes to the values we would expect
// lvl0 <=> MSB, lvl1 <=> MSB-1, ... for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
shift = $clog2(TLB_ENTRIES) - lvl; idx_base = $unsigned((2 ** lvl) - 1);
// to circumvent the 32 bit integer arithmetic assignment // lvl0 <=> MSB, lvl1 <=> MSB-1, ...
new_index = ~((i >> (shift-1)) & 32'b1); shift = $clog2(TLB_ENTRIES) - lvl;
plru_tree_n[idx_base + (i >> shift)] = new_index[0]; // to circumvent the 32 bit integer arithmetic assignment
end new_index = ~((i >> (shift - 1)) & 32'b1);
end plru_tree_n[idx_base+(i>>shift)] = new_index[0];
end end
// Decode tree to write enable signals
// Next for-loop basically creates the following logic for e.g. an 8 entry
// TLB (note: pseudo-code obviously):
// replace_en[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1}
// replace_en[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0}
// replace_en[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1}
// replace_en[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0}
// replace_en[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1}
// replace_en[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0}
// replace_en[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1}
// replace_en[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0}
// For each entry traverse the tree. If every tree-node matches,
// the corresponding bit of the entry's index, this is
// the next entry to replace.
for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin
en = 1'b1;
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
idx_base = $unsigned((2**lvl)-1);
// lvl0 <=> MSB, lvl1 <=> MSB-1, ...
shift = $clog2(TLB_ENTRIES) - lvl;
// en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1);
new_index = (i >> (shift-1)) & 32'b1;
if (new_index[0]) begin
en &= plru_tree_q[idx_base + (i>>shift)];
end else begin
en &= ~plru_tree_q[idx_base + (i>>shift)];
end
end
replace_en[i] = en;
end
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
tags_q <= '{default: 0};
content_q <= '{default: 0};
plru_tree_q <= '{default: 0};
end else begin
tags_q <= tags_n;
content_q <= content_n;
plru_tree_q <= plru_tree_n;
end
end
//--------------
// Sanity checks
//--------------
//pragma translate_off
`ifndef VERILATOR
initial begin : p_assertions
assert ((TLB_ENTRIES % 2 == 0) && (TLB_ENTRIES > 1))
else begin $error("TLB size must be a multiple of 2 and greater than 1"); $stop(); end
assert (ASID_WIDTH >= 1)
else begin $error("ASID width must be at least 1"); $stop(); end
end
// Just for checking
function int countSetBits(logic[TLB_ENTRIES-1:0] vector);
automatic int count = 0;
foreach (vector[idx]) begin
count += vector[idx];
end end
return count; end
endfunction // Decode tree to write enable signals
// Next for-loop basically creates the following logic for e.g. an 8 entry
// TLB (note: pseudo-code obviously):
// replace_en[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1}
// replace_en[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0}
// replace_en[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1}
// replace_en[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0}
// replace_en[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1}
// replace_en[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0}
// replace_en[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1}
// replace_en[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0}
// For each entry traverse the tree. If every tree-node matches,
// the corresponding bit of the entry's index, this is
// the next entry to replace.
for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin
en = 1'b1;
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
idx_base = $unsigned((2 ** lvl) - 1);
// lvl0 <=> MSB, lvl1 <=> MSB-1, ...
shift = $clog2(TLB_ENTRIES) - lvl;
assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1)) // en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1);
else begin $error("More then one hit in TLB!"); $stop(); end new_index = (i >> (shift - 1)) & 32'b1;
assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1)) if (new_index[0]) begin
else begin $error("More then one TLB entry selected for next replace!"); $stop(); end en &= plru_tree_q[idx_base+(i>>shift)];
end else begin
en &= ~plru_tree_q[idx_base+(i>>shift)];
end
end
replace_en[i] = en;
end
end
`endif // sequential process
//pragma translate_on always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
tags_q <= '{default: 0};
content_q <= '{default: 0};
plru_tree_q <= '{default: 0};
end else begin
tags_q <= tags_n;
content_q <= content_n;
plru_tree_q <= plru_tree_n;
end
end
//--------------
// Sanity checks
//--------------
//pragma translate_off
`ifndef VERILATOR
initial begin : p_assertions
assert ((TLB_ENTRIES % 2 == 0) && (TLB_ENTRIES > 1))
else begin
$error("TLB size must be a multiple of 2 and greater than 1");
$stop();
end
assert (ASID_WIDTH >= 1)
else begin
$error("ASID width must be at least 1");
$stop();
end
end
// Just for checking
function int countSetBits(logic [TLB_ENTRIES-1:0] vector);
automatic int count = 0;
foreach (vector[idx]) begin
count += vector[idx];
end
return count;
endfunction
assert property (@(posedge clk_i) (countSetBits(lu_hit) <= 1))
else begin
$error("More then one hit in TLB!");
$stop();
end
assert property (@(posedge clk_i) (countSetBits(replace_en) <= 1))
else begin
$error("More then one TLB entry selected for next replace!");
$stop();
end
`endif
//pragma translate_on
endmodule endmodule

View file

@ -15,448 +15,505 @@
// privilege specification 1.11-WIP // privilege specification 1.11-WIP
module mmu import ariane_pkg::*; #( module mmu
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter int unsigned INSTR_TLB_ENTRIES = 4, #(
parameter int unsigned DATA_TLB_ENTRIES = 4, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned ASID_WIDTH = 1 parameter int unsigned INSTR_TLB_ENTRIES = 4,
parameter int unsigned DATA_TLB_ENTRIES = 4,
parameter int unsigned ASID_WIDTH = 1
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input logic flush_i, input logic flush_i,
input logic enable_translation_i, input logic enable_translation_i,
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
// IF interface // IF interface
input icache_arsp_t icache_areq_i, input icache_arsp_t icache_areq_i,
output icache_areq_t icache_areq_o, output icache_areq_t icache_areq_o,
// LSU interface // LSU interface
// this is a more minimalistic interface because the actual addressing logic is handled // this is a more minimalistic interface because the actual addressing logic is handled
// in the LSU as we distinguish load and stores, what we do here is simple address translation // in the LSU as we distinguish load and stores, what we do here is simple address translation
input exception_t misaligned_ex_i, input exception_t misaligned_ex_i,
input logic lsu_req_i, // request address translation input logic lsu_req_i, // request address translation
input logic [riscv::VLEN-1:0] lsu_vaddr_i, // virtual address in input logic [riscv::VLEN-1:0] lsu_vaddr_i, // virtual address in
input logic lsu_is_store_i, // the translation is requested by a store input logic lsu_is_store_i, // the translation is requested by a store
// if we need to walk the page table we can't grant in the same cycle // if we need to walk the page table we can't grant in the same cycle
// Cycle 0 // Cycle 0
output logic lsu_dtlb_hit_o, // sent in the same cycle as the request if translation hits in the DTLB output logic lsu_dtlb_hit_o, // sent in the same cycle as the request if translation hits in the DTLB
output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o, // ppn (send same cycle as hit) output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o, // ppn (send same cycle as hit)
// Cycle 1 // Cycle 1
output logic lsu_valid_o, // translation is valid output logic lsu_valid_o, // translation is valid
output logic [riscv::PLEN-1:0] lsu_paddr_o, // translated address output logic [riscv::PLEN-1:0] lsu_paddr_o, // translated address
output exception_t lsu_exception_o, // address translation threw an exception output exception_t lsu_exception_o, // address translation threw an exception
// General control signals // General control signals
input riscv::priv_lvl_t priv_lvl_i, input riscv::priv_lvl_t priv_lvl_i,
input riscv::priv_lvl_t ld_st_priv_lvl_i, input riscv::priv_lvl_t ld_st_priv_lvl_i,
input logic sum_i, input logic sum_i,
input logic mxr_i, input logic mxr_i,
// input logic flag_mprv_i, // input logic flag_mprv_i,
input logic [riscv::PPNW-1:0] satp_ppn_i, input logic [riscv::PPNW-1:0] satp_ppn_i,
input logic [ASID_WIDTH-1:0] asid_i, input logic [ASID_WIDTH-1:0] asid_i,
input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i, input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i,
input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
input logic flush_tlb_i, input logic flush_tlb_i,
// Performance counters // Performance counters
output logic itlb_miss_o, output logic itlb_miss_o,
output logic dtlb_miss_o, output logic dtlb_miss_o,
// PTW memory interface // PTW memory interface
input dcache_req_o_t req_port_i, input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o, output dcache_req_i_t req_port_o,
// PMP // PMP
input riscv::pmpcfg_t [15:0] pmpcfg_i, input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i input logic [15:0][riscv::PLEN-3:0] pmpaddr_i
); );
logic iaccess_err; // insufficient privilege to access this instruction page logic iaccess_err; // insufficient privilege to access this instruction page
logic daccess_err; // insufficient privilege to access this data page logic daccess_err; // insufficient privilege to access this data page
logic ptw_active; // PTW is currently walking a page table logic ptw_active; // PTW is currently walking a page table
logic walking_instr; // PTW is walking because of an ITLB miss logic walking_instr; // PTW is walking because of an ITLB miss
logic ptw_error; // PTW threw an exception logic ptw_error; // PTW threw an exception
logic ptw_access_exception; // PTW threw an access exception (PMPs) logic ptw_access_exception; // PTW threw an access exception (PMPs)
logic [riscv::PLEN-1:0] ptw_bad_paddr; // PTW PMP exception bad physical addr logic [riscv::PLEN-1:0] ptw_bad_paddr; // PTW PMP exception bad physical addr
logic [riscv::VLEN-1:0] update_vaddr; logic [riscv::VLEN-1:0] update_vaddr;
tlb_update_t update_ptw_itlb, update_ptw_dtlb; tlb_update_t update_ptw_itlb, update_ptw_dtlb;
logic itlb_lu_access; logic itlb_lu_access;
riscv::pte_t itlb_content; riscv::pte_t itlb_content;
logic itlb_is_2M; logic itlb_is_2M;
logic itlb_is_1G; logic itlb_is_1G;
logic itlb_lu_hit; logic itlb_lu_hit;
logic dtlb_lu_access; logic dtlb_lu_access;
riscv::pte_t dtlb_content; riscv::pte_t dtlb_content;
logic dtlb_is_2M; logic dtlb_is_2M;
logic dtlb_is_1G; logic dtlb_is_1G;
logic dtlb_lu_hit; logic dtlb_lu_hit;
// Assignments // Assignments
assign itlb_lu_access = icache_areq_i.fetch_req; assign itlb_lu_access = icache_areq_i.fetch_req;
assign dtlb_lu_access = lsu_req_i; assign dtlb_lu_access = lsu_req_i;
tlb #( tlb #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
.TLB_ENTRIES ( INSTR_TLB_ENTRIES ), .TLB_ENTRIES(INSTR_TLB_ENTRIES),
.ASID_WIDTH ( ASID_WIDTH ) .ASID_WIDTH (ASID_WIDTH)
) i_itlb ( ) i_itlb (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( flush_tlb_i ), .flush_i(flush_tlb_i),
.update_i ( update_ptw_itlb ), .update_i(update_ptw_itlb),
.lu_access_i ( itlb_lu_access ), .lu_access_i (itlb_lu_access),
.lu_asid_i ( asid_i ), .lu_asid_i (asid_i),
.asid_to_be_flushed_i ( asid_to_be_flushed_i ), .asid_to_be_flushed_i (asid_to_be_flushed_i),
.vaddr_to_be_flushed_i ( vaddr_to_be_flushed_i ), .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
.lu_vaddr_i ( icache_areq_i.fetch_vaddr ), .lu_vaddr_i (icache_areq_i.fetch_vaddr),
.lu_content_o ( itlb_content ), .lu_content_o (itlb_content),
.lu_is_2M_o ( itlb_is_2M ), .lu_is_2M_o(itlb_is_2M),
.lu_is_1G_o ( itlb_is_1G ), .lu_is_1G_o(itlb_is_1G),
.lu_hit_o ( itlb_lu_hit ) .lu_hit_o (itlb_lu_hit)
); );
tlb #( tlb #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
.TLB_ENTRIES ( DATA_TLB_ENTRIES ), .TLB_ENTRIES(DATA_TLB_ENTRIES),
.ASID_WIDTH ( ASID_WIDTH ) .ASID_WIDTH (ASID_WIDTH)
) i_dtlb ( ) i_dtlb (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( flush_tlb_i ), .flush_i(flush_tlb_i),
.update_i ( update_ptw_dtlb ), .update_i(update_ptw_dtlb),
.lu_access_i ( dtlb_lu_access ), .lu_access_i (dtlb_lu_access),
.lu_asid_i ( asid_i ), .lu_asid_i (asid_i),
.asid_to_be_flushed_i ( asid_to_be_flushed_i ), .asid_to_be_flushed_i (asid_to_be_flushed_i),
.vaddr_to_be_flushed_i ( vaddr_to_be_flushed_i ), .vaddr_to_be_flushed_i(vaddr_to_be_flushed_i),
.lu_vaddr_i ( lsu_vaddr_i ), .lu_vaddr_i (lsu_vaddr_i),
.lu_content_o ( dtlb_content ), .lu_content_o (dtlb_content),
.lu_is_2M_o ( dtlb_is_2M ), .lu_is_2M_o(dtlb_is_2M),
.lu_is_1G_o ( dtlb_is_1G ), .lu_is_1G_o(dtlb_is_1G),
.lu_hit_o ( dtlb_lu_hit ) .lu_hit_o (dtlb_lu_hit)
); );
ptw #( ptw #(
.CVA6Cfg ( CVA6Cfg ), .CVA6Cfg (CVA6Cfg),
.ASID_WIDTH ( ASID_WIDTH ) .ASID_WIDTH(ASID_WIDTH)
) i_ptw ( ) i_ptw (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.ptw_active_o ( ptw_active ), .ptw_active_o (ptw_active),
.walking_instr_o ( walking_instr ), .walking_instr_o (walking_instr),
.ptw_error_o ( ptw_error ), .ptw_error_o (ptw_error),
.ptw_access_exception_o ( ptw_access_exception ), .ptw_access_exception_o(ptw_access_exception),
.enable_translation_i ( enable_translation_i ), .enable_translation_i (enable_translation_i),
.update_vaddr_o ( update_vaddr ), .update_vaddr_o(update_vaddr),
.itlb_update_o ( update_ptw_itlb ), .itlb_update_o (update_ptw_itlb),
.dtlb_update_o ( update_ptw_dtlb ), .dtlb_update_o (update_ptw_dtlb),
.itlb_access_i ( itlb_lu_access ), .itlb_access_i(itlb_lu_access),
.itlb_hit_i ( itlb_lu_hit ), .itlb_hit_i (itlb_lu_hit),
.itlb_vaddr_i ( icache_areq_i.fetch_vaddr ), .itlb_vaddr_i (icache_areq_i.fetch_vaddr),
.dtlb_access_i ( dtlb_lu_access ), .dtlb_access_i(dtlb_lu_access),
.dtlb_hit_i ( dtlb_lu_hit ), .dtlb_hit_i (dtlb_lu_hit),
.dtlb_vaddr_i ( lsu_vaddr_i ), .dtlb_vaddr_i (lsu_vaddr_i),
.req_port_i ( req_port_i ), .req_port_i (req_port_i),
.req_port_o ( req_port_o ), .req_port_o (req_port_o),
.pmpcfg_i, .pmpcfg_i,
.pmpaddr_i, .pmpaddr_i,
.bad_paddr_o ( ptw_bad_paddr ), .bad_paddr_o(ptw_bad_paddr),
.* .*
); );
// ila_1 i_ila_1 ( // ila_1 i_ila_1 (
// .clk(clk_i), // input wire clk // .clk(clk_i), // input wire clk
// .probe0({req_port_o.address_tag, req_port_o.address_index}), // .probe0({req_port_o.address_tag, req_port_o.address_index}),
// .probe1(req_port_o.data_req), // input wire [63:0] probe1 // .probe1(req_port_o.data_req), // input wire [63:0] probe1
// .probe2(req_port_i.data_gnt), // input wire [0:0] probe2 // .probe2(req_port_i.data_gnt), // input wire [0:0] probe2
// .probe3(req_port_i.data_rdata), // input wire [0:0] probe3 // .probe3(req_port_i.data_rdata), // input wire [0:0] probe3
// .probe4(req_port_i.data_rvalid), // input wire [0:0] probe4 // .probe4(req_port_i.data_rvalid), // input wire [0:0] probe4
// .probe5(ptw_error), // input wire [1:0] probe5 // .probe5(ptw_error), // input wire [1:0] probe5
// .probe6(update_vaddr), // input wire [0:0] probe6 // .probe6(update_vaddr), // input wire [0:0] probe6
// .probe7(update_ptw_itlb.valid), // input wire [0:0] probe7 // .probe7(update_ptw_itlb.valid), // input wire [0:0] probe7
// .probe8(update_ptw_dtlb.valid), // input wire [0:0] probe8 // .probe8(update_ptw_dtlb.valid), // input wire [0:0] probe8
// .probe9(dtlb_lu_access), // input wire [0:0] probe9 // .probe9(dtlb_lu_access), // input wire [0:0] probe9
// .probe10(lsu_vaddr_i), // input wire [0:0] probe10 // .probe10(lsu_vaddr_i), // input wire [0:0] probe10
// .probe11(dtlb_lu_hit), // input wire [0:0] probe11 // .probe11(dtlb_lu_hit), // input wire [0:0] probe11
// .probe12(itlb_lu_access), // input wire [0:0] probe12 // .probe12(itlb_lu_access), // input wire [0:0] probe12
// .probe13(icache_areq_i.fetch_vaddr), // input wire [0:0] probe13 // .probe13(icache_areq_i.fetch_vaddr), // input wire [0:0] probe13
// .probe14(itlb_lu_hit) // input wire [0:0] probe13 // .probe14(itlb_lu_hit) // input wire [0:0] probe13
// ); // );
//----------------------- //-----------------------
// Instruction Interface // Instruction Interface
//----------------------- //-----------------------
logic match_any_execute_region; logic match_any_execute_region;
logic pmp_instr_allow; logic pmp_instr_allow;
// The instruction interface is a simple request response interface // The instruction interface is a simple request response interface
always_comb begin : instr_interface always_comb begin : instr_interface
// MMU disabled: just pass through // MMU disabled: just pass through
icache_areq_o.fetch_valid = icache_areq_i.fetch_req; icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
icache_areq_o.fetch_paddr = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0]; // play through in case we disabled address translation icache_areq_o.fetch_paddr = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0]; // play through in case we disabled address translation
// two potential exception sources: // two potential exception sources:
// 1. HPTW threw an exception -> signal with a page fault exception // 1. HPTW threw an exception -> signal with a page fault exception
// 2. We got an access error because of insufficient permissions -> throw an access exception // 2. We got an access error because of insufficient permissions -> throw an access exception
icache_areq_o.fetch_exception = '0; icache_areq_o.fetch_exception = '0;
// Check whether we are allowed to access this memory region from a fetch perspective // Check whether we are allowed to access this memory region from a fetch perspective
iaccess_err = icache_areq_i.fetch_req && enable_translation_i iaccess_err = icache_areq_i.fetch_req && enable_translation_i
&& (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content.u) && (((priv_lvl_i == riscv::PRIV_LVL_U) && ~itlb_content.u)
|| ((priv_lvl_i == riscv::PRIV_LVL_S) && itlb_content.u)); || ((priv_lvl_i == riscv::PRIV_LVL_S) && itlb_content.u));
// MMU enabled: address from TLB, request delayed until hit. Error when TLB // MMU enabled: address from TLB, request delayed until hit. Error when TLB
// hit and no access right or TLB hit and translated address not valid (e.g. // hit and no access right or TLB hit and translated address not valid (e.g.
// AXI decode error), or when PTW performs walk due to ITLB miss and raises // AXI decode error), or when PTW performs walk due to ITLB miss and raises
// an error. // an error.
if (enable_translation_i) begin if (enable_translation_i) begin
// we work with SV39 or SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal // we work with SV39 or SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal
if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin
icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1}; icache_areq_o.fetch_exception = {
end riscv::INSTR_ACCESS_FAULT,
{{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
1'b1
};
end
icache_areq_o.fetch_valid = 1'b0; icache_areq_o.fetch_valid = 1'b0;
// 4K page // 4K page
icache_areq_o.fetch_paddr = {itlb_content.ppn, icache_areq_i.fetch_vaddr[11:0]}; icache_areq_o.fetch_paddr = {itlb_content.ppn, icache_areq_i.fetch_vaddr[11:0]};
// Mega page // Mega page
if (itlb_is_2M) begin if (itlb_is_2M) begin
icache_areq_o.fetch_paddr[20:12] = icache_areq_i.fetch_vaddr[20:12]; icache_areq_o.fetch_paddr[20:12] = icache_areq_i.fetch_vaddr[20:12];
end end
// Giga page // Giga page
if (itlb_is_1G) begin if (itlb_is_1G) begin
icache_areq_o.fetch_paddr[29:12] = icache_areq_i.fetch_vaddr[29:12]; icache_areq_o.fetch_paddr[29:12] = icache_areq_i.fetch_vaddr[29:12];
end end
// --------- // ---------
// ITLB Hit // ITLB Hit
// -------- // --------
// if we hit the ITLB output the request signal immediately // if we hit the ITLB output the request signal immediately
if (itlb_lu_hit) begin if (itlb_lu_hit) begin
icache_areq_o.fetch_valid = icache_areq_i.fetch_req; icache_areq_o.fetch_valid = icache_areq_i.fetch_req;
// we got an access error // we got an access error
if (iaccess_err) begin if (iaccess_err) begin
// throw a page fault // throw a page fault
icache_areq_o.fetch_exception = {riscv::INSTR_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1}; icache_areq_o.fetch_exception = {
end else if (!pmp_instr_allow) begin riscv::INSTR_PAGE_FAULT,
icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN-riscv::PLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1}; {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr},
end 1'b1
end else };
// --------- end else if (!pmp_instr_allow) begin
// ITLB Miss icache_areq_o.fetch_exception = {
// --------- riscv::INSTR_ACCESS_FAULT,
// watch out for exceptions happening during walking the page table {{riscv::XLEN - riscv::PLEN{1'b0}}, icache_areq_i.fetch_vaddr},
if (ptw_active && walking_instr) begin 1'b1
icache_areq_o.fetch_valid = ptw_error | ptw_access_exception; };
if (ptw_error) icache_areq_o.fetch_exception = {riscv::INSTR_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, update_vaddr}, 1'b1};
else icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, update_vaddr}, 1'b1};
end
end
// if it didn't match any execute region throw an `Instruction Access Fault`
// or: if we are not translating, check PMPs immediately on the paddr
if ((!match_any_execute_region && !ptw_error) || (!enable_translation_i && !pmp_instr_allow)) begin
icache_areq_o.fetch_exception = {riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN-riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr}, 1'b1};
end end
end else
// ---------
// ITLB Miss
// ---------
// watch out for exceptions happening during walking the page table
if (ptw_active && walking_instr) begin
icache_areq_o.fetch_valid = ptw_error | ptw_access_exception;
if (ptw_error)
icache_areq_o.fetch_exception = {
riscv::INSTR_PAGE_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1
};
else
icache_areq_o.fetch_exception = {
riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1
};
end
end end
// if it didn't match any execute region throw an `Instruction Access Fault`
// check for execute flag on memory // or: if we are not translating, check PMPs immediately on the paddr
assign match_any_execute_region = config_pkg::is_inside_execute_regions(CVA6Cfg, {{64-riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr}); if ((!match_any_execute_region && !ptw_error) || (!enable_translation_i && !pmp_instr_allow)) begin
icache_areq_o.fetch_exception = {
// Instruction fetch riscv::INSTR_ACCESS_FAULT,
pmp #( {{riscv::XLEN - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr},
.CVA6Cfg ( CVA6Cfg ), 1'b1
.PLEN ( riscv::PLEN ), };
.PMP_LEN ( riscv::PLEN - 2 ),
.NR_ENTRIES ( CVA6Cfg.NrPMPEntries )
) i_pmp_if (
.addr_i ( icache_areq_o.fetch_paddr ),
.priv_lvl_i,
// we will always execute on the instruction fetch port
.access_type_i ( riscv::ACCESS_EXEC ),
// Configuration
.conf_addr_i ( pmpaddr_i ),
.conf_i ( pmpcfg_i ),
.allow_o ( pmp_instr_allow )
);
//-----------------------
// Data Interface
//-----------------------
logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q;
riscv::pte_t dtlb_pte_n, dtlb_pte_q;
exception_t misaligned_ex_n, misaligned_ex_q;
logic lsu_req_n, lsu_req_q;
logic lsu_is_store_n, lsu_is_store_q;
logic dtlb_hit_n, dtlb_hit_q;
logic dtlb_is_2M_n, dtlb_is_2M_q;
logic dtlb_is_1G_n, dtlb_is_1G_q;
// check if we need to do translation or if we are always ready (e.g.: we are not translating anything)
assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? dtlb_lu_hit : 1'b1;
// Wires to PMP checks
riscv::pmp_access_t pmp_access_type;
logic pmp_data_allow;
localparam PPNWMin = (riscv::PPNW-1 > 29) ? 29 : riscv::PPNW-1;
// The data interface is simpler and only consists of a request/response interface
always_comb begin : data_interface
// save request and DTLB response
lsu_vaddr_n = lsu_vaddr_i;
lsu_req_n = lsu_req_i;
misaligned_ex_n = misaligned_ex_i;
dtlb_pte_n = dtlb_content;
dtlb_hit_n = dtlb_lu_hit;
lsu_is_store_n = lsu_is_store_i;
dtlb_is_2M_n = dtlb_is_2M;
dtlb_is_1G_n = dtlb_is_1G;
lsu_paddr_o = lsu_vaddr_q[riscv::PLEN-1:0];
lsu_dtlb_ppn_o = lsu_vaddr_n[riscv::PLEN-1:12];
lsu_valid_o = lsu_req_q;
lsu_exception_o = misaligned_ex_q;
pmp_access_type = lsu_is_store_q ? riscv::ACCESS_WRITE : riscv::ACCESS_READ;
// mute misaligned exceptions if there is no request otherwise they will throw accidental exceptions
misaligned_ex_n.valid = misaligned_ex_i.valid & lsu_req_i;
// Check if the User flag is set, then we may only access it in supervisor mode
// if SUM is enabled
daccess_err = en_ld_st_translation_i && ((ld_st_priv_lvl_i == riscv::PRIV_LVL_S && !sum_i && dtlb_pte_q.u) || // SUM is not set and we are trying to access a user page in supervisor mode
(ld_st_priv_lvl_i == riscv::PRIV_LVL_U && !dtlb_pte_q.u)); // this is not a user page but we are in user mode and trying to access it
// translation is enabled and no misaligned exception occurred
if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin
lsu_valid_o = 1'b0;
// 4K page
lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]};
lsu_dtlb_ppn_o = dtlb_content.ppn;
// Mega page
if (dtlb_is_2M_q) begin
lsu_paddr_o[20:12] = lsu_vaddr_q[20:12];
lsu_dtlb_ppn_o[20:12] = lsu_vaddr_n[20:12];
end
// Giga page
if (dtlb_is_1G_q) begin
lsu_paddr_o[PPNWMin:12] = lsu_vaddr_q[PPNWMin:12];
lsu_dtlb_ppn_o[PPNWMin:12] = lsu_vaddr_n[PPNWMin:12];
end
// ---------
// DTLB Hit
// --------
if (dtlb_hit_q && lsu_req_q) begin
lsu_valid_o = 1'b1;
// exception priority:
// PAGE_FAULTS have higher priority than ACCESS_FAULTS
// virtual memory based exceptions are PAGE_FAULTS
// physical memory based exceptions are ACCESS_FAULTS (PMA/PMP)
// this is a store
if (lsu_is_store_q) begin
// check if the page is write-able and we are not violating privileges
// also check if the dirty flag is set
if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin
lsu_exception_o = {riscv::STORE_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},lsu_vaddr_q}, 1'b1};
// Check if any PMPs are violated
end else if (!pmp_data_allow) begin
lsu_exception_o = {riscv::ST_ACCESS_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},lsu_vaddr_q}, 1'b1};
end
// this is a load
end else begin
// check for sufficient access privileges - throw a page fault if necessary
if (daccess_err) begin
lsu_exception_o = {riscv::LOAD_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},lsu_vaddr_q}, 1'b1};
// Check if any PMPs are violated
end else if (!pmp_data_allow) begin
lsu_exception_o = {riscv::LD_ACCESS_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},lsu_vaddr_q}, 1'b1};
end
end
end else
// ---------
// DTLB Miss
// ---------
// watch out for exceptions
if (ptw_active && !walking_instr) begin
// page table walker threw an exception
if (ptw_error) begin
// an error makes the translation valid
lsu_valid_o = 1'b1;
// the page table walker can only throw page faults
if (lsu_is_store_q) begin
lsu_exception_o = {riscv::STORE_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},update_vaddr}, 1'b1};
end else begin
lsu_exception_o = {riscv::LOAD_PAGE_FAULT, {{riscv::XLEN-riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}},update_vaddr}, 1'b1};
end
end
if (ptw_access_exception) begin
// an error makes the translation valid
lsu_valid_o = 1'b1;
// Any fault of the page table walk should be based of the original access type
if (lsu_is_store_q) begin
lsu_exception_o = {riscv::ST_ACCESS_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, lsu_vaddr_n}, 1'b1};
end else begin
lsu_exception_o = {riscv::LD_ACCESS_FAULT, {{riscv::XLEN-riscv::VLEN{1'b0}}, lsu_vaddr_n}, 1'b1};
end
end
end
end
// If translation is not enabled, check the paddr immediately against PMPs
else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin
if (lsu_is_store_q) begin
lsu_exception_o = {riscv::ST_ACCESS_FAULT, {{riscv::XLEN-riscv::PLEN{1'b0}}, lsu_paddr_o}, 1'b1};
end else begin
lsu_exception_o = {riscv::LD_ACCESS_FAULT, {{riscv::XLEN-riscv::PLEN{1'b0}}, lsu_paddr_o}, 1'b1};
end
end
end end
end
// Load/store PMP check // check for execute flag on memory
pmp #( assign match_any_execute_region = config_pkg::is_inside_execute_regions(
.CVA6Cfg ( CVA6Cfg ), CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr}
.PLEN ( riscv::PLEN ), );
.PMP_LEN ( riscv::PLEN - 2 ),
.NR_ENTRIES ( CVA6Cfg.NrPMPEntries )
) i_pmp_data (
.addr_i ( lsu_paddr_o ),
.priv_lvl_i ( ld_st_priv_lvl_i ),
.access_type_i ( pmp_access_type ),
// Configuration
.conf_addr_i ( pmpaddr_i ),
.conf_i ( pmpcfg_i ),
.allow_o ( pmp_data_allow )
);
// ---------- // Instruction fetch
// Registers pmp #(
// ---------- .CVA6Cfg (CVA6Cfg),
always_ff @(posedge clk_i or negedge rst_ni) begin .PLEN (riscv::PLEN),
if (~rst_ni) begin .PMP_LEN (riscv::PLEN - 2),
lsu_vaddr_q <= '0; .NR_ENTRIES(CVA6Cfg.NrPMPEntries)
lsu_req_q <= '0; ) i_pmp_if (
misaligned_ex_q <= '0; .addr_i (icache_areq_o.fetch_paddr),
dtlb_pte_q <= '0; .priv_lvl_i,
dtlb_hit_q <= '0; // we will always execute on the instruction fetch port
lsu_is_store_q <= '0; .access_type_i(riscv::ACCESS_EXEC),
dtlb_is_2M_q <= '0; // Configuration
dtlb_is_1G_q <= '0; .conf_addr_i (pmpaddr_i),
.conf_i (pmpcfg_i),
.allow_o (pmp_instr_allow)
);
//-----------------------
// Data Interface
//-----------------------
logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q;
riscv::pte_t dtlb_pte_n, dtlb_pte_q;
exception_t misaligned_ex_n, misaligned_ex_q;
logic lsu_req_n, lsu_req_q;
logic lsu_is_store_n, lsu_is_store_q;
logic dtlb_hit_n, dtlb_hit_q;
logic dtlb_is_2M_n, dtlb_is_2M_q;
logic dtlb_is_1G_n, dtlb_is_1G_q;
// check if we need to do translation or if we are always ready (e.g.: we are not translating anything)
assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? dtlb_lu_hit : 1'b1;
// Wires to PMP checks
riscv::pmp_access_t pmp_access_type;
logic pmp_data_allow;
localparam PPNWMin = (riscv::PPNW - 1 > 29) ? 29 : riscv::PPNW - 1;
// The data interface is simpler and only consists of a request/response interface
always_comb begin : data_interface
// save request and DTLB response
lsu_vaddr_n = lsu_vaddr_i;
lsu_req_n = lsu_req_i;
misaligned_ex_n = misaligned_ex_i;
dtlb_pte_n = dtlb_content;
dtlb_hit_n = dtlb_lu_hit;
lsu_is_store_n = lsu_is_store_i;
dtlb_is_2M_n = dtlb_is_2M;
dtlb_is_1G_n = dtlb_is_1G;
lsu_paddr_o = lsu_vaddr_q[riscv::PLEN-1:0];
lsu_dtlb_ppn_o = lsu_vaddr_n[riscv::PLEN-1:12];
lsu_valid_o = lsu_req_q;
lsu_exception_o = misaligned_ex_q;
pmp_access_type = lsu_is_store_q ? riscv::ACCESS_WRITE : riscv::ACCESS_READ;
// mute misaligned exceptions if there is no request otherwise they will throw accidental exceptions
misaligned_ex_n.valid = misaligned_ex_i.valid & lsu_req_i;
// Check if the User flag is set, then we may only access it in supervisor mode
// if SUM is enabled
daccess_err = en_ld_st_translation_i && ((ld_st_priv_lvl_i == riscv::PRIV_LVL_S && !sum_i && dtlb_pte_q.u) || // SUM is not set and we are trying to access a user page in supervisor mode
(ld_st_priv_lvl_i == riscv::PRIV_LVL_U && !dtlb_pte_q.u)); // this is not a user page but we are in user mode and trying to access it
// translation is enabled and no misaligned exception occurred
if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin
lsu_valid_o = 1'b0;
// 4K page
lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]};
lsu_dtlb_ppn_o = dtlb_content.ppn;
// Mega page
if (dtlb_is_2M_q) begin
lsu_paddr_o[20:12] = lsu_vaddr_q[20:12];
lsu_dtlb_ppn_o[20:12] = lsu_vaddr_n[20:12];
end
// Giga page
if (dtlb_is_1G_q) begin
lsu_paddr_o[PPNWMin:12] = lsu_vaddr_q[PPNWMin:12];
lsu_dtlb_ppn_o[PPNWMin:12] = lsu_vaddr_n[PPNWMin:12];
end
// ---------
// DTLB Hit
// --------
if (dtlb_hit_q && lsu_req_q) begin
lsu_valid_o = 1'b1;
// exception priority:
// PAGE_FAULTS have higher priority than ACCESS_FAULTS
// virtual memory based exceptions are PAGE_FAULTS
// physical memory based exceptions are ACCESS_FAULTS (PMA/PMP)
// this is a store
if (lsu_is_store_q) begin
// check if the page is write-able and we are not violating privileges
// also check if the dirty flag is set
if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin
lsu_exception_o = {
riscv::STORE_PAGE_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
1'b1
};
// Check if any PMPs are violated
end else if (!pmp_data_allow) begin
lsu_exception_o = {
riscv::ST_ACCESS_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
1'b1
};
end
// this is a load
end else begin end else begin
lsu_vaddr_q <= lsu_vaddr_n; // check for sufficient access privileges - throw a page fault if necessary
lsu_req_q <= lsu_req_n; if (daccess_err) begin
misaligned_ex_q <= misaligned_ex_n; lsu_exception_o = {
dtlb_pte_q <= dtlb_pte_n; riscv::LOAD_PAGE_FAULT,
dtlb_hit_q <= dtlb_hit_n; {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
lsu_is_store_q <= lsu_is_store_n; 1'b1
dtlb_is_2M_q <= dtlb_is_2M_n; };
dtlb_is_1G_q <= dtlb_is_1G_n; // Check if any PMPs are violated
end else if (!pmp_data_allow) begin
lsu_exception_o = {
riscv::LD_ACCESS_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q},
1'b1
};
end
end end
end else
// ---------
// DTLB Miss
// ---------
// watch out for exceptions
if (ptw_active && !walking_instr) begin
// page table walker threw an exception
if (ptw_error) begin
// an error makes the translation valid
lsu_valid_o = 1'b1;
// the page table walker can only throw page faults
if (lsu_is_store_q) begin
lsu_exception_o = {
riscv::STORE_PAGE_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr},
1'b1
};
end else begin
lsu_exception_o = {
riscv::LOAD_PAGE_FAULT,
{{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr},
1'b1
};
end
end
if (ptw_access_exception) begin
// an error makes the translation valid
lsu_valid_o = 1'b1;
// Any fault of the page table walk should be based on the original access type
if (lsu_is_store_q) begin
lsu_exception_o = {
riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n}, 1'b1
};
end else begin
lsu_exception_o = {
riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n}, 1'b1
};
end
end
end
end // If translation is not enabled, check the paddr immediately against PMPs
else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin
if (lsu_is_store_q) begin
lsu_exception_o = {
riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::PLEN{1'b0}}, lsu_paddr_o}, 1'b1
};
end else begin
lsu_exception_o = {
riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::PLEN{1'b0}}, lsu_paddr_o}, 1'b1
};
end
end end
end
// Load/store PMP check
pmp #(
.CVA6Cfg (CVA6Cfg),
.PLEN (riscv::PLEN),
.PMP_LEN (riscv::PLEN - 2),
.NR_ENTRIES(CVA6Cfg.NrPMPEntries)
) i_pmp_data (
.addr_i (lsu_paddr_o),
.priv_lvl_i (ld_st_priv_lvl_i),
.access_type_i(pmp_access_type),
// Configuration
.conf_addr_i (pmpaddr_i),
.conf_i (pmpcfg_i),
.allow_o (pmp_data_allow)
);
// ----------
// Registers
// ----------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
lsu_vaddr_q <= '0;
lsu_req_q <= '0;
misaligned_ex_q <= '0;
dtlb_pte_q <= '0;
dtlb_hit_q <= '0;
lsu_is_store_q <= '0;
dtlb_is_2M_q <= '0;
dtlb_is_1G_q <= '0;
end else begin
lsu_vaddr_q <= lsu_vaddr_n;
lsu_req_q <= lsu_req_n;
misaligned_ex_q <= misaligned_ex_n;
dtlb_pte_q <= dtlb_pte_n;
dtlb_hit_q <= dtlb_hit_n;
lsu_is_store_q <= lsu_is_store_n;
dtlb_is_2M_q <= dtlb_is_2M_n;
dtlb_is_1G_q <= dtlb_is_1G_n;
end
end
endmodule endmodule

View file

@ -15,395 +15,395 @@
/* verilator lint_off WIDTH */ /* verilator lint_off WIDTH */
module ptw import ariane_pkg::*; #( module ptw
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter int ASID_WIDTH = 1 #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int ASID_WIDTH = 1
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // flush everything, we need to do this because input logic flush_i, // flush everything, we need to do this because
// actually everything we do is speculative at this stage // actually everything we do is speculative at this stage
// e.g.: there could be a CSR instruction that changes everything // e.g.: there could be a CSR instruction that changes everything
output logic ptw_active_o, output logic ptw_active_o,
output logic walking_instr_o, // set when walking for TLB output logic walking_instr_o, // set when walking for TLB
output logic ptw_error_o, // set when an error occurred output logic ptw_error_o, // set when an error occurred
output logic ptw_access_exception_o, // set when a PMP access exception occurred output logic ptw_access_exception_o, // set when a PMP access exception occurred
input logic enable_translation_i, // CSRs indicate to enable SV39 input logic enable_translation_i, // CSRs indicate to enable SV39
input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores
input logic lsu_is_store_i, // this translation was triggered by a store input logic lsu_is_store_i, // this translation was triggered by a store
// PTW memory interface // PTW memory interface
input dcache_req_o_t req_port_i, input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o, output dcache_req_i_t req_port_o,
// to TLBs, update logic // to TLBs, update logic
output tlb_update_t itlb_update_o, output tlb_update_t itlb_update_o,
output tlb_update_t dtlb_update_o, output tlb_update_t dtlb_update_o,
output logic [riscv::VLEN-1:0] update_vaddr_o, output logic [riscv::VLEN-1:0] update_vaddr_o,
input logic [ASID_WIDTH-1:0] asid_i, input logic [ ASID_WIDTH-1:0] asid_i,
// from TLBs // from TLBs
// did we miss? // did we miss?
input logic itlb_access_i, input logic itlb_access_i,
input logic itlb_hit_i, input logic itlb_hit_i,
input logic [riscv::VLEN-1:0] itlb_vaddr_i, input logic [riscv::VLEN-1:0] itlb_vaddr_i,
input logic dtlb_access_i, input logic dtlb_access_i,
input logic dtlb_hit_i, input logic dtlb_hit_i,
input logic [riscv::VLEN-1:0] dtlb_vaddr_i, input logic [riscv::VLEN-1:0] dtlb_vaddr_i,
// from CSR file // from CSR file
input logic [riscv::PPNW-1:0] satp_ppn_i, // ppn from satp input logic [riscv::PPNW-1:0] satp_ppn_i, // ppn from satp
input logic mxr_i, input logic mxr_i,
// Performance counters // Performance counters
output logic itlb_miss_o, output logic itlb_miss_o,
output logic dtlb_miss_o, output logic dtlb_miss_o,
// PMP // PMP
input riscv::pmpcfg_t [15:0] pmpcfg_i, input riscv::pmpcfg_t [15:0] pmpcfg_i,
input logic [15:0][riscv::PLEN-3:0] pmpaddr_i, input logic [15:0][riscv::PLEN-3:0] pmpaddr_i,
output logic [riscv::PLEN-1:0] bad_paddr_o output logic [riscv::PLEN-1:0] bad_paddr_o
); );
// input registers
logic data_rvalid_q;
logic [63:0] data_rdata_q;
riscv::pte_t pte;
assign pte = riscv::pte_t'(data_rdata_q);
enum logic [2:0] {
IDLE,
WAIT_GRANT,
PTE_LOOKUP,
WAIT_RVALID,
PROPAGATE_ERROR,
PROPAGATE_ACCESS_ERROR
}
state_q, state_d;
// SV39 defines three levels of page tables
enum logic [1:0] {
LVL1,
LVL2,
LVL3
}
ptw_lvl_q, ptw_lvl_n;
// is this an instruction page table walk?
logic is_instr_ptw_q, is_instr_ptw_n;
logic global_mapping_q, global_mapping_n;
// latched tag signal
logic tag_valid_n, tag_valid_q;
// register the ASID
logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n;
// register the VPN we need to walk, SV39 defines a 39 bit virtual address
logic [riscv::VLEN-1:0] vaddr_q, vaddr_n;
// 8 byte aligned physical pointer (the low three bits are always written as 3'b0)
logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n;
// Assignments
assign update_vaddr_o = vaddr_q;
assign ptw_active_o = (state_q != IDLE);
assign walking_instr_o = is_instr_ptw_q;
// directly output the correct physical address
assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0];
assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH];
// we are never going to kill this request
assign req_port_o.kill_req = '0;
// we are never going to write with the HPTW
assign req_port_o.data_wdata = 64'b0;
// we only issue one single request at a time
assign req_port_o.data_id = '0;
// -----------
// TLB Update
// -----------
assign itlb_update_o.vpn = {{39 - riscv::SV{1'b0}}, vaddr_q[riscv::SV-1:12]};
assign dtlb_update_o.vpn = {{39 - riscv::SV{1'b0}}, vaddr_q[riscv::SV-1:12]};
// update the correct page table level
assign itlb_update_o.is_2M = (ptw_lvl_q == LVL2);
assign itlb_update_o.is_1G = (ptw_lvl_q == LVL1);
assign dtlb_update_o.is_2M = (ptw_lvl_q == LVL2);
assign dtlb_update_o.is_1G = (ptw_lvl_q == LVL1);
// output the correct ASID
assign itlb_update_o.asid = tlb_update_asid_q;
assign dtlb_update_o.asid = tlb_update_asid_q;
// set the global mapping bit
assign itlb_update_o.content = pte | (global_mapping_q << 5);
assign dtlb_update_o.content = pte | (global_mapping_q << 5);
assign req_port_o.tag_valid = tag_valid_q;
logic allow_access;
assign bad_paddr_o = ptw_access_exception_o ? ptw_pptr_q : 'b0;
pmp #(
.CVA6Cfg (CVA6Cfg),
.PLEN (riscv::PLEN),
.PMP_LEN (riscv::PLEN - 2),
.NR_ENTRIES(CVA6Cfg.NrPMPEntries)
) i_pmp_ptw (
.addr_i (ptw_pptr_q),
// PTW access are always checked as if in S-Mode...
.priv_lvl_i (riscv::PRIV_LVL_S),
// ...and they are always loads
.access_type_i(riscv::ACCESS_READ),
// Configuration
.conf_addr_i (pmpaddr_i),
.conf_i (pmpcfg_i),
.allow_o (allow_access)
);
//-------------------
// Page table walker
//-------------------
// A virtual address va is translated into a physical address pa as follows:
// 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39,
// PAGESIZE=2^12 and LEVELS=3.)
// 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. (For
// Sv32, PTESIZE=4.)
// 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an access
// exception.
// 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to step 5.
// Otherwise, this PTE is a pointer to the next level of the page table.
// Let i=i-1. If i < 0, stop and raise an access exception. Otherwise, let
// a = pte.ppn × PAGESIZE and go to step 2.
// 5. A leaf PTE has been found. Determine if the requested memory access
// is allowed by the pte.r, pte.w, and pte.x bits. If not, stop and
// raise an access exception. Otherwise, the translation is successful.
// Set pte.a to 1, and, if the memory access is a store, set pte.d to 1.
// The translated physical address is given as follows:
// - pa.pgoff = va.pgoff.
// - If i > 0, then this is a superpage translation and
// pa.ppn[i-1:0] = va.vpn[i-1:0].
// - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
always_comb begin : ptw
// default assignments
// PTW memory interface
tag_valid_n = 1'b0;
req_port_o.data_req = 1'b0;
req_port_o.data_be = 8'hFF;
req_port_o.data_size = 2'b11;
req_port_o.data_we = 1'b0;
ptw_error_o = 1'b0;
ptw_access_exception_o = 1'b0;
itlb_update_o.valid = 1'b0;
dtlb_update_o.valid = 1'b0;
is_instr_ptw_n = is_instr_ptw_q;
ptw_lvl_n = ptw_lvl_q;
ptw_pptr_n = ptw_pptr_q;
state_d = state_q;
global_mapping_n = global_mapping_q;
// input registers // input registers
logic data_rvalid_q; tlb_update_asid_n = tlb_update_asid_q;
logic [63:0] data_rdata_q; vaddr_n = vaddr_q;
riscv::pte_t pte; itlb_miss_o = 1'b0;
assign pte = riscv::pte_t'(data_rdata_q); dtlb_miss_o = 1'b0;
enum logic[2:0] { case (state_q)
IDLE,
WAIT_GRANT,
PTE_LOOKUP,
WAIT_RVALID,
PROPAGATE_ERROR,
PROPAGATE_ACCESS_ERROR
} state_q, state_d;
// SV39 defines three levels of page tables IDLE: begin
enum logic [1:0] { // by default we start with the top-most page table
LVL1, LVL2, LVL3 ptw_lvl_n = LVL1;
} ptw_lvl_q, ptw_lvl_n; global_mapping_n = 1'b0;
is_instr_ptw_n = 1'b0;
// if we got an ITLB miss
if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin
ptw_pptr_n = {satp_ppn_i, itlb_vaddr_i[riscv::SV-1:30], 3'b0};
is_instr_ptw_n = 1'b1;
tlb_update_asid_n = asid_i;
vaddr_n = itlb_vaddr_i;
state_d = WAIT_GRANT;
itlb_miss_o = 1'b1;
// we got an DTLB miss
end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin
ptw_pptr_n = {satp_ppn_i, dtlb_vaddr_i[riscv::SV-1:30], 3'b0};
tlb_update_asid_n = asid_i;
vaddr_n = dtlb_vaddr_i;
state_d = WAIT_GRANT;
dtlb_miss_o = 1'b1;
end
end
// is this an instruction page table walk? WAIT_GRANT: begin
logic is_instr_ptw_q, is_instr_ptw_n; // send a request out
logic global_mapping_q, global_mapping_n; req_port_o.data_req = 1'b1;
// latched tag signal // wait for the WAIT_GRANT
logic tag_valid_n, tag_valid_q; if (req_port_i.data_gnt) begin
// register the ASID // send the tag valid signal one cycle later
logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n; tag_valid_n = 1'b1;
// register the VPN we need to walk, SV39 defines a 39 bit virtual address state_d = PTE_LOOKUP;
logic [riscv::VLEN-1:0] vaddr_q, vaddr_n; end
// 4 byte aligned physical pointer end
logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n;
// Assignments PTE_LOOKUP: begin
assign update_vaddr_o = vaddr_q; // we wait for the valid signal
if (data_rvalid_q) begin
assign ptw_active_o = (state_q != IDLE); // check if the global mapping bit is set
assign walking_instr_o = is_instr_ptw_q; if (pte.g) global_mapping_n = 1'b1;
// directly output the correct physical address
assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0];
assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH];
// we are never going to kill this request
assign req_port_o.kill_req = '0;
// we are never going to write with the HPTW
assign req_port_o.data_wdata = 64'b0;
// we only issue one single request at a time
assign req_port_o.data_id = '0;
// -----------
// TLB Update
// -----------
assign itlb_update_o.vpn = {{39-riscv::SV{1'b0}}, vaddr_q[riscv::SV-1:12]};
assign dtlb_update_o.vpn = {{39-riscv::SV{1'b0}}, vaddr_q[riscv::SV-1:12]};
// update the correct page table level
assign itlb_update_o.is_2M = (ptw_lvl_q == LVL2);
assign itlb_update_o.is_1G = (ptw_lvl_q == LVL1);
assign dtlb_update_o.is_2M = (ptw_lvl_q == LVL2);
assign dtlb_update_o.is_1G = (ptw_lvl_q == LVL1);
// output the correct ASID
assign itlb_update_o.asid = tlb_update_asid_q;
assign dtlb_update_o.asid = tlb_update_asid_q;
// set the global mapping bit
assign itlb_update_o.content = pte | (global_mapping_q << 5);
assign dtlb_update_o.content = pte | (global_mapping_q << 5);
assign req_port_o.tag_valid = tag_valid_q; // -------------
// Invalid PTE
// -------------
// If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception.
if (!pte.v || (!pte.r && pte.w)) state_d = PROPAGATE_ERROR;
// -----------
// Valid PTE
// -----------
else begin
state_d = IDLE;
// it is a valid PTE
// if pte.r = 1 or pte.x = 1 it is a valid PTE
if (pte.r || pte.x) begin
// Valid translation found (either 1G, 2M or 4K entry)
if (is_instr_ptw_q) begin
// ------------
// Update ITLB
// ------------
// If page is not executable, we can directly raise an error. This
// doesn't put a useless entry into the TLB. The same idea applies
// to the access flag since we let the access flag be managed by SW.
if (!pte.x || !pte.a) state_d = PROPAGATE_ERROR;
else itlb_update_o.valid = 1'b1;
logic allow_access; end else begin
// ------------
assign bad_paddr_o = ptw_access_exception_o ? ptw_pptr_q : 'b0; // Update DTLB
// ------------
pmp #( // Check if the access flag has been set, otherwise throw a page-fault
.CVA6Cfg ( CVA6Cfg ), // and let the software handle those bits.
.PLEN ( riscv::PLEN ), // If page is not readable (there are no write-only pages)
.PMP_LEN ( riscv::PLEN - 2 ), // we can directly raise an error. This doesn't put a useless
.NR_ENTRIES ( CVA6Cfg.NrPMPEntries ) // entry into the TLB.
) i_pmp_ptw ( if (pte.a && (pte.r || (pte.x && mxr_i))) begin
.addr_i ( ptw_pptr_q ), dtlb_update_o.valid = 1'b1;
// PTW access are always checked as if in S-Mode... end else begin
.priv_lvl_i ( riscv::PRIV_LVL_S ), state_d = PROPAGATE_ERROR;
// ...and they are always loads
.access_type_i ( riscv::ACCESS_READ ),
// Configuration
.conf_addr_i ( pmpaddr_i ),
.conf_i ( pmpcfg_i ),
.allow_o ( allow_access )
);
//-------------------
// Page table walker
//-------------------
// A virtual address va is translated into a physical address pa as follows:
// 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39,
// PAGESIZE=2^12 and LEVELS=3.)
// 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. (For
// Sv32, PTESIZE=4.)
// 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an access
// exception.
// 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to step 5.
// Otherwise, this PTE is a pointer to the next level of the page table.
// Let i=i-1. If i < 0, stop and raise an access exception. Otherwise, let
// a = pte.ppn × PAGESIZE and go to step 2.
// 5. A leaf PTE has been found. Determine if the requested memory access
// is allowed by the pte.r, pte.w, and pte.x bits. If not, stop and
// raise an access exception. Otherwise, the translation is successful.
// Set pte.a to 1, and, if the memory access is a store, set pte.d to 1.
// The translated physical address is given as follows:
// - pa.pgoff = va.pgoff.
// - If i > 0, then this is a superpage translation and
// pa.ppn[i-1:0] = va.vpn[i-1:0].
// - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
always_comb begin : ptw
// default assignments
// PTW memory interface
tag_valid_n = 1'b0;
req_port_o.data_req = 1'b0;
req_port_o.data_be = 8'hFF;
req_port_o.data_size = 2'b11;
req_port_o.data_we = 1'b0;
ptw_error_o = 1'b0;
ptw_access_exception_o = 1'b0;
itlb_update_o.valid = 1'b0;
dtlb_update_o.valid = 1'b0;
is_instr_ptw_n = is_instr_ptw_q;
ptw_lvl_n = ptw_lvl_q;
ptw_pptr_n = ptw_pptr_q;
state_d = state_q;
global_mapping_n = global_mapping_q;
// input registers
tlb_update_asid_n = tlb_update_asid_q;
vaddr_n = vaddr_q;
itlb_miss_o = 1'b0;
dtlb_miss_o = 1'b0;
case (state_q)
IDLE: begin
// by default we start with the top-most page table
ptw_lvl_n = LVL1;
global_mapping_n = 1'b0;
is_instr_ptw_n = 1'b0;
// if we got an ITLB miss
if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin
ptw_pptr_n = {satp_ppn_i, itlb_vaddr_i[riscv::SV-1:30], 3'b0};
is_instr_ptw_n = 1'b1;
tlb_update_asid_n = asid_i;
vaddr_n = itlb_vaddr_i;
state_d = WAIT_GRANT;
itlb_miss_o = 1'b1;
// we got an DTLB miss
end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin
ptw_pptr_n = {satp_ppn_i, dtlb_vaddr_i[riscv::SV-1:30], 3'b0};
tlb_update_asid_n = asid_i;
vaddr_n = dtlb_vaddr_i;
state_d = WAIT_GRANT;
dtlb_miss_o = 1'b1;
end end
end // Request is a store: perform some additional checks
// If the request was a store and the page is not write-able, raise an error
WAIT_GRANT: begin // the same applies if the dirty flag is not set
// send a request out if (lsu_is_store_i && (!pte.w || !pte.d)) begin
req_port_o.data_req = 1'b1; dtlb_update_o.valid = 1'b0;
// wait for the WAIT_GRANT state_d = PROPAGATE_ERROR;
if (req_port_i.data_gnt) begin
// send the tag valid signal one cycle later
tag_valid_n = 1'b1;
state_d = PTE_LOOKUP;
end end
end
// check if the ppn is correctly aligned:
// 6. If i > 0 and pa.ppn[i-1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault
// exception.
if (ptw_lvl_q == LVL1 && pte.ppn[17:0] != '0) begin
state_d = PROPAGATE_ERROR;
dtlb_update_o.valid = 1'b0;
itlb_update_o.valid = 1'b0;
end else if (ptw_lvl_q == LVL2 && pte.ppn[8:0] != '0) begin
state_d = PROPAGATE_ERROR;
dtlb_update_o.valid = 1'b0;
itlb_update_o.valid = 1'b0;
end
// this is a pointer to the next TLB level
end else begin
// pointer to next level of page table
if (ptw_lvl_q == LVL1) begin
// we are in the second level now
ptw_lvl_n = LVL2;
ptw_pptr_n = {pte.ppn, vaddr_q[29:21], 3'b0};
end
if (ptw_lvl_q == LVL2) begin
// here we received a pointer to the third level
ptw_lvl_n = LVL3;
ptw_pptr_n = {pte.ppn, vaddr_q[20:12], 3'b0};
end
state_d = WAIT_GRANT;
if (ptw_lvl_q == LVL3) begin
// Should already be the last level page table => Error
ptw_lvl_n = LVL3;
state_d = PROPAGATE_ERROR;
end
end end
end
PTE_LOOKUP: begin // Check if this access was actually allowed from a PMP perspective
// we wait for the valid signal if (!allow_access) begin
if (data_rvalid_q) begin itlb_update_o.valid = 1'b0;
dtlb_update_o.valid = 1'b0;
// we have to return the failed address in bad_addr
ptw_pptr_n = ptw_pptr_q;
state_d = PROPAGATE_ACCESS_ERROR;
end
end
// we've got a data WAIT_GRANT so tell the cache that the tag is valid
end
// Propagate error to MMU/LSU
PROPAGATE_ERROR: begin
state_d = IDLE;
ptw_error_o = 1'b1;
end
PROPAGATE_ACCESS_ERROR: begin
state_d = IDLE;
ptw_access_exception_o = 1'b1;
end
// wait for the rvalid before going back to IDLE
WAIT_RVALID: begin
if (data_rvalid_q) state_d = IDLE;
end
default: begin
state_d = IDLE;
end
endcase
// check if the global mapping bit is set // -------
if (pte.g) // Flush
global_mapping_n = 1'b1; // -------
// should we have flushed before we got an rvalid, wait for it until going back to IDLE
// ------------- if (flush_i) begin
// Invalid PTE // on a flush check whether we are
// ------------- // 1. in the PTE Lookup check whether we still need to wait for an rvalid
// If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a page-fault exception. // 2. waiting for a grant, if so: wait for it
if (!pte.v || (!pte.r && pte.w)) // if not, go back to idle
state_d = PROPAGATE_ERROR; if (((state_q inside {PTE_LOOKUP, WAIT_RVALID}) && !data_rvalid_q) ||
// -----------
// Valid PTE
// -----------
else begin
state_d = IDLE;
// it is a valid PTE
// if pte.r = 1 or pte.x = 1 it is a valid PTE
if (pte.r || pte.x) begin
// Valid translation found (either 1G, 2M or 4K entry)
if (is_instr_ptw_q) begin
// ------------
// Update ITLB
// ------------
// If page is not executable, we can directly raise an error. This
// doesn't put a useless entry into the TLB. The same idea applies
// to the access flag since we let the access flag be managed by SW.
if (!pte.x || !pte.a)
state_d = PROPAGATE_ERROR;
else
itlb_update_o.valid = 1'b1;
end else begin
// ------------
// Update DTLB
// ------------
// Check if the access flag has been set, otherwise throw a page-fault
// and let the software handle those bits.
// If page is not readable (there are no write-only pages)
// we can directly raise an error. This doesn't put a useless
// entry into the TLB.
if (pte.a && (pte.r || (pte.x && mxr_i))) begin
dtlb_update_o.valid = 1'b1;
end else begin
state_d = PROPAGATE_ERROR;
end
// Request is a store: perform some additional checks
// If the request was a store and the page is not write-able, raise an error
// the same applies if the dirty flag is not set
if (lsu_is_store_i && (!pte.w || !pte.d)) begin
dtlb_update_o.valid = 1'b0;
state_d = PROPAGATE_ERROR;
end
end
// check if the ppn is correctly aligned:
// 6. If i > 0 and pa.ppn[i-1 : 0] != 0, this is a misaligned superpage; stop and raise a page-fault
// exception.
if (ptw_lvl_q == LVL1 && pte.ppn[17:0] != '0) begin
state_d = PROPAGATE_ERROR;
dtlb_update_o.valid = 1'b0;
itlb_update_o.valid = 1'b0;
end else if (ptw_lvl_q == LVL2 && pte.ppn[8:0] != '0) begin
state_d = PROPAGATE_ERROR;
dtlb_update_o.valid = 1'b0;
itlb_update_o.valid = 1'b0;
end
// this is a pointer to the next TLB level
end else begin
// pointer to next level of page table
if (ptw_lvl_q == LVL1) begin
// we are in the second level now
ptw_lvl_n = LVL2;
ptw_pptr_n = {pte.ppn, vaddr_q[29:21], 3'b0};
end
if (ptw_lvl_q == LVL2) begin
// here we received a pointer to the third level
ptw_lvl_n = LVL3;
ptw_pptr_n = {pte.ppn, vaddr_q[20:12], 3'b0};
end
state_d = WAIT_GRANT;
if (ptw_lvl_q == LVL3) begin
// Should already be the last level page table => Error
ptw_lvl_n = LVL3;
state_d = PROPAGATE_ERROR;
end
end
end
// Check if this access was actually allowed from a PMP perspective
if (!allow_access) begin
itlb_update_o.valid = 1'b0;
dtlb_update_o.valid = 1'b0;
// we have to return the failed address in bad_addr
ptw_pptr_n = ptw_pptr_q;
state_d = PROPAGATE_ACCESS_ERROR;
end
end
// we've got a data WAIT_GRANT so tell the cache that the tag is valid
end
// Propagate error to MMU/LSU
PROPAGATE_ERROR: begin
state_d = IDLE;
ptw_error_o = 1'b1;
end
PROPAGATE_ACCESS_ERROR: begin
state_d = IDLE;
ptw_access_exception_o = 1'b1;
end
// wait for the rvalid before going back to IDLE
WAIT_RVALID: begin
if (data_rvalid_q)
state_d = IDLE;
end
default: begin
state_d = IDLE;
end
endcase
// -------
// Flush
// -------
// should we have flushed before we got an rvalid, wait for it until going back to IDLE
if (flush_i) begin
// on a flush check whether we are
// 1. in the PTE Lookup check whether we still need to wait for an rvalid
// 2. waiting for a grant, if so: wait for it
// if not, go back to idle
if (((state_q inside {PTE_LOOKUP, WAIT_RVALID}) && !data_rvalid_q) ||
((state_q == WAIT_GRANT) && req_port_i.data_gnt)) ((state_q == WAIT_GRANT) && req_port_i.data_gnt))
state_d = WAIT_RVALID; state_d = WAIT_RVALID;
else else state_d = IDLE;
state_d = IDLE;
end
end end
end
// sequential process // sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin if (~rst_ni) begin
state_q <= IDLE; state_q <= IDLE;
is_instr_ptw_q <= 1'b0; is_instr_ptw_q <= 1'b0;
ptw_lvl_q <= LVL1; ptw_lvl_q <= LVL1;
tag_valid_q <= 1'b0; tag_valid_q <= 1'b0;
tlb_update_asid_q <= '0; tlb_update_asid_q <= '0;
vaddr_q <= '0; vaddr_q <= '0;
ptw_pptr_q <= '0; ptw_pptr_q <= '0;
global_mapping_q <= 1'b0; global_mapping_q <= 1'b0;
data_rdata_q <= '0; data_rdata_q <= '0;
data_rvalid_q <= 1'b0; data_rvalid_q <= 1'b0;
end else begin end else begin
state_q <= state_d; state_q <= state_d;
ptw_pptr_q <= ptw_pptr_n; ptw_pptr_q <= ptw_pptr_n;
is_instr_ptw_q <= is_instr_ptw_n; is_instr_ptw_q <= is_instr_ptw_n;
ptw_lvl_q <= ptw_lvl_n; ptw_lvl_q <= ptw_lvl_n;
tag_valid_q <= tag_valid_n; tag_valid_q <= tag_valid_n;
tlb_update_asid_q <= tlb_update_asid_n; tlb_update_asid_q <= tlb_update_asid_n;
vaddr_q <= vaddr_n; vaddr_q <= vaddr_n;
global_mapping_q <= global_mapping_n; global_mapping_q <= global_mapping_n;
data_rdata_q <= req_port_i.data_rdata; data_rdata_q <= req_port_i.data_rdata;
data_rvalid_q <= req_port_i.data_rvalid; data_rvalid_q <= req_port_i.data_rvalid;
end
end end
end
endmodule endmodule
/* verilator lint_on WIDTH */ /* verilator lint_on WIDTH */

View file

@ -15,149 +15,151 @@
// fully set-associative // fully set-associative
module tlb import ariane_pkg::*; #( module tlb
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter int unsigned TLB_ENTRIES = 4, #(
parameter int unsigned ASID_WIDTH = 1 parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
)( parameter int unsigned TLB_ENTRIES = 4,
input logic clk_i, // Clock parameter int unsigned ASID_WIDTH = 1
input logic rst_ni, // Asynchronous reset active low ) (
input logic flush_i, // Flush signal input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // Flush signal
// Update TLB // Update TLB
input tlb_update_t update_i, input tlb_update_t update_i,
// Lookup signals // Lookup signals
input logic lu_access_i, input logic lu_access_i,
input logic [ASID_WIDTH-1:0] lu_asid_i, input logic [ ASID_WIDTH-1:0] lu_asid_i,
input logic [riscv::VLEN-1:0] lu_vaddr_i, input logic [riscv::VLEN-1:0] lu_vaddr_i,
output riscv::pte_t lu_content_o, output riscv::pte_t lu_content_o,
input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i, input logic [ ASID_WIDTH-1:0] asid_to_be_flushed_i,
input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i,
output logic lu_is_2M_o, output logic lu_is_2M_o,
output logic lu_is_1G_o, output logic lu_is_1G_o,
output logic lu_hit_o output logic lu_hit_o
); );
// SV39 defines three levels of page tables // SV39 defines three levels of page tables
struct packed { struct packed {
logic [ASID_WIDTH-1:0] asid; logic [ASID_WIDTH-1:0] asid;
logic [riscv::VPN2:0] vpn2; logic [riscv::VPN2:0] vpn2;
logic [8:0] vpn1; logic [8:0] vpn1;
logic [8:0] vpn0; logic [8:0] vpn0;
logic is_2M; logic is_2M;
logic is_1G; logic is_1G;
logic valid; logic valid;
} [TLB_ENTRIES-1:0] tags_q, tags_n; } [TLB_ENTRIES-1:0]
tags_q, tags_n;
riscv::pte_t [TLB_ENTRIES-1:0] content_q, content_n; riscv::pte_t [TLB_ENTRIES-1:0] content_q, content_n;
logic [8:0] vpn0, vpn1; logic [8:0] vpn0, vpn1;
logic [riscv::VPN2:0] vpn2; logic [ riscv::VPN2:0] vpn2;
logic [TLB_ENTRIES-1:0] lu_hit; // to replacement logic logic [TLB_ENTRIES-1:0] lu_hit; // to replacement logic
logic [TLB_ENTRIES-1:0] replace_en; // replace the following entry, set by replacement strategy logic [TLB_ENTRIES-1:0] replace_en; // replace the following entry, set by replacement strategy
//------------- //-------------
// Translation // Translation
//------------- //-------------
always_comb begin : translation always_comb begin : translation
vpn0 = lu_vaddr_i[20:12]; vpn0 = lu_vaddr_i[20:12];
vpn1 = lu_vaddr_i[29:21]; vpn1 = lu_vaddr_i[29:21];
vpn2 = lu_vaddr_i[30+riscv::VPN2:30]; vpn2 = lu_vaddr_i[30+riscv::VPN2:30];
// default assignment // default assignment
lu_hit = '{default: 0}; lu_hit = '{default: 0};
lu_hit_o = 1'b0; lu_hit_o = 1'b0;
lu_content_o = '{default: 0}; lu_content_o = '{default: 0};
lu_is_1G_o = 1'b0; lu_is_1G_o = 1'b0;
lu_is_2M_o = 1'b0; lu_is_2M_o = 1'b0;
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
// first level match, this may be a giga page, check the ASID flags as well // first level match, this may be a giga page, check the ASID flags as well
// if the entry is associated to a global address, don't match the ASID (ASID is don't care) // if the entry is associated to a global address, don't match the ASID (ASID is don't care)
if (tags_q[i].valid && ((lu_asid_i == tags_q[i].asid) || content_q[i].g) && vpn2 == tags_q[i].vpn2) begin if (tags_q[i].valid && ((lu_asid_i == tags_q[i].asid) || content_q[i].g) && vpn2 == tags_q[i].vpn2) begin
// second level // second level
if (tags_q[i].is_1G) begin if (tags_q[i].is_1G) begin
lu_is_1G_o = 1'b1; lu_is_1G_o = 1'b1;
lu_content_o = content_q[i]; lu_content_o = content_q[i];
lu_hit_o = 1'b1; lu_hit_o = 1'b1;
lu_hit[i] = 1'b1; lu_hit[i] = 1'b1;
// not a giga page hit so check further // not a giga page hit so check further
end else if (vpn1 == tags_q[i].vpn1) begin end else if (vpn1 == tags_q[i].vpn1) begin
// this could be a 2 mega page hit or a 4 kB hit // this could be a 2 mega page hit or a 4 kB hit
// output accordingly // output accordingly
if (tags_q[i].is_2M || vpn0 == tags_q[i].vpn0) begin if (tags_q[i].is_2M || vpn0 == tags_q[i].vpn0) begin
lu_is_2M_o = tags_q[i].is_2M; lu_is_2M_o = tags_q[i].is_2M;
lu_content_o = content_q[i]; lu_content_o = content_q[i];
lu_hit_o = 1'b1; lu_hit_o = 1'b1;
lu_hit[i] = 1'b1; lu_hit[i] = 1'b1;
end end
end
end
end end
end
end end
end
logic asid_to_be_flushed_is0; // indicates that the ASID provided by SFENCE.VMA (rs2)is 0, active high logic asid_to_be_flushed_is0; // indicates that the ASID provided by SFENCE.VMA (rs2)is 0, active high
logic vaddr_to_be_flushed_is0; // indicates that the VADDR provided by SFENCE.VMA (rs1)is 0, active high logic vaddr_to_be_flushed_is0; // indicates that the VADDR provided by SFENCE.VMA (rs1)is 0, active high
logic [TLB_ENTRIES-1:0] vaddr_vpn0_match; logic [TLB_ENTRIES-1:0] vaddr_vpn0_match;
logic [TLB_ENTRIES-1:0] vaddr_vpn1_match; logic [TLB_ENTRIES-1:0] vaddr_vpn1_match;
logic [TLB_ENTRIES-1:0] vaddr_vpn2_match; logic [TLB_ENTRIES-1:0] vaddr_vpn2_match;
assign asid_to_be_flushed_is0 = ~(|asid_to_be_flushed_i); assign asid_to_be_flushed_is0 = ~(|asid_to_be_flushed_i);
assign vaddr_to_be_flushed_is0 = ~(|vaddr_to_be_flushed_i); assign vaddr_to_be_flushed_is0 = ~(|vaddr_to_be_flushed_i);
// ------------------ // ------------------
// Update and Flush // Update and Flush
// ------------------ // ------------------
always_comb begin : update_flush always_comb begin : update_flush
tags_n = tags_q; tags_n = tags_q;
content_n = content_q; content_n = content_q;
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin
vaddr_vpn0_match[i] = (vaddr_to_be_flushed_i[20:12] == tags_q[i].vpn0); vaddr_vpn0_match[i] = (vaddr_to_be_flushed_i[20:12] == tags_q[i].vpn0);
vaddr_vpn1_match[i] = (vaddr_to_be_flushed_i[29:21] == tags_q[i].vpn1); vaddr_vpn1_match[i] = (vaddr_to_be_flushed_i[29:21] == tags_q[i].vpn1);
vaddr_vpn2_match[i] = (vaddr_to_be_flushed_i[30+riscv::VPN2:30] == tags_q[i].vpn2); vaddr_vpn2_match[i] = (vaddr_to_be_flushed_i[30+riscv::VPN2:30] == tags_q[i].vpn2);
if (flush_i) begin if (flush_i) begin
// invalidate logic // invalidate logic
// flush everything if ASID is 0 and vaddr is 0 ("SFENCE.VMA x0 x0" case) // flush everything if ASID is 0 and vaddr is 0 ("SFENCE.VMA x0 x0" case)
if (asid_to_be_flushed_is0 && vaddr_to_be_flushed_is0 ) if (asid_to_be_flushed_is0 && vaddr_to_be_flushed_is0) tags_n[i].valid = 1'b0;
tags_n[i].valid = 1'b0; // flush vaddr in all addressing space ("SFENCE.VMA vaddr x0" case), it should happen only for leaf pages
// flush vaddr in all addressing space ("SFENCE.VMA vaddr x0" case), it should happen only for leaf pages else if (asid_to_be_flushed_is0 && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && vaddr_vpn2_match[i] && tags_q[i].is_2M) ) && (~vaddr_to_be_flushed_is0))
else if (asid_to_be_flushed_is0 && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && vaddr_vpn2_match[i] && tags_q[i].is_2M) ) && (~vaddr_to_be_flushed_is0)) tags_n[i].valid = 1'b0;
tags_n[i].valid = 1'b0; // the entry is flushed if it's not global and asid and vaddr both matches with the entry to be flushed ("SFENCE.VMA vaddr asid" case)
// the entry is flushed if it's not global and asid and vaddr both matches with the entry to be flushed ("SFENCE.VMA vaddr asid" case) else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && vaddr_vpn2_match[i] && tags_q[i].is_2M)) && (asid_to_be_flushed_i == tags_q[i].asid) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0))
else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i] && vaddr_vpn2_match[i]) || (vaddr_vpn2_match[i] && tags_q[i].is_1G) || (vaddr_vpn1_match[i] && vaddr_vpn2_match[i] && tags_q[i].is_2M)) && (asid_to_be_flushed_i == tags_q[i].asid) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0)) tags_n[i].valid = 1'b0;
tags_n[i].valid = 1'b0; // the entry is flushed if it's not global, and the asid matches and vaddr is 0. ("SFENCE.VMA 0 asid" case)
// the entry is flushed if it's not global, and the asid matches and vaddr is 0. ("SFENCE.VMA 0 asid" case) else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid) && (!asid_to_be_flushed_is0))
else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid) && (!asid_to_be_flushed_is0)) tags_n[i].valid = 1'b0;
tags_n[i].valid = 1'b0; // normal replacement
// normal replacement end else if (update_i.valid & replace_en[i]) begin
end else if (update_i.valid & replace_en[i]) begin // update tag array
// update tag array tags_n[i] = '{
tags_n[i] = '{ asid: update_i.asid,
asid: update_i.asid, vpn2: update_i.vpn[18+riscv::VPN2:18],
vpn2: update_i.vpn [18+riscv::VPN2:18], vpn1: update_i.vpn[17:9],
vpn1: update_i.vpn [17:9], vpn0: update_i.vpn[8:0],
vpn0: update_i.vpn [8:0], is_1G: update_i.is_1G,
is_1G: update_i.is_1G, is_2M: update_i.is_2M,
is_2M: update_i.is_2M, valid: 1'b1
valid: 1'b1 };
}; // and content as well
// and content as well content_n[i] = update_i.content;
content_n[i] = update_i.content; end
end
end
end end
end
// ----------------------------------------------- // -----------------------------------------------
// PLRU - Pseudo Least Recently Used Replacement // PLRU - Pseudo Least Recently Used Replacement
// ----------------------------------------------- // -----------------------------------------------
logic[2*(TLB_ENTRIES-1)-1:0] plru_tree_q, plru_tree_n; logic [2*(TLB_ENTRIES-1)-1:0] plru_tree_q, plru_tree_n;
always_comb begin : plru_replacement always_comb begin : plru_replacement
plru_tree_n = plru_tree_q; plru_tree_n = plru_tree_q;
// The PLRU-tree indexing: // The PLRU-tree indexing:
// lvl0 0 // lvl0 0
// / \ // / \
// / \ // / \
// lvl1 1 2 // lvl1 1 2
// / \ / \ // / \ / \
@ -178,97 +180,111 @@ module tlb import ariane_pkg::*; #(
// lu_hit[0]: plru_tree_n[0, 1, 3] = {0, 0, 0}; // lu_hit[0]: plru_tree_n[0, 1, 3] = {0, 0, 0};
// default: begin /* No hit */ end // default: begin /* No hit */ end
// endcase // endcase
for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin for (
automatic int unsigned idx_base, shift, new_index; int unsigned i = 0; i < TLB_ENTRIES; i++
// we got a hit so update the pointer as it was least recently used ) begin
if (lu_hit[i] & lu_access_i) begin automatic int unsigned idx_base, shift, new_index;
// Set the nodes to the values we would expect // we got a hit so update the pointer as it was least recently used
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin if (lu_hit[i] & lu_access_i) begin
idx_base = $unsigned((2**lvl)-1); // Set the nodes to the values we would expect
// lvl0 <=> MSB, lvl1 <=> MSB-1, ... for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
shift = $clog2(TLB_ENTRIES) - lvl; idx_base = $unsigned((2 ** lvl) - 1);
// to circumvent the 32 bit integer arithmetic assignment // lvl0 <=> MSB, lvl1 <=> MSB-1, ...
new_index = ~((i >> (shift-1)) & 32'b1); shift = $clog2(TLB_ENTRIES) - lvl;
plru_tree_n[idx_base + (i >> shift)] = new_index[0]; // to circumvent the 32 bit integer arithmetic assignment
end new_index = ~((i >> (shift - 1)) & 32'b1);
end plru_tree_n[idx_base+(i>>shift)] = new_index[0];
end end
// Decode tree to write enable signals
// Next for-loop basically creates the following logic for e.g. an 8 entry
// TLB (note: pseudo-code obviously):
// replace_en[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1}
// replace_en[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0}
// replace_en[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1}
// replace_en[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0}
// replace_en[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1}
// replace_en[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0}
// replace_en[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1}
// replace_en[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0}
// For each entry traverse the tree. If every tree-node matches
// the corresponding bit of the entry's index, this is
// the next entry to replace.
for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin
automatic logic en;
automatic int unsigned idx_base, shift, new_index;
en = 1'b1;
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
idx_base = $unsigned((2**lvl)-1);
// lvl0 <=> MSB, lvl1 <=> MSB-1, ...
shift = $clog2(TLB_ENTRIES) - lvl;
// en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1);
new_index = (i >> (shift-1)) & 32'b1;
if (new_index[0]) begin
en &= plru_tree_q[idx_base + (i>>shift)];
end else begin
en &= ~plru_tree_q[idx_base + (i>>shift)];
end
end
replace_en[i] = en;
end
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
tags_q <= '{default: 0};
content_q <= '{default: 0};
plru_tree_q <= '{default: 0};
end else begin
tags_q <= tags_n;
content_q <= content_n;
plru_tree_q <= plru_tree_n;
end
end
//--------------
// Sanity checks
//--------------
//pragma translate_off
`ifndef VERILATOR
initial begin : p_assertions
assert ((TLB_ENTRIES % 2 == 0) && (TLB_ENTRIES > 1))
else begin $error("TLB size must be a multiple of 2 and greater than 1"); $stop(); end
assert (ASID_WIDTH >= 1)
else begin $error("ASID width must be at least 1"); $stop(); end
end
// Just for checking
function int countSetBits(logic[TLB_ENTRIES-1:0] vector);
automatic int count = 0;
foreach (vector[idx]) begin
count += vector[idx];
end end
return count; end
endfunction // Decode tree to write enable signals
// Next for-loop basically creates the following logic for e.g. an 8 entry
// TLB (note: pseudo-code obviously):
// replace_en[7] = &plru_tree_q[ 6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,1}
// replace_en[6] = &plru_tree_q[~6, 2, 0]; //plru_tree_q[0,2,6]=={1,1,0}
// replace_en[5] = &plru_tree_q[ 5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,1}
// replace_en[4] = &plru_tree_q[~5,~2, 0]; //plru_tree_q[0,2,5]=={1,0,0}
// replace_en[3] = &plru_tree_q[ 4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,1}
// replace_en[2] = &plru_tree_q[~4, 1,~0]; //plru_tree_q[0,1,4]=={0,1,0}
// replace_en[1] = &plru_tree_q[ 3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,1}
// replace_en[0] = &plru_tree_q[~3,~1,~0]; //plru_tree_q[0,1,3]=={0,0,0}
// For each entry traverse the tree. If every tree-node matches
// the corresponding bit of the entry's index, this is
// the next entry to replace.
for (int unsigned i = 0; i < TLB_ENTRIES; i += 1) begin
automatic logic en;
automatic int unsigned idx_base, shift, new_index;
en = 1'b1;
for (int unsigned lvl = 0; lvl < $clog2(TLB_ENTRIES); lvl++) begin
idx_base = $unsigned((2 ** lvl) - 1);
// lvl0 <=> MSB, lvl1 <=> MSB-1, ...
shift = $clog2(TLB_ENTRIES) - lvl;
assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1)) // en &= plru_tree_q[idx_base + (i>>shift)] == ((i >> (shift-1)) & 1'b1);
else begin $error("More then one hit in TLB!"); $stop(); end new_index = (i >> (shift - 1)) & 32'b1;
assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1)) if (new_index[0]) begin
else begin $error("More then one TLB entry selected for next replace!"); $stop(); end en &= plru_tree_q[idx_base+(i>>shift)];
end else begin
en &= ~plru_tree_q[idx_base+(i>>shift)];
end
end
replace_en[i] = en;
end
end
`endif // sequential process
//pragma translate_on always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
tags_q <= '{default: 0};
content_q <= '{default: 0};
plru_tree_q <= '{default: 0};
end else begin
tags_q <= tags_n;
content_q <= content_n;
plru_tree_q <= plru_tree_n;
end
end
//--------------
// Sanity checks
//--------------
//pragma translate_off
`ifndef VERILATOR
initial begin : p_assertions
assert ((TLB_ENTRIES % 2 == 0) && (TLB_ENTRIES > 1))
else begin
$error("TLB size must be a multiple of 2 and greater than 1");
$stop();
end
assert (ASID_WIDTH >= 1)
else begin
$error("ASID width must be at least 1");
$stop();
end
end
// Just for checking
function int countSetBits(logic [TLB_ENTRIES-1:0] vector);
automatic int count = 0;
foreach (vector[idx]) begin
count += vector[idx];
end
return count;
endfunction
assert property (@(posedge clk_i) (countSetBits(lu_hit) <= 1))
else begin
$error("More then one hit in TLB!");
$stop();
end
assert property (@(posedge clk_i) (countSetBits(replace_en) <= 1))
else begin
$error("More then one TLB entry selected for next replace!");
$stop();
end
`endif
//pragma translate_on
endmodule endmodule

View file

@ -1,145 +1,149 @@
module mult import ariane_pkg::*; #( module mult
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input logic flush_i, input logic flush_i,
input fu_data_t fu_data_i, input fu_data_t fu_data_i,
input logic mult_valid_i, input logic mult_valid_i,
output riscv::xlen_t result_o, output riscv::xlen_t result_o,
output logic mult_valid_o, output logic mult_valid_o,
output logic mult_ready_o, output logic mult_ready_o,
output logic [TRANS_ID_BITS-1:0] mult_trans_id_o output logic [TRANS_ID_BITS-1:0] mult_trans_id_o
); );
logic mul_valid; logic mul_valid;
logic div_valid; logic div_valid;
logic div_ready_i; // receiver of division result is able to accept the result logic div_ready_i; // receiver of division result is able to accept the result
logic [TRANS_ID_BITS-1:0] mul_trans_id; logic [TRANS_ID_BITS-1:0] mul_trans_id;
logic [TRANS_ID_BITS-1:0] div_trans_id; logic [TRANS_ID_BITS-1:0] div_trans_id;
riscv::xlen_t mul_result; riscv::xlen_t mul_result;
riscv::xlen_t div_result; riscv::xlen_t div_result;
logic div_valid_op; logic div_valid_op;
logic mul_valid_op; logic mul_valid_op;
// Input Arbitration // Input Arbitration
assign mul_valid_op = ~flush_i && mult_valid_i && (fu_data_i.operation inside { MUL, MULH, MULHU, MULHSU, MULW, CLMUL, CLMULH, CLMULR }); assign mul_valid_op = ~flush_i && mult_valid_i && (fu_data_i.operation inside { MUL, MULH, MULHU, MULHSU, MULW, CLMUL, CLMULH, CLMULR });
assign div_valid_op = ~flush_i && mult_valid_i && (fu_data_i.operation inside { DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW }); assign div_valid_op = ~flush_i && mult_valid_i && (fu_data_i.operation inside { DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW });
// --------------------- // ---------------------
// Output Arbitration // Output Arbitration
// --------------------- // ---------------------
// we give precedence to multiplication as the divider supports stalling and the multiplier is // we give precedence to multiplication as the divider supports stalling and the multiplier is
// just a dumb pipelined multiplier // just a dumb pipelined multiplier
assign div_ready_i = (mul_valid) ? 1'b0 : 1'b1; assign div_ready_i = (mul_valid) ? 1'b0 : 1'b1;
assign mult_trans_id_o = (mul_valid) ? mul_trans_id : div_trans_id; assign mult_trans_id_o = (mul_valid) ? mul_trans_id : div_trans_id;
assign result_o = (mul_valid) ? mul_result : div_result; assign result_o = (mul_valid) ? mul_result : div_result;
assign mult_valid_o = div_valid | mul_valid; assign mult_valid_o = div_valid | mul_valid;
// mult_ready_o = division as the multiplication will unconditionally be ready to accept new requests // mult_ready_o = division as the multiplication will unconditionally be ready to accept new requests
// --------------------- // ---------------------
// Multiplication // Multiplication
// --------------------- // ---------------------
multiplier #( multiplier #(
.CVA6Cfg ( CVA6Cfg ) .CVA6Cfg(CVA6Cfg)
) i_multiplier ( ) i_multiplier (
.clk_i, .clk_i,
.rst_ni, .rst_ni,
.trans_id_i ( fu_data_i.trans_id ), .trans_id_i (fu_data_i.trans_id),
.operation_i ( fu_data_i.operation ), .operation_i (fu_data_i.operation),
.operand_a_i ( fu_data_i.operand_a ), .operand_a_i (fu_data_i.operand_a),
.operand_b_i ( fu_data_i.operand_b ), .operand_b_i (fu_data_i.operand_b),
.result_o ( mul_result ), .result_o (mul_result),
.mult_valid_i ( mul_valid_op ), .mult_valid_i (mul_valid_op),
.mult_valid_o ( mul_valid ), .mult_valid_o (mul_valid),
.mult_trans_id_o ( mul_trans_id ), .mult_trans_id_o(mul_trans_id),
.mult_ready_o ( ) // this unit is unconditionally ready .mult_ready_o () // this unit is unconditionally ready
); );
// --------------------- // ---------------------
// Division // Division
// --------------------- // ---------------------
riscv::xlen_t operand_b, operand_a; // input operands after input MUX (input silencing, word operations or full inputs) riscv::xlen_t
riscv::xlen_t result; // result before result mux operand_b,
operand_a; // input operands after input MUX (input silencing, word operations or full inputs)
riscv::xlen_t result; // result before result mux
logic div_signed; // signed or unsigned division logic div_signed; // signed or unsigned division
logic rem; // is it a remainder (or not a remainder e.g.: a division) logic rem; // is it a remainder (or not a remainder e.g.: a division)
logic word_op_d, word_op_q; // save whether the operation was a word operation or not logic word_op_d, word_op_q; // save whether the operation was a word operation or not
// is this a signed op? // is this a signed op?
assign div_signed = fu_data_i.operation inside {DIV, DIVW, REM, REMW}; assign div_signed = fu_data_i.operation inside {DIV, DIVW, REM, REMW};
// is this a modulo? // is this a modulo?
assign rem = fu_data_i.operation inside {REM, REMU, REMW, REMUW}; assign rem = fu_data_i.operation inside {REM, REMU, REMW, REMUW};
// prepare the input operands and control divider // prepare the input operands and control divider
always_comb begin always_comb begin
// silence the inputs // silence the inputs
operand_a = '0; operand_a = '0;
operand_b = '0; operand_b = '0;
// control signals // control signals
word_op_d = word_op_q; word_op_d = word_op_q;
// we've got a new division operation // we've got a new division operation
if (mult_valid_i && fu_data_i.operation inside {DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW}) begin if (mult_valid_i && fu_data_i.operation inside {DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW}) begin
// is this a word operation? // is this a word operation?
if (fu_data_i.operation inside {DIVW, DIVUW, REMW, REMUW}) begin if (fu_data_i.operation inside {DIVW, DIVUW, REMW, REMUW}) begin
// yes, so check if we should sign extend; this is only done for a signed operation // yes, so check if we should sign extend; this is only done for a signed operation
if (div_signed) begin if (div_signed) begin
operand_a = sext32(fu_data_i.operand_a[31:0]); operand_a = sext32(fu_data_i.operand_a[31:0]);
operand_b = sext32(fu_data_i.operand_b[31:0]); operand_b = sext32(fu_data_i.operand_b[31:0]);
end else begin
operand_a = fu_data_i.operand_a[31:0];
operand_b = fu_data_i.operand_b[31:0];
end
// save whether we want to sign extend the result or not; this is done for all word operations
word_op_d = 1'b1;
end else begin
// regular op
operand_a = fu_data_i.operand_a;
operand_b = fu_data_i.operand_b;
word_op_d = 1'b0;
end
end
end
// ---------------------
// Serial Divider
// ---------------------
serdiv #(
.CVA6Cfg ( CVA6Cfg ),
.WIDTH ( riscv::XLEN )
) i_div (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.id_i ( fu_data_i.trans_id ),
.op_a_i ( operand_a ),
.op_b_i ( operand_b ),
.opcode_i ( {rem, div_signed} ), // 00: udiv, 10: urem, 01: div, 11: rem
.in_vld_i ( div_valid_op ),
.in_rdy_o ( mult_ready_o ),
.flush_i ( flush_i ),
.out_vld_o ( div_valid ),
.out_rdy_i ( div_ready_i ),
.id_o ( div_trans_id ),
.res_o ( result )
);
// Result multiplexer
// if it was a word operation (signed or unsigned) the bit will be set and the result will be sign extended accordingly
assign div_result = (word_op_q) ? sext32(result) : result;
// ---------------------
// Registers
// ---------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
word_op_q <= '0;
end else begin end else begin
word_op_q <= word_op_d; operand_a = fu_data_i.operand_a[31:0];
operand_b = fu_data_i.operand_b[31:0];
end end
// save whether we want to sign extend the result or not; this is done for all word operations
word_op_d = 1'b1;
end else begin
// regular op
operand_a = fu_data_i.operand_a;
operand_b = fu_data_i.operand_b;
word_op_d = 1'b0;
end
end end
end
// ---------------------
// Serial Divider
// ---------------------
serdiv #(
.CVA6Cfg(CVA6Cfg),
.WIDTH (riscv::XLEN)
) i_div (
.clk_i (clk_i),
.rst_ni (rst_ni),
.id_i (fu_data_i.trans_id),
.op_a_i (operand_a),
.op_b_i (operand_b),
.opcode_i ({rem, div_signed}), // 00: udiv, 10: urem, 01: div, 11: rem
.in_vld_i (div_valid_op),
.in_rdy_o (mult_ready_o),
.flush_i (flush_i),
.out_vld_o(div_valid),
.out_rdy_i(div_ready_i),
.id_o (div_trans_id),
.res_o (result)
);
// Result multiplexer
// if it was a word operation (signed or unsigned) the bit will be set and the result will be sign extended accordingly
assign div_result = (word_op_q) ? sext32(result) : result;
// ---------------------
// Registers
// ---------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
word_op_q <= '0;
end else begin
word_op_q <= word_op_d;
end
end
endmodule endmodule

View file

@ -15,136 +15,142 @@
// //
module multiplier import ariane_pkg::*; #( module multiplier
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input logic [TRANS_ID_BITS-1:0] trans_id_i, input logic [TRANS_ID_BITS-1:0] trans_id_i,
input logic mult_valid_i, input logic mult_valid_i,
input fu_op operation_i, input fu_op operation_i,
input riscv::xlen_t operand_a_i, input riscv::xlen_t operand_a_i,
input riscv::xlen_t operand_b_i, input riscv::xlen_t operand_b_i,
output riscv::xlen_t result_o, output riscv::xlen_t result_o,
output logic mult_valid_o, output logic mult_valid_o,
output logic mult_ready_o, output logic mult_ready_o,
output logic [TRANS_ID_BITS-1:0] mult_trans_id_o output logic [TRANS_ID_BITS-1:0] mult_trans_id_o
); );
// Carry-less multiplication // Carry-less multiplication
logic [riscv::XLEN-1:0] clmul_q, clmul_d, clmulr_q, clmulr_d, operand_a, operand_b, operand_a_rev, operand_b_rev; logic [riscv::XLEN-1:0]
logic clmul_rmode, clmul_hmode; clmul_q, clmul_d, clmulr_q, clmulr_d, operand_a, operand_b, operand_a_rev, operand_b_rev;
logic clmul_rmode, clmul_hmode;
if (ariane_pkg::BITMANIP) begin : gen_bitmanip if (ariane_pkg::BITMANIP) begin : gen_bitmanip
// checking for clmul_rmode and clmul_hmode // checking for clmul_rmode and clmul_hmode
assign clmul_rmode = (operation_i == CLMULR); assign clmul_rmode = (operation_i == CLMULR);
assign clmul_hmode = (operation_i == CLMULH); assign clmul_hmode = (operation_i == CLMULH);
// operand_a and b reverse generator // operand_a and b reverse generator
for (genvar i = 0; i < riscv::XLEN; i++) begin for (genvar i = 0; i < riscv::XLEN; i++) begin
assign operand_a_rev[i] = operand_a_i[(riscv::XLEN-1) -i]; assign operand_a_rev[i] = operand_a_i[(riscv::XLEN-1)-i];
assign operand_b_rev[i] = operand_b_i[(riscv::XLEN-1) -i]; assign operand_b_rev[i] = operand_b_i[(riscv::XLEN-1)-i];
end
// operand_a and operand_b selection
assign operand_a = (clmul_rmode | clmul_hmode) ? operand_a_rev : operand_a_i;
assign operand_b = (clmul_rmode | clmul_hmode) ? operand_b_rev : operand_b_i;
// implementation
always_comb begin
clmul_d = '0;
for (int i = 0; i <= riscv::XLEN; i++) begin
clmul_d = (|((operand_b >> i) & 1)) ? clmul_d ^ (operand_a << i) : clmul_d;
end
end
// clmulr + clmulh result generator
for (genvar i = 0; i < riscv::XLEN; i++) begin
assign clmulr_d[i] = clmul_d[(riscv::XLEN-1)-i];
end
end end
// Pipeline register // operand_a and operand_b selection
logic [TRANS_ID_BITS-1:0] trans_id_q; assign operand_a = (clmul_rmode | clmul_hmode) ? operand_a_rev : operand_a_i;
logic mult_valid_q; assign operand_b = (clmul_rmode | clmul_hmode) ? operand_b_rev : operand_b_i;
fu_op operator_d, operator_q;
logic [riscv::XLEN*2-1:0] mult_result_d, mult_result_q;
// control registers // implementation
logic sign_a, sign_b;
logic mult_valid;
// control signals
assign mult_valid_o = mult_valid_q;
assign mult_trans_id_o = trans_id_q;
assign mult_ready_o = 1'b1;
assign mult_valid = mult_valid_i && (operation_i inside {MUL, MULH, MULHU, MULHSU, MULW, CLMUL, CLMULH, CLMULR});
// Sign Select MUX
always_comb begin always_comb begin
sign_a = 1'b0; clmul_d = '0;
sign_b = 1'b0; for (int i = 0; i <= riscv::XLEN; i++) begin
clmul_d = (|((operand_b >> i) & 1)) ? clmul_d ^ (operand_a << i) : clmul_d;
// signed multiplication end
if (operation_i == MULH) begin
sign_a = 1'b1;
sign_b = 1'b1;
// signed - unsigned multiplication
end else if (operation_i == MULHSU) begin
sign_a = 1'b1;
// unsigned multiplication
end else begin
sign_a = 1'b0;
sign_b = 1'b0;
end
end end
// clmulr + clmulh result generator
// single stage version for (genvar i = 0; i < riscv::XLEN; i++) begin
assign mult_result_d = $signed({operand_a_i[riscv::XLEN-1] & sign_a, operand_a_i}) * assign clmulr_d[i] = clmul_d[(riscv::XLEN-1)-i];
$signed({operand_b_i[riscv::XLEN-1] & sign_b, operand_b_i});
assign operator_d = operation_i;
always_comb begin : p_selmux
unique case (operator_q)
MULH, MULHU, MULHSU: result_o = mult_result_q[riscv::XLEN*2-1:riscv::XLEN];
MULW: result_o = sext32(mult_result_q[31:0]);
CLMUL: result_o = clmul_q;
CLMULH: result_o = clmulr_q >> 1;
CLMULR: result_o = clmulr_q;
// MUL performs an XLEN-bit×XLEN-bit multiplication and places the lower XLEN bits in the destination register
default: result_o = mult_result_q[riscv::XLEN-1:0];// including MUL
endcase
end end
if (ariane_pkg::BITMANIP) begin end
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin // Pipeline register
clmul_q <= '0; logic [TRANS_ID_BITS-1:0] trans_id_q;
clmulr_q <= '0; logic mult_valid_q;
end else begin fu_op operator_d, operator_q;
clmul_q <= clmul_d; logic [riscv::XLEN*2-1:0] mult_result_d, mult_result_q;
clmulr_q <= clmulr_d;
end // control registers
end logic sign_a, sign_b;
logic mult_valid;
// control signals
assign mult_valid_o = mult_valid_q;
assign mult_trans_id_o = trans_id_q;
assign mult_ready_o = 1'b1;
assign mult_valid = mult_valid_i && (operation_i inside {MUL, MULH, MULHU, MULHSU, MULW, CLMUL, CLMULH, CLMULR});
// Sign Select MUX
always_comb begin
sign_a = 1'b0;
sign_b = 1'b0;
// signed multiplication
if (operation_i == MULH) begin
sign_a = 1'b1;
sign_b = 1'b1;
// signed - unsigned multiplication
end else if (operation_i == MULHSU) begin
sign_a = 1'b1;
// unsigned multiplication
end else begin
sign_a = 1'b0;
sign_b = 1'b0;
end end
// ----------------------- end
// Output pipeline register
// -----------------------
// single stage version
assign mult_result_d = $signed(
{operand_a_i[riscv::XLEN-1] & sign_a, operand_a_i}
) * $signed(
{operand_b_i[riscv::XLEN-1] & sign_b, operand_b_i}
);
assign operator_d = operation_i;
always_comb begin : p_selmux
unique case (operator_q)
MULH, MULHU, MULHSU: result_o = mult_result_q[riscv::XLEN*2-1:riscv::XLEN];
MULW: result_o = sext32(mult_result_q[31:0]);
CLMUL: result_o = clmul_q;
CLMULH: result_o = clmulr_q >> 1;
CLMULR: result_o = clmulr_q;
// MUL performs an XLEN-bit×XLEN-bit multiplication and places the lower XLEN bits in the destination register
default: result_o = mult_result_q[riscv::XLEN-1:0]; // including MUL
endcase
end
if (ariane_pkg::BITMANIP) begin
always_ff @(posedge clk_i or negedge rst_ni) begin always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin if (~rst_ni) begin
mult_valid_q <= '0; clmul_q <= '0;
trans_id_q <= '0; clmulr_q <= '0;
operator_q <= MUL; end else begin
mult_result_q <= '0; clmul_q <= clmul_d;
end else begin clmulr_q <= clmulr_d;
// Input silencing end
trans_id_q <= trans_id_i;
// Output Register
mult_valid_q <= mult_valid;
operator_q <= operator_d;
mult_result_q <= mult_result_d;
end
end end
end
// -----------------------
// Output pipeline register
// -----------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
mult_valid_q <= '0;
trans_id_q <= '0;
operator_q <= MUL;
mult_result_q <= '0;
end else begin
// Input silencing
trans_id_q <= trans_id_i;
// Output Register
mult_valid_q <= mult_valid;
operator_q <= operator_d;
mult_result_q <= mult_result_d;
end
end
endmodule endmodule

View file

@ -13,50 +13,52 @@
// Description: Performance counters // Description: Performance counters
module perf_counters import ariane_pkg::*; #( module perf_counters
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter int unsigned NumPorts = 3 // number of miss ports #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned NumPorts = 3 // number of miss ports
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
input logic debug_mode_i, // debug mode input logic debug_mode_i, // debug mode
// SRAM like interface // SRAM like interface
input logic [11:0] addr_i, // read/write address (up to 6 counters possible) input logic [11:0] addr_i, // read/write address (up to 6 counters possible)
input logic we_i, // write enable input logic we_i, // write enable
input riscv::xlen_t data_i, // data to write input riscv::xlen_t data_i, // data to write
output riscv::xlen_t data_o, // data to read output riscv::xlen_t data_o, // data to read
// from commit stage // from commit stage
input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, // the instruction we want to commit input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, // the instruction we want to commit
input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, // acknowledge that we are indeed committing input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, // acknowledge that we are indeed committing
// from L1 caches // from L1 caches
input logic l1_icache_miss_i, input logic l1_icache_miss_i,
input logic l1_dcache_miss_i, input logic l1_dcache_miss_i,
// from MMU // from MMU
input logic itlb_miss_i, input logic itlb_miss_i,
input logic dtlb_miss_i, input logic dtlb_miss_i,
// from issue stage // from issue stage
input logic sb_full_i, input logic sb_full_i,
// from frontend // from frontend
input logic if_empty_i, input logic if_empty_i,
// from PC Gen // from PC Gen
input exception_t ex_i, input exception_t ex_i,
input logic eret_i, input logic eret_i,
input bp_resolve_t resolved_branch_i, input bp_resolve_t resolved_branch_i,
// for newly added events // for newly added events
input exception_t branch_exceptions_i, //Branch exceptions->execute unit-> branch_exception_o input exception_t branch_exceptions_i, //Branch exceptions->execute unit-> branch_exception_o
input icache_dreq_t l1_icache_access_i, input icache_dreq_t l1_icache_access_i,
input dcache_req_i_t[2:0] l1_dcache_access_i, input dcache_req_i_t [2:0] l1_dcache_access_i,
input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0]miss_vld_bits_i, //For Cache eviction (3ports-LOAD,STORE,PTW) input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0]miss_vld_bits_i, //For Cache eviction (3ports-LOAD,STORE,PTW)
input logic i_tlb_flush_i, input logic i_tlb_flush_i,
input logic stall_issue_i, //stall-read operands input logic stall_issue_i, //stall-read operands
input logic[31:0] mcountinhibit_i input logic [31:0] mcountinhibit_i
); );
logic [63:0] generic_counter_d[6:1]; logic [63:0] generic_counter_d[6:1];
logic [63:0] generic_counter_q[6:1]; logic [63:0] generic_counter_q[6:1];
//internal signal to keep track of exception //internal signal to keep track of exception
logic read_access_exception,update_access_exception; logic read_access_exception, update_access_exception;
logic events[6:1]; logic events[6:1];
//internal signal for MUX select line input //internal signal for MUX select line input
@ -64,116 +66,155 @@ module perf_counters import ariane_pkg::*; #(
logic [4:0] mhpmevent_q[6:1]; logic [4:0] mhpmevent_q[6:1];
//Multiplexer
// Event selection: for each generic counter i (1..6) the selector
// mhpmevent_q[i] picks which single-bit event drives events[i] this cycle.
// Selector 0 and any unlisted encoding produce no event.
always_comb begin : Mux
  events[6:1] = '{default: 0};

  for (int unsigned i = 1; i <= 6; i++) begin
    case (mhpmevent_q[i])
      5'b00000: events[i] = 0;
      5'b00001: events[i] = l1_icache_miss_i;  //L1 I-Cache misses
      5'b00010: events[i] = l1_dcache_miss_i;  //L1 D-Cache misses
      5'b00011: events[i] = itlb_miss_i;  //ITLB misses
      5'b00100: events[i] = dtlb_miss_i;  //DTLB misses
      // NOTE(review): in every commit-port loop below each acknowledged port
      // overwrites events[i], so the highest-indexed acknowledged port decides
      // the result - confirm this is intended when NrCommitPorts > 1.
      5'b00101:
      for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++)
      if (commit_ack_i[j]) events[i] = commit_instr_i[j].fu == LOAD;  //Load accesses
      5'b00110:
      for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++)
      if (commit_ack_i[j]) events[i] = commit_instr_i[j].fu == STORE;  //Store accesses
      5'b00111: events[i] = ex_i.valid;  //Exceptions
      5'b01000: events[i] = eret_i;  //Exception handler returns
      5'b01001:
      for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++)
      if (commit_ack_i[j]) events[i] = commit_instr_i[j].fu == CTRL_FLOW;  //Branch instructions
      5'b01010:
      events[i] = resolved_branch_i.valid && resolved_branch_i.is_mispredict;//Branch mispredicts
      5'b01011: events[i] = branch_exceptions_i.valid;  //Branch exceptions
      // The standard software calling convention uses register x1 to hold the return address on a call
      // the unconditional jump is decoded as ADD op
      5'b01100:
      for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++)
      if (commit_ack_i[j])
        events[i] = commit_instr_i[j].fu == CTRL_FLOW && (commit_instr_i[j].op == ADD || commit_instr_i[j].op == JALR) && (commit_instr_i[j].rd == 'd1 || commit_instr_i[j].rd == 'd5);//Call
      5'b01101:
      for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++)
      if (commit_ack_i[j])
        events[i] = commit_instr_i[j].op == JALR && commit_instr_i[j].rd == 'd0;  //Return
      5'b01110: events[i] = sb_full_i;  //MSB Full
      5'b01111: events[i] = if_empty_i;  //Instruction fetch Empty
      5'b10000: events[i] = l1_icache_access_i.req;  //L1 I-Cache accesses
      5'b10001:
      events[i] = l1_dcache_access_i[0].data_req || l1_dcache_access_i[1].data_req || l1_dcache_access_i[2].data_req;//L1 D-Cache accesses
      5'b10010:
      events[i] = (l1_dcache_miss_i && miss_vld_bits_i[0] == 8'hFF) || (l1_dcache_miss_i && miss_vld_bits_i[1] == 8'hFF) || (l1_dcache_miss_i && miss_vld_bits_i[2] == 8'hFF);//eviction
      5'b10011: events[i] = i_tlb_flush_i;  //I-TLB flush
      5'b10100:
      for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++)
      if (commit_ack_i[j])
        events[i] = commit_instr_i[j].fu == ALU || commit_instr_i[j].fu == MULT;//Integer instructions
      5'b10101:
      for (int unsigned j = 0; j < CVA6Cfg.NrCommitPorts; j++)
      if (commit_ack_i[j])
        events[i] = commit_instr_i[j].fu == FPU || commit_instr_i[j].fu == FPU_VEC;//Floating Point Instructions
      5'b10110: events[i] = stall_issue_i;  //Pipeline bubbles
      default: events[i] = 0;
    endcase
  end
end

// Next-state and CSR access logic for the six generic counters.
//  - Counting: counter i advances by one when events[i] is set, the core is not
//    in debug mode, no CSR write is in flight (we_i low) and mcountinhibit_i[i+2]
//    is clear. Otherwise the counter keeps its value.
//  - Read: data_o returns the counter / event selector addressed by addr_i.
//    The *H (upper half) CSRs are only readable when riscv::XLEN == 32; on other
//    configurations they raise read_access_exception.
//  - Write (we_i): data_i is stored into the addressed counter half or event
//    selector; unknown addresses raise update_access_exception.
always_comb begin : generic_counter
  // Default: hold all state, no read data, no exceptions
  generic_counter_d = generic_counter_q;
  data_o = 'b0;
  mhpmevent_d = mhpmevent_q;
  read_access_exception = 1'b0;
  update_access_exception = 1'b0;

  for (int unsigned i = 1; i <= 6; i++) begin
    if ((!debug_mode_i) && (!we_i)) begin
      if ((events[i]) == 1 && (!mcountinhibit_i[i+2])) begin
        generic_counter_d[i] = generic_counter_q[i] + 1'b1;
      end
      // Fix: no else branch. The former "else generic_counter_d[i] = 'b0" wiped
      // the counter on every cycle without an event (or while inhibited), so it
      // could never accumulate. The default assignment above holds the value.
    end
  end

  //Read
  // addr_i - <first CSR of the group> + 1 maps the CSR address onto index 1..6
  unique case (addr_i)
    riscv::CSR_MHPM_COUNTER_3,
            riscv::CSR_MHPM_COUNTER_4,
            riscv::CSR_MHPM_COUNTER_5,
            riscv::CSR_MHPM_COUNTER_6,
            riscv::CSR_MHPM_COUNTER_7,
            riscv::CSR_MHPM_COUNTER_8 :begin
      if (riscv::XLEN == 32) data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3+1][31:0];
      else data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3+1];
    end
    riscv::CSR_MHPM_COUNTER_3H,
            riscv::CSR_MHPM_COUNTER_4H,
            riscv::CSR_MHPM_COUNTER_5H,
            riscv::CSR_MHPM_COUNTER_6H,
            riscv::CSR_MHPM_COUNTER_7H,
            riscv::CSR_MHPM_COUNTER_8H :begin
      if (riscv::XLEN == 32)
        data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3H+1][63:32];
      else read_access_exception = 1'b1;
    end
    riscv::CSR_MHPM_EVENT_3,
            riscv::CSR_MHPM_EVENT_4,
            riscv::CSR_MHPM_EVENT_5,
            riscv::CSR_MHPM_EVENT_6,
            riscv::CSR_MHPM_EVENT_7,
            riscv::CSR_MHPM_EVENT_8 :
    data_o = mhpmevent_q[addr_i-riscv::CSR_MHPM_EVENT_3+1];
    default: data_o = 'b0;
  endcase

  //Write
  if (we_i) begin
    unique case (addr_i)
      riscv::CSR_MHPM_COUNTER_3,
            riscv::CSR_MHPM_COUNTER_4,
            riscv::CSR_MHPM_COUNTER_5,
            riscv::CSR_MHPM_COUNTER_6,
            riscv::CSR_MHPM_COUNTER_7,
            riscv::CSR_MHPM_COUNTER_8 :begin
        if (riscv::XLEN == 32)
          generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3+1][31:0] = data_i;
        else generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3+1] = data_i;
      end
      riscv::CSR_MHPM_COUNTER_3H,
            riscv::CSR_MHPM_COUNTER_4H,
            riscv::CSR_MHPM_COUNTER_5H,
            riscv::CSR_MHPM_COUNTER_6H,
            riscv::CSR_MHPM_COUNTER_7H,
            riscv::CSR_MHPM_COUNTER_8H :begin
        if (riscv::XLEN == 32)
          generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3H+1][63:32] = data_i;
        else update_access_exception = 1'b1;
      end
      riscv::CSR_MHPM_EVENT_3,
            riscv::CSR_MHPM_EVENT_4,
            riscv::CSR_MHPM_EVENT_5,
            riscv::CSR_MHPM_EVENT_6,
            riscv::CSR_MHPM_EVENT_7,
            riscv::CSR_MHPM_EVENT_8 :
      mhpmevent_d[addr_i-riscv::CSR_MHPM_EVENT_3+1] = data_i;
      default: update_access_exception = 1'b1;
    endcase
  end
end
//Registers
// State of the generic counters and their event selectors. Both are cleared
// by the asynchronous active-low reset and otherwise take the next-state
// values computed by the combinational logic on every rising clock edge.
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (!rst_ni) begin
    mhpmevent_q       <= '{default: 0};
    generic_counter_q <= '{default: 0};
  end else begin
    mhpmevent_q       <= mhpmevent_d;
    generic_counter_q <= generic_counter_d;
  end
end
endmodule endmodule

View file

@ -13,43 +13,43 @@
// Description: PMP package // Description: PMP package
package riscv;

  // --------------------
  // Privilege Spec
  // --------------------
  // Two-bit privilege level encoding (machine / supervisor / user).
  typedef enum logic [1:0] {
    PRIV_LVL_M = 2'b11,
    PRIV_LVL_S = 2'b01,
    PRIV_LVL_U = 2'b00
  } priv_lvl_t;

  // PMP
  // Address-matching mode selected by a PMP configuration entry.
  typedef enum logic [1:0] {
    OFF   = 2'b00,
    TOR   = 2'b01,
    NA4   = 2'b10,
    NAPOT = 2'b11
  } pmp_addr_mode_t;

  // PMP Access Type
  // One bit per access kind, so values can be OR-ed together and compared
  // bitwise against a pmpcfg_access_t permission field.
  typedef enum logic [2:0] {
    ACCESS_NONE  = 3'b000,
    ACCESS_READ  = 3'b001,
    ACCESS_WRITE = 3'b010,
    ACCESS_EXEC  = 3'b100
  } pmp_access_t;

  // Permission bits of a PMP entry; bit order (x, w, r from MSB to LSB) lines
  // up with the pmp_access_t encodings above.
  typedef struct packed {
    logic x;
    logic w;
    logic r;
  } pmpcfg_access_t;

  // packed struct of a PMP configuration register (8bit)
  typedef struct packed {
    logic           locked;       // lock this configuration
    logic [1:0]     reserved;
    pmp_addr_mode_t addr_mode;    // Off, TOR, NA4, NAPOT
    pmpcfg_access_t access_type;
  } pmpcfg_t;

endpackage

View file

@ -13,10 +13,10 @@
// Description: purely combinatorial PMP unit (with extraction for more complex configs such as NAPOT) // Description: purely combinatorial PMP unit (with extraction for more complex configs such as NAPOT)
module pmp #( module pmp #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter int unsigned PLEN = 34, // rv64: 56 parameter int unsigned PLEN = 34, // rv64: 56
parameter int unsigned PMP_LEN = 32, // rv64: 54 parameter int unsigned PMP_LEN = 32, // rv64: 54
parameter int unsigned NR_ENTRIES = 4 parameter int unsigned NR_ENTRIES = 4
) ( ) (
// Input // Input
input logic [PLEN-1:0] addr_i, input logic [PLEN-1:0] addr_i,
@ -28,66 +28,67 @@ module pmp #(
// Output // Output
output logic allow_o output logic allow_o
); );
// With zero PMP entries every access is granted; otherwise one pmp_entry
// matcher is instantiated per entry and the lowest-numbered applicable match
// decides the outcome.
if (NR_ENTRIES > 0) begin : gen_pmp
  logic [NR_ENTRIES-1:0] match;

  for (genvar i = 0; i < NR_ENTRIES; i++) begin
    // Lower bound handed to the entry: the previous entry's address register,
    // or zero for entry 0.
    logic [PMP_LEN-1:0] conf_addr_prev;

    assign conf_addr_prev = (i == 0) ? '0 : conf_addr_i[i-1];

    pmp_entry #(
        .CVA6Cfg(CVA6Cfg),
        .PLEN   (PLEN),
        .PMP_LEN(PMP_LEN)
    ) i_pmp_entry (
        .addr_i          (addr_i),
        .conf_addr_i     (conf_addr_i[i]),
        .conf_addr_prev_i(conf_addr_prev),
        .conf_addr_mode_i(conf_i[i].addr_mode),
        .match_o         (match[i])
    );
  end

  always_comb begin
    int i;

    allow_o = 1'b0;
    for (i = 0; i < NR_ENTRIES; i++) begin
      // An entry applies in S/U mode always, and in M mode only when locked.
      // The first applicable entry that matches ends the search.
      if ((priv_lvl_i != riscv::PRIV_LVL_M || conf_i[i].locked) && match[i]) begin
        // every requested access bit must be permitted by the entry
        if ((access_type_i & conf_i[i].access_type) != access_type_i) allow_o = 1'b0;
        else allow_o = 1'b1;
        break;
      end
    end
    // Loop ran to completion: no applicable entry matched the address.
    // M-mode is then allowed, every other mode is denied.
    if (i == NR_ENTRIES) begin
      if (priv_lvl_i == riscv::PRIV_LVL_M) allow_o = 1'b1;
      else allow_o = 1'b0;
    end
  end
end else assign allow_o = 1'b1;

// synthesis translate_off
// Simulation-only check: in M-mode, when no entry is both locked and enabled
// (addr_mode != OFF), the access must have been allowed.
always_comb begin
  logic no_locked;
  no_locked = 1'b0;
  if (priv_lvl_i == riscv::PRIV_LVL_M) begin
    no_locked = 1'b1;
    for (int i = 0; i < NR_ENTRIES; i++) begin
      if (conf_i[i].locked && conf_i[i].addr_mode != riscv::OFF) no_locked = 1'b0;
    end
    if (no_locked == 1'b1) assert (allow_o == 1'b1);
  end
end
// synthesis translate_on
endmodule endmodule

View file

@ -28,95 +28,98 @@ module pmp_entry #(
// Output // Output
output logic match_o output logic match_o
); );
logic [PLEN-1:0] conf_addr_n;
logic [$clog2(PLEN)-1:0] trail_ones;
logic [PLEN-1:0] base;
logic [PLEN-1:0] mask;
int unsigned size;

// Inverted address register, padded with two ones on top; with MODE 0 the
// lzc instance counts from the LSB side, so trail_ones is the number of
// trailing ones in conf_addr_i.
assign conf_addr_n = {2'b11, ~conf_addr_i};

lzc #(
    .WIDTH(PLEN),
    .MODE (1'b0)
) i_lzc (
    .in_i   (conf_addr_n),
    .cnt_o  (trail_ones),
    .empty_o()
);

// Address match for a single PMP entry, depending on its addressing mode.
always_comb begin
  // Defaults cover OFF and any other/unknown mode: no match, helpers zeroed.
  match_o = 1'b0;
  base    = '0;
  mask    = '0;
  size    = '0;

  case (conf_addr_mode_i)
    riscv::TOR: begin
      // check that the requested address is in between the two
      // configuration addresses
      if (addr_i >= ({2'b0, conf_addr_prev_i} << 2) && addr_i < ({2'b0, conf_addr_i} << 2)) begin
        match_o = 1'b1;
      end

      // synthesis translate_off
      if (match_o == 0) begin
        assert (addr_i >= ({2'b0, conf_addr_i} << 2) || addr_i < ({2'b0, conf_addr_prev_i} << 2));
      end else begin
        assert (addr_i < ({2'b0, conf_addr_i} << 2) && addr_i >= ({2'b0, conf_addr_prev_i} << 2));
      end
      // synthesis translate_on
    end

    riscv::NA4, riscv::NAPOT: begin
      // size is log2 of the region size in bytes: 4 bytes for NA4,
      // 2**(trailing ones + 3) for NAPOT
      if (conf_addr_mode_i == riscv::NA4) size = 2;
      else begin
        // use the extracted trailing ones
        size = {{(32 - $clog2(PLEN)) {1'b0}}, trail_ones} + 3;
      end

      mask = '1 << size;
      base = ({2'b0, conf_addr_i} << 2) & mask;
      match_o = (addr_i & mask) == base ? 1'b1 : 1'b0;

      // synthesis translate_off
      // size extract checks
      assert (size >= 2);
      if (conf_addr_mode_i == riscv::NAPOT) begin
        assert (size > 2);
        if (size < PMP_LEN) assert (conf_addr_i[size-3] == 0);
        for (int i = 0; i < PMP_LEN; i++) begin
          if (size > 3 && i <= size - 4) begin
            assert (conf_addr_i[i] == 1);  // check that all the rest are ones
          end
        end
      end

      if (size < PLEN - 1) begin
        if (base + 2 ** size > base) begin  // check for overflow
          if (match_o == 0) begin
            assert (addr_i >= base + 2 ** size || addr_i < base);
          end else begin
            assert (addr_i < base + 2 ** size && addr_i >= base);
          end
        end else begin
          if (match_o == 0) begin
            assert (addr_i - 2 ** size >= base || addr_i < base);
          end else begin
            assert (addr_i - 2 ** size < base && addr_i >= base);
          end
        end
      end
      // synthesis translate_on
    end

    // riscv::OFF and anything else: keep the defaults assigned above
    default: ;
  endcase
end
endmodule endmodule

View file

@ -15,84 +15,98 @@
import tb_pkg::*; import tb_pkg::*;
module pmp_tb;
  timeunit 1ns; timeprecision 1ps;

  localparam int unsigned WIDTH = 16;
  localparam int unsigned PMP_LEN = 13;
  localparam int unsigned NR_ENTRIES = 4;

  // Stimulus
  logic [WIDTH-1:0] addr;
  riscv::pmp_access_t access_type;

  // Configuration
  logic [NR_ENTRIES-1:0][PMP_LEN-1:0] conf_addr;
  riscv::pmpcfg_t [NR_ENTRIES-1:0] conf;

  // Output
  logic allow;

  // helper signals
  logic [WIDTH-1:0] base;
  int unsigned size;

  // Device under test, always queried from U-mode
  pmp #(
      .PLEN(WIDTH),
      .PMP_LEN(PMP_LEN),
      .NR_ENTRIES(NR_ENTRIES)
  ) i_pmp (
      .addr_i       (addr),
      .access_type_i(access_type),
      .priv_lvl_i   (riscv::PRIV_LVL_U),
      .conf_addr_i  (conf_addr),
      .conf_i       (conf),
      .allow_o      (allow)
  );

  // Programs entry `idx` as a NAPOT region described by (region_base,
  // region_size) with permission bits `perms`, then waits 5ns for the
  // combinational DUT output to settle. The base/size helper signals are
  // updated as well so they stay observable.
  task automatic program_napot(input int unsigned idx, input logic [WIDTH-1:0] region_base,
                               input int unsigned region_size, input logic [2:0] perms);
    base = region_base;
    size = region_size;
    conf_addr[idx] = P#(
        .WIDTH  (WIDTH),
        .PMP_LEN(PMP_LEN)
    )::base_to_conf(
        base, size
    );
    conf[idx].addr_mode = riscv::NAPOT;
    conf[idx].access_type = perms;
    #5ns;
  endtask

  initial begin
    // set all pmps to disabled initially
    for (int i = 0; i < NR_ENTRIES; i++) begin
      conf[i].addr_mode = riscv::OFF;
    end

    // test napot 1
    addr = 16'b00011001_10111010;
    access_type = riscv::ACCESS_READ;

    // pmp 3
    program_napot(2, 16'b00011001_00000000, 8,
                  riscv::ACCESS_READ | riscv::ACCESS_WRITE | riscv::ACCESS_EXEC);
    assert (allow == 1);

    // add second PMP entry that disallows
    // pmp 1
    program_napot(1, 16'b00011001_10110000, 4, '0);
    assert (allow == 0);

    // add third PMP entry that allows again
    // pmp 2
    program_napot(0, 16'b00011001_10111000, 3, riscv::ACCESS_READ);
    assert (allow == 1);
  end
endmodule

View file

@ -14,23 +14,26 @@
package tb_pkg;

  // Test helper parameterised on the address width and PMP register width.
  class P #(
      parameter WIDTH   = 32,
      parameter PMP_LEN = 32
  );
    // Builds a PMP address-register value from a region base and size_i.
    // For each register bit i (limited to the first WIDTH-2 bits and to
    // PMP_LEN):
    //   i + 3 <  size_i : bit is set to 1
    //   i + 3 == size_i : bit is cleared to 0
    //   i + 3 >  size_i : bit is copied from base[i+2]
    // Bits not visited by the loop stay 0.
    static function logic [PMP_LEN-1:0] base_to_conf(logic [WIDTH-1:0] base, int unsigned size_i);
      logic [PMP_LEN-1:0] encoded;

      encoded = '0;
      for (int pos = 0; pos < WIDTH - 2 && pos < PMP_LEN; pos++) begin
        if (pos + 3 < size_i) begin
          encoded[pos] = 1'b1;
        end else if (pos + 3 == size_i) begin
          encoded[pos] = 1'b0;
        end else begin
          encoded[pos] = base[pos+2];
        end
      end

      return encoded;
    endfunction
  endclass

endpackage

View file

@ -13,78 +13,78 @@
// Description: Scoreboard - keeps track of all decoded, issued and committed instructions // Description: Scoreboard - keeps track of all decoded, issued and committed instructions
module scoreboard #( module scoreboard #(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter bit IsRVFI = bit'(0), parameter bit IsRVFI = bit'(0),
parameter type rs3_len_t = logic, parameter type rs3_len_t = logic,
parameter int unsigned NR_ENTRIES = 8 // must be a power of 2 parameter int unsigned NR_ENTRIES = 8 // must be a power of 2
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
output logic sb_full_o, output logic sb_full_o,
input logic flush_unissued_instr_i, // flush only un-issued instructions input logic flush_unissued_instr_i, // flush only un-issued instructions
input logic flush_i, // flush whole scoreboard input logic flush_i, // flush whole scoreboard
input logic unresolved_branch_i, // we have an unresolved branch input logic unresolved_branch_i, // we have an unresolved branch
// list of clobbered registers to issue stage // list of clobbered registers to issue stage
output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_gpr_o, output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_gpr_o,
output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_fpr_o, output ariane_pkg::fu_t [2**ariane_pkg::REG_ADDR_SIZE-1:0] rd_clobber_fpr_o,
// regfile like interface to operand read stage // regfile like interface to operand read stage
input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs1_i, input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs1_i,
output riscv::xlen_t rs1_o, output riscv::xlen_t rs1_o,
output logic rs1_valid_o, output logic rs1_valid_o,
input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs2_i, input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs2_i,
output riscv::xlen_t rs2_o, output riscv::xlen_t rs2_o,
output logic rs2_valid_o, output logic rs2_valid_o,
input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs3_i, input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs3_i,
output rs3_len_t rs3_o, output rs3_len_t rs3_o,
output logic rs3_valid_o, output logic rs3_valid_o,
// advertise instruction to commit stage, if commit_ack_i is asserted advance the commit pointer // advertise instruction to commit stage, if commit_ack_i is asserted advance the commit pointer
output ariane_pkg::scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o, output ariane_pkg::scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o,
input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i,
// instruction to put on top of scoreboard e.g.: top pointer // instruction to put on top of scoreboard e.g.: top pointer
// we can always put this instruction to the top unless we signal with asserted full_o // we can always put this instruction to the top unless we signal with asserted full_o
input ariane_pkg::scoreboard_entry_t decoded_instr_i, input ariane_pkg::scoreboard_entry_t decoded_instr_i,
input logic decoded_instr_valid_i, input logic decoded_instr_valid_i,
output logic decoded_instr_ack_o, output logic decoded_instr_ack_o,
// instruction to issue logic, if issue_instr_valid and issue_ready is asserted, advance the issue pointer // instruction to issue logic, if issue_instr_valid and issue_ready is asserted, advance the issue pointer
output ariane_pkg::scoreboard_entry_t issue_instr_o, output ariane_pkg::scoreboard_entry_t issue_instr_o,
output logic issue_instr_valid_o, output logic issue_instr_valid_o,
input logic issue_ack_i, input logic issue_ack_i,
// write-back port // write-back port
input ariane_pkg::bp_resolve_t resolved_branch_i, input ariane_pkg::bp_resolve_t resolved_branch_i,
input logic [CVA6Cfg.NrWbPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] trans_id_i, // transaction ID at which to write the result back input logic [CVA6Cfg.NrWbPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] trans_id_i, // transaction ID at which to write the result back
input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i, // write data in input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i, // write data in
input ariane_pkg::exception_t [CVA6Cfg.NrWbPorts-1:0] ex_i, // exception from a functional unit (e.g.: ld/st exception) input ariane_pkg::exception_t [CVA6Cfg.NrWbPorts-1:0] ex_i, // exception from a functional unit (e.g.: ld/st exception)
input logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_i, // data in is valid input logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_i, // data in is valid
input logic x_we_i, // cvxif we for writeback input logic x_we_i, // cvxif we for writeback
// RVFI // RVFI
input [riscv::VLEN-1:0] lsu_addr_i, input [ riscv::VLEN-1:0] lsu_addr_i,
input [(riscv::XLEN/8)-1:0] lsu_rmask_i, input [ (riscv::XLEN/8)-1:0] lsu_rmask_i,
input [(riscv::XLEN/8)-1:0] lsu_wmask_i, input [ (riscv::XLEN/8)-1:0] lsu_wmask_i,
input [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_i, input [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_i,
input riscv::xlen_t rs1_forwarding_i, input riscv::xlen_t rs1_forwarding_i,
input riscv::xlen_t rs2_forwarding_i input riscv::xlen_t rs2_forwarding_i
); );
localparam int unsigned BITS_ENTRIES = $clog2(NR_ENTRIES); localparam int unsigned BITS_ENTRIES = $clog2(NR_ENTRIES);
// this is the FIFO struct of the issue queue // this is the FIFO struct of the issue queue
typedef struct packed { typedef struct packed {
logic issued; // this bit indicates whether we issued this instruction e.g.: if it is valid logic issued; // this bit indicates whether we issued this instruction e.g.: if it is valid
logic is_rd_fpr_flag; // redundant meta info, added for speed logic is_rd_fpr_flag; // redundant meta info, added for speed
ariane_pkg::scoreboard_entry_t sbe; // this is the score board entry we will send to ex ariane_pkg::scoreboard_entry_t sbe; // this is the score board entry we will send to ex
} sb_mem_t; } sb_mem_t;
sb_mem_t [NR_ENTRIES-1:0] mem_q, mem_n; sb_mem_t [NR_ENTRIES-1:0] mem_q, mem_n;
logic issue_full, issue_en; logic issue_full, issue_en;
logic [BITS_ENTRIES:0] issue_cnt_n, issue_cnt_q; logic [BITS_ENTRIES:0] issue_cnt_n, issue_cnt_q;
logic [BITS_ENTRIES-1:0] issue_pointer_n, issue_pointer_q; logic [BITS_ENTRIES-1:0] issue_pointer_n, issue_pointer_q;
logic [CVA6Cfg.NrCommitPorts-1:0][BITS_ENTRIES-1:0] commit_pointer_n, commit_pointer_q; logic [CVA6Cfg.NrCommitPorts-1:0][BITS_ENTRIES-1:0] commit_pointer_n, commit_pointer_q;
logic [$clog2(CVA6Cfg.NrCommitPorts):0] num_commit; logic [$clog2(CVA6Cfg.NrCommitPorts):0] num_commit;
@ -92,7 +92,7 @@ module scoreboard #(
// works since aligned to power of 2 // works since aligned to power of 2
assign issue_full = (issue_cnt_q[BITS_ENTRIES] == 1'b1); assign issue_full = (issue_cnt_q[BITS_ENTRIES] == 1'b1);
assign sb_full_o = issue_full; assign sb_full_o = issue_full;
ariane_pkg::scoreboard_entry_t decoded_instr; ariane_pkg::scoreboard_entry_t decoded_instr;
always_comb begin always_comb begin
@ -130,18 +130,21 @@ module scoreboard #(
// keep track of all issued instructions // keep track of all issued instructions
always_comb begin : issue_fifo always_comb begin : issue_fifo
// default assignment // default assignment
mem_n = mem_q; mem_n = mem_q;
issue_en = 1'b0; issue_en = 1'b0;
// if we got a acknowledge from the issue stage, put this scoreboard entry in the queue // if we got a acknowledge from the issue stage, put this scoreboard entry in the queue
if (decoded_instr_valid_i && decoded_instr_ack_o && !flush_unissued_instr_i) begin if (decoded_instr_valid_i && decoded_instr_ack_o && !flush_unissued_instr_i) begin
// the decoded instruction we put in there is valid (1st bit) // the decoded instruction we put in there is valid (1st bit)
// increase the issue counter and advance issue pointer // increase the issue counter and advance issue pointer
issue_en = 1'b1; issue_en = 1'b1;
mem_n[issue_pointer_q] = {1'b1, // valid bit mem_n[issue_pointer_q] = {
(CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(decoded_instr_i.op)), // whether rd goes to the fpr 1'b1, // valid bit
decoded_instr // decoded instruction record (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(
}; decoded_instr_i.op
)), // whether rd goes to the fpr
decoded_instr // decoded instruction record
};
end end
// ------------ // ------------
@ -149,8 +152,7 @@ module scoreboard #(
// ------------ // ------------
for (int unsigned i = 0; i < NR_ENTRIES; i++) begin for (int unsigned i = 0; i < NR_ENTRIES; i++) begin
// The FU is NONE -> this instruction is valid immediately // The FU is NONE -> this instruction is valid immediately
if (mem_q[i].sbe.fu == ariane_pkg::NONE && mem_q[i].issued) if (mem_q[i].sbe.fu == ariane_pkg::NONE && mem_q[i].issued) mem_n[i].sbe.valid = 1'b1;
mem_n[i].sbe.valid = 1'b1;
end end
// ------------ // ------------
@ -158,10 +160,10 @@ module scoreboard #(
// ------------ // ------------
if (IsRVFI) begin if (IsRVFI) begin
if (lsu_rmask_i != 0) begin if (lsu_rmask_i != 0) begin
mem_n[lsu_addr_trans_id_i].sbe.lsu_addr = lsu_addr_i; mem_n[lsu_addr_trans_id_i].sbe.lsu_addr = lsu_addr_i;
mem_n[lsu_addr_trans_id_i].sbe.lsu_rmask = lsu_rmask_i; mem_n[lsu_addr_trans_id_i].sbe.lsu_rmask = lsu_rmask_i;
end else if (lsu_wmask_i != 0) begin end else if (lsu_wmask_i != 0) begin
mem_n[lsu_addr_trans_id_i].sbe.lsu_addr = lsu_addr_i; mem_n[lsu_addr_trans_id_i].sbe.lsu_addr = lsu_addr_i;
mem_n[lsu_addr_trans_id_i].sbe.lsu_wmask = lsu_wmask_i; mem_n[lsu_addr_trans_id_i].sbe.lsu_wmask = lsu_wmask_i;
mem_n[lsu_addr_trans_id_i].sbe.lsu_wdata = wbdata_i[1]; mem_n[lsu_addr_trans_id_i].sbe.lsu_wdata = wbdata_i[1];
end end
@ -171,7 +173,7 @@ module scoreboard #(
// check if this instruction was issued (e.g.: it could happen after a flush that there is still // check if this instruction was issued (e.g.: it could happen after a flush that there is still
// something in the pipeline e.g. an incomplete memory operation) // something in the pipeline e.g. an incomplete memory operation)
if (wt_valid_i[i] && mem_q[trans_id_i[i]].issued) begin if (wt_valid_i[i] && mem_q[trans_id_i[i]].issued) begin
mem_n[trans_id_i[i]].sbe.valid = 1'b1; mem_n[trans_id_i[i]].sbe.valid = 1'b1;
mem_n[trans_id_i[i]].sbe.result = wbdata_i[i]; mem_n[trans_id_i[i]].sbe.result = wbdata_i[i];
// save the target address of a branch (needed for debug in commit stage) // save the target address of a branch (needed for debug in commit stage)
mem_n[trans_id_i[i]].sbe.bp.predict_address = resolved_branch_i.target_address; mem_n[trans_id_i[i]].sbe.bp.predict_address = resolved_branch_i.target_address;
@ -179,11 +181,10 @@ module scoreboard #(
mem_n[trans_id_i[i]].sbe.rd = 5'b0; mem_n[trans_id_i[i]].sbe.rd = 5'b0;
end end
// write the exception back if it is valid // write the exception back if it is valid
if (ex_i[i].valid) if (ex_i[i].valid) mem_n[trans_id_i[i]].sbe.ex = ex_i[i];
mem_n[trans_id_i[i]].sbe.ex = ex_i[i];
// write the fflags back from the FPU (exception valid is never set), leave tval intact // write the fflags back from the FPU (exception valid is never set), leave tval intact
else if(CVA6Cfg.FpPresent && mem_q[trans_id_i[i]].sbe.fu inside {ariane_pkg::FPU, ariane_pkg::FPU_VEC}) begin else if(CVA6Cfg.FpPresent && mem_q[trans_id_i[i]].sbe.fu inside {ariane_pkg::FPU, ariane_pkg::FPU_VEC}) begin
mem_n[trans_id_i[i]].sbe.ex.cause = ex_i[i].cause; mem_n[trans_id_i[i]].sbe.ex.cause = ex_i[i].cause;
end end
end end
end end
@ -195,8 +196,8 @@ module scoreboard #(
for (logic [CVA6Cfg.NrCommitPorts-1:0] i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin for (logic [CVA6Cfg.NrCommitPorts-1:0] i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin
if (commit_ack_i[i]) begin if (commit_ack_i[i]) begin
// this instruction is no longer in issue e.g.: it is considered finished // this instruction is no longer in issue e.g.: it is considered finished
mem_n[commit_pointer_q[i]].issued = 1'b0; mem_n[commit_pointer_q[i]].issued = 1'b0;
mem_n[commit_pointer_q[i]].sbe.valid = 1'b0; mem_n[commit_pointer_q[i]].sbe.valid = 1'b0;
end end
end end
@ -216,12 +217,12 @@ module scoreboard #(
// FIFO counter updates // FIFO counter updates
assign num_commit = (CVA6Cfg.NrCommitPorts == 2) ? commit_ack_i[1] + commit_ack_i[0] : commit_ack_i[0]; assign num_commit = (CVA6Cfg.NrCommitPorts == 2) ? commit_ack_i[1] + commit_ack_i[0] : commit_ack_i[0];
assign issue_cnt_n = (flush_i) ? '0 : issue_cnt_q - num_commit + issue_en; assign issue_cnt_n = (flush_i) ? '0 : issue_cnt_q - num_commit + issue_en;
assign commit_pointer_n[0] = (flush_i) ? '0 : commit_pointer_q[0] + num_commit; assign commit_pointer_n[0] = (flush_i) ? '0 : commit_pointer_q[0] + num_commit;
assign issue_pointer_n = (flush_i) ? '0 : issue_pointer_q + issue_en; assign issue_pointer_n = (flush_i) ? '0 : issue_pointer_q + issue_en;
// precompute offsets for commit slots // precompute offsets for commit slots
for (genvar k=1; k < CVA6Cfg.NrCommitPorts; k++) begin : gen_cnt_incr for (genvar k = 1; k < CVA6Cfg.NrCommitPorts; k++) begin : gen_cnt_incr
assign commit_pointer_n[k] = (flush_i) ? '0 : commit_pointer_n[0] + unsigned'(k); assign commit_pointer_n[k] = (flush_i) ? '0 : commit_pointer_n[0] + unsigned'(k);
end end
@ -229,17 +230,17 @@ module scoreboard #(
// RD clobber process // RD clobber process
// ------------------- // -------------------
// rd_clobber output: output currently clobbered destination registers // rd_clobber output: output currently clobbered destination registers
logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][NR_ENTRIES:0] gpr_clobber_vld; logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][NR_ENTRIES:0] gpr_clobber_vld;
logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][NR_ENTRIES:0] fpr_clobber_vld; logic [2**ariane_pkg::REG_ADDR_SIZE-1:0][NR_ENTRIES:0] fpr_clobber_vld;
ariane_pkg::fu_t [NR_ENTRIES:0] clobber_fu; ariane_pkg::fu_t [ NR_ENTRIES:0] clobber_fu;
always_comb begin : clobber_assign always_comb begin : clobber_assign
gpr_clobber_vld = '0; gpr_clobber_vld = '0;
fpr_clobber_vld = '0; fpr_clobber_vld = '0;
 // default (highest entry has lowest prio in arbiter tree below) // default (highest entry has lowest prio in arbiter tree below)
clobber_fu[NR_ENTRIES] = ariane_pkg::NONE; clobber_fu[NR_ENTRIES] = ariane_pkg::NONE;
for (int unsigned i = 0; i < 2**ariane_pkg::REG_ADDR_SIZE; i++) begin for (int unsigned i = 0; i < 2 ** ariane_pkg::REG_ADDR_SIZE; i++) begin
gpr_clobber_vld[i][NR_ENTRIES] = 1'b1; gpr_clobber_vld[i][NR_ENTRIES] = 1'b1;
fpr_clobber_vld[i][NR_ENTRIES] = 1'b1; fpr_clobber_vld[i][NR_ENTRIES] = 1'b1;
end end
@ -255,45 +256,45 @@ module scoreboard #(
gpr_clobber_vld[0] = '0; gpr_clobber_vld[0] = '0;
end end
for (genvar k = 0; k < 2**ariane_pkg::REG_ADDR_SIZE; k++) begin : gen_sel_clobbers for (genvar k = 0; k < 2 ** ariane_pkg::REG_ADDR_SIZE; k++) begin : gen_sel_clobbers
// get fu that is going to clobber this register (there should be only one) // get fu that is going to clobber this register (there should be only one)
rr_arb_tree #( rr_arb_tree #(
.NumIn(NR_ENTRIES+1), .NumIn(NR_ENTRIES + 1),
.DataType(ariane_pkg::fu_t), .DataType(ariane_pkg::fu_t),
.ExtPrio(1'b1), .ExtPrio(1'b1),
.AxiVldRdy(1'b1) .AxiVldRdy(1'b1)
) i_sel_gpr_clobbers ( ) i_sel_gpr_clobbers (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i(1'b0),
.rr_i ( '0 ), .rr_i ('0),
.req_i ( gpr_clobber_vld[k] ), .req_i (gpr_clobber_vld[k]),
.gnt_o ( ), .gnt_o (),
.data_i ( clobber_fu ), .data_i (clobber_fu),
.gnt_i ( 1'b1 ), .gnt_i (1'b1),
.req_o ( ), .req_o (),
.data_o ( rd_clobber_gpr_o[k] ), .data_o (rd_clobber_gpr_o[k]),
.idx_o ( ) .idx_o ()
); );
if(CVA6Cfg.FpPresent) begin if (CVA6Cfg.FpPresent) begin
rr_arb_tree #( rr_arb_tree #(
.NumIn(NR_ENTRIES+1), .NumIn(NR_ENTRIES + 1),
.DataType(ariane_pkg::fu_t), .DataType(ariane_pkg::fu_t),
.ExtPrio(1'b1), .ExtPrio(1'b1),
.AxiVldRdy(1'b1) .AxiVldRdy(1'b1)
) i_sel_fpr_clobbers ( ) i_sel_fpr_clobbers (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i(1'b0),
.rr_i ( '0 ), .rr_i ('0),
.req_i ( fpr_clobber_vld[k] ), .req_i (fpr_clobber_vld[k]),
.gnt_o ( ), .gnt_o (),
.data_i ( clobber_fu ), .data_i (clobber_fu),
.gnt_i ( 1'b1 ), .gnt_i (1'b1),
.req_o ( ), .req_o (),
.data_o ( rd_clobber_fpr_o[k] ), .data_o (rd_clobber_fpr_o[k]),
.idx_o ( ) .idx_o ()
); );
end end
end end
@ -307,136 +308,157 @@ module scoreboard #(
// WB ports have higher prio than entries // WB ports have higher prio than entries
for (genvar k = 0; unsigned'(k) < CVA6Cfg.NrWbPorts; k++) begin : gen_rs_wb for (genvar k = 0; unsigned'(k) < CVA6Cfg.NrWbPorts; k++) begin : gen_rs_wb
assign rs1_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs1_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(issue_instr_o.op))); assign rs1_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs1_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
assign rs2_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs2_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(issue_instr_o.op))); issue_instr_o.op
assign rs3_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs3_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(issue_instr_o.op))); )));
assign rs_data[k] = wbdata_i[k]; assign rs2_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs2_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
issue_instr_o.op
)));
assign rs3_fwd_req[k] = (mem_q[trans_id_i[k]].sbe.rd == rs3_i) & wt_valid_i[k] & (~ex_i[k].valid) & (mem_q[trans_id_i[k]].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
issue_instr_o.op
)));
assign rs_data[k] = wbdata_i[k];
end end
for (genvar k = 0; unsigned'(k) < NR_ENTRIES; k++) begin : gen_rs_entries for (genvar k = 0; unsigned'(k) < NR_ENTRIES; k++) begin : gen_rs_entries
assign rs1_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs1_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(issue_instr_o.op))); assign rs1_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs1_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
assign rs2_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs2_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(issue_instr_o.op))); issue_instr_o.op
assign rs3_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs3_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(issue_instr_o.op))); )));
assign rs_data[k+CVA6Cfg.NrWbPorts] = mem_q[k].sbe.result; assign rs2_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs2_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
issue_instr_o.op
)));
assign rs3_fwd_req[k+CVA6Cfg.NrWbPorts] = (mem_q[k].sbe.rd == rs3_i) & mem_q[k].issued & mem_q[k].sbe.valid & (mem_q[k].is_rd_fpr_flag == (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
issue_instr_o.op
)));
assign rs_data[k+CVA6Cfg.NrWbPorts] = mem_q[k].sbe.result;
end end
// check whether we are accessing GPR[0] // check whether we are accessing GPR[0]
assign rs1_valid_o = rs1_valid & ((|rs1_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(issue_instr_o.op))); assign rs1_valid_o = rs1_valid & ((|rs1_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs1_fpr(
assign rs2_valid_o = rs2_valid & ((|rs2_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(issue_instr_o.op))); issue_instr_o.op
assign rs3_valid_o = CVA6Cfg.NrRgprPorts == 3 ? rs3_valid & ((|rs3_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(issue_instr_o.op))) : rs3_valid; )));
assign rs2_valid_o = rs2_valid & ((|rs2_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_rs2_fpr(
issue_instr_o.op
)));
assign rs3_valid_o = CVA6Cfg.NrRgprPorts == 3 ? rs3_valid & ((|rs3_i) | (CVA6Cfg.FpPresent && ariane_pkg::is_imm_fpr(
issue_instr_o.op
))) : rs3_valid;
// use fixed prio here // use fixed prio here
// this implicitly gives higher prio to WB ports // this implicitly gives higher prio to WB ports
rr_arb_tree #( rr_arb_tree #(
.NumIn(NR_ENTRIES+CVA6Cfg.NrWbPorts), .NumIn(NR_ENTRIES + CVA6Cfg.NrWbPorts),
.DataWidth(riscv::XLEN), .DataWidth(riscv::XLEN),
.ExtPrio(1'b1), .ExtPrio(1'b1),
.AxiVldRdy(1'b1) .AxiVldRdy(1'b1)
) i_sel_rs1 ( ) i_sel_rs1 (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i(1'b0),
.rr_i ( '0 ), .rr_i ('0),
.req_i ( rs1_fwd_req ), .req_i (rs1_fwd_req),
.gnt_o ( ), .gnt_o (),
.data_i ( rs_data ), .data_i (rs_data),
.gnt_i ( 1'b1 ), .gnt_i (1'b1),
.req_o ( rs1_valid ), .req_o (rs1_valid),
.data_o ( rs1_o ), .data_o (rs1_o),
.idx_o ( ) .idx_o ()
); );
rr_arb_tree #( rr_arb_tree #(
.NumIn(NR_ENTRIES+CVA6Cfg.NrWbPorts), .NumIn(NR_ENTRIES + CVA6Cfg.NrWbPorts),
.DataWidth(riscv::XLEN), .DataWidth(riscv::XLEN),
.ExtPrio(1'b1), .ExtPrio(1'b1),
.AxiVldRdy(1'b1) .AxiVldRdy(1'b1)
) i_sel_rs2 ( ) i_sel_rs2 (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i(1'b0),
.rr_i ( '0 ), .rr_i ('0),
.req_i ( rs2_fwd_req ), .req_i (rs2_fwd_req),
.gnt_o ( ), .gnt_o (),
.data_i ( rs_data ), .data_i (rs_data),
.gnt_i ( 1'b1 ), .gnt_i (1'b1),
.req_o ( rs2_valid ), .req_o (rs2_valid),
.data_o ( rs2_o ), .data_o (rs2_o),
.idx_o ( ) .idx_o ()
); );
riscv::xlen_t rs3; riscv::xlen_t rs3;
rr_arb_tree #( rr_arb_tree #(
.NumIn(NR_ENTRIES+CVA6Cfg.NrWbPorts), .NumIn(NR_ENTRIES + CVA6Cfg.NrWbPorts),
.DataWidth(riscv::XLEN), .DataWidth(riscv::XLEN),
.ExtPrio(1'b1), .ExtPrio(1'b1),
.AxiVldRdy(1'b1) .AxiVldRdy(1'b1)
) i_sel_rs3 ( ) i_sel_rs3 (
.clk_i ( clk_i ), .clk_i (clk_i),
.rst_ni ( rst_ni ), .rst_ni (rst_ni),
.flush_i ( 1'b0 ), .flush_i(1'b0),
.rr_i ( '0 ), .rr_i ('0),
.req_i ( rs3_fwd_req ), .req_i (rs3_fwd_req),
.gnt_o ( ), .gnt_o (),
.data_i ( rs_data ), .data_i (rs_data),
.gnt_i ( 1'b1 ), .gnt_i (1'b1),
.req_o ( rs3_valid ), .req_o (rs3_valid),
.data_o ( rs3 ), .data_o (rs3),
.idx_o ( ) .idx_o ()
); );
if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_three_port if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_three_port
assign rs3_o = rs3[riscv::XLEN-1:0]; assign rs3_o = rs3[riscv::XLEN-1:0];
end else begin : gen_fp_three_port end else begin : gen_fp_three_port
assign rs3_o = rs3[CVA6Cfg.FLen-1:0]; assign rs3_o = rs3[CVA6Cfg.FLen-1:0];
end end
// sequential process // sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin : regs always_ff @(posedge clk_i or negedge rst_ni) begin : regs
if(!rst_ni) begin if (!rst_ni) begin
mem_q <= '{default: sb_mem_t'(0)}; mem_q <= '{default: sb_mem_t'(0)};
issue_cnt_q <= '0; issue_cnt_q <= '0;
commit_pointer_q <= '0; commit_pointer_q <= '0;
issue_pointer_q <= '0; issue_pointer_q <= '0;
end else begin end else begin
issue_cnt_q <= issue_cnt_n; issue_cnt_q <= issue_cnt_n;
issue_pointer_q <= issue_pointer_n; issue_pointer_q <= issue_pointer_n;
mem_q <= mem_n; mem_q <= mem_n;
commit_pointer_q <= commit_pointer_n; commit_pointer_q <= commit_pointer_n;
end end
end end
//pragma translate_off //pragma translate_off
initial begin initial begin
assert (NR_ENTRIES == 2**BITS_ENTRIES) else $fatal(1, "Scoreboard size needs to be a power of two."); assert (NR_ENTRIES == 2 ** BITS_ENTRIES)
else $fatal(1, "Scoreboard size needs to be a power of two.");
end end
// assert that zero is never set // assert that zero is never set
assert property ( assert property (@(posedge clk_i) disable iff (!rst_ni) (rd_clobber_gpr_o[0] == ariane_pkg::NONE))
@(posedge clk_i) disable iff (!rst_ni) (rd_clobber_gpr_o[0] == ariane_pkg::NONE)) else $fatal(1, "RD 0 should not bet set");
else $fatal (1,"RD 0 should not bet set");
// assert that we never acknowledge a commit if the instruction is not valid // assert that we never acknowledge a commit if the instruction is not valid
assert property ( assert property (
@(posedge clk_i) disable iff (!rst_ni) commit_ack_i[0] |-> commit_instr_o[0].valid) @(posedge clk_i) disable iff (!rst_ni) commit_ack_i[0] |-> commit_instr_o[0].valid)
else $fatal (1,"Commit acknowledged but instruction is not valid"); else $fatal(1, "Commit acknowledged but instruction is not valid");
assert property ( assert property (
@(posedge clk_i) disable iff (!rst_ni) commit_ack_i[1] |-> commit_instr_o[1].valid) @(posedge clk_i) disable iff (!rst_ni) commit_ack_i[1] |-> commit_instr_o[1].valid)
else $fatal (1,"Commit acknowledged but instruction is not valid"); else $fatal(1, "Commit acknowledged but instruction is not valid");
// assert that we never give an issue ack signal if the instruction is not valid // assert that we never give an issue ack signal if the instruction is not valid
assert property ( assert property (@(posedge clk_i) disable iff (!rst_ni) issue_ack_i |-> issue_instr_valid_o)
@(posedge clk_i) disable iff (!rst_ni) issue_ack_i |-> issue_instr_valid_o) else $fatal(1, "Issue acknowledged but instruction is not valid");
else $fatal (1,"Issue acknowledged but instruction is not valid");
// there should never be more than one instruction writing the same destination register (except x0) // there should never be more than one instruction writing the same destination register (except x0)
// check that no functional unit is retiring with the same transaction id // check that no functional unit is retiring with the same transaction id
for (genvar i = 0; i < CVA6Cfg.NrWbPorts; i++) begin for (genvar i = 0; i < CVA6Cfg.NrWbPorts; i++) begin
for (genvar j = 0; j < CVA6Cfg.NrWbPorts; j++) begin for (genvar j = 0; j < CVA6Cfg.NrWbPorts; j++) begin
assert property ( assert property (
@(posedge clk_i) disable iff (!rst_ni) wt_valid_i[i] && wt_valid_i[j] && (i != j) |-> (trans_id_i[i] != trans_id_i[j])) @(posedge clk_i) disable iff (!rst_ni) wt_valid_i[i] && wt_valid_i[j] && (i != j) |-> (trans_id_i[i] != trans_id_i[j]))
else $fatal (1,"Two or more functional units are retiring instructions with the same transaction id!"); else
$fatal(
1,
"Two or more functional units are retiring instructions with the same transaction id!"
);
end end
end end
//pragma translate_on //pragma translate_on

View file

@ -15,41 +15,48 @@
// Description: simple 64bit serial divider // Description: simple 64bit serial divider
module serdiv import ariane_pkg::*; #( module serdiv
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, import ariane_pkg::*;
parameter WIDTH = 64, #(
parameter STABLE_HANDSHAKE = 0 // Guarantee a stable in_rdy_o during the input handshake. Keep it at 0 in CVA6 parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter WIDTH = 64,
parameter STABLE_HANDSHAKE = 0 // Guarantee a stable in_rdy_o during the input handshake. Keep it at 0 in CVA6
) ( ) (
input logic clk_i, input logic clk_i,
input logic rst_ni, input logic rst_ni,
// input IF // input IF
input logic [TRANS_ID_BITS-1:0] id_i, input logic [TRANS_ID_BITS-1:0] id_i,
input logic [WIDTH-1:0] op_a_i, input logic [WIDTH-1:0] op_a_i,
input logic [WIDTH-1:0] op_b_i, input logic [WIDTH-1:0] op_b_i,
input logic [1:0] opcode_i, // 0: udiv, 2: urem, 1: div, 3: rem input logic [1:0] opcode_i, // 0: udiv, 2: urem, 1: div, 3: rem
// handshake // handshake
input logic in_vld_i, // there is a cycle delay from in_rdy_o->in_vld_i, see issue_read_operands.sv stage input logic in_vld_i, // there is a cycle delay from in_rdy_o->in_vld_i, see issue_read_operands.sv stage
output logic in_rdy_o, output logic in_rdy_o,
input logic flush_i, input logic flush_i,
// output IF // output IF
output logic out_vld_o, output logic out_vld_o,
input logic out_rdy_i, input logic out_rdy_i,
output logic [TRANS_ID_BITS-1:0] id_o, output logic [TRANS_ID_BITS-1:0] id_o,
output logic [WIDTH-1:0] res_o output logic [WIDTH-1:0] res_o
); );
///////////////////////////////////// /////////////////////////////////////
// signal declarations // signal declarations
///////////////////////////////////// /////////////////////////////////////
enum logic [1:0] {IDLE, DIVIDE, FINISH} state_d, state_q; enum logic [1:0] {
IDLE,
DIVIDE,
FINISH
}
state_d, state_q;
logic [WIDTH-1:0] res_q, res_d; logic [WIDTH-1:0] res_q, res_d;
logic [WIDTH-1:0] op_a_q, op_a_d; logic [WIDTH-1:0] op_a_q, op_a_d;
logic [WIDTH-1:0] op_b_q, op_b_d; logic [WIDTH-1:0] op_b_q, op_b_d;
logic op_a_sign, op_b_sign; logic op_a_sign, op_b_sign;
logic op_b_zero, op_b_zero_q, op_b_zero_d; logic op_b_zero, op_b_zero_q, op_b_zero_d;
logic op_b_neg_one, op_b_neg_one_q, op_b_neg_one_d; logic op_b_neg_one, op_b_neg_one_q, op_b_neg_one_d;
logic [TRANS_ID_BITS-1:0] id_q, id_d; logic [TRANS_ID_BITS-1:0] id_q, id_d;
@ -69,125 +76,124 @@ module serdiv import ariane_pkg::*; #(
logic [WIDTH-1:0] lzc_a_input, lzc_b_input, op_b; logic [WIDTH-1:0] lzc_a_input, lzc_b_input, op_b;
logic [$clog2(WIDTH)-1:0] lzc_a_result, lzc_b_result; logic [$clog2(WIDTH)-1:0] lzc_a_result, lzc_b_result;
logic [$clog2(WIDTH+1)-1:0] shift_a; logic [$clog2(WIDTH+1)-1:0] shift_a;
logic [$clog2(WIDTH+1):0] div_shift; logic [ $clog2(WIDTH+1):0] div_shift;
logic a_reg_en, b_reg_en, res_reg_en, ab_comp, pm_sel, load_en; logic a_reg_en, b_reg_en, res_reg_en, ab_comp, pm_sel, load_en;
logic lzc_a_no_one, lzc_b_no_one; logic lzc_a_no_one, lzc_b_no_one;
logic div_res_zero_d, div_res_zero_q; logic div_res_zero_d, div_res_zero_q;
///////////////////////////////////// /////////////////////////////////////
// align the input operands // align the input operands
// for faster division // for faster division
///////////////////////////////////// /////////////////////////////////////
assign op_a_sign = op_a_i[$high(op_a_i)]; assign op_a_sign = op_a_i[$high(op_a_i)];
assign op_b_sign = op_b_i[$high(op_b_i)]; assign op_b_sign = op_b_i[$high(op_b_i)];
assign op_b_zero = lzc_b_no_one & ~op_b_sign; assign op_b_zero = lzc_b_no_one & ~op_b_sign;
assign op_b_neg_one = lzc_b_no_one & op_b_sign; assign op_b_neg_one = lzc_b_no_one & op_b_sign;
assign lzc_a_input = (opcode_i[0] & op_a_sign) ? {~op_a_i[$high(op_a_i)-1:0], 1'b1} : op_a_i; assign lzc_a_input = (opcode_i[0] & op_a_sign) ? {~op_a_i[$high(op_a_i)-1:0], 1'b1} : op_a_i;
assign lzc_b_input = (opcode_i[0] & op_b_sign) ? ~op_b_i : op_b_i; assign lzc_b_input = (opcode_i[0] & op_b_sign) ? ~op_b_i : op_b_i;
lzc #( lzc #(
.MODE ( 1 ), // count leading zeros .MODE (1), // count leading zeros
.WIDTH ( WIDTH ) .WIDTH(WIDTH)
) i_lzc_a ( ) i_lzc_a (
.in_i ( lzc_a_input ), .in_i (lzc_a_input),
.cnt_o ( lzc_a_result ), .cnt_o (lzc_a_result),
.empty_o ( lzc_a_no_one ) .empty_o(lzc_a_no_one)
); );
lzc #( lzc #(
.MODE ( 1 ), // count leading zeros .MODE (1), // count leading zeros
.WIDTH ( WIDTH ) .WIDTH(WIDTH)
) i_lzc_b ( ) i_lzc_b (
.in_i ( lzc_b_input ), .in_i (lzc_b_input),
.cnt_o ( lzc_b_result ), .cnt_o (lzc_b_result),
.empty_o ( lzc_b_no_one ) .empty_o(lzc_b_no_one)
); );
assign shift_a = (lzc_a_no_one) ? WIDTH : {1'b0, lzc_a_result}; assign shift_a = (lzc_a_no_one) ? WIDTH : {1'b0, lzc_a_result};
assign div_shift = {1'b0, lzc_b_result} - shift_a; assign div_shift = {1'b0, lzc_b_result} - shift_a;
assign op_b = op_b_i <<< $unsigned(div_shift); assign op_b = op_b_i <<< $unsigned(div_shift);
// the division is zero if |opB| > |opA| and can be terminated // the division is zero if |opB| > |opA| and can be terminated
assign div_res_zero_d = (load_en) ? div_shift[$high(div_shift)] : div_res_zero_q; assign div_res_zero_d = (load_en) ? div_shift[$high(div_shift)] : div_res_zero_q;
///////////////////////////////////// /////////////////////////////////////
// Datapath // Datapath
///////////////////////////////////// /////////////////////////////////////
assign pm_sel = load_en & ~(opcode_i[0] & (op_a_sign ^ op_b_sign)); assign pm_sel = load_en & ~(opcode_i[0] & (op_a_sign ^ op_b_sign));
// muxes // muxes
assign add_mux = (load_en) ? op_a_i : op_b_q; assign add_mux = (load_en) ? op_a_i : op_b_q;
// attention: logical shift by one in case of negative operand B! // attention: logical shift by one in case of negative operand B!
assign b_mux = (load_en) ? op_b : {comp_inv_q, (op_b_q[$high(op_b_q):1])}; assign b_mux = (load_en) ? op_b : {comp_inv_q, (op_b_q[$high(op_b_q):1])};
// in case of bad timing, we could output from regs -> needs a cycle more in the FSM // in case of bad timing, we could output from regs -> needs a cycle more in the FSM
assign out_mux = (rem_sel_q) ? (op_b_neg_one_q ? '0 : op_a_q) : (op_b_zero_q ? '1 : (op_b_neg_one_q ? op_a_q : res_q)); assign out_mux = (rem_sel_q) ? (op_b_neg_one_q ? '0 : op_a_q) : (op_b_zero_q ? '1 : (op_b_neg_one_q ? op_a_q : res_q));
// invert if necessary // invert if necessary
assign res_o = (res_inv_q) ? -$signed(out_mux) : out_mux; assign res_o = (res_inv_q) ? -$signed(out_mux) : out_mux;
// main comparator // main comparator
assign ab_comp = ((op_a_q == op_b_q) | ((op_a_q > op_b_q) ^ comp_inv_q)) & ((|op_a_q) | op_b_zero_q); assign ab_comp = ((op_a_q == op_b_q) | ((op_a_q > op_b_q) ^ comp_inv_q)) & ((|op_a_q) | op_b_zero_q);
// main adder // main adder
assign add_tmp = (load_en) ? 0 : op_a_q; assign add_tmp = (load_en) ? 0 : op_a_q;
assign add_out = (pm_sel) ? add_tmp + add_mux : add_tmp - $signed(add_mux); assign add_out = (pm_sel) ? add_tmp + add_mux : add_tmp - $signed(add_mux);
///////////////////////////////////// /////////////////////////////////////
// FSM, counter // FSM, counter
///////////////////////////////////// /////////////////////////////////////
assign cnt_zero = (cnt_q == 0); assign cnt_zero = (cnt_q == 0);
assign cnt_d = (load_en) ? div_shift[$clog2(WIDTH)-1:0] : assign cnt_d = (load_en) ? div_shift[$clog2(WIDTH)-1:0] : (~cnt_zero) ? cnt_q - 1 : cnt_q;
(~cnt_zero) ? cnt_q - 1 : cnt_q;
always_comb begin : p_fsm always_comb begin : p_fsm
// default // default
state_d = state_q; state_d = state_q;
in_rdy_o = 1'b0; in_rdy_o = 1'b0;
out_vld_o = 1'b0; out_vld_o = 1'b0;
load_en = 1'b0; load_en = 1'b0;
a_reg_en = 1'b0; a_reg_en = 1'b0;
b_reg_en = 1'b0; b_reg_en = 1'b0;
res_reg_en = 1'b0; res_reg_en = 1'b0;
unique case (state_q) unique case (state_q)
IDLE: begin IDLE: begin
in_rdy_o = 1'b1; in_rdy_o = 1'b1;
if (in_vld_i) begin if (in_vld_i) begin
// CVA6: there is a cycle delay until the valid signal is asserted by the id stage // CVA6: there is a cycle delay until the valid signal is asserted by the id stage
// Ara: we need a stable handshake // Ara: we need a stable handshake
in_rdy_o = (STABLE_HANDSHAKE) ? 1'b1 : 1'b0; in_rdy_o = (STABLE_HANDSHAKE) ? 1'b1 : 1'b0;
a_reg_en = 1'b1; a_reg_en = 1'b1;
b_reg_en = 1'b1; b_reg_en = 1'b1;
load_en = 1'b1; load_en = 1'b1;
state_d = DIVIDE; state_d = DIVIDE;
end end
end end
DIVIDE: begin DIVIDE: begin
if (~(div_res_zero_q | op_b_zero_q | op_b_neg_one_q)) begin if (~(div_res_zero_q | op_b_zero_q | op_b_neg_one_q)) begin
a_reg_en = ab_comp; a_reg_en = ab_comp;
b_reg_en = 1'b1; b_reg_en = 1'b1;
res_reg_en = 1'b1; res_reg_en = 1'b1;
end end
// can end the division immediately if the result is known // can end the division immediately if the result is known
if (div_res_zero_q | op_b_zero_q | op_b_neg_one_q) begin if (div_res_zero_q | op_b_zero_q | op_b_neg_one_q) begin
out_vld_o = 1'b1; out_vld_o = 1'b1;
state_d = FINISH; state_d = FINISH;
if(out_rdy_i) begin if (out_rdy_i) begin
// in_rdy_o = 1'b1;// there is a cycle delay until the valid signal is asserted by the id stage // in_rdy_o = 1'b1;// there is a cycle delay until the valid signal is asserted by the id stage
state_d = IDLE; state_d = IDLE;
end end
end else if (cnt_zero) begin end else if (cnt_zero) begin
state_d = FINISH; state_d = FINISH;
end end
end end
FINISH: begin FINISH: begin
@ -195,41 +201,40 @@ module serdiv import ariane_pkg::*; #(
if (out_rdy_i) begin if (out_rdy_i) begin
// in_rdy_o = 1'b1;// there is a cycle delay until the valid signal is asserted by the id stage // in_rdy_o = 1'b1;// there is a cycle delay until the valid signal is asserted by the id stage
state_d = IDLE; state_d = IDLE;
end end
end end
default : state_d = IDLE; default: state_d = IDLE;
endcase endcase
if (flush_i) begin if (flush_i) begin
in_rdy_o = 1'b0; in_rdy_o = 1'b0;
out_vld_o = 1'b0; out_vld_o = 1'b0;
a_reg_en = 1'b0; a_reg_en = 1'b0;
b_reg_en = 1'b0; b_reg_en = 1'b0;
load_en = 1'b0; load_en = 1'b0;
state_d = IDLE; state_d = IDLE;
end end
end end
///////////////////////////////////// /////////////////////////////////////
// regs, flags // regs, flags
///////////////////////////////////// /////////////////////////////////////
// get flags // get flags
assign rem_sel_d = (load_en) ? opcode_i[1] : rem_sel_q; assign rem_sel_d = (load_en) ? opcode_i[1] : rem_sel_q;
assign comp_inv_d = (load_en) ? opcode_i[0] & op_b_sign : comp_inv_q; assign comp_inv_d = (load_en) ? opcode_i[0] & op_b_sign : comp_inv_q;
assign op_b_zero_d = (load_en) ? op_b_zero : op_b_zero_q; assign op_b_zero_d = (load_en) ? op_b_zero : op_b_zero_q;
assign op_b_neg_one_d = (load_en) ? op_b_neg_one : op_b_neg_one_q; assign op_b_neg_one_d = (load_en) ? op_b_neg_one : op_b_neg_one_q;
assign res_inv_d = (load_en) ? (~op_b_zero | opcode_i[1]) & opcode_i[0] & (op_a_sign ^ op_b_sign ^ op_b_neg_one) : res_inv_q; assign res_inv_d = (load_en) ? (~op_b_zero | opcode_i[1]) & opcode_i[0] & (op_a_sign ^ op_b_sign ^ op_b_neg_one) : res_inv_q;
// transaction id // transaction id
assign id_d = (load_en) ? id_i : id_q; assign id_d = (load_en) ? id_i : id_q;
assign id_o = id_q; assign id_o = id_q;
assign op_a_d = (a_reg_en) ? add_out : op_a_q; assign op_a_d = (a_reg_en) ? add_out : op_a_q;
assign op_b_d = (b_reg_en) ? b_mux : op_b_q; assign op_b_d = (b_reg_en) ? b_mux : op_b_q;
assign res_d = (load_en) ? '0 : assign res_d = (load_en) ? '0 : (res_reg_en) ? {res_q[$high(res_q)-1:0], ab_comp} : res_q;
(res_reg_en) ? {res_q[$high(res_q)-1:0], ab_comp} : res_q;
always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
if (~rst_ni) begin if (~rst_ni) begin

View file

@ -14,272 +14,277 @@
// if they are no longer speculative // if they are no longer speculative
module store_buffer import ariane_pkg::*; #( module store_buffer
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // if we flush we need to pause the transactions on the memory input logic flush_i, // if we flush we need to pause the transactions on the memory
// otherwise we will run in a deadlock with the memory arbiter // otherwise we will run in a deadlock with the memory arbiter
input logic stall_st_pending_i, // Stall issuing non-speculative request input logic stall_st_pending_i, // Stall issuing non-speculative request
output logic no_st_pending_o, // non-speculative queue is empty (e.g.: everything is committed to the memory hierarchy) output logic no_st_pending_o, // non-speculative queue is empty (e.g.: everything is committed to the memory hierarchy)
 output logic store_buffer_empty_o, // there is no store pending in either the speculative unit or the non-speculative queue output logic store_buffer_empty_o, // there is no store pending in either the speculative unit or the non-speculative queue
input logic [11:0] page_offset_i, // check for the page offset (the last 12 bit if the current load matches them) input logic [11:0] page_offset_i, // check for the page offset (the last 12 bit if the current load matches them)
output logic page_offset_matches_o, // the above input page offset matches -> let the store buffer drain output logic page_offset_matches_o, // the above input page offset matches -> let the store buffer drain
input logic commit_i, // commit the instruction which was placed there most recently input logic commit_i, // commit the instruction which was placed there most recently
output logic commit_ready_o, // commit queue is ready to accept another commit request output logic commit_ready_o, // commit queue is ready to accept another commit request
output logic ready_o, // the store queue is ready to accept a new request output logic ready_o, // the store queue is ready to accept a new request
// it is only ready if it can unconditionally commit the instruction, e.g.: // it is only ready if it can unconditionally commit the instruction, e.g.:
// the commit buffer needs to be empty // the commit buffer needs to be empty
input logic valid_i, // this is a valid store input logic valid_i, // this is a valid store
input logic valid_without_flush_i, // just tell if the address is valid which we are current putting and do not take any further action input logic valid_without_flush_i, // just tell if the address is valid which we are current putting and do not take any further action
input logic [riscv::PLEN-1:0] paddr_i, // physical address of store which needs to be placed in the queue input logic [riscv::PLEN-1:0] paddr_i, // physical address of store which needs to be placed in the queue
output [riscv::PLEN-1:0] mem_paddr_o, output [riscv::PLEN-1:0] mem_paddr_o,
input riscv::xlen_t data_i, // data which is placed in the queue input riscv::xlen_t data_i, // data which is placed in the queue
input logic [(riscv::XLEN/8)-1:0] be_i, // byte enable in input logic [(riscv::XLEN/8)-1:0] be_i, // byte enable in
input logic [1:0] data_size_i, // type of request we are making (e.g.: bytes to write) input logic [1:0] data_size_i, // type of request we are making (e.g.: bytes to write)
// D$ interface // D$ interface
input dcache_req_o_t req_port_i, input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o output dcache_req_i_t req_port_o
); );
// the store queue has two parts: // the store queue has two parts:
// 1. Speculative queue // 1. Speculative queue
// 2. Commit queue which is non-speculative, e.g.: the store will definitely happen. // 2. Commit queue which is non-speculative, e.g.: the store will definitely happen.
struct packed { struct packed {
logic [riscv::PLEN-1:0] address; logic [riscv::PLEN-1:0] address;
riscv::xlen_t data; riscv::xlen_t data;
logic [(riscv::XLEN/8)-1:0] be; logic [(riscv::XLEN/8)-1:0] be;
logic [1:0] data_size; logic [1:0] data_size;
logic valid; // this entry is valid, we need this for checking if the address offset matches logic valid; // this entry is valid, we need this for checking if the address offset matches
} speculative_queue_n [DEPTH_SPEC-1:0], speculative_queue_q [DEPTH_SPEC-1:0], }
commit_queue_n [DEPTH_COMMIT-1:0], commit_queue_q [DEPTH_COMMIT-1:0]; speculative_queue_n[DEPTH_SPEC-1:0],
speculative_queue_q[DEPTH_SPEC-1:0],
commit_queue_n[DEPTH_COMMIT-1:0],
commit_queue_q[DEPTH_COMMIT-1:0];
// keep a status count for both buffers // keep a status count for both buffers
logic [$clog2(DEPTH_SPEC):0] speculative_status_cnt_n, speculative_status_cnt_q; logic [$clog2(DEPTH_SPEC):0] speculative_status_cnt_n, speculative_status_cnt_q;
logic [$clog2(DEPTH_COMMIT):0] commit_status_cnt_n, commit_status_cnt_q; logic [$clog2(DEPTH_COMMIT):0] commit_status_cnt_n, commit_status_cnt_q;
// Speculative queue // Speculative queue
logic [$clog2(DEPTH_SPEC)-1:0] speculative_read_pointer_n, speculative_read_pointer_q; logic [$clog2(DEPTH_SPEC)-1:0] speculative_read_pointer_n, speculative_read_pointer_q;
logic [$clog2(DEPTH_SPEC)-1:0] speculative_write_pointer_n, speculative_write_pointer_q; logic [$clog2(DEPTH_SPEC)-1:0] speculative_write_pointer_n, speculative_write_pointer_q;
// Commit Queue // Commit Queue
logic [$clog2(DEPTH_COMMIT)-1:0] commit_read_pointer_n, commit_read_pointer_q; logic [$clog2(DEPTH_COMMIT)-1:0] commit_read_pointer_n, commit_read_pointer_q;
logic [$clog2(DEPTH_COMMIT)-1:0] commit_write_pointer_n, commit_write_pointer_q; logic [$clog2(DEPTH_COMMIT)-1:0] commit_write_pointer_n, commit_write_pointer_q;
assign store_buffer_empty_o = (speculative_status_cnt_q == 0) & no_st_pending_o; assign store_buffer_empty_o = (speculative_status_cnt_q == 0) & no_st_pending_o;
// ---------------------------------------- // ----------------------------------------
// Speculative Queue - Core Interface // Speculative Queue - Core Interface
// ---------------------------------------- // ----------------------------------------
always_comb begin : core_if always_comb begin : core_if
automatic logic [$clog2(DEPTH_SPEC):0] speculative_status_cnt; automatic logic [$clog2(DEPTH_SPEC):0] speculative_status_cnt;
speculative_status_cnt = speculative_status_cnt_q; speculative_status_cnt = speculative_status_cnt_q;
// default assignments // default assignments
speculative_status_cnt_n = speculative_status_cnt_q; speculative_status_cnt_n = speculative_status_cnt_q;
speculative_read_pointer_n = speculative_read_pointer_q; speculative_read_pointer_n = speculative_read_pointer_q;
speculative_write_pointer_n = speculative_write_pointer_q; speculative_write_pointer_n = speculative_write_pointer_q;
speculative_queue_n = speculative_queue_q; speculative_queue_n = speculative_queue_q;
// LSU interface // LSU interface
// we are ready to accept a new entry and the input data is valid // we are ready to accept a new entry and the input data is valid
if (valid_i) begin if (valid_i) begin
speculative_queue_n[speculative_write_pointer_q].address = paddr_i; speculative_queue_n[speculative_write_pointer_q].address = paddr_i;
speculative_queue_n[speculative_write_pointer_q].data = data_i; speculative_queue_n[speculative_write_pointer_q].data = data_i;
speculative_queue_n[speculative_write_pointer_q].be = be_i; speculative_queue_n[speculative_write_pointer_q].be = be_i;
speculative_queue_n[speculative_write_pointer_q].data_size = data_size_i; speculative_queue_n[speculative_write_pointer_q].data_size = data_size_i;
speculative_queue_n[speculative_write_pointer_q].valid = 1'b1; speculative_queue_n[speculative_write_pointer_q].valid = 1'b1;
// advance the write pointer // advance the write pointer
speculative_write_pointer_n = speculative_write_pointer_q + 1'b1; speculative_write_pointer_n = speculative_write_pointer_q + 1'b1;
speculative_status_cnt++; speculative_status_cnt++;
end
// evict the current entry out of this queue, the commit queue will thankfully take it and commit it
// to the memory hierarchy
if (commit_i) begin
// invalidate
speculative_queue_n[speculative_read_pointer_q].valid = 1'b0;
// advance the read pointer
speculative_read_pointer_n = speculative_read_pointer_q + 1'b1;
speculative_status_cnt--;
end
speculative_status_cnt_n = speculative_status_cnt;
// when we flush evict the speculative stores
if (flush_i) begin
// reset all valid flags
for (int unsigned i = 0; i < DEPTH_SPEC; i++)
speculative_queue_n[i].valid = 1'b0;
speculative_write_pointer_n = speculative_read_pointer_q;
// also reset the status count
speculative_status_cnt_n = 'b0;
end
// we are ready if the speculative and the commit queue have a space left
ready_o = (speculative_status_cnt_n < (DEPTH_SPEC)) || commit_i;
end end
// ---------------------------------------- // evict the current entry out of this queue, the commit queue will thankfully take it and commit it
// Commit Queue - Memory Interface // to the memory hierarchy
// ---------------------------------------- if (commit_i) begin
// invalidate
speculative_queue_n[speculative_read_pointer_q].valid = 1'b0;
// advance the read pointer
speculative_read_pointer_n = speculative_read_pointer_q + 1'b1;
speculative_status_cnt--;
end
// we will never kill a request in the store buffer since we already know that the translation is valid speculative_status_cnt_n = speculative_status_cnt;
// e.g.: a kill request will only be necessary if we are not sure if the requested memory address will result in a TLB fault
assign req_port_o.kill_req = 1'b0;
assign req_port_o.data_we = 1'b1; // we will always write in the store queue
assign req_port_o.tag_valid = 1'b0;
// we do not require an acknowledgement for writes, thus we do not need to identify uniquely the responses // when we flush evict the speculative stores
assign req_port_o.data_id = '0; if (flush_i) begin
// those signals can directly be output to the memory // reset all valid flags
assign req_port_o.address_index = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; for (int unsigned i = 0; i < DEPTH_SPEC; i++) speculative_queue_n[i].valid = 1'b0;
// if we got a new request we already saved the tag from the previous cycle
assign req_port_o.address_tag = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_TAG_WIDTH + speculative_write_pointer_n = speculative_read_pointer_q;
// also reset the status count
speculative_status_cnt_n = 'b0;
end
// we are ready if the speculative and the commit queue have a space left
ready_o = (speculative_status_cnt_n < (DEPTH_SPEC)) || commit_i;
end
// ----------------------------------------
// Commit Queue - Memory Interface
// ----------------------------------------
// we will never kill a request in the store buffer since we already know that the translation is valid
// e.g.: a kill request will only be necessary if we are not sure if the requested memory address will result in a TLB fault
assign req_port_o.kill_req = 1'b0;
assign req_port_o.data_we = 1'b1; // we will always write in the store queue
assign req_port_o.tag_valid = 1'b0;
// we do not require an acknowledgement for writes, thus we do not need to identify uniquely the responses
assign req_port_o.data_id = '0;
// those signals can directly be output to the memory
assign req_port_o.address_index = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_INDEX_WIDTH-1:0];
// if we got a new request we already saved the tag from the previous cycle
assign req_port_o.address_tag = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_TAG_WIDTH +
ariane_pkg::DCACHE_INDEX_WIDTH-1 : ariane_pkg::DCACHE_INDEX_WIDTH-1 :
ariane_pkg::DCACHE_INDEX_WIDTH]; ariane_pkg::DCACHE_INDEX_WIDTH];
assign req_port_o.data_wdata = commit_queue_q[commit_read_pointer_q].data; assign req_port_o.data_wdata = commit_queue_q[commit_read_pointer_q].data;
assign req_port_o.data_be = commit_queue_q[commit_read_pointer_q].be; assign req_port_o.data_be = commit_queue_q[commit_read_pointer_q].be;
assign req_port_o.data_size = commit_queue_q[commit_read_pointer_q].data_size; assign req_port_o.data_size = commit_queue_q[commit_read_pointer_q].data_size;
assign mem_paddr_o = commit_queue_n[commit_read_pointer_n].address; assign mem_paddr_o = commit_queue_n[commit_read_pointer_n].address;
always_comb begin : store_if always_comb begin : store_if
automatic logic [$clog2(DEPTH_COMMIT):0] commit_status_cnt; automatic logic [$clog2(DEPTH_COMMIT):0] commit_status_cnt;
commit_status_cnt = commit_status_cnt_q; commit_status_cnt = commit_status_cnt_q;
commit_ready_o = (commit_status_cnt_q < DEPTH_COMMIT); commit_ready_o = (commit_status_cnt_q < DEPTH_COMMIT);
// no store is pending if we don't have any element in the commit queue e.g.: it is empty // no store is pending if we don't have any element in the commit queue e.g.: it is empty
no_st_pending_o = (commit_status_cnt_q == 0); no_st_pending_o = (commit_status_cnt_q == 0);
// default assignments // default assignments
commit_read_pointer_n = commit_read_pointer_q; commit_read_pointer_n = commit_read_pointer_q;
commit_write_pointer_n = commit_write_pointer_q; commit_write_pointer_n = commit_write_pointer_q;
commit_queue_n = commit_queue_q; commit_queue_n = commit_queue_q;
req_port_o.data_req = 1'b0; req_port_o.data_req = 1'b0;
// there should be no commit when we are flushing // there should be no commit when we are flushing
// if the entry in the commit queue is valid and not speculative anymore we can issue this instruction // if the entry in the commit queue is valid and not speculative anymore we can issue this instruction
if (commit_queue_q[commit_read_pointer_q].valid && !stall_st_pending_i) begin if (commit_queue_q[commit_read_pointer_q].valid && !stall_st_pending_i) begin
req_port_o.data_req = 1'b1; req_port_o.data_req = 1'b1;
if (req_port_i.data_gnt) begin if (req_port_i.data_gnt) begin
// we can evict it from the commit buffer // we can evict it from the commit buffer
commit_queue_n[commit_read_pointer_q].valid = 1'b0; commit_queue_n[commit_read_pointer_q].valid = 1'b0;
// advance the read_pointer // advance the read_pointer
commit_read_pointer_n = commit_read_pointer_q + 1'b1; commit_read_pointer_n = commit_read_pointer_q + 1'b1;
commit_status_cnt--; commit_status_cnt--;
end end
end end
// we ignore the rvalid signal for now as we assume that the store // we ignore the rvalid signal for now as we assume that the store
// happened if we got a grant // happened if we got a grant
// shift the store request from the speculative buffer to the non-speculative // shift the store request from the speculative buffer to the non-speculative
if (commit_i) begin if (commit_i) begin
commit_queue_n[commit_write_pointer_q] = speculative_queue_q[speculative_read_pointer_q]; commit_queue_n[commit_write_pointer_q] = speculative_queue_q[speculative_read_pointer_q];
commit_write_pointer_n = commit_write_pointer_n + 1'b1; commit_write_pointer_n = commit_write_pointer_n + 1'b1;
commit_status_cnt++; commit_status_cnt++;
end
commit_status_cnt_n = commit_status_cnt;
end end
// ------------------ commit_status_cnt_n = commit_status_cnt;
// Address Checker end
// ------------------
// The load should return the data stored by the most recent store to the
// same physical address. The most direct way to implement this is to
// maintain physical addresses in the store buffer.
// Of course, there are other micro-architectural techniques to accomplish // ------------------
// the same thing: you can interlock and wait for the store buffer to // Address Checker
// drain if the load VA matches any store VA modulo the page size (i.e. // ------------------
// bits 11:0). As a special case, it is correct to bypass if the full VA // The load should return the data stored by the most recent store to the
// matches, and no younger stores' VAs match in bits 11:0. // same physical address. The most direct way to implement this is to
// // maintain physical addresses in the store buffer.
// checks if the requested load is in the store buffer
// page offsets are virtually and physically the same
always_comb begin : address_checker
page_offset_matches_o = 1'b0;
// check if the LSBs are identical and the entry is valid // Of course, there are other micro-architectural techniques to accomplish
for (int unsigned i = 0; i < DEPTH_COMMIT; i++) begin // the same thing: you can interlock and wait for the store buffer to
// Check if the page offset matches and whether the entry is valid, for the commit queue // drain if the load VA matches any store VA modulo the page size (i.e.
if ((page_offset_i[11:3] == commit_queue_q[i].address[11:3]) && commit_queue_q[i].valid) begin // bits 11:0). As a special case, it is correct to bypass if the full VA
page_offset_matches_o = 1'b1; // matches, and no younger stores' VAs match in bits 11:0.
break; //
end // checks if the requested load is in the store buffer
end // page offsets are virtually and physically the same
always_comb begin : address_checker
page_offset_matches_o = 1'b0;
for (int unsigned i = 0; i < DEPTH_SPEC; i++) begin // check if the LSBs are identical and the entry is valid
// do the same for the speculative queue for (int unsigned i = 0; i < DEPTH_COMMIT; i++) begin
if ((page_offset_i[11:3] == speculative_queue_q[i].address[11:3]) && speculative_queue_q[i].valid) begin // Check if the page offset matches and whether the entry is valid, for the commit queue
page_offset_matches_o = 1'b1; if ((page_offset_i[11:3] == commit_queue_q[i].address[11:3]) && commit_queue_q[i].valid) begin
break; page_offset_matches_o = 1'b1;
end break;
end end
// or it matches with the entry we are currently putting into the queue
if ((page_offset_i[11:3] == paddr_i[11:3]) && valid_without_flush_i) begin
page_offset_matches_o = 1'b1;
end
end end
for (int unsigned i = 0; i < DEPTH_SPEC; i++) begin
// do the same for the speculative queue
if ((page_offset_i[11:3] == speculative_queue_q[i].address[11:3]) && speculative_queue_q[i].valid) begin
page_offset_matches_o = 1'b1;
break;
end
end
// or it matches with the entry we are currently putting into the queue
if ((page_offset_i[11:3] == paddr_i[11:3]) && valid_without_flush_i) begin
page_offset_matches_o = 1'b1;
end
end
// registers
always_ff @(posedge clk_i or negedge rst_ni) begin : p_spec
if (~rst_ni) begin
speculative_queue_q <= '{default: 0};
speculative_read_pointer_q <= '0;
speculative_write_pointer_q <= '0;
speculative_status_cnt_q <= '0;
end else begin
speculative_queue_q <= speculative_queue_n;
speculative_read_pointer_q <= speculative_read_pointer_n;
speculative_write_pointer_q <= speculative_write_pointer_n;
speculative_status_cnt_q <= speculative_status_cnt_n;
end
end
// registers // registers
always_ff @(posedge clk_i or negedge rst_ni) begin : p_commit always_ff @(posedge clk_i or negedge rst_ni) begin : p_spec
if (~rst_ni) begin if (~rst_ni) begin
commit_queue_q <= '{default: 0}; speculative_queue_q <= '{default: 0};
commit_read_pointer_q <= '0; speculative_read_pointer_q <= '0;
commit_write_pointer_q <= '0; speculative_write_pointer_q <= '0;
commit_status_cnt_q <= '0; speculative_status_cnt_q <= '0;
end else begin end else begin
commit_queue_q <= commit_queue_n; speculative_queue_q <= speculative_queue_n;
commit_read_pointer_q <= commit_read_pointer_n; speculative_read_pointer_q <= speculative_read_pointer_n;
commit_write_pointer_q <= commit_write_pointer_n; speculative_write_pointer_q <= speculative_write_pointer_n;
commit_status_cnt_q <= commit_status_cnt_n; speculative_status_cnt_q <= speculative_status_cnt_n;
end end
end end
/////////////////////////////////////////////////////// // registers
// assertions always_ff @(posedge clk_i or negedge rst_ni) begin : p_commit
/////////////////////////////////////////////////////// if (~rst_ni) begin
commit_queue_q <= '{default: 0};
commit_read_pointer_q <= '0;
commit_write_pointer_q <= '0;
commit_status_cnt_q <= '0;
end else begin
commit_queue_q <= commit_queue_n;
commit_read_pointer_q <= commit_read_pointer_n;
commit_write_pointer_q <= commit_write_pointer_n;
commit_status_cnt_q <= commit_status_cnt_n;
end
end
//pragma translate_off ///////////////////////////////////////////////////////
// assert that commit is never set when we are flushing this would be counter intuitive // assertions
// as flush and commit is decided in the same stage ///////////////////////////////////////////////////////
commit_and_flush: assert property (
@(posedge clk_i) rst_ni && flush_i |-> !commit_i)
else $error ("[Commit Queue] You are trying to commit and flush in the same cycle");
speculative_buffer_overflow: assert property ( //pragma translate_off
@(posedge clk_i) rst_ni && (speculative_status_cnt_q == DEPTH_SPEC) |-> !valid_i) // assert that commit is never set when we are flushing this would be counter intuitive
else $error ("[Speculative Queue] You are trying to push new data although the buffer is not ready"); // as flush and commit is decided in the same stage
commit_and_flush :
assert property (@(posedge clk_i) rst_ni && flush_i |-> !commit_i)
else $error("[Commit Queue] You are trying to commit and flush in the same cycle");
speculative_buffer_underflow: assert property ( speculative_buffer_overflow :
@(posedge clk_i) rst_ni && (speculative_status_cnt_q == 0) |-> !commit_i) assert property (@(posedge clk_i) rst_ni && (speculative_status_cnt_q == DEPTH_SPEC) |-> !valid_i)
else $error ("[Speculative Queue] You are committing although there are no stores to commit"); else
$error("[Speculative Queue] You are trying to push new data although the buffer is not ready");
commit_buffer_overflow: assert property ( speculative_buffer_underflow :
@(posedge clk_i) rst_ni && (commit_status_cnt_q == DEPTH_COMMIT) |-> !commit_i) assert property (@(posedge clk_i) rst_ni && (speculative_status_cnt_q == 0) |-> !commit_i)
else $error("[Commit Queue] You are trying to commit a store although the buffer is full"); else $error("[Speculative Queue] You are committing although there are no stores to commit");
//pragma translate_on
commit_buffer_overflow :
assert property (@(posedge clk_i) rst_ni && (commit_status_cnt_q == DEPTH_COMMIT) |-> !commit_i)
else $error("[Commit Queue] You are trying to commit a store although the buffer is full");
//pragma translate_on
endmodule endmodule

View file

@ -13,278 +13,279 @@
// Description: Store Unit, takes care of all store requests and atomic memory operations (AMOs) // Description: Store Unit, takes care of all store requests and atomic memory operations (AMOs)
module store_unit import ariane_pkg::*; #( module store_unit
import ariane_pkg::*;
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty
) ( ) (
input logic clk_i, // Clock input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low input logic rst_ni, // Asynchronous reset active low
input logic flush_i, input logic flush_i,
input logic stall_st_pending_i, input logic stall_st_pending_i,
output logic no_st_pending_o, output logic no_st_pending_o,
output logic store_buffer_empty_o, output logic store_buffer_empty_o,
// store unit input port // store unit input port
input logic valid_i, input logic valid_i,
input lsu_ctrl_t lsu_ctrl_i, input lsu_ctrl_t lsu_ctrl_i,
output logic pop_st_o, output logic pop_st_o,
input logic commit_i, input logic commit_i,
output logic commit_ready_o, output logic commit_ready_o,
input logic amo_valid_commit_i, input logic amo_valid_commit_i,
// store unit output port // store unit output port
output logic valid_o, output logic valid_o,
output logic [TRANS_ID_BITS-1:0] trans_id_o, output logic [TRANS_ID_BITS-1:0] trans_id_o,
output riscv::xlen_t result_o, output riscv::xlen_t result_o,
output exception_t ex_o, output exception_t ex_o,
// MMU -> Address Translation // MMU -> Address Translation
output logic translation_req_o, // request address translation output logic translation_req_o, // request address translation
output logic [riscv::VLEN-1:0] vaddr_o, // virtual address out output logic [riscv::VLEN-1:0] vaddr_o, // virtual address out
output [riscv::PLEN-1:0] mem_paddr_o, output [riscv::PLEN-1:0] mem_paddr_o,
input logic [riscv::PLEN-1:0] paddr_i, // physical address in input logic [riscv::PLEN-1:0] paddr_i, // physical address in
input exception_t ex_i, input exception_t ex_i,
input logic dtlb_hit_i, // will be one in the same cycle translation_req was asserted if it hits input logic dtlb_hit_i, // will be one in the same cycle translation_req was asserted if it hits
// address checker // address checker
input logic [11:0] page_offset_i, input logic [11:0] page_offset_i,
output logic page_offset_matches_o, output logic page_offset_matches_o,
// D$ interface // D$ interface
output amo_req_t amo_req_o, output amo_req_t amo_req_o,
input amo_resp_t amo_resp_i, input amo_resp_t amo_resp_i,
input dcache_req_o_t req_port_i, input dcache_req_o_t req_port_i,
output dcache_req_i_t req_port_o output dcache_req_i_t req_port_o
); );
// it doesn't matter what we are writing back as stores don't return anything // it doesn't matter what we are writing back as stores don't return anything
assign result_o = lsu_ctrl_i.data; assign result_o = lsu_ctrl_i.data;
enum logic [1:0] { enum logic [1:0] {
IDLE, IDLE,
VALID_STORE, VALID_STORE,
WAIT_TRANSLATION, WAIT_TRANSLATION,
WAIT_STORE_READY WAIT_STORE_READY
} state_d, state_q; }
state_d, state_q;
// store buffer control signals // store buffer control signals
logic st_ready; logic st_ready;
logic st_valid; logic st_valid;
logic st_valid_without_flush; logic st_valid_without_flush;
logic instr_is_amo; logic instr_is_amo;
assign instr_is_amo = is_amo(lsu_ctrl_i.operation); assign instr_is_amo = is_amo(lsu_ctrl_i.operation);
// keep the data and the byte enable for the second cycle (after address translation) // keep the data and the byte enable for the second cycle (after address translation)
riscv::xlen_t st_data_n, st_data_q; riscv::xlen_t st_data_n, st_data_q;
logic [(riscv::XLEN/8)-1:0] st_be_n, st_be_q; logic [(riscv::XLEN/8)-1:0] st_be_n, st_be_q;
logic [1:0] st_data_size_n, st_data_size_q; logic [1:0] st_data_size_n, st_data_size_q;
amo_t amo_op_d, amo_op_q; amo_t amo_op_d, amo_op_q;
logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q;
// output assignments // output assignments
assign vaddr_o = lsu_ctrl_i.vaddr; // virtual address assign vaddr_o = lsu_ctrl_i.vaddr; // virtual address
assign trans_id_o = trans_id_q; // transaction id from previous cycle assign trans_id_o = trans_id_q; // transaction id from previous cycle
always_comb begin : store_control always_comb begin : store_control
translation_req_o = 1'b0; translation_req_o = 1'b0;
valid_o = 1'b0; valid_o = 1'b0;
st_valid = 1'b0; st_valid = 1'b0;
st_valid_without_flush = 1'b0; st_valid_without_flush = 1'b0;
pop_st_o = 1'b0; pop_st_o = 1'b0;
ex_o = ex_i; ex_o = ex_i;
trans_id_n = lsu_ctrl_i.trans_id; trans_id_n = lsu_ctrl_i.trans_id;
state_d = state_q; state_d = state_q;
case (state_q) case (state_q)
// we got a valid store // we got a valid store
IDLE: begin IDLE: begin
if (valid_i) begin if (valid_i) begin
state_d = VALID_STORE; state_d = VALID_STORE;
translation_req_o = 1'b1; translation_req_o = 1'b1;
pop_st_o = 1'b1; pop_st_o = 1'b1;
// check if translation was valid and we have space in the store buffer // check if translation was valid and we have space in the store buffer
// otherwise simply stall // otherwise simply stall
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = WAIT_TRANSLATION; state_d = WAIT_TRANSLATION;
pop_st_o = 1'b0; pop_st_o = 1'b0;
end end
if (!st_ready) begin if (!st_ready) begin
state_d = WAIT_STORE_READY; state_d = WAIT_STORE_READY;
pop_st_o = 1'b0; pop_st_o = 1'b0;
end end
end
end
VALID_STORE: begin
valid_o = 1'b1;
// post this store to the store buffer if we are not flushing
if (!flush_i)
st_valid = 1'b1;
st_valid_without_flush = 1'b1;
// we have another request and its not an AMO (the AMO buffer only has depth 1)
if (valid_i && !instr_is_amo) begin
translation_req_o = 1'b1;
state_d = VALID_STORE;
pop_st_o = 1'b1;
if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = WAIT_TRANSLATION;
pop_st_o = 1'b0;
end
if (!st_ready) begin
state_d = WAIT_STORE_READY;
pop_st_o = 1'b0;
end
// if we do not have another request go back to idle
end else begin
state_d = IDLE;
end
end
// the store queue is currently full
WAIT_STORE_READY: begin
// keep the translation request high
translation_req_o = 1'b1;
if (st_ready && dtlb_hit_i) begin
state_d = IDLE;
end
end
// we didn't receive a valid translation, wait for one
// but we know that the store queue is not full as we could only have landed here if
// it wasn't full
WAIT_TRANSLATION: begin
if(ariane_pkg::MMU_PRESENT) begin
translation_req_o = 1'b1;
if (dtlb_hit_i) begin
state_d = IDLE;
end
end
end
endcase
// -----------------
// Access Exception
// -----------------
// we got an address translation exception (access rights, misaligned or page fault)
if (ex_i.valid && (state_q != IDLE)) begin
// the only difference is that we do not want to store this request
pop_st_o = 1'b1;
st_valid = 1'b0;
state_d = IDLE;
valid_o = 1'b1;
end end
end
if (flush_i) VALID_STORE: begin
state_d = IDLE; valid_o = 1'b1;
end // post this store to the store buffer if we are not flushing
if (!flush_i) st_valid = 1'b1;
// ----------- st_valid_without_flush = 1'b1;
// Re-aligner
// -----------
// re-align the write data to comply with the address offset
always_comb begin
st_be_n = lsu_ctrl_i.be;
// don't shift the data if we are going to perform an AMO as we still need to operate on this data
st_data_n = instr_is_amo ? lsu_ctrl_i.data[riscv::XLEN-1:0]
: data_align(lsu_ctrl_i.vaddr[2:0], lsu_ctrl_i.data);
st_data_size_n = extract_transfer_size(lsu_ctrl_i.operation);
// save AMO op for next cycle
case (lsu_ctrl_i.operation)
AMO_LRW, AMO_LRD: amo_op_d = AMO_LR;
AMO_SCW, AMO_SCD: amo_op_d = AMO_SC;
AMO_SWAPW, AMO_SWAPD: amo_op_d = AMO_SWAP;
AMO_ADDW, AMO_ADDD: amo_op_d = AMO_ADD;
AMO_ANDW, AMO_ANDD: amo_op_d = AMO_AND;
AMO_ORW, AMO_ORD: amo_op_d = AMO_OR;
AMO_XORW, AMO_XORD: amo_op_d = AMO_XOR;
AMO_MAXW, AMO_MAXD: amo_op_d = AMO_MAX;
AMO_MAXWU, AMO_MAXDU: amo_op_d = AMO_MAXU;
AMO_MINW, AMO_MIND: amo_op_d = AMO_MIN;
AMO_MINWU, AMO_MINDU: amo_op_d = AMO_MINU;
default: amo_op_d = AMO_NONE;
endcase
end
logic store_buffer_valid, amo_buffer_valid; // we have another request and its not an AMO (the AMO buffer only has depth 1)
logic store_buffer_ready, amo_buffer_ready; if (valid_i && !instr_is_amo) begin
// multiplex between store unit and amo buffer translation_req_o = 1'b1;
assign store_buffer_valid = st_valid & (amo_op_q == AMO_NONE); state_d = VALID_STORE;
assign amo_buffer_valid = st_valid & (amo_op_q != AMO_NONE); pop_st_o = 1'b1;
assign st_ready = store_buffer_ready & amo_buffer_ready; if (ariane_pkg::MMU_PRESENT && !dtlb_hit_i) begin
state_d = WAIT_TRANSLATION;
pop_st_o = 1'b0;
end
// --------------- if (!st_ready) begin
// Store Queue state_d = WAIT_STORE_READY;
// --------------- pop_st_o = 1'b0;
store_buffer #( end
.CVA6Cfg ( CVA6Cfg ) // if we do not have another request go back to idle
) store_buffer_i (
.clk_i,
.rst_ni,
.flush_i,
.stall_st_pending_i,
.no_st_pending_o,
.store_buffer_empty_o,
.page_offset_i,
.page_offset_matches_o,
.commit_i,
.commit_ready_o,
.ready_o ( store_buffer_ready ),
.valid_i ( store_buffer_valid ),
// the flush signal can be critical and we need this valid
// signal to check whether the page_offset matches or not,
// functionaly it doesn't make a difference whether we use
// the correct valid signal or not as we are flushing
// the whole pipeline anyway
.valid_without_flush_i ( st_valid_without_flush ),
.paddr_i,
.mem_paddr_o ( mem_paddr_o ),
.data_i ( st_data_q ),
.be_i ( st_be_q ),
.data_size_i ( st_data_size_q ),
.req_port_i ( req_port_i ),
.req_port_o ( req_port_o )
);
amo_buffer #(
.CVA6Cfg ( CVA6Cfg )
) i_amo_buffer (
.clk_i,
.rst_ni,
.flush_i,
.valid_i ( amo_buffer_valid ),
.ready_o ( amo_buffer_ready ),
.paddr_i ( paddr_i ),
.amo_op_i ( amo_op_q ),
.data_i ( st_data_q ),
.data_size_i ( st_data_size_q ),
.amo_req_o ( amo_req_o ),
.amo_resp_i ( amo_resp_i ),
.amo_valid_commit_i ( amo_valid_commit_i ),
.no_st_pending_i ( no_st_pending_o )
);
// ---------------
// Registers
// ---------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
state_q <= IDLE;
st_be_q <= '0;
st_data_q <= '0;
st_data_size_q <= '0;
trans_id_q <= '0;
amo_op_q <= AMO_NONE;
end else begin end else begin
state_q <= state_d; state_d = IDLE;
st_be_q <= st_be_n;
st_data_q <= st_data_n;
trans_id_q <= trans_id_n;
st_data_size_q <= st_data_size_n;
amo_op_q <= amo_op_d;
end end
end
// the store queue is currently full
WAIT_STORE_READY: begin
// keep the translation request high
translation_req_o = 1'b1;
if (st_ready && dtlb_hit_i) begin
state_d = IDLE;
end
end
// we didn't receive a valid translation, wait for one
// but we know that the store queue is not full as we could only have landed here if
// it wasn't full
WAIT_TRANSLATION: begin
if (ariane_pkg::MMU_PRESENT) begin
translation_req_o = 1'b1;
if (dtlb_hit_i) begin
state_d = IDLE;
end
end
end
endcase
// -----------------
// Access Exception
// -----------------
// we got an address translation exception (access rights, misaligned or page fault)
if (ex_i.valid && (state_q != IDLE)) begin
// the only difference is that we do not want to store this request
pop_st_o = 1'b1;
st_valid = 1'b0;
state_d = IDLE;
valid_o = 1'b1;
end end
if (flush_i) state_d = IDLE;
end
// -----------
// Re-aligner
// -----------
// re-align the write data to comply with the address offset
always_comb begin
  // Translate the LSU operation into the AMO opcode kept for the next cycle.
  // The word and double-word flavours of an atomic share one opcode; any
  // operation that is not listed here is treated as "no AMO".
  case (lsu_ctrl_i.operation)
    AMO_LRW, AMO_LRD:     amo_op_d = AMO_LR;
    AMO_SCW, AMO_SCD:     amo_op_d = AMO_SC;
    AMO_SWAPW, AMO_SWAPD: amo_op_d = AMO_SWAP;
    AMO_ADDW, AMO_ADDD:   amo_op_d = AMO_ADD;
    AMO_ANDW, AMO_ANDD:   amo_op_d = AMO_AND;
    AMO_ORW, AMO_ORD:     amo_op_d = AMO_OR;
    AMO_XORW, AMO_XORD:   amo_op_d = AMO_XOR;
    AMO_MAXW, AMO_MAXD:   amo_op_d = AMO_MAX;
    AMO_MAXWU, AMO_MAXDU: amo_op_d = AMO_MAXU;
    AMO_MINW, AMO_MIND:   amo_op_d = AMO_MIN;
    AMO_MINWU, AMO_MINDU: amo_op_d = AMO_MINU;
    default:              amo_op_d = AMO_NONE;
  endcase

  // Byte enables are taken over from the LSU control word unchanged.
  st_be_n        = lsu_ctrl_i.be;
  // The transfer size follows directly from the operation.
  st_data_size_n = extract_transfer_size(lsu_ctrl_i.operation);
  // Ordinary stores have their write data re-aligned according to the low
  // three address bits. AMO operands are passed through unshifted because
  // they still have to be operated on.
  st_data_n      = instr_is_amo ? lsu_ctrl_i.data[riscv::XLEN-1:0]
                                : data_align(lsu_ctrl_i.vaddr[2:0], lsu_ctrl_i.data);
end
// Valid/ready handshake wires towards the store buffer and the AMO buffer.
logic store_buffer_valid;
logic store_buffer_ready;
logic amo_buffer_valid;
logic amo_buffer_ready;
// Steer a valid request to exactly one of the two buffers: the AMO buffer when
// the registered operation (amo_op_q) is an atomic, the store buffer otherwise.
assign amo_buffer_valid   = st_valid & (amo_op_q != AMO_NONE);
assign store_buffer_valid = st_valid & (amo_op_q == AMO_NONE);
// The store unit only reports ready when both buffers are ready.
assign st_ready           = amo_buffer_ready & store_buffer_ready;
// ---------------
// Store Queue
// ---------------
// Store buffer instance. Every port is connected explicitly by name; ports
// whose signal carries the same name as the port are wired one-to-one.
store_buffer #(
    .CVA6Cfg(CVA6Cfg)
) store_buffer_i (
    .clk_i                (clk_i),
    .rst_ni               (rst_ni),
    .flush_i              (flush_i),
    .stall_st_pending_i   (stall_st_pending_i),
    .no_st_pending_o      (no_st_pending_o),
    .store_buffer_empty_o (store_buffer_empty_o),
    .page_offset_i        (page_offset_i),
    .page_offset_matches_o(page_offset_matches_o),
    .commit_i             (commit_i),
    .commit_ready_o       (commit_ready_o),
    // handshake, gated so that only non-AMO requests reach this buffer
    .ready_o              (store_buffer_ready),
    .valid_i              (store_buffer_valid),
    // The flush signal can be timing critical, and this valid signal is needed
    // to check whether the page offset matches. Functionally it makes no
    // difference whether the flush-qualified valid is used here, because the
    // whole pipeline is being flushed anyway.
    .valid_without_flush_i(st_valid_without_flush),
    .paddr_i              (paddr_i),
    .mem_paddr_o          (mem_paddr_o),
    // registered store payload
    .data_i               (st_data_q),
    .be_i                 (st_be_q),
    .data_size_i          (st_data_size_q),
    .req_port_i           (req_port_i),
    .req_port_o           (req_port_o)
);
// AMO buffer instance. It receives the registered AMO operation together with
// the registered store data and size, and only sees requests whose amo_op_q is
// not AMO_NONE (see amo_buffer_valid).
amo_buffer #(
    .CVA6Cfg(CVA6Cfg)
) i_amo_buffer (
    .clk_i             (clk_i),
    .rst_ni            (rst_ni),
    .flush_i           (flush_i),
    // handshake with the store unit
    .valid_i           (amo_buffer_valid),
    .ready_o           (amo_buffer_ready),
    // request payload
    .paddr_i           (paddr_i),
    .amo_op_i          (amo_op_q),
    .data_i            (st_data_q),
    .data_size_i       (st_data_size_q),
    // AMO request/response ports of the enclosing module
    .amo_req_o         (amo_req_o),
    .amo_resp_i        (amo_resp_i),
    .amo_valid_commit_i(amo_valid_commit_i),
    // driven by the store buffer's no_st_pending_o output
    .no_st_pending_i   (no_st_pending_o)
);
// ---------------
// Registers
// ---------------
// State and payload registers: asynchronous active-low reset, otherwise every
// register takes its combinationally computed next value on the rising clock
// edge. Non-blocking assignments are independent, so both branches list the
// registers in the same order.
always_ff @(posedge clk_i or negedge rst_ni) begin
  if (~rst_ni) begin
    // reset: FSM to IDLE, payload cleared, no AMO pending
    state_q        <= IDLE;
    trans_id_q     <= '0;
    amo_op_q       <= AMO_NONE;
    st_be_q        <= '0;
    st_data_q      <= '0;
    st_data_size_q <= '0;
  end else begin
    state_q        <= state_d;
    trans_id_q     <= trans_id_n;
    amo_op_q       <= amo_op_d;
    st_be_q        <= st_be_n;
    st_data_q      <= st_data_n;
    st_data_size_q <= st_data_size_n;
  end
end
endmodule endmodule