Preliminary OS support

This commit is contained in:
Chris Keilbart 2024-09-10 15:13:31 -07:00
parent f0b92a923a
commit 2eeadb43d9
98 changed files with 3711 additions and 1970 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
test_benches/verilator/build

0
LICENSE Executable file → Normal file
View file

10
README.md Executable file → Normal file
View file

@ -1,6 +1,5 @@
# CVA5
CVA5 is a 32-bit RISC-V processor designed for FPGAs supporting the Multiply/Divide and Double-precision Floating-Point extensions (RV32IMD). The processor is written in SystemVerilog and has been designed to be both highly extensible and highly configurable.
CVA5 is a 32-bit RISC-V processor designed for FPGAs supporting the Multiply/Divide, Atomic, and Floating-Point extensions (RV32IMAFD). The processor is written in SystemVerilog and has been designed to be both highly extensible and highly configurable.
The CVA5 is derived from the Taiga Project from Simon Fraser University.
@ -16,7 +15,6 @@ For up-to-date documentation, as well as an automated build environment setup, r
## License
CVA5 is licensed under the Solderpad License, Version 2.1 ( http://solderpad.org/licenses/SHL-2.1/ ). Solderpad is an extension of the Apache License, and many contributions to CVA5 were made under Apache Version 2.0 ( https://www.apache.org/licenses/LICENSE-2.0 )
@ -25,10 +23,14 @@ A zedboard configuration is provided under the examples directory along with too
## Publications
C. Keilbart, Y. Gao, M. Chua, E. Matthews, S. J. Wilton, and L. Shannon, “Designing an IEEE-Compliant FPU that Supports Configurable Precision for Soft Processors,” ACM Trans. Reconfigurable Technol. Syst., vol. 17, no. 2, Apr. 2024.
doi: [https://doi.org/10.1145/3650036](https://doi.org/10.1145/3650036)
E. Matthews, A. Lu, Z. Fang and L. Shannon, "Rethinking Integer Divider Design for FPGA-Based Soft-Processors," 2019 IEEE 27th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM), San Diego, CA, USA, 2019, pp. 289-297.
doi: [https://doi.org/10.1109/FCCM.2019.00046](https://doi.org/10.1109/FCCM.2019.00046)
E. Matthews, Z. Aguila and L. Shannon, "Evaluating the Performance Efficiency of a Soft-Processor, Variable-Length, Parallel-Execution-Unit Architecture for FPGAs Using the RISC-V ISA," 2018 IEEE 26th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM), Boulder, CO, 2018, pp. 1-8.
doi: [https://doi.org/10.1109/FCCM.2018.00010](https://doi.org/10.1109/FCCM.2018.00010)
E. Matthews and L. Shannon, "TAIGA: A new RISC-V soft-processor framework enabling high performance CPU architectural features," 2017 27th International Conference on Field Programmable Logic and Applications (FPL), Ghent, Belgium, 2017. [https://doi.org/10.23919/FPL.2017.8056766](https://doi.org/10.23919/FPL.2017.8056766)
E. Matthews and L. Shannon, "TAIGA: A new RISC-V soft-processor framework enabling high performance CPU architectural features," 2017 27th International Conference on Field Programmable Logic and Applications (FPL), Ghent, Belgium, 2017.
doi: [https://doi.org/10.23919/FPL.2017.8056766](https://doi.org/10.23919/FPL.2017.8056766)

0
core/common_components/byte_en_bram.sv Executable file → Normal file
View file

12
core/common_components/cva5_fifo.sv Executable file → Normal file
View file

@ -27,10 +27,6 @@
*/
module cva5_fifo
import cva5_config::*;
import riscv_types::*;
import cva5_types::*;
#(
parameter type DATA_TYPE = logic,
parameter FIFO_DEPTH = 4
@ -49,8 +45,10 @@ module cva5_fifo
always_ff @ (posedge clk) begin
if (rst)
fifo.valid <= 0;
else
fifo.valid <= fifo.push | (fifo.valid & ~fifo.pop);
else if (fifo.push & ~fifo.pop)
fifo.valid <= 1;
else if (fifo.pop & ~fifo.push)
fifo.valid <= 0;
end
assign fifo.full = fifo.valid;
@ -134,6 +132,6 @@ module cva5_fifo
fifo_potenial_push_overflow_assertion:
assert property (@(posedge clk) disable iff (rst) fifo.potential_push |-> (~fifo.full | fifo.pop)) else $error("potential push overflow");
fifo_underflow_assertion:
assert property (@(posedge clk) disable iff (rst) fifo.pop |-> fifo.valid) else $error("underflow");
assert property (@(posedge clk) disable iff (rst) fifo.pop |-> (fifo.valid | fifo.push)) else $error("underflow");
endmodule

0
core/common_components/cycler.sv Executable file → Normal file
View file

View file

@ -68,7 +68,7 @@ module lfsr
logic feedback;
////////////////////////////////////////////////////
//Implementation
generate if (WIDTH == 2) begin : gen_width_two
generate if (WIDTH <= 2) begin : gen_width_one_or_two
assign feedback = ~value[WIDTH-1];
end
else begin : gen_width_three_plus
@ -84,8 +84,10 @@ module lfsr
always_ff @ (posedge clk) begin
if (NEEDS_RESET & rst)
value <= '0;
else if (en)
value <= {value[WIDTH-2:0], feedback};
else if (en) begin
value <= value << 1;
value[0] <= feedback;
end
end
endmodule

View file

@ -0,0 +1,69 @@
/*
* Copyright © 2024 Chris Keilbart
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Chris Keilbart <ckeilbar@sfu.ca>
*/
module one_hot_mux
    #(
        parameter OPTIONS = 5,
        parameter type DATA_TYPE = logic
    )
    (
        //clk and rst feed the one-hot assertion only; they drive no logic
        input logic clk,
        input logic rst,

        input logic[OPTIONS-1:0] one_hot,
        input DATA_TYPE[OPTIONS-1:0] choices,
        output DATA_TYPE sel
    );

    //Plain bit-vector image of DATA_TYPE; the explicit casts avoid tool warnings
    typedef logic[$bits(DATA_TYPE)-1:0] casted_t;
    casted_t[OPTIONS-1:0] choices_casted;
    casted_t sel_casted;

    ////////////////////////////////////////////////////
    //Implementation
    //Selection ORs together every choice whose one_hot bit is set
    //Cheaper than converting ohot -> int and indexing

    //Inputs: DATA_TYPE -> raw bits
    always_comb begin
        for (int opt = 0; opt < OPTIONS; opt++) begin
            choices_casted[opt] = casted_t'(choices[opt]);
        end
    end

    //Output: raw bits -> DATA_TYPE
    always_comb begin
        sel = DATA_TYPE'(sel_casted);
    end

    generate if (OPTIONS == 1) begin : gen_no_mux
        //A single option is passed straight through; one_hot is ignored
        assign sel_casted = choices_casted[0];
    end else begin : gen_mux
        always_comb begin
            //Result is all zeros when no bit of one_hot is set
            sel_casted = '0;
            for (int opt = 0; opt < OPTIONS; opt++) begin
                if (one_hot[opt])
                    sel_casted = sel_casted | choices_casted[opt];
            end
        end
    end endgenerate

    ////////////////////////////////////////////////////
    //Assertions
    //At most one select bit may be high on any clock edge outside of reset
    ohot_assertion:
        assert property (@(posedge clk) disable iff (rst) $onehot0(one_hot))
        else $error("Selection mux not one hot");

endmodule

0
core/common_components/one_hot_to_integer.sv Executable file → Normal file
View file

View file

@ -0,0 +1,86 @@
/*
* Copyright © 2024 Chris Keilbart, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Chris Keilbart <ckeilbar@sfu.ca>
*/
//Simple dual-port RAM: port A is write-only with per-column write enables,
//port B is read-only with a registered output followed by an optional
//register pipeline of PIPELINE_DEPTH stages.
//NOTE(review): as coded, data appears on b_rdata 1 + PIPELINE_DEPTH clock edges
//after b_en is sampled (one edge for b_ram_output, one per pipeline stage) -
//confirm this matches the "latency" wording on PIPELINE_DEPTH below.
module sdp_ram
#(
parameter ADDR_WIDTH = 10,
parameter NUM_COL = 4, //Number of independently writeable components
parameter COL_WIDTH = 16, //Width the "byte" enable controls
parameter DATA_WIDTH = COL_WIDTH*NUM_COL, //Do not set this to anything else
parameter PIPELINE_DEPTH = 1, //Depth of the output pipeline, is latency in clock cycles
parameter CASCADE_DEPTH = 4 //Maximum depth of the memory block cascade
)
(
input logic clk,
//Port A
//Write port: a_wbe[i] gates the write of column i of a_wdata when a_en is set
input logic a_en,
input logic[NUM_COL-1:0] a_wbe,
input logic[DATA_WIDTH-1:0] a_wdata,
input logic[ADDR_WIDTH-1:0] a_addr,
//Port B
//Read port: b_en gates the update of the read register
input logic b_en,
input logic[ADDR_WIDTH-1:0] b_addr,
output logic[DATA_WIDTH-1:0] b_rdata
);
//Synthesis attributes applied to the memory array (presumably vendor RAM inference hints - verify against the target toolchain)
(* cascade_height = CASCADE_DEPTH, ramstyle = "no_rw_check" *) //Higher depths use less resources but are slower
logic[DATA_WIDTH-1:0] mem[(1<<ADDR_WIDTH)-1:0];
//Memory contents start as all zeros
initial mem = '{default: '0};
//A write
//Each COL_WIDTH-wide column is written independently, only when both a_en and its a_wbe bit are set
always_ff @(posedge clk) begin
for (int i = 0; i < NUM_COL; i++) begin
if (a_en & a_wbe[i])
mem[a_addr][i*COL_WIDTH +: COL_WIDTH] <= a_wdata[i*COL_WIDTH +: COL_WIDTH];
end
end
//B read
//Registered read; b_ram_output holds its previous value while b_en is low
logic[DATA_WIDTH-1:0] b_ram_output;
always_ff @(posedge clk) begin
if (b_en)
b_ram_output <= mem[b_addr];
end
//B pipeline
//b_en_pipeline[i] is b_en delayed by i+1 cycles; stage i only captures new data
//when its delayed enable is set, so data advances in step with the read that produced it
generate if (PIPELINE_DEPTH > 0) begin : gen_b_pipeline
logic[DATA_WIDTH-1:0] b_data_pipeline[PIPELINE_DEPTH-1:0];
logic[PIPELINE_DEPTH-1:0] b_en_pipeline;
always_ff @(posedge clk) begin
for (int i = 0; i < PIPELINE_DEPTH; i++) begin
//Stage 0 is fed by the read register and b_en; later stages by the previous stage
b_en_pipeline[i] <= i == 0 ? b_en : b_en_pipeline[i-1];
if (b_en_pipeline[i])
b_data_pipeline[i] <= i == 0 ? b_ram_output : b_data_pipeline[i-1];
end
end
//The last stage drives the output
assign b_rdata = b_data_pipeline[PIPELINE_DEPTH-1];
end
else begin : gen_b_transparent_output
//No extra stages: the read register drives the output directly
assign b_rdata = b_ram_output;
end endgenerate
endmodule

View file

@ -0,0 +1,87 @@
/*
* Copyright © 2024 Chris Keilbart, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Chris Keilbart <ckeilbar@sfu.ca>
*/
module sdp_ram_padded
    #(
        parameter ADDR_WIDTH = 10,
        parameter NUM_COL = 4, //Number of independently writeable components
        parameter COL_WIDTH = 16, //Width the "byte" enable controls
        parameter DATA_WIDTH = COL_WIDTH*NUM_COL, //Do not set this to anything else
        parameter PIPELINE_DEPTH = 1, //Depth of the output pipeline, is latency in clock cycles
        parameter CASCADE_DEPTH = 4 //Maximum depth of the memory block cascade
    )
    (
        input logic clk,
        //Port A
        input logic a_en,
        input logic[NUM_COL-1:0] a_wbe,
        input logic[DATA_WIDTH-1:0] a_wdata,
        input logic[ADDR_WIDTH-1:0] a_addr,
        //Port B
        input logic b_en,
        input logic[ADDR_WIDTH-1:0] b_addr,
        output logic[DATA_WIDTH-1:0] b_rdata
    );

    //Wrapper around sdp_ram that widens every column up to the next multiple of
    //8 or 9 bits (whichever needs less padding, 8 on a tie) so the byte enable can be used
    //This results in a more compact BRAM encoding
    localparam PAD_WIDTH8 = (8 - (COL_WIDTH % 8)) % 8;
    localparam PAD_WIDTH9 = (9 - (COL_WIDTH % 9)) % 9;
    localparam PAD_WIDTH = PAD_WIDTH9 < PAD_WIDTH8 ? PAD_WIDTH9 : PAD_WIDTH8;
    localparam PADDED_WIDTH = COL_WIDTH + PAD_WIDTH;
    localparam TOTAL_WIDTH = NUM_COL * PADDED_WIDTH;

    generate if (PAD_WIDTH != 0 && NUM_COL != 1) begin : gen_padded
        logic[TOTAL_WIDTH-1:0] a_padded;
        logic[TOTAL_WIDTH-1:0] b_padded;

        //Write side: spread each column into its padded slot, pad bits are don't-care
        always_comb begin
            a_padded = 'x;
            for (int col = 0; col < NUM_COL; col++) begin
                a_padded[col*PADDED_WIDTH+:COL_WIDTH] = a_wdata[col*COL_WIDTH+:COL_WIDTH];
            end
        end

        //Read side: drop the pad bits and repack the columns contiguously
        always_comb begin
            for (int col = 0; col < NUM_COL; col++) begin
                b_rdata[col*COL_WIDTH+:COL_WIDTH] = b_padded[col*PADDED_WIDTH+:COL_WIDTH];
            end
        end

        sdp_ram #(
            .ADDR_WIDTH(ADDR_WIDTH),
            .NUM_COL(NUM_COL),
            .COL_WIDTH(PADDED_WIDTH),
            .PIPELINE_DEPTH(PIPELINE_DEPTH),
            .CASCADE_DEPTH(CASCADE_DEPTH)
        ) mem (
            .clk(clk),
            .a_en(a_en),
            .a_wbe(a_wbe),
            .a_wdata(a_padded),
            .a_addr(a_addr),
            .b_en(b_en),
            .b_addr(b_addr),
            .b_rdata(b_padded)
        );
    end else begin : gen_no_padding
        //Columns already fit (or there is only one column): use the RAM directly
        sdp_ram #(
            .ADDR_WIDTH(ADDR_WIDTH),
            .NUM_COL(NUM_COL),
            .COL_WIDTH(COL_WIDTH),
            .PIPELINE_DEPTH(PIPELINE_DEPTH),
            .CASCADE_DEPTH(CASCADE_DEPTH)
        ) mem (
            .clk(clk),
            .a_en(a_en),
            .a_wbe(a_wbe),
            .a_wdata(a_wdata),
            .a_addr(a_addr),
            .b_en(b_en),
            .b_addr(b_addr),
            .b_rdata(b_rdata)
        );
    end endgenerate

endmodule

View file

@ -22,16 +22,12 @@
module toggle_memory
import cva5_config::*;
import cva5_types::*;
# (
parameter DEPTH = 8,
parameter NUM_READ_PORTS = 2
)
(
input logic clk,
input logic rst,
input logic toggle,
input logic [$clog2(DEPTH)-1:0] toggle_id,

View file

@ -22,9 +22,6 @@
module toggle_memory_set
import cva5_config::*;
import cva5_types::*;
# (
parameter DEPTH = 64,
parameter NUM_WRITE_PORTS = 3,
@ -32,7 +29,6 @@ module toggle_memory_set
)
(
input logic clk,
input logic rst,
input logic init_clear,
input logic toggle [NUM_WRITE_PORTS],
@ -53,7 +49,7 @@ module toggle_memory_set
//counter for indexing through memories for post-reset clearing/initialization
lfsr #(.WIDTH($clog2(DEPTH)), .NEEDS_RESET(0))
lfsr_counter (
.clk (clk), .rst (rst),
.clk (clk), .rst (1'b0),
.en(init_clear),
.value(clear_index)
);
@ -76,7 +72,7 @@ module toggle_memory_set
for (j = 0; j < NUM_WRITE_PORTS+1; j++) begin : write_port_gen
toggle_memory #(.DEPTH(DEPTH), .NUM_READ_PORTS(NUM_READ_PORTS+1))
mem (
.clk (clk), .rst (rst),
.clk (clk),
.toggle(_toggle[j]),
.toggle_id(_toggle_addr[j]),
.read_id(_read_addr),

View file

View file

@ -131,4 +131,3 @@ module cva5_wrapper_xilinx
cva5 cpu(.*);
endmodule

View file

145
core/cva5.sv Executable file → Normal file
View file

@ -29,6 +29,7 @@ module cva5
import riscv_types::*;
import cva5_types::*;
import fpu_types::*;
import csr_types::*;
#(
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG
@ -48,6 +49,7 @@ module cva5
l2_requester_interface.master l2,
input logic [63:0] mtime,
input interrupt_t s_interrupt,
input interrupt_t m_interrupt
);
@ -90,7 +92,8 @@ module cva5
tlb_interface itlb();
tlb_interface dtlb();
logic tlb_on;
logic instruction_translation_on;
logic data_translation_on;
logic [ASIDLEN-1:0] asid;
//Instruction ID/Metadata
@ -112,7 +115,6 @@ module cva5
logic decode_uses_rd;
logic fp_decode_uses_rd;
rs_addr_t decode_rd_addr;
exception_sources_t decode_exception_unit;
logic decode_is_store;
phys_addr_t decode_phys_rd_addr;
phys_addr_t fp_decode_phys_rd_addr;
@ -127,7 +129,6 @@ module cva5
retire_packet_t fp_wb_retire;
retire_packet_t store_retire;
id_t retire_ids [RETIRE_PORTS];
id_t retire_ids_next [RETIRE_PORTS];
logic retire_port_valid [RETIRE_PORTS];
logic [LOG2_RETIRE_PORTS : 0] retire_count;
//Writeback
@ -138,29 +139,33 @@ module cva5
phys_addr_t wb_phys_addr [CONFIG.NUM_WB_GROUPS];
phys_addr_t fp_wb_phys_addr [2];
logic [4:0] fflag_wmask;
//Exception
logic [31:0] oldest_pc;
renamer_interface #(.NUM_WB_GROUPS(CONFIG.NUM_WB_GROUPS), .READ_PORTS(REGFILE_READ_PORTS)) decode_rename_interface ();
renamer_interface #(.NUM_WB_GROUPS(2), .READ_PORTS(3)) fp_decode_rename_interface ();
//Global Control
exception_interface exception [NUM_EXCEPTION_SOURCES]();
logic [$clog2(NUM_EXCEPTION_SOURCES)-1:0] current_exception_unit;
gc_outputs_t gc;
tlb_packet_t sfence;
load_store_status_t load_store_status;
logic [LOG2_MAX_IDS:0] post_issue_count;
logic [1:0] current_privilege;
logic mret;
logic sret;
logic [31:0] epc;
logic [31:0] exception_target_pc;
logic csr_frontend_flush;
logic interrupt_taken;
logic interrupt_pending;
logic processing_csr;
//CSR broadcast info
logic [1:0] current_privilege;
logic tvm;
logic tsr;
envcfg_t menvcfg;
envcfg_t senvcfg;
logic [31:0] mepc;
logic [31:0] sepc;
logic [31:0] exception_target_pc;
//Decode Unit and Fetch Unit
logic issue_stage_ready;
@ -176,11 +181,12 @@ module cva5
////////////////////////////////////////////////////
//Implementation
////////////////////////////////////////////////////
// Memory Interface
generate if (CONFIG.INCLUDE_S_MODE || CONFIG.INCLUDE_ICACHE || CONFIG.INCLUDE_DCACHE) begin : gen_l1_arbiter
generate if (CONFIG.MODES == MSU || CONFIG.INCLUDE_ICACHE || CONFIG.INCLUDE_DCACHE) begin : gen_l1_arbiter
l1_arbiter #(.CONFIG(CONFIG))
arb(
.clk (clk),
@ -217,7 +223,6 @@ module cva5
.decode_rd_addr (decode_rd_addr),
.decode_phys_rd_addr (decode_phys_rd_addr),
.fp_decode_phys_rd_addr (fp_decode_phys_rd_addr),
.decode_exception_unit (decode_exception_unit),
.decode_is_store (decode_is_store),
.issue (issue),
.instruction_issued (instruction_issued),
@ -231,12 +236,9 @@ module cva5
.fp_wb_retire (fp_wb_retire),
.store_retire (store_retire),
.retire_ids (retire_ids),
.retire_ids_next (retire_ids_next),
.retire_port_valid(retire_port_valid),
.retire_count (retire_count),
.post_issue_count(post_issue_count),
.oldest_pc (oldest_pc),
.current_exception_unit (current_exception_unit)
.post_issue_count(post_issue_count)
);
////////////////////////////////////////////////////
@ -263,8 +265,7 @@ module cva5
.icache_on ('1),
.tlb (itlb),
.l1_request (l1_request[L1_ICACHE_ID]),
.l1_response (l1_response[L1_ICACHE_ID]),
.exception (1'b0)
.l1_response (l1_response[L1_ICACHE_ID])
);
branch_predictor #(.CONFIG(CONFIG))
@ -285,19 +286,19 @@ module cva5
.ras (ras)
);
generate if (CONFIG.INCLUDE_S_MODE) begin : gen_itlb_immu
tlb_lut_ram #(.WAYS(CONFIG.ITLB.WAYS), .DEPTH(CONFIG.ITLB.DEPTH))
i_tlb (
.clk (clk),
.rst (rst),
.gc (gc),
.abort_request (gc.fetch_flush | early_branch_flush),
.asid (asid),
.tlb (itlb),
.mmu (immu)
);
itlb #(.WAYS(CONFIG.ITLB.WAYS), .DEPTH(CONFIG.ITLB.DEPTH))
i_tlb (
.clk (clk),
.rst (rst),
.translation_on (instruction_translation_on),
.sfence (sfence),
.abort_request (gc.fetch_flush | early_branch_flush),
.asid (asid),
.tlb (itlb),
.mmu (immu)
);
generate if (CONFIG.MODES == MSU) begin : gen_immu
mmu i_mmu (
.clk (clk),
.rst (rst),
@ -308,11 +309,6 @@ module cva5
);
end
else begin
assign itlb.ready = 1;
assign itlb.done = itlb.new_request;
assign itlb.physical_address = itlb.virtual_address;
end
endgenerate
////////////////////////////////////////////////////
@ -348,7 +344,6 @@ module cva5
.decode_uses_rd (decode_uses_rd),
.fp_decode_uses_rd (fp_decode_uses_rd),
.decode_rd_addr (decode_rd_addr),
.decode_exception_unit (decode_exception_unit),
.decode_phys_rd_addr (decode_phys_rd_addr),
.fp_decode_phys_rd_addr (fp_decode_phys_rd_addr),
.decode_phys_rs_addr (decode_phys_rs_addr),
@ -455,8 +450,7 @@ module cva5
.issue (unit_issue[LS_ID]),
.dcache_on (1'b1),
.clear_reservation (1'b0),
.tlb (dtlb),
.tlb_on (tlb_on),
.tlb (dtlb),
.l1_request (l1_request[L1_DCACHE_ID]),
.l1_response (l1_response[L1_DCACHE_ID]),
.sc_complete (sc_complete),
@ -465,8 +459,12 @@ module cva5
.m_avalon (m_avalon),
.dwishbone (dwishbone),
.data_bram (data_bram),
.current_privilege (current_privilege),
.menvcfg (menvcfg),
.senvcfg (senvcfg),
.wb_packet (wb_packet),
.fp_wb_packet (fp_wb_packet),
.retire_id (retire_ids[0]),
.store_retire (store_retire),
.exception (exception[LS_EXCEPTION]),
.load_store_status(load_store_status),
@ -474,18 +472,18 @@ module cva5
.fp_wb (fp_unit_wb[0])
);
generate if (CONFIG.INCLUDE_S_MODE) begin : gen_dtlb_dmmu
tlb_lut_ram #(.WAYS(CONFIG.DTLB.WAYS), .DEPTH(CONFIG.DTLB.DEPTH))
d_tlb (
.clk (clk),
.rst (rst),
.gc (gc),
.abort_request (1'b0),
.asid (asid),
.tlb (dtlb),
.mmu (dmmu)
);
dtlb #(.WAYS(CONFIG.DTLB.WAYS), .DEPTH(CONFIG.DTLB.DEPTH))
d_tlb (
.clk (clk),
.rst (rst),
.translation_on (data_translation_on),
.sfence (sfence),
.asid (asid),
.tlb (dtlb),
.mmu (dmmu)
);
generate if (CONFIG.MODES == MSU) begin : gen_dmmu
mmu d_mmu (
.clk (clk),
.rst (rst),
@ -495,11 +493,6 @@ module cva5
.l1_response (l1_response[L1_DMMU_ID])
);
end
else begin
assign dtlb.ready = 1;
assign dtlb.done = dtlb.new_request;
assign dtlb.physical_address = dtlb.virtual_address;
end
endgenerate
generate if (CONFIG.INCLUDE_UNIT.CSR) begin : gen_csrs
@ -515,25 +508,32 @@ module cva5
.uses_rs (unit_uses_rs[CSR_ID]),
.uses_rd (unit_uses_rd[CSR_ID]),
.rf (rf_issue.data),
.instruction_issued (instruction_issued),
.fp_instruction_issued_with_rd (fp_instruction_issued_with_rd),
.issue (unit_issue[CSR_ID]),
.wb (unit_wb[CSR_ID]),
.current_privilege(current_privilege),
.menvcfg(menvcfg),
.senvcfg(senvcfg),
.fflag_wmask (fflag_wmask),
.dyn_rm (dyn_rm),
.interrupt_taken(interrupt_taken),
.interrupt_pending(interrupt_pending),
.processing_csr(processing_csr),
.tlb_on(tlb_on),
.csr_frontend_flush(csr_frontend_flush),
.instruction_translation_on(instruction_translation_on),
.data_translation_on(data_translation_on),
.asid(asid),
.immu(immu),
.dmmu(dmmu),
.exception(gc.exception),
.exception_pkt(gc.exception),
.exception_target_pc (exception_target_pc),
.mret(mret),
.sret(sret),
.epc(epc),
.mepc(mepc),
.sepc(sepc),
.exception(exception[CSR_EXCEPTION]),
.retire_ids(retire_ids),
.retire_count (retire_count),
.mtime(mtime),
.s_interrupt(s_interrupt),
.m_interrupt(m_interrupt)
);
@ -546,27 +546,30 @@ module cva5
.decode_stage (decode),
.issue_stage (issue),
.issue_stage_ready (issue_stage_ready),
.unit_needed (unit_needed[IEC_ID]),
.uses_rs (unit_uses_rs[IEC_ID]),
.uses_rd (unit_uses_rd[IEC_ID]),
.unit_needed (unit_needed[GC_ID]),
.uses_rs (unit_uses_rs[GC_ID]),
.uses_rd (unit_uses_rd[GC_ID]),
.instruction_issued (instruction_issued),
.constant_alu (constant_alu),
.rf (rf_issue.data),
.issue (unit_issue[IEC_ID]),
.issue (unit_issue[GC_ID]),
.branch_flush (branch_flush),
.local_gc_exception (exception[GC_EXCEPTION]),
.exception (exception),
.exception_target_pc (exception_target_pc),
.current_exception_unit (current_exception_unit),
.csr_frontend_flush (csr_frontend_flush),
.current_privilege (current_privilege),
.tvm (tvm),
.tsr (tsr),
.gc (gc),
.oldest_pc (oldest_pc),
.sfence (sfence),
.mret(mret),
.sret(sret),
.epc(epc),
.retire_ids_next (retire_ids_next),
.mepc(mepc),
.sepc(sepc),
.interrupt_taken(interrupt_taken),
.interrupt_pending(interrupt_pending),
.processing_csr(processing_csr),
.load_store_status(load_store_status),
.post_issue_count (post_issue_count)
.load_store_status(load_store_status)
);
generate if (CONFIG.INCLUDE_UNIT.MUL) begin : gen_mul

52
core/decode_and_issue.sv Executable file → Normal file
View file

@ -40,7 +40,6 @@ module decode_and_issue
input logic pc_id_available,
input decode_packet_t decode,
output logic decode_advance,
output exception_sources_t decode_exception_unit,
//Renamer
renamer_interface.decode renamer,
@ -190,6 +189,10 @@ module decode_and_issue
////////////////////////////////////////////////////
//Issue
always_ff @(posedge clk) begin
if (instruction_issued) begin
issue.pc_r <= issue.pc;
issue.instruction_r <= issue.instruction;
end
if (issue_stage_ready) begin
issue.pc <= decode.pc;
issue.instruction <= decode.instruction;
@ -208,7 +211,6 @@ module decode_and_issue
fp_issue_rd_wb_group <= fp_decode_wb_group;
issue.is_multicycle <= ~unit_needed[ALU_ID];
issue.id <= decode.id;
issue.exception_unit <= decode_exception_unit;
issue_uses_rs <= decode_uses_rs;
fp_issue_uses_rs <= fp_decode_uses_rs;
issue.uses_rd <= decode_uses_rd;
@ -276,29 +278,23 @@ module decode_and_issue
////////////////////////////////////////////////////
//Illegal Instruction check
generate if (CONFIG.INCLUDE_M_MODE) begin : gen_decode_exceptions
generate if (CONFIG.MODES != BARE) begin : gen_decode_exceptions
logic new_exception;
exception_code_t ecode;
exception_code_t ecall_code;
logic [31:0] tval;
//ECALL and EBREAK captured here, but separated out when ecode is set
assign illegal_instruction_pattern = ~|unit_needed;
//TODO: Consider ways of parameterizing so that any exception generating unit
//can be automatically added to this expression
always_comb begin
unique case (1'b1)
unit_needed[LS_ID] : decode_exception_unit = LS_EXCEPTION;
unit_needed[BR_ID] : decode_exception_unit = BR_EXCEPTION;
default : decode_exception_unit = PRE_ISSUE_EXCEPTION;
endcase
if (~decode.fetch_metadata.ok)
decode_exception_unit = PRE_ISSUE_EXCEPTION;
end
////////////////////////////////////////////////////
//ECALL/EBREAK
//The type of call instruction is dependent on the current privilege level
logic is_ecall;
logic is_ebreak;
assign is_ecall = decode.instruction inside {ECALL};
assign is_ebreak = decode.instruction inside {EBREAK};
always_comb begin
case (current_privilege)
USER_PRIVILEGE : ecall_code = ECALL_U;
@ -311,10 +307,16 @@ module decode_and_issue
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
ecode <=
decode.instruction inside {ECALL} ? ecall_code :
decode.instruction inside {EBREAK} ? BREAK :
is_ecall ? ecall_code :
is_ebreak ? BREAK :
illegal_instruction_pattern ? ILLEGAL_INST :
decode.fetch_metadata.error_code; //(~decode.fetch_metadata.ok)
if (~decode.fetch_metadata.ok | is_ebreak)
tval <= decode.pc;
else if (is_ecall)
tval <= '0;
else
tval <= decode.instruction;
end
end
@ -327,22 +329,20 @@ module decode_and_issue
pre_issue_exception_pending <= illegal_instruction_pattern | (~decode.fetch_metadata.ok);
end
assign new_exception = issue.stage_valid & pre_issue_exception_pending & ~(gc.issue_hold | gc.fetch_flush | exception.valid);
assign new_exception = issue.stage_valid & pre_issue_exception_pending & ~(gc.issue_hold | gc.fetch_flush) & ~exception.valid;
always_ff @(posedge clk) begin
if (rst)
exception.valid <= 0;
else
exception.valid <= (exception.valid | new_exception) & ~exception.ack;
exception.valid <= new_exception;
end
always_ff @(posedge clk) begin
if (new_exception) begin
exception.code <= ecode;
exception.tval <= issue.instruction;
exception.id <= issue.id;
end
end
assign exception.possible = 0; //Not needed because occurs before issue
assign exception.code = ecode;
assign exception.tval = tval;
assign exception.pc = issue.pc;
assign exception.discard = 0;
end endgenerate
////////////////////////////////////////////////////

0
core/execution_units/alu_unit.sv Executable file → Normal file
View file

0
core/execution_units/barrel_shifter.sv Executable file → Normal file
View file

15
core/execution_units/branch_unit.sv Executable file → Normal file
View file

@ -65,7 +65,6 @@ module branch_unit
logic [31:0] new_pc;
logic [31:0] new_pc_ex;
logic [31:0] pc_ex;
logic instruction_is_completing;
logic branch_complete;
@ -200,7 +199,7 @@ module branch_unit
////////////////////////////////////////////////////
//Exception support
generate if (CONFIG.INCLUDE_M_MODE) begin : gen_branch_exception
generate if (CONFIG.MODES != BARE) begin : gen_branch_exception
logic new_exception;
assign new_exception = new_pc[1] & branch_taken & issue.new_request;
@ -208,15 +207,14 @@ module branch_unit
if (rst)
exception.valid <= 0;
else
exception.valid <= (exception.valid & ~exception.ack) | new_exception;
exception.valid <= new_exception;
end
always_ff @(posedge clk) begin
if (issue.new_request)
exception.id <= issue.id;
end
assign exception.possible = 0; //Not needed because branch_flush suppresses issue
assign exception.code = INST_ADDR_MISSALIGNED;
assign exception.tval = new_pc_ex;
assign exception.pc = issue_stage.pc_r;
assign exception.discard = 0;
end
endgenerate
@ -228,13 +226,12 @@ module branch_unit
if (issue.possible_issue) begin
is_return_ex <= is_return;
is_call_ex <= is_call;
pc_ex <= issue_stage.pc;
end
end
assign br_results.id = id_ex;
assign br_results.valid = instruction_is_completing;
assign br_results.pc = pc_ex;
assign br_results.pc = issue_stage.pc_r;
assign br_results.target_pc = new_pc_ex;
assign br_results.branch_taken = branch_taken_ex;
assign br_results.is_branch = ~jal_or_jalr_ex;

839
core/execution_units/csr_unit.sv Executable file → Normal file

File diff suppressed because it is too large Load diff

2
core/execution_units/div_unit.sv Executable file → Normal file
View file

@ -129,7 +129,7 @@ module div_unit
set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) prev_div_result_valid_m (
.clk, .rst,
.set(issue.new_request & ~((issue_stage.rd_addr == issue_rs_addr[RS1]) | (issue_stage.rd_addr == issue_rs_addr[RS2]))),
.clr((instruction_issued_with_rd & div_rs_overwrite) | gc.writeback_supress), //No instructions will be issued while gc.writeback_supress is asserted
.clr((instruction_issued_with_rd & div_rs_overwrite) | gc.init_clear), //No instructions will be issued while gc.init_clear is asserted
.result(prev_div_result_valid)
);

View file

@ -44,6 +44,7 @@ module gc_unit
input issue_packet_t issue_stage,
input logic issue_stage_ready,
input logic instruction_issued,
input logic [31:0] constant_alu,
input logic [31:0] rf [REGFILE_READ_PORTS],
@ -52,39 +53,38 @@ module gc_unit
//Branch miss predict
input logic branch_flush,
//exception_interface.unit pre_issue_exception,
//Exception
exception_interface.unit local_gc_exception,
exception_interface.econtrol exception [NUM_EXCEPTION_SOURCES],
input logic [31:0] exception_target_pc,
input logic [31:0] oldest_pc,
output logic mret,
output logic sret,
input logic [31:0] epc,
//Retire
input id_t retire_ids_next [RETIRE_PORTS],
input logic [$clog2(NUM_EXCEPTION_SOURCES)-1:0] current_exception_unit,
input logic [31:0] mepc,
input logic [31:0] sepc,
//CSR Interrupts
input logic interrupt_pending,
output logic interrupt_taken,
input logic processing_csr,
//CSR signals
input logic csr_frontend_flush,
input logic [1:0] current_privilege,
input logic tvm,
input logic tsr,
//Output controls
output gc_outputs_t gc,
output tlb_packet_t sfence,
//Ordering support
input load_store_status_t load_store_status,
input logic [LOG2_MAX_IDS:0] post_issue_count
input load_store_status_t load_store_status
);
//Largest depth for TLBs
localparam int TLB_CLEAR_DEPTH = (CONFIG.DTLB.DEPTH > CONFIG.ITLB.DEPTH) ? CONFIG.DTLB.DEPTH : CONFIG.ITLB.DEPTH;
//For general reset clear, greater of TLB depth or id-flight memory blocks (MAX_IDS)
localparam int INIT_CLEAR_DEPTH = CONFIG.INCLUDE_S_MODE ? (TLB_CLEAR_DEPTH > 64 ? TLB_CLEAR_DEPTH : 64) : 64;
localparam int INIT_CLEAR_DEPTH = CONFIG.MODES == MSU ? (TLB_CLEAR_DEPTH > 64 ? TLB_CLEAR_DEPTH : 64) : 64;
////////////////////////////////////////////////////
//Overview
@ -119,120 +119,157 @@ module gc_unit
//LS exceptions (miss-aligned, TLB and MMU) (issue stage)
//fetch flush, take exception. If execute or later exception occurs first, exception is overridden
common_instruction_t instruction;//rs1_addr, rs2_addr, fn3, fn7, rd_addr, upper/lower opcode
typedef enum {RST_STATE, PRE_CLEAR_STATE, INIT_CLEAR_STATE, IDLE_STATE, TLB_CLEAR_STATE, POST_ISSUE_DRAIN, PRE_ISSUE_FLUSH, POST_ISSUE_DISCARD} gc_state;
typedef enum {RST_STATE, PRE_CLEAR_STATE, INIT_CLEAR_STATE, IDLE_STATE, TLB_CLEAR_STATE, WAIT_INTERRUPT, PRE_ISSUE_FLUSH, WAIT_WRITE} gc_state;
gc_state state;
gc_state next_state;
logic init_clear_done;
logic tlb_clear_done;
logic post_issue_idle;
logic ifence_in_progress;
logic ret_in_progress;
//GC registered global outputs
logic gc_init_clear;
logic gc_fetch_hold;
logic gc_issue_hold;
logic gc_rename_revert;
logic gc_fetch_flush;
logic gc_writeback_supress;
logic gc_retire_hold;
logic gc_fetch_ifence;
logic gc_tlb_flush;
logic gc_sq_flush;
logic gc_pc_override;
logic [31:0] gc_pc;
typedef struct packed{
logic [31:0] pc_p4;
logic is_ifence;
logic is_mret;
logic is_sret;
} gc_inputs_t;
logic possible_exception;
gc_inputs_t gc_inputs;
gc_inputs_t gc_inputs_r;
////////////////////////////////////////////////////
//Implementation
////////////////////////////////////////////////////
//Decode
logic [31:0] pc_p4;
logic is_ifence;
logic is_sfence;
logic trivial_sfence;
logic asid_sfence;
logic is_mret;
logic is_sret;
logic is_wfi;
assign instruction = decode_stage.instruction;
assign unit_needed =
(CONFIG.INCLUDE_M_MODE & decode_stage.instruction inside {MRET}) |
(CONFIG.INCLUDE_S_MODE & decode_stage.instruction inside {SRET, SFENCE_VMA}) |
(CONFIG.INCLUDE_IFENCE & decode_stage.instruction inside {FENCE_I});
(CONFIG.MODES != BARE & instruction inside {MRET, WFI}) |
(CONFIG.MODES == MSU & instruction inside {SRET, SFENCE_VMA}) |
(CONFIG.INCLUDE_IFENCE & instruction inside {FENCE_I});
always_comb begin
uses_rs = '0;
uses_rs[RS1] = CONFIG.INCLUDE_S_MODE & decode_stage.instruction inside {SFENCE_VMA};
uses_rs[RS1] = CONFIG.MODES == MSU & instruction inside {SFENCE_VMA};
uses_rs[RS2] = CONFIG.MODES == MSU & instruction inside {SFENCE_VMA};
uses_rd = 0;
end
always_ff @(posedge clk) begin
if (issue_stage_ready) begin
is_ifence = (instruction.upper_opcode == FENCE_T) & CONFIG.INCLUDE_IFENCE;
is_mret = (instruction.upper_opcode == SYSTEM_T) & (decode_stage.instruction[31:20] == MRET_imm) & CONFIG.INCLUDE_M_MODE;
is_sret = (instruction.upper_opcode == SYSTEM_T) & (decode_stage.instruction[31:20] == SRET_imm) & CONFIG.INCLUDE_S_MODE;
is_ifence <= CONFIG.INCLUDE_IFENCE & instruction.upper_opcode[2];
is_sfence <= CONFIG.MODES == MSU & ~instruction.upper_opcode[2] & instruction.fn7[0];
trivial_sfence <= |instruction.rs1_addr;
asid_sfence <= |instruction.rs2_addr;
is_wfi <= CONFIG.MODES != BARE & ~instruction.upper_opcode[2] & ~instruction.fn7[0] & ~instruction.rs2_addr[1];
//Ret instructions need exact decoding
is_mret <= CONFIG.MODES != BARE & instruction inside {MRET};
is_sret <= CONFIG.MODES == MSU & instruction inside {SRET};
end
end
assign gc_inputs.pc_p4 = constant_alu;
assign gc_inputs.is_ifence = is_ifence;
assign gc_inputs.is_mret = is_mret;
assign gc_inputs.is_sret = is_sret;
////////////////////////////////////////////////////
//Issue
logic is_ifence_r;
logic is_sfence_r;
logic is_sret_r;
logic trivial_sfence_r;
logic asid_sfence_r;
logic [31:0] sfence_addr_r;
logic [ASIDLEN-1:0] asid_r;
logic new_exception;
//Input registering
always_ff @(posedge clk) begin
if (issue.new_request)
gc_inputs_r <= gc_inputs;
if (rst) begin
is_ifence_r <= 0;
is_sfence_r <= 0;
mret <= 0;
sret <= 0;
end
else begin
is_ifence_r <= issue.new_request & is_ifence & ~new_exception;
is_sfence_r <= issue.new_request & is_sfence & ~new_exception;
mret <= issue.new_request & is_mret & ~new_exception;
sret <= issue.new_request & is_sret & ~new_exception;
end
end
//ret
always_ff @(posedge clk) begin
if (rst)
ret_in_progress <= 0;
else
ret_in_progress <= (ret_in_progress & ~(next_state == PRE_ISSUE_FLUSH)) | (issue.new_request & (gc_inputs.is_mret | gc_inputs.is_sret));
if (issue.new_request) begin
trivial_sfence_r <= trivial_sfence;
asid_sfence_r <= asid_sfence;
sfence_addr_r <= rf[RS1];
asid_r <= rf[RS2][ASIDLEN-1:0];
end
if (rst) begin
trivial_sfence_r <= 0;
asid_sfence_r <= 0;
end
end
//ifence
always_ff @(posedge clk) begin
if (rst)
ifence_in_progress <= 0;
else
ifence_in_progress <= (ifence_in_progress & ~(next_state == PRE_ISSUE_FLUSH)) | (issue.new_request & gc_inputs.is_ifence);
//Exceptions treated like every other unit
generate if (CONFIG.MODES != BARE) begin : gen_gc_exception
always_comb begin
new_exception = 0;
if (issue.new_request) begin
if (current_privilege == USER_PRIVILEGE)
new_exception = is_sfence | is_sret | is_mret;
else if (current_privilege == SUPERVISOR_PRIVILEGE)
new_exception = (is_sfence & tvm) | (is_sret & tsr);
end
end
always_ff @(posedge clk) begin
if (rst)
local_gc_exception.valid <= 0;
else
local_gc_exception.valid <= new_exception;
end
assign local_gc_exception.possible = 0; //Not needed because appears on first cycle
assign local_gc_exception.code = ILLEGAL_INST;
assign local_gc_exception.tval = issue_stage.instruction_r;
assign local_gc_exception.pc = issue_stage.pc_r;
assign local_gc_exception.discard = 0;
end
endgenerate
////////////////////////////////////////////////////
//GC Operation
assign post_issue_idle = (post_issue_count == 0) & load_store_status.sq_empty;
assign gc.fetch_flush = branch_flush | gc_pc_override;
always_ff @ (posedge clk) begin
gc_fetch_hold <= next_state inside {PRE_CLEAR_STATE, INIT_CLEAR_STATE, POST_ISSUE_DRAIN, PRE_ISSUE_FLUSH};
gc_issue_hold <= processing_csr | (next_state inside {PRE_CLEAR_STATE, INIT_CLEAR_STATE, TLB_CLEAR_STATE, POST_ISSUE_DRAIN, PRE_ISSUE_FLUSH, POST_ISSUE_DISCARD});
gc_writeback_supress <= next_state inside {PRE_CLEAR_STATE, INIT_CLEAR_STATE, POST_ISSUE_DISCARD};
gc_retire_hold <= next_state inside {PRE_ISSUE_FLUSH};
gc_fetch_hold <= next_state inside {PRE_CLEAR_STATE, INIT_CLEAR_STATE, PRE_ISSUE_FLUSH, TLB_CLEAR_STATE, WAIT_WRITE};
gc_issue_hold <= next_state inside {PRE_CLEAR_STATE, INIT_CLEAR_STATE, WAIT_INTERRUPT, PRE_ISSUE_FLUSH, TLB_CLEAR_STATE, WAIT_WRITE};
gc_init_clear <= next_state inside {INIT_CLEAR_STATE};
gc_fetch_ifence <= issue.new_request & is_ifence;
gc_tlb_flush <= next_state inside {INIT_CLEAR_STATE, TLB_CLEAR_STATE};
gc_sq_flush <= state inside {POST_ISSUE_DISCARD} & next_state inside {IDLE_STATE};
end
//work-around for verilator BLKANDNBLK signal optimizations
assign gc.fetch_hold = gc_fetch_hold;
assign gc.issue_hold = gc_issue_hold;
assign gc.writeback_supress = CONFIG.INCLUDE_M_MODE & gc_writeback_supress;
assign gc.retire_hold = gc_retire_hold;
assign gc.issue_hold = gc_issue_hold | possible_exception;
assign gc.init_clear = gc_init_clear;
assign gc.tlb_flush = CONFIG.INCLUDE_S_MODE & gc_tlb_flush;
assign gc.sq_flush = CONFIG.INCLUDE_M_MODE & gc_sq_flush;
assign gc.fetch_ifence = CONFIG.INCLUDE_IFENCE & gc_fetch_ifence;
assign sfence = '{
valid : CONFIG.MODES == MSU & gc_tlb_flush,
asid_only : asid_sfence_r,
asid : asid_r,
addr_only : trivial_sfence_r,
addr : sfence_addr_r
};
////////////////////////////////////////////////////
//GC State Machine
always @(posedge clk) begin
@ -249,19 +286,47 @@ module gc_unit
PRE_CLEAR_STATE : next_state = INIT_CLEAR_STATE;
INIT_CLEAR_STATE : if (init_clear_done) next_state = IDLE_STATE;
IDLE_STATE : begin
if (gc.exception.valid)//new pending exception is also oldest instruction
if ((issue.new_request & ~is_wfi & ~new_exception) | gc.exception.valid | csr_frontend_flush)
next_state = PRE_ISSUE_FLUSH;
else if (issue.new_request | interrupt_pending | gc.exception_pending)
next_state = POST_ISSUE_DRAIN;
else if (interrupt_pending)
next_state = WAIT_INTERRUPT;
end
TLB_CLEAR_STATE : if (tlb_clear_done) next_state = IDLE_STATE;
POST_ISSUE_DRAIN : if (((ifence_in_progress | ret_in_progress) & post_issue_idle) | gc.exception.valid | interrupt_pending) next_state = PRE_ISSUE_FLUSH;
PRE_ISSUE_FLUSH : next_state = POST_ISSUE_DISCARD;
POST_ISSUE_DISCARD : if ((post_issue_count == 0) & load_store_status.no_released_stores_pending) next_state = IDLE_STATE;
WAIT_INTERRUPT : begin
if (gc.exception.valid | csr_frontend_flush) //Exception overrides interrupt
next_state = PRE_ISSUE_FLUSH;
else if (~interrupt_pending) //Something cancelled the interrupt
next_state = IDLE_STATE;
else if (~possible_exception & issue_stage.stage_valid & ~branch_flush) //No more possible exceptions and issue stage has correct PC
next_state = PRE_ISSUE_FLUSH;
end
PRE_ISSUE_FLUSH : begin
if (is_sfence_r)
next_state = TLB_CLEAR_STATE;
else if (is_ifence_r)
next_state = WAIT_WRITE;
else //MRET/SRET, exception, interrupt, CSR flush
next_state = IDLE_STATE;
end
//gc.exception will never be set in these states
TLB_CLEAR_STATE : if (tlb_clear_done) next_state = (load_store_status.outstanding_store) ? WAIT_WRITE : IDLE_STATE;
WAIT_WRITE : if (~load_store_status.outstanding_store) next_state = IDLE_STATE;
default : next_state = RST_STATE;
endcase
end
//Will never encounter an exception and can ignore interrupts -> will not have a new instruction on the transition to idle; interrupts can be ignored
//SFENCE: PRE_ISSUE_FLUSH (Override PC) -> TLB_CLEAR -> WAIT_WRITE
//IFENCE: PRE_ISSUE_FLUSH (Override PC) -> WAIT_WRITE
//MRET/SRET: PRE_ISSUE_FLUSH (Override PC)
//Branch/CSR/LS exceptions: PRE_ISSUE_FLUSH (Override PC)
//Fetch/illegal exception: PRE_ISSUE_FLUSH (Override PC)
//Interrupt: WAIT_INTERRUPT (capture next PC) -> PRE_ISSUE_FLUSH (Override PC) <- This can be hijacked by an exception
//Interrupt
//wait until issue/execute exceptions are no longer possible, flush fetch, take exception
////////////////////////////////////////////////////
//State Counter
logic [$clog2(INIT_CLEAR_DEPTH):0] state_counter;
@ -272,63 +337,101 @@ module gc_unit
state_counter <= state_counter + 1;
end
assign init_clear_done = state_counter[$clog2(INIT_CLEAR_DEPTH)];
assign tlb_clear_done = state_counter[$clog2(TLB_CLEAR_DEPTH)];
assign tlb_clear_done = state_counter[$clog2(TLB_CLEAR_DEPTH)] | trivial_sfence_r;
////////////////////////////////////////////////////
//Exception handling
generate if (CONFIG.INCLUDE_M_MODE) begin :gen_gc_m_mode
logic [NUM_EXCEPTION_SOURCES-1:0] exception_valid;
logic [NUM_EXCEPTION_SOURCES-1:0] exception_possible;
//Separated out because possible exceptions from CSR must still stall even without M
generate for (genvar i = 0; i < NUM_EXCEPTION_SOURCES; i++) begin : gen_possible_exceptions
assign exception_possible[i] = exception[i].possible;
end endgenerate
assign possible_exception = |exception_possible;
assign gc.exception.possible = possible_exception;
generate if (CONFIG.MODES != BARE) begin :gen_gc_m_mode
//Re-assigning interface inputs to array types so that they can be dynamically indexed
logic [NUM_EXCEPTION_SOURCES-1:0] exception_pending;
exception_code_t [NUM_EXCEPTION_SOURCES-1:0] exception_code;
id_t [NUM_EXCEPTION_SOURCES-1:0] exception_id;
logic [NUM_EXCEPTION_SOURCES-1:0][31:0] exception_tval;
logic exception_ack;
logic [NUM_EXCEPTION_SOURCES-1:0][31:0] exception_pc;
logic [NUM_EXCEPTION_SOURCES-1:0] exception_discard;
logic [31:0] muxed_exception_pc;
for (genvar i = 0; i < NUM_EXCEPTION_SOURCES; i++) begin
assign exception_pending[i] = exception[i].valid;
assign exception_valid[i] = exception[i].valid;
assign exception_code[i] = exception[i].code;
assign exception_id[i] = exception[i].id;
assign exception_tval[i] = exception[i].tval;
assign exception[i].ack = exception_ack;
end
//Exception valid when the oldest instruction is a valid ID. This is done with a level of indirection (through the exception unit table)
//for better scalability, avoiding the need to compare against all exception sources.
always_comb begin
gc.exception_pending = |exception_pending;
gc.exception.valid = (retire_ids_next[0] == exception_id[current_exception_unit]) & exception_pending[current_exception_unit];
gc.exception.pc = oldest_pc;
gc.exception.code = exception_code[current_exception_unit];
gc.exception.tval = exception_tval[current_exception_unit];
assign exception_discard[i] = exception[i].discard;
assign exception_pc[i] = exception[i].pc;
end
assign exception_ack = gc.exception.valid;
assign gc.exception.valid = |exception_valid;
assign gc.exception.source = exception_valid;
assign interrupt_taken = interrupt_pending & (next_state == PRE_ISSUE_FLUSH) & ~(ifence_in_progress | ret_in_progress | gc.exception.valid);
one_hot_mux #(.OPTIONS(NUM_EXCEPTION_SOURCES), .DATA_TYPE(exception_code_t)) code_mux (
.one_hot(exception_valid),
.choices(exception_code),
.sel(gc.exception.code),
.*);
assign mret = gc_inputs_r.is_mret & ret_in_progress & (next_state == PRE_ISSUE_FLUSH);
assign sret = gc_inputs_r.is_sret & ret_in_progress & (next_state == PRE_ISSUE_FLUSH);
one_hot_mux #(.OPTIONS(NUM_EXCEPTION_SOURCES), .DATA_TYPE(logic[31:0])) tval_mux (
.one_hot(exception_valid),
.choices(exception_tval),
.sel(gc.exception.tval),
.*);
end endgenerate
one_hot_mux #(.OPTIONS(NUM_EXCEPTION_SOURCES), .DATA_TYPE(logic[31:0])) pc_mux (
.one_hot(exception_valid),
.choices(exception_pc),
.sel(muxed_exception_pc),
.*);
assign gc.exception.pc = |exception_valid ? muxed_exception_pc : issue_stage.pc;
assign interrupt_taken = interrupt_pending & (next_state == PRE_ISSUE_FLUSH) & ~(gc.exception.valid) & ~csr_frontend_flush;
//Writeback and rename handling
logic gc_writeback_suppress_r;
logic gc_rename_revert;
always_ff @(posedge clk) begin
if (rst) begin
gc_writeback_suppress_r <= 0;
gc_rename_revert <= 0;
end
else begin
gc_writeback_suppress_r <= gc.writeback_suppress;
gc_rename_revert <= gc_writeback_suppress_r;
end
end
assign gc.writeback_suppress = |(exception_valid & exception_discard);
assign gc.rename_revert = gc_rename_revert;
end endgenerate
//PC determination (trap, flush or return)
//Two cycles: on first cycle the processor front end is flushed,
//on the second cycle the new PC is fetched
generate if (CONFIG.INCLUDE_M_MODE || CONFIG.INCLUDE_IFENCE) begin :gen_gc_pc_override
generate if (CONFIG.MODES != BARE || CONFIG.INCLUDE_IFENCE) begin :gen_gc_pc_override
always_ff @ (posedge clk) begin
gc_pc_override <= next_state inside {PRE_ISSUE_FLUSH, INIT_CLEAR_STATE};
gc_pc <=
(gc.exception.valid | interrupt_taken) ? exception_target_pc :
(gc_inputs_r.is_ifence) ? gc_inputs_r.pc_p4 :
epc; //ret
if (gc.exception.valid | interrupt_taken)
gc_pc <= exception_target_pc;
else if (instruction_issued) begin
if (is_mret)
gc_pc <= mepc;
else if (is_sret)
gc_pc <= sepc;
else //IFENCE, SFENCE, CSR flush
gc_pc <= constant_alu;
end
end
//work-around for verilator BLKANDNBLK signal optimizations
assign gc.pc_override = gc_pc_override;
assign gc.pc = gc_pc;
end endgenerate
end endgenerate
////////////////////////////////////////////////////
//Decode / Write-back Handshaking
//CSR reads are passed through the Load-Store unit
@ -342,12 +445,12 @@ module gc_unit
////////////////////////////////////////////////////
//Assertions
`ifdef ENABLE_SIMULATION_ASSERTIONS
generate if (DEBUG_CONVERT_EXCEPTIONS_INTO_ASSERTIONS) begin
unexpected_exception_assertion:
assert property (@(posedge clk) disable iff (rst) (~gc.exception.valid))
else $error("unexpected exception occured: %s", gc.exception.code.name());
end endgenerate
`endif
multiple_exceptions_assertion:
assert property (@(posedge clk) disable iff (rst) $onehot0(exception_valid))
else $error("Simultaneous exceptions");
multiple_possible_exceptions_assertion:
assert property (@(posedge clk) disable iff (rst) $onehot0(exception_possible))
else $error("Simultaneous possible exceptions");
endmodule

View file

@ -28,7 +28,7 @@ module addr_hash
parameter logic USE_BIT_3 = 1
)
(
input logic [31:0] addr,
input logic [11:0] addr,
output addr_hash_t addr_hash
);

56
core/execution_units/load_store_unit/amo_alu.sv Executable file → Normal file
View file

@ -1,5 +1,5 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
* Copyright © 2017 Eric Matthews, Chris Keilbart, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -18,44 +18,48 @@
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Chris Keilbart <ckeilbar@sfu.ca>
*/
//Combinational ALU for atomic memory operations (AMOs).
//Combines the two operands according to the AMO function code and drives the
//result; there is no clock or state in this module.
//NOTE(review): this span appears to interleave the previous and the updated
//version of the module (two port lists, two sets of *_ext declarations, two
//case headers, two endmodule lines) -- confirm which lines are current before
//compiling.
module amo_alu
import cva5_config::*;
import riscv_types::*;
import cva5_types::*;
#(
//Operand/result width in bits
parameter int WIDTH = 32
)
(
input amo_alu_inputs_t amo_alu_inputs,
output logic[31:0] result
//AMO function code selecting the operation
input amo_t amo_type,
//First operand (the rs1_load field in the struct-based port list)
input logic[WIDTH-1:0] rs1,
//Second operand
input logic[WIDTH-1:0] rs2,
//Result of the selected operation
output logic[WIDTH-1:0] rd
);
//High for the signed comparisons (MIN/MAX), low for MINU/MAXU and all other ops
logic signed_op;
//Result of the single shared comparison used by all four min/max variants
logic rs1_smaller_than_rs2;
logic signed [32:0] rs1_ext;
logic signed [32:0] rs2_ext;
//bit 4 for unsigned
assign rs1_ext = {(~amo_alu_inputs.op[4] & amo_alu_inputs.rs1_load[31]), amo_alu_inputs.rs1_load};
assign rs2_ext = {(~amo_alu_inputs.op[4] & amo_alu_inputs.rs2[31]), amo_alu_inputs.rs2};
//Operands widened by one bit so that one signed '<' serves both signed and
//unsigned comparisons
logic signed [WIDTH:0] rs1_ext;
logic signed [WIDTH:0] rs2_ext;
//NOTE(review): logic_result and arith_result are not referenced anywhere in
//the visible module body -- confirm whether they can be removed
logic[WIDTH-1:0] logic_result;
logic[WIDTH-1:0] arith_result;
assign signed_op = amo_type == AMO_MIN_FN5 | amo_type == AMO_MAX_FN5;
//Extension bit is the operand's MSB for signed ops and 0 otherwise
assign rs1_ext = {(signed_op & rs1[WIDTH-1]), rs1};
assign rs2_ext = {(signed_op & rs2[WIDTH-1]), rs2};
assign rs1_smaller_than_rs2 = rs1_ext < rs2_ext;
/* verilator lint_off CASEINCOMPLETE */
//Operation select: one result expression per AMO function code
always_comb begin
case (amo_alu_inputs.op)// <--unique as not all codes are in use
AMO_SWAP_FN5 : result = amo_alu_inputs.rs2;
AMO_ADD_FN5 : result = amo_alu_inputs.rs1_load + amo_alu_inputs.rs2;
AMO_XOR_FN5 : result = amo_alu_inputs.rs1_load ^ amo_alu_inputs.rs2;
AMO_AND_FN5 : result = amo_alu_inputs.rs1_load & amo_alu_inputs.rs2;
AMO_OR_FN5 : result = amo_alu_inputs.rs1_load | amo_alu_inputs.rs2;
AMO_MIN_FN5 : result = rs1_smaller_than_rs2 ? amo_alu_inputs.rs1_load : amo_alu_inputs.rs2;
AMO_MAX_FN5 : result = rs1_smaller_than_rs2 ? amo_alu_inputs.rs2 : amo_alu_inputs.rs1_load;
AMO_MINU_FN5 : result = rs1_smaller_than_rs2 ? amo_alu_inputs.rs1_load : amo_alu_inputs.rs2;
AMO_MAXU_FN5 : result = rs1_smaller_than_rs2 ? amo_alu_inputs.rs2 : amo_alu_inputs.rs1_load;
unique case (amo_type)
AMO_XOR_FN5 : rd = rs1 ^ rs2;
AMO_OR_FN5 : rd = rs1 | rs2;
AMO_AND_FN5 : rd = rs1 & rs2;
//Swap simply forwards the second operand
AMO_SWAP_FN5 : rd = rs2;
//Signed and unsigned variants share the same select; the difference is
//entirely in how rs1_ext/rs2_ext were extended above
AMO_MIN_FN5 : rd = rs1_smaller_than_rs2 ? rs1 : rs2;
AMO_MAX_FN5 : rd = rs1_smaller_than_rs2 ? rs2 : rs1;
AMO_MINU_FN5 : rd = rs1_smaller_than_rs2 ? rs1 : rs2;
AMO_MAXU_FN5 : rd = rs1_smaller_than_rs2 ? rs2 : rs1;
AMO_ADD_FN5 : rd = rs1 + rs2;
default : rd = 'x; //Default don't care allows some optimization
endcase
end
/* verilator lint_on CASEINCOMPLETE */
endmodule
endmodule

View file

@ -0,0 +1,123 @@
/*
* Copyright © 2024 Chris Keilbart, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Chris Keilbart <ckeilbar@sfu.ca>
*/
//Shared atomic-operation unit.
//Holds a single LR/SC reservation and a single AMO ALU on behalf of NUM_UNITS
//agents. Each agent may set or clear the reservation, query whether its own
//address matches the held reservation, and submit a read-modify-write
//operation whose result is broadcast back to every agent.
module amo_unit
    import riscv_types::*;
#(
    parameter int NUM_UNITS = 3,
    parameter int RESERVATION_WORDS = 4
) //TODO: reservation shape and size must be discoverable(?)
(
    input logic clk,
    input logic rst,
    amo_interface.amo_unit agents[NUM_UNITS]
);

    //A reservation is identified by the upper address bits that remain once
    //the low (2 + log2(RESERVATION_WORDS)) bits are dropped
    //NOTE(review): presumably byte addressing with 32-bit words -- confirm
    localparam RESERVATION_WIDTH = 30 - $clog2(RESERVATION_WORDS);
    typedef logic[RESERVATION_WIDTH-1:0] reservation_t;

    ////////////////////////////////////////////////////
    //Per-agent signals, flattened out of the interface array
    logic[NUM_UNITS-1:0] set_req;
    logic[NUM_UNITS-1:0] clear_req;
    reservation_t[NUM_UNITS-1:0] unit_tag;
    logic[NUM_UNITS-1:0] rmw_req;
    amo_t[NUM_UNITS-1:0] unit_op;
    logic[NUM_UNITS-1:0][31:0] unit_rs1;
    logic[NUM_UNITS-1:0][31:0] unit_rs2;

    //Shared reservation state and ALU result
    logic held_valid;
    reservation_t held_tag;
    logic[31:0] alu_rd;

    generate for (genvar i = 0; i < NUM_UNITS; i++) begin : gen_unpacking
        //Reservation requests and the tag of the address being accessed
        assign set_req[i] = agents[i].set_reservation;
        assign clear_req[i] = agents[i].clear_reservation;
        assign unit_tag[i] = agents[i].reservation[31 : 32-RESERVATION_WIDTH];

        //Read-modify-write request and its operands
        assign rmw_req[i] = agents[i].rmw_valid;
        assign unit_op[i] = agents[i].op;
        assign unit_rs1[i] = agents[i].rs1;
        assign unit_rs2[i] = agents[i].rs2;

        //Responses: reservation match for this agent, shared ALU result
        assign agents[i].reservation_valid = held_valid & (held_tag == unit_tag[i]);
        assign agents[i].rd = alu_rd;
    end endgenerate

    ////////////////////////////////////////////////////
    //Request selection
    //One reservation port and one ALU are shared by all agents; the request
    //vectors are used directly as one-hot selects
    reservation_t set_tag;
    amo_t mux_op;
    logic[31:0] mux_rs1;
    logic[31:0] mux_rs2;

    one_hot_mux #(.OPTIONS(NUM_UNITS), .DATA_TYPE(amo_t)) op_mux (
        .sel(mux_op),
        .choices(unit_op),
        .one_hot(rmw_req),
    .*);

    one_hot_mux #(.OPTIONS(NUM_UNITS), .DATA_TYPE(logic[31:0])) rs1_mux (
        .sel(mux_rs1),
        .choices(unit_rs1),
        .one_hot(rmw_req),
    .*);

    one_hot_mux #(.OPTIONS(NUM_UNITS), .DATA_TYPE(logic[31:0])) rs2_mux (
        .sel(mux_rs2),
        .choices(unit_rs2),
        .one_hot(rmw_req),
    .*);

    one_hot_mux #(.OPTIONS(NUM_UNITS), .DATA_TYPE(reservation_t)) reservation_mux (
        .sel(set_tag),
        .choices(unit_tag),
        .one_hot(set_reservation_vec),
    .*);

    ////////////////////////////////////////////////////
    //LR/SC reservation register
    //A single reservation is held for all agents. Any agent may set or clear
    //it on any cycle; when both happen together the set wins.
    logic any_set;
    logic any_clear;
    logic[NUM_UNITS-1:0] set_reservation_vec;

    assign set_reservation_vec = set_req;
    assign any_set = |set_req;
    assign any_clear = |clear_req;

    //Valid bit: reset to invalid, otherwise set / clear / hold
    always_ff @(posedge clk) begin
        if (rst)
            held_valid <= 1'b0;
        else
            held_valid <= any_set | (held_valid & ~any_clear);
    end

    //Tag register: only updated when a reservation is being set (no reset)
    always_ff @(posedge clk) begin
        if (any_set)
            held_tag <= set_tag;
    end

    ////////////////////////////////////////////////////
    //Atomic ALU
    //Operates on the selected request; result is returned to every agent
    amo_alu #(.WIDTH(32)) alu_inst (
        .amo_type(mux_op),
        .rs1(mux_rs1),
        .rs2(mux_rs2),
        .rd(alu_rd)
    );

endmodule

View file

@ -1,5 +1,5 @@
/*
* Copyright © 2022 Eric Matthews
* Copyright © 2024 Chris Keilbart
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -17,7 +17,7 @@
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Chris Keilbart <ckeilbar@sfu.ca>
*/
module dcache
@ -32,296 +32,349 @@ module dcache
(
input logic clk,
input logic rst,
input logic dcache_on,
l1_arbiter_request_interface.master l1_request,
l1_arbiter_return_interface.master l1_response,
input logic sc_complete,
input logic sc_success,
input logic clear_reservation,
input amo_details_t amo,
input logic uncacheable_load,
input logic uncacheable_store,
input logic is_load,
input logic load_request,
input logic store_request,
output logic load_ready,
output logic store_ready,
input data_access_shared_inputs_t ls_load,
input data_access_shared_inputs_t ls_store,
memory_sub_unit_interface.responder ls
output logic write_outstanding,
input logic amo,
input amo_t amo_type,
amo_interface.subunit amo_unit,
input logic cbo,
input logic uncacheable,
memory_sub_unit_interface.responder ls,
input logic load_peek, //If the next request may be a load
input logic[31:0] load_addr_peek //The address in that case
);
localparam derived_cache_config_t SCONFIG = get_derived_cache_params(CONFIG, CONFIG.DCACHE, CONFIG.DCACHE_ADDR);
localparam LOG2_WAYS = (CONFIG.DCACHE.WAYS == 1) ? 1 : $clog2(CONFIG.DCACHE.WAYS);
localparam DB_ADDR_LEN = SCONFIG.LINE_ADDR_W + SCONFIG.SUB_LINE_ADDR_W;
cache_functions_interface # (.TAG_W(SCONFIG.TAG_W), .LINE_W(SCONFIG.LINE_ADDR_W), .SUB_LINE_W(SCONFIG.SUB_LINE_ADDR_W)) addr_utils ();
localparam bit [SCONFIG.SUB_LINE_ADDR_W-1:0] END_OF_LINE_COUNT = SCONFIG.SUB_LINE_ADDR_W'(CONFIG.DCACHE.LINE_W-1);
typedef logic[SCONFIG.TAG_W-1:0] tag_t;
cache_functions_interface # (.LINE_W(SCONFIG.LINE_ADDR_W), .SUB_LINE_W(SCONFIG.SUB_LINE_ADDR_W)) addr_utils ();
typedef struct packed {
logic valid;
tag_t tag;
} tb_entry_t;
typedef struct packed{
logic [31:0] addr;
typedef struct packed {
logic[31:0] addr;
logic[31:0] data;
logic[3:0] be;
logic rnw;
logic uncacheable;
} load_stage2_t;
load_stage2_t stage2_load;
logic amo;
amo_t amo_type;
logic cbo;
} req_t;
typedef struct packed{
logic [31:0] addr;
logic [3:0] be;
logic [31:0] data;
logic cache_op;
logic uncacheable;
} store_stage2_t;
store_stage2_t stage2_store;
logic [CONFIG.DCACHE.WAYS-1:0] load_tag_hit_way;
logic [CONFIG.DCACHE.WAYS-1:0] store_tag_hit_way;
logic [CONFIG.DCACHE.WAYS-1:0] replacement_way;
logic [CONFIG.DCACHE.WAYS-1:0] replacement_way_r;
logic load_tag_check;
logic load_hit;
logic store_hit;
logic [LOG2_WAYS-1:0] tag_hit_index;
logic [LOG2_WAYS-1:0] replacement_index;
logic [LOG2_WAYS-1:0] replacement_index_r;
logic [LOG2_WAYS-1:0] load_sel;
logic is_target_word;
logic [SCONFIG.SUB_LINE_ADDR_W-1:0] word_count;
logic miss_data_valid;
logic line_complete;
logic arb_load_sel;
logic load_l1_arb_ack;
logic store_l1_arb_ack;
logic [31:0] ram_load_data [CONFIG.DCACHE.WAYS-1:0];
typedef enum {
LOAD_IDLE = 0,
LOAD_HIT_CHECK = 1,
LOAD_L1_REQUEST = 2,
LOAD_FILL = 3
} load_path_enum_t;
logic [3:0] load_state, load_state_next;
typedef enum {
STORE_IDLE = 0,
STORE_L1_REQUEST = 1
} store_path_enum_t;
logic [1:0] store_state, store_state_next;
////////////////////////////////////////////////////
//Implementation
req_t stage0;
req_t stage1;
logic stage1_done;
logic stage0_advance_r;
////////////////////////////////////////////////////
//Load Path
always_ff @ (posedge clk) begin
if (rst) begin
load_state <= 0;
load_state[LOAD_IDLE] <= 1;
end
assign write_outstanding = (current_state != IDLE) & (~stage1.rnw | stage1.amo);
//Peeking avoids circular logic
assign ls.ready = (current_state == IDLE) | (stage1_done & ~stage1.cbo & ~(db_wen & load_peek & load_addr_peek[31:DB_ADDR_LEN+2] == stage1.addr[31:DB_ADDR_LEN+2] & load_addr_peek[2+:DB_ADDR_LEN] == db_addr));
always_ff @(posedge clk) begin
if (rst)
stage0_advance_r <= 0;
else
load_state <= load_state_next;
stage0_advance_r <= ls.new_request;
if (ls.new_request)
stage1 <= stage0;
end
always_comb begin
load_state_next[LOAD_IDLE] = (load_state[LOAD_IDLE] & ~load_request) | ((load_hit & ~load_request) | line_complete);
load_state_next[LOAD_HIT_CHECK] = load_request;
load_state_next[LOAD_L1_REQUEST] = (load_state[LOAD_L1_REQUEST] & ~load_l1_arb_ack) | (load_state[LOAD_HIT_CHECK] & ~load_hit);
load_state_next[LOAD_FILL] = (load_state[LOAD_FILL] & ~line_complete) | (load_state[LOAD_L1_REQUEST] & load_l1_arb_ack);
end
assign load_ready = (load_state[LOAD_IDLE] | load_hit) & (store_state[STORE_IDLE] | store_l1_arb_ack);
always_ff @ (posedge clk) begin
if (load_request) begin
stage2_load.addr <= ls_load.addr;
stage2_load.uncacheable <= uncacheable_load;
end
end
assign load_tag_check = load_request & dcache_on & ~uncacheable_load;
////////////////////////////////////////////////////
//Load Miss
always_ff @ (posedge clk) begin
if (load_request)
word_count <= 0;
else
word_count <= word_count + SCONFIG.SUB_LINE_ADDR_W'(l1_response.data_valid);
end
assign is_target_word = (stage2_load.addr[2 +: SCONFIG.SUB_LINE_ADDR_W] == word_count) | stage2_load.uncacheable;
assign line_complete = l1_response.data_valid & ((word_count == END_OF_LINE_COUNT) | stage2_load.uncacheable);
////////////////////////////////////////////////////
//Store Path
always_ff @ (posedge clk) begin
if (rst) begin
store_state <= 0;
store_state[STORE_IDLE] <= 1;
end
else
store_state <= store_state_next;
end
always_comb begin
store_state_next[STORE_IDLE] = (store_state[STORE_IDLE] & (~store_request | (store_request & ls_store.cache_op))) | (store_l1_arb_ack & ~store_request);
store_state_next[STORE_L1_REQUEST] = (store_state[STORE_L1_REQUEST] & ~store_l1_arb_ack) | (store_request & ~ls_store.cache_op);
end
assign store_ready = (store_state[STORE_IDLE] | store_l1_arb_ack) & (load_state[LOAD_IDLE] | load_hit);
assign ls.ready = is_load ? load_ready : store_ready;
always_ff @ (posedge clk) begin
if (store_request) begin
stage2_store.addr <= ls_store.addr;
stage2_store.uncacheable <= uncacheable_store;
stage2_store.be <= ls_store.be;
stage2_store.data <= ls_store.data_in;
stage2_store.cache_op <= ls_store.cache_op;
end
end
////////////////////////////////////////////////////
//L1 Arbiter Interface
//Priority to oldest request
fifo_interface #(.DATA_TYPE(logic)) request_order();
assign request_order.data_in = load_request;
assign request_order.push = load_request | (store_request & ~ls_store.cache_op);
assign request_order.potential_push = request_order.push;
assign request_order.pop = l1_request.ack | load_hit;
cva5_fifo #(.DATA_TYPE(logic), .FIFO_DEPTH(2))
request_order_fifo (
.clk (clk),
.rst (rst),
.fifo (request_order)
);
assign arb_load_sel = request_order.data_out;
assign stage0 = '{
addr : ls.addr,
data : ls.data_in,
be : ls.be,
rnw : ls.re,
uncacheable : uncacheable,
amo : amo,
amo_type : amo_type,
cbo : cbo
};
assign l1_request.addr = arb_load_sel ? stage2_load.addr : stage2_store.addr;//Memory interface aligns request to burst size (done there to support AMO line-read word-write)
assign l1_request.data = stage2_store.data;
assign l1_request.rnw = arb_load_sel;
assign l1_request.be = stage2_store.be;
assign l1_request.size = (arb_load_sel & ~stage2_load.uncacheable) ? 5'(CONFIG.DCACHE.LINE_W-1) : 0;//LR and AMO ops are included in load
assign l1_request.is_amo = 0;
assign l1_request.amo = 0;
assign l1_request.request = load_state[LOAD_L1_REQUEST] | store_state[STORE_L1_REQUEST];
assign load_l1_arb_ack = l1_request.ack & arb_load_sel;
assign store_l1_arb_ack = l1_request.ack & ~arb_load_sel;
////////////////////////////////////////////////////
//Replacement policy (free running one-hot cycler, i.e. pseudo random)
//Replacement policy
logic[CONFIG.DCACHE.WAYS-1:0] replacement_way;
cycler #(CONFIG.DCACHE.WAYS) replacement_policy (
.clk (clk),
.rst (rst),
.en (1'b1),
.one_hot (replacement_way)
);
.en(ls.new_request),
.one_hot(replacement_way),
.*);
////////////////////////////////////////////////////
//Tag banks
dcache_tag_banks #(.CONFIG(CONFIG), .SCONFIG(SCONFIG))
tag_banks (
.clk (clk),
.rst (rst),
.load_addr (ls_load.addr),
.load_req (load_tag_check),
.miss_addr (stage2_load.addr),
.miss_req (load_l1_arb_ack),
.miss_way (replacement_way),
.inv_addr ({l1_response.inv_addr, 2'b0}),
.extern_inv (l1_response.inv_valid),
.extern_inv_complete (l1_response.inv_ack),
.store_addr (ls_store.addr),
.store_addr_r (stage2_store.addr),
.store_req (store_request),
.cache_op_req (ls_store.cache_op),
.load_tag_hit (load_hit),
.load_tag_hit_way (load_tag_hit_way),
.store_tag_hit (store_hit),
.store_tag_hit_way (store_tag_hit_way)
);
//Tagbank
tb_entry_t[CONFIG.DCACHE.WAYS-1:0] tb_entries;
tb_entry_t new_entry;
logic[CONFIG.DCACHE.WAYS-1:0] hit_ohot;
logic[CONFIG.DCACHE.WAYS-1:0] hit_ohot_r;
logic hit;
logic hit_r;
logic tb_write;
////////////////////////////////////////////////////
//Data Bank(s)
logic [SCONFIG.LINE_ADDR_W+SCONFIG.SUB_LINE_ADDR_W-1:0] data_read_addr;
assign data_read_addr = load_state[LOAD_FILL] ? {addr_utils.getTagLineAddr(stage2_load.addr), word_count} : addr_utils.getDataLineAddr(ls_load.addr);
assign tb_write = stage0_advance_r & ~stage1.uncacheable & ((~hit & stage1.rnw & ~stage1_is_sc) | (stage1.cbo & hit));
generate for (genvar i=0; i < CONFIG.DCACHE.WAYS; i++) begin : data_bank_gen
byte_en_bram #(CONFIG.DCACHE.LINES*CONFIG.DCACHE.LINE_W) data_bank (
.clk(clk),
.addr_a(data_read_addr),
.addr_b(addr_utils.getDataLineAddr(stage2_store.addr)),
.en_a(load_tag_check | (replacement_way_r[i] & l1_response.data_valid)),
.en_b(store_tag_hit_way[i]),
.be_a({4{(replacement_way_r[i] & l1_response.data_valid)}}),
.be_b(stage2_store.be),
.data_in_a(l1_response.data),
.data_in_b(stage2_store.data),
.data_out_a(ram_load_data[i]),
.data_out_b()
);
end endgenerate
assign new_entry = '{
valid : ~stage1.cbo,
tag : addr_utils.getTag(stage1.addr)
};
////////////////////////////////////////////////////
//Output
//One-hot tag hit / update logic to binary int
one_hot_to_integer #(CONFIG.DCACHE.WAYS)
hit_way_conv (
.one_hot (load_tag_hit_way),
.int_out (tag_hit_index)
);
one_hot_to_integer #(CONFIG.DCACHE.WAYS)
replacment_way_conv (
.one_hot (replacement_way),
.int_out (replacement_index)
);
always_ff @ (posedge clk) begin
if (load_l1_arb_ack) begin
replacement_way_r <= replacement_way;
replacement_index_r <= replacement_index;
sdp_ram_padded #(
.ADDR_WIDTH(SCONFIG.LINE_ADDR_W),
.NUM_COL(CONFIG.DCACHE.WAYS),
.COL_WIDTH($bits(tb_entry_t)),
.PIPELINE_DEPTH(0)
) tagbank (
.a_en(tb_write),
.a_wbe(replacement_way),
.a_wdata({CONFIG.DCACHE.WAYS{new_entry}}),
.a_addr(addr_utils.getTagLineAddr(stage1.addr)),
.b_en(ls.new_request),
.b_addr(addr_utils.getTagLineAddr(stage0.addr)),
.b_rdata(tb_entries),
.*);
//Hit detection
always_comb begin
hit_ohot = '0;
for (int i = 0; i < CONFIG.DCACHE.WAYS; i++)
hit_ohot[i] = tb_entries[i].valid & (tb_entries[i].tag == addr_utils.getTag(stage1.addr));
end
assign hit = |hit_ohot;
always_ff @(posedge clk) begin
if (stage0_advance_r) begin
hit_r <= hit;
hit_ohot_r <= hit_ohot;
end
end
always_ff @ (posedge clk) miss_data_valid <= l1_response.data_valid & is_target_word;
//Databank
logic[CONFIG.DCACHE.WAYS-1:0][31:0] db_entries;
logic[31:0] db_hit_entry;
logic db_wen;
logic[CONFIG.DCACHE.WAYS-1:0] db_way;
logic[CONFIG.DCACHE.WAYS-1:0][3:0] db_wbe_full;
logic[31:0] db_wdata;
logic collision;
logic [31:0] saved_data;
logic [3:0] saved_be;
assign collision = store_state[STORE_L1_REQUEST] & (stage2_store.addr[31:2] == ls_load.addr[31:2]);
always_ff @ (posedge clk) begin
if (load_request) begin
saved_data <= stage2_store.data;
saved_be <= {4{collision}} & stage2_store.be;
always_comb begin
for (int i = 0; i < CONFIG.DCACHE.WAYS; i++)
db_wbe_full[i] = {4{db_way[i]}} & stage1.be;
end
logic[DB_ADDR_LEN-1:0] db_addr;
assign db_addr = current_state == FILLING ? {addr_utils.getTagLineAddr(stage1.addr), word_counter} : addr_utils.getDataLineAddr(stage1.addr);
sdp_ram #(
.ADDR_WIDTH(DB_ADDR_LEN),
.NUM_COL(4*CONFIG.DCACHE.WAYS),
.COL_WIDTH(8),
.PIPELINE_DEPTH(0)
) databank (
.a_en(db_wen),
.a_wbe(db_wbe_full),
.a_wdata({CONFIG.DCACHE.WAYS{db_wdata}}),
.a_addr(db_addr),
.b_en(ls.new_request),
.b_addr(addr_utils.getDataLineAddr(stage0.addr)),
.b_rdata(db_entries),
.*);
always_comb begin
db_hit_entry = 'x;
for (int i = 0; i < CONFIG.DCACHE.WAYS; i++) begin
if (hit_ohot[i])
db_hit_entry = db_entries[i];
end
end
assign load_sel = load_state[LOAD_HIT_CHECK] ? tag_hit_index : replacement_index_r;
always_comb for (int i = 0; i < 4; i++)
ls.data_out[8*i+:8] = saved_be[i] ? saved_data[8*i+:8] : ram_load_data[load_sel][8*i+:8];
assign ls.data_valid = load_hit | miss_data_valid;
//Arbiter response
logic correct_word;
logic return_done;
logic[SCONFIG.SUB_LINE_ADDR_W-1:0] word_counter;
assign return_done = l1_response.data_valid & word_counter == SCONFIG.SUB_LINE_ADDR_W'(CONFIG.DCACHE.LINE_W-1);
assign correct_word = l1_response.data_valid & word_counter == stage1.addr[2+:SCONFIG.SUB_LINE_ADDR_W];
//Counts the words returned by the L1 arbiter
//A new request restarts the count and takes priority over an increment in the same cycle
always_ff @(posedge clk) begin
    if (ls.new_request)
        word_counter <= 0;
    else if (l1_response.data_valid)
        word_counter <= word_counter+1;
end
////////////////////////////////////////////////////
//End of Implementation
////////////////////////////////////////////////////
//States of the stage1 request handling FSM
typedef enum {
IDLE, //No request being processed
FIRST_CYCLE, //First cycle of a request: writes, read hits, uncacheable reads, and SC are handled here
REQUESTING_READ, //Requesting a full line read from the L1 arbiter until it is acknowledged
FILLING, //Writing the returned words into the databank
UNCACHEABLE_WAITING_READ, //Waiting for the data of an uncacheable read to return
AMO_WRITE //Writing the AMO result (amo_unit.rd) back
} stage1_t;
stage1_t current_state;
stage1_t next_state;
//State register; reset places the FSM in IDLE
always_ff @(posedge clk) begin
if (rst)
current_state <= IDLE;
else
current_state <= next_state;
end
//Have to pull this into its own block to prevent a verilator circular dependency
//stage1_done: asserted in the cycle the stage1 request finishes
always_comb begin
unique case (current_state)
IDLE : stage1_done = 0;
//Done on: an acked write (or acked SC with a valid reservation), an SC without a valid reservation,
//a cacheable read hit that is not an AMO (or is an LR), or any CBO
FIRST_CYCLE : stage1_done = ((~stage1.rnw | (stage1_is_sc & amo_unit.reservation_valid)) & l1_request.ack) | (stage1_is_sc & ~amo_unit.reservation_valid) | (stage1.rnw & hit & (~stage1.amo | stage1_is_lr) & ~stage1.uncacheable) | stage1.cbo;
REQUESTING_READ : stage1_done = 0;
//Reads finish when the data has returned, except AMOs other than LR, which continue to AMO_WRITE
FILLING : stage1_done = return_done & (stage1_is_lr | ~stage1.amo);
UNCACHEABLE_WAITING_READ : stage1_done = l1_response.data_valid & (stage1_is_lr | ~stage1.amo);
//The AMO write finishes when the arbiter acknowledges it
AMO_WRITE : stage1_done = l1_request.ack;
endcase
end
//Combinational part of the stage1 FSM
//For the current state this drives the L1 arbiter request, the databank write port (db_wen/db_wdata/db_way),
//the response to the load-store unit (ls.data_valid/ls.data_out), and next_state
//Outputs that are not used in a state are driven to 'x
always_comb begin
unique case (current_state)
//Nothing in flight: no arbiter request, no databank write, no response
IDLE : begin
l1_request.request = 0;
l1_request.addr = 'x;
l1_request.data = 'x;
l1_request.rnw = 'x;
l1_request.size = 'x;
db_wen = 0;
db_wdata = 'x;
db_way = 'x;
ls.data_valid = 0;
ls.data_out = 'x;
next_state = ls.new_request ? FIRST_CYCLE : IDLE;
end
FIRST_CYCLE : begin //Handles writes, read hits, uncacheable reads, and SC
//Single-word request for writes, uncacheable accesses (other than SC), and SC with a valid reservation; never for CBO
l1_request.request = ~stage1.cbo & (~stage1.rnw | (stage1.uncacheable & ~stage1_is_sc) | (stage1_is_sc & amo_unit.reservation_valid));
l1_request.addr = stage1.addr;
l1_request.data = stage1.data;
//SC is sent to the arbiter as a write
l1_request.rnw = stage1.rnw & ~stage1_is_sc;
l1_request.size = '0;
//Update the databank on a cacheable hit for writes and for SC with a valid reservation
db_wen = ~stage1.cbo & hit & ~stage1.uncacheable & (~stage1.rnw | (stage1_is_sc & amo_unit.reservation_valid));
db_wdata = stage1.data;
db_way = hit_ohot;
//Respond for SC (only while stage0_advance_r is set) and for cacheable read hits
ls.data_valid = (stage0_advance_r & stage1_is_sc) | (stage1.rnw & ~stage1.uncacheable & hit & ~stage1_is_sc);
//SC returns 0 when the reservation is valid and 1 otherwise; read hits return the hitting way's word
ls.data_out = stage1_is_sc ? {31'b0, ~amo_unit.reservation_valid} : db_hit_entry;
if (stage1_done)
next_state = ls.new_request ? FIRST_CYCLE : IDLE;
else if (stage1.uncacheable & l1_request.ack)
next_state = UNCACHEABLE_WAITING_READ;
else if (stage1.rnw & ~stage1.uncacheable & ~hit & ~stage1_is_sc)
next_state = REQUESTING_READ;
else if (stage1.amo & hit & ~stage1.uncacheable & ~stage1_is_sc)
next_state = AMO_WRITE;
else
next_state = FIRST_CYCLE;
end
//Read miss: request a line read (size LINE_W-1) and hold the request until it is acknowledged
REQUESTING_READ : begin
l1_request.request = 1;
l1_request.addr = stage1.addr;
l1_request.data = 'x;
l1_request.rnw = 1;
l1_request.size = 5'(CONFIG.DCACHE.LINE_W-1);
db_wen = 0;
db_wdata = 'x;
db_way = 'x;
ls.data_valid = 0;
ls.data_out = 'x;
next_state = l1_request.ack ? FILLING : REQUESTING_READ;
end
//Each returned word is written into the replacement way; the requested word is forwarded as the response
FILLING : begin
l1_request.request = 0;
l1_request.addr = 'x;
l1_request.data = 'x;
l1_request.rnw = 'x;
l1_request.size = 'x;
db_wen = l1_response.data_valid;
db_wdata = l1_response.data;
db_way = replacement_way;
ls.data_valid = correct_word;
ls.data_out = l1_response.data;
if (return_done) begin
//AMOs other than LR still have to write their result back
if (stage1.amo & ~stage1_is_lr)
next_state = AMO_WRITE;
else
next_state = ls.new_request ? FIRST_CYCLE : IDLE;
end
else
next_state = FILLING;
end
//Uncacheable read: the returned word is forwarded directly and the databank is not written
UNCACHEABLE_WAITING_READ : begin
l1_request.request = 0;
l1_request.addr = 'x;
l1_request.data = 'x;
l1_request.rnw = 'x;
l1_request.size = 'x;
db_wen = 0;
db_wdata = 'x;
db_way = 'x;
ls.data_valid = l1_response.data_valid;
ls.data_out = l1_response.data;
if (l1_response.data_valid) begin
if (stage1.amo & ~stage1_is_lr)
next_state = AMO_WRITE;
else
next_state = ls.new_request ? FIRST_CYCLE : IDLE;
end
else
next_state = UNCACHEABLE_WAITING_READ;
end
//Write the AMO result to the arbiter and, for cacheable addresses, to the databank
AMO_WRITE : begin
l1_request.request = 1;
l1_request.addr = stage1.addr;
l1_request.data = amo_unit.rd;
l1_request.rnw = 0;
l1_request.size = '0;
db_wen = ~stage1.uncacheable;
db_wdata = amo_unit.rd;
//Use the way that hit originally, or the replacement way if the request missed
db_way = hit_r ? hit_ohot_r : replacement_way;
ls.data_valid = 0;
ls.data_out = 'x;
if (l1_request.ack)
next_state = ls.new_request ? FIRST_CYCLE : IDLE;
else
next_state = AMO_WRITE;
end
endcase
end
//AMO
//The stage1 request is an AMO whose type is LR / SC respectively
logic stage1_is_lr;
logic stage1_is_sc;
assign stage1_is_lr = stage1.amo & stage1.amo_type == AMO_LR_FN5;
assign stage1_is_sc = stage1.amo & stage1.amo_type == AMO_SC_FN5;
//AMO unit inputs are taken from the stage1 request
assign amo_unit.reservation = stage1.addr;
assign amo_unit.rs2 = stage1.data;
//Asserted for an AMO request whenever the FSM is not idle
assign amo_unit.rmw_valid = (current_state != IDLE) & stage1.amo;
assign amo_unit.op = stage1.amo_type;
//Both strobes are raised when the stage1 request completes; set_reservation additionally requires an LR
//NOTE(review): set and clear are both asserted when an LR completes - presumably the AMO unit gives set priority; confirm
assign amo_unit.set_reservation = stage1_is_lr & stage1_done;
assign amo_unit.clear_reservation = stage1_done;
//rs1 captures the value read from memory: the hitting way's word when stage0 advanced,
//otherwise the requested word of a fill or the word returned for an uncacheable access
always_ff @(posedge clk) begin
if (stage0_advance_r)
amo_unit.rs1 <= db_hit_entry;
else if (correct_word | (l1_response.data_valid & stage1.uncacheable))
amo_unit.rs1 <= l1_response.data;
end
//Byte enables pass straight through; the AMO fields of the arbiter request are tied to zero
assign l1_request.be = stage1.be;
assign l1_request.is_amo = 0;
assign l1_request.amo = '0;
////////////////////////////////////////////////////
//Assertions
dcache_request_when_not_ready_assertion:
assert property (@(posedge clk) disable iff (rst) load_request |-> load_ready)
assert property (@(posedge clk) disable iff (rst) ls.new_request |-> ls.ready)
else $error("dcache received request when not ready");
dache_suprious_l1_ack_assertion:
assert property (@(posedge clk) disable iff (rst) l1_request.ack |-> (load_state[LOAD_L1_REQUEST] | store_state[STORE_L1_REQUEST]))
assert property (@(posedge clk) disable iff (rst) l1_request.ack |-> l1_request.request)
else $error("dcache received ack without a request");
endmodule

View file

@ -1,114 +0,0 @@
/*
* Copyright © 2022 Eric Matthews
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//Tag storage and hit detection for the data cache
//One dual-port tag memory per way; each entry holds a valid bit and a tag
//Memory port A is shared by miss fills, external invalidations, and stores/cache operations
//Memory port B is read-only and used for load lookups
module dcache_tag_banks
import cva5_config::*;
import cva5_types::*;
# (
parameter cpu_config_t CONFIG = EXAMPLE_CONFIG,
parameter derived_cache_config_t SCONFIG = '{LINE_ADDR_W : 9, SUB_LINE_ADDR_W : 2, TAG_W : 15}
)
(
input logic clk,
input logic rst,
//NOTE(review): the Port A/Port B labels below do not match the memory ports used in the implementation
//(load_addr drives addr_b; miss/invalidate/store addresses drive addr_a) - confirm intended grouping
//Port A
input logic[31:0] load_addr,
input logic load_req,
input logic[31:0] miss_addr,
input logic miss_req,
input logic[CONFIG.DCACHE.WAYS-1:0] miss_way,
input logic[31:0] inv_addr,
input logic extern_inv,
output logic extern_inv_complete,
//Port B
input logic[31:0] store_addr,
input logic[31:0] store_addr_r,
input logic store_req,
input logic cache_op_req,
output logic load_tag_hit,
output logic store_tag_hit,
output logic[CONFIG.DCACHE.WAYS-1:0] load_tag_hit_way,
output logic[CONFIG.DCACHE.WAYS-1:0] store_tag_hit_way
);
//Contents of one tag memory entry
typedef struct packed {
logic valid;
logic [SCONFIG.TAG_W-1:0] tag;
} dtag_entry_t;
cache_functions_interface # (.TAG_W(SCONFIG.TAG_W), .LINE_W(SCONFIG.LINE_ADDR_W), .SUB_LINE_W(SCONFIG.SUB_LINE_ADDR_W)) addr_utils ();
//Read data of memory port A / port B for every way
dtag_entry_t tag_line_a [CONFIG.DCACHE.WAYS-1:0];
dtag_entry_t tag_line_b [CONFIG.DCACHE.WAYS-1:0];
//Entry written through port A
dtag_entry_t new_tagline;
logic [SCONFIG.LINE_ADDR_W-1:0] porta_addr;
logic [SCONFIG.LINE_ADDR_W-1:0] portb_addr;
logic external_inv;
logic load_req_r;
logic store_req_r;
////////////////////////////////////////////////////
//Implementation
//Requests delayed by one cycle to qualify the hit comparison; cache operations never count as store hits
always_ff @ (posedge clk) load_req_r <= load_req;
always_ff @ (posedge clk) store_req_r <= store_req & ~cache_op_req;
//External invalidations are ignored unless enabled in the configuration
assign external_inv = extern_inv & CONFIG.DCACHE.USE_EXTERNAL_INVALIDATIONS;
//Port A address priority: miss fill, then external invalidation, then store
assign porta_addr = miss_req ? addr_utils.getTagLineAddr(miss_addr) : external_inv ? addr_utils.getTagLineAddr(inv_addr) : addr_utils.getTagLineAddr(store_addr);
assign portb_addr = addr_utils.getTagLineAddr(load_addr);
//An external invalidation is reported complete in any cycle where no miss fill is using port A
assign extern_inv_complete = external_inv & ~miss_req;
//The written entry is valid only for miss fills; writes without miss_req therefore store an invalid entry
assign new_tagline = '{valid: miss_req, tag: addr_utils.getTag(miss_addr)};
////////////////////////////////////////////////////
//Memory instantiation and hit detection
generate for (genvar i = 0; i < CONFIG.DCACHE.WAYS; i++) begin : tag_bank_gen
//Port A is written for a miss fill of this way, for an external invalidation, and for a cache operation
//(the latter two write every way); plain stores only read through port A
dual_port_bram #(.WIDTH($bits(dtag_entry_t)), .LINES(CONFIG.DCACHE.LINES)) dtag_bank (
.clk (clk),
.en_a (store_req | (miss_req & miss_way[i]) | external_inv),
.wen_a ((miss_req & miss_way[i]) | external_inv | (store_req & cache_op_req)),
.addr_a (porta_addr),
.data_in_a (new_tagline),
.data_out_a (tag_line_a[i]),
.en_b (load_req),
.wen_b ('0),
.addr_b (portb_addr),
.data_in_b ('0),
.data_out_b(tag_line_b[i])
);
//A way hits when the delayed request bit is set, the entry is valid, and the tags are equal
assign store_tag_hit_way[i] = ({store_req_r, 1'b1, addr_utils.getTag(store_addr_r)} == {1'b1, tag_line_a[i]});
//NOTE(review): the load comparison uses the tag of miss_addr rather than a registered load_addr -
//presumably the caller drives miss_addr with the previous cycle's load address; confirm
assign load_tag_hit_way[i] = ({load_req_r, 1'b1, addr_utils.getTag(miss_addr)} == {1'b1, tag_line_b[i]});
end endgenerate
//Overall hit: any way hit
assign load_tag_hit = |load_tag_hit_way;
assign store_tag_hit = |store_tag_hit_way;
endmodule

View file

@ -49,20 +49,32 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
localparam DOUBLE_MIN_WIDTH = FLEN >= 32 ? 32 : FLEN;
typedef struct packed {
logic [31:0] addr;
logic [11:0] offset;
logic [2:0] fn3;
logic fp;
logic double;
logic amo;
amo_t amo_type;
logic [31:0] amo_wdata;
id_t id;
logic store_collision;
logic [LOG2_SQ_DEPTH-1:0] sq_index;
} lq_entry_t;
//Entry of the address FIFOs that accompany the load and store queues
typedef struct packed {
logic discard; //When set, the matching queue entry is popped without being issued
logic [19:0] addr; //Upper 20 address bits; combined with the 12-bit offset of the queue entry to form the full address
ls_subunit_t subunit; //Subunit selected for the request
} addr_entry_t;
logic [LOG2_SQ_DEPTH-1:0] sq_index;
logic [LOG2_SQ_DEPTH-1:0] sq_oldest;
addr_hash_t addr_hash;
logic potential_store_conflict;
logic lq_addr_discard;
logic sq_addr_discard;
logic load_pop;
logic load_addr_bit_3;
logic [2:0] load_fn3;
@ -72,7 +84,9 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
logic [31:0] store_data;
fifo_interface #(.DATA_TYPE(lq_entry_t)) lq();
fifo_interface #(.DATA_TYPE(addr_entry_t)) lq_addr();
store_queue_interface sq();
fifo_interface #(.DATA_TYPE(addr_entry_t)) sq_addr();
////////////////////////////////////////////////////
//Implementation
@ -85,7 +99,7 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
//Address hash for load-store collision checking
addr_hash #(.USE_BIT_3(~CONFIG.INCLUDE_UNIT.FPU))
lsq_addr_hash (
.addr (lsq.data_in.addr),
.addr (lsq.data_in.offset),
.addr_hash (addr_hash)
);
@ -97,31 +111,49 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
.rst(rst),
.fifo(lq)
);
//FIFO of address entries (upper address bits, subunit, discard flag) for the load queue, sized to MAX_IDS
cva5_fifo #(.DATA_TYPE(addr_entry_t), .FIFO_DEPTH(MAX_IDS))
load_queue_addr_fifo (
.clk(clk),
.rst(rst),
.fifo(lq_addr)
);
//FIFO control signals
assign lq.push = lsq.push & lsq.data_in.load;
assign lq.potential_push = lsq.potential_push;
assign lq.pop = load_pop;
assign lq.pop = load_pop | lq_addr_discard;
assign lq_addr.push = lsq.addr_push & lsq.addr_data_in.rnw;
assign lq_addr.potential_push = lq_addr.push;
assign lq_addr.data_in.addr = lsq.addr_data_in.addr;
assign lq_addr.data_in.subunit = lsq.addr_data_in.subunit;
assign lq_addr.data_in.discard = lsq.addr_data_in.discard;
assign lq_addr.pop = load_pop | lq_addr_discard;
assign lq_addr_discard = lq_addr.valid ? lq_addr.data_out.discard : lsq.addr_push & lsq.addr_data_in.rnw & lsq.addr_data_in.discard;
//FIFO data ports
assign lq.data_in = '{
addr : lsq.data_in.addr,
offset : lsq.data_in.offset,
fn3 : lsq.data_in.fn3,
fp : lsq.data_in.fp,
double : lsq.data_in.double,
amo : lsq.data_in.amo,
amo_type : lsq.data_in.amo_type,
amo_wdata : lsq.data_in.data,
id : lsq.data_in.id,
store_collision : potential_store_conflict,
store_collision : potential_store_conflict | (CONFIG.INCLUDE_AMO & lsq.data_in.amo), //Collision forces sequential consistency
sq_index : sq_index
};
////////////////////////////////////////////////////
//Store Queue
assign sq.push = lsq.push & (lsq.data_in.store | lsq.data_in.cache_op);
assign sq.pop = store_pop;
assign sq.pop = store_pop | sq_addr_discard;
assign sq.data_in = lsq.data_in;
store_queue # (.CONFIG(CONFIG)) sq_block (
.clk (clk),
.rst (rst | gc.sq_flush),
.rst (rst),
.sq (sq),
.store_forward_wb_group (store_forward_wb_group),
.fp_store_forward_wb_group (fp_store_forward_wb_group),
@ -133,6 +165,22 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
.fp_wb_packet (fp_wb_packet),
.store_retire (store_retire)
);
//FIFO of address entries (upper address bits, subunit, discard flag) for the store queue, sized to SQ_DEPTH
cva5_fifo #(.DATA_TYPE(addr_entry_t), .FIFO_DEPTH(CONFIG.SQ_DEPTH))
store_queue_addr_fifo (
.clk(clk),
.rst(rst),
.fifo(sq_addr)
);
//Address entries that are not reads (rnw clear) are pushed here
assign sq_addr.push = lsq.addr_push & ~lsq.addr_data_in.rnw;
assign sq_addr.potential_push = sq_addr.push;
assign sq_addr.data_in.addr = lsq.addr_data_in.addr;
assign sq_addr.data_in.subunit = lsq.addr_data_in.subunit;
assign sq_addr.data_in.discard = lsq.addr_data_in.discard;
//The address entry leaves together with its store, whether the store is issued or discarded
assign sq_addr.pop = store_pop | sq_addr_discard;
//Discard the oldest store when its address entry is flagged; if the FIFO is empty the entry being pushed this cycle is used instead
//Only done while a store is queued and no load is valid or the load is blocked
assign sq_addr_discard = sq.valid & (~lq.valid | load_blocked) & (sq_addr.valid ? sq_addr.data_out.discard : lsq.addr_push & ~lsq.addr_data_in.rnw & lsq.addr_data_in.discard);
////////////////////////////////////////////////////
//Output
@ -148,7 +196,7 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
assign load_fp_hold = ~load_p2 & lq.data_out.double;
assign load_pop = lsq.load_pop & ~load_fp_hold;
assign load_addr_bit_3 = load_fp_hold | lq.data_out.addr[2];
assign load_addr_bit_3 = load_fp_hold | lq.data_out.offset[2];
assign load_fn3 = lq.data_out.fp ? LS_W_fn3 : lq.data_out.fn3;
always_comb begin
@ -171,7 +219,7 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
end else begin : gen_no_load_split
//All loads are single cycle (load only the upper word)
assign load_pop = lsq.load_pop;
assign load_addr_bit_3 = lq.data_out.addr[2] | lq.data_out.double;
assign load_addr_bit_3 = lq.data_out.offset[2] | lq.data_out.double;
assign load_fn3 = lq.data_out.fp ? LS_W_fn3 : lq.data_out.fn3;
always_comb begin
if (lq.data_out.double)
@ -194,7 +242,7 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
assign store_fp_hold = ~store_p2 & sq.data_out.double;
assign store_pop = lsq.store_pop & ~store_fp_hold;
assign store_addr_bit_3 = sq.data_out.double ? store_p2 : sq.data_out.addr[2];
assign store_addr_bit_3 = sq.data_out.double ? store_p2 : sq.data_out.offset[2];
always_ff @(posedge clk) begin
if (rst)
@ -217,11 +265,11 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
end else begin : gen_no_fpu
//Plain integer memory operations
assign load_pop = lsq.load_pop;
assign load_addr_bit_3 = lq.data_out.addr[2];
assign load_addr_bit_3 = lq.data_out.offset[2];
assign load_fn3 = lq.data_out.fn3;
assign load_type = INT_DONE;
assign store_pop = lsq.store_pop;
assign store_addr_bit_3 = sq.data_out.addr[2];
assign store_addr_bit_3 = sq.data_out.offset[2];
assign store_data = sq.data_out.data;
end
endgenerate
@ -229,35 +277,41 @@ module load_store_queue //ID-based input buffer for Load/Store Unit
logic load_blocked;
assign load_blocked = (lq.data_out.store_collision & (lq.data_out.sq_index != sq_oldest));
assign lsq.load_valid = lq.valid & ~load_blocked;
assign lsq.store_valid = sq.valid;
//Requests are only valid if the TLB has returned the physical address and there was no exception
assign lsq.load_valid = lq.valid & ~load_blocked & (lq_addr.valid ? ~lq_addr.data_out.discard : lsq.addr_push & lsq.addr_data_in.rnw & ~lsq.addr_data_in.discard);
assign lsq.store_valid = sq.valid & (sq_addr.valid ? ~sq_addr.data_out.discard : lsq.addr_push & ~lsq.addr_data_in.rnw & ~lsq.addr_data_in.discard);
assign lsq.load_data_out = '{
addr : {lq.data_out.addr[31:3], load_addr_bit_3, lq.data_out.addr[1:0]},
addr : {(lq_addr.valid ? lq_addr.data_out.addr : lsq.addr_data_in.addr), lq.data_out.offset[11:3], load_addr_bit_3, lq.data_out.offset[1:0]},
load : 1,
store : 0,
cache_op : 0,
be : 'x,
amo : lq.data_out.amo,
amo_type : lq.data_out.amo_type,
be : '1,
fn3 : load_fn3,
data_in : 'x,
subunit : lq_addr.valid ? lq_addr.data_out.subunit : lsq.addr_data_in.subunit,
data_in : CONFIG.INCLUDE_AMO ? lq.data_out.amo_wdata : 'x,
id : lq.data_out.id,
fp_op : load_type
};
assign lsq.store_data_out = '{
addr : {sq.data_out.addr[31:3], store_addr_bit_3, sq.data_out.addr[1:0]},
addr : {(sq_addr.valid ? sq_addr.data_out.addr : lsq.addr_data_in.addr), sq.data_out.offset[11:3], store_addr_bit_3, sq.data_out.offset[1:0]},
load : 0,
store : 1,
cache_op : sq.data_out.cache_op,
amo : 0,
amo_type : amo_t'('x),
be : sq.data_out.be,
fn3 : 'x,
subunit : sq_addr.valid ? sq_addr.data_out.subunit : lsq.addr_data_in.subunit,
data_in : store_data,
id : 'x,
fp_op : fp_ls_op_t'('x)
};
assign lsq.sq_empty = sq.empty;
assign lsq.no_released_stores_pending = sq.no_released_stores_pending;
assign lsq.empty = ~lq.valid & sq.empty;
////////////////////////////////////////////////////

369
core/execution_units/load_store_unit/load_store_unit.sv Executable file → Normal file
View file

@ -26,6 +26,7 @@ module load_store_unit
import riscv_types::*;
import cva5_types::*;
import fpu_types::*;
import csr_types::*;
import opcodes::*;
# (
@ -62,7 +63,6 @@ module load_store_unit
input logic dcache_on,
input logic clear_reservation,
tlb_interface.requester tlb,
input logic tlb_on,
l1_arbiter_request_interface.master l1_request,
l1_arbiter_return_interface.master l1_response,
@ -75,11 +75,17 @@ module load_store_unit
local_memory_interface.master data_bram,
//CSR
input logic [1:0] current_privilege,
input envcfg_t menvcfg,
input envcfg_t senvcfg,
//Writeback-Store Interface
input wb_packet_t wb_packet [CONFIG.NUM_WB_GROUPS],
input fp_wb_packet_t fp_wb_packet [2],
//Retire release
//Retire
input id_t retire_id,
input retire_packet_t store_retire,
exception_interface.unit exception,
@ -96,9 +102,10 @@ module load_store_unit
localparam DCACHE_ID = int'(CONFIG.INCLUDE_DLOCAL_MEM) + int'(CONFIG.INCLUDE_PERIPHERAL_BUS);
//Should be equal to pipeline depth of longest load/store subunit
localparam ATTRIBUTES_DEPTH = 1;
localparam ATTRIBUTES_DEPTH = 2;
//Subunit signals
amo_interface amo_if[NUM_SUB_UNITS]();
addr_utils_interface #(CONFIG.DLOCAL_MEM_ADDR.L, CONFIG.DLOCAL_MEM_ADDR.H) dlocal_mem_addr_utils ();
addr_utils_interface #(CONFIG.PERIPHERAL_BUS_ADDR.L, CONFIG.PERIPHERAL_BUS_ADDR.H) dpbus_addr_utils ();
addr_utils_interface #(CONFIG.DCACHE_ADDR.L, CONFIG.DCACHE_ADDR.H) dcache_addr_utils ();
@ -111,11 +118,14 @@ module load_store_unit
data_access_shared_inputs_t shared_inputs;
logic [31:0] unit_data_array [NUM_SUB_UNITS-1:0];
logic [NUM_SUB_UNITS-1:0] unit_ready;
logic [NUM_SUB_UNITS-1:0] unit_write_outstanding;
logic write_outstanding;
logic [NUM_SUB_UNITS-1:0] unit_data_valid;
logic [NUM_SUB_UNITS-1:0] last_unit;
logic [NUM_SUB_UNITS_W-1:0] last_unit;
logic sub_unit_ready;
logic [NUM_SUB_UNITS_W-1:0] subunit_id;
ls_subunit_t padded_subunit_id;
logic unit_switch;
logic unit_switch_in_progress;
@ -126,6 +136,7 @@ module load_store_unit
logic sub_unit_load_issue;
logic sub_unit_store_issue;
logic load_response;
logic load_complete;
logic [31:0] virtual_address;
@ -134,10 +145,20 @@ module load_store_unit
logic [31:0] aligned_load_data;
logic [31:0] final_load_data;
logic tlb_request_r;
logic tlb_lq;
logic unaligned_addr;
logic load_exception_complete;
logic exception_is_fp;
logic exception_is_store;
logic nontrivial_fence;
logic fence_hold;
logic illegal_cbo;
logic exception_lsq_push;
logic nomatch_fault;
logic late_exception;
id_t exception_id;
typedef struct packed{
logic is_signed;
@ -166,14 +187,19 @@ module load_store_unit
assign unit_needed = instruction inside {LB, LH, LW, LBU, LHU, SB, SH, SW, FENCE} |
(CONFIG.INCLUDE_CBO & instruction inside {CBO_INVAL, CBO_CLEAN, CBO_FLUSH}) |
(CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, SP_FSW, DP_FLD, DP_FSD});
(CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, SP_FSW, DP_FLD, DP_FSD}) |
(CONFIG.INCLUDE_AMO & instruction inside {AMO_ADD, AMO_XOR, AMO_OR, AMO_AND, AMO_MIN, AMO_MAX, AMO_MINU, AMO_MAXU, AMO_SWAP, AMO_LR, AMO_SC});
always_comb begin
uses_rs = '0;
uses_rs[RS1] = instruction inside {LB, LH, LW, LBU, LHU, SB, SH, SW} |
(CONFIG.INCLUDE_CBO & instruction inside {CBO_INVAL, CBO_CLEAN, CBO_FLUSH}) |
(CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, SP_FSW, DP_FLD, DP_FSD});
uses_rs[RS2] = CONFIG.INCLUDE_FORWARDING_TO_STORES ? 0 : instruction inside {SB, SH, SW};
uses_rd = instruction inside {LB, LH, LW, LBU, LHU};
(CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, SP_FSW, DP_FLD, DP_FSD}) |
(CONFIG.INCLUDE_AMO & instruction inside {AMO_ADD, AMO_XOR, AMO_OR, AMO_AND, AMO_MIN, AMO_MAX, AMO_MINU, AMO_MAXU, AMO_SWAP, AMO_LR, AMO_SC});
if (CONFIG.INCLUDE_AMO)
uses_rs[RS2] = instruction inside {AMO_ADD, AMO_XOR, AMO_OR, AMO_AND, AMO_MIN, AMO_MAX, AMO_MINU, AMO_MAXU, AMO_SWAP, AMO_SC};
if (~CONFIG.INCLUDE_FORWARDING_TO_STORES)
uses_rs[RS2] |= instruction inside {SB, SH, SW};
uses_rd = instruction inside {LB, LH, LW, LBU, LHU} | (CONFIG.INCLUDE_AMO & instruction inside {AMO_ADD, AMO_XOR, AMO_OR, AMO_AND, AMO_MIN, AMO_MAX, AMO_MINU, AMO_MAXU, AMO_SWAP, AMO_LR, AMO_SC});
fp_uses_rs = '0;
fp_uses_rs[RS2] = ~CONFIG.INCLUDE_FORWARDING_TO_STORES & CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FSW, DP_FSD};
fp_uses_rd = CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, DP_FLD};
@ -186,8 +212,13 @@ module load_store_unit
logic is_store;
logic is_fence;
logic is_cbo;
cbo_t cbo_type;
logic is_fpu;
logic is_double;
logic nontrivial_fence;
logic is_amo;
amo_t amo_type;
logic rd_zero;
logic [11:0] offset;
} ls_attr_t;
ls_attr_t decode_attr;
@ -198,17 +229,55 @@ module load_store_unit
assign load_offset = instruction[31:20];
assign store_offset = {instruction[31:25], instruction[11:7]};
//Only a reduced subset of possible fences require stalling, because of the following guarantees:
//The load queue does not reorder loads
//The store queue does not reorder stores
//Earlier loads are always selected before later stores
//The data cache and local memory are sequentially consistent (no reordering)
//All peripheral busses are sequentially consistent across request types
//Decide whether a fence needs to stall, based on its ordering bits and on which subunits exist
//Bit usage as applied below: [27]/[26] peripheral read/write and [25]/[24] regular (memory) read/write on the "before" side,
//[23]/[22] peripheral read/write and [21]/[20] regular (memory) read/write on the "after" side
always_comb begin
if (NUM_SUB_UNITS == 3)
nontrivial_fence = (
(instruction[27] & (instruction[22] | instruction[20])) | //Peripheral read before any write
(instruction[26] & (instruction[23] | |instruction[21:20])) | //Peripheral write before anything other than a peripheral write
(instruction[25] & instruction[22]) | //Regular read before peripheral write
(instruction[24]) //Regular write before anything
);
//Two subunits, neither of which is the peripheral bus
else if (NUM_SUB_UNITS == 2 & ~CONFIG.INCLUDE_PERIPHERAL_BUS)
nontrivial_fence = instruction[24] & |instruction[21:20]; //Regular write before any regular
//Two subunits, one of which is the peripheral bus
else if (NUM_SUB_UNITS == 2)
nontrivial_fence = (
(instruction[27] & (instruction[22] | instruction[20])) | //Peripheral read before any write
(instruction[26] & (instruction[23] | |instruction[21:20])) | //Peripheral write before anything other than a peripheral write
(instruction[25] & instruction[22]) | //Memory read before peripheral write
(instruction[24] & |instruction[23:21]) //Memory write before anything other than a memory write
);
//A single memory subunit
else if (NUM_SUB_UNITS == 1 & ~CONFIG.INCLUDE_PERIPHERAL_BUS)
nontrivial_fence = instruction[24] & instruction[21]; //Memory write before memory read
//Only the peripheral bus
else if (NUM_SUB_UNITS == 1 & CONFIG.INCLUDE_PERIPHERAL_BUS)
nontrivial_fence = (
(instruction[27] & instruction[22]) | //Peripheral read before peripheral write
(instruction[26] & instruction[23]) //Peripheral write before peripheral read
);
else //0 subunits??
nontrivial_fence = 0;
end
assign decode_attr = '{
is_load : instruction inside {LB, LH, LW, LBU, LHU} | CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, DP_FLD},
is_load : ~instruction.upper_opcode[5] & ~instruction.upper_opcode[3],
is_store : instruction inside {SB, SH, SW} | CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FSW, DP_FSD},
is_fence : instruction inside {FENCE},
is_fence : ~instruction.fn3[1] & instruction.upper_opcode[3],
nontrivial_fence : nontrivial_fence,
is_cbo : CONFIG.INCLUDE_CBO & instruction inside {CBO_INVAL, CBO_CLEAN, CBO_FLUSH},
is_fpu : CONFIG.INCLUDE_UNIT.FPU & instruction inside {SP_FLW, SP_FSW, DP_FLD, DP_FSD},
is_double : CONFIG.INCLUDE_UNIT.FPU & instruction inside {DP_FLD, DP_FSD},
offset : instruction[5] ? store_offset : ((CONFIG.INCLUDE_CBO & instruction[2]) ? '0 : load_offset)
cbo_type : cbo_t'(instruction[21:20]),
is_fpu : CONFIG.INCLUDE_UNIT.FPU & instruction.upper_opcode[3:2] == 2'b01,
is_double : CONFIG.INCLUDE_UNIT.FPU & instruction.fn3[1:0] == 2'b11,
is_amo : CONFIG.INCLUDE_AMO & instruction.upper_opcode[3] & instruction.upper_opcode[5],
amo_type : amo_t'(instruction[31:27]),
rd_zero : ~|instruction.rd_addr,
offset : (CONFIG.INCLUDE_CBO | CONFIG.INCLUDE_AMO) & instruction[3] ? '0 : (instruction[5] ? store_offset : load_offset)
};
assign decode_is_store = decode_attr.is_store | decode_attr.is_cbo;
assign decode_is_store = decode_attr.is_store | decode_attr.is_cbo; //Must be exact
always_ff @(posedge clk) begin
if (issue_stage_ready)
@ -238,8 +307,36 @@ module load_store_unit
);
////////////////////////////////////////////////////
//Alignment Exception
generate if (CONFIG.INCLUDE_M_MODE) begin : gen_ls_exceptions
//CSR Permissions
//Can impact fences, atomic instructions, and CBO
//fiom: "fence on IO implies memory" applies to the current privilege level
//fiom_amo_hold: holds back issue of an AMO while writes are outstanding and fiom applies
logic fiom;
logic fiom_amo_hold;
generate if (CONFIG.MODES inside {MU, MSU}) begin : gen_csr_env
    //Fence on IO implies memory; force all fences to be nontrivial for simplicity
    always_comb begin
        if (CONFIG.MODES == MU)
            fiom = current_privilege == USER_PRIVILEGE & menvcfg.fiom;
        else
            fiom = (current_privilege != MACHINE_PRIVILEGE & menvcfg.fiom) | (current_privilege == USER_PRIVILEGE & senvcfg.fiom);
    end

    //AMO instructions AQ-RL consider all memory regions; force write drain for simplicity
    logic fiom_amo_hold_r;
    logic set_fiom_amo_hold;
    assign set_fiom_amo_hold = lsq.load_valid & shared_inputs.amo & fiom & write_outstanding;
    assign fiom_amo_hold = set_fiom_amo_hold | fiom_amo_hold_r;
    //The hold is sticky until no writes remain outstanding
    always_ff @(posedge clk) begin
        if (rst | ~write_outstanding)
            fiom_amo_hold_r <= 0;
        else
            fiom_amo_hold_r <= fiom_amo_hold_r | set_fiom_amo_hold;
    end
end else begin : gen_no_csr_env
    //Without a privilege level below machine mode the FIOM bits never apply
    //Tie the signals off so that their consumers do not see undriven values
    assign fiom = 0;
    assign fiom_amo_hold = 0;
end endgenerate
////////////////////////////////////////////////////
//Exceptions
generate if (CONFIG.MODES != BARE) begin : gen_ls_exceptions
logic new_exception;
always_comb begin
if (issue_stage.fn3 == LS_H_fn3 | issue_stage.fn3 == L_HU_fn3)
@ -254,53 +351,103 @@ module load_store_unit
unaligned_addr = 0;
end
assign new_exception = unaligned_addr & issue.new_request & ~issue_attr.is_fence;
//CBO permission checks against the environment configuration CSRs
//A CBO of type INVAL is illegal when the cbie field is 2'b00; the other CBO types are illegal when cbcfe is clear
//is_cbo gates the whole check: previously it was only part of the ternary condition, so instructions that
//are not CBOs fell through to the ~cbcfe arm and could be reported as illegal
logic menv_illegal;
logic senv_illegal;
assign menv_illegal = CONFIG.INCLUDE_CBO & issue_attr.is_cbo & (issue_attr.cbo_type == INVAL ? menvcfg.cbie == 2'b00 : ~menvcfg.cbcfe);
assign senv_illegal = CONFIG.INCLUDE_CBO & issue_attr.is_cbo & (issue_attr.cbo_type == INVAL ? senvcfg.cbie == 2'b00 : ~senvcfg.cbcfe);
//menvcfg restricts every level below machine mode; senvcfg additionally restricts user mode
assign illegal_cbo = CONFIG.MODES == MU ? current_privilege == USER_PRIVILEGE & menv_illegal : (current_privilege != MACHINE_PRIVILEGE & menv_illegal) | (current_privilege == USER_PRIVILEGE & senv_illegal);
assign nomatch_fault = tlb.done & ~|sub_unit_address_match;
assign late_exception = tlb.is_fault | nomatch_fault;
//Hold writeback exceptions until they are ready to retire
logic rd_zero_r;
logic delay_exception;
logic delayed_exception;
//An exception is delayed when the faulting instruction is not the one at retire_id and its rd_zero flag is clear
assign delay_exception = (
//Unaligned load/AMO detected at issue
(issue.new_request & unaligned_addr & (issue_attr.is_load | issue_attr.is_amo) & issue.id != retire_id & ~issue_attr.rd_zero) |
//Late exception (TLB fault or no subunit match) for a request tracked as a load (tlb_lq)
(late_exception & tlb_lq & exception_id != retire_id & ~rd_zero_r)
);
//Remembers a delayed exception until it is finally raised through new_exception
always_ff @(posedge clk) begin
if (rst)
delayed_exception <= 0;
else if (delay_exception)
delayed_exception <= 1;
else if (new_exception)
delayed_exception <= 0;
end
assign new_exception = (
//Unaligned stores and illegal CBOs are raised at issue
(issue.new_request & ((unaligned_addr & issue_attr.is_store) | illegal_cbo)) |
//Unaligned loads/AMOs are raised at issue only if they are at retire_id or have rd_zero set
(issue.new_request & unaligned_addr & (issue_attr.is_load | issue_attr.is_amo) & (issue.id == retire_id | issue_attr.rd_zero)) |
//Late exceptions for requests not tracked as loads are raised immediately
(late_exception & ~tlb_lq) |
//Late exceptions for loads follow the same retire_id/rd_zero rule
(late_exception & tlb_lq & (exception_id == retire_id | rd_zero_r)) |
//A previously delayed exception is raised once its ID is at retire_id
(delayed_exception & exception_id == retire_id)
);
always_ff @(posedge clk) begin
if (rst)
exception.valid <= 0;
else
exception.valid <= (exception.valid & ~exception.ack) | new_exception;
exception.valid <= new_exception;
end
logic is_load;
logic is_load_r;
assign is_load = issue_attr.is_load & ~(issue_attr.is_amo & issue_attr.amo_type != AMO_LR_FN5);
always_ff @(posedge clk) begin
if (rst)
exception_is_fp <= 0;
else if (new_exception)
exception_lsq_push <= issue.new_request & ((unaligned_addr & ~issue_attr.is_fence & ~issue_attr.is_cbo) | illegal_cbo);
if (issue.new_request) begin
rd_zero_r <= issue_attr.rd_zero;
exception_is_fp <= CONFIG.INCLUDE_UNIT.FPU & issue_attr.is_fpu;
end
always_ff @(posedge clk) begin
if (new_exception & ~exception.valid) begin
exception.code <= issue_attr.is_store ? STORE_AMO_ADDR_MISSALIGNED : LOAD_ADDR_MISSALIGNED;
exception.tval <= virtual_address;
exception.id <= issue.id;
is_load_r <= is_load;
if (illegal_cbo) begin
exception.code <= ILLEGAL_INST;
exception.tval <= issue_stage.instruction;
end else begin
exception.code <= is_load ? LOAD_ADDR_MISSALIGNED : STORE_AMO_ADDR_MISSALIGNED;
exception.tval <= virtual_address;
end
exception_id <= issue.id;
end
else if (tlb.is_fault)
exception.code <= is_load_r ? LOAD_PAGE_FAULT : STORE_OR_AMO_PAGE_FAULT;
else if (nomatch_fault)
exception.code <= is_load_r ? LOAD_FAULT : STORE_AMO_FAULT;
end
assign exception.possible = (tlb_request_r & (~tlb.done | ~|sub_unit_address_match)) | exception.valid | delayed_exception; //Must suppress issue for issue-time exceptions too
assign exception.pc = issue_stage.pc_r;
assign exception.discard = tlb_lq & ~rd_zero_r;
always_ff @(posedge clk) begin
if (rst)
load_exception_complete <= 0;
else
load_exception_complete <= exception.valid & exception.ack & (exception.code == LOAD_ADDR_MISSALIGNED);
end
assign exception_is_store = ~tlb_lq;
end endgenerate
////////////////////////////////////////////////////
//Load-Store status
assign load_store_status = '{
sq_empty : lsq.sq_empty,
no_released_stores_pending : lsq.no_released_stores_pending,
idle : lsq.empty & (~load_attributes.valid) & (&unit_ready)
outstanding_store : ~lsq.sq_empty | write_outstanding,
idle : lsq.empty & (~load_attributes.valid) & (&unit_ready) & (~write_outstanding)
};
////////////////////////////////////////////////////
//TLB interface
//Address calculation
assign virtual_address = rf[RS1] + 32'(signed'(issue_attr.offset));
////////////////////////////////////////////////////
//TLB interface
always_ff @(posedge clk) begin
if (rst)
tlb_request_r <= 0;
else if (tlb.new_request)
tlb_request_r <= 1;
else if (tlb.done | tlb.is_fault)
tlb_request_r <= 0;
end
assign tlb.rnw = issue_attr.is_load | (issue_attr.is_amo & issue_attr.amo_type == AMO_LR_FN5) | issue_attr.is_cbo;
assign tlb.virtual_address = virtual_address;
assign tlb.new_request = tlb_on & issue.new_request;
assign tlb.execute = 0;
assign tlb.rnw = issue_attr.is_load & ~issue_attr.is_store;
assign tlb.new_request = issue.new_request & ~issue_attr.is_fence & (~unaligned_addr | issue_attr.is_cbo) & ~illegal_cbo;
////////////////////////////////////////////////////
//Byte enable generation
@ -318,18 +465,22 @@ module load_store_unit
end
default : be = '1;
endcase
if (issue_attr.is_cbo) //Treat CBOM as writes that don't do anything
be = '0;
end
////////////////////////////////////////////////////
//Load Store Queue
assign lsq.data_in = '{
addr : tlb_on ? tlb.physical_address : virtual_address,
offset : virtual_address[11:0],
fn3 : issue_stage.fn3,
be : be,
data : rf[RS2],
load : issue_attr.is_load,
load : issue_attr.is_load | issue_attr.is_amo,
store : issue_attr.is_store,
cache_op : issue_attr.is_cbo,
amo : issue_attr.is_amo,
amo_type : issue_attr.amo_type,
id : issue.id,
id_needed : rd_attributes.id,
fp : issue_attr.is_fpu,
@ -338,7 +489,7 @@ module load_store_unit
};
assign lsq.potential_push = issue.possible_issue;
assign lsq.push = issue.new_request & ~unaligned_addr & (~tlb_on | tlb.done) & ~issue_attr.is_fence;
assign lsq.push = issue.new_request & ~issue_attr.is_fence;
load_store_queue # (.CONFIG(CONFIG)) lsq_block (
.clk (clk),
@ -355,48 +506,67 @@ module load_store_unit
assign lsq.load_pop = sub_unit_load_issue;
assign lsq.store_pop = sub_unit_store_issue;
//Physical address passed separately
assign lsq.addr_push = tlb.done | tlb.is_fault | exception_lsq_push;
assign lsq.addr_data_in = '{
addr : tlb.physical_address[31:12],
rnw : tlb_lq,
discard : late_exception | exception_lsq_push,
subunit : padded_subunit_id
};
//Latched on every new request: high when the request is neither a store nor a CBO
//Not reset; holds its value between requests. Drives the rnw field of lsq.addr_data_in
always_ff @(posedge clk) begin
if (issue.new_request)
tlb_lq <= ~issue_attr.is_store & ~issue_attr.is_cbo;
end
////////////////////////////////////////////////////
//Unit tracking
always_ff @ (posedge clk) begin
if (load_attributes.push)
last_unit <= sub_unit_address_match;
last_unit <= subunit_id;
end
//When switching units, ensure no outstanding loads so that there can be no timing collisions with results
assign unit_switch = lsq.load_valid & (sub_unit_address_match != last_unit) & load_attributes.valid;
assign unit_switch = lsq.load_valid & (subunit_id != last_unit) & load_attributes.valid;
//Holds its value while load_attributes.valid is low and is cleared whenever it is high (no reset)
//NOTE(review): unit_switch is itself gated by load_attributes.valid, so the set term
//(unit_switch_in_progress | unit_switch) appears unable to take effect here - confirm intent
always_ff @ (posedge clk) begin
unit_switch_in_progress <= (unit_switch_in_progress | unit_switch) & ~load_attributes.valid;
end
assign unit_switch_hold = unit_switch | unit_switch_in_progress;
assign unit_switch_hold = unit_switch | unit_switch_in_progress | fiom_amo_hold;
////////////////////////////////////////////////////
//Primary Control Signals
assign sel_load = lsq.load_valid;
assign sub_unit_ready = unit_ready[subunit_id] & (~unit_switch_hold);
assign load_complete = |unit_data_valid;
assign load_response = |unit_data_valid;
assign load_complete = load_response & (~exception.valid | exception_is_store);
assign issue.ready = (~tlb_on | tlb.ready) & (~lsq.full) & (~fence_hold) & (~exception.valid);
//TLB status and exceptions can be ignored because they will prevent instructions from issuing
assign issue.ready = ~lsq.full & ~fence_hold;
assign sub_unit_load_issue = sel_load & lsq.load_valid & sub_unit_ready & sub_unit_address_match[subunit_id];
assign sub_unit_store_issue = (lsq.store_valid & ~sel_load) & sub_unit_ready & sub_unit_address_match[subunit_id];
assign sub_unit_load_issue = sel_load & lsq.load_valid & sub_unit_ready;
assign sub_unit_store_issue = (lsq.store_valid & ~sel_load) & sub_unit_ready;
assign sub_unit_issue = sub_unit_load_issue | sub_unit_store_issue;
assign write_outstanding = |unit_write_outstanding;
always_ff @ (posedge clk) begin
if (rst)
fence_hold <= 0;
else
fence_hold <= (fence_hold & ~load_store_status.idle) | (issue.new_request & issue_attr.is_fence);
fence_hold <= (fence_hold & ~load_store_status.idle) | (issue.new_request & issue_attr.is_fence & (issue_attr.nontrivial_fence | fiom));
end
////////////////////////////////////////////////////
//Load attributes FIFO
logic [1:0] final_mux_sel;
assign subunit_id = shared_inputs.subunit[NUM_SUB_UNITS_W-1:0];
one_hot_to_integer #(NUM_SUB_UNITS)
sub_unit_select (
.one_hot (sub_unit_address_match),
.int_out (subunit_id)
.int_out (padded_subunit_id[NUM_SUB_UNITS_W-1:0])
);
always_comb begin
@ -431,7 +601,7 @@ module load_store_unit
////////////////////////////////////////////////////
//Unit Instantiation
generate for (genvar i=0; i < NUM_SUB_UNITS; i++) begin : gen_load_store_sources
assign sub_unit[i].new_request = sub_unit_issue & sub_unit_address_match[i];
assign sub_unit[i].new_request = sub_unit_issue & subunit_id == i;
assign sub_unit[i].addr = shared_inputs.addr;
assign sub_unit[i].re = shared_inputs.load;
assign sub_unit[i].we = shared_inputs.store;
@ -445,10 +615,14 @@ module load_store_unit
endgenerate
generate if (CONFIG.INCLUDE_DLOCAL_MEM) begin : gen_ls_local_mem
assign sub_unit_address_match[LOCAL_MEM_ID] = dlocal_mem_addr_utils.address_range_check(shared_inputs.addr);
assign sub_unit_address_match[LOCAL_MEM_ID] = dlocal_mem_addr_utils.address_range_check(tlb.physical_address);
local_mem_sub_unit d_local_mem (
.clk (clk),
.rst (rst),
.write_outstanding (unit_write_outstanding[LOCAL_MEM_ID]),
.amo (shared_inputs.amo),
.amo_type (shared_inputs.amo_type),
.amo_unit (amo_if[LOCAL_MEM_ID]),
.unit (sub_unit[LOCAL_MEM_ID]),
.local_mem (data_bram)
);
@ -456,27 +630,38 @@ module load_store_unit
endgenerate
generate if (CONFIG.INCLUDE_PERIPHERAL_BUS) begin : gen_ls_pbus
assign sub_unit_address_match[BUS_ID] = dpbus_addr_utils.address_range_check(shared_inputs.addr);
assign sub_unit_address_match[BUS_ID] = dpbus_addr_utils.address_range_check(tlb.physical_address);
if(CONFIG.PERIPHERAL_BUS_TYPE == AXI_BUS)
axi_master axi_bus (
.clk (clk),
.rst (rst),
.write_outstanding (unit_write_outstanding[BUS_ID]),
.m_axi (m_axi),
.size ({1'b0,shared_inputs.fn3[1:0]}),
.amo (shared_inputs.amo),
.amo_type (shared_inputs.amo_type),
.amo_unit (amo_if[BUS_ID]),
.ls (sub_unit[BUS_ID])
); //Lower two bits of fn3 match AXI specification for request size (byte/halfword/word)
else if (CONFIG.PERIPHERAL_BUS_TYPE == WISHBONE_BUS)
wishbone_master wishbone_bus (
wishbone_master #(.LR_WAIT(CONFIG.AMO_UNIT.LR_WAIT), .INCLUDE_AMO(CONFIG.INCLUDE_AMO)) wishbone_bus (
.clk (clk),
.rst (rst),
.write_outstanding (unit_write_outstanding[BUS_ID]),
.wishbone (dwishbone),
.amo (shared_inputs.amo),
.amo_type (shared_inputs.amo_type),
.amo_unit (amo_if[BUS_ID]),
.ls (sub_unit[BUS_ID])
);
else if (CONFIG.PERIPHERAL_BUS_TYPE == AVALON_BUS) begin
avalon_master avalon_bus (
avalon_master #(.LR_WAIT(CONFIG.AMO_UNIT.LR_WAIT), .INCLUDE_AMO(CONFIG.INCLUDE_AMO)) avalon_bus (
.clk (clk),
.rst (rst),
.m_avalon (m_avalon),
.write_outstanding (unit_write_outstanding[BUS_ID]),
.m_avalon (m_avalon),
.amo (shared_inputs.amo),
.amo_type (shared_inputs.amo_type),
.amo_unit (amo_if[BUS_ID]),
.ls (sub_unit[BUS_ID])
);
end
@ -484,46 +669,39 @@ module load_store_unit
endgenerate
generate if (CONFIG.INCLUDE_DCACHE) begin : gen_ls_dcache
logic load_ready;
logic store_ready;
logic uncacheable_load;
logic uncacheable_store;
logic dcache_load_request;
logic dcache_store_request;
assign sub_unit_address_match[DCACHE_ID] = dcache_addr_utils.address_range_check(shared_inputs.addr);
assign sub_unit_address_match[DCACHE_ID] = dcache_addr_utils.address_range_check(tlb.physical_address);
assign uncacheable_load = CONFIG.DCACHE.USE_NON_CACHEABLE & uncacheable_utils.address_range_check(shared_inputs.addr);
assign uncacheable_store = CONFIG.DCACHE.USE_NON_CACHEABLE & uncacheable_utils.address_range_check(shared_inputs.addr);
assign dcache_load_request = sub_unit_load_issue & sub_unit_address_match[DCACHE_ID];
assign dcache_store_request = sub_unit_store_issue & sub_unit_address_match[DCACHE_ID];
dcache # (.CONFIG(CONFIG))
data_cache (
.clk (clk),
.rst (rst),
.dcache_on (dcache_on),
.l1_request (l1_request),
.l1_response (l1_response),
.sc_complete (sc_complete),
.sc_success (sc_success),
.clear_reservation (clear_reservation),
.amo (),
.uncacheable_load (uncacheable_load),
.uncacheable_store (uncacheable_store),
.is_load (sel_load),
.load_ready (load_ready),
.store_ready (store_ready),
.load_request (dcache_load_request),
.store_request (dcache_store_request),
.ls_load (lsq.load_data_out),
.ls_store (lsq.store_data_out),
.ls (sub_unit[DCACHE_ID])
);
dcache #(.CONFIG(CONFIG)) data_cache (
.l1_request(l1_request),
.l1_response(l1_response),
.write_outstanding(unit_write_outstanding[DCACHE_ID]),
.amo(shared_inputs.amo),
.amo_type(shared_inputs.amo_type),
.amo_unit(amo_if[DCACHE_ID]),
.uncacheable(uncacheable_load | uncacheable_store),
.cbo(shared_inputs.cache_op),
.ls(sub_unit[DCACHE_ID]),
.load_peek(lsq.load_valid),
.load_addr_peek(lsq.load_data_out.addr),
.*);
end
endgenerate
generate if (CONFIG.INCLUDE_AMO) begin : gen_amo
amo_unit #(
.NUM_UNITS(NUM_SUB_UNITS),
.RESERVATION_WORDS(CONFIG.AMO_UNIT.RESERVATION_WORDS)
) amo_inst (
.agents(amo_if),
.*);
end endgenerate
////////////////////////////////////////////////////
//Output Muxing
logic sign_bit_data [4];
@ -581,13 +759,12 @@ module load_store_unit
////////////////////////////////////////////////////
//Output bank
assign wb.rd = final_load_data;
assign wb.done = (load_complete & (~CONFIG.INCLUDE_UNIT.FPU | wb_attr.fp_op == INT_DONE)) | (load_exception_complete & ~exception_is_fp);
//TODO: exceptions seemingly clobber load data if it appears on the same cycle
assign wb.id = load_exception_complete ? exception.id : wb_attr.id;
assign wb.done = (load_complete & (~CONFIG.INCLUDE_UNIT.FPU | wb_attr.fp_op == INT_DONE)) | (exception.valid & ~exception_is_fp & ~exception_is_store);
assign wb.id = exception.valid & ~exception_is_store ? exception_id : wb_attr.id;
assign fp_wb.rd = fp_result;
assign fp_wb.done = (load_complete & (wb_attr.fp_op == SINGLE_DONE | wb_attr.fp_op == DOUBLE_DONE)) | (load_exception_complete & exception_is_fp);
assign fp_wb.id = load_exception_complete ? exception.id : wb_attr.id;
assign fp_wb.done = (load_complete & (wb_attr.fp_op == SINGLE_DONE | wb_attr.fp_op == DOUBLE_DONE)) | (exception.valid & exception_is_fp & ~exception_is_store);
assign fp_wb.id = exception.valid & ~exception_is_store ? exception_id : wb_attr.id;
////////////////////////////////////////////////////
//End of Implementation

View file

@ -39,6 +39,7 @@ module store_queue
//Address hash (shared by loads and stores)
input addr_hash_t addr_hash,
//hash check on adding a load to the queue
output logic [$clog2(CONFIG.SQ_DEPTH)-1:0] sq_index,
output logic [$clog2(CONFIG.SQ_DEPTH)-1:0] sq_oldest,
@ -73,6 +74,8 @@ module store_queue
logic [CONFIG.SQ_DEPTH-1:0] valid;
logic [CONFIG.SQ_DEPTH-1:0] valid_next;
addr_hash_t [CONFIG.SQ_DEPTH-1:0] hashes;
logic [CONFIG.SQ_DEPTH-1:0] ids_valid;
id_t [CONFIG.SQ_DEPTH-1:0] ids;
//LUTRAM-based memory blocks
sq_entry_t output_entry;
@ -131,7 +134,7 @@ module store_queue
.raddr(sq_oldest_next),
.ram_write(sq.push),
.new_ram_data('{
addr : sq.data_in.addr,
offset : sq.data_in.offset,
be : sq.data_in.be,
cache_op : sq.data_in.cache_op,
data : '0,
@ -151,22 +154,28 @@ module store_queue
.waddr(sq.data_in.id),
.raddr(store_retire.id),
.ram_write(sq.push),
.new_ram_data(sq.data_in.addr[1:0]),
.new_ram_data(sq.data_in.offset[1:0]),
.ram_data_out(retire_alignment)
);
//Compare store addr-hashes against new load addr-hash
//ID collisions also handled to prevent overwriting store data
always_comb begin
potential_store_conflict = 0;
for (int i = 0; i < CONFIG.SQ_DEPTH; i++)
for (int i = 0; i < CONFIG.SQ_DEPTH; i++) begin
potential_store_conflict |= {(valid[i] & ~issued_one_hot[i]), addr_hash} == {1'b1, hashes[i]};
potential_store_conflict |= {(valid[i] & ~issued_one_hot[i] & ids_valid[i]), sq.data_in.id} == {1'b1, ids[i]};
end
end
////////////////////////////////////////////////////
//Register-based storage
//Address hashes
always_ff @ (posedge clk) begin
for (int i = 0; i < CONFIG.SQ_DEPTH; i++) begin
if (new_request_one_hot[i])
if (new_request_one_hot[i]) begin
hashes[i] <= addr_hash;
ids[i] <= sq.data_in.id_needed;
ids_valid[i] <= CONFIG.INCLUDE_UNIT.FPU & sq.data_in.fp ? |fp_store_forward_wb_group : |store_forward_wb_group;
end
end
end
////////////////////////////////////////////////////
@ -178,8 +187,6 @@ module store_queue
released_count <= released_count + (LOG2_SQ_DEPTH + 1)'(store_retire.valid) - (LOG2_SQ_DEPTH + 1)'(sq.pop);
end
assign sq.no_released_stores_pending = ~|released_count;
////////////////////////////////////////////////////
//Forwarding and Store Data
//Forwarding is only needed from multi-cycle writeback ports
@ -308,7 +315,7 @@ module store_queue
assign sq.valid = |released_count;
assign sq.data_out = '{
addr : output_entry_r.addr,
offset : output_entry_r.offset,
be : output_entry_r.be,
cache_op : output_entry_r.cache_op,
data : sq_data_out[31:0],

0
core/execution_units/mul_unit.sv Executable file → Normal file
View file

105
core/fetch_stage/branch_predictor.sv Executable file → Normal file
View file

@ -45,7 +45,7 @@ module branch_predictor
localparam longint BUS_RANGE = 64'(CONFIG.IBUS_ADDR.H) - 64'(CONFIG.IBUS_ADDR.L) + 1;
function int get_memory_width();
if(CONFIG.INCLUDE_S_MODE)
if(CONFIG.MODES == MSU)
return 32;
else if (CONFIG.INCLUDE_ICACHE && (
(CONFIG.INCLUDE_ILOCAL_MEM && CACHE_RANGE > SCRATCH_RANGE) ||
@ -66,6 +66,7 @@ module branch_predictor
localparam BTAG_W = get_memory_width() - BRANCH_ADDR_W - 2;
cache_functions_interface #(.TAG_W(BTAG_W), .LINE_W(BRANCH_ADDR_W), .SUB_LINE_W(0)) addr_utils();
typedef logic[1:0] branch_predictor_metadata_t;
typedef struct packed {
logic valid;
logic [BTAG_W-1:0] tag;
@ -76,6 +77,7 @@ module branch_predictor
} branch_table_entry_t;
branch_table_entry_t [CONFIG.BP.WAYS-1:0] if_entry;
branch_table_entry_t muxed_entry;
branch_table_entry_t ex_entry;
typedef struct packed{
@ -88,12 +90,12 @@ module branch_predictor
logic branch_predictor_direction_changed;
logic [31:0] new_jump_addr;
logic [CONFIG.BP.WAYS-1:0][31:0] predicted_pc;
logic [31:0] muxed_predicted_pc;
logic [CONFIG.BP.WAYS-1:0] tag_matches;
logic [CONFIG.BP.WAYS-1:0] replacement_way;
logic [CONFIG.BP.WAYS-1:0] tag_update_way;
logic [CONFIG.BP.WAYS-1:0] target_update_way;
logic [$clog2(CONFIG.BP.WAYS > 1 ? CONFIG.BP.WAYS : 2)-1:0] hit_way;
logic tag_match;
logic use_predicted_pc;
@ -102,70 +104,67 @@ module branch_predictor
/////////////////////////////////////////
genvar i;
generate if (CONFIG.INCLUDE_BRANCH_PREDICTOR)
for (i=0; i<CONFIG.BP.WAYS; i++) begin : gen_branch_tag_banks
dual_port_bram #(.WIDTH($bits(branch_table_entry_t)), .LINES(CONFIG.BP.ENTRIES))
tag_bank (
.clk (clk),
.en_a (tag_update_way[i]),
.wen_a (tag_update_way[i]),
.addr_a (addr_utils.getHashedLineAddr(br_results.pc, i)),
.data_in_a (ex_entry),
.data_out_a (),
.en_b (bp.new_mem_request),
.wen_b (0),
.addr_b (addr_utils.getHashedLineAddr(bp.next_pc, i)),
.data_in_b ('0),
.data_out_b (if_entry[i]));
end
endgenerate
generate if (CONFIG.INCLUDE_BRANCH_PREDICTOR) begin : gen_bp
for (i=0; i<CONFIG.BP.WAYS; i++) begin : gen_bp_rams
sdp_ram #(
.ADDR_WIDTH(BRANCH_ADDR_W),
.NUM_COL(1),
.COL_WIDTH($bits(branch_table_entry_t)),
.PIPELINE_DEPTH(0)
) tag_bank (
.a_en(tag_update_way[i]),
.a_wbe(tag_update_way[i]),
.a_wdata(ex_entry),
.a_addr(addr_utils.getHashedLineAddr(br_results.pc, i)),
.b_en(bp.new_mem_request),
.b_addr(addr_utils.getHashedLineAddr(bp.next_pc, i)),
.b_rdata(if_entry[i]),
.*);
generate if (CONFIG.INCLUDE_BRANCH_PREDICTOR)
for (i=0; i<CONFIG.BP.WAYS; i++) begin : gen_branch_table_banks
dual_port_bram #(.WIDTH(32), .LINES(CONFIG.BP.ENTRIES))
addr_table (
.clk (clk),
.en_a (target_update_way[i]),
.wen_a (target_update_way[i]),
.addr_a (addr_utils.getHashedLineAddr(br_results.pc, i)),
.data_in_a (br_results.target_pc),
.data_out_a (),
.en_b (bp.new_mem_request),
.wen_b (0),
.addr_b (addr_utils.getHashedLineAddr(bp.next_pc, i)),
.data_in_b ('0),
.data_out_b (predicted_pc[i])
);
end
endgenerate
sdp_ram #(
.ADDR_WIDTH(BRANCH_ADDR_W),
.NUM_COL(1),
.COL_WIDTH(32),
.PIPELINE_DEPTH(0)
) addr_table (
.a_en(target_update_way[i]),
.a_wbe(target_update_way[i]),
.a_wdata(br_results.target_pc),
.a_addr(addr_utils.getHashedLineAddr(br_results.pc, i)),
.b_en(bp.new_mem_request),
.b_addr(addr_utils.getHashedLineAddr(bp.next_pc, i)),
.b_rdata(predicted_pc[i]),
.*);
generate if (CONFIG.INCLUDE_BRANCH_PREDICTOR)
for (i=0; i<CONFIG.BP.WAYS; i++) begin : gen_branch_hit_detection
assign tag_matches[i] = ({if_entry[i].valid, if_entry[i].tag} == {1'b1, addr_utils.getTag(bp.if_pc)});
end
one_hot_mux #(.OPTIONS(CONFIG.BP.WAYS), .DATA_TYPE(branch_table_entry_t)) hit_mux (
.one_hot(tag_matches),
.choices(if_entry),
.sel(muxed_entry),
.*);
one_hot_mux #(.OPTIONS(CONFIG.BP.WAYS), .DATA_TYPE(logic[31:0])) pc_mux (
.one_hot(tag_matches),
.choices(predicted_pc),
.sel(muxed_predicted_pc),
.*);
end
endgenerate
////////////////////////////////////////////////////
//Instruction Fetch Response
generate if (CONFIG.BP.WAYS > 1)
one_hot_to_integer #(CONFIG.BP.WAYS)
hit_way_conv (
.one_hot(tag_matches),
.int_out(hit_way)
);
else
assign hit_way = 0;
endgenerate
assign tag_match = |tag_matches;
assign use_predicted_pc = CONFIG.INCLUDE_BRANCH_PREDICTOR & tag_match;
//Predicted PC and whether the prediction is valid
assign bp.predicted_pc = predicted_pc[hit_way];
assign bp.predicted_pc = muxed_predicted_pc;
assign bp.use_prediction = use_predicted_pc;
assign bp.is_branch = if_entry[hit_way].is_branch;
assign bp.is_return = if_entry[hit_way].is_return;
assign bp.is_call = if_entry[hit_way].is_call;
assign bp.is_branch = muxed_entry.is_branch;
assign bp.is_return = muxed_entry.is_return;
assign bp.is_call = muxed_entry.is_call;
////////////////////////////////////////////////////
//Instruction Fetch metadata
@ -184,7 +183,7 @@ module branch_predictor
.raddr(br_results.id),
.ram_write(bp.pc_id_assigned),
.new_ram_data('{
branch_predictor_metadata : if_entry[hit_way].metadata,
branch_predictor_metadata : muxed_entry.metadata,
branch_prediction_used : use_predicted_pc,
branch_predictor_update_way : tag_match ? tag_matches : replacement_way
}),

View file

@ -36,7 +36,6 @@ module fetch
input logic branch_flush,
input gc_outputs_t gc,
input logic exception,
//ID Support
input id_t pc_id,
@ -77,6 +76,7 @@ module fetch
addr_utils_interface #(CONFIG.IBUS_ADDR.L, CONFIG.IBUS_ADDR.H) ibus_addr_utils ();
memory_sub_unit_interface sub_unit[NUM_SUB_UNITS-1:0]();
amo_interface unused();
logic [NUM_SUB_UNITS-1:0] sub_unit_address_match;
logic [NUM_SUB_UNITS-1:0] unit_ready;
@ -89,6 +89,7 @@ module fetch
typedef struct packed{
logic is_predicted_branch_or_jump;
logic is_branch;
logic [31:0] early_flush_pc;
logic address_valid;
logic mmu_fault;
logic [NUM_SUB_UNITS_W-1:0] subunit_id;
@ -102,8 +103,9 @@ module fetch
logic [31:0] pc_plus_4;
logic [31:0] pc_mux [4];
logic [1:0] pc_sel;
logic [31:0] early_flush_pc;
logic [31:0] pc_mux [5];
logic [2:0] pc_sel;
logic [31:0] next_pc;
logic [31:0] pc;
@ -130,15 +132,16 @@ module fetch
assign pc_plus_4 = pc + 4;
priority_encoder #(.WIDTH(4))
priority_encoder #(.WIDTH(5))
pc_sel_encoder (
.priority_vector ({1'b1, (bp.use_prediction & ~early_branch_flush), branch_flush, gc.pc_override}),
.priority_vector ({1'b1, bp.use_prediction, early_branch_flush, branch_flush, gc.pc_override}),
.encoded_result (pc_sel)
);
assign pc_mux[0] = gc.pc;
assign pc_mux[1] = bp.branch_flush_pc;
assign pc_mux[2] = bp.is_return ? ras.addr : bp.predicted_pc;
assign pc_mux[3] = pc_plus_4;
assign pc_mux[2] = early_flush_pc;
assign pc_mux[3] = bp.is_return ? ras.addr : bp.predicted_pc;
assign pc_mux[4] = pc_plus_4;
assign next_pc = pc_mux[pc_sel];
//If an exception occurs here in the fetch logic,
@ -170,15 +173,14 @@ module fetch
////////////////////////////////////////////////////
//TLB
assign tlb.virtual_address = pc;
assign tlb.execute = 1;
assign tlb.rnw = 0;
assign tlb.new_request = tlb.ready;
assign tlb.rnw = 1;
assign tlb.new_request = tlb.ready & pc_id_available & ~fetch_attr_fifo.full & (~exception_pending) & (~gc.fetch_hold);
//////////////////////////////////////////////
//Issue Control Signals
assign flush_or_rst = (rst | gc.fetch_flush | early_branch_flush);
assign new_mem_request = tlb.done & pc_id_available & ~fetch_attr_fifo.full & units_ready & (~gc.fetch_hold) & (~exception_pending);
assign new_mem_request = tlb.done & units_ready & (~gc.fetch_hold);
assign pc_id_assigned = new_mem_request | tlb.is_fault;
//////////////////////////////////////////////
@ -192,6 +194,7 @@ module fetch
assign fetch_attr_fifo.data_in = '{
is_predicted_branch_or_jump : bp.use_prediction,
is_branch : (bp.use_prediction & bp.is_branch),
early_flush_pc : pc_plus_4,
address_valid : address_valid,
mmu_fault : tlb.is_fault,
subunit_id : subunit_id
@ -207,19 +210,20 @@ module fetch
.fifo (fetch_attr_fifo)
);
assign fetch_attr = fetch_attr_fifo.data_out;
assign early_flush_pc = fetch_attr.early_flush_pc;
assign inflight_count_next = inflight_count + MAX_OUTSTANDING_REQUESTS_W'(fetch_attr_fifo.push) - MAX_OUTSTANDING_REQUESTS_W'(fetch_attr_fifo.pop);
always_ff @(posedge clk) begin
if (rst)
inflight_count <= 0;
else
inflight_count <= inflight_count_next;
inflight_count <= inflight_count_next;
end
always_ff @(posedge clk) begin
if (rst)
flush_count <= 0;
else if (gc.fetch_flush)
else if (gc.fetch_flush | early_branch_flush)
flush_count <= inflight_count_next;
else if (|flush_count & fetch_attr_fifo.pop)
flush_count <= flush_count - 1;
@ -231,7 +235,7 @@ module fetch
//for any sub unit. That request can either be completed or aborted.
//In either case, data_valid must NOT be asserted.
generate for (i=0; i < NUM_SUB_UNITS; i++) begin : gen_fetch_sources
assign sub_unit[i].new_request = fetch_attr_fifo.push & sub_unit_address_match[i];
assign sub_unit[i].new_request = fetch_attr_fifo.push & sub_unit_address_match[i] & ~tlb.is_fault;
assign sub_unit[i].addr = tlb.physical_address;
assign sub_unit[i].re = 1;
assign sub_unit[i].we = 0;
@ -249,6 +253,10 @@ module fetch
local_mem_sub_unit i_local_mem (
.clk (clk),
.rst (rst),
.write_outstanding (),
.amo (1'b0),
.amo_type ('x),
.amo_unit (unused),
.unit (sub_unit[LOCAL_MEM_ID]),
.local_mem (instruction_bram)
);
@ -260,6 +268,10 @@ module fetch
wishbone_master iwishbone_bus (
.clk (clk),
.rst (rst),
.write_outstanding (),
.amo (1'b0),
.amo_type ('x),
.amo_unit (unused),
.wishbone (iwishbone),
.ls (sub_unit[BUS_ID])
);
@ -267,19 +279,38 @@ module fetch
endgenerate
generate if (CONFIG.INCLUDE_ICACHE) begin : gen_fetch_icache
////////////////////////////////////////////////////
//Instruction fence
//A fence first prevents any new instructions from being issued then waits for inflight fetches to complete
//The fence signal can only be delivered to the icache once it is idle
//This logic will be optimized away when instruction fences aren't enabled as gc.fetch_ifence will be constant 0
logic ifence_pending; //Set by gc.fetch_ifence, held until no fetches will remain inflight
logic ifence_start; //Fence signal delivered to the icache
//Only start the fence once the next-cycle inflight count is zero
assign ifence_start = ifence_pending & ~|inflight_count_next;
always_ff @(posedge clk) begin
if (rst)
ifence_pending <= 0;
else begin
if (gc.fetch_ifence) //A new fence request takes priority over clearing
ifence_pending <= 1;
else if (~|inflight_count_next) //Same condition that allows ifence_start
ifence_pending <= 0;
end
end
assign sub_unit_address_match[ICACHE_ID] = icache_addr_utils.address_range_check(tlb.physical_address);
icache #(.CONFIG(CONFIG))
i_cache (
.clk (clk),
.rst (rst),
.gc (gc),
.ifence (ifence_start),
.icache_on (icache_on),
.l1_request (l1_request),
.l1_response (l1_response),
.fetch_sub (sub_unit[ICACHE_ID])
);
end
endgenerate
end endgenerate
assign units_ready = &unit_ready;
assign address_valid = |sub_unit_address_match;
@ -287,25 +318,25 @@ module fetch
////////////////////////////////////////////////////
//Instruction metadata updates
logic valid_fetch_result;
assign valid_fetch_result = CONFIG.INCLUDE_M_MODE ? (fetch_attr_fifo.valid & fetch_attr.address_valid & (~fetch_attr.mmu_fault)) : 1;
assign valid_fetch_result = CONFIG.MODES != BARE ? (fetch_attr_fifo.valid & fetch_attr.address_valid & (~fetch_attr.mmu_fault)) : 1;
assign if_pc = pc;
assign fetch_metadata.ok = valid_fetch_result;
assign fetch_metadata.error_code = INST_ACCESS_FAULT;
assign fetch_metadata.error_code = fetch_attr.mmu_fault ? INST_PAGE_FAULT : INST_ACCESS_FAULT;
assign fetch_instruction = unit_data_array[fetch_attr.subunit_id];
assign internal_fetch_complete = fetch_attr_fifo.valid & (fetch_attr.address_valid ? |unit_data_valid : ~valid_fetch_result);//allow instruction to propagate to decode if address is invalid
assign internal_fetch_complete = fetch_attr_fifo.valid & (~valid_fetch_result | |unit_data_valid);//allow instruction to propagate to decode if address is invalid
assign fetch_complete = internal_fetch_complete & ~|flush_count;
////////////////////////////////////////////////////
//Branch Predictor corruption check
//Needed if instruction memory is changed after any branches have been executed
generate if (CONFIG.INCLUDE_IFENCE | CONFIG.INCLUDE_S_MODE) begin : gen_branch_corruption_check
generate if (CONFIG.INCLUDE_IFENCE | CONFIG.MODES == MSU) begin : gen_branch_corruption_check
logic is_branch_or_jump;
assign is_branch_or_jump = fetch_instruction[6:2] inside {JAL_T, JALR_T, BRANCH_T};
assign early_branch_flush = (valid_fetch_result & (|unit_data_valid)) & fetch_attr.is_predicted_branch_or_jump & (~is_branch_or_jump);
assign early_branch_flush_ras_adjust = (valid_fetch_result & (|unit_data_valid)) & fetch_attr.is_branch & (~is_branch_or_jump);
assign early_branch_flush = (valid_fetch_result & (|unit_data_valid)) & fetch_attr.is_predicted_branch_or_jump & (~is_branch_or_jump) & (~|flush_count);
assign early_branch_flush_ras_adjust = (valid_fetch_result & (|unit_data_valid)) & fetch_attr.is_branch & (~is_branch_or_jump) & (~|flush_count);
end endgenerate
////////////////////////////////////////////////////
//End of Implementation

64
core/fetch_stage/icache.sv Executable file → Normal file
View file

@ -33,7 +33,7 @@ module icache
(
input logic clk,
input logic rst,
input gc_outputs_t gc,
input logic ifence,
input logic icache_on,
l1_arbiter_request_interface.master l1_request,
l1_arbiter_return_interface.master l1_response,
@ -46,6 +46,9 @@ module icache
cache_functions_interface #(.TAG_W(SCONFIG.TAG_W), .LINE_W(SCONFIG.LINE_ADDR_W), .SUB_LINE_W(SCONFIG.SUB_LINE_ADDR_W)) addr_utils();
logic ifence_in_progress;
logic[SCONFIG.LINE_ADDR_W-1:0] ifence_counter;
logic tag_hit;
logic [CONFIG.ICACHE.WAYS-1:0] tag_hit_way;
@ -59,7 +62,7 @@ module icache
logic line_complete;
logic [31:0] data_out [CONFIG.ICACHE.WAYS-1:0];
logic [CONFIG.ICACHE.WAYS-1:0][31:0] data_out;
logic linefill_in_progress;
logic request_in_progress;
@ -94,6 +97,29 @@ module icache
.rst (rst),
.fifo (input_fifo)
);
////////////////////////////////////////////////////
//Instruction fence
//Fence sweep: after ifence is seen, ifence_in_progress stays high while ifence_counter
//counts up to all ones; both signals are passed to the tag banks
generate if (CONFIG.INCLUDE_IFENCE) begin : gen_ifence
always_ff @(posedge clk) begin
if (rst) begin
ifence_counter <= '0;
ifence_in_progress <= 0;
end else begin
if (ifence) //A new ifence takes priority over finishing the current sweep
ifence_in_progress <= 1;
else if (&ifence_counter) //Counter is all ones: final address of the sweep
ifence_in_progress <= 0;
if (ifence_in_progress) //The increment from all ones wraps the counter back to zero
ifence_counter <= ifence_counter+1;
end
end
end else begin : gen_no_ifence
//Without ifence support both signals are tied to zero
assign ifence_in_progress = 0;
assign ifence_counter = '0;
end endgenerate
////////////////////////////////////////////////////
//Ready determination
always_ff @ (posedge clk) begin
@ -103,7 +129,7 @@ module icache
request_in_progress <= (request_in_progress & ~fetch_sub.data_valid) | new_request;
end
assign fetch_sub.ready = ~input_fifo.full;
assign fetch_sub.ready = ~input_fifo.full & ~ifence_in_progress;
////////////////////////////////////////////////////
//General Control Logic
@ -176,6 +202,8 @@ module icache
icache_tag_banks (
.clk(clk),
.rst(rst), //clears the read_hit_allowed flag
.ifence(ifence_in_progress),
.ifence_addr(ifence_counter),
.stage1_line_addr(addr_utils.getTagLineAddr(new_request_addr)),
.stage2_line_addr(addr_utils.getTagLineAddr(second_cycle_addr)),
.stage2_tag(addr_utils.getTag(second_cycle_addr)),
@ -188,22 +216,20 @@ module icache
////////////////////////////////////////////////////
//Data Banks
genvar i;
generate for (i=0; i < CONFIG.ICACHE.WAYS; i++) begin : idata_bank_gen
dual_port_bram #(.WIDTH(32), .LINES(CONFIG.ICACHE.LINES*CONFIG.ICACHE.LINE_W)) idata_bank (
.clk(clk),
.en_a(new_request),
.wen_a(0),
.addr_a(addr_utils.getDataLineAddr(new_request_addr)),
.data_in_a('0),
.data_out_a(data_out[i]),
.en_b(1),
.wen_b(tag_update_way[i] & l1_response.data_valid),
.addr_b(addr_utils.getDataLineAddr({second_cycle_addr[31:SCONFIG.SUB_LINE_ADDR_W+2], word_count, 2'b0})),
.data_in_b(l1_response.data),
.data_out_b()
);
end endgenerate
sdp_ram #(
.ADDR_WIDTH(SCONFIG.LINE_ADDR_W+SCONFIG.SUB_LINE_ADDR_W),
.NUM_COL(CONFIG.ICACHE.WAYS),
.COL_WIDTH(32),
.PIPELINE_DEPTH(0)
) idata_bank (
.a_en(l1_response.data_valid),
.a_wbe(tag_update_way),
.a_wdata({CONFIG.ICACHE.WAYS{l1_response.data}}),
.a_addr(addr_utils.getDataLineAddr({second_cycle_addr[31:SCONFIG.SUB_LINE_ADDR_W+2], word_count, 2'b0})),
.b_en(new_request),
.b_addr(addr_utils.getDataLineAddr(new_request_addr)),
.b_rdata(data_out),
.*);
////////////////////////////////////////////////////
//Miss data path

42
core/fetch_stage/icache_tag_banks.sv Executable file → Normal file
View file

@ -33,6 +33,8 @@ module itag_banks
(
input logic clk,
input logic rst,
input logic ifence,
input logic[SCONFIG.LINE_ADDR_W-1:0] ifence_addr,
input logic[SCONFIG.LINE_ADDR_W-1:0] stage1_line_addr,
input logic[SCONFIG.LINE_ADDR_W-1:0] stage2_line_addr,
@ -49,7 +51,7 @@ module itag_banks
//Valid + tag
typedef logic [SCONFIG.TAG_W : 0] itag_entry_t;
itag_entry_t tag_line[CONFIG.ICACHE.WAYS-1:0];
itag_entry_t[CONFIG.ICACHE.WAYS-1:0] tag_line;
logic hit_allowed;
@ -60,25 +62,25 @@ module itag_banks
hit_allowed <= stage1_adv;
end
genvar i;
generate
for (i=0; i < CONFIG.ICACHE.WAYS; i++) begin : tag_bank_gen
dual_port_bram #(.WIDTH(SCONFIG.TAG_W+1), .LINES(CONFIG.ICACHE.LINES)) itag_bank (.*,
.clk(clk),
.en_a(stage1_adv),
.wen_a('0),
.addr_a(stage1_line_addr),
.data_in_a('0),
.data_out_a(tag_line[i]),
.en_b(update),
.wen_b(update_way[i]),
.addr_b(stage2_line_addr),
.data_in_b({1'b1, stage2_tag}),
.data_out_b()
);
assign tag_hit_way[i] = ({hit_allowed, 1'b1, stage2_tag} == {1'b1, tag_line[i]});
end
endgenerate
//Tag memory: one column per way, each holding a valid bit plus tag (itag_entry_t)
//Written on a normal update or during an ifence; read using the stage 1 line address
sdp_ram_padded #(
.ADDR_WIDTH(SCONFIG.LINE_ADDR_W),
.NUM_COL(CONFIG.ICACHE.WAYS),
.COL_WIDTH(SCONFIG.TAG_W+1),
.PIPELINE_DEPTH(0)
) itag_bank (
.a_en(update | ifence),
.a_wbe(update_way | {CONFIG.ICACHE.WAYS{ifence}}), //Every way is selected during an ifence
.a_wdata({CONFIG.ICACHE.WAYS{~ifence, stage2_tag}}), //Top (valid) bit is 1 for an update and 0 during an ifence
.a_addr(ifence ? ifence_addr : stage2_line_addr), //ifence supplies its own line address
.b_en(stage1_adv),
.b_addr(stage1_line_addr),
.b_rdata(tag_line),
.*);
//Per-way hit detection: a way hits only when hit_allowed is set, the entry's
//top (valid) bit is 1, and the remaining bits equal stage2_tag
always_comb begin
for (int i = 0; i < CONFIG.ICACHE.WAYS; i++)
tag_hit_way[i] = ({hit_allowed, 1'b1, stage2_tag} == {1'b1, tag_line[i]});
end
assign tag_hit = |tag_hit_way;

2
core/fetch_stage/ras.sv Executable file → Normal file
View file

@ -77,4 +77,4 @@ module ras
read_index <= new_index;
end
endmodule
endmodule

View file

@ -53,7 +53,6 @@ module instruction_metadata_and_id_management
input logic decode_uses_rd,
input logic fp_decode_uses_rd,
input rs_addr_t decode_rd_addr,
input exception_sources_t decode_exception_unit,
input logic decode_is_store,
//renamer
input phys_addr_t decode_phys_rd_addr,
@ -76,15 +75,11 @@ module instruction_metadata_and_id_management
output retire_packet_t fp_wb_retire,
output retire_packet_t store_retire,
output id_t retire_ids [RETIRE_PORTS],
output id_t retire_ids_next [RETIRE_PORTS],
output logic retire_port_valid [RETIRE_PORTS],
output logic [LOG2_RETIRE_PORTS : 0] retire_count,
//CSR
output logic [LOG2_MAX_IDS:0] post_issue_count,
//Exception
output logic [31:0] oldest_pc,
output logic [$clog2(NUM_EXCEPTION_SOURCES)-1:0] current_exception_unit
output logic [LOG2_MAX_IDS:0] post_issue_count
);
//////////////////////////////////////////
localparam NUM_WB_GROUPS = CONFIG.NUM_WB_GROUPS + 32'(CONFIG.INCLUDE_UNIT.FPU) + 32'(CONFIG.INCLUDE_UNIT.FPU);
@ -115,6 +110,7 @@ module instruction_metadata_and_id_management
retire_packet_t fp_wb_retire_next;
retire_packet_t store_retire_next;
id_t retire_ids_next [RETIRE_PORTS];
logic retire_port_valid_next [RETIRE_PORTS];
logic [LOG2_RETIRE_PORTS : 0] retire_count_next;
////////////////////////////////////////////////////
@ -133,18 +129,6 @@ module instruction_metadata_and_id_management
.ram_data_out(decode_pc)
);
generate if (CONFIG.INCLUDE_M_MODE) begin : gen_pc_id_exception_support
lutram_1w_1r #(.DATA_TYPE(logic[31:0]), .DEPTH(MAX_IDS))
pc_table_exception (
.clk(clk),
.waddr(pc_id),
.raddr(retire_ids_next[0]),
.ram_write(pc_id_assigned),
.new_ram_data(if_pc),
.ram_data_out(oldest_pc)
);
end endgenerate
////////////////////////////////////////////////////
//Instruction table
lutram_1w_1r #(.DATA_TYPE(logic[31:0]), .DEPTH(MAX_IDS))
@ -220,20 +204,6 @@ module instruction_metadata_and_id_management
.ram_data_out(wb_phys_addrs)
);
////////////////////////////////////////////////////
//Exception unit table
generate if (CONFIG.INCLUDE_M_MODE) begin : gen_id_exception_support
lutram_1w_1r #(.DATA_TYPE(logic[$bits(exception_sources_t)-1:0]), .DEPTH(MAX_IDS))
exception_unit_table (
.clk(clk),
.waddr(decode_id),
.raddr(retire_ids_next[0]),
.ram_write(decode_advance),
.new_ram_data(decode_exception_unit),
.ram_data_out(current_exception_unit)
);
end endgenerate
////////////////////////////////////////////////////
//ID Management
@ -270,10 +240,8 @@ module instruction_metadata_and_id_management
retire_ids_next[i] <= retire_ids_next[i] + LOG2_MAX_IDS'(retire_count_next);
end
always_ff @ (posedge clk) begin
if (~gc.retire_hold)
retire_ids[i] <= retire_ids_next[i];
end
always_ff @ (posedge clk)
retire_ids[i] <= retire_ids_next[i];
end endgenerate
//Represented as a negative value so that the MSB indicates that the decode stage is valid
@ -343,7 +311,6 @@ module instruction_metadata_and_id_management
) id_waiting_for_writeback_toggle_mem_set
(
.clk (clk),
.rst (rst),
.init_clear (gc.init_clear),
.toggle (id_waiting_toggle),
.toggle_addr (id_waiting_toggle_addr),
@ -363,13 +330,11 @@ module instruction_metadata_and_id_management
//Supports retiring up to RETIRE_PORTS instructions. The retired block of instructions must be
//contiguous and must start with the first retire port. Additionally, only one register file writing
//instruction is supported per cycle.
//If an exception is pending, only retire a single intrustuction per cycle. As such, the pending
//exception will have to become the oldest instruction retire_ids[0] before it can retire.
logic retire_with_rd_found;
logic retire_with_fp_rd_found;
logic retire_with_store_found;
always_comb begin
contiguous_retire = ~gc.retire_hold;
contiguous_retire = 1;
retire_with_rd_found = 0;
retire_with_fp_rd_found = 0;
retire_with_store_found = 0;
@ -386,7 +351,7 @@ module instruction_metadata_and_id_management
retire_with_rd_found |= retire_port_valid_next[i] & retire_type[i] == RD;
retire_with_fp_rd_found |= retire_port_valid_next[i] & retire_type[i] == FP_RD;
retire_with_store_found |= retire_port_valid_next[i] & retire_type[i] == STORE;
contiguous_retire &= retire_port_valid_next[i] & ~gc.exception_pending;
contiguous_retire &= retire_port_valid_next[i];
if (retire_port_valid_next[i] & retire_type[i] == RD)
retire_with_rd_sel = LOG2_RETIRE_PORTS'(i);
@ -423,9 +388,9 @@ module instruction_metadata_and_id_management
fp_wb_retire <= fp_wb_retire_next;
store_retire <= store_retire_next;
retire_count <= gc.writeback_supress ? '0 : retire_count_next;
retire_count <= retire_count_next;
for (int i = 0; i < RETIRE_PORTS; i++)
retire_port_valid[i] <= retire_port_valid_next[i] & ~gc.writeback_supress;
retire_port_valid[i] <= retire_port_valid_next[i];
end
////////////////////////////////////////////////////
@ -439,7 +404,7 @@ module instruction_metadata_and_id_management
valid : fetched_count_neg[LOG2_MAX_IDS],
pc : decode_pc,
instruction : decode_instruction,
fetch_metadata : CONFIG.INCLUDE_M_MODE ? decode_fetch_metadata : ADDR_OK
fetch_metadata : CONFIG.MODES != BARE ? decode_fetch_metadata : ADDR_OK
};
////////////////////////////////////////////////////

0
core/l1_arbiter.sv Executable file → Normal file
View file

View file

@ -1,5 +1,5 @@
/*
* Copyright © 2019 Eric Matthews, Lesley Shannon
* Copyright © 2019 Eric Matthews, Chris Keilbart, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -18,75 +18,160 @@
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Chris Keilbart <ckeilbar@sfu.ca>
*/
module avalon_master
import cva5_config::*;
import riscv_types::*;
import cva5_types::*;
#(
parameter int unsigned LR_WAIT = 32, //The number of cycles lock is held after an LR
parameter logic INCLUDE_AMO = 1 //Required because the tools cannot fully optimize even if amo signals are tied off
)
(
input logic clk,
input logic rst,
output logic write_outstanding,
avalon_interface.master m_avalon,
input logic amo,
input amo_t amo_type,
amo_interface.subunit amo_unit,
memory_sub_unit_interface.responder ls
);
//implementation
////////////////////////////////////////////////////
//Implementation
typedef enum {
READY,
REQUESTING,
REQUESTING_AMO_R,
REQUESTING_AMO_M,
REQUESTING_AMO_W,
READY_LR,
REQUESTING_SC
} state_t;
state_t current_state;
always_ff @ (posedge clk) begin
if (ls.new_request) begin
m_avalon.addr <= ls.addr;
m_avalon.byteenable <= ls.be;
m_avalon.writedata <= ls.data_in;
end
end
logic[$clog2(LR_WAIT)-1:0] lock_counter;
logic request_is_sc;
assign request_is_sc = amo & amo_type == AMO_SC_FN5;
always_ff @ (posedge clk) begin
assign amo_unit.set_reservation = ls.new_request & amo & amo_type == AMO_LR_FN5;
assign amo_unit.clear_reservation = ls.new_request;
assign amo_unit.reservation = ls.addr;
assign amo_unit.rs1 = ls.data_out;
assign amo_unit.rs2 = m_avalon.writedata;
always_ff @(posedge clk) begin
m_avalon.addr[1:0] <= '0;
unique case (current_state)
READY : begin //Accept any request
ls.ready <= ~ls.new_request | request_is_sc;
ls.data_out <= 32'b1;
ls.data_valid <= ls.new_request & request_is_sc;
m_avalon.addr[31:2] <= ls.addr[31:2];
m_avalon.byteenable <= ls.be;
m_avalon.writedata <= ls.data_in;
m_avalon.read <= ls.new_request & ls.re & ~request_is_sc;
m_avalon.write <= ls.new_request & ls.we;
m_avalon.lock <= ls.new_request & amo;
write_outstanding <= ls.new_request & (ls.we | amo);
amo_unit.rmw_valid <= 0;
amo_unit.op <= amo_type;
lock_counter <= '0;
if (ls.new_request & (~amo | amo_type == AMO_LR_FN5))
current_state <= REQUESTING;
else if (ls.new_request & amo & amo_type != AMO_SC_FN5)
current_state <= REQUESTING_AMO_R;
end
REQUESTING : begin //Wait for response
ls.ready <= ~m_avalon.waitrequest;
ls.data_out <= m_avalon.readdata;
ls.data_valid <= m_avalon.read & ~m_avalon.waitrequest;
m_avalon.read <= m_avalon.read & m_avalon.waitrequest;
m_avalon.write <= m_avalon.write & m_avalon.waitrequest;
write_outstanding <= m_avalon.write & ~m_avalon.waitrequest;
if (~m_avalon.waitrequest)
current_state <= m_avalon.lock ? READY_LR : READY;
end
REQUESTING_AMO_R : begin //Read for an AMO
if (INCLUDE_AMO) begin
ls.data_out <= m_avalon.readdata;
ls.data_valid <= ~m_avalon.waitrequest;
m_avalon.read <= m_avalon.waitrequest;
amo_unit.rmw_valid <= ~m_avalon.waitrequest;
if (~m_avalon.waitrequest)
current_state <= REQUESTING_AMO_M;
end
end
REQUESTING_AMO_M : begin //One cycle for computing the AMO write value
if (INCLUDE_AMO) begin
ls.data_valid <= 0;
m_avalon.writedata <= amo_unit.rd;
m_avalon.write <= 1;
amo_unit.rmw_valid <= 0;
current_state <= REQUESTING_AMO_W;
end
end
REQUESTING_AMO_W : begin //Write for an AMO
if (INCLUDE_AMO) begin
ls.ready <= ~m_avalon.waitrequest;
m_avalon.write <= m_avalon.waitrequest;
m_avalon.lock <= m_avalon.waitrequest;
write_outstanding <= m_avalon.waitrequest;
if (~m_avalon.waitrequest)
current_state <= READY;
end
end
READY_LR : begin //Lock is held; hold for LR_WAIT cycles
if (INCLUDE_AMO) begin
ls.ready <= ~ls.new_request | (request_is_sc & ~amo_unit.reservation_valid);
ls.data_out <= {31'b0, ~amo_unit.reservation_valid};
ls.data_valid <= ls.new_request & request_is_sc;
m_avalon.addr[31:2] <= ls.addr[31:2];
m_avalon.byteenable <= ls.be;
m_avalon.writedata <= ls.data_in;
m_avalon.read <= ls.new_request & ls.re & ~request_is_sc;
m_avalon.write <= ls.new_request & (ls.we | (request_is_sc & amo_unit.reservation_valid));
write_outstanding <= ls.new_request & (ls.we | amo);
amo_unit.rmw_valid <= 0;
amo_unit.op <= amo_type;
if (ls.new_request)
m_avalon.lock <= amo;
else if (32'(lock_counter) == LR_WAIT-1)
m_avalon.lock <= 0;
lock_counter <= lock_counter + 1;
if (ls.new_request & (~amo | amo_type == AMO_LR_FN5))
current_state <= REQUESTING;
else if (ls.new_request & amo & amo_type != AMO_SC_FN5)
current_state <= REQUESTING_AMO_R;
else if (ls.new_request & amo & amo_type == AMO_SC_FN5 & amo_unit.reservation_valid)
current_state <= REQUESTING_SC;
else if (32'(lock_counter) == LR_WAIT-1 | ls.new_request)
current_state <= READY;
end
end
REQUESTING_SC : begin //Exclusive write
if (INCLUDE_AMO) begin
ls.ready <= ~m_avalon.waitrequest;
ls.data_valid <= 0;
m_avalon.write <= m_avalon.waitrequest;
m_avalon.lock <= m_avalon.waitrequest;
write_outstanding <= m_avalon.waitrequest;
if (~m_avalon.waitrequest)
current_state <= REQUESTING;
end
end
endcase
if (rst)
ls.ready <= 1;
else if (ls.new_request)
ls.ready <= 0;
else if (~ls.ready & ~m_avalon.waitrequest)
ls.ready <= 1;
current_state <= READY;
end
always_ff @ (posedge clk) begin
if (rst)
ls.data_valid <= 0;
else if (m_avalon.read & ~m_avalon.waitrequest)
ls.data_valid <= 1;
else
ls.data_valid <= 0;
end
always_ff @ (posedge clk) begin
if (m_avalon.read & ~m_avalon.waitrequest)
ls.data_out <= m_avalon.readdata;
else
ls.data_out <= 0;
end
always_ff @ (posedge clk) begin
if (rst)
m_avalon.read <= 0;
else if (ls.new_request & ls.re)
m_avalon.read <= 1;
else if (~m_avalon.waitrequest)
m_avalon.read <= 0;
end
always_ff @ (posedge clk) begin
if (rst)
m_avalon.write <= 0;
else if (ls.new_request & ls.we)
m_avalon.write <= 1;
else if (~m_avalon.waitrequest)
m_avalon.write <= 0;
end
endmodule

159
core/memory_sub_units/axi_master.sv Executable file → Normal file
View file

@ -1,5 +1,5 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
* Copyright © 2024 Eric Matthews, Chris Keilbart, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -18,89 +18,120 @@
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Chris Keilbart <ckeilbar@sfu.ca>
*/
module axi_master
import cva5_config::*;
import riscv_types::*;
import cva5_types::*;
(
input logic clk,
input logic rst,
output logic write_outstanding,
axi_interface.master m_axi,
input logic [2:0] size,
input logic amo,
input amo_t amo_type,
amo_interface.subunit amo_unit,
memory_sub_unit_interface.responder ls
);
logic ready;
////////////////////////////////////////////////////
//Implementation
typedef enum {
READY,
REQUESTING_WRITE,
REQUESTING_READ,
REQUESTING_AMO_M,
WAITING_READ,
WAITING_WRITE
} state_t;
state_t current_state;
//read constants
assign m_axi.arlen = 0; // 1 request
assign m_axi.arburst = 0;// burst type does not matter
assign m_axi.rready = 1; //always ready to receive data
logic request_is_invalid_sc;
assign request_is_invalid_sc = amo & amo_type == AMO_SC_FN5 & ~amo_unit.reservation_valid;
always_ff @ (posedge clk) begin
if (ls.new_request) begin
m_axi.araddr <= ls.addr;
m_axi.arsize <= size;
m_axi.awsize <= size;
m_axi.awaddr <= ls.addr;
m_axi.wdata <= ls.data_in;
m_axi.wstrb <= ls.be;
end
end
assign amo_unit.set_reservation = ls.new_request & amo & amo_type == AMO_LR_FN5;
assign amo_unit.clear_reservation = ls.new_request;
assign amo_unit.reservation = ls.addr;
assign amo_unit.rs1 = ls.data_out;
//write constants
assign m_axi.awlen = 0;
assign m_axi.awburst = 0;
logic[29:0] addr;
assign m_axi.awaddr = {addr, 2'b0};
assign m_axi.araddr = {addr, 2'b0};
assign m_axi.awlen = '0;
assign m_axi.arlen = '0;
assign m_axi.awburst = '0;
assign m_axi.arburst = '0;
assign m_axi.awid = '0;
assign m_axi.arid = '0;
assign m_axi.rready = 1;
assign m_axi.bready = 1;
set_clr_reg_with_rst #(.SET_OVER_CLR(0), .WIDTH(1), .RST_VALUE(1)) ready_m (
.clk, .rst,
.set(m_axi.rvalid | m_axi.bvalid),
.clr(ls.new_request),
.result(ready)
);
assign ls.ready = ready;
always_ff @ (posedge clk) begin
always_ff @(posedge clk) begin
unique case (current_state)
READY : begin //Accept any request
ls.ready <= ~ls.new_request | request_is_invalid_sc;
ls.data_out <= 1;
ls.data_valid <= ls.new_request & request_is_invalid_sc;
addr <= ls.addr[31:2];
m_axi.awlock <= amo & amo_type != AMO_LR_FN5; //Used in WAITING_READ to determine if it was a RMW
m_axi.awvalid <= ls.new_request & (ls.we | (amo & amo_type == AMO_SC_FN5 & amo_unit.reservation_valid));
m_axi.arlock <= amo & amo_type != AMO_SC_FN5; //Used in WAITING_WRITE to determine if it was a RNW
m_axi.arvalid <= ls.new_request & ls.re & ~(amo & amo_type == AMO_SC_FN5);
m_axi.wvalid <= ls.new_request & (ls.we | (amo & amo_type == AMO_SC_FN5 & amo_unit.reservation_valid));
m_axi.wdata <= ls.data_in;
m_axi.wstrb <= ls.be;
write_outstanding <= ls.new_request & (ls.we | amo);
amo_unit.rmw_valid <= 0;
amo_unit.op <= amo_type;
amo_unit.rs2 <= ls.data_in; //Cannot use wdata because wdata will be overwritten if the RMW is not exclusive
if (ls.new_request & (ls.we | (amo & amo_type == AMO_SC_FN5 & amo_unit.reservation_valid)))
current_state <= REQUESTING_WRITE;
else if (ls.new_request & ~request_is_invalid_sc)
current_state <= REQUESTING_READ;
end
REQUESTING_READ : begin //Wait for read to be accepted
m_axi.arvalid <= ~m_axi.arready;
if (m_axi.arready)
current_state <= WAITING_READ;
end
WAITING_READ : begin //Wait for read response
ls.ready <= m_axi.rvalid & ~m_axi.awlock;
ls.data_out <= m_axi.rdata;
ls.data_valid <= m_axi.rvalid;
amo_unit.rmw_valid <= m_axi.rvalid;
if (m_axi.rvalid)
current_state <= m_axi.awlock ? REQUESTING_AMO_M : READY;
end
REQUESTING_AMO_M : begin //One cycle for computing the AMO write value
ls.data_valid <= 0;
m_axi.awvalid <= 1;
m_axi.wvalid <= 1;
m_axi.wdata <= amo_unit.rd;
amo_unit.rmw_valid <= 0;
current_state <= REQUESTING_WRITE;
end
REQUESTING_WRITE : begin //Wait for write (address and data) to be accepted
m_axi.awvalid <= m_axi.awvalid & ~m_axi.awready;
m_axi.wvalid <= m_axi.wvalid & ~m_axi.wready;
if ((~m_axi.awvalid | m_axi.awready) & (~m_axi.wvalid | m_axi.wready))
current_state <= WAITING_WRITE;
end
WAITING_WRITE : begin //Wait for write response; resubmit if RMW was not exclusive
ls.ready <= m_axi.bvalid & (~m_axi.arlock | m_axi.bresp == 2'b01);
ls.data_out <= {31'b0, m_axi.bresp != 2'b01};
ls.data_valid <= m_axi.bvalid & m_axi.awlock & ~m_axi.arlock;
m_axi.arvalid <= m_axi.bvalid & m_axi.arlock & m_axi.bresp != 2'b01;
write_outstanding <= ~(m_axi.bvalid & (~m_axi.arlock | m_axi.bresp == 2'b01));
if (m_axi.bvalid)
current_state <= m_axi.arlock & m_axi.bresp != 2'b01 ? REQUESTING_READ : READY;
end
endcase
if (rst)
ls.data_valid <= 0;
else
ls.data_valid <= m_axi.rvalid;
current_state <= READY;
end
//read channel
set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) arvalid_m (
.clk, .rst,
.set(ls.new_request & ls.re),
.clr(m_axi.arready),
.result(m_axi.arvalid)
);
always_ff @ (posedge clk) begin
if (m_axi.rvalid)
ls.data_out <= m_axi.rdata;
end
//write channel
set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) awvalid_m (
.clk, .rst,
.set(ls.new_request & ls.we),
.clr(m_axi.awready),
.result(m_axi.awvalid)
);
set_clr_reg_with_rst #(.SET_OVER_CLR(1), .WIDTH(1), .RST_VALUE(0)) wvalid_m (
.clk, .rst,
.set(ls.new_request & ls.we),
.clr(m_axi.wready),
.result(m_axi.wvalid)
);
assign m_axi.wlast = m_axi.wvalid;
endmodule

67
core/memory_sub_units/local_mem_sub_unit.sv Executable file → Normal file
View file

@ -1,5 +1,5 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
* Copyright © 2017 Eric Matthews, Chris Keilbart, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -18,35 +18,78 @@
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Chris Keilbart <ckeilbar@sfu.ca>
*/
module local_mem_sub_unit
import cva5_config::*;
import riscv_types::*;
import cva5_types::*;
(
input logic clk,
input logic rst,
output logic write_outstanding,
input logic amo,
input amo_t amo_type,
amo_interface.subunit amo_unit,
memory_sub_unit_interface.responder unit,
local_memory_interface.master local_mem
);
assign unit.ready = 1;
//If amo is tied to 0 and amo_unit is disconnected the tools can optimize most of the logic away
assign local_mem.addr = unit.addr[31:2];
assign local_mem.en = unit.new_request;
assign local_mem.be = unit.be;
assign local_mem.data_in = unit.data_in;
assign unit.data_out = local_mem.data_out;
logic rmw;
logic[31:2] rmw_addr;
logic[31:0] rmw_rs2;
logic[31:0] rmw_op;
logic sc_valid;
logic sc_valid_r;
assign write_outstanding = 0;
always_ff @ (posedge clk) begin
if (rst)
assign sc_valid = amo & amo_type == AMO_SC_FN5 & amo_unit.reservation_valid;
assign amo_unit.set_reservation = unit.new_request & amo & amo_type == AMO_LR_FN5;
assign amo_unit.clear_reservation = unit.new_request;
assign amo_unit.reservation = unit.addr;
assign amo_unit.rmw_valid = rmw;
assign amo_unit.op = rmw_op;
assign amo_unit.rs1 = local_mem.data_out;
assign amo_unit.rs2 = rmw_rs2;
always_comb begin
if (rmw) begin
unit.ready = 0;
local_mem.addr = rmw_addr;
local_mem.en = 1;
local_mem.be = '1;
local_mem.data_in = amo_unit.rd;
unit.data_out = local_mem.data_out;
end else begin
unit.ready = 1;
local_mem.addr = unit.addr[31:2];
local_mem.en = unit.new_request;
local_mem.be = {4{unit.we | sc_valid}} & unit.be; //SC only writes when it succeeds
local_mem.data_in = unit.data_in;
unit.data_out = sc_valid_r ? 32'b1 : local_mem.data_out;
end
end
always_ff @(posedge clk) begin
if (rst) begin
unit.data_valid <= 0;
else
rmw <= 0;
sc_valid_r <= 0;
end
else begin
unit.data_valid <= unit.new_request & unit.re;
rmw <= unit.new_request & amo & ~(amo_type inside {AMO_LR_FN5, AMO_SC_FN5});
sc_valid_r <= sc_valid;
end
rmw_addr <= unit.addr[31:2];
rmw_rs2 <= unit.data_in;
rmw_op <= amo_type;
end
endmodule

View file

@ -1,5 +1,5 @@
/*
* Copyright © 2019 Eric Matthews, Lesley Shannon
* Copyright © 2019 Eric Matthews, Chris Keilbart, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -18,57 +18,163 @@
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Chris Keilbart <ckeilbar@sfu.ca>
*/
module wishbone_master
import cva5_config::*;
import riscv_types::*;
import cva5_types::*;
#(
parameter int unsigned LR_WAIT = 32, //The number of cycles the master holds cyc after an LR
parameter logic INCLUDE_AMO = 1 //Required because the tools cannot fully optimize even if amo signals are tied off
)
(
input logic clk,
input logic rst,
output logic write_outstanding,
wishbone_interface.master wishbone,
input logic amo,
input amo_t amo_type,
amo_interface.subunit amo_unit,
memory_sub_unit_interface.responder ls
);
logic busy;
////////////////////////////////////////////////////
//Implementation
assign wishbone.cti = 0;
assign wishbone.bte = 0;
typedef enum {
READY,
REQUESTING,
REQUESTING_AMO_R,
REQUESTING_AMO_M,
REQUESTING_AMO_W,
READY_LR,
REQUESTING_SC
} state_t;
state_t current_state;
always_ff @ (posedge clk) begin
if (ls.new_request) begin
wishbone.adr <= ls.addr[31:2];
wishbone.sel <= ls.we ? ls.be : '1;
wishbone.we <= ls.we;
wishbone.dat_w <= ls.data_in;
end
end
logic[$clog2(LR_WAIT)-1:0] cyc_counter;
logic request_is_sc;
assign request_is_sc = amo & amo_type == AMO_SC_FN5;
always_ff @ (posedge clk) begin
assign amo_unit.set_reservation = ls.new_request & amo & amo_type == AMO_LR_FN5;
assign amo_unit.clear_reservation = ls.new_request;
assign amo_unit.reservation = ls.addr;
assign amo_unit.rs1 = ls.data_out;
assign amo_unit.rs2 = wishbone.dat_w;
assign wishbone.cti = '0;
assign wishbone.bte = '0;
always_ff @(posedge clk) begin
wishbone.adr[1:0] <= '0;
unique case (current_state)
READY : begin //Accept any request
ls.ready <= ~ls.new_request | request_is_sc;
ls.data_out <= 32'b1;
ls.data_valid <= ls.new_request & request_is_sc;
wishbone.adr[31:2] <= ls.addr[31:2];
wishbone.sel <= ls.we ? ls.be : '1;
wishbone.dat_w <= ls.data_in;
wishbone.we <= ls.we;
wishbone.stb <= ls.new_request & ~request_is_sc;
wishbone.cyc <= ls.new_request & ~request_is_sc;
write_outstanding <= ls.new_request & (ls.we | amo);
amo_unit.rmw_valid <= 0;
amo_unit.op <= amo_type;
cyc_counter <= amo ? 1 : 0;
if (ls.new_request & (~amo | amo_type == AMO_LR_FN5))
current_state <= REQUESTING;
else if (ls.new_request & amo & amo_type != AMO_SC_FN5)
current_state <= REQUESTING_AMO_R;
end
REQUESTING : begin //Wait for response
ls.ready <= wishbone.ack;
ls.data_out <= wishbone.dat_r;
ls.data_valid <= ~wishbone.we & wishbone.ack;
wishbone.stb <= ~wishbone.ack;
wishbone.cyc <= ~wishbone.ack | cyc_counter[0];
write_outstanding <= wishbone.we & ~wishbone.ack;
if (wishbone.ack)
current_state <= cyc_counter[0] ? READY_LR : READY;
end
REQUESTING_AMO_R : begin //Read for an AMO
if (INCLUDE_AMO) begin
ls.data_out <= wishbone.dat_r;
ls.data_valid <= wishbone.ack;
wishbone.stb <= ~wishbone.ack;
amo_unit.rmw_valid <= wishbone.ack;
if (wishbone.ack)
current_state <= REQUESTING_AMO_M;
end
end
REQUESTING_AMO_M : begin //One cycle for computing the AMO write value
if (INCLUDE_AMO) begin
ls.data_valid <= 0;
wishbone.dat_w <= amo_unit.rd;
wishbone.stb <= 1;
wishbone.we <= 1;
amo_unit.rmw_valid <= 0;
current_state <= REQUESTING_AMO_W;
end
end
REQUESTING_AMO_W : begin //Write for an AMO
if (INCLUDE_AMO) begin
ls.ready <= wishbone.ack;
wishbone.cyc <= ~wishbone.ack;
wishbone.stb <= ~wishbone.ack;
write_outstanding <= ~wishbone.ack;
if (wishbone.ack)
current_state <= READY;
end
end
READY_LR : begin //Cyc is held; hold for LR_WAIT cycles
if (INCLUDE_AMO) begin
ls.ready <= ~ls.new_request | (request_is_sc & ~amo_unit.reservation_valid);
ls.data_out <= {31'b0, ~amo_unit.reservation_valid};
ls.data_valid <= ls.new_request & request_is_sc;
wishbone.adr[31:2] <= ls.addr[31:2];
wishbone.sel <= ls.we ? ls.be : '1;
wishbone.dat_w <= ls.data_in;
wishbone.we <= ls.we | request_is_sc;
wishbone.stb <= ls.new_request & ~(request_is_sc & ~amo_unit.reservation_valid);
write_outstanding <= ls.new_request & (ls.we | amo);
amo_unit.rmw_valid <= 0;
amo_unit.op <= amo_type;
if (ls.new_request)
wishbone.cyc <= ~(request_is_sc & ~amo_unit.reservation_valid);
else if (32'(cyc_counter) == LR_WAIT-1)
wishbone.cyc <= 0;
cyc_counter <= cyc_counter + 1;
if (ls.new_request & (~amo | amo_type == AMO_LR_FN5))
current_state <= REQUESTING;
else if (ls.new_request & amo & amo_type != AMO_SC_FN5)
current_state <= REQUESTING_AMO_R;
else if (ls.new_request & amo & amo_type == AMO_SC_FN5 & amo_unit.reservation_valid)
current_state <= REQUESTING_SC;
else if (32'(cyc_counter) == LR_WAIT-1 | ls.new_request)
current_state <= READY;
end
end
REQUESTING_SC : begin //Exclusive write
    if (INCLUDE_AMO) begin
        ls.ready <= wishbone.ack;
        ls.data_valid <= 0;
        //stb/cyc must use nonblocking assignments like every other state of this FSM;
        //mixing blocking and nonblocking assignments to the same variable in an always_ff
        //is rejected by tools and can race with logic sampling these signals
        wishbone.stb <= ~wishbone.ack;
        wishbone.cyc <= ~wishbone.ack;
        write_outstanding <= ~wishbone.ack;
        if (wishbone.ack)
            current_state <= REQUESTING;
    end
end
endcase
if (rst)
busy <= 0;
else
busy <= (busy & ~wishbone.ack) | ls.new_request;
end
assign ls.ready = (~busy);
assign wishbone.stb = busy;
assign wishbone.cyc = busy;
always_ff @ (posedge clk) begin
if (rst)
ls.data_valid <= 0;
else
ls.data_valid <= ~wishbone.we & wishbone.ack;
end
always_ff @ (posedge clk) begin
if (wishbone.ack)
ls.data_out <= wishbone.dat_r;
current_state <= READY;
end
endmodule

333
core/mmu/dtlb.sv Normal file
View file

@ -0,0 +1,333 @@
/*
* Copyright © 2017 Eric Matthews, Chris Keilbart, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Chris Keilbart <ckeilbar@sfu.ca>
*/
module dtlb
import cva5_types::*;
import riscv_types::*;
#(
parameter WAYS = 2,
parameter DEPTH = 32
)
(
input logic clk,
input logic rst,
input logic translation_on,
input tlb_packet_t sfence,
input logic [ASIDLEN-1:0] asid,
mmu_interface.tlb mmu,
tlb_interface.tlb tlb
);
//////////////////////////////////////////
localparam TAG_W = 20 - $clog2(DEPTH);
localparam TAG_W_S = 10 - $clog2(DEPTH);
localparam WAY_W = WAYS == 1 ? 1 : $clog2(WAYS);
typedef struct packed {
logic valid;
logic [ASIDLEN-1:0] asid;
logic [TAG_W-1:0] tag;
//Signals from the PTE
logic [9:0] ppn1;
logic [9:0] ppn0;
logic dirty;
logic globe;
logic user;
logic execute;
logic write;
logic read;
} tlb_entry_t;
typedef struct packed {
logic valid;
logic [ASIDLEN-1:0] asid;
logic [TAG_W_S-1:0] tag;
//Signals from the PTE
logic [9:0] ppn1;
logic dirty;
logic globe;
logic user;
logic execute;
logic write;
logic read;
} tlb_entry_s_t;
////////////////////////////////////////////////////
//Implementation
//Regular and super pages stored separately
//Regular pages are set associative and super pages are direct mapped
//Random replacement
logic[WAYS-1:0] replacement_way;
cycler #(.C_WIDTH(WAYS)) replacement_policy (
.en(1'b1),
.one_hot(replacement_way),
.*);
//LUTRAM storage
logic [$clog2(DEPTH)-1:0] tlb_raddr;
logic [$clog2(DEPTH)-1:0] tlb_raddr_s;
logic [$clog2(DEPTH)-1:0] tlb_waddr;
logic [$clog2(DEPTH)-1:0] tlb_waddr_s;
tlb_entry_t [WAYS-1:0] rdata;
tlb_entry_s_t rdata_s;
logic [WAYS-1:0] write;
logic write_s;
tlb_entry_t wdata;
tlb_entry_s_t wdata_s;
generate for (genvar i = 0; i < WAYS; i++) begin : gen_lut_rams
lutram_1w_1r #(.DATA_TYPE(tlb_entry_t), .DEPTH(DEPTH)) data_table (
.waddr(tlb_waddr),
.raddr(tlb_raddr),
.ram_write(write[i]),
.new_ram_data(wdata),
.ram_data_out(rdata[i]),
.*);
end endgenerate
lutram_1w_1r #(.DATA_TYPE(tlb_entry_s_t), .DEPTH(DEPTH)) data_table_s (
.waddr(tlb_waddr_s),
.raddr(tlb_raddr_s),
.ram_write(write_s),
.new_ram_data(wdata_s),
.ram_data_out(rdata_s),
.*);
//Hit detection
logic [TAG_W-1:0] cmp_tag;
logic [TAG_W_S-1:0] cmp_tag_s;
logic [ASIDLEN-1:0] cmp_asid;
logic [WAYS-1:0] tag_hit;
logic tag_hit_s;
logic [WAYS-1:0] asid_hit;
logic asid_hit_s;
logic [WAYS-1:0] rdata_global;
logic rdata_global_s;
logic [WAYS-1:0][9:0] ppn0;
logic [WAYS-1:0][9:0] ppn1;
logic [9:0] ppn1_s;
logic [WAYS-1:0] perms_valid_comb;
logic perms_valid_comb_s;
logic [WAYS-1:0] perms_valid;
logic perms_valid_s;
logic [WAYS-1:0] hit_ohot;
logic hit_ohot_s;
logic [WAY_W-1:0] hit_way;
logic hit;
assign cmp_tag = sfence.valid ? sfence.addr[31-:TAG_W] : tlb.virtual_address[31-:TAG_W];
assign cmp_tag_s = sfence.valid ? sfence.addr[31-:TAG_W_S] : tlb.virtual_address[31-:TAG_W_S];
assign cmp_asid = sfence.valid ? sfence.asid : asid;
//Hit detection register stage
//Captures the comparison of the entries currently read from the LUTRAMs (rdata / rdata_s) against
//cmp_tag / cmp_tag_s / cmp_asid, which come from the SFENCE packet when sfence.valid is set and from the
//TLB request otherwise. The registered results feed the hit logic and the SFENCE write enables.
always_ff @(posedge clk) begin
//Regular pages: one set of results per way
for (int i = 0; i < WAYS; i++) begin
tag_hit[i] <= rdata[i].tag == cmp_tag;
rdata_global[i] <= rdata[i].globe;
ppn0[i] <= rdata[i].ppn0;
ppn1[i] <= rdata[i].ppn1;
asid_hit[i] <= rdata[i].asid == cmp_asid;
perms_valid[i] <= perms_valid_comb[i];
//A way hits when it is valid, the tag matches, and the ASID matches or the entry is global
hit_ohot[i] <= rdata[i].valid & (rdata[i].tag == cmp_tag) & (rdata[i].asid == cmp_asid | rdata[i].globe);
end
//Superpages: single direct mapped entry, no ppn0 is stored
tag_hit_s <= rdata_s.tag == cmp_tag_s;
rdata_global_s <= rdata_s.globe;
ppn1_s <= rdata_s.ppn1;
asid_hit_s <= rdata_s.asid == cmp_asid;
perms_valid_s <= perms_valid_comb_s;
hit_ohot_s <= rdata_s.valid & (rdata_s.tag == cmp_tag_s) & (rdata_s.asid == cmp_asid | rdata_s.globe);
end
assign hit = |hit_ohot | hit_ohot_s;
priority_encoder #(.WIDTH(WAYS)) hit_cast (
.priority_vector(hit_ohot),
.encoded_result(hit_way)
);
generate for (genvar i = 0; i < WAYS; i++) begin : gen_perms_check
perms_check checks (
.pte_perms('{
d : rdata[i].dirty,
a : 1'b1,
u : rdata[i].user,
x : rdata[i].execute,
w : rdata[i].write,
r : rdata[i].read,
default: 'x
}),
.rnw(tlb.rnw),
.execute(1'b0),
.mxr(mmu.mxr),
.sum(mmu.sum),
.privilege(mmu.privilege),
.valid(perms_valid_comb[i])
);
end endgenerate
perms_check checks (
.pte_perms('{
d : rdata_s.dirty,
a : 1'b1,
u : rdata_s.user,
x : rdata_s.execute,
w : rdata_s.write,
r : rdata_s.read,
default: 'x
}),
.rnw(tlb.rnw),
.execute(1'b0),
.mxr(mmu.mxr),
.sum(mmu.sum),
.privilege(mmu.privilege),
.valid(perms_valid_comb_s)
);
//SFENCE
logic sfence_valid_r;
logic [$clog2(DEPTH)-1:0] flush_addr;
lfsr #(.WIDTH($clog2(DEPTH)), .NEEDS_RESET(0)) lfsr_counter (
.en(1'b1),
.value(flush_addr),
.*);
always_ff @(posedge clk) begin
if (tlb.new_request | sfence.valid) begin
tlb_waddr <= tlb_raddr;
tlb_waddr_s <= tlb_raddr_s;
end
sfence_valid_r <= sfence.valid; //Other SFENCE signals remain registered and do not need to be saved
end
//Read address selection for the regular (tlb_raddr) and superpage (tlb_raddr_s) tables
//Regular pages are indexed with the address bits starting at bit 12, superpages with the bits starting at bit 22
always_comb begin
if (sfence.valid) begin
//SFENCE: index by the fenced address when one is specified, otherwise use the LFSR value flush_addr
tlb_raddr = sfence.addr_only ? sfence.addr[12 +: $clog2(DEPTH)] : flush_addr;
tlb_raddr_s = sfence.addr_only ? sfence.addr[22 +: $clog2(DEPTH)] : flush_addr;
end
else begin
//Normal operation: index by the virtual address of the TLB request
tlb_raddr = tlb.virtual_address[12 +: $clog2(DEPTH)];
tlb_raddr_s = tlb.virtual_address[22 +: $clog2(DEPTH)];
end
end
assign wdata = '{
valid : ~sfence_valid_r,
asid : asid,
tag : mmu.virtual_address[31-:TAG_W],
ppn1 : mmu.upper_physical_address[19:10],
ppn0 : mmu.upper_physical_address[9:0],
dirty : mmu.perms.d,
globe : mmu.perms.g,
user : mmu.perms.u,
execute : mmu.perms.x,
write : mmu.perms.w,
read : mmu.perms.r
};
assign wdata_s = '{
valid : ~sfence_valid_r,
asid : asid,
tag : mmu.virtual_address[31-:TAG_W_S],
ppn1 : mmu.upper_physical_address[19:10],
dirty : mmu.perms.d,
globe : mmu.perms.g,
user : mmu.perms.u,
execute : mmu.perms.x,
write : mmu.perms.w,
read : mmu.perms.r
};
//Write enables for the regular (one per way) and superpage tables
//In the cycle after an SFENCE read (sfence_valid_r) the entry at tlb_waddr / tlb_waddr_s is overwritten
//with an invalid entry (wdata.valid is ~sfence_valid_r) if it matches the flush criteria selected by
//sfence.addr_only and sfence.asid_only; otherwise writes are refills requested by the MMU.
//All flush decisions use the registered results (tag_hit, asid_hit, rdata_global) because they describe
//the entry that was read for tlb_waddr; rdata follows tlb_raddr and may already show a different entry.
always_comb begin
    for (int i = 0; i < WAYS; i++) begin
        case ({sfence_valid_r, sfence.addr_only, sfence.asid_only})
            3'b100: begin //Clear everything
                write[i] = 1'b1;
                write_s = 1'b1;
            end
            3'b101: begin //Clear non global for specified address space
                write[i] = ~rdata_global[i] & asid_hit[i];
                write_s = ~rdata_global_s & asid_hit_s;
            end
            3'b110: begin //Clear matching addresses
                write[i] = tag_hit[i];
                write_s = tag_hit_s;
            end
            3'b111: begin //Clear if both
                write[i] = (~rdata_global[i] & asid_hit[i]) & tag_hit[i];
                write_s = (~rdata_global_s & asid_hit_s) & tag_hit_s;
            end
            default: begin //No SFENCE in progress: MMU refill into the way chosen by the replacement policy
                write[i] = mmu.write_entry & ~mmu.superpage & replacement_way[i];
                write_s = mmu.write_entry & mmu.superpage;
            end
        endcase
    end
end
//Permission fail
logic perm_fail;
assign perm_fail = |(hit_ohot & ~perms_valid) | (hit_ohot_s & ~perms_valid_s);
//MMU interface
logic new_request_r;
assign mmu.request = translation_on & new_request_r & ~hit & ~perm_fail;
assign mmu.execute = 0;
always_ff @(posedge clk) begin
new_request_r <= tlb.new_request;
if (tlb.new_request) begin
mmu.virtual_address <= tlb.virtual_address;
mmu.rnw <= tlb.rnw;
end
end
//TLB interface
assign tlb.done = (new_request_r & ((hit & ~perm_fail) | ~translation_on)) | mmu.write_entry;
assign tlb.ready = 1; //Not always ready, but requests will not be sent if it isn't done
assign tlb.is_fault = mmu.is_fault | (new_request_r & translation_on & perm_fail);
//Physical address generation
//mmu.virtual_address holds the virtual address registered when the request was accepted
always_comb begin
//The page offset is never translated
tlb.physical_address[11:0] = mmu.virtual_address[11:0];
if (~translation_on)
//Translation disabled: the virtual address is passed through unchanged
tlb.physical_address[31:12] = mmu.virtual_address[31:12];
else if (new_request_r) begin
//Cycle after a new request: use the TLB lookup result
//A superpage hit takes ppn1 from the superpage entry and keeps bits 21:12 of the virtual address
tlb.physical_address[31:22] = hit_ohot_s ? ppn1_s : ppn1[hit_way];
tlb.physical_address[21:12] = hit_ohot_s ? mmu.virtual_address[21:12] : ppn0[hit_way];
end else begin
//Otherwise use the translation supplied by the MMU
tlb.physical_address[31:22] = mmu.upper_physical_address[19:10];
tlb.physical_address[21:12] = mmu.superpage ? mmu.virtual_address[21:12] : mmu.upper_physical_address[9:0];
end
end
////////////////////////////////////////////////////
//End of Implementation
////////////////////////////////////////////////////
////////////////////////////////////////////////////
//Assertions
request_on_miss:
assert property (@(posedge clk) disable iff (rst) (mmu.request) |-> ~tlb.new_request)
else $error("Request during miss in TLB!");
endmodule

294
core/mmu/itlb.sv Normal file
View file

@ -0,0 +1,294 @@
/*
* Copyright © 2017 Eric Matthews, Chris Keilbart, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
* Chris Keilbart <ckeilbar@sfu.ca>
*/
//Translation lookaside buffer whose MMU requests are always marked as execute (mmu.execute = 1).
//Looks up tlb.virtual_address in two LUTRAM-backed tables (a WAYS-way table for regular
//pages and a single-way table for superpages), forwards misses to the MMU through
//mmu.request, and invalidates entries while sfence.valid is asserted.
//Parameters:
//  WAYS  - number of LUTRAM ways used for regular (4 KiB) page entries
//  DEPTH - number of entries in each LUTRAM (index width is $clog2(DEPTH))
module itlb
import riscv_types::*;
import cva5_types::*;
#(
parameter WAYS = 2,
parameter DEPTH = 32
)
(
input logic clk,
input logic rst,
input logic translation_on,
input tlb_packet_t sfence,
input logic abort_request,
input logic [ASIDLEN-1:0] asid,
mmu_interface.tlb mmu,
tlb_interface.tlb tlb
);
//////////////////////////////////////////
//Tag widths: the 20-bit (regular) or 10-bit (superpage) virtual page number minus the index bits
localparam TAG_W = 20 - $clog2(DEPTH);
localparam TAG_W_S = 10 - $clog2(DEPTH);
//Way index width; forced to at least 1 bit so hit_way is a legal vector when WAYS == 1
localparam WAY_W = WAYS == 1 ? 1 : $clog2(WAYS);
//Entry format for regular pages
typedef struct packed {
logic valid;
logic [ASIDLEN-1:0] asid;
logic [TAG_W-1:0] tag;
//Signals from the PTE
logic [9:0] ppn1;
logic [9:0] ppn0;
logic globe;
logic user;
} tlb_entry_t;
//Entry format for superpages; no ppn0 because address bits [21:12] pass through untranslated
typedef struct packed {
logic valid;
logic [ASIDLEN-1:0] asid;
logic [TAG_W_S-1:0] tag;
//Signals from the PTE
logic [9:0] ppn1;
logic globe;
logic user;
} tlb_entry_s_t;
////////////////////////////////////////////////////
//Implementation
//Regular and super pages stored separately
//Regular pages are set associative and super pages are direct mapped
//Random replacement
//NOTE(review): replacement_way is produced by the cycler module (defined elsewhere), enabled every cycle;
//presumably it rotates a one-hot way select - confirm against cycler
logic[WAYS-1:0] replacement_way;
cycler #(.C_WIDTH(WAYS)) replacement_policy (
.en(1'b1),
.one_hot(replacement_way),
.*);
//LUTRAM storage
//Each RAM uses the same address for its read and write port (tlb_addr / tlb_addr_s)
logic [$clog2(DEPTH)-1:0] tlb_addr;
logic [$clog2(DEPTH)-1:0] tlb_addr_s;
tlb_entry_t [WAYS-1:0] rdata;
tlb_entry_s_t rdata_s;
logic [WAYS-1:0] write;
logic write_s;
tlb_entry_t wdata;
tlb_entry_s_t wdata_s;
generate for (genvar i = 0; i < WAYS; i++) begin : gen_lut_rams
lutram_1w_1r #(.DATA_TYPE(tlb_entry_t), .DEPTH(DEPTH)) data_table (
.waddr(tlb_addr),
.raddr(tlb_addr),
.ram_write(write[i]),
.new_ram_data(wdata),
.ram_data_out(rdata[i]),
.*);
end endgenerate
lutram_1w_1r #(.DATA_TYPE(tlb_entry_s_t), .DEPTH(DEPTH)) data_table_s (
.waddr(tlb_addr_s),
.raddr(tlb_addr_s),
.ram_write(write_s),
.new_ram_data(wdata_s),
.ram_data_out(rdata_s),
.*);
//Hit detection
//While sfence.valid is set the comparison tag and ASID come from the sfence packet
//instead of the current lookup, so the same comparators select the entries to invalidate
logic [TAG_W-1:0] cmp_tag;
logic [TAG_W_S-1:0] cmp_tag_s;
logic [ASIDLEN-1:0] cmp_asid;
logic [WAYS-1:0] tag_hit;
logic tag_hit_s;
logic [WAYS-1:0] asid_hit;
logic asid_hit_s;
logic [WAYS-1:0] perms_valid;
logic perms_valid_s;
logic [WAYS-1:0] hit_ohot;
logic hit_ohot_s;
logic [WAY_W-1:0] hit_way;
logic hit;
assign cmp_tag = sfence.valid ? sfence.addr[31-:TAG_W] : tlb.virtual_address[31-:TAG_W];
assign cmp_tag_s = sfence.valid ? sfence.addr[31-:TAG_W_S] : tlb.virtual_address[31-:TAG_W_S];
assign cmp_asid = sfence.valid ? sfence.asid : asid;
always_comb begin
for (int i = 0; i < WAYS; i++) begin
tag_hit[i] = rdata[i].tag == cmp_tag;
asid_hit[i] = rdata[i].asid == cmp_asid;
//An entry hits when valid, the tag matches, and either the ASID matches or the entry is global
hit_ohot[i] = rdata[i].valid & tag_hit[i] & (asid_hit[i] | rdata[i].globe);
end
tag_hit_s = rdata_s.tag == cmp_tag_s;
asid_hit_s = rdata_s.asid == cmp_asid;
hit_ohot_s = rdata_s.valid & tag_hit_s & (asid_hit_s | rdata_s.globe);
end
assign hit = |hit_ohot | hit_ohot_s;
//Encode the hitting regular-page way for the physical address mux below
priority_encoder #(.WIDTH(WAYS)) hit_cast (
.priority_vector(hit_ohot),
.encoded_result(hit_way)
);
//Per-way permission check on the stored entry
//Only the U bit is kept per entry; x and a are tied high and the remaining PTE fields are don't-care
generate for (genvar i = 0; i < WAYS; i++) begin : gen_perms_check
perms_check checks (
.pte_perms('{
x : 1'b1,
a : 1'b1,
u : rdata[i].user,
default: 'x
}),
.rnw(tlb.rnw),
.execute(1'b1),
.mxr(mmu.mxr),
.sum(mmu.sum),
.privilege(mmu.privilege),
.valid(perms_valid[i])
);
end endgenerate
//Same permission check for the superpage entry
perms_check checks_s (
.pte_perms('{
x : 1'b1,
a : 1'b1,
u : rdata_s.user,
default: 'x
}),
.rnw(tlb.rnw),
.execute(1'b1),
.mxr(mmu.mxr),
.sum(mmu.sum),
.privilege(mmu.privilege),
.valid(perms_valid_s)
);
//SFENCE
//flush_addr is a free-running LFSR value used as the table index when the sfence is not address specific
//NOTE(review): presumably sfence.valid is held long enough for flush_addr to visit every index - confirm against the sfence driver and lfsr
logic [$clog2(DEPTH)-1:0] flush_addr;
lfsr #(.WIDTH($clog2(DEPTH)), .NEEDS_RESET(0)) lfsr_counter (
.en(1'b1),
.value(flush_addr),
.*);
//Table index selection: sfence address / flush counter during an sfence, otherwise the lookup address
always_comb begin
if (sfence.valid) begin
tlb_addr = sfence.addr_only ? sfence.addr[12 +: $clog2(DEPTH)] : flush_addr;
tlb_addr_s = sfence.addr_only ? sfence.addr[22 +: $clog2(DEPTH)] : flush_addr;
end
else begin
tlb_addr = tlb.virtual_address[12 +: $clog2(DEPTH)];
tlb_addr_s = tlb.virtual_address[22 +: $clog2(DEPTH)];
end
end
//Write data: a new entry built from the MMU response, or an invalid entry (valid = 0) during an sfence
assign wdata = '{
valid : ~sfence.valid,
asid : asid,
tag : tlb.virtual_address[31-:TAG_W],
ppn1 : mmu.upper_physical_address[19:10],
ppn0 : mmu.upper_physical_address[9:0],
globe : mmu.perms.g,
user : mmu.perms.u
};
assign wdata_s = '{
valid : ~sfence.valid,
asid : asid,
tag : tlb.virtual_address[31-:TAG_W_S],
ppn1 : mmu.upper_physical_address[19:10],
globe : mmu.perms.g,
user : mmu.perms.u
};
//Write enables, selected by {sfence.valid, sfence.addr_only, sfence.asid_only}
//write_s does not depend on i; it is simply reassigned to the same value on every loop iteration
always_comb begin
for (int i = 0; i < WAYS; i++) begin
case ({sfence.valid, sfence.addr_only, sfence.asid_only})
3'b100: begin //Clear everything
write[i] = 1'b1;
write_s = 1'b1;
end
3'b101: begin //Clear non global for specified address space
write[i] = ~rdata[i].globe & asid_hit[i];
write_s = ~rdata_s.globe & asid_hit_s;
end
3'b110: begin //Clear matching addresses
write[i] = tag_hit[i];
write_s = tag_hit_s;
end
3'b111: begin //Clear if both
write[i] = (~rdata[i].globe & asid_hit[i]) & tag_hit[i];
write_s = (~rdata_s.globe & asid_hit_s) & tag_hit_s;
end
//No sfence: fill from the MMU; regular pages go to the replacement way, superpages to the superpage table
default: begin
write[i] = mmu.write_entry & ~mmu.superpage & replacement_way[i];
write_s = mmu.write_entry & mmu.superpage;
end
endcase
end
end
//Permission fail
//Asserted when an entry hits but its permission check does not pass
logic perm_fail;
assign perm_fail = |(hit_ohot & ~perms_valid) | (hit_ohot_s & ~perms_valid_s);
//MMU interface
//request_in_progress is set when a request is sent to the MMU and cleared when the MMU
//writes an entry, reports a fault, or the request is aborted
logic request_in_progress;
always_ff @ (posedge clk) begin
if (rst)
request_in_progress <= 0;
else if (mmu.write_entry | mmu.is_fault | abort_request)
request_in_progress <= 0;
else if (mmu.request)
request_in_progress <= 1;
end
//A request is sent to the MMU only for a new lookup that misses while translation is enabled
assign mmu.request = translation_on & tlb.new_request & ~hit & ~perm_fail;
assign mmu.execute = 1;
assign mmu.rnw = tlb.rnw;
assign mmu.virtual_address = tlb.virtual_address;
//TLB interface
//mmu_request_complete is the registered (one cycle delayed) MMU write, suppressed if the request was aborted
logic mmu_request_complete;
always_ff @(posedge clk) begin
if (rst)
mmu_request_complete <= 0;
else
mmu_request_complete <= mmu.write_entry & ~abort_request;
end
//With translation on, done requires a permitted hit on either a new request or the cycle after an MMU fill;
//with translation off every new request completes immediately
assign tlb.done = translation_on ? (hit & ~perm_fail & (tlb.new_request | mmu_request_complete)) : tlb.new_request;
assign tlb.ready = ~request_in_progress & ~mmu_request_complete;
//Faults come either from the MMU walk or from a permission failure on a hitting entry
assign tlb.is_fault = mmu.is_fault | (tlb.new_request & translation_on & perm_fail);
//Physical address formation: page offset always passes through; the upper bits are the
//virtual address (translation off), the superpage entry (superpage hit takes priority), or the hitting way
always_comb begin
tlb.physical_address[11:0] = tlb.virtual_address[11:0];
if (~translation_on)
tlb.physical_address[31:12] = tlb.virtual_address[31:12];
else if (hit_ohot_s) begin
tlb.physical_address[21:12] = tlb.virtual_address[21:12];
tlb.physical_address[31:22] = rdata_s.ppn1;
end
else begin
tlb.physical_address[21:12] = rdata[hit_way].ppn0;
tlb.physical_address[31:22] = rdata[hit_way].ppn1;
end
end
////////////////////////////////////////////////////
//End of Implementation
////////////////////////////////////////////////////
////////////////////////////////////////////////////
//Assertions
endmodule

64
core/mmu.sv → core/mmu/mmu.sv Executable file → Normal file
View file

@ -22,9 +22,6 @@
module mmu
import cva5_config::*;
import riscv_types::*;
import cva5_types::*;
import csr_types::*;
(
@ -40,14 +37,7 @@ module mmu
logic [11:0] ppn1;
logic [9:0] ppn0;
logic [1:0] reserved;
logic d;
logic a;
logic g;
logic u;
logic x;
logic w;
logic r;
logic v;
pte_perms_t perms;
} pte_t;
typedef enum {
@ -63,8 +53,7 @@ module mmu
logic [6:0] next_state;
pte_t pte;
logic access_valid;
logic privilege_valid;
logic perms_valid;
localparam MAX_ABORTED_REQUESTS = 4;
logic abort_queue_full;
@ -84,7 +73,7 @@ module mmu
//Page Table addresses
always_ff @ (posedge clk) begin
if (state[IDLE] | l1_response.data_valid) begin
if (state[IDLE] | (l1_response.data_valid & ~discard_data)) begin
if (state[IDLE])
l1_request.addr <= {mmu.satp_ppn[19:0], mmu.virtual_address[31:22], 2'b00};
else
@ -103,7 +92,7 @@ module mmu
logic delayed_abort_complete;
assign delayed_abort = abort_request & (state[WAIT_REQUEST_1] | state[WAIT_REQUEST_2]);
assign delayed_abort_complete = discard_data & l1_response.data_valid;
assign delayed_abort_complete = (discard_data | abort_request) & l1_response.data_valid;
always_ff @ (posedge clk) begin
if (rst)
abort_tracking <= 0;
@ -113,18 +102,16 @@ module mmu
assign discard_data = abort_tracking[COUNT_W];
assign abort_queue_full = abort_tracking[COUNT_W] & ~|abort_tracking[COUNT_W-1:0];
////////////////////////////////////////////////////
//Access and permission checks
//A and D bits are software managed
assign access_valid =
(mmu.execute & pte.x & pte.a) | //fetch
(mmu.rnw & (pte.r | (pte.x & mmu.mxr)) & pte.a) | //load
((~mmu.rnw & ~mmu.execute) & pte.w & pte.a & pte.d); //store
assign privilege_valid =
(mmu.privilege == MACHINE_PRIVILEGE) |
((mmu.privilege == SUPERVISOR_PRIVILEGE) & (~pte.u | (pte.u & mmu.sum))) |
((mmu.privilege == USER_PRIVILEGE) & pte.u);
perms_check perm (
.pte_perms(pte.perms),
.rnw(mmu.rnw),
.execute(mmu.execute),
.mxr(mmu.mxr),
.sum(mmu.sum),
.privilege(mmu.privilege),
.valid(perms_valid)
);
////////////////////////////////////////////////////
//State Machine
@ -139,14 +126,14 @@ module mmu
next_state = 2**WAIT_REQUEST_1;
state[WAIT_REQUEST_1] :
if (l1_response.data_valid & ~discard_data) begin
if (~pte.v | (~pte.r & pte.w)) //page not valid OR invalid xwr pattern
if (~pte.perms.v | (~pte.perms.r & pte.perms.w)) //page not valid OR invalid xwr pattern
next_state = 2**COMPLETE_FAULT;
else if (pte.v & (pte.r | pte.x)) begin//superpage (all remaining xwr patterns other than all zeros)
if (access_valid & privilege_valid)
else if (pte.perms.v & (pte.perms.r | pte.perms.x)) begin//superpage (all remaining xwr patterns other than all zeros)
if (perms_valid & ~|pte.ppn0) //check for misaligned superpage
next_state = 2**COMPLETE_SUCCESS;
else
next_state = 2**COMPLETE_FAULT;
end else //(pte.v & ~pte.x & ~pte.w & ~pte.r) pointer to next level in page table
end else //(pte.perms.v & ~pte.perms.x & ~pte.perms.w & ~pte.perms.r) pointer to next level in page table
next_state = 2**SEND_REQUEST_2;
end
state[SEND_REQUEST_2] :
@ -154,10 +141,10 @@ module mmu
next_state = 2**WAIT_REQUEST_2;
state[WAIT_REQUEST_2] :
if (l1_response.data_valid & ~discard_data) begin
if (access_valid & privilege_valid)
next_state = 2**COMPLETE_SUCCESS;
else
if (~perms_valid | ~pte.perms.v | (~pte.perms.r & pte.perms.w)) //perm fail or invalid
next_state = 2**COMPLETE_FAULT;
else
next_state = 2**COMPLETE_SUCCESS;
end
state[COMPLETE_SUCCESS], state[COMPLETE_FAULT] :
next_state = 2**IDLE;
@ -178,6 +165,15 @@ module mmu
//TLB return path
always_ff @ (posedge clk) begin
if (l1_response.data_valid) begin
mmu.superpage <= state[WAIT_REQUEST_1];
mmu.perms.d <= pte.perms.d;
mmu.perms.a <= pte.perms.a;
mmu.perms.g <= pte.perms.g | (state[WAIT_REQUEST_2] & mmu.perms.g);
mmu.perms.u <= pte.perms.u;
mmu.perms.x <= pte.perms.x;
mmu.perms.w <= pte.perms.w;
mmu.perms.r <= pte.perms.r;
mmu.perms.v <= pte.perms.v;
mmu.upper_physical_address[19:10] <= pte.ppn1[9:0];
mmu.upper_physical_address[9:0] <= state[WAIT_REQUEST_2] ? pte.ppn0 : mmu.virtual_address[21:12];
end
@ -201,7 +197,7 @@ module mmu
//the transaction is aborted. As such, if TLB request is low and we are not in the
//IDLE state, then our current processor state has been corrupted
mmu_tlb_state_mismatch:
assert property (@(posedge clk) disable iff (rst) (~mmu.request) |-> (state[IDLE]))
assert property (@(posedge clk) disable iff (rst) (mmu.request) |-> (state[IDLE]))
else $error("MMU and TLB state mismatch");
endmodule

58
core/mmu/perms_check.sv Normal file
View file

@ -0,0 +1,58 @@
/*
* Copyright © 2024 Liam Feng, Chris Keilbart, Eric Matthews
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Liam Feng <lfa32@sfu.ca>
* Chris Keilbart <ckeilbar@sfu.ca>
* Eric Matthews <ematthew@sfu.ca>
*/
//Combinational page permission check shared by the TLBs and the MMU.
//Given the permission bits of a PTE and the type/privilege of the access, asserts
//valid when the access is permitted.
//Inputs:
//  pte_perms - permission bits of the page table entry being checked
//  rnw       - 1 for a load, 0 for a store (ignored for fetches)
//  execute   - 1 for an instruction fetch; takes precedence over rnw
//  mxr       - Make eXecutable Readable: loads may also use the X bit
//  sum       - permit Supervisor User Memory access (loads/stores only)
//  privilege - effective privilege level of the access
//Output:
//  valid     - access is permitted by both the access-type and privilege checks
module perms_check

    import csr_types::*;

    (
        input pte_perms_t pte_perms,
        input logic rnw, //LS type
        input logic execute, //Fetch
        input logic mxr, //Make eXecutable Readable
        input logic sum, //permit Supervisor User Memory access
        input privilege_t privilege, //Effective operating privilege
        output logic valid
    );

    logic is_load;
    logic is_store;
    logic access_valid;
    logic privilege_valid;

    //An access is exactly one of fetch, load or store; a fetch is never also treated as a
    //load, otherwise a fetch with rnw high could pass using only the R bit
    assign is_load = rnw & ~execute;
    assign is_store = ~rnw & ~execute;

    //Access and permission checks
    //A and D bits are software managed; this implementation corresponds to the Svade extension
    assign access_valid =
        (execute & pte_perms.x & pte_perms.a) | //fetch
        (is_load & (pte_perms.r | (pte_perms.x & mxr)) & pte_perms.a) | //load
        (is_store & pte_perms.w & pte_perms.a & pte_perms.d); //store

    //SUM only relaxes supervisor loads and stores to user pages; a supervisor
    //instruction fetch from a user page is never permitted, regardless of SUM
    assign privilege_valid =
        (privilege == MACHINE_PRIVILEGE) |
        ((privilege == SUPERVISOR_PRIVILEGE) & (~pte_perms.u | (sum & ~execute))) |
        ((privilege == USER_PRIVILEGE) & pte_perms.u);

    assign valid = access_valid & privilege_valid;

endmodule

3
core/register_file.sv Executable file → Normal file
View file

@ -100,7 +100,6 @@ module register_file
) id_inuse_toggle_mem_set
(
.clk (clk),
.rst (rst),
.init_clear (gc.init_clear),
.toggle (toggle),
.toggle_addr (toggle_addr),
@ -118,7 +117,7 @@ module register_file
.clk,
.waddr(wb_phys_addr[i]),
.raddr(decode_phys_rs_addr),
.ram_write(commit[i].valid & ~gc.writeback_supress),
.ram_write(commit[i].valid & ~gc.writeback_suppress),
.new_ram_data(commit[i].data),
.ram_data_out(regfile_rs_data[i])
);

View file

@ -91,4 +91,4 @@ module register_free_list
fifo_underflow_assertion:
assert property (@(posedge clk) disable iff (rst) fifo.pop |-> fifo.valid) else $error("underflow");
endmodule
endmodule

View file

@ -96,7 +96,7 @@ module renamer
assign free_list.potential_push = (gc.init_clear & ~clear_index[5]) | (wb_retire.valid);
assign free_list.push = free_list.potential_push;
assign free_list.data_in = gc.init_clear ? {1'b1, clear_index[4:0]} : (gc.writeback_supress ? inuse_table_output.spec_phys_addr : inuse_table_output.previous_phys_addr);
assign free_list.data_in = gc.init_clear ? {1'b1, clear_index[4:0]} : (gc.rename_revert ? inuse_table_output.spec_phys_addr : inuse_table_output.previous_phys_addr);
assign free_list.pop = rename_valid;
////////////////////////////////////////////////////
@ -137,12 +137,12 @@ module renamer
rs_addr_t spec_table_write_index;
rs_addr_t spec_table_write_index_mux [4];
assign spec_table_update = rename_valid | rollback | gc.init_clear | (wb_retire.valid & gc.writeback_supress);
assign spec_table_update = rename_valid | rollback | gc.init_clear | gc.rename_revert;
logic [1:0] spec_table_sel;
one_hot_to_integer #(.C_WIDTH(4)) spec_table_sel_one_hot_to_int (
.one_hot ({gc.init_clear, rollback, (wb_retire.valid & gc.writeback_supress), 1'b0}),
.one_hot ({gc.init_clear, rollback, gc.rename_revert, 1'b0}),
.int_out (spec_table_sel)
);
@ -150,7 +150,7 @@ module renamer
assign spec_table_write_index_mux[0] = decode.rd_addr;
assign spec_table_next_mux[0].phys_addr = free_list.data_out;
assign spec_table_next_mux[0].wb_group = decode.rd_wb_group;
//gc.writeback_supress
//gc.rename_revert
assign spec_table_write_index_mux[1] = inuse_table_output.rd_addr;
assign spec_table_next_mux[1].phys_addr = inuse_table_output.previous_phys_addr;
assign spec_table_next_mux[1].wb_group = inuse_table_output.previous_wb_group;

View file

@ -1,170 +0,0 @@
/*
* Copyright © 2017 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Initial code developed under the supervision of Dr. Lesley Shannon,
* Reconfigurable Computing Lab, Simon Fraser University.
*
* Author(s):
* Eric Matthews <ematthew@sfu.ca>
*/
//LUTRAM-based set associative TLB.
//Looks up tlb.virtual_address in WAYS LUTRAMs of DEPTH entries each. On a miss a request
//is raised to the MMU; when the MMU returns (mmu.write_entry) the new translation is
//written into the way selected by replacement_way, after which the lookup hits.
//Entries are invalidated by writing valid = 0 to every way while gc.tlb_flush is asserted.
//Parameters:
//  WAYS  - number of ways
//  DEPTH - number of entries per way
//Note: the asid input is not referenced inside this module.
module tlb_lut_ram

    import cva5_config::*;
    import riscv_types::*;
    import cva5_types::*;

    #(
        parameter WAYS = 2,
        parameter DEPTH = 32
    )
    (
        input logic clk,
        input logic rst,
        input gc_outputs_t gc,
        input logic abort_request,
        input logic [ASIDLEN-1:0] asid,
        mmu_interface.tlb mmu,
        tlb_interface.tlb tlb
    );
    //////////////////////////////////////////
    //Tag is the 20-bit virtual page number minus the index bits
    localparam TLB_TAG_W = 32-12-$clog2(DEPTH);

    typedef struct packed {
        logic valid;
        logic [TLB_TAG_W-1:0] tag;
        logic [19:0] phys_addr;
    } tlb_entry_t;

    logic [$clog2(DEPTH)-1:0] tlb_addr;
    logic [TLB_TAG_W-1:0] virtual_tag;

    logic [WAYS-1:0] tag_hit;
    logic hit;
    logic [WAYS-1:0] replacement_way;

    logic [$bits(tlb_entry_t)-1:0] ram_data [WAYS-1:0];
    tlb_entry_t ram_entry [WAYS-1:0];
    tlb_entry_t new_entry;

    logic [$clog2(DEPTH)-1:0] flush_addr;

    logic [WAYS-1:0] tlb_write;
    logic request_in_progress;
    logic mmu_request_complete;
    ////////////////////////////////////////////////////
    //Implementation
    //LUTRAM-based
    //Reset is performed sequentially, coordinated by the gc unit
    lfsr #(.WIDTH($clog2(DEPTH)), .NEEDS_RESET(0))
    lfsr_counter (
        .clk (clk), .rst (rst),
        .en(gc.tlb_flush),
        .value(flush_addr)
    );

    //During a flush the LFSR value indexes the tables, otherwise the lookup address does
    assign tlb_addr = gc.tlb_flush ? flush_addr : tlb.virtual_address[12 +: $clog2(DEPTH)];

    //Write all ways while flushing; otherwise write only the replacement way, and only
    //when the MMU actually returns a translation. Without the mmu.write_entry qualifier
    //the replacement way would be overwritten on every cycle.
    assign tlb_write = {WAYS{gc.tlb_flush}} | ({WAYS{mmu.write_entry}} & replacement_way);

    //Flush writes an invalid entry; an MMU fill writes a valid one
    assign new_entry.valid = ~gc.tlb_flush;
    assign new_entry.tag = virtual_tag;
    assign new_entry.phys_addr = mmu.upper_physical_address;

    genvar i;
    generate
        for (i=0; i<WAYS; i=i+1) begin : lut_rams
            lutram_1w_1r #(.DATA_TYPE(tlb_entry_t), .DEPTH(DEPTH))
            write_port (
                .clk(clk),
                .waddr(tlb_addr),
                .raddr(tlb_addr),
                .ram_write(tlb_write[i]),
                .new_ram_data(new_entry),
                .ram_data_out(ram_data[i])
            );
            assign ram_entry[i] = ram_data[i];
        end
    endgenerate

    //Way selection for fills; the cycler is enabled every cycle
    cycler #(.C_WIDTH(WAYS)) replacement_policy (
        .clk (clk),
        .rst (rst),
        .en (1'b1),
        .one_hot (replacement_way)
    );

    assign virtual_tag = tlb.virtual_address[31:32-TLB_TAG_W];

    //A way hits when its entry is valid and the stored tag matches
    always_comb begin
        for (int i=0; i<WAYS; i=i+1) begin
            tag_hit[i] = {ram_entry[i].valid, ram_entry[i].tag} == {1'b1, virtual_tag};
        end
    end

    assign tlb.ready = ~request_in_progress;

    //Set on a missing lookup, cleared when the MMU responds (entry or fault) or on abort
    always_ff @ (posedge clk) begin
        if (rst)
            request_in_progress <= 0;
        else if (mmu.write_entry | mmu.is_fault | abort_request)
            request_in_progress <= 0;
        else if (tlb.new_request & ~hit)
            request_in_progress <= 1;
    end
    assign mmu.request = request_in_progress;

    //Registered MMU write; lets the lookup complete on the cycle after the fill
    always_ff @ (posedge clk) begin
        if (rst)
            mmu_request_complete <= 0;
        else
            mmu_request_complete <= mmu.write_entry;
    end

    assign mmu.virtual_address = tlb.virtual_address;
    assign mmu.execute = tlb.execute;
    assign mmu.rnw = tlb.rnw;

    //On a TLB miss, the entry is requested from the MMU
    //Once the request completes, it will update the TLB, causing
    //the current request to output a hit
    assign hit = |tag_hit;
    assign tlb.done = hit & (tlb.new_request | mmu_request_complete);
    assign tlb.is_fault = mmu.is_fault;

    //Page offset passes through; upper bits are the OR of the hitting ways' physical page numbers
    always_comb begin
        tlb.physical_address[11:0] = tlb.virtual_address[11:0];
        tlb.physical_address[31:12] = 0;
        for (int i = 0; i < WAYS; i++) begin
            if (tag_hit[i]) tlb.physical_address[31:12] |= ram_entry[i].phys_addr;
        end
    end
    ////////////////////////////////////////////////////
    //End of Implementation
    ////////////////////////////////////////////////////

    ////////////////////////////////////////////////////
    //Assertions
    multiple_tag_hit_in_tlb:
        assert property (@(posedge clk) disable iff (rst) (tlb.done) |-> $onehot(tag_hit))
        else $error("Multiple tag hits in TLB!");

endmodule

View file

@ -29,7 +29,7 @@ package csr_types;
typedef enum logic [1:0] {
USER_PRIVILEGE = 2'b00,
SUPERVISOR_PRIVILEGE = 2'b01,
//reserved
RESERVED_PRIVILEGE = 2'b10,
MACHINE_PRIVILEGE = 2'b11
} privilege_t;
@ -72,8 +72,6 @@ package csr_types;
logic A; //Atomic
} misa_t;
typedef struct packed {
logic sd;
logic [7:0] zeros;
@ -86,7 +84,7 @@ package csr_types;
logic [1:0] xs;
logic [1:0] fs;
logic [1:0] mpp;
logic [1:0] zeros1;
logic [1:0] vs;
logic spp;
logic mpie;
logic ube;
@ -121,7 +119,9 @@ package csr_types;
typedef struct packed {
logic [31:16] custom;
logic [15:12] zeros;
logic [15:14] zeros;
logic lcofip;
logic zero0;
logic meip;
logic zero1;
logic seip;
@ -138,7 +138,9 @@ package csr_types;
typedef struct packed {
logic [31:16] custom;
logic [15:12] zeros;
logic [15:14] zeros;
logic lcofie;
logic zero0;
logic meie;
logic zero1;
logic seie;
@ -154,11 +156,65 @@ package csr_types;
} mie_t;
typedef struct packed {
logic is_interrupt;
logic [XLEN-1-1-ECODE_W:0] zeroes;
logic [ECODE_W-1:0] code;
} mcause_t;
logic [31:16] custom;
logic [15:14] zeros;
logic lcofipd;
logic [12:10] zero1;
logic seid;
logic [8:6] zero2;
logic stid;
logic [4:2] zero3;
logic ssid;
logic zero4;
} mideleg_t;
typedef struct packed {
logic is_interrupt;
logic [XLEN-1-1-ECODE_W:0] zeros;
logic [ECODE_W-1:0] code;
} cause_t;
typedef struct packed {
logic [28:0] hpm;
logic ir;
logic tm;
logic cy;
} mcounter_t;
typedef struct packed {
logic [24:0] zeros_high;
logic cbze;
logic cbcfe;
logic [1:0] cbie;
logic [1:0] zeros_low;
logic fiom;
} envcfg_t;
typedef struct packed {
logic stce;
logic pbmte;
logic adue;
logic cde;
logic [27:0] zeros;
} envcfgh_t;
typedef struct packed {
logic [28:0] zeros;
logic jvt;
logic fcsr;
logic c;
} stateen0_t;
typedef struct packed {
logic se0;
logic envcfg;
logic zero;
logic csrind;
logic aia;
logic imsic;
logic contex;
logic [24:0] zeros;
} mstateen0h_t;
typedef struct packed {
logic mode;
@ -166,5 +222,15 @@ package csr_types;
logic [21:0] ppn;
} satp_t;
typedef struct packed {
logic d;
logic a;
logic g;
logic u;
logic x;
logic w;
logic r;
logic v;
} pte_perms_t;
endpackage

76
core/types_and_interfaces/cva5_config.sv Executable file → Normal file
View file

@ -32,31 +32,38 @@ package cva5_config;
////////////////////////////////////////////////////
//CSR Options
typedef struct packed {
int unsigned COUNTER_W; //CSR counter width (33-64 bits): 48-bits --> 32 days @ 100MHz
bit MCYCLE_WRITEABLE;
bit MINSTR_WRITEABLE;
bit MTVEC_WRITEABLE;
bit INCLUDE_MSCRATCH;
bit INCLUDE_MCAUSE;
bit INCLUDE_MTVAL;
} csr_non_standard_config_t;
typedef enum {
BARE,
M,
MU,
MSU
} modes_t;
typedef struct packed {
bit [31:0] MACHINE_IMPLEMENTATION_ID;
bit [31:0] CPU_ID;
bit [31:0] RESET_VEC; //PC value on reset
bit [31:0] RESET_MTVEC;
csr_non_standard_config_t NON_STANDARD_OPTIONS;
bit [31:0] RESET_TVEC;
bit [31:0] MCONFIGPTR;
bit INCLUDE_ZICNTR;
bit INCLUDE_ZIHPM;
bit INCLUDE_SSTC;
bit INCLUDE_SMSTATEEN;
} csr_config_t;
//Memory range [L, H]
//Address range is inclusive and must be aligned to its size
typedef struct packed {
bit [31:0] L;
bit [31:0] H;
logic [31:0] L;
logic [31:0] H;
} memory_config_t;
//Atomic configuration
typedef struct packed {
int unsigned LR_WAIT; //Must be >= the maximum number of cycles a constrained LR-SC can take
int unsigned RESERVATION_WORDS; //The amount of 32-bit words that are reserved by an LR instruction, must be == cache line size (if cache present)
} amo_config_t;
////////////////////////////////////////////////////
//Cache Options
//Size in bytes: (LINES * WAYS * LINE_W * 4)
@ -109,7 +116,7 @@ package cva5_config;
//Additionally, writeback units must be grouped before non-writeback units
localparam MAX_NUM_UNITS = 9;
typedef struct packed {
bit IEC;
bit GC;
bit BR;
//End of Write-Back Units
bit CUSTOM;
@ -122,7 +129,7 @@ package cva5_config;
} units_t;
typedef enum bit [$clog2(MAX_NUM_UNITS)-1:0] {
IEC_ID = 8,
GC_ID = 8,
BR_ID = 7,
//End of Write-Back Units (insert new writeback units here)
CUSTOM_ID = 6,
@ -161,9 +168,7 @@ package cva5_config;
typedef struct packed {
//ISA options
bit INCLUDE_M_MODE;
bit INCLUDE_S_MODE;
bit INCLUDE_U_MODE;
modes_t MODES;
bit INCLUDE_IFENCE; //local mem operations only
bit INCLUDE_AMO;
@ -177,6 +182,7 @@ package cva5_config;
//Memory Options
int unsigned SQ_DEPTH;//CAM-based reasonable max of 4
bit INCLUDE_FORWARDING_TO_STORES;
amo_config_t AMO_UNIT;
//Caches
bit INCLUDE_ICACHE;
cache_config_t ICACHE;
@ -232,9 +238,7 @@ package cva5_config;
localparam cpu_config_t EXAMPLE_CONFIG = '{
//ISA options
INCLUDE_M_MODE : 1,
INCLUDE_S_MODE : 0,
INCLUDE_U_MODE : 0,
MODES : MSU,
INCLUDE_UNIT : '{
ALU : 1,
@ -245,7 +249,7 @@ package cva5_config;
FPU : 1,
CUSTOM : 0,
BR : 1,
IEC : 1
GC : 1
},
INCLUDE_IFENCE : 1,
@ -257,20 +261,20 @@ package cva5_config;
MACHINE_IMPLEMENTATION_ID : 0,
CPU_ID : 0,
RESET_VEC : 32'h80000000,
RESET_MTVEC : 32'h80000100,
NON_STANDARD_OPTIONS : '{
COUNTER_W : 33,
MCYCLE_WRITEABLE : 0,
MINSTR_WRITEABLE : 0,
MTVEC_WRITEABLE : 1,
INCLUDE_MSCRATCH : 0,
INCLUDE_MCAUSE : 1,
INCLUDE_MTVAL : 1
}
RESET_TVEC : 32'h00000000,
MCONFIGPTR : '0,
INCLUDE_ZICNTR : 1,
INCLUDE_ZIHPM : 1,
INCLUDE_SSTC : 1,
INCLUDE_SMSTATEEN : 1
},
//Memory Options
SQ_DEPTH : 4,
INCLUDE_FORWARDING_TO_STORES : 1,
AMO_UNIT : '{
LR_WAIT : 32,
RESERVATION_WORDS : 8
},
INCLUDE_ICACHE : 0,
ICACHE_ADDR : '{
L: 32'h80000000,
@ -377,12 +381,14 @@ package cva5_config;
////////////////////////////////////////////////////
//Exceptions
localparam NUM_EXCEPTION_SOURCES = 3; //LS, Branch, Illegal
localparam NUM_EXCEPTION_SOURCES = 5; //LS, Branch, Illegal, CSR, GC
//Stored in a ID table on issue, checked at retire
typedef enum bit [1:0] {
typedef enum bit [2:0] {
LS_EXCEPTION = 0,
BR_EXCEPTION = 1,
PRE_ISSUE_EXCEPTION = 2
PRE_ISSUE_EXCEPTION = 2,
CSR_EXCEPTION = 3,
GC_EXCEPTION = 4
} exception_sources_t;
////////////////////////////////////////////////////

54
core/types_and_interfaces/cva5_types.sv Executable file → Normal file
View file

@ -27,9 +27,10 @@ package cva5_types;
localparam LOG2_RETIRE_PORTS = $clog2(RETIRE_PORTS);
localparam LOG2_MAX_IDS = $clog2(MAX_IDS);
localparam MAX_LS_SUBUNITS = 3;
typedef logic[LOG2_MAX_IDS-1:0] id_t;
typedef logic[1:0] branch_predictor_metadata_t;
typedef logic[$clog2(MAX_LS_SUBUNITS)-1:0] ls_subunit_t;
typedef logic [3:0] addr_hash_t;
typedef logic [5:0] phys_addr_t;
@ -43,6 +44,8 @@ package cva5_types;
typedef struct packed{
logic valid;
logic possible;
logic [NUM_EXCEPTION_SOURCES-1:0] source;
exception_code_t code;
logic [31:0] tval;
logic [31:0] pc;
@ -64,7 +67,9 @@ package cva5_types;
typedef struct packed{
logic [31:0] pc;
logic [31:0] pc_r;
logic [31:0] instruction;
logic [31:0] instruction_r;
logic [2:0] fn3;
logic [6:0] opcode;
@ -76,7 +81,6 @@ package cva5_types;
logic fp_uses_rd;
logic is_multicycle;
id_t id;
exception_sources_t exception_unit;
logic stage_valid;
fetch_metadata_t fetch_metadata;
} issue_packet_t;
@ -98,18 +102,13 @@ package cva5_types;
logic [4:0] op;
}amo_alu_inputs_t;
typedef struct packed{
logic is_lr;
logic is_sc;
logic is_amo;
logic [4:0] op;
} amo_details_t;
typedef struct packed {
logic [31:0] addr;
logic [11:0] offset;
logic load;
logic store;
logic cache_op;
logic amo;
amo_t amo_type;
logic [3:0] be;
logic [2:0] fn3;
logic [31:0] data;
@ -121,7 +120,14 @@ package cva5_types;
} lsq_entry_t;
typedef struct packed {
logic [31:0] addr;
logic [19:0] addr;
logic rnw;
logic discard;
ls_subunit_t subunit;
} lsq_addr_entry_t;
typedef struct packed {
logic [11:0] offset;
logic [3:0] be;
logic cache_op;
logic [31:0] data;
@ -131,8 +137,7 @@ package cva5_types;
} sq_entry_t;
typedef struct packed {
logic sq_empty;
logic no_released_stores_pending;
logic outstanding_store;
logic idle;
} load_store_status_t;
@ -165,29 +170,32 @@ package cva5_types;
logic load;
logic store;
logic cache_op;
logic amo;
amo_t amo_type;
logic [3:0] be;
logic [2:0] fn3;
ls_subunit_t subunit;
logic [31:0] data_in;
id_t id;
fp_ls_op_t fp_op;
} data_access_shared_inputs_t;
typedef enum {
LUTRAM_FIFO,
NON_MUXED_INPUT_FIFO,
NON_MUXED_OUTPUT_FIFO
} fifo_type_t;
typedef struct packed {
logic valid;
logic asid_only;
logic[ASIDLEN-1:0] asid;
logic addr_only;
logic[31:0] addr;
} tlb_packet_t;
typedef struct packed{
logic init_clear;
logic fetch_hold;
logic issue_hold;
logic fetch_flush;
logic writeback_supress;
logic retire_hold;
logic sq_flush;
logic tlb_flush;
logic exception_pending;
logic fetch_ifence;
logic writeback_suppress;
logic rename_revert;
exception_packet_t exception;
logic pc_override;
logic [31:0] pc;

View file

@ -31,6 +31,7 @@ interface axi_interface;
logic [1:0] arburst;
logic [3:0] arcache;
logic [5:0] arid;
logic arlock;
//read data
logic rready;
@ -50,6 +51,7 @@ interface axi_interface;
logic [1:0] awburst;
logic [3:0] awcache;
logic [5:0] awid;
logic awlock;
//write data
logic wready;
@ -65,12 +67,12 @@ interface axi_interface;
logic [5:0] bid;
modport master (input arready, rvalid, rdata, rresp, rlast, rid, awready, wready, bvalid, bresp, bid,
output arvalid, araddr, arlen, arsize, arburst, arcache, arid, rready, awvalid, awaddr, awlen, awsize, awburst, awcache, awid,
output arvalid, araddr, arlen, arsize, arburst, arcache, arlock, arid, rready, awvalid, awaddr, awlen, awsize, awburst, awcache, awid, awlock,
wvalid, wdata, wstrb, wlast, bready);
modport slave (input arvalid, araddr, arlen, arsize, arburst, arcache,
modport slave (input arvalid, araddr, arlen, arsize, arburst, arcache, arlock,
rready,
awvalid, awaddr, awlen, awsize, awburst, awcache, arid,
awvalid, awaddr, awlen, awsize, awburst, awcache, awlock, arid,
wvalid, wdata, wstrb, wlast, awid,
bready,
output arready, rvalid, rdata, rresp, rlast, rid,
@ -79,9 +81,9 @@ interface axi_interface;
bvalid, bresp, bid);
`ifdef __CVA5_FORMAL__
modport formal (input arready, arvalid, araddr, arlen, arsize, arburst, arcache,
modport formal (input arready, arvalid, araddr, arlen, arsize, arburst, arcache, arlock,
rready, rvalid, rdata, rresp, rlast, rid,
awready, awvalid, awaddr, awlen, awsize, awburst, awcache, arid,
awready, awvalid, awaddr, awlen, awsize, awburst, awcache, awlock, arid,
wready, wvalid, wdata, wstrb, wlast, awid,
bready, bvalid, bresp, bid);
`endif
@ -92,6 +94,7 @@ interface avalon_interface;
logic [31:0] addr;
logic read;
logic write;
logic lock;
logic [3:0] byteenable;
logic [31:0] readdata;
logic [31:0] writedata;
@ -100,13 +103,13 @@ interface avalon_interface;
logic writeresponsevalid;
modport master (input readdata, waitrequest, readdatavalid, writeresponsevalid,
output addr, read, write, byteenable, writedata);
output addr, read, write, lock, byteenable, writedata);
modport slave (output readdata, waitrequest, readdatavalid, writeresponsevalid,
input addr, read, write, byteenable, writedata);
input addr, read, write, lock, byteenable, writedata);
`ifdef __CVA5_FORMAL__
modport formal (input readdata, waitrequest, readdatavalid, writeresponsevalid,
addr, read, write, byteenable, writedata);
addr, read, write, lock, byteenable, writedata);
`endif
endinterface

92
core/types_and_interfaces/internal_interfaces.sv Executable file → Normal file
View file

@ -98,14 +98,15 @@ interface exception_interface;
import cva5_types::*;
logic valid;
logic ack;
logic possible;
exception_code_t code;
id_t id;
logic [31:0] tval;
logic [31:0] pc;
logic discard;
modport unit (output valid, code, id, tval, input ack);
modport econtrol (input valid, code, id, tval, output ack);
modport unit (output valid, possible, code, tval, pc, discard);
modport econtrol (input valid, possible, code, tval, pc, discard);
endinterface
interface fifo_interface #(parameter type DATA_TYPE = logic);
@ -122,6 +123,8 @@ interface fifo_interface #(parameter type DATA_TYPE = logic);
endinterface
interface mmu_interface;
import csr_types::*;
//From TLB
logic request;
logic execute;
@ -130,6 +133,8 @@ interface mmu_interface;
//TLB response
logic write_entry;
logic superpage;
pte_perms_t perms;
logic [19:0] upper_physical_address;
logic is_fault;
@ -137,10 +142,10 @@ interface mmu_interface;
logic [21:0] satp_ppn;
logic mxr; //Make eXecutable Readable
logic sum; //permit Supervisor User Memory access
logic [1:0] privilege;
privilege_t privilege;
modport mmu (input virtual_address, request, execute, rnw, satp_ppn, mxr, sum, privilege, output write_entry, upper_physical_address, is_fault);
modport tlb (input write_entry, upper_physical_address, is_fault, output request, virtual_address, execute, rnw);
modport mmu (input virtual_address, request, execute, rnw, satp_ppn, mxr, sum, privilege, output write_entry, superpage, perms, upper_physical_address, is_fault);
modport tlb (input write_entry, superpage, perms, upper_physical_address, is_fault, mxr, sum, privilege, output request, virtual_address, execute, rnw);
modport csr (output satp_ppn, mxr, sum, privilege);
endinterface
@ -154,18 +159,17 @@ interface tlb_interface;
//TLB Inputs
logic [31:0] virtual_address;
logic rnw;
logic execute;
//TLB Outputs
logic is_fault;
logic [31:0] physical_address;
modport tlb (
input new_request, virtual_address, rnw, execute,
input new_request, virtual_address, rnw,
output ready, done, is_fault, physical_address
);
modport requester (
output new_request, virtual_address, rnw, execute,
output new_request, virtual_address, rnw,
input ready, done, is_fault, physical_address
);
endinterface
@ -181,6 +185,10 @@ interface load_store_queue_interface;
logic load_pop;
logic store_pop;
//Address translation
logic addr_push;
lsq_addr_entry_t addr_data_in;
//LSQ outputs
data_access_shared_inputs_t load_data_out;
data_access_shared_inputs_t store_data_out;
@ -193,15 +201,14 @@ interface load_store_queue_interface;
//LSQ status
logic sq_empty;
logic empty;
logic no_released_stores_pending;
modport queue (
input data_in, potential_push, push, load_pop, store_pop,
output full, load_data_out, store_data_out, load_valid, store_valid, sq_empty, empty, no_released_stores_pending
input data_in, potential_push, push, addr_push, addr_data_in, load_pop, store_pop,
output full, load_data_out, store_data_out, load_valid, store_valid, sq_empty, empty
);
modport ls (
output data_in, potential_push, push, load_pop, store_pop,
input full, load_data_out, store_data_out, load_valid, store_valid, sq_empty, empty, no_released_stores_pending
output data_in, potential_push, push, addr_push, addr_data_in, load_pop, store_pop,
input full, load_data_out, store_data_out, load_valid, store_valid, sq_empty, empty
);
endinterface
@ -221,15 +228,14 @@ interface store_queue_interface;
//SQ status
logic empty;
logic no_released_stores_pending;
modport queue (
input data_in, push, pop,
output full, data_out, valid, empty, no_released_stores_pending
output full, data_out, valid, empty
);
modport ls (
output data_in, push, pop,
input full, data_out, valid, empty, no_released_stores_pending
input full, data_out, valid, empty
);
endinterface
@ -258,23 +264,14 @@ interface cache_functions_interface #(parameter int TAG_W = 8, parameter int LIN
endinterface
interface addr_utils_interface #(parameter bit [31:0] BASE_ADDR = 32'h00000000, parameter bit [31:0] UPPER_BOUND = 32'hFFFFFFFF);
//Based on the lower and upper address ranges,
//find the number of bits needed to uniquely identify this memory range.
//Assumption: address range is aligned to its size
function int unsigned bit_range ();
for(int i=0; i < 32; i++) begin
if (BASE_ADDR[i] == UPPER_BOUND[i])
return (32 - i);
end
return 0;
endfunction
localparam int unsigned BIT_RANGE = bit_range();
/* verilator lint_off SELRANGE */
interface addr_utils_interface #(parameter logic [31:0] BASE_ADDR = 32'h00000000, parameter logic [31:0] UPPER_BOUND = 32'hFFFFFFFF);
//The range should be aligned for performance
function address_range_check (input logic[31:0] addr);
return (BIT_RANGE == 0) ? 1 : (addr[31:32-BIT_RANGE] == BASE_ADDR[31:32-BIT_RANGE]);
/* verilator lint_off UNSIGNED */
/* verilator lint_off CMPCONST */
return addr >= BASE_ADDR & addr <= UPPER_BOUND;
/* verilator lint_on UNSIGNED */
/* verilator lint_on CMPCONST */
endfunction
endinterface
@ -406,3 +403,30 @@ interface fp_intermediate_wb_interface;
input id, done, rd, expo_overflow, fflags, rm, hidden, grs, clz, carry, safe, subnormal, right_shift, right_shift_amt, ignore_max_expo, d2s
);
endinterface
//Signal bundle between a memory subunit and the atomic-operation unit.
//The subunit drives the reservation and read-modify-write requests;
//the amo_unit answers with reservation_valid and rd.
interface amo_interface;
    import riscv_types::*;

    //Load Reserved / Store Conditional
    logic set_reservation, clear_reservation;
    logic [31:0] reservation; //NOTE(review): presumably the address being reserved/checked - confirm
    logic reservation_valid;

    //Read-Modify-Write request (op, rs1, rs2) and its result (rd)
    logic rmw_valid;
    amo_t op;
    logic [31:0] rs1, rs2, rd;

    //Requesting side
    modport subunit (
        output set_reservation, clear_reservation, reservation, rmw_valid, op, rs1, rs2,
        input reservation_valid, rd
    );

    //Responding side: directions mirrored from subunit
    modport amo_unit (
        input set_reservation, clear_reservation, reservation, rmw_valid, op, rs1, rs2,
        output reservation_valid, rd
    );
endinterface

View file

@ -148,8 +148,8 @@ package opcodes;
localparam [31:0] AMO_MINU = 32'b11000????????????010?????0101111;
localparam [31:0] AMO_MAXU = 32'b11100????????????010?????0101111;
localparam [31:0] AMO_SWAP = 32'b00001????????????010?????0101111;
localparam [31:0] LR = 32'b00010??00000?????010?????0101111;
localparam [31:0] SC = 32'b00011????????????010?????0101111;
localparam [31:0] AMO_LR = 32'b00010??00000?????010?????0101111;
localparam [31:0] AMO_SC = 32'b00011????????????010?????0101111;
//Machine/Supervisor
localparam [31:0] SRET = 32'b00010000001000000000000001110011;

View file

@ -113,15 +113,23 @@ package riscv_types;
URET_imm = 12'b000000000010,
SRET_imm = 12'b000100000010,
MRET_imm = 12'b001100000010,
SFENCE_imm = 12'b0001001?????
SFENCE_imm = 12'b0001001?????,
WFI_imm = 12'b000100000101
} imm_sys_t;
//Other registers exist but are not supported
typedef enum logic [11:0] {
//Floating Point
FFLAGS = 12'h001,
FRM = 12'h002,
FCSR = 12'h003,
//Machine info
MVENDORID = 12'hF11,
MARCHID = 12'hF12,
MIMPID = 12'hF13,
MHARTID = 12'hF14,
MCONFIGPTR = 12'hF15,
//Machine trap setup
MSTATUS = 12'h300,
MISA = 12'h301,
@ -130,55 +138,79 @@ package riscv_types;
MIE = 12'h304,
MTVEC = 12'h305,
MCOUNTEREN = 12'h306,
MSTATUSH = 12'h310,
MEDELEGH = 12'h312,
//Machine trap handling
MSCRATCH = 12'h340,
MEPC = 12'h341,
MCAUSE = 12'h342,
MTVAL = 12'h343,
MIP = 12'h344,
//Machine configuration
MENVCFG = 12'h30A,
MENVCFGH = 12'h31A,
//No optional mseccfg/mseccfgh
//No PMP
//Machine Counters
MCYCLE = 12'hB00,
MINSTRET = 12'hB02,
MHPMCOUNTER3 = 12'hB03,
MHPMCOUNTER31 = 12'hB1F,
MCYCLEH = 12'hB80,
MINSTRETH = 12'hB82,
MHPMCOUNTER3H = 12'hB83,
MHPMCOUNTER31H = 12'hB9F,
//Machine counter setup
MCOUNTINHIBIT = 12'h320,
MHPMEVENT3 = 12'h323,
MHPMEVENT31 = 12'h33F,
MHPMEVENT3H = 12'h723,
MHPMEVENT31H = 12'h73F,
//Machine state enable
MSTATEEN0 = 12'h30C,
MSTATEEN1 = 12'h30D,
MSTATEEN2 = 12'h30E,
MSTATEEN3 = 12'h30F,
MSTATEEN0H = 12'h31C,
MSTATEEN1H = 12'h31D,
MSTATEEN2H = 12'h31E,
MSTATEEN3H = 12'h31F,
//Supervisor regs
//Supervisor Trap Setup
SSTATUS = 12'h100,
SEDELEG = 12'h102,
SIDELEG = 12'h103,
SIE = 12'h104,
STVEC = 12'h105,
SCOUNTEREN = 12'h106,
//Supervisor configuration
SENVCFG = 12'h10A,
//Supervisor trap handling
SSCRATCH = 12'h140,
SEPC = 12'h141,
SCAUSE = 12'h142,
STVAL = 12'h143,
SIP = 12'h144,
STIMECMP = 12'h14D,
STIMECMPH = 12'h15D,
//Supervisor address translation and protection
SATP = 12'h180,
//Supervisor state enable
SSTATEEN0 = 12'h10C,
SSTATEEN1 = 12'h10D,
SSTATEEN2 = 12'h10E,
SSTATEEN3 = 12'h10F,
//User regs
//USER Floating Point
FFLAGS = 12'h001,
FRM = 12'h002,
FCSR = 12'h003,
//User Counter Timers
//Timers and counters
CYCLE = 12'hC00,
TIME = 12'hC01,
INSTRET = 12'hC02,
HPMCOUNTER3 = 12'hC03,
HPMCOUNTER31 = 12'hC1F,
CYCLEH = 12'hC80,
TIMEH = 12'hC81,
INSTRETH = 12'hC82,
//Debug regs
DCSR = 12'h7B0,
DPC = 12'h7B1,
DSCRATCH = 12'h7B2
HPMCOUNTER3H = 12'hC83,
HPMCOUNTER31H = 12'hC9F
} csr_reg_addr_t;
typedef enum logic [2:0] {
@ -198,11 +230,6 @@ package riscv_types;
CSR_RC = 2'b11
} csr_op_t;
typedef enum logic [4:0] {
BARE = 5'd0,
SV32 = 5'd8
} vm_t;
localparam ASIDLEN = 9;//pid
typedef enum logic [ECODE_W-1:0] {
@ -221,7 +248,9 @@ package riscv_types;
INST_PAGE_FAULT = 5'd12,
LOAD_PAGE_FAULT = 5'd13,
//reserved
STORE_OR_AMO_PAGE_FAULT = 5'd15
STORE_OR_AMO_PAGE_FAULT = 5'd15,
SOFTWARE_CHECK = 5'd18,
HARDWARE_ERROR = 5'd19
//reserved
} exception_code_t;
@ -238,7 +267,9 @@ package riscv_types;
//RESERVED
S_EXTERNAL_INTERRUPT = 5'd9,
//RESERVED
M_EXTERNAL_INTERRUPT = 5'd11
M_EXTERNAL_INTERRUPT = 5'd11,
//RESERVED
LOCAL_COUNT_OVERFLOW_INTERRUPT = 5'd13
} interrupt_code_t;
typedef enum bit [4:0] {
@ -255,6 +286,12 @@ package riscv_types;
AMO_MAXU_FN5 = 5'b11100
} amo_t;
//Two-bit operation selector; the encoding 2'd3 is not assigned a name.
//NOTE(review): presumably the cache-block-operation kinds gated by INCLUDE_CBO - confirm
typedef enum bit [1:0] {
    INVAL = 2'd0,
    CLEAN = 2'd1,
    FLUSH = 2'd2
} cbo_t;
//Assembly register definitions for simulation purposes
typedef struct packed{
logic [XLEN-1:0] zero;

View file

@ -624,4 +624,4 @@ for (index = 0; index < NUM_CPUS; index=index+1) begin
end
end
endgenerate
endmodule
endmodule

View file

@ -67,7 +67,7 @@ module l1_to_wishbone
assign data_fifo.pop = wishbone.we & wishbone.ack;
assign data_fifo.data_in = '{
data : cpu.wr_data,
be : cpu_wr_data_be
be : cpu.wr_data_be
};
assign data_request = data_fifo.data_out;

View file

@ -1,5 +1,5 @@
/*
* Copyright © 2022 Eric Matthews, Lesley Shannon
* Copyright © 2022 Eric Matthews, Lesley Shannon
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -24,42 +24,22 @@ module litex_wrapper
import cva5_config::*;
import cva5_types::*;
import l2_config_and_types::*;
import riscv_types::*;
#(
parameter LITEX_VARIANT = 0,
parameter bit [31:0] RESET_VEC = 0,
parameter bit [31:0] NON_CACHABLE_L = 32'h80000000,
parameter bit [31:0] NON_CACHABLE_H =32'hFFFFFFFF
parameter bit [31:0] NON_CACHABLE_H = 32'hFFFFFFFF
)
(
input logic clk,
input logic rst,
input logic [15:0] litex_interrupt,
output logic [29:0] ibus_adr,
output logic [31:0] ibus_dat_w,
output logic [3:0] ibus_sel,
output logic ibus_cyc,
output logic ibus_stb,
output logic ibus_we,
output logic ibus_cti,
output logic ibus_bte,
input logic [31:0] ibus_dat_r,
input logic ibus_ack,
input logic ibus_err,
output logic [29:0] dbus_adr,
output logic [31:0] dbus_dat_w,
output logic [3:0] dbus_sel,
output logic dbus_cyc,
output logic dbus_stb,
output logic dbus_we,
output logic dbus_cti,
output logic dbus_bte,
input logic [31:0] dbus_dat_r,
input logic dbus_ack,
input logic dbus_err,
input logic cpu_m_interrupt,
input logic cpu_s_interrupt,
input logic cpu_software_in,
input logic cpu_timer_in,
input logic [63:0] mtime,
output logic [29:0] idbus_adr,
output logic [31:0] idbus_dat_w,
@ -74,122 +54,6 @@ module litex_wrapper
input logic idbus_err
);
localparam wb_group_config_t MINIMAL_WB_GROUP_CONFIG = '{
0 : '{0: ALU_ID, default : NON_WRITEBACK_ID},
1 : '{0: LS_ID, 1: CSR_ID, default : NON_WRITEBACK_ID},
default : '{default : NON_WRITEBACK_ID}
};
localparam cpu_config_t MINIMAL_CONFIG = '{
//ISA options
INCLUDE_M_MODE : 1,
INCLUDE_S_MODE : 0,
INCLUDE_U_MODE : 0,
INCLUDE_UNIT : '{
ALU : 1,
LS : 1,
MUL : 0,
DIV : 0,
CSR : 1,
CUSTOM : 0,
BR : 1,
IEC : 1
},
INCLUDE_IFENCE : 0,
INCLUDE_AMO : 0,
//CSR constants
CSRS : '{
MACHINE_IMPLEMENTATION_ID : 0,
CPU_ID : 0,
RESET_VEC : RESET_VEC,
RESET_MTVEC : 32'h00000000,
NON_STANDARD_OPTIONS : '{
COUNTER_W : 33,
MCYCLE_WRITEABLE : 0,
MINSTR_WRITEABLE : 0,
MTVEC_WRITEABLE : 1,
INCLUDE_MSCRATCH : 0,
INCLUDE_MCAUSE : 1,
INCLUDE_MTVAL : 1
}
},
//Memory Options
SQ_DEPTH : 2,
INCLUDE_FORWARDING_TO_STORES : 0,
INCLUDE_ICACHE : 0,
ICACHE_ADDR : '{
L: 32'h40000000,
H: 32'h4FFFFFFF
},
ICACHE : '{
LINES : 512,
LINE_W : 4,
WAYS : 2,
USE_EXTERNAL_INVALIDATIONS : 0,
USE_NON_CACHEABLE : 0,
NON_CACHEABLE : '{
L: 32'h00000000,
H: 32'h00000000
}
},
ITLB : '{
WAYS : 2,
DEPTH : 64
},
INCLUDE_DCACHE : 0,
DCACHE_ADDR : '{
L: 32'h40000000,
H: 32'h4FFFFFFF
},
DCACHE : '{
LINES : 512,
LINE_W : 4,
WAYS : 2,
USE_EXTERNAL_INVALIDATIONS : 0,
USE_NON_CACHEABLE : 0,
NON_CACHEABLE : '{
L: 32'h00000000,
H: 32'h00000000
}
},
DTLB : '{
WAYS : 2,
DEPTH : 64
},
INCLUDE_ILOCAL_MEM : 0,
ILOCAL_MEM_ADDR : '{
L : 32'h80000000,
H : 32'h8FFFFFFF
},
INCLUDE_DLOCAL_MEM : 0,
DLOCAL_MEM_ADDR : '{
L : 32'h80000000,
H : 32'h8FFFFFFF
},
INCLUDE_IBUS : 1,
IBUS_ADDR : '{
L : 32'h00000000,
H : 32'hFFFFFFFF
},
INCLUDE_PERIPHERAL_BUS : 1,
PERIPHERAL_BUS_ADDR : '{
L : 32'h00000000,
H : 32'hFFFFFFFF
},
PERIPHERAL_BUS_TYPE : WISHBONE_BUS,
//Branch Predictor Options
INCLUDE_BRANCH_PREDICTOR : 0,
BP : '{
WAYS : 2,
ENTRIES : 512,
RAS_ENTRIES : 8
},
//Writeback Options
NUM_WB_GROUPS : 2,
WB_GROUP : MINIMAL_WB_GROUP_CONFIG
};
localparam wb_group_config_t STANDARD_WB_GROUP_CONFIG = '{
0 : '{0: ALU_ID, default : NON_WRITEBACK_ID},
1 : '{0: LS_ID, default : NON_WRITEBACK_ID},
@ -199,48 +63,49 @@ module litex_wrapper
localparam cpu_config_t STANDARD_CONFIG = '{
//ISA options
INCLUDE_M_MODE : 1,
INCLUDE_S_MODE : 0,
INCLUDE_U_MODE : 0,
MODES : MSU,
INCLUDE_UNIT : '{
ALU : 1,
LS : 1,
MUL : 1,
DIV : 1,
CSR : 1,
FPU : 0,
CUSTOM : 0,
BR : 1,
IEC : 1
GC : 1
},
INCLUDE_IFENCE : 0,
INCLUDE_AMO : 0,
INCLUDE_IFENCE : 1,
INCLUDE_AMO : 1,
INCLUDE_CBO : 0,
//CSR constants
CSRS : '{
MACHINE_IMPLEMENTATION_ID : 0,
CPU_ID : 0,
RESET_VEC : RESET_VEC,
RESET_MTVEC : 32'h00000000,
NON_STANDARD_OPTIONS : '{
COUNTER_W : 33,
MCYCLE_WRITEABLE : 0,
MINSTR_WRITEABLE : 0,
MTVEC_WRITEABLE : 1,
INCLUDE_MSCRATCH : 0,
INCLUDE_MCAUSE : 1,
INCLUDE_MTVAL : 1
}
RESET_TVEC : 32'h00000000,
MCONFIGPTR : '0,
INCLUDE_ZICNTR : 1,
INCLUDE_ZIHPM : 1,
INCLUDE_SSTC : 1,
INCLUDE_SMSTATEEN : 1
},
//Memory Options
SQ_DEPTH : 4,
INCLUDE_FORWARDING_TO_STORES : 1,
AMO_UNIT : '{
LR_WAIT : 8,
RESERVATION_WORDS : 8
},
INCLUDE_ICACHE : 1,
ICACHE_ADDR : '{
L : 32'h00000000,
H : 32'hFFFFFFFF
H : 32'h7FFFFFFF
},
ICACHE : '{
LINES : 512,
LINE_W : 4,
LINE_W : 8,
WAYS : 2,
USE_EXTERNAL_INVALIDATIONS : 0,
USE_NON_CACHEABLE : 0,
@ -260,7 +125,7 @@ module litex_wrapper
},
DCACHE : '{
LINES : 512,
LINE_W : 4,
LINE_W : 8,
WAYS : 2,
USE_EXTERNAL_INVALIDATIONS : 0,
USE_NON_CACHEABLE : 1,
@ -306,23 +171,15 @@ module litex_wrapper
WB_GROUP : STANDARD_WB_GROUP_CONFIG
};
function cpu_config_t config_select (input integer variant);
case (variant)
0 : config_select = MINIMAL_CONFIG;
1 : config_select = STANDARD_CONFIG;
default : config_select = STANDARD_CONFIG;
endcase
endfunction
localparam cpu_config_t LITEX_CONFIG = config_select(LITEX_VARIANT);
//Unused interfaces
axi_interface m_axi();
avalon_interface m_avalon();
local_memory_interface instruction_bram();
local_memory_interface data_bram();
interrupt_t s_interrupt;
assign s_interrupt.software = 0;
assign s_interrupt.timer = cpu_timer_in;
assign s_interrupt.external = cpu_s_interrupt;
//L2 to Wishbone
l2_requester_interface l2();
@ -334,50 +191,24 @@ module litex_wrapper
//Timer and External interrupts
interrupt_t m_interrupt;
assign m_interrupt.software = 0;
assign m_interrupt.timer = litex_interrupt[1];
assign m_interrupt.external = litex_interrupt[0];
assign m_interrupt.software = cpu_software_in;
assign m_interrupt.timer = 0;
//assign m_interrupt.timer = cpu_timer_in;
assign m_interrupt.external = cpu_m_interrupt;
cva5 #(.CONFIG(LITEX_CONFIG)) cpu(.*);
generate if (LITEX_VARIANT != 0) begin : l1_arb_gen
l1_to_wishbone arb(.*, .cpu(l2), .wishbone(idwishbone));
assign idbus_adr = idwishbone.adr;
assign idbus_dat_w = idwishbone.dat_w;
assign idbus_sel = idwishbone.sel;
assign idbus_cyc = idwishbone.cyc;
assign idbus_stb = idwishbone.stb;
assign idbus_we = idwishbone.we;
assign idbus_cti = idwishbone.cti;
assign idbus_bte = idwishbone.bte;
assign idwishbone.dat_r = idbus_dat_r;
assign idwishbone.ack = idbus_ack;
assign idwishbone.err = idbus_err;
end else begin
assign ibus_adr = iwishbone.adr;
assign ibus_dat_w = iwishbone.dat_w;
assign ibus_sel = iwishbone.sel;
assign ibus_cyc = iwishbone.cyc;
assign ibus_stb = iwishbone.stb;
assign ibus_we = iwishbone.we;
assign ibus_cti = iwishbone.cti;
assign ibus_bte = iwishbone.bte;
assign iwishbone.dat_r = ibus_dat_r;
assign iwishbone.ack = ibus_ack;
assign iwishbone.err = ibus_err;
assign dbus_adr = dwishbone.adr;
assign dbus_dat_w = dwishbone.dat_w;
assign dbus_sel = dwishbone.sel;
assign dbus_cyc = dwishbone.cyc;
assign dbus_stb = dwishbone.stb;
assign dbus_we = dwishbone.we;
assign dbus_cti = dwishbone.cti;
assign dbus_bte = dwishbone.bte;
assign dwishbone.dat_r = dbus_dat_r;
assign dwishbone.ack = dbus_ack;
assign dwishbone.err = dbus_err;
end endgenerate
cva5 #(.CONFIG(STANDARD_CONFIG)) cpu(.*);
l1_to_wishbone arb(.*, .cpu(l2), .wishbone(idwishbone));
assign idbus_adr = idwishbone.adr;
assign idbus_dat_w = idwishbone.dat_w;
assign idbus_sel = idwishbone.sel;
assign idbus_cyc = idwishbone.cyc;
assign idbus_stb = idwishbone.stb;
assign idbus_we = idwishbone.we;
assign idbus_cti = idwishbone.cti;
assign idbus_bte = idwishbone.bte;
assign idwishbone.dat_r = idbus_dat_r;
assign idwishbone.ack = idbus_ack;
assign idwishbone.err = idbus_err;
endmodule

View file

@ -34,19 +34,17 @@ package nexys_config;
localparam cpu_config_t NEXYS_CONFIG = '{
//ISA options
INCLUDE_M_MODE : 1,
INCLUDE_S_MODE : 0,
INCLUDE_U_MODE : 0,
MODES : MSU,
INCLUDE_UNIT : '{
ALU : 1,
LS : 1,
MUL : 1,
DIV : 1,
CSR : 1,
FPU : 0,
FPU : 1,
CUSTOM : 0,
BR : 1,
IEC : 1
GC : 1
},
INCLUDE_IFENCE : 0,
INCLUDE_AMO : 0,
@ -57,20 +55,20 @@ package nexys_config;
MACHINE_IMPLEMENTATION_ID : 0,
CPU_ID : 0,
RESET_VEC : 32'h80000000,
RESET_MTVEC : 32'h80000000,
NON_STANDARD_OPTIONS : '{
COUNTER_W : 33,
MCYCLE_WRITEABLE : 0,
MINSTR_WRITEABLE : 0,
MTVEC_WRITEABLE : 1,
INCLUDE_MSCRATCH : 0,
INCLUDE_MCAUSE : 1,
INCLUDE_MTVAL : 1
}
RESET_TVEC : 32'h00000000,
MCONFIGPTR : '0,
INCLUDE_ZICNTR : 1,
INCLUDE_ZIHPM : 1,
INCLUDE_SSTC : 1,
INCLUDE_SMSTATEEN : 1
},
//Memory Options
SQ_DEPTH : 8,
INCLUDE_FORWARDING_TO_STORES : 1,
AMO_UNIT : '{
LR_WAIT : 32,
RESERVATION_WORDS : 8 //Must be the same size as the DCACHE line width
},
INCLUDE_ICACHE : 1,
ICACHE_ADDR : '{
L : 32'h80000000,
@ -126,10 +124,10 @@ package nexys_config;
L : 32'h00000000,
H : 32'hFFFFFFFF
},
INCLUDE_PERIPHERAL_BUS : 0,
INCLUDE_PERIPHERAL_BUS : 1,
PERIPHERAL_BUS_ADDR : '{
L : 32'h00000000,
H : 32'hFFFFFFFF
L : 32'h60000000,
H : 32'h6FFFFFFF
},
PERIPHERAL_BUS_TYPE : AXI_BUS,
//Branch Predictor Options

View file

@ -102,13 +102,9 @@ module cva5_sim
output logic store_queue_empty
);
parameter SCRATCH_MEM_KB = 128;
parameter MEM_LINES = (SCRATCH_MEM_KB*1024)/4;
parameter UART_ADDR = 32'h88001000;
parameter UART_ADDR_LINE_STATUS = 32'h88001014;
interrupt_t s_interrupt;
interrupt_t m_interrupt;
logic[63:0] mtime;
assign s_interrupt = '{default: 0};
assign m_interrupt = '{default: 0};
@ -139,14 +135,50 @@ module cva5_sim
l1_to_axi arb(.*, .cpu(l2), .axi(axi));
cva5 #(.CONFIG(NEXYS_CONFIG)) cpu(.*);
initial begin
write_uart = 0;
uart_byte = 0;
end
//Capture writes to UART
always_ff @(posedge clk) begin
write_uart <= (axi.wvalid && axi.wready && axi.awaddr == UART_ADDR);
uart_byte <= axi.wdata[7:0];
if (rst) begin
m_axi.awready <= 1;
m_axi.wready <= 0;
m_axi.bvalid <= 0;
write_uart <= 0;
end
else begin
write_uart <= 0;
if (m_axi.awvalid & m_axi.awready) begin
m_axi.awready <= 0;
m_axi.wready <= 1;
end
else if (m_axi.wvalid & m_axi.wready) begin
m_axi.wready <= 0;
m_axi.bvalid <= 1;
write_uart <= 1;
end
else if (m_axi.bvalid & m_axi.bready) begin
m_axi.bvalid <= 0;
m_axi.awready <= 1;
end
end
uart_byte <= m_axi.wdata[7:0];
end
//Simulated UART read channel: every read returns the same constant word
assign m_axi.rdata = 32'hFFFFFF21;

//Read handshake, one transaction at a time.
//Reset leaves arready high and rvalid low; each accepted handshake swaps them.
always_ff @(posedge clk) begin
    if (rst) begin
        m_axi.arready <= 1;
        m_axi.rvalid <= 0;
    end
    else if (m_axi.arvalid & m_axi.arready) begin
        //Address accepted: stop taking addresses and present the data
        m_axi.arready <= 0;
        m_axi.rvalid <= 1;
    end
    else if (m_axi.rvalid & m_axi.rready) begin
        //Data taken: drop rvalid and accept the next address
        m_axi.rvalid <= 0;
        m_axi.arready <= 1;
    end
end
////////////////////////////////////////////////////
@ -261,9 +293,9 @@ module cva5_sim
end endgenerate
generate if (NEXYS_CONFIG.INCLUDE_DCACHE) begin
assign dcache_hit = `DCACHE_P.load_hit;
assign dcache_miss = `DCACHE_P.line_complete;
assign darb_stall = cpu.l1_request[L1_DCACHE_ID].request & ~cpu.l1_request[L1_DCACHE_ID].ack;
// assign dcache_hit = `DCACHE_P.load_hit;
// assign dcache_miss = `DCACHE_P.line_complete;
// assign darb_stall = cpu.l1_request[L1_DCACHE_ID].request & ~cpu.l1_request[L1_DCACHE_ID].ack;
end endgenerate
logic [MAX_NUM_UNITS-1:0] unit_ready;
@ -414,7 +446,7 @@ module cva5_sim
assign retire_ports_valid[i] = cpu.retire_port_valid[i];
end endgenerate
assign store_queue_empty = cpu.load_store_status.sq_empty;
assign store_queue_empty = ~cpu.load_store_status.outstanding_store;
////////////////////////////////////////////////////
//Assertion Binding

View file

@ -139,4 +139,3 @@ module nexys_wrapper
cva5 #(.CONFIG(NEXYS_CONFIG)) cpu(.rst(rst_r2), .*);
endmodule

0
examples/zedboard/README.md Executable file → Normal file
View file

0
examples/zedboard/arm.tcl Executable file → Normal file
View file

0
examples/zedboard/cva5.png Executable file → Normal file
View file

Before

Width:  |  Height:  |  Size: 88 KiB

After

Width:  |  Height:  |  Size: 88 KiB

Before After
Before After

0
examples/zedboard/cva5_small.png Executable file → Normal file
View file

Before

Width:  |  Height:  |  Size: 43 KiB

After

Width:  |  Height:  |  Size: 43 KiB

Before After
Before After

4
examples/zedboard/cva5_wrapper.sv Executable file → Normal file
View file

@ -236,7 +236,7 @@ module cva5_wrapper (
//design_2 infra(.*);
generate
if (EXAMPLE_CONFIG.INCLUDE_S_MODE || EXAMPLE_CONFIG.INCLUDE_ICACHE || EXAMPLE_CONFIG.INCLUDE_DCACHE) begin
if (EXAMPLE_CONFIG.MODES == MSU || EXAMPLE_CONFIG.INCLUDE_ICACHE || EXAMPLE_CONFIG.INCLUDE_DCACHE) begin
l2_arbiter l2_arb (.*, .request(l2));
axi_to_arb l2_to_mem (.*, .l2(mem));
end
@ -258,4 +258,4 @@ module cva5_wrapper (
.data_out_b(data_bram.data_out)
);
endmodule
endmodule

0
examples/zedboard/simulator_output_example.png Executable file → Normal file
View file

Before

Width:  |  Height:  |  Size: 52 KiB

After

Width:  |  Height:  |  Size: 52 KiB

Before After
Before After

0
examples/zedboard/system.png Executable file → Normal file
View file

Before

Width:  |  Height:  |  Size: 9.2 KiB

After

Width:  |  Height:  |  Size: 9.2 KiB

Before After
Before After

0
examples/zedboard/system_periperhals.tcl Executable file → Normal file
View file

0
formal/interfaces/axi4_basic_props.sv Executable file → Normal file
View file

0
formal/models/cva5_fbm.sv Executable file → Normal file
View file

0
formal/models/cva5_formal_wrapper.sv Executable file → Normal file
View file

1
l2_arbiter/axi_to_arb.sv Executable file → Normal file
View file

@ -238,4 +238,3 @@ module axi_to_arb
assign l2.rd_data_valid = axi_rvalid;
endmodule

0
l2_arbiter/l2_arbiter.sv Executable file → Normal file
View file

0
l2_arbiter/l2_config_and_types.sv Executable file → Normal file
View file

0
l2_arbiter/l2_fifo.sv Executable file → Normal file
View file

0
l2_arbiter/l2_interfaces.sv Executable file → Normal file
View file

0
l2_arbiter/l2_reservation_logic.sv Executable file → Normal file
View file

0
l2_arbiter/l2_round_robin.sv Executable file → Normal file
View file

0
test_benches/axi_mem_sim.sv Executable file → Normal file
View file

0
test_benches/cva5_tb.sv Executable file → Normal file
View file

0
test_benches/cva5_tb.wcfg Executable file → Normal file
View file

0
test_benches/sim_mem.sv Executable file → Normal file
View file

0
test_benches/unit_test_benches/alu_unit_tb.sv Executable file → Normal file
View file

0
test_benches/unit_test_benches/div_unit_tb.sv Executable file → Normal file
View file

0
test_benches/unit_test_benches/mul_unit_tb.sv Executable file → Normal file
View file

View file

@ -152,13 +152,15 @@ void CVA5Tracer::start_tracer(const char *trace_file) {
}
uint64_t CVA5Tracer::cycle_count = 0;
//Returns the class-wide cycle counter (CVA5Tracer::cycle_count is a static member,
//so no tracer instance is needed to read it).
uint64_t CVA5Tracer::get_cycle_count() {
    return CVA5Tracer::cycle_count;
}
CVA5Tracer::CVA5Tracer(std::ifstream& programFile) {
cycle_count = 0;
#ifdef TRACE_ON
Verilated::traceEverOn(true);
#endif

View file

@ -54,7 +54,7 @@ public:
void set_log_file(std::ofstream* logFile);
void set_pc_file(std::ofstream* pcFile);
void start_tracer(const char *trace_file);
uint64_t get_cycle_count();
static uint64_t get_cycle_count();
//DDR Simulation
Vcva5_sim *tb;
@ -71,7 +71,7 @@ private:
int reset_length = 64;
int stall_limit = 2000;
int stall_count = 0;
uint64_t cycle_count = 0;
static uint64_t cycle_count;
bool program_complete = false;

View file

@ -12,7 +12,7 @@ CVA5Tracer *cva5Tracer;
char* csv_log_name;
//For time index on assertions
double sc_time_stamp () {
return cva5Tracer->get_cycle_count();
return CVA5Tracer::get_cycle_count();
}
const char* cva5_csv_log_file_name () {

View file

@ -649,7 +649,7 @@ module cva5_sim
assign retire_ports_valid[i] = cpu.retire_port_valid[i];
end endgenerate
assign store_queue_empty = cpu.load_store_status.sq_empty;
assign store_queue_empty = ~cpu.load_store_status.outstanding_store;
////////////////////////////////////////////////////
//Assertion Binding

View file

@ -14,11 +14,14 @@ local_memory/local_mem.sv
core/types_and_interfaces/internal_interfaces.sv
core/types_and_interfaces/external_interfaces.sv
core/common_components/lutram_1w_1r.sv
core/common_components/lutram_1w_mr.sv
core/common_components/dual_port_bram.sv
core/common_components/ram/lutram_1w_1r.sv
core/common_components/ram/lutram_1w_mr.sv
core/common_components/ram/sdp_ram.sv
core/common_components/ram/sdp_ram_padded.sv
core/common_components/ram/dual_port_bram.sv
core/common_components/set_clr_reg_with_rst.sv
core/common_components/one_hot_to_integer.sv
core/common_components/one_hot_mux.sv
core/common_components/cycler.sv
core/common_components/lfsr.sv
core/common_components/cva5_fifo.sv
@ -44,8 +47,8 @@ core/memory_sub_units/axi_master.sv
core/memory_sub_units/avalon_master.sv
core/memory_sub_units/wishbone_master.sv
core/execution_units/load_store_unit/dcache_tag_banks.sv
core/execution_units/load_store_unit/amo_alu.sv
core/execution_units/load_store_unit/amo_unit.sv
core/execution_units/load_store_unit/dcache.sv
core/execution_units/load_store_unit/addr_hash.sv
core/execution_units/load_store_unit/store_queue.sv
@ -90,8 +93,10 @@ core/fetch_stage/fetch.sv
core/instruction_metadata_and_id_management.sv
core/tlb_lut_ram.sv
core/mmu.sv
core/mmu/perms_check.sv
core/mmu/itlb.sv
core/mmu/dtlb.sv
core/mmu/mmu.sv
core/decode_and_issue.sv