Merge branch 'synth' into 'master'

Synth

Closes #26

See merge request !2
This commit is contained in:
Florian Zaruba 2017-06-25 17:34:09 +02:00
commit f8d1231cd5
36 changed files with 1373 additions and 1150 deletions

View file

@ -40,10 +40,11 @@ riscv-tests = rv64ui-p-add rv64ui-p-addi rv64ui-p-slli rv64ui-p-addiw rv64ui-p-a
rv64ui-p-sraiw rv64ui-p-sraw rv64ui-p-srl rv64ui-p-srli rv64ui-p-srliw rv64ui-p-srlw \
rv64ui-p-lb rv64ui-p-lbu rv64ui-p-ld rv64ui-p-lh rv64ui-p-lhu rv64ui-p-lui \
rv64ui-p-lw rv64ui-p-lwu \
rv64mi-p-csr rv64mi-p-mcsr rv64mi-p-illegal rv64mi-p-ma_addr rv64mi-p-ma_fetch rv64mi-p-sbreak rv64mi-p-scall \
rv64mi-p-csr rv64mi-p-mcsr rv64mi-p-illegal \
rv64mi-p-ma_addr rv64mi-p-ma_fetch rv64mi-p-sbreak rv64mi-p-scall \
rv64si-p-csr rv64si-p-ma_fetch rv64si-p-scall rv64si-p-wfi rv64si-p-sbreak \
rv64uc-p-rvc rv64si-p-dirty \
rv64ui-v-add
rv64si-p-dirty rv64uc-p-rvc \
rv64ui-v-sll
riscv-test = rv64ui-p-add
@ -138,4 +139,4 @@ clean:
.PHONY:
build lint build-moore
# make CC=/usr/pack/modelsim-10.6-kgf/questasim/gcc-5.3.0-linux_x86_64/bin/gcc CXX=/usr/pack/modelsim-10.6-kgf/questasim/gcc-5.3.0-linux_x86_64/bin/g++ -j20
# make CC=/usr/pack/modelsim-10.6-kgf/questasim/gcc-5.3.0-linux_x86_64/bin/gcc CXX=/usr/pack/modelsim-10.6-kgf/questasim/gcc-5.3.0-linux_x86_64/bin/g++ -j20

View file

@ -1,3 +1,4 @@
/* File: ariane_pkg.svh
* Author: Florian Zaruba <zarubaf@ethz.ch>
* Date: 8.4.2017
@ -14,11 +15,14 @@ package ariane_pkg;
// ---------------
// Global Config
// ---------------
localparam NR_SB_ENTRIES = 4; // number of scoreboard entries
localparam NR_SB_ENTRIES = 8; // number of scoreboard entries
localparam TRANS_ID_BITS = $clog2(NR_SB_ENTRIES); // depending on the number of scoreboard entries we need that many bits
// to uniquely identify the entry in the scoreboard
localparam NR_WB_PORTS = 4;
localparam ASID_WIDTH = 1;
localparam BTB_ENTRIES = 64;
localparam BITS_SATURATION_COUNTER = 2;
localparam logic [63:0] ISA_CODE = (1 << 2) // C - Compressed extension
| (1 << 8) // I - RV32I/64I/128I base ISA
| (1 << 12) // M - Integer Multiply/Divide extension
@ -91,6 +95,16 @@ package ariane_pkg;
// LSU functions
LD, SD, LW, LWU, SW, LH, LHU, SH, LB, SB, LBU
} fu_op;
// Packed bundle of the fields that describe one load/store unit (LSU) request.
// NOTE(review): the per-field meanings below are inferred from the field names
// and widths only - confirm against the LSU implementation.
typedef struct packed {
logic valid; // presumably marks this bundle as holding a valid request - TODO confirm
logic [63:0] vaddr; // 64-bit address; the name suggests a virtual (untranslated) address
logic [63:0] data; // 64-bit data word
logic [7:0] be; // 8 bits; presumably one byte-enable per byte of data - TODO confirm
fu_t fu; // functional unit selector, of type fu_t
fu_op operator; // operation to perform, of type fu_op
logic [TRANS_ID_BITS-1:0] trans_id; // transaction ID, TRANS_ID_BITS wide (enough to uniquely identify a scoreboard entry)
} lsu_ctrl_t;
// ---------------
// IF/ID Stage
// ---------------
@ -198,8 +212,9 @@ package ariane_pkg;
// memory management, pte
typedef struct packed {
logic[37:0] ppn;
logic[1:0] sw_reserved;
logic [9:0] reserved;
logic [43:0] ppn;
logic [1:0] rsw;
logic d;
logic a;
logic g;
@ -217,27 +232,27 @@ package ariane_pkg;
// ----------------------
// Exception Cause Codes
// ----------------------
localparam logic [63:0] INSTR_ADDR_MISALIGNED = 64'd0;
localparam logic [63:0] INSTR_ACCESS_FAULT = 64'd1;
localparam logic [63:0] ILLEGAL_INSTR = 64'd2;
localparam logic [63:0] BREAKPOINT = 64'd3;
localparam logic [63:0] LD_ADDR_MISALIGNED = 64'd4;
localparam logic [63:0] LD_ACCESS_FAULT = 64'd5;
localparam logic [63:0] ST_ADDR_MISALIGNED = 64'd6;
localparam logic [63:0] ST_ACCESS_FAULT = 64'd7;
localparam logic [63:0] ENV_CALL_UMODE = 64'd8; // environment call from user mode
localparam logic [63:0] ENV_CALL_SMODE = 64'd9; // environment call from supervisor mode
localparam logic [63:0] ENV_CALL_MMODE = 64'd11; // environment call from machine mode
localparam logic [63:0] INSTR_PAGE_FAULT = 64'd12; // Instruction page fault
localparam logic [63:0] LOAD_PAGE_FAULT = 64'd13; // Load page fault
localparam logic [63:0] STORE_PAGE_FAULT = 64'd15; // Store page fault
localparam logic [63:0] INSTR_ADDR_MISALIGNED = 0;
localparam logic [63:0] INSTR_ACCESS_FAULT = 1;
localparam logic [63:0] ILLEGAL_INSTR = 2;
localparam logic [63:0] BREAKPOINT = 3;
localparam logic [63:0] LD_ADDR_MISALIGNED = 4;
localparam logic [63:0] LD_ACCESS_FAULT = 5;
localparam logic [63:0] ST_ADDR_MISALIGNED = 6;
localparam logic [63:0] ST_ACCESS_FAULT = 7;
localparam logic [63:0] ENV_CALL_UMODE = 8; // environment call from user mode
localparam logic [63:0] ENV_CALL_SMODE = 9; // environment call from supervisor mode
localparam logic [63:0] ENV_CALL_MMODE = 11; // environment call from machine mode
localparam logic [63:0] INSTR_PAGE_FAULT = 12; // Instruction page fault
localparam logic [63:0] LOAD_PAGE_FAULT = 13; // Load page fault
localparam logic [63:0] STORE_PAGE_FAULT = 15; // Store page fault
localparam logic [63:0] S_SW_INTERRUPT = (1 << 63) | 64'd1;
localparam logic [63:0] M_SW_INTERRUPT = (1 << 63) | 64'd3;
localparam logic [63:0] S_TIMER_INTERRUPT = (1 << 63) | 64'd5;
localparam logic [63:0] M_TIMER_INTERRUPT = (1 << 63) | 64'd7;
localparam logic [63:0] S_EXT_INTERRUPT = (1 << 63) | 64'd9;
localparam logic [63:0] M_EXT_INTERRUPT = (1 << 63) | 64'd11;
localparam logic [63:0] S_SW_INTERRUPT = (1 << 63) | 1;
localparam logic [63:0] M_SW_INTERRUPT = (1 << 63) | 3;
localparam logic [63:0] S_TIMER_INTERRUPT = (1 << 63) | 5;
localparam logic [63:0] M_TIMER_INTERRUPT = (1 << 63) | 7;
localparam logic [63:0] S_EXT_INTERRUPT = (1 << 63) | 9;
localparam logic [63:0] M_EXT_INTERRUPT = (1 << 63) | 11;
// -----
// CSRs
// -----

View file

@ -123,9 +123,9 @@ module alu
assign shift_op_a_64 = { shift_arithmetic & shift_op_a[63], shift_op_a};
assign shift_op_a_32 = { shift_arithmetic & shift_op_a[31], shift_op_a32};
assign shift_right_result = $signed(shift_op_a_64) >>> shift_amt[5:0];
assign shift_right_result = $unsigned($signed(shift_op_a_64) >>> shift_amt[5:0]);
assign shift_right_result32 = $signed(shift_op_a_32) >>> shift_amt[4:0];
assign shift_right_result32 = $unsigned($signed(shift_op_a_32) >>> shift_amt[4:0]);
// bit reverse the shift_right_result for left shifts
genvar j;
generate

View file

@ -118,8 +118,17 @@ module ariane
logic fetch_valid_if_id;
logic decode_ack_id_if;
exception exception_if_id;
// --------------
// ID <-> EX
// ID <-> ISSUE
// --------------
scoreboard_entry issue_entry_id_issue;
logic issue_entry_valid_id_issue;
logic is_ctrl_fow_id_issue;
logic issue_instr_issue_id;
// --------------
// ISSUE <-> EX
// --------------
logic [63:0] imm_id_ex;
logic [TRANS_ID_BITS-1:0] trans_id_id_ex;
@ -247,7 +256,6 @@ module ariane
if_stage if_stage_i (
.flush_i ( flush_ctrl_if ),
.if_busy_o ( if_ready_if_pcgen ),
.id_ready_i ( ready_id_if ),
.fetch_address_i ( fetch_address_pcgen_if ),
.fetch_valid_i ( fetch_valid_pcgen_if ),
.branch_predict_i ( branch_predict_pcgen_if ),
@ -267,23 +275,42 @@ module ariane
// ---------
// ID
// ---------
id_stage
id_stage id_stage_i (
.flush_i ( flush_ctrl_if ),
.fetch_entry_i ( fetch_entry_if_id ),
.fetch_entry_valid_i ( fetch_valid_if_id ),
.decoded_instr_ack_o ( decode_ack_id_if ),
.issue_entry_o ( issue_entry_id_issue ),
.issue_entry_valid_o ( issue_entry_valid_id_issue ),
.is_ctrl_flow_o ( is_ctrl_fow_id_issue ),
.issue_instr_ack_i ( issue_instr_issue_id ),
.priv_lvl_i ( priv_lvl ),
.tvm_i ( tvm_csr_id ),
.tw_i ( tw_csr_id ),
.tsr_i ( tsr_csr_id ),
.*
);
// ---------
// Issue
// ---------
issue_stage
#(
.NR_ENTRIES ( NR_SB_ENTRIES ),
.NR_WB_PORTS ( NR_WB_PORTS )
.NR_ENTRIES ( NR_SB_ENTRIES ),
.NR_WB_PORTS ( NR_WB_PORTS )
)
id_stage_i (
.test_en_i ( test_en_i ),
.flush_i ( flush_ctrl_id ),
issue_stage_i (
.flush_unissued_instr_i ( flush_unissued_instr_ctrl_id ),
.fetch_entry_i ( fetch_entry_if_id ),
.fetch_entry_valid_i ( fetch_valid_if_id ),
.decoded_instr_ack_o ( decode_ack_id_if ),
.ready_o ( ready_id_if ),
.priv_lvl_i ( priv_lvl ),
.tvm_i ( tvm_csr_id ),
.tw_i ( tw_csr_id ),
.tsr_i ( tsr_csr_id ),
.flush_i ( flush_ctrl_id ),
.decoded_instr_i ( issue_entry_id_issue ),
.decoded_instr_valid_i ( issue_entry_valid_id_issue ),
.is_ctrl_flow_i ( is_ctrl_fow_id_issue ),
.decoded_instr_ack_o ( issue_instr_issue_id ),
// Functional Units
.fu_o ( fu_id_ex ),
.operator_o ( operator_id_ex ),
@ -487,8 +514,8 @@ module ariane
assign tracer_if.fetch_valid = fetch_valid_if_id;
assign tracer_if.fetch_ack = decode_ack_id_if;
// Issue
assign tracer_if.issue_ack = id_stage_i.scoreboard_i.issue_ack_i;
assign tracer_if.issue_sbe = id_stage_i.scoreboard_i.issue_instr_o;
assign tracer_if.issue_ack = issue_stage_i.scoreboard_i.issue_ack_i;
assign tracer_if.issue_sbe = issue_stage_i.scoreboard_i.issue_instr_o;
// write-back
assign tracer_if.waddr = waddr_a_commit_id;
assign tracer_if.wdata = wdata_a_commit_id;
@ -497,12 +524,13 @@ module ariane
assign tracer_if.commit_instr = commit_instr_id_commit;
assign tracer_if.commit_ack = commit_ack;
// address translation
assign tracer_if.translation_valid = ex_stage_i.lsu_i.mmu_i.lsu_valid_o;
assign tracer_if.vaddr = ex_stage_i.lsu_i.mmu_i.lsu_vaddr_i;
assign tracer_if.paddr = ex_stage_i.lsu_i.mmu_i.lsu_paddr_o;
assign tracer_if.is_store = ex_stage_i.lsu_i.mmu_i.lsu_is_store_i;
assign tracer_if.st_ready = ex_stage_i.lsu_i.store_unit_i.ready_o;
assign tracer_if.ld_ready = ex_stage_i.lsu_i.load_unit_i.ready_o;
// stores
assign tracer_if.st_valid = ex_stage_i.lsu_i.store_unit_i.store_buffer_i.valid_i;
assign tracer_if.st_paddr = ex_stage_i.lsu_i.store_unit_i.store_buffer_i.paddr_i;
// loads
assign tracer_if.ld_valid = ex_stage_i.lsu_i.load_unit_i.tag_valid_o;
assign tracer_if.ld_kill = ex_stage_i.lsu_i.load_unit_i.kill_req_o;
assign tracer_if.ld_paddr = ex_stage_i.lsu_i.load_unit_i.paddr_i;
// exceptions
assign tracer_if.exception = commit_stage_i.exception_o;
@ -510,6 +538,8 @@ module ariane
instruction_tracer it = new (tracer_if);
initial begin
#15ns;
it.create_file(cluster_id_i, core_id_i);
it.trace();
end

View file

@ -74,7 +74,6 @@ module branch_unit (
automatic logic [63:0] jump_base = (operator_i == JALR) ? operand_a_i : pc_i;
target_address = 64'b0;
resolved_branch_o.pc = pc_i;
resolved_branch_o.target_address = 64'b0;
resolved_branch_o.is_taken = 1'b0;
resolved_branch_o.valid = branch_valid_i;
@ -92,17 +91,18 @@ module branch_unit (
// if we need to put the branch target address in a destination register, output it here to WB
branch_result_o = next_pc;
// save PC - we need this to get the target row in the branch target buffer
// we play this trick with the branch instruction which wraps a byte boundary:
// |---------- Place the prediction on this PC
// \/
// ____________________________________________________
// |branch [15:0] | branch[31:16] | compressed 1[15:0] |
// |____________________________________________________
// This relieves the pre-fetcher of re-fetching partially fetched unaligned branch instructions, i.e.:
// we don't have a back arch between the pre-fetcher and decoder/instruction FIFO.
resolved_branch_o.pc = (is_compressed_instr_i || pc_i[1] == 1'b0) ? pc_i : ({pc_i[63:2], 2'b0} + 64'h4);
if (branch_valid_i) begin
// save PC - we need this to get the target row in the branch target buffer
// we play this trick with the branch instruction which wraps a byte boundary:
// |---------- Place the prediction on this PC
// \/
// ____________________________________________________
// |branch [15:0] | branch[31:16] | compressed 1[15:0] |
// |____________________________________________________
// This relieves the prefetcher of re-fetching partially fetched unaligned branch instructions, i.e.:
// we don't have a back arch between prefetcher and decoder/instruction FIFO.
resolved_branch_o.pc = (is_compressed_instr_i || pc_i[1] == 1'b0) ? pc_i : ({pc_i[63:2], 2'b0} + 64'h4);
// save if the branch instruction was in the lower 16 bit of the instruction word
// the first case is a compressed instruction which is in slot 0
// the other case is a misaligned uncompressed instruction which we only predict in the next cycle (see notes above)
@ -131,17 +131,18 @@ module branch_unit (
end
end
end
// to resolve the branch in ID -> only do this if this was indeed a branch (hence valid_i is asserted)
// to resolve the branch in ID
resolve_branch_o = 1'b1;
// the other case would be that this instruction was no branch but branch prediction thought that it was one
// this is essentially also a mis-predict
end else if (fu_valid_i && branch_predict_i.valid) begin
end else if (fu_valid_i && branch_predict_i.valid && branch_predict_i.predict_taken) begin
// re-set the branch to the next PC
resolved_branch_o.is_mispredict = 1'b1;
resolved_branch_o.target_address = next_pc;
// clear this entry so that we are not constantly mis-predicting
resolved_branch_o.clear = 1'b1;
resolved_branch_o.valid = 1'b1;
resolve_branch_o = 1'b1;
end
end
// use ALU exception signal for storing instruction fetch exceptions if

View file

@ -111,6 +111,7 @@ module btb #(
end else begin
btb_q <= btb_n;
end
end
end
endmodule

View file

@ -32,226 +32,222 @@ module compressed_decoder
output logic illegal_instr_o
);
// -------------------
// Compressed Decoder
// -------------------
always_comb begin
illegal_instr_o = 1'b0;
instr_o = '0;
// -------------------
// Compressed Decoder
// -------------------
always_comb begin
illegal_instr_o = 1'b0;
instr_o = '0;
unique case (instr_i[1:0])
// C0
2'b00: begin
unique case (instr_i[15:13])
3'b000: begin
// c.addi4spn -> addi rd', x2, imm
instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5], instr_i[6], 2'b00, 5'h02, 3'b000, 2'b01, instr_i[4:2], OPCODE_OPIMM};
if (instr_i[12:5] == 8'b0) illegal_instr_o = 1'b1;
end
unique case (instr_i[1:0])
// C0
2'b00: begin
unique case (instr_i[15:13])
3'b000: begin
// c.addi4spn -> addi rd', x2, imm
instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5], instr_i[6], 2'b00, 5'h02, 3'b000, 2'b01, instr_i[4:2], OPCODE_OPIMM};
if (instr_i[12:5] == 8'b0) illegal_instr_o = 1'b1;
end
3'b010: begin
// c.lw -> lw rd', imm(rs1')
instr_o = {5'b0, instr_i[5], instr_i[12:10], instr_i[6], 2'b00, 2'b01, instr_i[9:7], 3'b010, 2'b01, instr_i[4:2], OPCODE_LOAD};
end
3'b010: begin
// c.lw -> lw rd', imm(rs1')
instr_o = {5'b0, instr_i[5], instr_i[12:10], instr_i[6], 2'b00, 2'b01, instr_i[9:7], 3'b010, 2'b01, instr_i[4:2], OPCODE_LOAD};
end
3'b011: begin
// c.ld -> ld rd', imm(rs1')
// | imm[11:0] | rs1 | funct3 | rd | opcode |
instr_o = {4'b0, instr_i[6:5], instr_i[12:10], 3'b000, 2'b01, instr_i[9:7], 3'b011, 2'b01, instr_i[4:2], OPCODE_LOAD};
end
3'b011: begin
// c.ld -> ld rd', imm(rs1')
// | imm[11:0] | rs1 | funct3 | rd | opcode |
instr_o = {4'b0, instr_i[6:5], instr_i[12:10], 3'b000, 2'b01, instr_i[9:7], 3'b011, 2'b01, instr_i[4:2], OPCODE_LOAD};
end
3'b110: begin
// c.sw -> sw rs2', imm(rs1')
instr_o = {5'b0, instr_i[5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b010, instr_i[11:10], instr_i[6], 2'b00, OPCODE_STORE};
end
3'b110: begin
// c.sw -> sw rs2', imm(rs1')
instr_o = {5'b0, instr_i[5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b010, instr_i[11:10], instr_i[6], 2'b00, OPCODE_STORE};
end
3'b111: begin
// c.sd -> sd rs2', imm(rs1')
instr_o = {4'b0, instr_i[6:5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b011, instr_i[11:10], 3'b000, OPCODE_STORE};
end
3'b111: begin
// c.sd -> sd rs2', imm(rs1')
instr_o = {4'b0, instr_i[6:5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b011, instr_i[11:10], 3'b000, OPCODE_STORE};
end
default: begin
illegal_instr_o = 1'b1;
end
endcase
end
// C1
2'b01: begin
unique case (instr_i[15:13])
3'b000: begin
// c.addi -> addi rd, rd, nzimm
// c.nop -> addi 0, 0, 0
instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OPIMM};
end
// c.addiw -> addiw rd, rd, nzimm for RV64
3'b001: begin
if (instr_i[11:7] != 5'h0) // only valid if the destination is not r0
instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OPIMM32};
else
illegal_instr_o = 1'b1;
end
3'b101: begin
// 101: c.j -> jal x0, imm
instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], {9 {instr_i[12]}}, 4'b0, ~instr_i[15], OPCODE_JAL};
end
3'b010: begin
// c.li -> addi rd, x0, nzimm
instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], OPCODE_OPIMM};
if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
end
3'b011: begin
// c.lui -> lui rd, imm
instr_o = {{15 {instr_i[12]}}, instr_i[6:2], instr_i[11:7], OPCODE_LUI};
if (instr_i[11:7] == 5'h02) begin
// c.addi16sp -> addi x2, x2, nzimm
instr_o = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2], instr_i[6], 4'b0, 5'h02, 3'b000, 5'h02, OPCODE_OPIMM};
end else if (instr_i[11:7] == 5'b0) begin
illegal_instr_o = 1'b1;
default: begin
illegal_instr_o = 1'b1;
end
endcase
end
if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1;
end
3'b100: begin
unique case (instr_i[11:10])
2'b00,
2'b01: begin
// 00: c.srli -> srli rd, rd, shamt
// 01: c.srai -> srai rd, rd, shamt
instr_o = {1'b0, instr_i[10], 5'b0, instr_i[6:2], 2'b01, instr_i[9:7], 3'b101, 2'b01, instr_i[9:7], OPCODE_OPIMM};
if (instr_i[6:2] == 5'b0) illegal_instr_o = 1'b1;
end
2'b10: begin
// c.andi -> andi rd, rd, imm
instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], OPCODE_OPIMM};
end
2'b11: begin
unique case ({instr_i[12], instr_i[6:5]})
// C1
2'b01: begin
unique case (instr_i[15:13])
3'b000: begin
// c.sub -> sub rd', rd', rs2'
instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP};
// c.addi -> addi rd, rd, nzimm
// c.nop -> addi 0, 0, 0
instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OPIMM};
end
// c.addiw -> addiw rd, rd, nzimm for RV64
3'b001: begin
// c.xor -> xor rd', rd', rs2'
instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b100, 2'b01, instr_i[9:7], OPCODE_OP};
if (instr_i[11:7] != 5'h0) // only valid if the destination is not r0
instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OPIMM32};
else
illegal_instr_o = 1'b1;
end
3'b101: begin
// 101: c.j -> jal x0, imm
instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], {9 {instr_i[12]}}, 4'b0, ~instr_i[15], OPCODE_JAL};
end
3'b010: begin
// c.or -> or rd', rd', rs2'
instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b110, 2'b01, instr_i[9:7], OPCODE_OP};
// c.li -> addi rd, x0, nzimm
instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], OPCODE_OPIMM};
if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
end
3'b011: begin
// c.and -> and rd', rd', rs2'
instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], OPCODE_OP};
// c.lui -> lui rd, imm
instr_o = {{15 {instr_i[12]}}, instr_i[6:2], instr_i[11:7], OPCODE_LUI};
if (instr_i[11:7] == 5'h02) begin
// c.addi16sp -> addi x2, x2, nzimm
instr_o = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2], instr_i[6], 4'b0, 5'h02, 3'b000, 5'h02, OPCODE_OPIMM};
end else if (instr_i[11:7] == 5'b0) begin
illegal_instr_o = 1'b1;
end
if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1;
end
3'b100: begin
// c.subw -> subw rd', rd', rs2'
instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP32};
end
3'b101: begin
// c.addw -> addw rd', rd', rs2'
instr_o = {2'b00, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP32};
unique case (instr_i[11:10])
2'b00,
2'b01: begin
// 00: c.srli -> srli rd, rd, shamt
// 01: c.srai -> srai rd, rd, shamt
instr_o = {1'b0, instr_i[10], 5'b0, instr_i[6:2], 2'b01, instr_i[9:7], 3'b101, 2'b01, instr_i[9:7], OPCODE_OPIMM};
if (instr_i[6:2] == 5'b0) illegal_instr_o = 1'b1;
end
2'b10: begin
// c.andi -> andi rd, rd, imm
instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], OPCODE_OPIMM};
end
2'b11: begin
unique case ({instr_i[12], instr_i[6:5]})
3'b000: begin
// c.sub -> sub rd', rd', rs2'
instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP};
end
3'b001: begin
// c.xor -> xor rd', rd', rs2'
instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b100, 2'b01, instr_i[9:7], OPCODE_OP};
end
3'b010: begin
// c.or -> or rd', rd', rs2'
instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b110, 2'b01, instr_i[9:7], OPCODE_OP};
end
3'b011: begin
// c.and -> and rd', rd', rs2'
instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], OPCODE_OP};
end
3'b100: begin
// c.subw -> subw rd', rd', rs2'
instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP32};
end
3'b101: begin
// c.addw -> addw rd', rd', rs2'
instr_o = {2'b00, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP32};
end
3'b110,
3'b111: begin
// 110: reserved encoding -> illegal instruction
// 111: reserved encoding -> illegal instruction
illegal_instr_o = 1'b1;
instr_o = {16'b0, instr_i};
end
endcase
end
endcase
end
3'b110,
3'b111: begin
// 110: reserved encoding -> illegal instruction
// 111: reserved encoding -> illegal instruction
illegal_instr_o = 1'b1;
instr_o = {16'b0, instr_i};
3'b110, 3'b111: begin
// 0: c.beqz -> beq rs1', x0, imm
// 1: c.bnez -> bne rs1', x0, imm
instr_o = {{4 {instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01, instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3], instr_i[12], OPCODE_BRANCH};
end
endcase
end
endcase
end
3'b110, 3'b111: begin
// 0: c.beqz -> beq rs1', x0, imm
// 1: c.bnez -> bne rs1', x0, imm
instr_o = {{4 {instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01, instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3], instr_i[12], OPCODE_BRANCH};
end
default: begin
illegal_instr_o = 1'b1;
end
endcase
end
// C2
2'b10: begin
unique case (instr_i[15:13])
3'b000: begin
// c.slli -> slli rd, rd, shamt
instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], OPCODE_OPIMM};
if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; // register not x0
if (instr_i[6:2] == 5'b0) illegal_instr_o = 1'b1; // shift amount must be non zero
end
3'b010: begin
// c.lwsp -> lw rd, imm(x2)
instr_o = {4'b0, instr_i[3:2], instr_i[12], instr_i[6:4], 2'b00, 5'h02, 3'b010, instr_i[11:7], OPCODE_LOAD};
if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
end
3'b011: begin
// c.ldsp -> ld rd, imm(x2)
instr_o = {3'b0, instr_i[4:2], instr_i[12], instr_i[6:5], 3'b000, 5'h02, 3'b011, instr_i[11:7], OPCODE_LOAD};
if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
end
3'b100: begin
if (instr_i[12] == 1'b0) begin
// c.mv -> add rd/rs1, x0, rs2
instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], OPCODE_OP};
if (instr_i[6:2] == 5'b0) begin
// c.jr -> jalr x0, rd/rs1, 0
instr_o = {12'b0, instr_i[11:7], 3'b0, 5'b0, OPCODE_JALR};
end
end else begin
// c.add -> add rd, rd, rs2
instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OP};
if (instr_i[11:7] == 5'b0) begin
// c.ebreak -> ebreak
instr_o = {32'h00_10_00_73};
if (instr_i[6:2] != 5'b0)
illegal_instr_o = 1'b1;
end else if (instr_i[6:2] == 5'b0) begin
// c.jalr -> jalr x1, rs1, 0
instr_o = {12'b0, instr_i[11:7], 3'b000, 5'b00001, OPCODE_JALR};
end
endcase
end
end
3'b110: begin
// c.swsp -> sw rs2, imm(x2)
instr_o = {4'b0, instr_i[8:7], instr_i[12], instr_i[6:2], 5'h02, 3'b010, instr_i[11:9], 2'b00, OPCODE_STORE};
end
// C2
2'b10: begin
unique case (instr_i[15:13])
3'b000: begin
// c.slli -> slli rd, rd, shamt
instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], OPCODE_OPIMM};
if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; // register not x0
if (instr_i[6:2] == 5'b0) illegal_instr_o = 1'b1; // shift amount must be non zero
end
3'b111: begin
// c.sdsp -> sd rs2, imm(x2)
instr_o = {3'b0, instr_i[9:7], instr_i[12], instr_i[6:2], 5'h02, 3'b011, instr_i[11:10], 3'b000, OPCODE_STORE};
end
3'b010: begin
// c.lwsp -> lw rd, imm(x2)
instr_o = {4'b0, instr_i[3:2], instr_i[12], instr_i[6:4], 2'b00, 5'h02, 3'b010, instr_i[11:7], OPCODE_LOAD};
if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
end
default: begin
illegal_instr_o = 1'b1;
end
3'b011: begin
// c.ldsp -> ld rd, imm(x2)
instr_o = {3'b0, instr_i[4:2], instr_i[12], instr_i[6:5], 3'b000, 5'h02, 3'b011, instr_i[11:7], OPCODE_LOAD};
if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1;
end
3'b100: begin
if (instr_i[12] == 1'b0) begin
// c.mv -> add rd/rs1, x0, rs2
instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], OPCODE_OP};
if (instr_i[6:2] == 5'b0) begin
// c.jr -> jalr x0, rd/rs1, 0
instr_o = {12'b0, instr_i[11:7], 3'b0, 5'b0, OPCODE_JALR};
end
end else begin
// c.add -> add rd, rd, rs2
instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OP};
if (instr_i[11:7] == 5'b0) begin
// c.ebreak -> ebreak
instr_o = {32'h00_10_00_73};
if (instr_i[6:2] != 5'b0)
illegal_instr_o = 1'b1;
end else if (instr_i[6:2] == 5'b0) begin
// c.jalr -> jalr x1, rs1, 0
instr_o = {12'b0, instr_i[11:7], 3'b000, 5'b00001, OPCODE_JALR};
end
end
end
3'b110: begin
// c.swsp -> sw rs2, imm(x2)
instr_o = {4'b0, instr_i[8:7], instr_i[12], instr_i[6:2], 5'h02, 3'b010, instr_i[11:9], 2'b00, OPCODE_STORE};
end
3'b111: begin
// c.sdsp -> sd rs2, imm(x2)
instr_o = {3'b0, instr_i[9:7], instr_i[12], instr_i[6:2], 5'h02, 3'b011, instr_i[11:10], 3'b000, OPCODE_STORE};
end
default: begin
illegal_instr_o = 1'b1;
end
endcase
end
default: ;
endcase
end
default: ;
endcase
end
end
endmodule

View file

@ -68,6 +68,7 @@ module controller (
if (sfence_vma_i) begin
flush_pcgen_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
flush_tlb_o = 1'b1;
@ -79,6 +80,7 @@ module controller (
if (flush_csr_i) begin
flush_pcgen_o = 1'b1;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
end
@ -91,6 +93,7 @@ module controller (
// for the PC GEN stage but instead tells it to take the PC we gave it
flush_pcgen_o = 1'b0;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
end
@ -102,6 +105,7 @@ module controller (
// don't flush pcgen as we want to take the exception
flush_pcgen_o = 1'b0;
flush_if_o = 1'b1;
flush_unissued_instr_o = 1'b1;
flush_id_o = 1'b1;
flush_ex_o = 1'b1;
end

View file

@ -79,11 +79,7 @@ module csr_regfile #(
priv_lvl_t trap_to_priv_lvl;
// register for enabling load store address translation, this is critical, hence the register
logic en_ld_st_translation_n, en_ld_st_translation_q;
// ----------------------
// LD/ST Privilege Level
// ----------------------
assign ld_st_priv_lvl_o = (mstatus_q.mprv) ? mstatus_q.mpp : priv_lvl_o;
assign en_ld_st_translation_o = en_ld_st_translation_q;
// ----------------
// CSR Registers
// ----------------
@ -282,7 +278,13 @@ module csr_regfile #(
CSR_MIE: mie_n = csr_wdata & 64'hBBB; // we only support supervisor and m-mode interrupts
CSR_MIP: mip_n = csr_wdata & 64'h33; // only USIP, SSIP, UTIP, STIP are write-able
CSR_MTVEC: mtvec_n = {csr_wdata[63:2], 1'b0, csr_wdata[0]};
CSR_MTVEC: begin
mtvec_n = {csr_wdata[63:2], 1'b0, csr_wdata[0]};
// we are in vector mode, this implementation requires the additional
// alignment constraint of 64 * 4 bytes
if (csr_wdata[0])
mtvec_n = {csr_wdata[63:8], 7'b0, csr_wdata[0]};
end
CSR_MSCRATCH: mscratch_n = csr_wdata;
CSR_MEPC: mepc_n = {csr_wdata[63:1], 1'b0};
CSR_MCAUSE: mcause_n = csr_wdata;
@ -338,7 +340,7 @@ module csr_regfile #(
end else begin
// update mstatus
// clear enable flags for all lower privilege levels
// but as m is already the highest -> clear everything
// but as M is already the highest -> clear everything
mstatus_n.mie = 1'b0;
mstatus_n.sie = 1'b0;
mstatus_n.mpie = mstatus_q.mie;
@ -362,6 +364,9 @@ module csr_regfile #(
en_ld_st_translation_n = 1'b1;
else // otherwise we go with the regular settings
en_ld_st_translation_n = en_translation_o;
ld_st_priv_lvl_o = (mstatus_q.mprv) ? mstatus_q.mpp : priv_lvl_o;
en_ld_st_translation_o = en_ld_st_translation_q;
// -----------------------
// Return from Exception
// -----------------------
@ -537,75 +542,76 @@ module csr_regfile #(
// output assignments dependent on privilege mode
always_comb begin : priv_output
automatic logic [63:0] base = {mtvec_q[63:2], 2'b0};
epc_o = mepc_q;
trap_vector_base_o = {mtvec_q[63:2], 2'b0};
// the trap is taken to supervisor mode -> output the supervisor trap vector (stvec)
if (trap_to_priv_lvl == PRIV_LVL_S) begin
base = {stvec_q[63:2], 2'b0};
trap_vector_base_o = {stvec_q[63:2], 2'b0};
end
// check if we are in vectored mode, if yes then do BASE + 4*cause
// check if we are in vectored mode, if yes then do BASE + 4 * cause
// we are imposing an additional alignment-constraint of 64 * 4 bytes since
// we want to spare the costly addition
if ((mtvec_q[0] || stvec_q[0]) && csr_exception_o.cause[63]) begin
base = base + (csr_exception_o.cause[62:0] << 2);
trap_vector_base_o[7:2] = csr_exception_o.cause[5:0];
end
epc_o = mepc_q;
// we are returning from supervisor mode, so take the sepc register
if (sret) begin
epc_o = sepc_q;
epc_o = sepc_q;
end
trap_vector_base_o = base;
end
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
priv_lvl_q <= PRIV_LVL_M;
priv_lvl_q <= PRIV_LVL_M;
// machine mode registers
mstatus_q <= 64'b0;
mtvec_q <= {boot_addr_i[63:2], 2'b0}; // set to boot address + direct mode
medeleg_q <= 64'b0;
mideleg_q <= 64'b0;
mip_q <= 64'b0;
mie_q <= 64'b0;
mepc_q <= 64'b0;
mcause_q <= 64'b0;
mscratch_q <= 64'b0;
mtval_q <= 64'b0;
mstatus_q <= 64'b0;
mtvec_q <= {boot_addr_i[63:2], 2'b0}; // set to boot address + direct mode
medeleg_q <= 64'b0;
mideleg_q <= 64'b0;
mip_q <= 64'b0;
mie_q <= 64'b0;
mepc_q <= 64'b0;
mcause_q <= 64'b0;
mscratch_q <= 64'b0;
mtval_q <= 64'b0;
// supervisor mode registers
sepc_q <= 64'b0;
scause_q <= 64'b0;
stvec_q <= 64'b0;
sscratch_q <= 64'b0;
stval_q <= 64'b0;
satp_q <= 64'b0;
sepc_q <= 64'b0;
scause_q <= 64'b0;
stvec_q <= 64'b0;
sscratch_q <= 64'b0;
stval_q <= 64'b0;
satp_q <= 64'b0;
// timer and counters
cycle_q <= 64'b0;
instret_q <= 64'b0;
cycle_q <= 64'b0;
instret_q <= 64'b0;
// aux registers
en_ld_st_translation_q <= 1'b0;
end else begin
priv_lvl_q <= priv_lvl_n;
priv_lvl_q <= priv_lvl_n;
// machine mode registers
mstatus_q <= mstatus_n;
mtvec_q <= mtvec_n;
medeleg_q <= medeleg_n;
mideleg_q <= mideleg_n;
mip_q <= mip_n;
mie_q <= mie_n;
mepc_q <= mepc_n;
mcause_q <= mcause_n;
mscratch_q <= mscratch_n;
mtval_q <= mtval_n;
mstatus_q <= mstatus_n;
mtvec_q <= mtvec_n;
medeleg_q <= medeleg_n;
mideleg_q <= mideleg_n;
mip_q <= mip_n;
mie_q <= mie_n;
mepc_q <= mepc_n;
mcause_q <= mcause_n;
mscratch_q <= mscratch_n;
mtval_q <= mtval_n;
// supervisor mode registers
sepc_q <= sepc_n;
scause_q <= scause_n;
stvec_q <= stvec_n;
sscratch_q <= sscratch_n;
stval_q <= stval_n;
satp_q <= satp_n;
sepc_q <= sepc_n;
scause_q <= scause_n;
stvec_q <= stvec_n;
sscratch_q <= sscratch_n;
stval_q <= stval_n;
satp_q <= satp_n;
// timer and counters
cycle_q <= cycle_n;
instret_q <= instret_n;
cycle_q <= cycle_n;
instret_q <= instret_n;
// aux registers
en_ld_st_translation_q <= en_ld_st_translation_n;
end

View file

@ -108,15 +108,15 @@ module dcache_arbiter #(
request_port_n = i;
request_index = i;
// wait for the grant
if (data_gnt_i) begin
// set the slave on which we are waiting
in_data = 1'b1 << i[DATA_WIDTH-1:0];
push = 1'b1;
end
// set the slave on which we are waiting
in_data = 1'b1 << i[DATA_WIDTH-1:0];
break; // break here as this is a priority select
end
end
// only if we got a grant save it to the queue
if (data_gnt_i) begin
push = 1'b1;
end
end
// pass through all signals from the correct slave port

View file

@ -96,11 +96,17 @@ module decoder (
instruction_o.op = SRET;
// check privilege level, SRET can only be executed in S and M mode
// we'll just decode an illegal instruction if we are in the wrong privilege level
if (priv_lvl_i == PRIV_LVL_U)
if (priv_lvl_i == PRIV_LVL_U) begin
illegal_instr = 1'b1;
// do not change privilege level if this is an illegal instruction
instruction_o.op = ADD;
end
// if we are in S-Mode and Trap SRET (tsr) is set -> trap on illegal instruction
if (priv_lvl_i == PRIV_LVL_S && tsr_i)
if (priv_lvl_i == PRIV_LVL_S && tsr_i) begin
illegal_instr = 1'b1;
// do not change privilege level if this is an illegal instruction
instruction_o.op = ADD;
end
end
// MRET
12'b1100000010: begin
@ -276,8 +282,6 @@ module decoder (
else
illegal_instr = 1'b1;
end
default: illegal_instr = 1'b1;
endcase
end

View file

@ -40,7 +40,7 @@ module fetch_fifo
input logic out_ready_i
);
localparam DEPTH = 8; // must be a power of two
localparam int unsigned DEPTH = 8; // must be a power of two
// input registers - bounding the path from memory
branchpredict_sbe branch_predict_n, branch_predict_q;
@ -164,7 +164,6 @@ module fetch_fifo
status_cnt++;
write_pointer++;
// $display("Instruction: [ c | c ] @ %t", $time);
// or is it an unaligned 32 bit instruction like
// ____________________________________________________
// |instr [15:0] | instr [31:16] | compressed 1[15:0] |
@ -176,7 +175,6 @@ module fetch_fifo
unaligned_n = 1'b1;
// save the address as well
unaligned_address_n = {in_addr_q[63:2], 2'b10};
// $display("Instruction: [ i0 | c ] @ %t", $time);
// this does not consume space in the FIFO
end
end else begin
@ -189,7 +187,6 @@ module fetch_fifo
};
status_cnt++;
write_pointer++;
// $display("Instruction: [ i ] @ %t", $time);
end
end
// we have an outstanding unaligned instruction
@ -217,7 +214,6 @@ module fetch_fifo
write_pointer++;
// unaligned access served
unaligned_n = 1'b0;
// $display("Instruction: [ c | i1 ] @ %t", $time);
// or is it an unaligned 32 bit instruction like
// ____________________________________________________
// |instr [15:0] | instr [31:16] | compressed 1[15:0] |
@ -229,7 +225,6 @@ module fetch_fifo
unaligned_n = 1'b1;
// save the address as well
unaligned_address_n = {in_addr_q[63:2], 2'b10};
// $display("Instruction: [ i0 | i1 ] @ %t", $time);
// this does not consume space in the FIFO
// we've got a predicted taken branch we need to clear the unaligned flag if it was decoded as a lower 16 instruction
end else if (branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16) begin

View file

@ -68,6 +68,7 @@ module fifo #(
// but increment the read pointer...
read_pointer_n = read_pointer_q + 1;
// ... and decrement the overall count
mem_n[read_pointer_q] = '0;
status_cnt_n = status_cnt_q - 1;
end
// keep the count pointer stable if we push and pop at the same time

View file

@ -19,121 +19,37 @@
//
import ariane_pkg::*;
module id_stage #(
parameter int NR_ENTRIES = 4,
parameter int NR_WB_PORTS = 4
)(
module id_stage (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic test_en_i, // Test Enable
input logic flush_i,
input logic flush_unissued_instr_i,
// from IF
input fetch_entry fetch_entry_i,
input logic fetch_entry_valid_i,
output logic decoded_instr_ack_o,
output logic decoded_instr_ack_o, // acknowledge the instruction (fetch entry)
// to ID
output scoreboard_entry issue_entry_o, // a decoded instruction
output logic issue_entry_valid_o, // issue entry is valid
output logic is_ctrl_flow_o, // the instruction we issue is a ctrl flow instructions
input logic issue_instr_ack_i, // issue stage acknowledged sampling of instructions
// from CSR file
input priv_lvl_t priv_lvl_i, // current privilege level
input priv_lvl_t priv_lvl_i, // current privilege level
input logic tvm_i,
input logic tw_i,
input logic tsr_i,
output logic ready_o, // id is ready
output fu_t fu_o,
output fu_op operator_o,
output logic [63:0] operand_a_o,
output logic [63:0] operand_b_o,
output logic [63:0] imm_o,
output logic [TRANS_ID_BITS-1:0] trans_id_o,
output logic [63:0] pc_o,
output logic is_compressed_instr_o,
input logic alu_ready_i,
output logic alu_valid_o,
// ex just resolved our predicted branch, we are ready to accept new requests
input logic resolve_branch_i,
input logic lsu_ready_i,
output logic lsu_valid_o,
// branch prediction
input logic branch_ready_i,
output logic branch_valid_o, // use branch prediction unit
output branchpredict_sbe branch_predict_o,
input logic mult_ready_i,
output logic mult_valid_o, // Branch predict Out
input logic csr_ready_i,
output logic csr_valid_o,
// write back port
input logic [NR_WB_PORTS-1:0][TRANS_ID_BITS-1:0] trans_id_i,
input logic [NR_WB_PORTS-1:0][63:0] wdata_i,
input exception [NR_WB_PORTS-1:0] ex_ex_i, // exception from execute stage
input logic [NR_WB_PORTS-1:0] wb_valid_i,
// commit port
input logic[4:0] waddr_a_i,
input logic[63:0] wdata_a_i,
input logic we_a_i,
output scoreboard_entry commit_instr_o,
input logic commit_ack_i
input logic tsr_i
);
// ---------------------------------------------------
// Global signals
// ---------------------------------------------------
logic full;
// ---------------------------------------------------
// Scoreboard (SB) <-> Issue and Read Operands (iro)
// ---------------------------------------------------
fu_t [31:0] rd_clobber_sb_iro;
logic [4:0] rs1_iro_sb;
logic [63:0] rs1_sb_iro;
logic rs1_valid_sb_iro;
logic [4:0] rs2_iro_sb;
logic [63:0] rs2_sb_iro;
logic rs2_valid_iro_sb;
scoreboard_entry issue_instr_sb_iro;
logic issue_instr_valid_sb_iro;
logic issue_ack_iro_sb;
// ---------------------------------------------------
// Decoder (DC) <-> Scoreboard (SB)
// ---------------------------------------------------
scoreboard_entry decoded_instr_dc_sb;
// ---------------------------------------------------
// Decoder (DC) <-> Branch Logic
// ---------------------------------------------------
// register stage
struct packed {
logic valid;
scoreboard_entry sbe;
logic is_ctrl_flow;
} issue_n, issue_q;
logic is_control_flow_instr;
// ---------------------------------------------------
// Branch (resolve) logic
// ---------------------------------------------------
// This should basically prevent the scoreboard from accepting
// instructions past a branch. We need to resolve the branch beforehand.
// This limitation is in place to ease the backtracking of mis-predicted branches as they
// can simply be in the front-end of the processor.
logic unresolved_branch_n, unresolved_branch_q;
always_comb begin : unresolved_branch
unresolved_branch_n = unresolved_branch_q;
// we just resolved the branch
if (resolve_branch_i) begin
unresolved_branch_n = 1'b0;
end
// if the instruction is valid and it is a control flow instruction
if (fetch_entry_valid_i && is_control_flow_instr) begin
unresolved_branch_n = 1'b1;
end
// if we are requested to flush also flush the unresolved branch flag because either the flush
// was requested by a branch or an exception. In any case: any unresolved branch will get evicted
if (flush_unissued_instr_i || flush_i) begin
unresolved_branch_n = 1'b0;
end
end
// we are ready if we are not full and don't have any unresolved branches, but it can be
// the case that we have an unresolved branch which is cleared in that cycle (resolved_branch_i.valid == 1)
assign ready_o = ~full && (~unresolved_branch_q || resolve_branch_i);
scoreboard_entry decoded_instruction;
decoder decoder_i (
.pc_i ( fetch_entry_i.address ),
@ -142,58 +58,45 @@ module id_stage #(
.branch_predict_i ( fetch_entry_i.branch_predict ),
.is_illegal_i ( fetch_entry_i.is_illegal ),
.ex_i ( fetch_entry_i.ex ),
.instruction_o ( decoded_instr_dc_sb ),
.instruction_o ( decoded_instruction ),
.is_control_flow_instr_o ( is_control_flow_instr ),
.*
);
scoreboard #(
.NR_ENTRIES ( NR_ENTRIES ),
.NR_WB_PORTS ( NR_WB_PORTS )
)
scoreboard_i
(
.full_o ( full ),
.rd_clobber_o ( rd_clobber_sb_iro ),
.rs1_i ( rs1_iro_sb ),
.rs1_o ( rs1_sb_iro ),
.rs1_valid_o ( rs1_valid_sb_iro ),
.rs2_i ( rs2_iro_sb ),
.rs2_o ( rs2_sb_iro ),
.rs2_valid_o ( rs2_valid_iro_sb ),
.commit_instr_o ( commit_instr_o ),
.commit_ack_i ( commit_ack_i ),
.decoded_instr_i ( decoded_instr_dc_sb ),
.decoded_instr_valid_i ( fetch_entry_valid_i ),
.issue_instr_o ( issue_instr_sb_iro ),
.issue_instr_valid_o ( issue_instr_valid_sb_iro ),
.issue_ack_i ( issue_ack_iro_sb ),
.trans_id_i ( trans_id_i ),
.wdata_i ( wdata_i ),
.ex_i ( ex_ex_i ),
.*
);
// ------------------
// Output Registers
// ------------------
assign issue_entry_o = issue_q.sbe;
assign issue_entry_valid_o = issue_q.valid;
assign is_ctrl_flow_o = issue_q.is_ctrl_flow;
always_comb begin
issue_n = issue_q;
decoded_instr_ack_o = 1'b0;
issue_read_operands issue_read_operands_i (
.issue_instr_i ( issue_instr_sb_iro ),
.issue_instr_valid_i ( issue_instr_valid_sb_iro ),
.issue_ack_o ( issue_ack_iro_sb ),
.rs1_o ( rs1_iro_sb ),
.rs1_i ( rs1_sb_iro ),
.rs1_valid_i ( rs1_valid_sb_iro ),
.rs2_o ( rs2_iro_sb ),
.rs2_i ( rs2_sb_iro ),
.rs2_valid_i ( rs2_valid_iro_sb ),
.rd_clobber_i ( rd_clobber_sb_iro ),
.*
);
if (issue_instr_ack_i)
issue_n.valid = 1'b0;
// if we have a space in the register and the fetch is valid, go get it
// or the issue stage is currently acknowledging an instruction, which means that we will have space
// for a new instruction
if ((!issue_q.valid || issue_instr_ack_i) && fetch_entry_valid_i) begin
decoded_instr_ack_o = 1'b1;
issue_n = { 1'b1, decoded_instruction, is_control_flow_instr};
end
// invalidate on a flush
if (flush_i)
issue_n.valid = 1'b0;
end
// -------------------------
// Registers (ID <-> Issue)
// -------------------------
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
unresolved_branch_q <= 1'b0;
if(~rst_ni) begin
issue_q <= '0;
end else begin
unresolved_branch_q <= unresolved_branch_n;
issue_q <= issue_n;
end
end

View file

@ -24,7 +24,6 @@ module if_stage (
// control signals
input logic flush_i,
output logic if_busy_o, // is the IF stage busy fetching instructions?
input logic id_ready_i, // ID stage is ready
// fetch direction from PC Gen
input logic [63:0] fetch_address_i, // address to fetch from
input logic fetch_valid_i, // the fetch address is valid
@ -88,7 +87,7 @@ module if_stage (
NS = CS;
addr_valid = 1'b0;
unique case(CS)
case(CS)
// default state, not waiting for requested data
IDLE: begin
instr_addr_o = fetch_address;
@ -211,11 +210,6 @@ module if_stage (
// otherwise wait in this state for the rvalid
end
end
default: begin
NS = IDLE;
instr_req_o = 1'b0;
end
endcase
end

View file

@ -338,7 +338,7 @@ module issue_read_operands (
`ifndef verilator
assert property (
@(posedge clk_i) (alu_valid_q || lsu_valid_q || csr_valid_q) |-> (!$isunknown(operand_a_q) && !$isunknown(operand_b_q)))
else $error ("Got unknown value in one of the operands");
else $warning ("Got unknown value in one of the operands");
`endif
`endif
endmodule

175
src/issue_stage.sv Executable file
View file

@ -0,0 +1,175 @@
// Author: Florian Zaruba, ETH Zurich
// Date: 21.05.2017
// Description: Issue stage dispatches instructions to the FUs
//
//
// Copyright (C) 2017 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
import ariane_pkg::*;
// Issue stage: instantiates the scoreboard and the issue_read_operands unit, wires the
// two together and tracks whether a control flow instruction is still unresolved.
//
// Parameters:
//   NR_ENTRIES  - forwarded unchanged to the scoreboard instance
//   NR_WB_PORTS - number of write-back ports (sizes the write-back arrays below),
//                 also forwarded to the scoreboard instance
//
// NOTE: both sub-modules are instantiated with implicit `.*` connections. Every port of
// this module that is not connected by name further down is presumably bound that way,
// i.e. by matching identifier names - keep that in mind before renaming any port or signal.
module issue_stage #(
parameter int NR_ENTRIES = 8,
parameter int NR_WB_PORTS = 4
)(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic test_en_i, // Test Enable
input logic flush_unissued_instr_i, // clears the unresolved branch flag, also drives flush_i of issue_read_operands
input logic flush_i, // clears the unresolved branch flag
// from ID
input scoreboard_entry decoded_instr_i,
input logic decoded_instr_valid_i,
input logic is_ctrl_flow_i, // the decoded instruction is a control flow instruction
output logic decoded_instr_ack_o, // not driven explicitly in this module - presumably bound to a sub-module via .*
// to EX
output fu_t fu_o,
output fu_op operator_o,
output logic [63:0] operand_a_o,
output logic [63:0] operand_b_o,
output logic [63:0] imm_o,
output logic [TRANS_ID_BITS-1:0] trans_id_o,
output logic [63:0] pc_o,
output logic is_compressed_instr_o,
input logic alu_ready_i,
output logic alu_valid_o,
// ex just resolved our predicted branch, we are ready to accept new requests
input logic resolve_branch_i,
input logic lsu_ready_i,
output logic lsu_valid_o,
// branch prediction
input logic branch_ready_i,
output logic branch_valid_o, // use branch prediction unit
output branchpredict_sbe branch_predict_o, // Branch predict Out
input logic mult_ready_i,
output logic mult_valid_o,
input logic csr_ready_i,
output logic csr_valid_o,
// write back port
input logic [NR_WB_PORTS-1:0][TRANS_ID_BITS-1:0] trans_id_i,
input logic [NR_WB_PORTS-1:0][63:0] wdata_i,
input exception [NR_WB_PORTS-1:0] ex_ex_i, // exception from execute stage
input logic [NR_WB_PORTS-1:0] wb_valid_i,
// commit port
input logic[4:0] waddr_a_i,
input logic[63:0] wdata_a_i,
input logic we_a_i,
output scoreboard_entry commit_instr_o,
input logic commit_ack_i
);
// ---------------------------------------------------
// Scoreboard (SB) <-> Issue and Read Operands (IRO)
// ---------------------------------------------------
// naming: <signal>_<source>_<sink>, e.g. rs1_iro_sb is driven by IRO and read by SB
fu_t [31:0] rd_clobber_sb_iro;
logic [4:0] rs1_iro_sb;
logic [63:0] rs1_sb_iro;
logic rs1_valid_sb_iro;
logic [4:0] rs2_iro_sb;
logic [63:0] rs2_sb_iro;
// despite its suffix this signal is driven by the scoreboard (rs2_valid_o) and
// consumed by issue_read_operands (rs2_valid_i), i.e. it travels SB -> IRO
logic rs2_valid_iro_sb;
scoreboard_entry issue_instr_sb_iro;
logic issue_instr_valid_sb_iro;
logic issue_ack_iro_sb;
// ---------------------------------------------------
// Branch (resolve) logic
// ---------------------------------------------------
// This should basically prevent the scoreboard from accepting
// instructions past a branch. We need to resolve the branch beforehand.
// This limitation is in place to ease the backtracking of mis-predicted branches as they
// can simply be in the front-end of the processor.
logic unresolved_branch_n, unresolved_branch_q;
// Next-state logic of the unresolved branch flag. The assignments are evaluated in order and
// the last one wins, which gives the following priority (highest first):
//   1. flush (flush_unissued_instr_i or flush_i)  -> clear
//   2. a control flow instruction / predicted taken branch is acknowledged -> set
//   3. resolve_branch_i                           -> clear
//   4. otherwise                                  -> hold
always_comb begin : unresolved_branch
unresolved_branch_n = unresolved_branch_q;
// we just resolved the branch
if (resolve_branch_i) begin
unresolved_branch_n = 1'b0;
end
// if the instruction is valid, it is a control flow instruction and the issue stage acknowledged its dispatch
// set the unresolved branch flag
if (issue_ack_iro_sb && decoded_instr_valid_i && is_ctrl_flow_i) begin
unresolved_branch_n = 1'b1;
end
// if we predicted a taken branch this means that we need to stall issue for one cycle to resolve the
// branch, otherwise we might issue a wrong instruction
// NOTE(review): unlike the condition above this one is not qualified with decoded_instr_valid_i -
// confirm that decoded_instr_i.bp.valid can never be set while the decoded instruction is invalid
if (issue_ack_iro_sb && decoded_instr_i.bp.valid && decoded_instr_i.bp.predict_taken) begin
unresolved_branch_n = 1'b1;
end
// if we are requested to flush also flush the unresolved branch flag because either the flush
// was requested by a branch or an exception. In any case: any unresolved branch will get evicted
if (flush_unissued_instr_i || flush_i) begin
unresolved_branch_n = 1'b0;
end
end
// ---------------------------------------------------
// Issue and Read Operands (IRO)
// ---------------------------------------------------
// its flush input is driven by flush_unissued_instr_i (not by flush_i of this module),
// all ports not listed here are connected implicitly by name
issue_read_operands issue_read_operands_i (
.flush_i ( flush_unissued_instr_i ),
.issue_instr_i ( issue_instr_sb_iro ),
.issue_instr_valid_i ( issue_instr_valid_sb_iro ),
.issue_ack_o ( issue_ack_iro_sb ),
.rs1_o ( rs1_iro_sb ),
.rs1_i ( rs1_sb_iro ),
.rs1_valid_i ( rs1_valid_sb_iro ),
.rs2_o ( rs2_iro_sb ),
.rs2_i ( rs2_sb_iro ),
.rs2_valid_i ( rs2_valid_iro_sb ),
.rd_clobber_i ( rd_clobber_sb_iro ),
.*
);
// ---------------------------------------------------
// Scoreboard (SB)
// ---------------------------------------------------
// receives the registered unresolved branch flag; the execute stage exceptions (ex_ex_i) are
// renamed to the scoreboard's ex_i port, all ports not listed here are connected implicitly by name
scoreboard #(
.NR_ENTRIES ( NR_ENTRIES ),
.NR_WB_PORTS ( NR_WB_PORTS )
)
scoreboard_i
(
.unresolved_branch_i ( unresolved_branch_q ),
.rd_clobber_o ( rd_clobber_sb_iro ),
.rs1_i ( rs1_iro_sb ),
.rs1_o ( rs1_sb_iro ),
.rs1_valid_o ( rs1_valid_sb_iro ),
.rs2_i ( rs2_iro_sb ),
.rs2_o ( rs2_sb_iro ),
.rs2_valid_o ( rs2_valid_iro_sb ),
.issue_instr_o ( issue_instr_sb_iro ),
.issue_instr_valid_o ( issue_instr_valid_sb_iro ),
.issue_ack_i ( issue_ack_iro_sb ),
.trans_id_i ( trans_id_i ),
.wdata_i ( wdata_i ),
.ex_i ( ex_ex_i ),
.*
);
// state register of the unresolved branch flag, cleared by the asynchronous active-low reset
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
unresolved_branch_q <= 1'b0;
end else begin
unresolved_branch_q <= unresolved_branch_n;
end
end
endmodule

View file

@ -23,11 +23,9 @@ module load_unit (
input logic rst_ni, // Asynchronous reset active low
input logic flush_i,
// load unit input port
input fu_op operator_i,
input logic [TRANS_ID_BITS-1:0] trans_id_i,
input logic valid_i,
input logic [63:0] vaddr_i,
input logic [7:0] be_i,
input lsu_ctrl_t lsu_ctrl_i,
output logic pop_ld_o,
// load unit output port
output logic valid_o,
output logic ready_o,
@ -38,8 +36,8 @@ module load_unit (
output logic translation_req_o, // request address translation
output logic [63:0] vaddr_o, // virtual address out
input logic [63:0] paddr_i, // physical address in
input logic translation_valid_i,
input exception ex_i, // exception which may has happened earlier. for example: mis-aligned exception
input logic dtlb_hit_i, // hit on the dtlb, send in the same cycle as the request
// address checker
output logic [11:0] page_offset_o,
input logic page_offset_matches_i,
@ -56,56 +54,46 @@ module load_unit (
input logic data_rvalid_i,
input logic [63:0] data_rdata_i
);
enum logic [2:0] {IDLE, WAIT_GNT, SEND_TAG, WAIT_PAGE_OFFSET, ABORT_TRANSLATION, WAIT_FLUSH} NS, CS;
enum logic [2:0] {IDLE, WAIT_GNT, SEND_TAG, WAIT_PAGE_OFFSET, ABORT_TRANSACTION, WAIT_TRANSLATION, WAIT_FLUSH} NS, CS;
// in order to decouple the response interface from the request interface we need a
// queue which can hold all outstanding memory requests
typedef struct packed {
struct packed {
logic [TRANS_ID_BITS-1:0] trans_id;
logic [2:0] address_offset;
fu_op operator;
} rvalid_entry_t;
} load_data_n, load_data_q, in_data;
// queue control signal
rvalid_entry_t in_data;
logic push;
rvalid_entry_t out_data;
logic pop;
logic empty;
// register to save the physical address after address translation
// going directly to memory with this address will not work in-terms of timing (e.g.: the path to the memory
// is already super-critical with the address checker and memory arbiter on it).
logic [63:0] paddr_n, paddr_q;
// page offset is defined as the lower 12 bits, feed through for address checker
assign page_offset_o = vaddr_i[11:0];
assign page_offset_o = lsu_ctrl_i.vaddr[11:0];
// feed-through the virtual address for VA translation
assign vaddr_o = vaddr_i;
assign vaddr_o = lsu_ctrl_i.vaddr;
// this is a read-only interface so set the write enable to 0
assign data_we_o = 1'b0;
// compose the queue data, control is handled in the FSM
assign in_data = {trans_id_i, vaddr_i[2:0], operator_i};
assign in_data = {lsu_ctrl_i.trans_id, lsu_ctrl_i.vaddr[2:0], lsu_ctrl_i.operator};
// output address
// we can now output the lower 12 bit as the index to the cache
assign address_index_o = vaddr_i[11:0];
assign address_index_o = lsu_ctrl_i.vaddr[11:0];
// translation from last cycle, again: control is handled in the FSM
assign address_tag_o = paddr_q[55:12];
assign address_tag_o = paddr_i[55:12];
// directly output an exception
assign ex_o = ex_i;
// ---------------
// Load Control
// ---------------
always_comb begin : load_controll
always_comb begin : load_control
// default assignments
NS = CS;
paddr_n = paddr_q;
load_data_n = in_data;
translation_req_o = 1'b0;
ready_o = 1'b1;
data_req_o = 1'b0;
// tag control
kill_req_o = 1'b0;
tag_valid_o = 1'b0;
push = 1'b0;
data_be_o = be_i;
ex_o = ex_i;
data_be_o = lsu_ctrl_i.be;
pop_ld_o = 1'b0;
case (CS)
IDLE: begin
@ -117,107 +105,59 @@ module load_unit (
// check if the page offset matches with a store, if it does then stall and wait
if (!page_offset_matches_i) begin
// make a load request to memory
data_req_o = 1'b1;
// the translation request we got is valid
if (translation_valid_i) begin
// save the physical address for the next cycle
paddr_n = paddr_i;
// we got no data grant so wait for the grant before sending the tag
if (!data_gnt_i) begin
NS = WAIT_GNT;
ready_o = 1'b0;
end else begin
// put the request in the queue
push = 1'b1;
// we got a grant so we can send the tag in the next cycle
NS = SEND_TAG;
end
// we got a TLB miss
data_req_o = 1'b1;
// we got no data grant so wait for the grant before sending the tag
if (!data_gnt_i) begin
NS = WAIT_GNT;
end else begin
// we need to abort the translation and let the PTW walker fix the TLB miss
NS = ABORT_TRANSLATION;
ready_o = 1'b0;
if (dtlb_hit_i) begin
// we got a grant and a hit on the DTLB so we can send the tag in the next cycle
NS = SEND_TAG;
pop_ld_o = 1'b1;
end else
NS = ABORT_TRANSACTION;
end
end else begin
// stall and wait for the store-buffer to drain
ready_o = 1'b0;
// wait for the store buffer to drain and the page offset to not match anymore
NS = WAIT_PAGE_OFFSET;
end
end
end
// wait here for the page offset to not match anymore
WAIT_PAGE_OFFSET: begin
// we are definitely not ready to accept a new request
// we need unique access to the LSU
ready_o = 1'b0;
translation_req_o = 1'b1;
// we make a new request as soon as the page offset does not match anymore
// essentially the same part as above
if (!page_offset_matches_i) begin
// make a load request to memory
data_req_o = 1'b1;
// the translation request we got is valid
if (translation_valid_i) begin
// save the physical address for the next cycle
paddr_n = paddr_i;
// we got no data grant so wait for the grant before sending the tag
if (!data_gnt_i) begin
NS = WAIT_GNT;
end else begin
// put the request in the queue
push = 1'b1;
// we got a grant so we can send the tag in the next cycle
NS = SEND_TAG;
end
// we got a TLB miss
end else begin
// we need to abort the translation and let the PTW walker fix the TLB miss
NS = ABORT_TRANSLATION;
ready_o = 1'b0;
end
NS = WAIT_GNT;
end
end
// abort the previous request - free the D$ arbiter
// we are here because of a TLB miss, we need to abort the current request and give way for the
// PTW walker to satisfy the TLB miss
ABORT_TRANSLATION: begin
// keep the translation request high to tell the PTW that we want this
// translation
translation_req_o = 1'b1;
// we are not ready here
ABORT_TRANSACTION: begin
ready_o = 1'b0;
kill_req_o = 1'b1;
tag_valid_o = 1'b1;
// redo the request by going back to the wait gnt state
NS = WAIT_TRANSLATION;
end
WAIT_TRANSLATION: begin
ready_o = 1'b0;
// send an abort signal
tag_valid_o = 1'b1;
kill_req_o = 1'b1;
// wait for the translation to become valid and redo the request
if (translation_valid_i) begin
// we have a valid translation so tell the cache it should wait for it on the next cycle
// reset the kill request
tag_valid_o = 1'b0;
kill_req_o = 1'b0;
// if the request is still here, do the load
if (valid_i) begin
data_req_o = 1'b1;
paddr_n = paddr_i;
if (!data_gnt_i) begin
NS = WAIT_GNT;
ready_o = 1'b0;
end else begin
// here we are ready to accept a new request
ready_o = 1'b1;
// put the request in the queue
push = 1'b1;
// we got a grant so we can send the tag in the next cycle
NS = SEND_TAG;
end
end
end
translation_req_o = 1'b1;
// we've got a hit and we can continue with the request process
if (dtlb_hit_i)
NS = WAIT_GNT;
end
WAIT_GNT: begin
// keep the translation request up
translation_req_o = 1'b1;
// we are waiting for the grant so we are not ready to accept anything new
ready_o = 1'b0;
// keep the request up
@ -225,144 +165,104 @@ module load_unit (
// we finally got a data grant
if (data_gnt_i) begin
// so we send the tag in the next cycle
NS = SEND_TAG;
// we store this grant in our queue
push = 1'b1;
// plus: we can accept a new request
ready_o = 1'b1;
if (dtlb_hit_i) begin
NS = SEND_TAG;
pop_ld_o = 1'b1;
end else // should we not have hit on the TLB abort this transaction an retry later
NS = ABORT_TRANSACTION;
end
// otherwise we keep waiting on our grant
end
// we know for sure that the tag we want to send is valid
SEND_TAG: begin
ready_o = 1'b1;
// tell the cache that this tag is valid
tag_valid_o = 1'b1;
// if we are sending our tag we are able to process a new request
// -------------
// New Request
// -------------
// we can make a new request if we got one
NS = IDLE;
// we can make a new request here if we got one
if (valid_i) begin
// do another address translation
// start the translation process even though we do not know if the addresses match
// this should ease timing
translation_req_o = 1'b1;
if(!page_offset_matches_i) begin
// make a load request to memory
data_req_o = 1'b1;
// the translation request we got is valid
if (translation_valid_i) begin
// save the physical address for the next cycle
paddr_n = paddr_i;
// we got no data grant so wait for the grant before sending the tag
if (!data_gnt_i) begin
NS = WAIT_GNT;
ready_o = 1'b0;
end else begin
// put the request in the queue
push = 1'b1;
// we got a grant so we can send the tag in the next cycle
NS = SEND_TAG;
end
// we got a TLB miss
end else begin
// we need to abort the translation and let the PTW walker fix the TLB miss
NS = ABORT_TRANSLATION;
ready_o = 1'b0;
end
// page offset mis-match -> go back to idle
// check if the page offset matches with a store, if it does stall and wait
if (!page_offset_matches_i) begin
// make a load request to memory
data_req_o = 1'b1;
// we got no data grant so wait for the grant before sending the tag
if (!data_gnt_i) begin
NS = WAIT_GNT;
end else begin
// we got a grant so we can send the tag in the next cycle
if (dtlb_hit_i) begin
// we got a grant and a hit on the DTLB so we can send the tag in the next cycle
NS = SEND_TAG;
pop_ld_o = 1'b1;
end else // we missed on the TLB -> wait for the translation
NS = ABORT_TRANSACTION;
end
end else begin
NS = IDLE;
// wait for the store buffer to drain and the page offset to not match anymore
NS = WAIT_PAGE_OFFSET;
end
end else begin
NS = IDLE;
end
// ----------
// Exception
// ----------
// if we got an exception we need to kill the request immediately
if (ex_i.valid) begin
kill_req_o = 1'b1;
end
end
WAIT_FLUSH: begin
ready_o = 1'b0;
// we got all outstanding requests
if (empty) begin
ready_o = 1'b1;
NS = IDLE;
end
ready_o = 1'b0;
// the D$ arbiter will take care of presenting this to the memory only in case we
// have an outstanding request
kill_req_o = 1'b1;
tag_valid_o = 1'b1;
// we've killed the current request so we can go back to idle
NS = IDLE;
end
endcase
// -----------------
// Access Exception
// -----------------
// we've got an exception
if (valid_i && ex_i.valid) begin
// clear the request
data_req_o = 1'b0;
// we are ready
ready_o = 1'b1;
// do not push this request
push = 1'b0;
// reset state machine
// we got an exception
if (ex_i.valid) begin
// the next state will be the idle state
NS = IDLE;
end
// if we just flushed and the queue is not empty or we are getting an rvalid this cycle wait in a extra stage
if (flush_i && (!empty || data_rvalid_i)) begin
if (flush_i) begin
NS = WAIT_FLUSH;
end else if (flush_i) begin
NS = IDLE;
end
end
// decoupled rvalid process
always_comb begin : rvalid_output
pop = 1'b0;
valid_o = 1'b0;
// output the queue data directly, the valid signal is set corresponding to the process above
trans_id_o = out_data.trans_id;
trans_id_o = load_data_q.trans_id;
// we got an rvalid and are currently not flushing and not aborting the request
if (data_rvalid_i && CS != WAIT_FLUSH && !kill_req_o) begin
pop = 1'b1;
valid_o = 1'b1;
end
// pass through an exception
if (valid_i && ex_i.valid) begin
valid_o = 1'b1;
// in case of an exception we can use the current trans_id since we either stalled
// or we are taking the exception in the first cycle
trans_id_o = trans_id_i;
if (data_rvalid_i && CS != WAIT_FLUSH) begin
// we killed the request
if(!kill_req_o)
valid_o = 1'b1;
// the output is also valid if we got an exception
if (ex_i.valid)
valid_o = 1'b1;
end
end
// latch physical address
// latch physical address for the tag cycle (one cycle after applying the index)
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
CS <= IDLE;
paddr_q <= '0;
CS <= IDLE;
load_data_q <= '0;
end else begin
CS <= NS;
paddr_q <= paddr_n;
CS <= NS;
load_data_q <= load_data_n;
end
end
// --------------
// Rvalid FIFO
// --------------
// we can have two outstanding requests, hence we need two elements in the FIFO
fifo #(
.dtype ( rvalid_entry_t ),
.DEPTH ( 2 )
)
fifo_i (
.full_o ( ), // we can ignore the full signal, the FIFO will never overflow
.empty_o ( empty ),
.single_element_o ( ), // we don't care about the single element either
.data_i ( in_data ),
.push_i ( push ),
.data_o ( out_data ),
.pop_i ( pop ),
.*
);
// ---------------
// Sign Extend
// ---------------
@ -378,43 +278,43 @@ module load_unit (
// sign extension for words
always_comb begin : sign_extend_word
case (out_data.address_offset)
default: rdata_w_ext = (out_data.operator == LW) ? {{32{data_rdata_i[31]}}, data_rdata_i[31:0]} : {32'h0, data_rdata_i[31:0]};
3'b001: rdata_w_ext = (out_data.operator == LW) ? {{32{data_rdata_i[39]}}, data_rdata_i[39:8]} : {32'h0, data_rdata_i[39:8]};
3'b010: rdata_w_ext = (out_data.operator == LW) ? {{32{data_rdata_i[47]}}, data_rdata_i[47:16]} : {32'h0, data_rdata_i[47:16]};
3'b011: rdata_w_ext = (out_data.operator == LW) ? {{32{data_rdata_i[55]}}, data_rdata_i[55:24]} : {32'h0, data_rdata_i[55:24]};
3'b100: rdata_w_ext = (out_data.operator == LW) ? {{32{data_rdata_i[63]}}, data_rdata_i[63:32]} : {32'h0, data_rdata_i[63:32]};
case (load_data_q.address_offset)
default: rdata_w_ext = (load_data_q.operator == LW) ? {{32{data_rdata_i[31]}}, data_rdata_i[31:0]} : {32'h0, data_rdata_i[31:0]};
3'b001: rdata_w_ext = (load_data_q.operator == LW) ? {{32{data_rdata_i[39]}}, data_rdata_i[39:8]} : {32'h0, data_rdata_i[39:8]};
3'b010: rdata_w_ext = (load_data_q.operator == LW) ? {{32{data_rdata_i[47]}}, data_rdata_i[47:16]} : {32'h0, data_rdata_i[47:16]};
3'b011: rdata_w_ext = (load_data_q.operator == LW) ? {{32{data_rdata_i[55]}}, data_rdata_i[55:24]} : {32'h0, data_rdata_i[55:24]};
3'b100: rdata_w_ext = (load_data_q.operator == LW) ? {{32{data_rdata_i[63]}}, data_rdata_i[63:32]} : {32'h0, data_rdata_i[63:32]};
endcase
end
// sign extension for half words
always_comb begin : sign_extend_half_word
case (out_data.address_offset)
default: rdata_h_ext = (out_data.operator == LH) ? {{48{data_rdata_i[15]}}, data_rdata_i[15:0]} : {48'h0, data_rdata_i[15:0]};
3'b001: rdata_h_ext = (out_data.operator == LH) ? {{48{data_rdata_i[23]}}, data_rdata_i[23:8]} : {48'h0, data_rdata_i[23:8]};
3'b010: rdata_h_ext = (out_data.operator == LH) ? {{48{data_rdata_i[31]}}, data_rdata_i[31:16]} : {48'h0, data_rdata_i[31:16]};
3'b011: rdata_h_ext = (out_data.operator == LH) ? {{48{data_rdata_i[39]}}, data_rdata_i[39:24]} : {48'h0, data_rdata_i[39:24]};
3'b100: rdata_h_ext = (out_data.operator == LH) ? {{48{data_rdata_i[47]}}, data_rdata_i[47:32]} : {48'h0, data_rdata_i[47:32]};
3'b101: rdata_h_ext = (out_data.operator == LH) ? {{48{data_rdata_i[55]}}, data_rdata_i[55:40]} : {48'h0, data_rdata_i[55:40]};
3'b110: rdata_h_ext = (out_data.operator == LH) ? {{48{data_rdata_i[63]}}, data_rdata_i[63:48]} : {48'h0, data_rdata_i[63:48]};
case (load_data_q.address_offset)
default: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[15]}}, data_rdata_i[15:0]} : {48'h0, data_rdata_i[15:0]};
3'b001: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[23]}}, data_rdata_i[23:8]} : {48'h0, data_rdata_i[23:8]};
3'b010: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[31]}}, data_rdata_i[31:16]} : {48'h0, data_rdata_i[31:16]};
3'b011: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[39]}}, data_rdata_i[39:24]} : {48'h0, data_rdata_i[39:24]};
3'b100: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[47]}}, data_rdata_i[47:32]} : {48'h0, data_rdata_i[47:32]};
3'b101: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[55]}}, data_rdata_i[55:40]} : {48'h0, data_rdata_i[55:40]};
3'b110: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[63]}}, data_rdata_i[63:48]} : {48'h0, data_rdata_i[63:48]};
endcase
end
always_comb begin : sign_extend_byte
case (out_data.address_offset)
default: rdata_b_ext = (out_data.operator == LB) ? {{56{data_rdata_i[7]}}, data_rdata_i[7:0]} : {56'h0, data_rdata_i[7:0]};
3'b001: rdata_b_ext = (out_data.operator == LB) ? {{56{data_rdata_i[15]}}, data_rdata_i[15:8]} : {56'h0, data_rdata_i[15:8]};
3'b010: rdata_b_ext = (out_data.operator == LB) ? {{56{data_rdata_i[23]}}, data_rdata_i[23:16]} : {56'h0, data_rdata_i[23:16]};
3'b011: rdata_b_ext = (out_data.operator == LB) ? {{56{data_rdata_i[31]}}, data_rdata_i[31:24]} : {56'h0, data_rdata_i[31:24]};
3'b100: rdata_b_ext = (out_data.operator == LB) ? {{56{data_rdata_i[39]}}, data_rdata_i[39:32]} : {56'h0, data_rdata_i[39:32]};
3'b101: rdata_b_ext = (out_data.operator == LB) ? {{56{data_rdata_i[47]}}, data_rdata_i[47:40]} : {56'h0, data_rdata_i[47:40]};
3'b110: rdata_b_ext = (out_data.operator == LB) ? {{56{data_rdata_i[55]}}, data_rdata_i[55:48]} : {56'h0, data_rdata_i[55:48]};
3'b111: rdata_b_ext = (out_data.operator == LB) ? {{56{data_rdata_i[63]}}, data_rdata_i[63:56]} : {56'h0, data_rdata_i[63:56]};
case (load_data_q.address_offset)
default: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[7]}}, data_rdata_i[7:0]} : {56'h0, data_rdata_i[7:0]};
3'b001: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[15]}}, data_rdata_i[15:8]} : {56'h0, data_rdata_i[15:8]};
3'b010: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[23]}}, data_rdata_i[23:16]} : {56'h0, data_rdata_i[23:16]};
3'b011: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[31]}}, data_rdata_i[31:24]} : {56'h0, data_rdata_i[31:24]};
3'b100: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[39]}}, data_rdata_i[39:32]} : {56'h0, data_rdata_i[39:32]};
3'b101: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[47]}}, data_rdata_i[47:40]} : {56'h0, data_rdata_i[47:40]};
3'b110: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[55]}}, data_rdata_i[55:48]} : {56'h0, data_rdata_i[55:48]};
3'b111: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[63]}}, data_rdata_i[63:56]} : {56'h0, data_rdata_i[63:56]};
endcase
end
always_comb begin
case (out_data.operator)
case (load_data_q.operator)
LW, LWU: result_o = rdata_w_ext;
LH, LHU: result_o = rdata_h_ext;
LB, LBU: result_o = rdata_b_ext;

View file

@ -85,30 +85,20 @@ module lsu #(
// --------------------------------------
// those are the signals which are always correct
// e.g.: they keep the value in the stall case
logic valid;
logic [63:0] vaddr;
logic [63:0] data;
logic [7:0] be;
fu_t fu;
fu_op operator;
logic [TRANS_ID_BITS-1:0] trans_id;
// registered address in case of a necessary stall
logic valid_n, valid_q;
logic [63:0] vaddr_n, vaddr_q;
logic [63:0] data_n, data_q;
fu_t fu_n, fu_q;
fu_op operator_n, operator_q;
logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q;
logic [7:0] be_n, be_q;
logic stall_n, stall_q;
// ------------------------------
lsu_ctrl_t lsu_ctrl;
logic lsu_ctrl_full;
lsu_ctrl_t lsu_ctrl_o;
logic pop_st;
logic pop_ld;
// Address Generation Unit (AGU)
// ------------------------------
// virtual address as calculated by the AGU in the first cycle
logic [63:0] vaddr_i;
logic [7:0] be_i;
assign vaddr_i = $signed(imm_i) + $signed(operand_a_i);
assign vaddr_i = $unsigned($signed(imm_i) + $signed(operand_a_i));
logic st_valid_i;
logic st_ready_o;
@ -123,6 +113,7 @@ module lsu #(
logic [63:0] mmu_vaddr;
logic [63:0] mmu_paddr;
exception mmu_exception;
logic dtlb_hit;
logic ld_valid;
logic [TRANS_ID_BITS-1:0] ld_trans_id;
@ -201,6 +192,7 @@ module lsu #(
.lsu_valid_o ( translation_valid ),
.lsu_paddr_o ( mmu_paddr ),
.lsu_exception_o ( mmu_exception ),
.lsu_dtlb_hit_o ( dtlb_hit ), // send in the same cycle as the request
// connecting PTW to D$ IF (aka mem arbiter)
.address_index_o ( address_index_i [0] ),
.address_tag_o ( address_tag_i [0] ),
@ -219,12 +211,10 @@ module lsu #(
// Store Unit
// ------------------
store_unit store_unit_i (
.operator_i ( operator ),
.trans_id_i ( trans_id ),
.valid_i ( st_valid_i ),
.vaddr_i ( vaddr ),
.be_i ( be ),
.data_i ( data ),
.lsu_ctrl_i ( lsu_ctrl ),
.pop_st_o ( pop_st ),
.valid_o ( st_valid ),
.ready_o ( st_ready_o ),
.trans_id_o ( st_trans_id ),
@ -234,8 +224,8 @@ module lsu #(
.translation_req_o ( st_translation_req ),
.vaddr_o ( st_vaddr ),
.paddr_i ( mmu_paddr ),
.translation_valid_i ( translation_valid ),
.ex_i ( mmu_exception ),
.dtlb_hit_i ( dtlb_hit ),
// Load Unit
.page_offset_i ( page_offset ),
.page_offset_matches_o ( page_offset_matches ),
@ -257,11 +247,10 @@ module lsu #(
// Load Unit
// ------------------
load_unit load_unit_i (
.operator_i ( operator ),
.trans_id_i ( trans_id ),
.valid_i ( ld_valid_i ),
.vaddr_i ( vaddr ),
.be_i ( be ),
.lsu_ctrl_i ( lsu_ctrl ),
.pop_ld_o ( pop_ld ),
.valid_o ( ld_valid ),
.ready_o ( ld_ready_o ),
.trans_id_o ( ld_trans_id ),
@ -271,8 +260,8 @@ module lsu #(
.translation_req_o ( ld_translation_req ),
.vaddr_o ( ld_vaddr ),
.paddr_i ( mmu_paddr ),
.translation_valid_i ( translation_valid ),
.ex_i ( mmu_exception ),
.dtlb_hit_i ( dtlb_hit ),
// to store unit
.page_offset_o ( page_offset ),
.page_offset_matches_i ( page_offset_matches ),
@ -314,38 +303,29 @@ module lsu #(
.ex_o ( lsu_exception_o )
);
// ------------------
// LSU Control
// ------------------
always_comb begin : lsu_control
// the LSU is ready if both, stores and loads are ready because we do not know
// which of the two we are getting
lsu_ready_o = ld_ready_o && st_ready_o;
// "arbitrate" MMU access, there is only one request possible
translation_req = 1'b0;
mmu_vaddr = 64'b0;
// this arbitrates access to the MMU
if (st_translation_req) begin
translation_req = 1'b1;
mmu_vaddr = st_vaddr;
end else if (ld_translation_req) begin
translation_req = 1'b1;
mmu_vaddr = ld_vaddr;
end
end
// determine whether this is a load or store
always_comb begin : which_op
ld_valid_i = 1'b0;
st_valid_i = 1'b0;
translation_req = 1'b0;
mmu_vaddr = 64'b0;
// check the operator to activate the right functional unit accordingly
unique case (fu)
unique case (lsu_ctrl.fu)
// all loads go here
LOAD: ld_valid_i = valid;
LOAD: begin
ld_valid_i = lsu_ctrl.valid;
translation_req = ld_translation_req;
mmu_vaddr = ld_vaddr;
end
// all stores go here
STORE: st_valid_i = valid;
STORE: begin
st_valid_i = lsu_ctrl.valid;
translation_req = st_translation_req;
mmu_vaddr = st_vaddr;
end
// not relevant for the LSU
default: ;
endcase
@ -441,100 +421,44 @@ module lsu #(
if (data_misaligned) begin
if (fu == LOAD) begin
if (lsu_ctrl.fu == LOAD) begin
misaligned_exception = {
LD_ADDR_MISALIGNED,
vaddr,
lsu_ctrl.vaddr,
1'b1
};
end else if (fu == STORE) begin
end else if (lsu_ctrl.fu == STORE) begin
misaligned_exception = {
ST_ADDR_MISALIGNED,
vaddr,
lsu_ctrl.vaddr,
1'b1
};
end
end
end
// this process selects the input based on the current state of the LSU
// it can either be feed-through from the issue stage or from the internal registers
always_comb begin : input_select
// if we are stalling use the values we saved
if (stall_q) begin
valid = valid_q;
vaddr = vaddr_q;
data = data_q;
fu = fu_q;
operator = operator_q;
trans_id = trans_id_q;
be = be_q;
end else begin // otherwise bypass them
valid = lsu_valid_i;
vaddr = vaddr_i;
data = operand_b_i;
fu = fu_i;
operator = operator_i;
trans_id = trans_id_i;
be = be_i;
end
end
// 1st register stage
always_comb begin : register_stage
valid_n = valid_q;
vaddr_n = vaddr_q;
data_n = data_q;
fu_n = fu_q;
operator_n = operator_q;
trans_id_n = trans_id_q;
be_n = be_q;
// get new input data
if (lsu_valid_i) begin
valid_n = lsu_valid_i;
vaddr_n = vaddr_i;
data_n = operand_b_i;
fu_n = fu_i;
operator_n = operator_i;
trans_id_n = trans_id_i;
be_n = be_i;
end
// ------------------
// LSU Control
// ------------------
// new data arrives here
lsu_ctrl_t lsu_req_i;
if (lsu_ready_o) begin
stall_n = 1'b0;
end else begin
stall_n = 1'b1;
end
// if we flush we can safely un-stall
if (flush_i)
stall_n = 1'b0;
end
assign lsu_req_i = {lsu_valid_i, vaddr_i, operand_b_i, be_i, fu_i, operator_i, trans_id_i};
// registers
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
// 1st LSU stage
valid_q <= 1'b0;
vaddr_q <= 64'b0;
data_q <= 64'b0;
fu_q <= NONE;
operator_q <= ADD;
trans_id_q <= '{default: 0};
be_q <= 8'b0;
stall_q <= 1'b0;
end else begin
// 1st LSU stage
valid_q <= valid_n;
vaddr_q <= vaddr_n;
data_q <= data_n;
fu_q <= fu_n;
operator_q <= operator_n;
trans_id_q <= trans_id_n;
be_q <= be_n;
stall_q <= stall_n;
end
end
lsu_bypass lsu_bypass_i (
.lsu_req_i ( lsu_req_i ),
.lus_req_valid_i ( lsu_valid_i ),
.pop_ld_i ( pop_ld ),
.pop_st_i ( pop_st ),
.ld_ready_i ( ld_ready_o ),
.st_ready_i ( st_ready_o ),
.lsu_ctrl_o ( lsu_ctrl ),
.ready_o ( lsu_ready_o ),
.*
);
// ------------
// Assertions
// ------------
@ -567,4 +491,107 @@ module lsu #(
// else begin $error("address contains X when request is set"); $stop(); end
`endif
`endif
endmodule
endmodule
// ------------------
// LSU Control
// ------------------
// The LSU consists of two independent blocks which share a common address translation block.
// One block is the load unit, the other one is the store unit. They signal their readiness
// with separate signals. If they are not ready the LSU control should keep the last applied signals stable.
// Furthermore it can be the case that another request for one of the two units arrives, in which case
// the LSU control should sample it and store it for later application to the units. It does so by storing it in a
// two-element FIFO.
// lsu_bypass: two-entry request FIFO with a combinational bypass path.
//
// Incoming requests (lsu_req_i, qualified by lus_req_valid_i) are written into a
// two-element memory. While the FIFO is empty the incoming request is forwarded
// directly to lsu_ctrl_o; otherwise the entry at the read pointer is presented.
// Entries are released through pop_ld_i / pop_st_i and everything is cleared on flush_i.
//
// NOTE: the port name "lus_req_valid_i" (sic) is kept as-is because instantiations
// connect to it by name.
// NOTE(review): ld_ready_i and st_ready_i are not read anywhere in this module body.
module lsu_bypass (
input logic clk_i,
input logic rst_ni,
input logic flush_i,
// new request and its qualifier
input lsu_ctrl_t lsu_req_i,
input logic lus_req_valid_i,
// release the entry at the read pointer (one signal per consumer)
input logic pop_ld_i,
input logic pop_st_i,
input logic ld_ready_i,
input logic st_ready_i,
// request presented to the consumers and readiness towards the producer
output lsu_ctrl_t lsu_ctrl_o,
output logic ready_o
);
// FIFO storage (two entries), 1-bit read/write pointers which wrap on increment,
// and a 2-bit fill counter
lsu_ctrl_t [1:0] mem_n, mem_q;
logic read_pointer_n, read_pointer_q;
logic write_pointer_n, write_pointer_q;
logic [1:0] status_cnt_n, status_cnt_q;
logic empty;
assign empty = (status_cnt_q == 0);
// ready is signalled only while nothing is stored in the FIFO
assign ready_o = empty;
// next-state logic; the statement order below matters because later blocking
// assignments override earlier ones (pop after push, flush last)
always_comb begin
// working copies of the registered state, modified in place below
automatic logic [1:0] status_cnt = status_cnt_q;
automatic logic write_pointer = write_pointer_q;
automatic logic read_pointer = read_pointer_q;
mem_n = mem_q;
// we've got a valid LSU request
// NOTE(review): the write is not guarded against an already full FIFO in this module
if (lus_req_valid_i) begin
mem_n[write_pointer_q] = lsu_req_i;
write_pointer++;
status_cnt++;
end
// NOTE(review): neither pop is guarded against status_cnt being zero in this module
if (pop_ld_i) begin
// invalidate the result
mem_n[read_pointer_q].valid = 1'b0;
read_pointer++;
status_cnt--;
end
if (pop_st_i) begin
// invalidate the result
mem_n[read_pointer_q].valid = 1'b0;
read_pointer++;
status_cnt--;
end
// both pops in the same cycle: each branch above has already advanced the read
// pointer and decremented the counter, additionally clear the whole memory
if (pop_st_i && pop_ld_i)
mem_n = '{default: 0};
// flush overrides everything above: reset counter, pointers and memory
if (flush_i) begin
status_cnt = '0;
write_pointer = '0;
read_pointer = '0;
mem_n = '{default: 0};
end
// hand the updated working copies to the next-state signals
read_pointer_n = read_pointer;
write_pointer_n = write_pointer;
status_cnt_n = status_cnt;
end
// output assignment: bypass the incoming request while empty, otherwise present
// the stored entry at the read pointer
always_comb begin : output_assignments
if (empty) begin
lsu_ctrl_o = lsu_req_i;
end else begin
lsu_ctrl_o = mem_q[read_pointer_q];
end
end
// registers, asynchronous active-low reset clears the complete FIFO state
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
mem_q <= '{default: 0};
status_cnt_q <= '0;
write_pointer_q <= '0;
read_pointer_q <= '0;
end else begin
mem_q <= mem_n;
status_cnt_q <= status_cnt_n;
write_pointer_q <= write_pointer_n;
read_pointer_q <= read_pointer_n;
end
end
endmodule

View file

@ -48,6 +48,9 @@ module mmu #(
input logic [63:0] lsu_vaddr_i, // virtual address in
input logic lsu_is_store_i, // the translation is requested by a store
// if we need to walk the page table we can't grant in the same cycle
// Cycle 0
output logic lsu_dtlb_hit_o, // sent in the same cycle as the request if translation hits in the DTLB
// Cycle 1
output logic lsu_valid_o, // translation is valid
output logic [63:0] lsu_paddr_o, // translated address
output exception lsu_exception_o, // address translation threw an exception
@ -83,14 +86,14 @@ module mmu #(
);
// instruction error
// instruction error valid signal and exception, delayed one cycle
logic ierr_valid_q, ierr_valid_n;
exception fetch_ex_q, fetch_ex_n;
logic ierr_valid_q, ierr_valid_n;
exception fetch_ex_q, fetch_ex_n;
logic iaccess_err; // insufficient privilege to access this instruction page
logic daccess_err; // insufficient privilege to access this data page
logic ptw_active; // PTW is currently walking a page table
logic walking_instr; // PTW is walking because of an ITLB miss
logic ptw_error; // PTW threw an exception
logic iaccess_err; // insufficient privilege to access this instruction page
logic daccess_err; // insufficient privilege to access this data page
logic ptw_active; // PTW is currently walking a page table
logic walking_instr; // PTW is walking because of an ITLB miss
logic ptw_error; // PTW threw an exception
logic update_is_2M;
logic update_is_1G;
@ -98,19 +101,19 @@ module mmu #(
logic [0:0] update_asid;
pte_t update_content;
logic itlb_update;
logic itlb_lu_access;
pte_t itlb_content;
logic itlb_is_2M;
logic itlb_is_1G;
logic itlb_lu_hit;
logic itlb_update;
logic itlb_lu_access;
pte_t itlb_content;
logic itlb_is_2M;
logic itlb_is_1G;
logic itlb_lu_hit;
logic dtlb_update;
logic dtlb_lu_access;
pte_t dtlb_content;
logic dtlb_is_2M;
logic dtlb_is_1G;
logic dtlb_lu_hit;
logic dtlb_update;
logic dtlb_lu_access;
pte_t dtlb_content;
logic dtlb_is_2M;
logic dtlb_is_1G;
logic dtlb_lu_hit;
// Assignments
assign itlb_lu_access = fetch_req_i;
@ -224,7 +227,7 @@ module mmu #(
instr_if_address_o = {itlb_content.ppn, fetch_vaddr_i[11:0]};
// Mega page
if (itlb_is_2M) begin
instr_if_address_o[20:12] = fetch_vaddr_i[20:12];
instr_if_address_o[20:12] = fetch_vaddr_i[20:12];
end
// Giga page
if (itlb_is_1G) begin
@ -236,16 +239,16 @@ module mmu #(
// --------
// if we hit the ITLB output the request signal immediately
if (itlb_lu_hit) begin
instr_if_data_req_o = fetch_req_i;
// we got an access error
if (iaccess_err) begin
// immediately grant a fetch which threw an exception, and stop the request from happening
instr_if_data_req_o = 1'b0;
fetch_gnt_o = 1'b1;
ierr_valid_n = 1'b1;
// throw a page fault
fetch_ex_n = {INSTR_ACCESS_FAULT, fetch_vaddr_i, 1'b1};
end
instr_if_data_req_o = fetch_req_i;
// we got an access error
if (iaccess_err) begin
// immediately grant a fetch which threw an exception, and stop the request from happening
instr_if_data_req_o = 1'b0;
fetch_gnt_o = 1'b1;
ierr_valid_n = 1'b1;
// throw a page fault
fetch_ex_n = {INSTR_ACCESS_FAULT, fetch_vaddr_i, 1'b1};
end
end else
// ---------
// ITLB Miss
@ -261,52 +264,88 @@ module mmu #(
// the fetch is valid if we either got an error in the previous cycle or the I$ gave us a valid signal.
fetch_valid_o = instr_if_data_rvalid_i || ierr_valid_q;
end
// ----------
// Registers
// ----------
// registers for the instruction-fetch error valid flag and the fetch exception;
// both are cleared by the asynchronous active-low reset and otherwise take
// their *_n next-state value on every rising clock edge
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
ierr_valid_q <= 1'b0;
fetch_ex_q <= '0;
end else begin
ierr_valid_q <= ierr_valid_n;
fetch_ex_q <= fetch_ex_n;
end
end
//-----------------------
// Data Interface
//-----------------------
logic [63:0] lsu_vaddr_n, lsu_vaddr_q;
pte_t dtlb_pte_n, dtlb_pte_q;
exception misaligned_ex_n, misaligned_ex_q;
logic lsu_req_n, lsu_req_q;
logic lsu_is_store_n, lsu_is_store_q;
logic dtlb_hit_n, dtlb_hit_q;
logic dtlb_is_2M_n, dtlb_is_2M_q;
logic dtlb_is_1G_n, dtlb_is_1G_q;
// check if we need to do translation or if we are always ready (e.g.: we are not translating anything)
assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? dtlb_lu_hit : 1'b1;
// The data interface is simpler and only consists of a request/response interface
always_comb begin : data_interface
lsu_paddr_o = lsu_vaddr_i;
lsu_valid_o = lsu_req_i;
lsu_exception_o = misaligned_ex_i;
// save request and DTLB response
lsu_vaddr_n = lsu_vaddr_i;
lsu_req_n = lsu_req_i;
misaligned_ex_n = misaligned_ex_i;
dtlb_pte_n = dtlb_content;
dtlb_hit_n = dtlb_lu_hit;
lsu_is_store_n = lsu_is_store_i;
dtlb_is_2M_n = dtlb_is_2M;
dtlb_is_1G_n = dtlb_is_1G;
lsu_paddr_o = lsu_vaddr_q;
lsu_valid_o = lsu_req_q;
lsu_exception_o = misaligned_ex_q;
// Check if the User flag is set, then we may only access it in supervisor mode
// if SUM is enabled
daccess_err = (ld_st_priv_lvl_i == PRIV_LVL_S && !sum_i && dtlb_content.u) || // SUM is not set and we are trying to access a user page in supervisor mode
(ld_st_priv_lvl_i == PRIV_LVL_U && !dtlb_content.u); // this is not a user page but we are in user mode and trying to access it
daccess_err = (ld_st_priv_lvl_i == PRIV_LVL_S && !sum_i && dtlb_pte_q.u) || // SUM is not set and we are trying to access a user page in supervisor mode
(ld_st_priv_lvl_i == PRIV_LVL_U && !dtlb_pte_q.u); // this is not a user page but we are in user mode and trying to access it
// translation is enabled and no misaligned exception occurred
if (en_ld_st_translation_i && !misaligned_ex_i.valid) begin
if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin
lsu_valid_o = 1'b0;
// 4K page
lsu_paddr_o = {dtlb_content.ppn, lsu_vaddr_i[11:0]};
lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]};
// Mega page
if (dtlb_is_2M) begin
lsu_paddr_o[20:12] = lsu_vaddr_i[20:12];
if (dtlb_is_2M_q) begin
lsu_paddr_o[20:12] = lsu_vaddr_q[20:12];
end
// Giga page
if (dtlb_is_1G) begin
lsu_paddr_o[29:12] = lsu_vaddr_i[29:12];
if (dtlb_is_1G_q) begin
lsu_paddr_o[29:12] = lsu_vaddr_q[29:12];
end
// ---------
// DTLB Hit
// --------
if (dtlb_lu_hit && lsu_req_i) begin
if (dtlb_hit_q && lsu_req_q) begin
lsu_valid_o = 1'b1;
// this is a store
if (lsu_is_store_i) begin
if (lsu_is_store_q) begin
// check if the page is write-able and we are not violating privileges
if (!dtlb_content.w || daccess_err) begin
lsu_exception_o = {ST_ACCESS_FAULT, lsu_vaddr_i, 1'b1};
if (!dtlb_pte_q.w || daccess_err) begin
lsu_exception_o = {ST_ACCESS_FAULT, lsu_vaddr_q, 1'b1};
end
// check if the dirty flag is set
if (!dtlb_content.d) begin
lsu_exception_o = {STORE_PAGE_FAULT, lsu_vaddr_i, 1'b1};
if (!dtlb_pte_q.d) begin
lsu_exception_o = {STORE_PAGE_FAULT, lsu_vaddr_q, 1'b1};
end
// this is a load, check for sufficient access privileges
end else if (daccess_err) begin
lsu_exception_o = {LD_ACCESS_FAULT, lsu_vaddr_i, 1'b1};
lsu_exception_o = {LD_ACCESS_FAULT, lsu_vaddr_q, 1'b1};
end
end else
// ---------
// DTLB Miss
// ---------
@ -317,7 +356,7 @@ module mmu #(
// an error makes the translation valid
lsu_valid_o = 1'b1;
// the page table walker can only throw page faults
if (lsu_is_store_i) begin
if (lsu_is_store_q) begin
lsu_exception_o = {STORE_PAGE_FAULT, {25'b0, update_vaddr}, 1'b1};
end else begin
lsu_exception_o = {LOAD_PAGE_FAULT, {25'b0, update_vaddr}, 1'b1};
@ -331,11 +370,23 @@ module mmu #(
// ----------
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
ierr_valid_q <= 1'b0;
fetch_ex_q <= '0;
lsu_vaddr_q <= '0;
lsu_req_q <= '0;
misaligned_ex_q <= '0;
dtlb_pte_q <= '0;
dtlb_hit_q <= '0;
lsu_is_store_q <= '0;
dtlb_is_2M_q <= '0;
dtlb_is_1G_q <= '0;
end else begin
ierr_valid_q <= ierr_valid_n;
fetch_ex_q <= fetch_ex_n;
lsu_vaddr_q <= lsu_vaddr_n;
lsu_req_q <= lsu_req_n;
misaligned_ex_q <= misaligned_ex_n;
dtlb_pte_q <= dtlb_pte_n;
dtlb_hit_q <= dtlb_hit_n;
lsu_is_store_q <= lsu_is_store_n;
dtlb_is_2M_q <= dtlb_is_2M_n;
dtlb_is_1G_q <= dtlb_is_1G_n;
end
end
endmodule

View file

@ -44,20 +44,22 @@ module pcgen (
);
logic [63:0] npc_n, npc_q;
// the PC was set to a new region by a higher priority input (e.g.: exception, debug, ctrl return from exception)
logic set_pc_n, set_pc_q;
branchpredict_sbe branch_predict_btb;
assign fetch_address_o = npc_q;
// branch-predict input register -> this path is critical
branchpredict resolved_branch_q;
btb #(
.NR_ENTRIES(4096),
.BITS_SATURATION_COUNTER(2)
.NR_ENTRIES ( BTB_ENTRIES ),
.BITS_SATURATION_COUNTER ( BITS_SATURATION_COUNTER )
)
btb_i
(
// Use the PC from last cycle to perform branch lookup for the current cycle
.flush_i ( flush_bp_i ),
.vpc_i ( npc_q ),
.branch_predict_i ( resolved_branch_i ), // update port
.branch_predict_i ( resolved_branch_q ), // update port
.branch_predict_o ( branch_predict_btb ), // read port
.*
);
@ -65,23 +67,42 @@ module pcgen (
// Next PC
// -------------------
// next PC (NPC) can come from:
// 1. Exception
// 2. Return from exception
// 3. Predicted branch
// 4. Debug
// 5. Boot address
// 0. Default assignment
// 1. Branch Predict taken
// 2. Debug
// 3. Control flow change request
// 4. Exception
// 5. Return from exception
// 6. Pipeline Flush because of CSR side effects
always_comb begin : npc_select
automatic logic [63:0] fetch_address = npc_q;
branch_predict_o = branch_predict_btb;
fetch_valid_o = 1'b1;
// this tells us whether it is a consecutive PC or a completely new PC
set_pc_n = 1'b0;
// by default keep the PC stable
npc_n = npc_q;
// -------------------------------
// 3. Control flow change request
// -------------------------------
// check if we had a mis-predict the cycle earlier and if we can reset the PC (e.g.: it was a predicted or consecutive PC
// which was set a cycle earlier)
if (resolved_branch_q.is_mispredict && !set_pc_q) begin
// we already got the correct target address
fetch_address = resolved_branch_q.target_address;
end
// -------------------------------
// 0. Default assignment
// -------------------------------
// default is a consecutive PC
if (if_ready_i && fetch_enable_i)
npc_n = {npc_q[63:2], 2'b0} + 64'h4;
else // or keep the PC stable if IF is not ready
npc_n = npc_q;
// but operate on the current fetch address
npc_n = {fetch_address[63:2], 2'b0} + 64'h4;
// we only need to stall the consecutive and predicted case since in any other case we will flush at least
// the front-end which means that the IF stage will always be ready to accept a new request
@ -89,52 +110,53 @@ module pcgen (
// 1. Predict taken
// -------------------------------
// only predict if the IF stage is ready, otherwise we might take the predicted PC away which will end in an endless loop
if (if_ready_i && branch_predict_btb.valid && branch_predict_btb.predict_taken) begin
// also check if we fetched on a half word (fetch_address[1] == 1), it might be the case that we need the next 16 bit of the following instruction
// prediction could potentially prevent us from getting them
if (if_ready_i && branch_predict_btb.valid && branch_predict_btb.predict_taken && !fetch_address[1]) begin
npc_n = branch_predict_btb.predict_address;
end
// -------------------------------
// 2. Debug
// -------------------------------
// -------------------------------
// 3. Control flow change request
// -------------------------------
if (resolved_branch_i.is_mispredict) begin
// we already got the correct target address
npc_n = resolved_branch_i.target_address;
end
// -------------------------------
// 4. Exception
// -------------------------------
if (ex_i.valid) begin
npc_n = trap_vector_base_i;
branch_predict_o.valid = 1'b0;
set_pc_n = 1'b1;
end
// -------------------------------
// 5. Return from exception
// -------------------------------
if (eret_i) begin
npc_n = epc_i;
npc_n = epc_i;
set_pc_n = 1'b1;
end
// -------------------------------
// 6. Pipeline Flush
// -------------------------------
// -----------------------------------------------
// 6. Pipeline Flush because of CSR side effects
// -----------------------------------------------
// On a pipeline flush start fetching from the next address
// of the instruction in the commit stage
if (flush_i) begin
// we came here from a flush request of a CSR instruction,
// as CSR instructions do not exist in a compressed form
// we can unconditionally do PC + 4 here
npc_n = pc_commit_i + 64'h4;
npc_n = pc_commit_i + 64'h4;
set_pc_n = 1'b1;
end
// fetch enable
if (!fetch_enable_i) begin
fetch_valid_o = 1'b0;
end
// set fetch address
fetch_address_o = fetch_address;
end
// -------------------
// Sequential Process
@ -142,9 +164,13 @@ module pcgen (
// PCGEN -> IF Pipeline Stage
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
npc_q <= boot_addr_i;
npc_q <= boot_addr_i;
set_pc_q <= 1'b0;
resolved_branch_q <= '0;
end else begin
npc_q <= npc_n;
npc_q <= npc_n;
set_pc_q <= set_pc_n;
resolved_branch_q <= resolved_branch_i;
end
end

View file

@ -147,9 +147,7 @@ module regfile
mem[0] = '0;
for (int unsigned k = 1; k < NUM_WORDS; k++) begin : w_WordIter
if (~rst_n)
mem[k] = '0;
else if (mem_clocks[k] == 1'b1)
if (mem_clocks[k] == 1'b1)
mem[k] = wdata_a_q;
end
end

View file

@ -21,15 +21,14 @@
import ariane_pkg::*;
module scoreboard #(
parameter int NR_ENTRIES = 8,
parameter int NR_WB_PORTS = 1
parameter int unsigned NR_ENTRIES = 8,
parameter int unsigned NR_WB_PORTS = 1
)
(
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
output logic full_o, // We can't take anymore data
input logic flush_i, // flush whole scoreboard
input logic flush_unissued_instr_i,
input logic unresolved_branch_i, // we have an unresolved branch
// list of clobbered registers to issue stage
output fu_t [31:0] rd_clobber_o,
@ -63,7 +62,7 @@ module scoreboard #(
input exception [NR_WB_PORTS-1:0] ex_i, // exception from a functional unit (e.g.: ld/st exception, divide by zero)
input logic [NR_WB_PORTS-1:0] wb_valid_i // data in is valid
);
localparam BITS_ENTRIES = $clog2(NR_ENTRIES);
localparam int unsigned BITS_ENTRIES = $clog2(NR_ENTRIES);
// this is the FIFO struct of the issue queue
struct packed {
@ -78,7 +77,7 @@ module scoreboard #(
// the issue queue is full don't issue any new instructions
assign issue_full = (issue_cnt_q == NR_ENTRIES-1);
assign full_o = issue_full;
// output commit instruction directly
assign commit_instr_o = mem_q[commit_pointer_q].sbe;
@ -87,9 +86,12 @@ module scoreboard #(
issue_instr_o = decoded_instr_i;
// make sure we assign the correct trans ID
issue_instr_o.trans_id = issue_pointer_q;
issue_instr_valid_o = ~issue_full && decoded_instr_valid_i && !flush_unissued_instr_i;
decoded_instr_ack_o = issue_ack_i;
// we are ready if we are not full and don't have any unresolved branches, but it can be
// the case that we have an unresolved branch which is cleared in that cycle (resolved_branch_i == 1)
issue_instr_valid_o = decoded_instr_valid_i && !unresolved_branch_i && !issue_full;
decoded_instr_ack_o = issue_ack_i && !issue_full;
end
// maintain a FIFO with issued instructions
// keep track of all issued instructions
always_comb begin : issue_fifo
@ -99,11 +101,11 @@ module scoreboard #(
commit_pointer_n = commit_pointer_q;
issue_pointer_n = issue_pointer_q;
// if we got a acknowledge from the FIFO, put this scoreboard entry in the queue
if (issue_ack_i) begin
// if we got an acknowledge from the issue stage, put this scoreboard entry in the queue
if (decoded_instr_valid_i && decoded_instr_ack_o) begin
// the decoded instruction we put in there is valid (1st bit)
// increase the issue counter
issue_cnt++;
// the decoded instruction we put in there is valid (1st bit)
mem_n[issue_pointer_q] = {1'b1, decoded_instr_i};
// advance issue pointer
issue_pointer_n = issue_pointer_q + 1'b1;
@ -112,7 +114,7 @@ module scoreboard #(
// ------------
// Write Back
// ------------
for (int i = 0; i < NR_WB_PORTS; i++) begin
for (int unsigned i = 0; i < NR_WB_PORTS; i++) begin
// check if this instruction was issued (e.g.: it could happen after a flush that there is still
// something in the pipeline e.g. an incomplete memory operation)
if (wb_valid_i[i] && mem_n[trans_id_i[i]].issued) begin
@ -141,7 +143,7 @@ module scoreboard #(
// Flush
// ------
if (flush_i) begin
for (int i = 0; i < NR_ENTRIES; i++) begin
for (int unsigned i = 0; i < NR_ENTRIES; i++) begin
// set all valid flags for all entries to zero
mem_n[i].issued = 1'b0;
mem_n[i].sbe.valid = 1'b0;
@ -163,7 +165,7 @@ module scoreboard #(
always_comb begin : clobber_output
rd_clobber_o = '{default: NONE};
// check for all valid entries and set the clobber register accordingly
for (int i = 0; i < NR_ENTRIES; i++) begin
for (int unsigned i = 0; i < NR_ENTRIES; i++) begin
if (mem_q[i].issued) begin
// output the functional unit which is going to clobber this register
rd_clobber_o[mem_q[i].sbe.rd] = mem_q[i].sbe.fu;
@ -183,7 +185,7 @@ module scoreboard #(
rs1_valid_o = 1'b0;
rs2_valid_o = 1'b0;
for (int i = 0; i < NR_ENTRIES; i++) begin
for (int unsigned i = 0; i < NR_ENTRIES; i++) begin
// only consider this entry if it is valid
if (mem_q[i].issued) begin
// look at the appropriate fields and look whether there was an
@ -203,7 +205,7 @@ module scoreboard #(
// -----------
// provide a direct combinational path from WB a.k.a forwarding
// make sure that we are not forwarding a result that got an exception
for (int j = 0; j < NR_WB_PORTS; j++) begin
for (int unsigned j = 0; j < NR_WB_PORTS; j++) begin
if (mem_q[trans_id_i[j]].sbe.rd == rs1_i && wb_valid_i[j] && ~ex_i[j].valid) begin
rs1_o = wdata_i[j];
rs1_valid_o = wb_valid_i[j];
@ -231,7 +233,7 @@ module scoreboard #(
commit_pointer_q <= '0;
issue_pointer_q <= '0;
end else begin
mem_q <= mem_n;
mem_q <= mem_n;
issue_cnt_q <= issue_cnt_n;
commit_pointer_q <= commit_pointer_n;
issue_pointer_q <= issue_pointer_n;

View file

@ -18,17 +18,15 @@
// University of Bologna.
//
module store_queue (
module store_buffer (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // if we flush we need to pause the transactions on the memory
// otherwise we will run in a deadlock with the memory arbiter
output logic no_st_pending_o, // non-speculative queue is empty (e.g.: everything is committed to the memory hierarchy)
output logic [63:0] paddr_o, // physical address of the valid store
output logic [63:0] data_o, // data at the given address
output logic valid_o, // committed data is valid
output logic [7:0] be_o, // byte enable set
input logic [11:0] page_offset_i,
output logic page_offset_matches_o,
input logic commit_i, // commit the instruction which was placed there most recently
output logic ready_o, // the store queue is ready to accept a new request
@ -74,12 +72,6 @@ module store_queue (
logic is_speculative; // set if the entry isn't committed yet
} commit_queue_n, commit_queue_q;
// we can directly output the commit entry since we have just one element in the "queue"
assign paddr_o = commit_queue_q.address;
assign data_o = commit_queue_q.data;
assign be_o = commit_queue_q.be;
assign valid_o = commit_queue_q.valid;
// those signals can directly be output to the memory
assign address_index_o = commit_queue_q.address[11:0];
// if we got a new request we already saved the tag from the previous cycle
@ -153,6 +145,34 @@ module store_queue (
end
// ------------------
// Address Checker
// ------------------
// The load should return the data stored by the most recent store to the
// same physical address. The most direct way to implement this is to
// maintain physical addresses in the store buffer.
// Of course, there are other micro-architectural techniques to accomplish
// the same thing: you can interlock and wait for the store buffer to
// drain if the load VA matches any store VA modulo the page size (i.e.
// bits 11:0). As a special case, it is correct to bypass if the full VA
// matches, and no younger stores' VAs match in bits 11:0.
//
// checks if the requested load is in the store buffer
// page offsets are virtually and physically the same
// Flags a possible overlap between page_offset_i and a store held here.
// Only bits [11:3] are compared, i.e. the low three address bits are ignored.
always_comb begin : address_checker
// default: no match
page_offset_matches_o = 1'b0;
// check if the LSBs are identical and the entry is valid
if ((page_offset_i[11:3] == commit_queue_q.address[11:3]) && commit_queue_q.valid) begin
page_offset_matches_o = 1'b1;
end
// same comparison against the incoming address paddr_i, qualified by valid_i
if ((page_offset_i[11:3] == paddr_i[11:3]) && valid_i) begin
page_offset_matches_o = 1'b1;
end
end
// registers
always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
if(~rst_ni) begin

View file

@ -24,12 +24,9 @@ module store_unit (
input logic flush_i,
output logic no_st_pending_o,
// store unit input port
input fu_op operator_i,
input logic [TRANS_ID_BITS-1:0] trans_id_i,
input logic valid_i,
input logic [63:0] vaddr_i,
input logic [7:0] be_i,
input logic [63:0] data_i,
input lsu_ctrl_t lsu_ctrl_i,
output logic pop_st_o,
input logic commit_i,
// store unit output port
output logic valid_o,
@ -41,8 +38,8 @@ module store_unit (
output logic translation_req_o, // request address translation
output logic [63:0] vaddr_o, // virtual address out
input logic [63:0] paddr_i, // physical address in
input logic translation_valid_i,
input exception ex_i,
input logic dtlb_hit_i, // will be one in the same cycle translation_req was asserted if it hits
// address checker
input logic [11:0] page_offset_i,
output logic page_offset_matches_o,
@ -60,108 +57,162 @@ module store_unit (
);
assign result_o = 64'b0;
logic [63:0] st_buffer_paddr; // physical address for store
logic [63:0] st_buffer_data; // store buffer data out
logic [63:0] st_data; // aligned data to store buffer
logic [7:0] st_buffer_be;
logic st_buffer_valid;
enum logic [1:0] {IDLE, VALID_STORE, WAIT_TRANSLATION, WAIT_STORE_READY} NS, CS;
// store buffer control signals
logic st_ready;
logic st_valid;
assign vaddr_o = vaddr_i;
// ---------------
// Store Control
// ---------------
// keep the data and the byte enable for the second cycle (after address translation)
logic [63:0] st_data_n, st_data_q;
logic [7:0] st_be_n, st_be_q;
logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q;
// output assignments
assign vaddr_o = lsu_ctrl_i.vaddr; // virtual address
assign trans_id_o = trans_id_q; // transaction id from previous cycle
// Store control: combinational next-state and output logic of the store unit.
// Every output gets a default first; the `case (CS)` below then overrides the defaults per state:
//   IDLE             - on valid_i request the address translation and pick the next state
//   VALID_STORE      - report the store as valid and post it to the store buffer
//   WAIT_STORE_READY - keep the translation request high until st_ready and dtlb_hit_i are both set
//   WAIT_TRANSLATION - keep the translation request high until dtlb_hit_i is set
// NOTE(review): this span comes from a rendered merge diff, so statements that presumably belong to
// the previous implementation (e.g. the `translation_valid_i` check) appear next to the
// state-machine version - confirm against the checked-out file.
always_comb begin : store_control
translation_req_o = 1'b0;
valid_o = 1'b0;
ready_o = 1'b1;
trans_id_o = trans_id_i;
ex_o = ex_i;
valid_o = 1'b0;
st_valid = 1'b0;
// we got a valid store
if (valid_i) begin
// first do the address translation; we need to do it in the first cycle since we want to share the MMU
// between the load and the store unit. Only when a new request arrives do we know that the two units
// are not using it at the same time.
translation_req_o = 1'b1;
// check if translation was valid and we have space in the store buffer
// otherwise simply stall
if (translation_valid_i && st_ready) begin
pop_st_o = 1'b0;
ex_o = ex_i;
trans_id_n = lsu_ctrl_i.trans_id;
NS = CS;
case (CS)
// we got a valid store
IDLE: begin
if (valid_i) begin
NS = VALID_STORE;
translation_req_o = 1'b1;
// on a DTLB miss wait for the translation; if the store buffer is not ready wait for it instead
// (that check is assigned last, so it takes precedence over the DTLB miss)
if (!dtlb_hit_i) begin
NS = WAIT_TRANSLATION;
end
if (!st_ready) begin
NS = WAIT_STORE_READY;
end
end
end
VALID_STORE: begin
ready_o = 1'b0;
valid_o = 1'b1;
// post this store to the store buffer
st_valid = 1'b1;
// translation was not successful - stall here
end else begin
if (!flush_i)
st_valid = 1'b1;
pop_st_o = 1'b1;
// // we have another request
// if (valid_i) begin
// translation_req_o = 1'b1;
// if (!dtlb_hit_i) begin
// NS = WAIT_TRANSLATION;
// end
// if (!st_ready) begin
// NS = WAIT_STORE_READY;
// end
// // if we do not have another request go back to idle
// end else begin
// NS = IDLE;
// end
NS = IDLE;
end
// the store queue is currently full
WAIT_STORE_READY: begin
ready_o = 1'b0;
// keep the translation request high
translation_req_o = 1'b1;
if (st_ready && dtlb_hit_i) begin
NS = VALID_STORE;
end
end
// we didn't receive a valid translation, wait for one
// but we know that the store queue is not full as we could only have landed here if
// it wasn't full
WAIT_TRANSLATION: begin
ready_o = 1'b0;
translation_req_o = 1'b1;
if (dtlb_hit_i) begin
NS = VALID_STORE;
end
end
// -----------------
// Access Exception
// -----------------
// we got an address translation exception (access rights)
if (ex_i.valid) begin
// result is valid
valid_o = 1'b1;
// do not store this
st_valid = 1'b0;
// we are ready if we got this exception
ready_o = 1'b1;
end
endcase
// -----------------
// Access Exception
// -----------------
// we got an address translation exception (access rights, misaligned or page fault)
if (ex_i.valid && (CS != IDLE)) begin
// the only difference is that we do not want to store this request
st_valid = 1'b0;
NS = IDLE;
valid_o = 1'b1;
end
// a flush sends the state machine back to IDLE; as the last assignment it overrides the case above
if (flush_i)
NS = IDLE;
end
// -----------
// Re-aligner
// -----------
// re-align the write data to comply with the address offset
// Rotates the 64-bit store data left by 8 * address[2:0] bits, so that data byte 0 ends up in the
// byte lane selected by the address offset. The byte enables are passed on unchanged.
// NOTE(review): rendered from a merge diff - the `data_i`/`vaddr_i` case and the `lsu_ctrl_i`
// case presumably are the old and the new version of the same logic; confirm against the file.
always_comb begin
st_data = data_i;
case (vaddr_i[2:0])
3'b000: st_data = data_i;
3'b001: st_data = {data_i[55:0], data_i[63:56]};
3'b010: st_data = {data_i[47:0], data_i[63:48]};
3'b011: st_data = {data_i[39:0], data_i[63:40]};
3'b100: st_data = {data_i[31:0], data_i[63:32]};
3'b101: st_data = {data_i[23:0], data_i[63:24]};
3'b110: st_data = {data_i[15:0], data_i[63:16]};
3'b111: st_data = {data_i[7:0], data_i[63:8]};
// byte enables need no re-alignment here, they are forwarded as received
st_be_n = lsu_ctrl_i.be;
// default: offset 0, data is already aligned
st_data_n = lsu_ctrl_i.data;
case (lsu_ctrl_i.vaddr[2:0])
3'b000: st_data_n = lsu_ctrl_i.data;
3'b001: st_data_n = {lsu_ctrl_i.data[55:0], lsu_ctrl_i.data[63:56]};
3'b010: st_data_n = {lsu_ctrl_i.data[47:0], lsu_ctrl_i.data[63:48]};
3'b011: st_data_n = {lsu_ctrl_i.data[39:0], lsu_ctrl_i.data[63:40]};
3'b100: st_data_n = {lsu_ctrl_i.data[31:0], lsu_ctrl_i.data[63:32]};
3'b101: st_data_n = {lsu_ctrl_i.data[23:0], lsu_ctrl_i.data[63:24]};
3'b110: st_data_n = {lsu_ctrl_i.data[15:0], lsu_ctrl_i.data[63:16]};
3'b111: st_data_n = {lsu_ctrl_i.data[7:0], lsu_ctrl_i.data[63:8]};
endcase
end
// ---------------
// Store Queue
// ---------------
store_queue store_queue_i (
store_buffer store_buffer_i (
// store queue write port
.valid_i ( st_valid ),
.data_i ( st_data ),
// store buffer in
.paddr_o ( st_buffer_paddr ),
.data_o ( st_buffer_data ),
.valid_o ( st_buffer_valid ),
.be_o ( st_buffer_be ),
.data_i ( st_data_q ),
.be_i ( st_be_q ),
// store buffer out
.ready_o ( st_ready ),
.*
);
// ------------------
// Address Checker
// ------------------
// The load should return the data stored by the most recent store to the
// same physical address. The most direct way to implement this is to
// maintain physical addresses in the store buffer.
// Of course, there are other micro-architectural techniques to accomplish
// the same thing: you can interlock and wait for the store buffer to
// drain if the load VA matches any store VA modulo the page size (i.e.
// bits 11:0). As a special case, it is correct to bypass if the full VA
// matches, and no younger stores' VAs match in bits 11:0.
//
// checks if the requested load is in the store buffer
// page offsets are virtually and physically the same
always_comb begin : address_checker
page_offset_matches_o = 1'b0;
// check if the LSBs are identical and the entry is valid
if ((vaddr_i[11:3] == st_buffer_paddr[11:3]) && st_buffer_valid) begin
page_offset_matches_o = 1'b1;
// ---------------
// Registers
// ---------------
// State registers of the store unit: the FSM state plus the byte enable, the re-aligned data and
// the transaction id, each taking over its `_n` value on the rising clock edge.
// The reset is asynchronous and active-low: it puts the FSM into IDLE and zeroes the other registers.
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
CS <= IDLE;
st_be_q <= '0;
st_data_q <= '0;
trans_id_q <= '0;
end else begin
CS <= NS;
st_be_q <= st_be_n;
st_data_q <= st_data_n;
trans_id_q <= trans_id_n;
end
end

View file

@ -38,7 +38,7 @@ class exception_trace_item;
INSTR_PAGE_FAULT: this.cause = "Instruction Page Fault";
LOAD_PAGE_FAULT: this.cause = "Load Page Fault";
STORE_PAGE_FAULT: this.cause = "Store Page Fault";
default: cause = "Interrupt";
default: this.cause = "Interrupt";
endcase
this.tval = tval;

View file

@ -26,13 +26,12 @@ class instruction_trace_item;
logic [63:0] reg_file [32];
logic [4:0] read_regs [$];
logic [4:0] result_regs [$];
logic [63:0] imm;
logic [63:0] result;
logic [63:0] paddr;
logic [63:0] paddr_queue [$];
logic [63:0] vaddr;
logic [63:0] vaddr_queue [$];
// constructor creating a new instruction trace item, i.e. a single instruction with all relevant information
function new (time simtime, longint unsigned cycle, scoreboard_entry sbe, logic [31:0] instr, logic [63:0] reg_file [32], logic [63:0] result, logic [63:0] vaddr, logic [63:0] paddr);
function new (time simtime, longint unsigned cycle, scoreboard_entry sbe, logic [31:0] instr, logic [63:0] reg_file [32], logic [63:0] result, logic [63:0] paddr);
this.simtime = simtime;
this.cycle = cycle;
this.pc = sbe.pc;
@ -40,7 +39,6 @@ class instruction_trace_item;
this.instr = instr;
this.reg_file = reg_file;
this.result = result;
this.vaddr = vaddr;
this.paddr = paddr;
endfunction
// convert register address to ABI compatible form
@ -198,11 +196,17 @@ class instruction_trace_item;
if (read_regs[i] != 0)
s = $sformatf("%s %-4s:%16x", s, regAddrToStr(read_regs[i]), reg_file[read_regs[i]]);
end
// if we got a physical address also display address translation
foreach (paddr_queue[i]) begin
s = $sformatf("%s VA: %x PA: %x", s, this.vaddr, paddr_queue[i]);
end
casex (instr)
// check if the instruction was a load or store
INSTR_STORE: begin
logic [63:0] vaddress = reg_file[read_regs[1]] + this.imm;
s = $sformatf("%s VA: %x PA: %x", s, vaddress, this.paddr);
end
INSTR_LOAD: begin
logic [63:0] vaddress = reg_file[read_regs[0]] + this.imm;
s = $sformatf("%s VA: %x PA: %x", s, vaddress, this.paddr);
end
endcase
return s;
endfunction
@ -307,7 +311,9 @@ class instruction_trace_item;
result_regs.push_back(sbe.rd);
read_regs.push_back(sbe.rs1);
paddr_queue.push_back(paddr);
// save the immediate for calculating the virtual address
this.imm = sbe.result;
return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1));
endfunction
@ -322,9 +328,10 @@ class instruction_trace_item;
default: return printMnemonic("INVALID");
endcase
read_regs.push_back(sbe.rs1);
read_regs.push_back(sbe.rs2);
paddr_queue.push_back(paddr);
read_regs.push_back(sbe.rs1);
// save the immediate for calculating the virtual address
this.imm = sbe.result;
return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1));

View file

@ -36,16 +36,21 @@ class instruction_tracer;
int f;
// address mapping
// contains mappings of the form vaddr <-> paddr
struct {
logic [63:0] vaddr;
logic [63:0] paddr;
} store_mapping[$], load_mapping[$], address_mapping;
logic [63:0] store_mapping[$], load_mapping[$], address_mapping;
// Constructor: stores the virtual interface handle the tracer observes and opens
// "output.txt" for writing, keeping the returned file descriptor in `f`.
function new(virtual instruction_tracer_if tracer_if);
this.tracer_if = tracer_if;
f = $fopen("output.txt","w");
endfunction : new
// Opens the per-core trace log and stores its descriptor in `f`.
// The file name is "trace_core_<cluster_id>_<core_id>.log", with both ids formatted as hex,
// and is also announced on the console.
//   cluster_id - id of the cluster, used for the first part of the file name
//   core_id    - id of the core, used for the second part of the file name
function void create_file(logic [5:0] cluster_id, logic [3:0] core_id);
    string fn;
    // `f` may already hold an open descriptor (the constructor shown in this class opens
    // "output.txt"); close it first so re-targeting the tracer does not leak the handle.
    // A descriptor of 0 means "nothing open", so there is nothing to close in that case.
    if (this.f != 0)
        $fclose(this.f);
    $sformat(fn, "trace_core_%h_%h.log", cluster_id, core_id);
    $display("[TRACER] Output filename is: %s", fn);
    this.f = $fopen(fn,"w");
endfunction : create_file
task trace();
fetch_entry decode_instruction, issue_instruction, issue_commit_instruction;
scoreboard_entry commit_instruction;
@ -82,22 +87,13 @@ class instruction_tracer;
// --------------------
// Address Translation
// --------------------
if (tracer_if.pck.translation_valid) begin
// put it in the store mapping queue if it is a store
if (tracer_if.pck.is_store && tracer_if.pck.st_ready) begin
store_mapping.push_back('{
vaddr: tracer_if.pck.vaddr,
paddr: tracer_if.pck.paddr
});
// or else put it in the load mapping
end else if (!tracer_if.pck.is_store && tracer_if.pck.ld_ready) begin
load_mapping.push_back('{
vaddr: tracer_if.pck.vaddr,
paddr: tracer_if.pck.paddr
});
end
if (tracer_if.pck.st_valid) begin
store_mapping.push_back(tracer_if.pck.st_paddr);
end
if (tracer_if.pck.ld_valid && !tracer_if.pck.ld_kill) begin
load_mapping.push_back(tracer_if.pck.ld_paddr);
end
// --------------
// Commit
// --------------
@ -115,9 +111,9 @@ class instruction_tracer;
// check if the write back is valid, if not we need to source the result from the register file
// as the most recent version of this register will be there.
if (tracer_if.pck.we) begin
printInstr(issue_sbe, issue_commit_instruction.instruction, tracer_if.pck.wdata, address_mapping.vaddr, address_mapping.paddr);
printInstr(issue_sbe, issue_commit_instruction.instruction, tracer_if.pck.wdata, address_mapping);
end else
printInstr(issue_sbe, issue_commit_instruction.instruction, reg_file[commit_instruction.rd], address_mapping.vaddr, address_mapping.paddr);
printInstr(issue_sbe, issue_commit_instruction.instruction, reg_file[commit_instruction.rd], address_mapping);
end
// --------------
@ -149,6 +145,7 @@ class instruction_tracer;
end
endtask
// flush all decoded instructions
function void flushDecode ();
decode_queue = {};
@ -165,8 +162,8 @@ class instruction_tracer;
load_mapping = {};
endfunction;
function void printInstr(scoreboard_entry sbe, logic [63:0] instr, logic [63:0] result, logic [63:0] vaddr, logic [63:0] paddr);
instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.reg_file, result, vaddr, paddr);
function void printInstr(scoreboard_entry sbe, logic [63:0] instr, logic [63:0] result, logic [63:0] paddr);
instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.reg_file, result, paddr);
// print instruction to console
string print_instr = iti.printInstr();
$display(print_instr);

View file

@ -39,19 +39,23 @@ interface instruction_tracer_if (
// commit stage
scoreboard_entry commit_instr; // commit instruction
logic commit_ack;
// address translation
logic translation_valid;
logic [63:0] vaddr;
logic [63:0] paddr;
logic is_store;
logic st_ready;
logic ld_ready;
// stores
logic st_valid;
logic [63:0] st_paddr;
// loads
logic ld_valid;
logic ld_kill;
logic [63:0] ld_paddr;
// exceptions
exception exception;
// the tracer just has a passive interface we do not drive anything with it
clocking pck @(posedge clk);
input rstn, flush_unissued, flush, fetch, fetch_valid, fetch_ack, issue_ack, issue_sbe, waddr,
wdata, we, commit_instr, commit_ack, translation_valid, vaddr, paddr, is_store, st_ready, ld_ready, exception;
st_valid, st_paddr, ld_valid, ld_kill, ld_paddr,
wdata, we, commit_instr, commit_ack, exception;
endclocking
endinterface

View file

@ -13,7 +13,6 @@
import ariane_pkg::*;
interface scoreboard_if #(parameter int NR_WB_PORTS = 1)(input clk);
wire full;
wire flush;
wire [31:0][$bits(fu_t)-1:0] rd_clobber;
wire [4:0] rs1_address;
@ -26,6 +25,7 @@ interface scoreboard_if #(parameter int NR_WB_PORTS = 1)(input clk);
wire commit_ack;
scoreboard_entry decoded_instr;
wire decoded_instr_valid;
wire decoded_instr_ack;
scoreboard_entry issue_instr;
wire issue_instr_valid;
wire issue_ack;
@ -38,12 +38,12 @@ interface scoreboard_if #(parameter int NR_WB_PORTS = 1)(input clk);
// Master clocking block (exposed through the `master` modport): lists the signals the
// testbench drives (output) and the signals it samples from the scoreboard (input),
// all relative to the rising edge of clk.
// NOTE(review): rendered from a merge diff - the two `input` lists presumably are the old
// (with `full`) and the new (with `decoded_instr_ack`) version; confirm against the file.
clocking mck @(posedge clk);
// inputs are sampled 1 time unit before the clock edge, outputs are driven 5 time units after it
default input #1 output #5; // save timing
output flush, rs1_address, rs2_address, commit_ack, decoded_instr, decoded_instr_valid, issue_ack, trans_id, wdata, ex, wb_valid;
input full, rd_clobber, rs1, rs1_valid, rs2, rs2_valid, commit_instr, issue_instr, issue_instr_valid;
input rd_clobber, rs1, rs1_valid, rs2, rs2_valid, commit_instr, issue_instr, issue_instr_valid, decoded_instr_ack;
endclocking
// Scoreboard interface configured in passive mode (-> monitor)
clocking pck @(posedge clk);
input flush, rs1_address, rs2_address, commit_ack, decoded_instr, decoded_instr_valid, issue_ack, trans_id, wdata, ex, wb_valid,
full, rd_clobber, rs1, rs1_valid, rs2, rs2_valid, commit_instr, issue_instr, issue_instr_valid;
rd_clobber, rs1, rs1_valid, rs2, rs2_valid, commit_instr, issue_instr, issue_instr_valid, decoded_instr_ack;
endclocking
modport master (clocking mck);

View file

@ -29,10 +29,9 @@ interface store_queue_if
);
wire flush;
wire [ADDRESS_SIZE-1:0] check_paddr;
wire [DATA_WIDTH-1:0] check_data;
wire valid;
wire [DATA_WIDTH/8-1:0] check_be;
wire no_st_pending;
wire [11:0] page_offset;
wire page_offset_matches;
wire commit;
wire ready;
wire store_valid;
@ -41,14 +40,15 @@ interface store_queue_if
wire [DATA_WIDTH/8-1:0] store_be;
clocking mck @(posedge clk);
output flush, commit, valid, store_paddr, store_data, store_be, store_valid;
input check_paddr, check_data, check_be, ready;
output flush, commit, store_valid, page_offset, store_paddr, store_data, store_be;
input ready, page_offset_matches, no_st_pending;
endclocking
clocking pck @(posedge clk);
input flush, check_paddr, check_data, valid, check_be, commit, ready, store_valid, store_paddr, store_data, store_be;
input flush, commit, ready, page_offset, page_offset_matches, store_valid, store_paddr,
store_data, store_be, no_st_pending;
endclocking
endinterface

View file

@ -58,7 +58,7 @@ class store_queue_if_driver extends uvm_driver #(store_queue_if_seq_item);
seq_item_port.item_done();
// fork off a commit task
// commit a couple of cycles later
@(m_vif.mck iff m_vif.pck.valid)
@(m_vif.mck iff m_vif.pck.store_valid)
fork
commit_block: begin
sem.get(1);

View file

@ -19,17 +19,17 @@
//
import ariane_pkg::*;
import uvm_pkg::*;
import core_lib_pkg::*;
`define DRAM_BASE 64'h80000000
module core_tb;
import "DPI-C" function chandle read_elf(string fn);
import "DPI-C" function longint unsigned get_symbol_address(string symb);
import "DPI-C" function longint unsigned get_symbol_size(string symb);
import uvm_pkg::*;
import core_lib_pkg::*;
logic clk_i;
logic rst_ni;
logic rtc_i;
@ -190,7 +190,8 @@ module core_tb;
// initialize .bss
bss_address = get_symbol_address(".bss");
bss_size = get_symbol_size(".bss");
$display("Symbol Address: %x, Symbol Size: %x, Address: %x", ((bss_address - `DRAM_BASE) >> 3), bss_size, address);
// `uvm_info("Core Test", $sformatf(".bss address: %x, .bss size: %x, .tohost address: %x", ((bss_address - `DRAM_BASE) >> 3), bss_size, address), UVM_LOW)
// the section should be aligned on a double word boundary
for (int i = 0; i < bss_size/8; i++) begin
core_mem_i.ram_i.mem[((bss_address - `DRAM_BASE) >> 3) + i] = 64'b0;

View file

@ -19,33 +19,37 @@ module scoreboard_tb;
scoreboard_if #(.NR_WB_PORTS(1) ) scoreboard_if (clk);
scoreboard #(
.NR_WB_PORTS ( 1 ),
.NR_ENTRIES ( NR_SB_ENTRIES )
.NR_WB_PORTS ( 1 ),
.NR_ENTRIES ( NR_SB_ENTRIES )
)
dut
(
.clk_i ( clk ),
.rst_ni ( rst_ni ),
.full_o ( scoreboard_if.full ),
.flush_i ( scoreboard_if.flush ),
.rd_clobber_o ( scoreboard_if.rd_clobber ),
.rs1_i ( scoreboard_if.rs1_address ),
.rs1_o ( scoreboard_if.rs1 ),
.rs1_valid_o ( scoreboard_if.rs1_valid ),
.rs2_i ( scoreboard_if.rs2_address ),
.rs2_o ( scoreboard_if.rs2 ),
.rs2_valid_o ( scoreboard_if.rs2_valid ),
.commit_instr_o ( scoreboard_if.commit_instr ),
.commit_ack_i ( scoreboard_if.commit_ack ),
.decoded_instr_i ( scoreboard_if.decoded_instr ),
.decoded_instr_valid_i( scoreboard_if.decoded_instr_valid ),
.issue_instr_o ( scoreboard_if.issue_instr ),
.issue_instr_valid_o ( scoreboard_if.issue_instr_valid ),
.issue_ack_i ( scoreboard_if.issue_ack ),
.trans_id_i ( scoreboard_if.trans_id ),
.wdata_i ( scoreboard_if.wdata ),
.ex_i ( scoreboard_if.ex ),
.wb_valid_i ( scoreboard_if.wb_valid )
.clk_i ( clk ),
.rst_ni ( rst_ni ),
.flush_i ( scoreboard_if.flush ),
.rd_clobber_o ( scoreboard_if.rd_clobber ),
.rs1_i ( scoreboard_if.rs1_address ),
.rs1_o ( scoreboard_if.rs1 ),
.rs1_valid_o ( scoreboard_if.rs1_valid ),
.rs2_i ( scoreboard_if.rs2_address ),
.rs2_o ( scoreboard_if.rs2 ),
.rs2_valid_o ( scoreboard_if.rs2_valid ),
.commit_instr_o ( scoreboard_if.commit_instr ),
.commit_ack_i ( scoreboard_if.commit_ack ),
.decoded_instr_i ( scoreboard_if.decoded_instr ),
.decoded_instr_valid_i ( scoreboard_if.decoded_instr_valid ),
.decoded_instr_ack_o ( scoreboard_if.decoded_instr_ack ),
.issue_instr_o ( scoreboard_if.issue_instr ),
.issue_instr_valid_o ( scoreboard_if.issue_instr_valid ),
.issue_ack_i ( scoreboard_if.issue_ack ),
.trans_id_i ( scoreboard_if.trans_id ),
.wdata_i ( scoreboard_if.wdata ),
.ex_i ( scoreboard_if.ex ),
.wb_valid_i ( scoreboard_if.wb_valid )
);
initial begin
@ -86,13 +90,10 @@ module scoreboard_tb;
@(scoreboard_if.mck);
// if we are not full load another instruction
if (scoreboard_if.full == 1'b0) begin
scoreboard_if.mck.decoded_instr <= Scoreboard::randomize_scoreboard();
scoreboard_if.mck.decoded_instr_valid <= 1'b1;
end else begin
@(scoreboard_if.mck iff scoreboard_if.mck.decoded_instr_ack == 1'b1)
scoreboard_if.mck.decoded_instr_valid <= 1'b0;
end
end
end

View file

@ -29,31 +29,31 @@ module store_queue_tb;
dcache_if slave(clk);
store_queue_if store_queue(clk);
store_queue dut (
.clk_i ( clk ),
.rst_ni ( rst_ni ),
.flush_i ( store_queue.flush ),
.paddr_o ( store_queue.check_paddr ),
.data_o ( store_queue.check_data ),
.valid_o ( store_queue.valid ),
.be_o ( store_queue.check_be ),
.commit_i ( store_queue.commit ),
.ready_o ( store_queue.ready ),
.valid_i ( store_queue.store_valid && store_queue.ready ),
.paddr_i ( store_queue.store_paddr ),
.data_i ( store_queue.store_data ),
.be_i ( store_queue.store_be ),
store_buffer dut (
.clk_i ( clk ),
.rst_ni ( rst_ni ),
.flush_i ( store_queue.flush ),
.address_index_o ( slave.address_index ),
.address_tag_o ( slave.address_tag ),
.data_wdata_o ( slave.data_wdata ),
.data_req_o ( slave.data_req ),
.data_we_o ( slave.data_we ),
.data_be_o ( slave.data_be ),
.kill_req_o ( slave.kill_req ),
.tag_valid_o ( slave.tag_valid ),
.data_gnt_i ( slave.data_gnt & slave.data_req ),
.data_rvalid_i ( slave.data_rvalid )
.no_st_pending_o (),
.page_offset_i ( store_queue.page_offset ),
.page_offset_matches_o ( store_queue.page_offset_matches ),
.commit_i ( store_queue.commit ),
.ready_o ( store_queue.ready ),
.valid_i ( store_queue.store_valid && store_queue.ready ),
.paddr_i ( store_queue.store_paddr ),
.data_i ( store_queue.store_data ),
.be_i ( store_queue.store_be ),
.address_index_o ( slave.address_index ),
.address_tag_o ( slave.address_tag ),
.data_wdata_o ( slave.data_wdata ),
.data_req_o ( slave.data_req ),
.data_we_o ( slave.data_we ),
.data_be_o ( slave.data_be ),
.kill_req_o ( slave.kill_req ),
.tag_valid_o ( slave.tag_valid ),
.data_gnt_i ( slave.data_gnt & slave.data_req ),
.data_rvalid_i ( slave.data_rvalid )
);
initial begin

View file

@ -1,28 +1,40 @@
add wave -noupdate -group core /core_tb/dut/*
add wave -noupdate -group pcgen_stage -group btb /core_tb/dut/pcgen_i/btb_i/*
add wave -noupdate -group pcgen_stage /core_tb/dut/pcgen_i/*
add wave -noupdate -group if_stage -group fetch_fifo /core_tb/dut/if_stage_i/fetch_fifo_i/*
add wave -noupdate -group if_stage /core_tb/dut/if_stage_i/*
add wave -noupdate -group id_stage -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/*
add wave -noupdate -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/*
add wave -noupdate -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/*
add wave -noupdate -group id_stage /core_tb/dut/id_stage_i/*
add wave -noupdate -group issue_stage -group scoreboard /core_tb/dut/issue_stage_i/scoreboard_i/*
add wave -noupdate -group issue_stage -group issue_read_operands /core_tb/dut/issue_stage_i/issue_read_operands_i/*
add wave -noupdate -group issue_stage /core_tb/dut/issue_stage_i/*
add wave -noupdate -group ex_stage -group alu /core_tb/dut/ex_stage_i/alu_i/*
add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/*
add wave -noupdate -group ex_stage -group lsu -group lsu_bypass /core_tb/dut/ex_stage_i/lsu_i/lsu_bypass_i/*
add wave -noupdate -group ex_stage -group lsu -group mmu /core_tb/dut/ex_stage_i/lsu_i/mmu_i/*
add wave -noupdate -group ex_stage -group lsu -group mmu -group itlb /core_tb/dut/ex_stage_i/lsu_i/mmu_i/itlb_i/*
add wave -noupdate -group ex_stage -group lsu -group mmu -group dtlb /core_tb/dut/ex_stage_i/lsu_i/mmu_i/dtlb_i/*
add wave -noupdate -group ex_stage -group lsu -group mmu -group ptw /core_tb/dut/ex_stage_i/lsu_i/mmu_i/ptw_i/*
add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/*
add wave -noupdate -group ex_stage -group lsu -group mem_arbiter /core_tb/dut/ex_stage_i/lsu_i/dcache_arbiter_i/*
add wave -noupdate -group ex_stage -group lsu -group mem_arbiter -group arbiter_fifo /core_tb/dut/ex_stage_i/lsu_i/dcache_arbiter_i/fifo_i/*
add wave -noupdate -group ex_stage -group lsu -group store_unit /core_tb/dut/ex_stage_i/lsu_i/store_unit_i/*
add wave -noupdate -group ex_stage -group lsu -group store_unit -group store_queue /core_tb/dut/ex_stage_i/lsu_i/store_unit_i/store_queue_i/*
add wave -noupdate -group ex_stage -group lsu -group store_unit -group store_buffer /core_tb/dut/ex_stage_i/lsu_i/store_unit_i/store_buffer_i/*
add wave -noupdate -group ex_stage -group lsu -group load_unit /core_tb/dut/ex_stage_i/lsu_i/load_unit_i/*
add wave -noupdate -group ex_stage -group lsu -group load_unit -group fifo /core_tb/dut/ex_stage_i/lsu_i/load_unit_i/fifo_i/*
add wave -noupdate -group ex_stage -group lsu -group lsu_arbiter /core_tb/dut/ex_stage_i/lsu_i/lsu_arbiter_i/*
add wave -noupdate -group ex_stage -group branch_unit /core_tb/dut/ex_stage_i/branch_unit_i/*
add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/*
add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/*
add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/*
add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/*
add wave -noupdate -group controller /core_tb/dut/controller_i/*