diff --git a/Makefile b/Makefile index fdb858d72..d71ac71d6 100644 --- a/Makefile +++ b/Makefile @@ -40,10 +40,11 @@ riscv-tests = rv64ui-p-add rv64ui-p-addi rv64ui-p-slli rv64ui-p-addiw rv64ui-p-a rv64ui-p-sraiw rv64ui-p-sraw rv64ui-p-srl rv64ui-p-srli rv64ui-p-srliw rv64ui-p-srlw \ rv64ui-p-lb rv64ui-p-lbu rv64ui-p-ld rv64ui-p-lh rv64ui-p-lhu rv64ui-p-lui \ rv64ui-p-lw rv64ui-p-lwu \ - rv64mi-p-csr rv64mi-p-mcsr rv64mi-p-illegal rv64mi-p-ma_addr rv64mi-p-ma_fetch rv64mi-p-sbreak rv64mi-p-scall \ + rv64mi-p-csr rv64mi-p-mcsr rv64mi-p-illegal \ + rv64mi-p-ma_addr rv64mi-p-ma_fetch rv64mi-p-sbreak rv64mi-p-scall \ rv64si-p-csr rv64si-p-ma_fetch rv64si-p-scall rv64si-p-wfi rv64si-p-sbreak \ - rv64uc-p-rvc rv64si-p-dirty \ - rv64ui-v-add + rv64si-p-dirty rv64uc-p-rvc \ + rv64ui-v-sll riscv-test = rv64ui-p-add @@ -138,4 +139,4 @@ clean: .PHONY: build lint build-moore - # make CC=/usr/pack/modelsim-10.6-kgf/questasim/gcc-5.3.0-linux_x86_64/bin/gcc CXX=/usr/pack/modelsim-10.6-kgf/questasim/gcc-5.3.0-linux_x86_64/bin/g++ -j20 + # make CC=/usr/pack/modelsim-10.6-kgf/questasim/gcc-5.3.0-linux_x86_64/bin/gcc CXX=/usr/pack/modelsim-10.6-kgf/questasim/gcc-5.3.0-linux_x86_64/bin/g++ -j20 \ No newline at end of file diff --git a/include/ariane_pkg.svh b/include/ariane_pkg.svh index 799000c66..6a23a258c 100644 --- a/include/ariane_pkg.svh +++ b/include/ariane_pkg.svh @@ -1,3 +1,4 @@ + /* File: ariane_pkg.svh * Author: Florian Zaruba * Date: 8.4.2017 @@ -14,11 +15,14 @@ package ariane_pkg; // --------------- // Global Config // --------------- - localparam NR_SB_ENTRIES = 4; // number of scoreboard entries + localparam NR_SB_ENTRIES = 8; // number of scoreboard entries localparam TRANS_ID_BITS = $clog2(NR_SB_ENTRIES); // depending on the number of scoreboard entries we need that many bits // to uniquely identify the entry in the scoreboard localparam NR_WB_PORTS = 4; localparam ASID_WIDTH = 1; + localparam BTB_ENTRIES = 64; + localparam BITS_SATURATION_COUNTER = 2; + localparam 
logic [63:0] ISA_CODE = (1 << 2) // C - Compressed extension | (1 << 8) // I - RV32I/64I/128I base ISA | (1 << 12) // M - Integer Multiply/Divide extension @@ -91,6 +95,16 @@ package ariane_pkg; // LSU functions LD, SD, LW, LWU, SW, LH, LHU, SH, LB, SB, LBU } fu_op; + + typedef struct packed { + logic valid; + logic [63:0] vaddr; + logic [63:0] data; + logic [7:0] be; + fu_t fu; + fu_op operator; + logic [TRANS_ID_BITS-1:0] trans_id; + } lsu_ctrl_t; // --------------- // IF/ID Stage // --------------- @@ -198,8 +212,9 @@ package ariane_pkg; // memory management, pte typedef struct packed { - logic[37:0] ppn; - logic[1:0] sw_reserved; + logic [9:0] reserved; + logic [43:0] ppn; + logic [1:0] rsw; logic d; logic a; logic g; @@ -217,27 +232,27 @@ package ariane_pkg; // ---------------------- // Exception Cause Codes // ---------------------- - localparam logic [63:0] INSTR_ADDR_MISALIGNED = 64'd0; - localparam logic [63:0] INSTR_ACCESS_FAULT = 64'd1; - localparam logic [63:0] ILLEGAL_INSTR = 64'd2; - localparam logic [63:0] BREAKPOINT = 64'd3; - localparam logic [63:0] LD_ADDR_MISALIGNED = 64'd4; - localparam logic [63:0] LD_ACCESS_FAULT = 64'd5; - localparam logic [63:0] ST_ADDR_MISALIGNED = 64'd6; - localparam logic [63:0] ST_ACCESS_FAULT = 64'd7; - localparam logic [63:0] ENV_CALL_UMODE = 64'd8; // environment call from user mode - localparam logic [63:0] ENV_CALL_SMODE = 64'd9; // environment call from supervisor mode - localparam logic [63:0] ENV_CALL_MMODE = 64'd11; // environment call from machine mode - localparam logic [63:0] INSTR_PAGE_FAULT = 64'd12; // Instruction page fault - localparam logic [63:0] LOAD_PAGE_FAULT = 64'd13; // Load page fault - localparam logic [63:0] STORE_PAGE_FAULT = 64'd15; // Store page fault + localparam logic [63:0] INSTR_ADDR_MISALIGNED = 0; + localparam logic [63:0] INSTR_ACCESS_FAULT = 1; + localparam logic [63:0] ILLEGAL_INSTR = 2; + localparam logic [63:0] BREAKPOINT = 3; + localparam logic [63:0] LD_ADDR_MISALIGNED = 4; + 
localparam logic [63:0] LD_ACCESS_FAULT = 5; + localparam logic [63:0] ST_ADDR_MISALIGNED = 6; + localparam logic [63:0] ST_ACCESS_FAULT = 7; + localparam logic [63:0] ENV_CALL_UMODE = 8; // environment call from user mode + localparam logic [63:0] ENV_CALL_SMODE = 9; // environment call from supervisor mode + localparam logic [63:0] ENV_CALL_MMODE = 11; // environment call from machine mode + localparam logic [63:0] INSTR_PAGE_FAULT = 12; // Instruction page fault + localparam logic [63:0] LOAD_PAGE_FAULT = 13; // Load page fault + localparam logic [63:0] STORE_PAGE_FAULT = 15; // Store page fault - localparam logic [63:0] S_SW_INTERRUPT = (1 << 63) | 64'd1; - localparam logic [63:0] M_SW_INTERRUPT = (1 << 63) | 64'd3; - localparam logic [63:0] S_TIMER_INTERRUPT = (1 << 63) | 64'd5; - localparam logic [63:0] M_TIMER_INTERRUPT = (1 << 63) | 64'd7; - localparam logic [63:0] S_EXT_INTERRUPT = (1 << 63) | 64'd9; - localparam logic [63:0] M_EXT_INTERRUPT = (1 << 63) | 64'd11; + localparam logic [63:0] S_SW_INTERRUPT = (1 << 63) | 1; + localparam logic [63:0] M_SW_INTERRUPT = (1 << 63) | 3; + localparam logic [63:0] S_TIMER_INTERRUPT = (1 << 63) | 5; + localparam logic [63:0] M_TIMER_INTERRUPT = (1 << 63) | 7; + localparam logic [63:0] S_EXT_INTERRUPT = (1 << 63) | 9; + localparam logic [63:0] M_EXT_INTERRUPT = (1 << 63) | 11; // ----- // CSRs // ----- diff --git a/src/alu.sv b/src/alu.sv index bddcefc47..290011dc8 100644 --- a/src/alu.sv +++ b/src/alu.sv @@ -123,9 +123,9 @@ module alu assign shift_op_a_64 = { shift_arithmetic & shift_op_a[63], shift_op_a}; assign shift_op_a_32 = { shift_arithmetic & shift_op_a[31], shift_op_a32}; - assign shift_right_result = $signed(shift_op_a_64) >>> shift_amt[5:0]; + assign shift_right_result = $unsigned($signed(shift_op_a_64) >>> shift_amt[5:0]); - assign shift_right_result32 = $signed(shift_op_a_32) >>> shift_amt[4:0]; + assign shift_right_result32 = $unsigned($signed(shift_op_a_32) >>> shift_amt[4:0]); // bit reverse the 
shift_right_result for left shifts genvar j; generate diff --git a/src/ariane.sv b/src/ariane.sv index f75709ee8..1914e2835 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -118,8 +118,17 @@ module ariane logic fetch_valid_if_id; logic decode_ack_id_if; exception exception_if_id; + // -------------- - // ID <-> EX + // ID <-> ISSUE + // -------------- + scoreboard_entry issue_entry_id_issue; + logic issue_entry_valid_id_issue; + logic is_ctrl_fow_id_issue; + logic issue_instr_issue_id; + + // -------------- + // ISSUE <-> EX // -------------- logic [63:0] imm_id_ex; logic [TRANS_ID_BITS-1:0] trans_id_id_ex; @@ -247,7 +256,6 @@ module ariane if_stage if_stage_i ( .flush_i ( flush_ctrl_if ), .if_busy_o ( if_ready_if_pcgen ), - .id_ready_i ( ready_id_if ), .fetch_address_i ( fetch_address_pcgen_if ), .fetch_valid_i ( fetch_valid_pcgen_if ), .branch_predict_i ( branch_predict_pcgen_if ), @@ -267,23 +275,42 @@ module ariane // --------- // ID // --------- - id_stage + id_stage id_stage_i ( + .flush_i ( flush_ctrl_if ), + .fetch_entry_i ( fetch_entry_if_id ), + .fetch_entry_valid_i ( fetch_valid_if_id ), + .decoded_instr_ack_o ( decode_ack_id_if ), + + .issue_entry_o ( issue_entry_id_issue ), + .issue_entry_valid_o ( issue_entry_valid_id_issue ), + .is_ctrl_flow_o ( is_ctrl_fow_id_issue ), + .issue_instr_ack_i ( issue_instr_issue_id ), + + .priv_lvl_i ( priv_lvl ), + .tvm_i ( tvm_csr_id ), + .tw_i ( tw_csr_id ), + .tsr_i ( tsr_csr_id ), + + .* + ); + + // --------- + // Issue + // --------- + issue_stage #( - .NR_ENTRIES ( NR_SB_ENTRIES ), - .NR_WB_PORTS ( NR_WB_PORTS ) + .NR_ENTRIES ( NR_SB_ENTRIES ), + .NR_WB_PORTS ( NR_WB_PORTS ) ) - id_stage_i ( - .test_en_i ( test_en_i ), - .flush_i ( flush_ctrl_id ), + issue_stage_i ( .flush_unissued_instr_i ( flush_unissued_instr_ctrl_id ), - .fetch_entry_i ( fetch_entry_if_id ), - .fetch_entry_valid_i ( fetch_valid_if_id ), - .decoded_instr_ack_o ( decode_ack_id_if ), - .ready_o ( ready_id_if ), - .priv_lvl_i ( priv_lvl ), - 
.tvm_i ( tvm_csr_id ), - .tw_i ( tw_csr_id ), - .tsr_i ( tsr_csr_id ), + .flush_i ( flush_ctrl_id ), + + .decoded_instr_i ( issue_entry_id_issue ), + .decoded_instr_valid_i ( issue_entry_valid_id_issue ), + .is_ctrl_flow_i ( is_ctrl_fow_id_issue ), + .decoded_instr_ack_o ( issue_instr_issue_id ), + // Functional Units .fu_o ( fu_id_ex ), .operator_o ( operator_id_ex ), @@ -487,8 +514,8 @@ module ariane assign tracer_if.fetch_valid = fetch_valid_if_id; assign tracer_if.fetch_ack = decode_ack_id_if; // Issue - assign tracer_if.issue_ack = id_stage_i.scoreboard_i.issue_ack_i; - assign tracer_if.issue_sbe = id_stage_i.scoreboard_i.issue_instr_o; + assign tracer_if.issue_ack = issue_stage_i.scoreboard_i.issue_ack_i; + assign tracer_if.issue_sbe = issue_stage_i.scoreboard_i.issue_instr_o; // write-back assign tracer_if.waddr = waddr_a_commit_id; assign tracer_if.wdata = wdata_a_commit_id; @@ -497,12 +524,13 @@ module ariane assign tracer_if.commit_instr = commit_instr_id_commit; assign tracer_if.commit_ack = commit_ack; // address translation - assign tracer_if.translation_valid = ex_stage_i.lsu_i.mmu_i.lsu_valid_o; - assign tracer_if.vaddr = ex_stage_i.lsu_i.mmu_i.lsu_vaddr_i; - assign tracer_if.paddr = ex_stage_i.lsu_i.mmu_i.lsu_paddr_o; - assign tracer_if.is_store = ex_stage_i.lsu_i.mmu_i.lsu_is_store_i; - assign tracer_if.st_ready = ex_stage_i.lsu_i.store_unit_i.ready_o; - assign tracer_if.ld_ready = ex_stage_i.lsu_i.load_unit_i.ready_o; + // stores + assign tracer_if.st_valid = ex_stage_i.lsu_i.store_unit_i.store_buffer_i.valid_i; + assign tracer_if.st_paddr = ex_stage_i.lsu_i.store_unit_i.store_buffer_i.paddr_i; + // loads + assign tracer_if.ld_valid = ex_stage_i.lsu_i.load_unit_i.tag_valid_o; + assign tracer_if.ld_kill = ex_stage_i.lsu_i.load_unit_i.kill_req_o; + assign tracer_if.ld_paddr = ex_stage_i.lsu_i.load_unit_i.paddr_i; // exceptions assign tracer_if.exception = commit_stage_i.exception_o; @@ -510,6 +538,8 @@ module ariane instruction_tracer it = new 
(tracer_if); initial begin + #15ns; + it.create_file(cluster_id_i, core_id_i); it.trace(); end diff --git a/src/branch_unit.sv b/src/branch_unit.sv index 4471d6bd8..28648f233 100644 --- a/src/branch_unit.sv +++ b/src/branch_unit.sv @@ -74,7 +74,6 @@ module branch_unit ( automatic logic [63:0] jump_base = (operator_i == JALR) ? operand_a_i : pc_i; target_address = 64'b0; - resolved_branch_o.pc = pc_i; resolved_branch_o.target_address = 64'b0; resolved_branch_o.is_taken = 1'b0; resolved_branch_o.valid = branch_valid_i; @@ -92,17 +91,18 @@ module branch_unit ( // if we need to put the branch target address in a destination register, output it here to WB branch_result_o = next_pc; + // save PC - we need this to get the target row in the branch target buffer + // we play this trick with the branch instruction which wraps a byte boundary: + // |---------- Place the prediction on this PC + // \/ + // ____________________________________________________ + // |branch [15:0] | branch[31:16] | compressed 1[15:0] | + // |____________________________________________________ + // This will relief the pre-fetcher to re-fetch partially fetched unaligned branch instructions e.g.: + // we don't have a back arch between the pre-fetcher and decoder/instruction FIFO. + resolved_branch_o.pc = (is_compressed_instr_i || pc_i[1] == 1'b0) ? pc_i : ({pc_i[63:2], 2'b0} + 64'h4); + if (branch_valid_i) begin - // save PC - we need this to get the target row in the branch target buffer - // we play this trick with the branch instruction which wraps a byte boundary: - // |---------- Place the prediction on this PC - // \/ - // ____________________________________________________ - // |branch [15:0] | branch[31:16] | compressed 1[15:0] | - // |____________________________________________________ - // This will relief the prefetcher to re-fetch partially fetched unaligned branch instructions e.g.: - // we don't have a back arch between prefetcher and decoder/instruction FIFO. 
- resolved_branch_o.pc = (is_compressed_instr_i || pc_i[1] == 1'b0) ? pc_i : ({pc_i[63:2], 2'b0} + 64'h4); // save if the branch instruction was in the lower 16 bit of the instruction word // the first case is a compressed instruction which is in slot 0 // the other case is a misaligned uncompressed instruction which we only predict in the next cycle (see notes above) @@ -131,17 +131,18 @@ module branch_unit ( end end end - // to resolve the branch in ID -> only do this if this was indeed a branch (hence vald_i is asserted) + // to resolve the branch in ID resolve_branch_o = 1'b1; // the other case would be that this instruction was no branch but branch prediction thought that it was one // this is essentially also a mis-predict - end else if (fu_valid_i && branch_predict_i.valid) begin + end else if (fu_valid_i && branch_predict_i.valid && branch_predict_i.predict_taken) begin // re-set the branch to the next PC resolved_branch_o.is_mispredict = 1'b1; resolved_branch_o.target_address = next_pc; // clear this entry so that we are not constantly mis-predicting resolved_branch_o.clear = 1'b1; resolved_branch_o.valid = 1'b1; + resolve_branch_o = 1'b1; end end // use ALU exception signal for storing instruction fetch exceptions if diff --git a/src/btb.sv b/src/btb.sv index 23abaa249..88df84aff 100644 --- a/src/btb.sv +++ b/src/btb.sv @@ -111,6 +111,7 @@ module btb #( end else begin btb_q <= btb_n; end + end end endmodule \ No newline at end of file diff --git a/src/compressed_decoder.sv b/src/compressed_decoder.sv index 45fe2a5d6..ff459d182 100644 --- a/src/compressed_decoder.sv +++ b/src/compressed_decoder.sv @@ -32,226 +32,222 @@ module compressed_decoder output logic illegal_instr_o ); - // ------------------- - // Compressed Decoder - // ------------------- - always_comb begin - illegal_instr_o = 1'b0; - instr_o = '0; + // ------------------- + // Compressed Decoder + // ------------------- + always_comb begin + illegal_instr_o = 1'b0; + instr_o = '0; - unique case 
(instr_i[1:0]) - // C0 - 2'b00: begin - unique case (instr_i[15:13]) - 3'b000: begin - // c.addi4spn -> addi rd', x2, imm - instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5], instr_i[6], 2'b00, 5'h02, 3'b000, 2'b01, instr_i[4:2], OPCODE_OPIMM}; - if (instr_i[12:5] == 8'b0) illegal_instr_o = 1'b1; - end + unique case (instr_i[1:0]) + // C0 + 2'b00: begin + unique case (instr_i[15:13]) + 3'b000: begin + // c.addi4spn -> addi rd', x2, imm + instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5], instr_i[6], 2'b00, 5'h02, 3'b000, 2'b01, instr_i[4:2], OPCODE_OPIMM}; + if (instr_i[12:5] == 8'b0) illegal_instr_o = 1'b1; + end - 3'b010: begin - // c.lw -> lw rd', imm(rs1') - instr_o = {5'b0, instr_i[5], instr_i[12:10], instr_i[6], 2'b00, 2'b01, instr_i[9:7], 3'b010, 2'b01, instr_i[4:2], OPCODE_LOAD}; - end + 3'b010: begin + // c.lw -> lw rd', imm(rs1') + instr_o = {5'b0, instr_i[5], instr_i[12:10], instr_i[6], 2'b00, 2'b01, instr_i[9:7], 3'b010, 2'b01, instr_i[4:2], OPCODE_LOAD}; + end - 3'b011: begin - // c.ld -> ld rd', imm(rs1') - // | imm[11:0] | rs1 | funct3 | rd | opcode | - instr_o = {4'b0, instr_i[6:5], instr_i[12:10], 3'b000, 2'b01, instr_i[9:7], 3'b011, 2'b01, instr_i[4:2], OPCODE_LOAD}; - end + 3'b011: begin + // c.ld -> ld rd', imm(rs1') + // | imm[11:0] | rs1 | funct3 | rd | opcode | + instr_o = {4'b0, instr_i[6:5], instr_i[12:10], 3'b000, 2'b01, instr_i[9:7], 3'b011, 2'b01, instr_i[4:2], OPCODE_LOAD}; + end - 3'b110: begin - // c.sw -> sw rs2', imm(rs1') - instr_o = {5'b0, instr_i[5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b010, instr_i[11:10], instr_i[6], 2'b00, OPCODE_STORE}; - end + 3'b110: begin + // c.sw -> sw rs2', imm(rs1') + instr_o = {5'b0, instr_i[5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b010, instr_i[11:10], instr_i[6], 2'b00, OPCODE_STORE}; + end - 3'b111: begin - // c.sd -> sd rs2', imm(rs1') - instr_o = {4'b0, instr_i[6:5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b011, 
instr_i[11:10], 3'b000, OPCODE_STORE}; - end + 3'b111: begin + // c.sd -> sd rs2', imm(rs1') + instr_o = {4'b0, instr_i[6:5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b011, instr_i[11:10], 3'b000, OPCODE_STORE}; + end - default: begin - illegal_instr_o = 1'b1; - end - endcase - end - - // C1 - 2'b01: begin - unique case (instr_i[15:13]) - 3'b000: begin - // c.addi -> addi rd, rd, nzimm - // c.nop -> addi 0, 0, 0 - instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OPIMM}; - end - - // c.addiw -> addiw rd, rd, nzimm for RV64 - 3'b001: begin - if (instr_i[11:7] != 5'h0) // only valid if the destination is not r0 - instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OPIMM32}; - else - illegal_instr_o = 1'b1; - end - - 3'b101: begin - // 101: c.j -> jal x0, imm - instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], {9 {instr_i[12]}}, 4'b0, ~instr_i[15], OPCODE_JAL}; - end - - 3'b010: begin - // c.li -> addi rd, x0, nzimm - instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], OPCODE_OPIMM}; - if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; - end - - 3'b011: begin - // c.lui -> lui rd, imm - instr_o = {{15 {instr_i[12]}}, instr_i[6:2], instr_i[11:7], OPCODE_LUI}; - - if (instr_i[11:7] == 5'h02) begin - // c.addi16sp -> addi x2, x2, nzimm - instr_o = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2], instr_i[6], 4'b0, 5'h02, 3'b000, 5'h02, OPCODE_OPIMM}; - end else if (instr_i[11:7] == 5'b0) begin - illegal_instr_o = 1'b1; + default: begin + illegal_instr_o = 1'b1; + end + endcase end - if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1; - end - - 3'b100: begin - unique case (instr_i[11:10]) - 2'b00, - 2'b01: begin - // 00: c.srli -> srli rd, rd, shamt - // 01: c.srai -> srai rd, rd, shamt - instr_o = {1'b0, instr_i[10], 5'b0, instr_i[6:2], 2'b01, 
instr_i[9:7], 3'b101, 2'b01, instr_i[9:7], OPCODE_OPIMM}; - if (instr_i[6:2] == 5'b0) illegal_instr_o = 1'b1; - end - - 2'b10: begin - // c.andi -> andi rd, rd, imm - instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], OPCODE_OPIMM}; - end - - 2'b11: begin - unique case ({instr_i[12], instr_i[6:5]}) + // C1 + 2'b01: begin + unique case (instr_i[15:13]) 3'b000: begin - // c.sub -> sub rd', rd', rs2' - instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP}; + // c.addi -> addi rd, rd, nzimm + // c.nop -> addi 0, 0, 0 + instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OPIMM}; end + // c.addiw -> addiw rd, rd, nzimm for RV64 3'b001: begin - // c.xor -> xor rd', rd', rs2' - instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b100, 2'b01, instr_i[9:7], OPCODE_OP}; + if (instr_i[11:7] != 5'h0) // only valid if the destination is not r0 + instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OPIMM32}; + else + illegal_instr_o = 1'b1; + end + + 3'b101: begin + // 101: c.j -> jal x0, imm + instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], {9 {instr_i[12]}}, 4'b0, ~instr_i[15], OPCODE_JAL}; end 3'b010: begin - // c.or -> or rd', rd', rs2' - instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b110, 2'b01, instr_i[9:7], OPCODE_OP}; + // c.li -> addi rd, x0, nzimm + instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], OPCODE_OPIMM}; + if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; end 3'b011: begin - // c.and -> and rd', rd', rs2' - instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], OPCODE_OP}; + // c.lui -> lui rd, imm + instr_o = {{15 {instr_i[12]}}, instr_i[6:2], instr_i[11:7], OPCODE_LUI}; + + if (instr_i[11:7] == 5'h02) 
begin + // c.addi16sp -> addi x2, x2, nzimm + instr_o = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2], instr_i[6], 4'b0, 5'h02, 3'b000, 5'h02, OPCODE_OPIMM}; + end else if (instr_i[11:7] == 5'b0) begin + illegal_instr_o = 1'b1; + end + + if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1; end 3'b100: begin - // c.subw -> subw rd', rd', rs2' - instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP32}; - end - 3'b101: begin - // c.addw -> addw rd', rd', rs2' - instr_o = {2'b00, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP32}; + unique case (instr_i[11:10]) + 2'b00, + 2'b01: begin + // 00: c.srli -> srli rd, rd, shamt + // 01: c.srai -> srai rd, rd, shamt + instr_o = {1'b0, instr_i[10], 5'b0, instr_i[6:2], 2'b01, instr_i[9:7], 3'b101, 2'b01, instr_i[9:7], OPCODE_OPIMM}; + if (instr_i[6:2] == 5'b0) illegal_instr_o = 1'b1; + end + + 2'b10: begin + // c.andi -> andi rd, rd, imm + instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], OPCODE_OPIMM}; + end + + 2'b11: begin + unique case ({instr_i[12], instr_i[6:5]}) + 3'b000: begin + // c.sub -> sub rd', rd', rs2' + instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP}; + end + + 3'b001: begin + // c.xor -> xor rd', rd', rs2' + instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b100, 2'b01, instr_i[9:7], OPCODE_OP}; + end + + 3'b010: begin + // c.or -> or rd', rd', rs2' + instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b110, 2'b01, instr_i[9:7], OPCODE_OP}; + end + + 3'b011: begin + // c.and -> and rd', rd', rs2' + instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], OPCODE_OP}; + end + + 3'b100: begin + // c.subw -> subw rd', rd', rs2' + instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP32}; + 
end + 3'b101: begin + // c.addw -> addw rd', rd', rs2' + instr_o = {2'b00, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP32}; + end + + 3'b110, + 3'b111: begin + // 100: c.subw + // 101: c.addw + illegal_instr_o = 1'b1; + instr_o = {16'b0, instr_i}; + end + endcase + end + endcase end - 3'b110, - 3'b111: begin - // 100: c.subw - // 101: c.addw - illegal_instr_o = 1'b1; - instr_o = {16'b0, instr_i}; + 3'b110, 3'b111: begin + // 0: c.beqz -> beq rs1', x0, imm + // 1: c.bnez -> bne rs1', x0, imm + instr_o = {{4 {instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01, instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3], instr_i[12], OPCODE_BRANCH}; end - endcase - end - endcase - end - - 3'b110, 3'b111: begin - // 0: c.beqz -> beq rs1', x0, imm - // 1: c.bnez -> bne rs1', x0, imm - instr_o = {{4 {instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01, instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3], instr_i[12], OPCODE_BRANCH}; - end - - default: begin - illegal_instr_o = 1'b1; - end - endcase - end - - // C2 - 2'b10: begin - unique case (instr_i[15:13]) - 3'b000: begin - // c.slli -> slli rd, rd, shamt - instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], OPCODE_OPIMM}; - if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; // register not x0 - if (instr_i[6:2] == 5'b0) illegal_instr_o = 1'b1; // shift amount must be non zero - end - - 3'b010: begin - // c.lwsp -> lw rd, imm(x2) - instr_o = {4'b0, instr_i[3:2], instr_i[12], instr_i[6:4], 2'b00, 5'h02, 3'b010, instr_i[11:7], OPCODE_LOAD}; - if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; - end - - 3'b011: begin - // c.ldsp -> ld rd, imm(x2) - instr_o = {3'b0, instr_i[4:2], instr_i[12], instr_i[6:5], 3'b000, 5'h02, 3'b011, instr_i[11:7], OPCODE_LOAD}; - if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; - end - - 3'b100: begin - if (instr_i[12] == 1'b0) begin - // c.mv -> add rd/rs1, x0, rs2 - instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, 
instr_i[11:7], OPCODE_OP}; - - if (instr_i[6:2] == 5'b0) begin - // c.jr -> jalr x0, rd/rs1, 0 - instr_o = {12'b0, instr_i[11:7], 3'b0, 5'b0, OPCODE_JALR}; - end - end else begin - // c.add -> add rd, rd, rs2 - instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OP}; - - if (instr_i[11:7] == 5'b0) begin - // c.ebreak -> ebreak - instr_o = {32'h00_10_00_73}; - if (instr_i[6:2] != 5'b0) - illegal_instr_o = 1'b1; - end else if (instr_i[6:2] == 5'b0) begin - // c.jalr -> jalr x1, rs1, 0 - instr_o = {12'b0, instr_i[11:7], 3'b000, 5'b00001, OPCODE_JALR}; - end + endcase end - end - 3'b110: begin - // c.swsp -> sw rs2, imm(x2) - instr_o = {4'b0, instr_i[8:7], instr_i[12], instr_i[6:2], 5'h02, 3'b010, instr_i[11:9], 2'b00, OPCODE_STORE}; - end + // C2 + 2'b10: begin + unique case (instr_i[15:13]) + 3'b000: begin + // c.slli -> slli rd, rd, shamt + instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], OPCODE_OPIMM}; + if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; // register not x0 + if (instr_i[6:2] == 5'b0) illegal_instr_o = 1'b1; // shift amount must be non zero + end - 3'b111: begin - // c.sdsp -> sd rs2, imm(x2) - instr_o = {3'b0, instr_i[9:7], instr_i[12], instr_i[6:2], 5'h02, 3'b011, instr_i[11:10], 3'b000, OPCODE_STORE}; - end + 3'b010: begin + // c.lwsp -> lw rd, imm(x2) + instr_o = {4'b0, instr_i[3:2], instr_i[12], instr_i[6:4], 2'b00, 5'h02, 3'b010, instr_i[11:7], OPCODE_LOAD}; + if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; + end - default: begin - illegal_instr_o = 1'b1; - end + 3'b011: begin + // c.ldsp -> ld rd, imm(x2) + instr_o = {3'b0, instr_i[4:2], instr_i[12], instr_i[6:5], 3'b000, 5'h02, 3'b011, instr_i[11:7], OPCODE_LOAD}; + if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; + end + + 3'b100: begin + if (instr_i[12] == 1'b0) begin + // c.mv -> add rd/rs1, x0, rs2 + instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], OPCODE_OP}; + + if (instr_i[6:2] == 5'b0) begin + // c.jr -> jalr x0, rd/rs1, 0 + 
instr_o = {12'b0, instr_i[11:7], 3'b0, 5'b0, OPCODE_JALR}; + end + end else begin + // c.add -> add rd, rd, rs2 + instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OP}; + + if (instr_i[11:7] == 5'b0) begin + // c.ebreak -> ebreak + instr_o = {32'h00_10_00_73}; + if (instr_i[6:2] != 5'b0) + illegal_instr_o = 1'b1; + end else if (instr_i[6:2] == 5'b0) begin + // c.jalr -> jalr x1, rs1, 0 + instr_o = {12'b0, instr_i[11:7], 3'b000, 5'b00001, OPCODE_JALR}; + end + end + end + + 3'b110: begin + // c.swsp -> sw rs2, imm(x2) + instr_o = {4'b0, instr_i[8:7], instr_i[12], instr_i[6:2], 5'h02, 3'b010, instr_i[11:9], 2'b00, OPCODE_STORE}; + end + + 3'b111: begin + // c.sdsp -> sd rs2, imm(x2) + instr_o = {3'b0, instr_i[9:7], instr_i[12], instr_i[6:2], 5'h02, 3'b011, instr_i[11:10], 3'b000, OPCODE_STORE}; + end + + default: begin + illegal_instr_o = 1'b1; + end + endcase + end + + default: ; endcase - end - - default: ; - endcase - end + end endmodule \ No newline at end of file diff --git a/src/controller.sv b/src/controller.sv index cdcbd6599..a3f1e856d 100644 --- a/src/controller.sv +++ b/src/controller.sv @@ -68,6 +68,7 @@ module controller ( if (sfence_vma_i) begin flush_pcgen_o = 1'b1; flush_if_o = 1'b1; + flush_unissued_instr_o = 1'b1; flush_id_o = 1'b1; flush_ex_o = 1'b1; flush_tlb_o = 1'b1; @@ -79,6 +80,7 @@ module controller ( if (flush_csr_i) begin flush_pcgen_o = 1'b1; flush_if_o = 1'b1; + flush_unissued_instr_o = 1'b1; flush_id_o = 1'b1; flush_ex_o = 1'b1; end @@ -91,6 +93,7 @@ module controller ( // for the PC GEN stage but instead tells it to take the PC we gave it flush_pcgen_o = 1'b0; flush_if_o = 1'b1; + flush_unissued_instr_o = 1'b1; flush_id_o = 1'b1; flush_ex_o = 1'b1; end @@ -102,6 +105,7 @@ module controller ( // don't flush pcgen as we want to take the exception flush_pcgen_o = 1'b0; flush_if_o = 1'b1; + flush_unissued_instr_o = 1'b1; flush_id_o = 1'b1; flush_ex_o = 1'b1; end diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv 
index 4ed1e140a..b02d532a7 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -79,11 +79,7 @@ module csr_regfile #( priv_lvl_t trap_to_priv_lvl; // register for enabling load store address translation, this is critical, hence the register logic en_ld_st_translation_n, en_ld_st_translation_q; - // ---------------------- - // LD/ST Privilege Level - // ---------------------- - assign ld_st_priv_lvl_o = (mstatus_q.mprv) ? mstatus_q.mpp : priv_lvl_o; - assign en_ld_st_translation_o = en_ld_st_translation_q; + // ---------------- // CSR Registers // ---------------- @@ -282,7 +278,13 @@ module csr_regfile #( CSR_MIE: mie_n = csr_wdata & 64'hBBB; // we only support supervisor and m-mode interrupts CSR_MIP: mip_n = csr_wdata & 64'h33; // only USIP, SSIP, UTIP, STIP are write-able - CSR_MTVEC: mtvec_n = {csr_wdata[63:2], 1'b0, csr_wdata[0]}; + CSR_MTVEC: begin + mtvec_n = {csr_wdata[63:2], 1'b0, csr_wdata[0]}; + // we are in vector mode, this implementation requires the additional + // alignment constraint of 64 * 4 bytes + if (csr_wdata[0]) + mtvec_n = {csr_wdata[63:8], 7'b0, csr_wdata[0]}; + end CSR_MSCRATCH: mscratch_n = csr_wdata; CSR_MEPC: mepc_n = {csr_wdata[63:1], 1'b0}; CSR_MCAUSE: mcause_n = csr_wdata; @@ -338,7 +340,7 @@ module csr_regfile #( end else begin // update mstatus // clear enable flags for all lower privilege levels - // but as m is already the highest -> clear everything + // but as M is already the highest -> clear everything mstatus_n.mie = 1'b0; mstatus_n.sie = 1'b0; mstatus_n.mpie = mstatus_q.mie; @@ -362,6 +364,9 @@ module csr_regfile #( en_ld_st_translation_n = 1'b1; else // otherwise we go with the regular settings en_ld_st_translation_n = en_translation_o; + + ld_st_priv_lvl_o = (mstatus_q.mprv) ? 
mstatus_q.mpp : priv_lvl_o; + en_ld_st_translation_o = en_ld_st_translation_q; // ----------------------- // Return from Exception // ----------------------- @@ -537,75 +542,76 @@ module csr_regfile #( // output assignments dependent on privilege mode always_comb begin : priv_output - automatic logic [63:0] base = {mtvec_q[63:2], 2'b0}; - epc_o = mepc_q; + trap_vector_base_o = {mtvec_q[63:2], 2'b0}; // output user mode stvec if (trap_to_priv_lvl == PRIV_LVL_S) begin - base = {stvec_q[63:2], 2'b0}; + trap_vector_base_o = {stvec_q[63:2], 2'b0}; end - // check if we are in vectored mode, if yes then do BASE + 4*cause + // check if we are in vectored mode, if yes then do BASE + 4 * cause + // we are imposing an additional alignment-constraint of 64 * 4 bytes since + // we want to spare the costly addition if ((mtvec_q[0] || stvec_q[0]) && csr_exception_o.cause[63]) begin - base = base + (csr_exception_o.cause[62:0] << 2); + trap_vector_base_o[7:2] = csr_exception_o.cause[5:0]; end + epc_o = mepc_q; // we are returning from supervisor mode, so take the sepc register if (sret) begin - epc_o = sepc_q; + epc_o = sepc_q; end - trap_vector_base_o = base; end // sequential process always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin - priv_lvl_q <= PRIV_LVL_M; + priv_lvl_q <= PRIV_LVL_M; // machine mode registers - mstatus_q <= 64'b0; - mtvec_q <= {boot_addr_i[63:2], 2'b0}; // set to boot address + direct mode - medeleg_q <= 64'b0; - mideleg_q <= 64'b0; - mip_q <= 64'b0; - mie_q <= 64'b0; - mepc_q <= 64'b0; - mcause_q <= 64'b0; - mscratch_q <= 64'b0; - mtval_q <= 64'b0; + mstatus_q <= 64'b0; + mtvec_q <= {boot_addr_i[63:2], 2'b0}; // set to boot address + direct mode + medeleg_q <= 64'b0; + mideleg_q <= 64'b0; + mip_q <= 64'b0; + mie_q <= 64'b0; + mepc_q <= 64'b0; + mcause_q <= 64'b0; + mscratch_q <= 64'b0; + mtval_q <= 64'b0; // supervisor mode registers - sepc_q <= 64'b0; - scause_q <= 64'b0; - stvec_q <= 64'b0; - sscratch_q <= 64'b0; - stval_q <= 64'b0; - 
satp_q <= 64'b0; + sepc_q <= 64'b0; + scause_q <= 64'b0; + stvec_q <= 64'b0; + sscratch_q <= 64'b0; + stval_q <= 64'b0; + satp_q <= 64'b0; // timer and counters - cycle_q <= 64'b0; - instret_q <= 64'b0; + cycle_q <= 64'b0; + instret_q <= 64'b0; // aux registers en_ld_st_translation_q <= 1'b0; end else begin - priv_lvl_q <= priv_lvl_n; + priv_lvl_q <= priv_lvl_n; // machine mode registers - mstatus_q <= mstatus_n; - mtvec_q <= mtvec_n; - medeleg_q <= medeleg_n; - mideleg_q <= mideleg_n; - mip_q <= mip_n; - mie_q <= mie_n; - mepc_q <= mepc_n; - mcause_q <= mcause_n; - mscratch_q <= mscratch_n; - mtval_q <= mtval_n; + mstatus_q <= mstatus_n; + mtvec_q <= mtvec_n; + medeleg_q <= medeleg_n; + mideleg_q <= mideleg_n; + mip_q <= mip_n; + mie_q <= mie_n; + mepc_q <= mepc_n; + mcause_q <= mcause_n; + mscratch_q <= mscratch_n; + mtval_q <= mtval_n; // supervisor mode registers - sepc_q <= sepc_n; - scause_q <= scause_n; - stvec_q <= stvec_n; - sscratch_q <= sscratch_n; - stval_q <= stval_n; - satp_q <= satp_n; + sepc_q <= sepc_n; + scause_q <= scause_n; + stvec_q <= stvec_n; + sscratch_q <= sscratch_n; + stval_q <= stval_n; + satp_q <= satp_n; // timer and counters - cycle_q <= cycle_n; - instret_q <= instret_n; + cycle_q <= cycle_n; + instret_q <= instret_n; // aux registers en_ld_st_translation_q <= en_ld_st_translation_n; end diff --git a/src/dcache_arbiter.sv b/src/dcache_arbiter.sv index 09e2d0b19..5526b8fb6 100644 --- a/src/dcache_arbiter.sv +++ b/src/dcache_arbiter.sv @@ -108,15 +108,15 @@ module dcache_arbiter #( request_port_n = i; request_index = i; // wait for the grant - if (data_gnt_i) begin - // set the slave on which we are waiting - in_data = 1'b1 << i[DATA_WIDTH-1:0]; - push = 1'b1; - end - + // set the slave on which we are waiting + in_data = 1'b1 << i[DATA_WIDTH-1:0]; break; // break here as this is a priority select end end + // only if we got a grant save it to the queue + if (data_gnt_i) begin + push = 1'b1; + end end // pass through all signals from 
the correct slave port diff --git a/src/decoder.sv b/src/decoder.sv index 8205768fd..4075308a8 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -96,11 +96,17 @@ module decoder ( instruction_o.op = SRET; // check privilege level, SRET can only be executed in S and M mode // we'll just decode an illegal instruction if we are in the wrong privilege level - if (priv_lvl_i == PRIV_LVL_U) + if (priv_lvl_i == PRIV_LVL_U) begin illegal_instr = 1'b1; + // do not change privilege level if this is an illegal instruction + instruction_o.op = ADD; + end // if we are in S-Mode and Trap SRET (tsr) is set -> trap on illegal instruction - if (priv_lvl_i == PRIV_LVL_S && tsr_i) + if (priv_lvl_i == PRIV_LVL_S && tsr_i) begin illegal_instr = 1'b1; + // do not change privilege level if this is an illegal instruction + instruction_o.op = ADD; + end end // MRET 12'b1100000010: begin @@ -276,8 +282,6 @@ module decoder ( else illegal_instr = 1'b1; end - - default: illegal_instr = 1'b1; endcase end diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index 4924286ec..582102aed 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -40,7 +40,7 @@ module fetch_fifo input logic out_ready_i ); - localparam DEPTH = 8; // must be a power of two + localparam int unsigned DEPTH = 8; // must be a power of two // input registers - bounding the path from memory branchpredict_sbe branch_predict_n, branch_predict_q; @@ -164,7 +164,6 @@ module fetch_fifo status_cnt++; write_pointer++; - // $display("Instruction: [ c | c ] @ %t", $time); // or is it an unaligned 32 bit instruction like // ____________________________________________________ // |instr [15:0] | instr [31:16] | compressed 1[15:0] | @@ -176,7 +175,6 @@ module fetch_fifo unaligned_n = 1'b1; // save the address as well unaligned_address_n = {in_addr_q[63:2], 2'b10}; - // $display("Instruction: [ i0 | c ] @ %t", $time); // this does not consume space in the FIFO end end else begin @@ -189,7 +187,6 @@ module fetch_fifo }; status_cnt++; 
write_pointer++; - // $display("Instruction: [ i ] @ %t", $time); end end // we have an outstanding unaligned instruction @@ -217,7 +214,6 @@ module fetch_fifo write_pointer++; // unaligned access served unaligned_n = 1'b0; - // $display("Instruction: [ c | i1 ] @ %t", $time); // or is it an unaligned 32 bit instruction like // ____________________________________________________ // |instr [15:0] | instr [31:16] | compressed 1[15:0] | @@ -229,7 +225,6 @@ module fetch_fifo unaligned_n = 1'b1; // save the address as well unaligned_address_n = {in_addr_q[63:2], 2'b10}; - // $display("Instruction: [ i0 | i1 ] @ %t", $time); // this does not consume space in the FIFO // we've got a predicted taken branch we need to clear the unaligned flag if it was decoded as a lower 16 instruction end else if (branch_predict_q.valid && branch_predict_q.predict_taken && branch_predict_q.is_lower_16) begin diff --git a/src/fifo.sv b/src/fifo.sv index 4bda4de62..f6e073197 100644 --- a/src/fifo.sv +++ b/src/fifo.sv @@ -68,6 +68,7 @@ module fifo #( // but increment the read pointer... read_pointer_n = read_pointer_q + 1; // ... 
and decrement the overall count + mem_n[read_pointer_q] = '0; status_cnt_n = status_cnt_q - 1; end // keep the count pointer stable if we push and pop at the same time diff --git a/src/id_stage.sv b/src/id_stage.sv index b5bc55d46..f815ed9a1 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -19,121 +19,37 @@ // import ariane_pkg::*; -module id_stage #( - parameter int NR_ENTRIES = 4, - parameter int NR_WB_PORTS = 4 - )( +module id_stage ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low - input logic test_en_i, // Test Enable input logic flush_i, - input logic flush_unissued_instr_i, // from IF input fetch_entry fetch_entry_i, input logic fetch_entry_valid_i, - output logic decoded_instr_ack_o, + output logic decoded_instr_ack_o, // acknowledge the instruction (fetch entry) + + // to ID + output scoreboard_entry issue_entry_o, // a decoded instruction + output logic issue_entry_valid_o, // issue entry is valid + output logic is_ctrl_flow_o, // the instruction we issue is a ctrl flow instructions + input logic issue_instr_ack_i, // issue stage acknowledged sampling of instructions // from CSR file - input priv_lvl_t priv_lvl_i, // current privilege level + input priv_lvl_t priv_lvl_i, // current privilege level input logic tvm_i, input logic tw_i, - input logic tsr_i, - - output logic ready_o, // id is ready - output fu_t fu_o, - output fu_op operator_o, - output logic [63:0] operand_a_o, - output logic [63:0] operand_b_o, - output logic [63:0] imm_o, - output logic [TRANS_ID_BITS-1:0] trans_id_o, - output logic [63:0] pc_o, - output logic is_compressed_instr_o, - - input logic alu_ready_i, - output logic alu_valid_o, - // ex just resolved our predicted branch, we are ready to accept new requests - input logic resolve_branch_i, - - input logic lsu_ready_i, - output logic lsu_valid_o, - // branch prediction - input logic branch_ready_i, - output logic branch_valid_o, // use branch prediction unit - output branchpredict_sbe 
branch_predict_o, - - input logic mult_ready_i, - output logic mult_valid_o, // Branch predict Out - - input logic csr_ready_i, - output logic csr_valid_o, - - // write back port - input logic [NR_WB_PORTS-1:0][TRANS_ID_BITS-1:0] trans_id_i, - input logic [NR_WB_PORTS-1:0][63:0] wdata_i, - input exception [NR_WB_PORTS-1:0] ex_ex_i, // exception from execute stage - input logic [NR_WB_PORTS-1:0] wb_valid_i, - // commit port - input logic[4:0] waddr_a_i, - input logic[63:0] wdata_a_i, - input logic we_a_i, - - output scoreboard_entry commit_instr_o, - input logic commit_ack_i + input logic tsr_i ); - // --------------------------------------------------- - // Global signals - // --------------------------------------------------- - logic full; - // --------------------------------------------------- - // Scoreboard (SB) <-> Issue and Read Operands (iro) - // --------------------------------------------------- - fu_t [31:0] rd_clobber_sb_iro; - logic [4:0] rs1_iro_sb; - logic [63:0] rs1_sb_iro; - logic rs1_valid_sb_iro; - logic [4:0] rs2_iro_sb; - logic [63:0] rs2_sb_iro; - logic rs2_valid_iro_sb; - scoreboard_entry issue_instr_sb_iro; - logic issue_instr_valid_sb_iro; - logic issue_ack_iro_sb; - // --------------------------------------------------- - // Decoder (DC) <-> Scoreboard (SB) - // --------------------------------------------------- - scoreboard_entry decoded_instr_dc_sb; - // --------------------------------------------------- - // Decoder (DC) <-> Branch Logic - // --------------------------------------------------- + // register stage + struct packed { + logic valid; + scoreboard_entry sbe; + logic is_ctrl_flow; + + } issue_n, issue_q; + logic is_control_flow_instr; - - // --------------------------------------------------- - // Branch (resolve) logic - // --------------------------------------------------- - // This should basically prevent the scoreboard from accepting - // instructions past a branch. We need to resolve the branch beforehand. 
- // This limitation is in place to ease the backtracking of mis-predicted branches as they - // can simply be in the front-end of the processor. - logic unresolved_branch_n, unresolved_branch_q; - - always_comb begin : unresolved_branch - unresolved_branch_n = unresolved_branch_q; - // we just resolved the branch - if (resolve_branch_i) begin - unresolved_branch_n = 1'b0; - end - // if the instruction is valid and it is a control flow instruction - if (fetch_entry_valid_i && is_control_flow_instr) begin - unresolved_branch_n = 1'b1; - end - // if we are requested to flush also flush the unresolved branch flag because either the flush - // was requested by a branch or an exception. In any case: any unresolved branch will get evicted - if (flush_unissued_instr_i || flush_i) begin - unresolved_branch_n = 1'b0; - end - end - // we are ready if we are not full and don't have any unresolved branches, but it can be - // the case that we have an unresolved branch which is cleared in that cycle (resolved_branch_i.valid == 1) - assign ready_o = ~full && (~unresolved_branch_q || resolve_branch_i); + scoreboard_entry decoded_instruction; decoder decoder_i ( .pc_i ( fetch_entry_i.address ), @@ -142,58 +58,45 @@ module id_stage #( .branch_predict_i ( fetch_entry_i.branch_predict ), .is_illegal_i ( fetch_entry_i.is_illegal ), .ex_i ( fetch_entry_i.ex ), - .instruction_o ( decoded_instr_dc_sb ), + .instruction_o ( decoded_instruction ), .is_control_flow_instr_o ( is_control_flow_instr ), .* ); - scoreboard #( - .NR_ENTRIES ( NR_ENTRIES ), - .NR_WB_PORTS ( NR_WB_PORTS ) - ) - scoreboard_i - ( - .full_o ( full ), - .rd_clobber_o ( rd_clobber_sb_iro ), - .rs1_i ( rs1_iro_sb ), - .rs1_o ( rs1_sb_iro ), - .rs1_valid_o ( rs1_valid_sb_iro ), - .rs2_i ( rs2_iro_sb ), - .rs2_o ( rs2_sb_iro ), - .rs2_valid_o ( rs2_valid_iro_sb ), - .commit_instr_o ( commit_instr_o ), - .commit_ack_i ( commit_ack_i ), - .decoded_instr_i ( decoded_instr_dc_sb ), - .decoded_instr_valid_i ( fetch_entry_valid_i 
), - .issue_instr_o ( issue_instr_sb_iro ), - .issue_instr_valid_o ( issue_instr_valid_sb_iro ), - .issue_ack_i ( issue_ack_iro_sb ), - .trans_id_i ( trans_id_i ), - .wdata_i ( wdata_i ), - .ex_i ( ex_ex_i ), - .* - ); + // ------------------ + // Output Registers + // ------------------ + assign issue_entry_o = issue_q.sbe; + assign issue_entry_valid_o = issue_q.valid; + assign is_ctrl_flow_o = issue_q.is_ctrl_flow; + always_comb begin + issue_n = issue_q; + decoded_instr_ack_o = 1'b0; - issue_read_operands issue_read_operands_i ( - .issue_instr_i ( issue_instr_sb_iro ), - .issue_instr_valid_i ( issue_instr_valid_sb_iro ), - .issue_ack_o ( issue_ack_iro_sb ), - .rs1_o ( rs1_iro_sb ), - .rs1_i ( rs1_sb_iro ), - .rs1_valid_i ( rs1_valid_sb_iro ), - .rs2_o ( rs2_iro_sb ), - .rs2_i ( rs2_sb_iro ), - .rs2_valid_i ( rs2_valid_iro_sb ), - .rd_clobber_i ( rd_clobber_sb_iro ), - .* - ); + if (issue_instr_ack_i) + issue_n.valid = 1'b0; + // if we have a space in the register and the fetch is valid, go get it + // or the issue stage is currently acknowledging an instruction, which means that we will have space + // for a new instruction + if ((!issue_q.valid || issue_instr_ack_i) && fetch_entry_valid_i) begin + decoded_instr_ack_o = 1'b1; + issue_n = { 1'b1, decoded_instruction, is_control_flow_instr}; + end + + // invalidate on a flush + if (flush_i) + issue_n.valid = 1'b0; + end + // ------------------------- + // Registers (ID <-> Issue) + // ------------------------- always_ff @(posedge clk_i or negedge rst_ni) begin - if (~rst_ni) begin - unresolved_branch_q <= 1'b0; + if(~rst_ni) begin + issue_q <= '0; end else begin - unresolved_branch_q <= unresolved_branch_n; + issue_q <= issue_n; end end diff --git a/src/if_stage.sv b/src/if_stage.sv index 13d356124..ada31a255 100644 --- a/src/if_stage.sv +++ b/src/if_stage.sv @@ -24,7 +24,6 @@ module if_stage ( // control signals input logic flush_i, output logic if_busy_o, // is the IF stage busy fetching instructions? 
- input logic id_ready_i, // ID stage is ready // fetch direction from PC Gen input logic [63:0] fetch_address_i, // address to fetch from input logic fetch_valid_i, // the fetch address is valid @@ -88,7 +87,7 @@ module if_stage ( NS = CS; addr_valid = 1'b0; - unique case(CS) + case(CS) // default state, not waiting for requested data IDLE: begin instr_addr_o = fetch_address; @@ -211,11 +210,6 @@ module if_stage ( // otherwise wait in this state for the rvalid end end - - default: begin - NS = IDLE; - instr_req_o = 1'b0; - end endcase end diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index b76fa19b4..8b3987d41 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -338,7 +338,7 @@ module issue_read_operands ( `ifndef verilator assert property ( @(posedge clk_i) (alu_valid_q || lsu_valid_q || csr_valid_q) |-> (!$isunknown(operand_a_q) && !$isunknown(operand_b_q))) - else $error ("Got unknown value in one of the operands"); + else $warning ("Got unknown value in one of the operands"); `endif `endif endmodule diff --git a/src/issue_stage.sv b/src/issue_stage.sv new file mode 100755 index 000000000..5233e3639 --- /dev/null +++ b/src/issue_stage.sv @@ -0,0 +1,175 @@ +// Author: Florian Zaruba, ETH Zurich +// Date: 21.05.2017 +// Description: Issue stage dispatches instructions to the FUs +// +// +// Copyright (C) 2017 ETH Zurich, University of Bologna +// All rights reserved. +// +// This code is under development and not yet released to the public. +// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. 
+// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna. +// + +import ariane_pkg::*; + +module issue_stage #( + parameter int NR_ENTRIES = 8, + parameter int NR_WB_PORTS = 4 + )( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic test_en_i, // Test Enable + + input logic flush_unissued_instr_i, + input logic flush_i, + // from ID + input scoreboard_entry decoded_instr_i, + input logic decoded_instr_valid_i, + input logic is_ctrl_flow_i, + output logic decoded_instr_ack_o, + // to EX + output fu_t fu_o, + output fu_op operator_o, + output logic [63:0] operand_a_o, + output logic [63:0] operand_b_o, + output logic [63:0] imm_o, + output logic [TRANS_ID_BITS-1:0] trans_id_o, + output logic [63:0] pc_o, + output logic is_compressed_instr_o, + + input logic alu_ready_i, + output logic alu_valid_o, + // ex just resolved our predicted branch, we are ready to accept new requests + input logic resolve_branch_i, + + input logic lsu_ready_i, + output logic lsu_valid_o, + // branch prediction + input logic branch_ready_i, + output logic branch_valid_o, // use branch prediction unit + output branchpredict_sbe branch_predict_o, + + input logic mult_ready_i, + output logic mult_valid_o, // Branch predict Out + + input logic csr_ready_i, + output logic csr_valid_o, + + // write back port + input logic [NR_WB_PORTS-1:0][TRANS_ID_BITS-1:0] trans_id_i, + input logic [NR_WB_PORTS-1:0][63:0] wdata_i, + input exception [NR_WB_PORTS-1:0] ex_ex_i, // exception from execute stage + input logic [NR_WB_PORTS-1:0] wb_valid_i, + + // commit port + input logic[4:0] waddr_a_i, + input logic[63:0] wdata_a_i, + input logic we_a_i, + + output scoreboard_entry commit_instr_o, + input logic commit_ack_i +); + // 
--------------------------------------------------- + // Scoreboard (SB) <-> Issue and Read Operands (IRO) + // --------------------------------------------------- + fu_t [31:0] rd_clobber_sb_iro; + logic [4:0] rs1_iro_sb; + logic [63:0] rs1_sb_iro; + logic rs1_valid_sb_iro; + logic [4:0] rs2_iro_sb; + logic [63:0] rs2_sb_iro; + logic rs2_valid_iro_sb; + scoreboard_entry issue_instr_sb_iro; + logic issue_instr_valid_sb_iro; + logic issue_ack_iro_sb; + + + // --------------------------------------------------- + // Branch (resolve) logic + // --------------------------------------------------- + // This should basically prevent the scoreboard from accepting + // instructions past a branch. We need to resolve the branch beforehand. + // This limitation is in place to ease the backtracking of mis-predicted branches as they + // can simply be in the front-end of the processor. + logic unresolved_branch_n, unresolved_branch_q; + + always_comb begin : unresolved_branch + unresolved_branch_n = unresolved_branch_q; + // we just resolved the branch + if (resolve_branch_i) begin + unresolved_branch_n = 1'b0; + end + // if the instruction is valid, it is a control flow instruction and the issue stage acknowledged its dispatch + // set the unresolved branch flag + if (issue_ack_iro_sb && decoded_instr_valid_i && is_ctrl_flow_i) begin + unresolved_branch_n = 1'b1; + end + // if we predicted a taken branch this means that we need to stall issue for one cycle to resolve the + // branch, otherwise we might issue a wrong instruction + if (issue_ack_iro_sb && decoded_instr_i.bp.valid && decoded_instr_i.bp.predict_taken) begin + unresolved_branch_n = 1'b1; + end + // if we are requested to flush also flush the unresolved branch flag because either the flush + // was requested by a branch or an exception. 
In any case: any unresolved branch will get evicted + if (flush_unissued_instr_i || flush_i) begin + unresolved_branch_n = 1'b0; + end + end + + issue_read_operands issue_read_operands_i ( + .flush_i ( flush_unissued_instr_i ), + .issue_instr_i ( issue_instr_sb_iro ), + .issue_instr_valid_i ( issue_instr_valid_sb_iro ), + .issue_ack_o ( issue_ack_iro_sb ), + .rs1_o ( rs1_iro_sb ), + .rs1_i ( rs1_sb_iro ), + .rs1_valid_i ( rs1_valid_sb_iro ), + .rs2_o ( rs2_iro_sb ), + .rs2_i ( rs2_sb_iro ), + .rs2_valid_i ( rs2_valid_iro_sb ), + .rd_clobber_i ( rd_clobber_sb_iro ), + .* + ); + + scoreboard #( + .NR_ENTRIES ( NR_ENTRIES ), + .NR_WB_PORTS ( NR_WB_PORTS ) + ) + scoreboard_i + ( + .unresolved_branch_i ( unresolved_branch_q ), + .rd_clobber_o ( rd_clobber_sb_iro ), + .rs1_i ( rs1_iro_sb ), + .rs1_o ( rs1_sb_iro ), + .rs1_valid_o ( rs1_valid_sb_iro ), + .rs2_i ( rs2_iro_sb ), + .rs2_o ( rs2_sb_iro ), + .rs2_valid_o ( rs2_valid_iro_sb ), + + .issue_instr_o ( issue_instr_sb_iro ), + .issue_instr_valid_o ( issue_instr_valid_sb_iro ), + .issue_ack_i ( issue_ack_iro_sb ), + + .trans_id_i ( trans_id_i ), + .wdata_i ( wdata_i ), + .ex_i ( ex_ex_i ), + .* + ); + + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin + unresolved_branch_q <= 1'b0; + end else begin + unresolved_branch_q <= unresolved_branch_n; + end + end + +endmodule diff --git a/src/load_unit.sv b/src/load_unit.sv index 77bd58504..e2ffc76e9 100644 --- a/src/load_unit.sv +++ b/src/load_unit.sv @@ -23,11 +23,9 @@ module load_unit ( input logic rst_ni, // Asynchronous reset active low input logic flush_i, // load unit input port - input fu_op operator_i, - input logic [TRANS_ID_BITS-1:0] trans_id_i, input logic valid_i, - input logic [63:0] vaddr_i, - input logic [7:0] be_i, + input lsu_ctrl_t lsu_ctrl_i, + output logic pop_ld_o, // load unit output port output logic valid_o, output logic ready_o, @@ -38,8 +36,8 @@ module load_unit ( output logic translation_req_o, // request address translation 
output logic [63:0] vaddr_o, // virtual address out input logic [63:0] paddr_i, // physical address in - input logic translation_valid_i, input exception ex_i, // exception which may has happened earlier. for example: mis-aligned exception + input logic dtlb_hit_i, // hit on the dtlb, send in the same cycle as the request // address checker output logic [11:0] page_offset_o, input logic page_offset_matches_i, @@ -56,56 +54,46 @@ module load_unit ( input logic data_rvalid_i, input logic [63:0] data_rdata_i ); - enum logic [2:0] {IDLE, WAIT_GNT, SEND_TAG, WAIT_PAGE_OFFSET, ABORT_TRANSLATION, WAIT_FLUSH} NS, CS; + enum logic [2:0] {IDLE, WAIT_GNT, SEND_TAG, WAIT_PAGE_OFFSET, ABORT_TRANSACTION, WAIT_TRANSLATION, WAIT_FLUSH} NS, CS; // in order to decouple the response interface from the request interface we need a // a queue which can hold all outstanding memory requests - typedef struct packed { + struct packed { logic [TRANS_ID_BITS-1:0] trans_id; logic [2:0] address_offset; fu_op operator; - } rvalid_entry_t; + } load_data_n, load_data_q, in_data; - // queue control signal - rvalid_entry_t in_data; - logic push; - rvalid_entry_t out_data; - logic pop; - logic empty; - // register to save the physical address after address translation - // going directly to memory with this address will not work in-terms of timing (e.g.: the path to the memory - // is already super-critical with the address checker and memory arbiter on it). 
- logic [63:0] paddr_n, paddr_q; // page offset is defined as the lower 12 bits, feed through for address checker - assign page_offset_o = vaddr_i[11:0]; + assign page_offset_o = lsu_ctrl_i.vaddr[11:0]; // feed-through the virtual address for VA translation - assign vaddr_o = vaddr_i; + assign vaddr_o = lsu_ctrl_i.vaddr; // this is a read-only interface so set the write enable to 0 assign data_we_o = 1'b0; // compose the queue data, control is handled in the FSM - assign in_data = {trans_id_i, vaddr_i[2:0], operator_i}; - + assign in_data = {lsu_ctrl_i.trans_id, lsu_ctrl_i.vaddr[2:0], lsu_ctrl_i.operator}; // output address // we can now output the lower 12 bit as the index to the cache - assign address_index_o = vaddr_i[11:0]; + assign address_index_o = lsu_ctrl_i.vaddr[11:0]; // translation from last cycle, again: control is handled in the FSM - assign address_tag_o = paddr_q[55:12]; + assign address_tag_o = paddr_i[55:12]; + // directly output an exception + assign ex_o = ex_i; // --------------- // Load Control // --------------- - always_comb begin : load_controll + always_comb begin : load_control // default assignments NS = CS; - paddr_n = paddr_q; + load_data_n = in_data; translation_req_o = 1'b0; ready_o = 1'b1; data_req_o = 1'b0; // tag control kill_req_o = 1'b0; tag_valid_o = 1'b0; - push = 1'b0; - data_be_o = be_i; - ex_o = ex_i; + data_be_o = lsu_ctrl_i.be; + pop_ld_o = 1'b0; case (CS) IDLE: begin @@ -117,107 +105,59 @@ module load_unit ( // check if the page offset matches with a store, if it does then stall and wait if (!page_offset_matches_i) begin // make a load request to memory - data_req_o = 1'b1; - // the translation request we got is valid - if (translation_valid_i) begin - // save the physical address for the next cycle - paddr_n = paddr_i; - // we got no data grant so wait for the grant before sending the tag - if (!data_gnt_i) begin - NS = WAIT_GNT; - ready_o = 1'b0; - end else begin - // put the request in the queue - push = 1'b1; - // we 
got a grant so we can send the tag in the next cycle - NS = SEND_TAG; - end - // we got a TLB miss + data_req_o = 1'b1; + // we got no data grant so wait for the grant before sending the tag + if (!data_gnt_i) begin + NS = WAIT_GNT; end else begin - // we need to abort the translation and let the PTW walker fix the TLB miss - NS = ABORT_TRANSLATION; - ready_o = 1'b0; + if (dtlb_hit_i) begin + // we got a grant and a hit on the DTLB so we can send the tag in the next cycle + NS = SEND_TAG; + pop_ld_o = 1'b1; + end else + NS = ABORT_TRANSACTION; end end else begin - // stall and wait for the store-buffer to drain - ready_o = 1'b0; // wait for the store buffer to train and the page offset to not match anymore NS = WAIT_PAGE_OFFSET; end end end + // wait here for the page offset to not match anymore WAIT_PAGE_OFFSET: begin // we are definitely not ready to accept a new request // we need unique access to the LSU ready_o = 1'b0; - translation_req_o = 1'b1; // we make a new request as soon as the page offset does not match anymore - // essentially the same part as above if (!page_offset_matches_i) begin - // make a load request to memory - data_req_o = 1'b1; - // the translation request we got is valid - if (translation_valid_i) begin - // save the physical address for the next cycle - paddr_n = paddr_i; - // we got no data grant so wait for the grant before sending the tag - if (!data_gnt_i) begin - NS = WAIT_GNT; - end else begin - // put the request in the queue - push = 1'b1; - // we got a grant so we can send the tag in the next cycle - NS = SEND_TAG; - end - // we got a TLB miss - end else begin - // we need to abort the translation and let the PTW walker fix the TLB miss - NS = ABORT_TRANSLATION; - ready_o = 1'b0; - end + NS = WAIT_GNT; end end + + // abort the previous request - free the D$ arbiter // we are here because of a TLB miss, we need to abort the current request and give way for the // PTW walker to satisfy the TLB miss - ABORT_TRANSLATION: begin - // 
keep the translation request hight to tell the PTW that we want this - // translation - translation_req_o = 1'b1; - // we are not ready here + ABORT_TRANSACTION: begin + ready_o = 1'b0; + kill_req_o = 1'b1; + tag_valid_o = 1'b1; + // redo the request by going back to the wait gnt state + NS = WAIT_TRANSLATION; + end + + WAIT_TRANSLATION: begin ready_o = 1'b0; - // send an abort signal - tag_valid_o = 1'b1; - kill_req_o = 1'b1; - // wait for the translation to become valid and redo the request - if (translation_valid_i) begin - // we have a valid translation so tell the cache it should wait for it on the next cycle - // reset the the kill request - tag_valid_o = 1'b0; - kill_req_o = 1'b0; - // if the request is still here, do the load - if (valid_i) begin - - data_req_o = 1'b1; - paddr_n = paddr_i; - - if (!data_gnt_i) begin - NS = WAIT_GNT; - ready_o = 1'b0; - end else begin - // here we are ready to accept a new request - ready_o = 1'b1; - // put the request in the queue - push = 1'b1; - // we got a grant so we can send the tag in the next cycle - NS = SEND_TAG; - end - end - end + translation_req_o = 1'b1; + // we've got a hit and we can continue with the request process + if (dtlb_hit_i) + NS = WAIT_GNT; end WAIT_GNT: begin + // keep the translation request up + translation_req_o = 1'b1; // we are waiting for the grant so we are not ready to accept anything new ready_o = 1'b0; // keep the request up @@ -225,144 +165,104 @@ module load_unit ( // we finally got a data grant if (data_gnt_i) begin // so we send the tag in the next cycle - NS = SEND_TAG; - // we store this grant in our queue - push = 1'b1; - // plus: we can accept a new request - ready_o = 1'b1; + if (dtlb_hit_i) begin + NS = SEND_TAG; + pop_ld_o = 1'b1; + end else // should we not have hit on the TLB abort this transaction an retry later + NS = ABORT_TRANSACTION; end // otherwise we keep waiting on our grant end - + // we know for sure that the tag we want to send is valid SEND_TAG: begin - ready_o 
= 1'b1; - // tell the cache that this tag is valid tag_valid_o = 1'b1; - // if we are sending our tag we are able to process a new request - // ------------- - // New Request - // ------------- - // we can make a new request if we got one + NS = IDLE; + // we can make a new request here if we got one if (valid_i) begin - // do another address translation + // start the translation process even though we do not know if the addresses match + // this should ease timing translation_req_o = 1'b1; - if(!page_offset_matches_i) begin - // make a load request to memory - data_req_o = 1'b1; - // the translation request we got is valid - if (translation_valid_i) begin - // save the physical address for the next cycle - paddr_n = paddr_i; - // we got no data grant so wait for the grant before sending the tag - if (!data_gnt_i) begin - NS = WAIT_GNT; - ready_o = 1'b0; - end else begin - // put the request in the queue - push = 1'b1; - // we got a grant so we can send the tag in the next cycle - NS = SEND_TAG; - end - // we got a TLB miss - end else begin - // we need to abort the translation and let the PTW walker fix the TLB miss - NS = ABORT_TRANSLATION; - ready_o = 1'b0; - end - // page offset mis-match -> go back to idle + // check if the page offset matches with a store, if it does stall and wait + if (!page_offset_matches_i) begin + // make a load request to memory + data_req_o = 1'b1; + // we got no data grant so wait for the grant before sending the tag + if (!data_gnt_i) begin + NS = WAIT_GNT; + end else begin + // we got a grant so we can send the tag in the next cycle + if (dtlb_hit_i) begin + // we got a grant and a hit on the DTLB so we can send the tag in the next cycle + NS = SEND_TAG; + pop_ld_o = 1'b1; + end else // we missed on the TLB -> wait for the translation + NS = ABORT_TRANSACTION; + end end else begin - NS = IDLE; + // wait for the store buffer to train and the page offset to not match anymore + NS = WAIT_PAGE_OFFSET; end - end else begin - NS = IDLE; 
+ end + // ---------- + // Exception + // ---------- + // if we got an exception we need to kill the request immediately + if (ex_i.valid) begin + kill_req_o = 1'b1; end end WAIT_FLUSH: begin - ready_o = 1'b0; - // we got all outstanding requests - if (empty) begin - ready_o = 1'b1; - NS = IDLE; - end + ready_o = 1'b0; + // the D$ arbiter will take care of presenting this to the memory only in case we + // have an outstanding request + kill_req_o = 1'b1; + tag_valid_o = 1'b1; + // we've killed the current request so we can go back to idle + NS = IDLE; end endcase - // ----------------- - // Access Exception - // ----------------- - // we've got an exception - if (valid_i && ex_i.valid) begin - // clear the request - data_req_o = 1'b0; - // we are ready - ready_o = 1'b1; - // do not push this request - push = 1'b0; - // reset state machine + // we got an exception + if (ex_i.valid) begin + // the next state will be the idle state NS = IDLE; end // if we just flushed and the queue is not empty or we are getting an rvalid this cycle wait in a extra stage - if (flush_i && (!empty || data_rvalid_i)) begin + if (flush_i) begin NS = WAIT_FLUSH; - end else if (flush_i) begin - NS = IDLE; end end // decoupled rvalid process always_comb begin : rvalid_output - pop = 1'b0; valid_o = 1'b0; // output the queue data directly, the valid signal is set corresponding to the process above - trans_id_o = out_data.trans_id; - + trans_id_o = load_data_q.trans_id; // we got an rvalid and are currently not flushing and not aborting the request - if (data_rvalid_i && CS != WAIT_FLUSH && !kill_req_o) begin - pop = 1'b1; - valid_o = 1'b1; - end - // pass through an exception - if (valid_i && ex_i.valid) begin - valid_o = 1'b1; - // in case of an exception we can use the current trans_id since we either stalled - // or we are taking the exception in the first cycle - trans_id_o = trans_id_i; + if (data_rvalid_i && CS != WAIT_FLUSH) begin + // we killed the request + if(!kill_req_o) + valid_o 
= 1'b1; + // the output is also valid if we got an exception + if (ex_i.valid) + valid_o = 1'b1; end + end - // latch physical address + // latch physical address for the tag cycle (one cycle after applying the index) always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin - CS <= IDLE; - paddr_q <= '0; + CS <= IDLE; + load_data_q <= '0; end else begin - CS <= NS; - paddr_q <= paddr_n; + CS <= NS; + load_data_q <= load_data_n; end end - // -------------- - // Rvalid FIFO - // -------------- - // we can have two outstanding requests, hence we need to elements in the FIFO - fifo #( - .dtype ( rvalid_entry_t ), - .DEPTH ( 2 ) - ) - fifo_i ( - .full_o ( ), // we can ignore the full signal, the FIFO will never overflow - .empty_o ( empty ), - .single_element_o ( ), // we don't care about the single element either - - .data_i ( in_data ), - .push_i ( push ), - .data_o ( out_data ), - .pop_i ( pop ), - .* - ); - // --------------- // Sign Extend // --------------- @@ -378,43 +278,43 @@ module load_unit ( // sign extension for words always_comb begin : sign_extend_word - case (out_data.address_offset) - default: rdata_w_ext = (out_data.operator == LW) ? {{32{data_rdata_i[31]}}, data_rdata_i[31:0]} : {32'h0, data_rdata_i[31:0]}; - 3'b001: rdata_w_ext = (out_data.operator == LW) ? {{32{data_rdata_i[39]}}, data_rdata_i[39:8]} : {32'h0, data_rdata_i[39:8]}; - 3'b010: rdata_w_ext = (out_data.operator == LW) ? {{32{data_rdata_i[47]}}, data_rdata_i[47:16]} : {32'h0, data_rdata_i[47:16]}; - 3'b011: rdata_w_ext = (out_data.operator == LW) ? {{32{data_rdata_i[55]}}, data_rdata_i[55:24]} : {32'h0, data_rdata_i[55:24]}; - 3'b100: rdata_w_ext = (out_data.operator == LW) ? {{32{data_rdata_i[63]}}, data_rdata_i[63:32]} : {32'h0, data_rdata_i[63:32]}; + case (load_data_q.address_offset) + default: rdata_w_ext = (load_data_q.operator == LW) ? {{32{data_rdata_i[31]}}, data_rdata_i[31:0]} : {32'h0, data_rdata_i[31:0]}; + 3'b001: rdata_w_ext = (load_data_q.operator == LW) ? 
{{32{data_rdata_i[39]}}, data_rdata_i[39:8]} : {32'h0, data_rdata_i[39:8]}; + 3'b010: rdata_w_ext = (load_data_q.operator == LW) ? {{32{data_rdata_i[47]}}, data_rdata_i[47:16]} : {32'h0, data_rdata_i[47:16]}; + 3'b011: rdata_w_ext = (load_data_q.operator == LW) ? {{32{data_rdata_i[55]}}, data_rdata_i[55:24]} : {32'h0, data_rdata_i[55:24]}; + 3'b100: rdata_w_ext = (load_data_q.operator == LW) ? {{32{data_rdata_i[63]}}, data_rdata_i[63:32]} : {32'h0, data_rdata_i[63:32]}; endcase end // sign extension for half words always_comb begin : sign_extend_half_word - case (out_data.address_offset) - default: rdata_h_ext = (out_data.operator == LH) ? {{48{data_rdata_i[15]}}, data_rdata_i[15:0]} : {48'h0, data_rdata_i[15:0]}; - 3'b001: rdata_h_ext = (out_data.operator == LH) ? {{48{data_rdata_i[23]}}, data_rdata_i[23:8]} : {48'h0, data_rdata_i[23:8]}; - 3'b010: rdata_h_ext = (out_data.operator == LH) ? {{48{data_rdata_i[31]}}, data_rdata_i[31:16]} : {48'h0, data_rdata_i[31:16]}; - 3'b011: rdata_h_ext = (out_data.operator == LH) ? {{48{data_rdata_i[39]}}, data_rdata_i[39:24]} : {48'h0, data_rdata_i[39:24]}; - 3'b100: rdata_h_ext = (out_data.operator == LH) ? {{48{data_rdata_i[47]}}, data_rdata_i[47:32]} : {48'h0, data_rdata_i[47:32]}; - 3'b101: rdata_h_ext = (out_data.operator == LH) ? {{48{data_rdata_i[55]}}, data_rdata_i[55:40]} : {48'h0, data_rdata_i[55:40]}; - 3'b110: rdata_h_ext = (out_data.operator == LH) ? {{48{data_rdata_i[63]}}, data_rdata_i[63:48]} : {48'h0, data_rdata_i[63:48]}; + case (load_data_q.address_offset) + default: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[15]}}, data_rdata_i[15:0]} : {48'h0, data_rdata_i[15:0]}; + 3'b001: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[23]}}, data_rdata_i[23:8]} : {48'h0, data_rdata_i[23:8]}; + 3'b010: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[31]}}, data_rdata_i[31:16]} : {48'h0, data_rdata_i[31:16]}; + 3'b011: rdata_h_ext = (load_data_q.operator == LH) ? 
{{48{data_rdata_i[39]}}, data_rdata_i[39:24]} : {48'h0, data_rdata_i[39:24]}; + 3'b100: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[47]}}, data_rdata_i[47:32]} : {48'h0, data_rdata_i[47:32]}; + 3'b101: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[55]}}, data_rdata_i[55:40]} : {48'h0, data_rdata_i[55:40]}; + 3'b110: rdata_h_ext = (load_data_q.operator == LH) ? {{48{data_rdata_i[63]}}, data_rdata_i[63:48]} : {48'h0, data_rdata_i[63:48]}; endcase end always_comb begin : sign_extend_byte - case (out_data.address_offset) - default: rdata_b_ext = (out_data.operator == LB) ? {{56{data_rdata_i[7]}}, data_rdata_i[7:0]} : {56'h0, data_rdata_i[7:0]}; - 3'b001: rdata_b_ext = (out_data.operator == LB) ? {{56{data_rdata_i[15]}}, data_rdata_i[15:8]} : {56'h0, data_rdata_i[15:8]}; - 3'b010: rdata_b_ext = (out_data.operator == LB) ? {{56{data_rdata_i[23]}}, data_rdata_i[23:16]} : {56'h0, data_rdata_i[23:16]}; - 3'b011: rdata_b_ext = (out_data.operator == LB) ? {{56{data_rdata_i[31]}}, data_rdata_i[31:24]} : {56'h0, data_rdata_i[31:24]}; - 3'b100: rdata_b_ext = (out_data.operator == LB) ? {{56{data_rdata_i[39]}}, data_rdata_i[39:32]} : {56'h0, data_rdata_i[39:32]}; - 3'b101: rdata_b_ext = (out_data.operator == LB) ? {{56{data_rdata_i[47]}}, data_rdata_i[47:40]} : {56'h0, data_rdata_i[47:40]}; - 3'b110: rdata_b_ext = (out_data.operator == LB) ? {{56{data_rdata_i[55]}}, data_rdata_i[55:48]} : {56'h0, data_rdata_i[55:48]}; - 3'b111: rdata_b_ext = (out_data.operator == LB) ? {{56{data_rdata_i[63]}}, data_rdata_i[63:56]} : {56'h0, data_rdata_i[63:56]}; + case (load_data_q.address_offset) + default: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[7]}}, data_rdata_i[7:0]} : {56'h0, data_rdata_i[7:0]}; + 3'b001: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[15]}}, data_rdata_i[15:8]} : {56'h0, data_rdata_i[15:8]}; + 3'b010: rdata_b_ext = (load_data_q.operator == LB) ? 
{{56{data_rdata_i[23]}}, data_rdata_i[23:16]} : {56'h0, data_rdata_i[23:16]}; + 3'b011: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[31]}}, data_rdata_i[31:24]} : {56'h0, data_rdata_i[31:24]}; + 3'b100: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[39]}}, data_rdata_i[39:32]} : {56'h0, data_rdata_i[39:32]}; + 3'b101: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[47]}}, data_rdata_i[47:40]} : {56'h0, data_rdata_i[47:40]}; + 3'b110: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[55]}}, data_rdata_i[55:48]} : {56'h0, data_rdata_i[55:48]}; + 3'b111: rdata_b_ext = (load_data_q.operator == LB) ? {{56{data_rdata_i[63]}}, data_rdata_i[63:56]} : {56'h0, data_rdata_i[63:56]}; endcase end always_comb begin - case (out_data.operator) + case (load_data_q.operator) LW, LWU: result_o = rdata_w_ext; LH, LHU: result_o = rdata_h_ext; LB, LBU: result_o = rdata_b_ext; diff --git a/src/lsu.sv b/src/lsu.sv index 041c7fb1f..93b9a1ba1 100644 --- a/src/lsu.sv +++ b/src/lsu.sv @@ -85,30 +85,20 @@ module lsu #( // -------------------------------------- // those are the signals which are always correct // e.g.: they keep the value in the stall case - logic valid; - logic [63:0] vaddr; - logic [63:0] data; - logic [7:0] be; - fu_t fu; - fu_op operator; - logic [TRANS_ID_BITS-1:0] trans_id; - // registered address in case of a necessary stall - logic valid_n, valid_q; - logic [63:0] vaddr_n, vaddr_q; - logic [63:0] data_n, data_q; - fu_t fu_n, fu_q; - fu_op operator_n, operator_q; - logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; - logic [7:0] be_n, be_q; - logic stall_n, stall_q; - // ------------------------------ + lsu_ctrl_t lsu_ctrl; + + logic lsu_ctrl_full; + lsu_ctrl_t lsu_ctrl_o; + logic pop_st; + logic pop_ld; + // Address Generation Unit (AGU) // ------------------------------ // virtual address as calculated by the AGU in the first cycle logic [63:0] vaddr_i; logic [7:0] be_i; - assign vaddr_i = $signed(imm_i) + 
$signed(operand_a_i); + assign vaddr_i = $unsigned($signed(imm_i) + $signed(operand_a_i)); logic st_valid_i; logic st_ready_o; @@ -123,6 +113,7 @@ module lsu #( logic [63:0] mmu_vaddr; logic [63:0] mmu_paddr; exception mmu_exception; + logic dtlb_hit; logic ld_valid; logic [TRANS_ID_BITS-1:0] ld_trans_id; @@ -201,6 +192,7 @@ module lsu #( .lsu_valid_o ( translation_valid ), .lsu_paddr_o ( mmu_paddr ), .lsu_exception_o ( mmu_exception ), + .lsu_dtlb_hit_o ( dtlb_hit ), // send in the same cycle as the request // connecting PTW to D$ IF (aka mem arbiter .address_index_o ( address_index_i [0] ), .address_tag_o ( address_tag_i [0] ), @@ -219,12 +211,10 @@ module lsu #( // Store Unit // ------------------ store_unit store_unit_i ( - .operator_i ( operator ), - .trans_id_i ( trans_id ), .valid_i ( st_valid_i ), - .vaddr_i ( vaddr ), - .be_i ( be ), - .data_i ( data ), + .lsu_ctrl_i ( lsu_ctrl ), + .pop_st_o ( pop_st ), + .valid_o ( st_valid ), .ready_o ( st_ready_o ), .trans_id_o ( st_trans_id ), @@ -234,8 +224,8 @@ module lsu #( .translation_req_o ( st_translation_req ), .vaddr_o ( st_vaddr ), .paddr_i ( mmu_paddr ), - .translation_valid_i ( translation_valid ), .ex_i ( mmu_exception ), + .dtlb_hit_i ( dtlb_hit ), // Load Unit .page_offset_i ( page_offset ), .page_offset_matches_o ( page_offset_matches ), @@ -257,11 +247,10 @@ module lsu #( // Load Unit // ------------------ load_unit load_unit_i ( - .operator_i ( operator ), - .trans_id_i ( trans_id ), .valid_i ( ld_valid_i ), - .vaddr_i ( vaddr ), - .be_i ( be ), + .lsu_ctrl_i ( lsu_ctrl ), + .pop_ld_o ( pop_ld ), + .valid_o ( ld_valid ), .ready_o ( ld_ready_o ), .trans_id_o ( ld_trans_id ), @@ -271,8 +260,8 @@ module lsu #( .translation_req_o ( ld_translation_req ), .vaddr_o ( ld_vaddr ), .paddr_i ( mmu_paddr ), - .translation_valid_i ( translation_valid ), .ex_i ( mmu_exception ), + .dtlb_hit_i ( dtlb_hit ), // to store unit .page_offset_o ( page_offset ), .page_offset_matches_i ( page_offset_matches ), @@ -314,38 
+303,29 @@ module lsu #( .ex_o ( lsu_exception_o ) ); - // ------------------ - // LSU Control - // ------------------ - always_comb begin : lsu_control - // the LSU is ready if both, stores and loads are ready because we do not know - // which of the two we are getting - lsu_ready_o = ld_ready_o && st_ready_o; - // "arbitrate" MMU access, there is only one request possible - translation_req = 1'b0; - mmu_vaddr = 64'b0; - // this arbitrates access to the MMU - if (st_translation_req) begin - translation_req = 1'b1; - mmu_vaddr = st_vaddr; - end else if (ld_translation_req) begin - translation_req = 1'b1; - mmu_vaddr = ld_vaddr; - end - end - // determine whether this is a load or store always_comb begin : which_op ld_valid_i = 1'b0; st_valid_i = 1'b0; + translation_req = 1'b0; + mmu_vaddr = 64'b0; + // check the operator to activate the right functional unit accordingly - unique case (fu) + unique case (lsu_ctrl.fu) // all loads go here - LOAD: ld_valid_i = valid; + LOAD: begin + ld_valid_i = lsu_ctrl.valid; + translation_req = ld_translation_req; + mmu_vaddr = ld_vaddr; + end // all stores go here - STORE: st_valid_i = valid; + STORE: begin + st_valid_i = lsu_ctrl.valid; + translation_req = st_translation_req; + mmu_vaddr = st_vaddr; + end // not relevant for the LSU default: ; endcase @@ -441,100 +421,44 @@ module lsu #( if (data_misaligned) begin - if (fu == LOAD) begin + if (lsu_ctrl.fu == LOAD) begin misaligned_exception = { LD_ADDR_MISALIGNED, - vaddr, + lsu_ctrl.vaddr, 1'b1 }; - end else if (fu == STORE) begin + end else if (lsu_ctrl.fu == STORE) begin misaligned_exception = { ST_ADDR_MISALIGNED, - vaddr, + lsu_ctrl.vaddr, 1'b1 }; end end end - // this process selects the input based on the current state of the LSU - // it can either be feed-through from the issue stage or from the internal registers - always_comb begin : input_select - // if we are stalling use the values we saved - if (stall_q) begin - valid = valid_q; - vaddr = vaddr_q; - data = data_q; - 
fu = fu_q; - operator = operator_q; - trans_id = trans_id_q; - be = be_q; - end else begin // otherwise bypass them - valid = lsu_valid_i; - vaddr = vaddr_i; - data = operand_b_i; - fu = fu_i; - operator = operator_i; - trans_id = trans_id_i; - be = be_i; - end - end - // 1st register stage - always_comb begin : register_stage - valid_n = valid_q; - vaddr_n = vaddr_q; - data_n = data_q; - fu_n = fu_q; - operator_n = operator_q; - trans_id_n = trans_id_q; - be_n = be_q; - // get new input data - if (lsu_valid_i) begin - valid_n = lsu_valid_i; - vaddr_n = vaddr_i; - data_n = operand_b_i; - fu_n = fu_i; - operator_n = operator_i; - trans_id_n = trans_id_i; - be_n = be_i; - end + // ------------------ + // LSU Control + // ------------------ + // new data arrives here + lsu_ctrl_t lsu_req_i; - if (lsu_ready_o) begin - stall_n = 1'b0; - end else begin - stall_n = 1'b1; - end - // if we flush we can safely un-stall - if (flush_i) - stall_n = 1'b0; - end + assign lsu_req_i = {lsu_valid_i, vaddr_i, operand_b_i, be_i, fu_i, operator_i, trans_id_i}; - // registers - always_ff @(posedge clk_i or negedge rst_ni) begin - if (~rst_ni) begin - // 1st LSU stage - valid_q <= 1'b0; - vaddr_q <= 64'b0; - data_q <= 64'b0; - fu_q <= NONE; - operator_q <= ADD; - trans_id_q <= '{default: 0}; - be_q <= 8'b0; - stall_q <= 1'b0; - end else begin - // 1st LSU stage - valid_q <= valid_n; - vaddr_q <= vaddr_n; - data_q <= data_n; - fu_q <= fu_n; - operator_q <= operator_n; - trans_id_q <= trans_id_n; - be_q <= be_n; - stall_q <= stall_n; - end - end + lsu_bypass lsu_bypass_i ( + .lsu_req_i ( lsu_req_i ), + .lus_req_valid_i ( lsu_valid_i ), + .pop_ld_i ( pop_ld ), + .pop_st_i ( pop_st ), + .ld_ready_i ( ld_ready_o ), + .st_ready_i ( st_ready_o ), + + .lsu_ctrl_o ( lsu_ctrl ), + .ready_o ( lsu_ready_o ), + .* + ); // ------------ // Assertions // ------------ @@ -567,4 +491,107 @@ module lsu #( // else begin $error("address contains X when request is set"); $stop(); end `endif `endif -endmodule 
\ No newline at end of file +endmodule + +// ------------------ +// LSU Control +// ------------------ +// The LSU consists of two independent blocks which share a common address translation block. +// The one block is the load unit, the other one is the store unit. They will signal their readiness +// with separate signals. If they are not ready the LSU control should keep the last applied signals stable. +// Furthermore it can be the case that another request for one of the two units arrives in which case +// the LSU control should sample it and store it for later application to the units. It does so by storing it in a +// two element FIFO. +module lsu_bypass ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + + input lsu_ctrl_t lsu_req_i, + input logic lus_req_valid_i, + input logic pop_ld_i, + input logic pop_st_i, + + input logic ld_ready_i, + input logic st_ready_i, + + output lsu_ctrl_t lsu_ctrl_o, + output logic ready_o + ); + + lsu_ctrl_t [1:0] mem_n, mem_q; + logic read_pointer_n, read_pointer_q; + logic write_pointer_n, write_pointer_q; + logic [1:0] status_cnt_n, status_cnt_q; + + logic empty; + assign empty = (status_cnt_q == 0); + assign ready_o = empty; + + always_comb begin + automatic logic [1:0] status_cnt = status_cnt_q; + automatic logic write_pointer = write_pointer_q; + automatic logic read_pointer = read_pointer_q; + + mem_n = mem_q; + // we've got a valid LSU request + if (lus_req_valid_i) begin + mem_n[write_pointer_q] = lsu_req_i; + write_pointer++; + status_cnt++; + end + + if (pop_ld_i) begin + // invalidate the result + mem_n[read_pointer_q].valid = 1'b0; + read_pointer++; + status_cnt--; + end + + if (pop_st_i) begin + // invalidate the result + mem_n[read_pointer_q].valid = 1'b0; + read_pointer++; + status_cnt--; + end + + if (pop_st_i && pop_ld_i) + mem_n = '{default: 0}; + + if (flush_i) begin + status_cnt = '0; + write_pointer = '0; + read_pointer = '0; + mem_n = '{default: 0}; + end + // default
assignments + read_pointer_n = read_pointer; + write_pointer_n = write_pointer; + status_cnt_n = status_cnt; + end + + // output assignment + always_comb begin : output_assignments + if (empty) begin + lsu_ctrl_o = lsu_req_i; + end else begin + lsu_ctrl_o = mem_q[read_pointer_q]; + end + end + + // registers + always_ff @(posedge clk_i or negedge rst_ni) begin + if(~rst_ni) begin + mem_q <= '{default: 0}; + status_cnt_q <= '0; + write_pointer_q <= '0; + read_pointer_q <= '0; + end else begin + mem_q <= mem_n; + status_cnt_q <= status_cnt_n; + write_pointer_q <= write_pointer_n; + read_pointer_q <= read_pointer_n; + end + end +endmodule + diff --git a/src/mmu.sv b/src/mmu.sv index 86c8b7704..d7fe536ac 100644 --- a/src/mmu.sv +++ b/src/mmu.sv @@ -48,6 +48,9 @@ module mmu #( input logic [63:0] lsu_vaddr_i, // virtual address in input logic lsu_is_store_i, // the translation is requested by a store // if we need to walk the page table we can't grant in the same cycle + // Cycle 0 + output logic lsu_dtlb_hit_o, // sent in the same cycle as the request if translation hits in the DTLB + // Cycle 1 output logic lsu_valid_o, // translation is valid output logic [63:0] lsu_paddr_o, // translated address output exception lsu_exception_o, // address translation threw an exception @@ -83,14 +86,14 @@ module mmu #( ); // instruction error // instruction error valid signal and exception, delayed one cycle - logic ierr_valid_q, ierr_valid_n; - exception fetch_ex_q, fetch_ex_n; + logic ierr_valid_q, ierr_valid_n; + exception fetch_ex_q, fetch_ex_n; - logic iaccess_err; // insufficient privilege to access this instruction page - logic daccess_err; // insufficient privilege to access this data page - logic ptw_active; // PTW is currently walking a page table - logic walking_instr; // PTW is walking because of an ITLB miss - logic ptw_error; // PTW threw an exception + logic iaccess_err; // insufficient privilege to access this instruction page + logic daccess_err; // insufficient 
privilege to access this data page + logic ptw_active; // PTW is currently walking a page table + logic walking_instr; // PTW is walking because of an ITLB miss + logic ptw_error; // PTW threw an exception logic update_is_2M; logic update_is_1G; @@ -98,19 +101,19 @@ module mmu #( logic [0:0] update_asid; pte_t update_content; - logic itlb_update; - logic itlb_lu_access; - pte_t itlb_content; - logic itlb_is_2M; - logic itlb_is_1G; - logic itlb_lu_hit; + logic itlb_update; + logic itlb_lu_access; + pte_t itlb_content; + logic itlb_is_2M; + logic itlb_is_1G; + logic itlb_lu_hit; - logic dtlb_update; - logic dtlb_lu_access; - pte_t dtlb_content; - logic dtlb_is_2M; - logic dtlb_is_1G; - logic dtlb_lu_hit; + logic dtlb_update; + logic dtlb_lu_access; + pte_t dtlb_content; + logic dtlb_is_2M; + logic dtlb_is_1G; + logic dtlb_lu_hit; // Assignments assign itlb_lu_access = fetch_req_i; @@ -224,7 +227,7 @@ module mmu #( instr_if_address_o = {itlb_content.ppn, fetch_vaddr_i[11:0]}; // Mega page if (itlb_is_2M) begin - instr_if_address_o[20:12] = fetch_vaddr_i[20:12]; + instr_if_address_o[20:12] = fetch_vaddr_i[20:12]; end // Giga page if (itlb_is_1G) begin @@ -236,16 +239,16 @@ module mmu #( // -------- // if we hit the ITLB output the request signal immediately if (itlb_lu_hit) begin - instr_if_data_req_o = fetch_req_i; - // we got an access error - if (iaccess_err) begin - // immediately grant a fetch which threw an exception, and stop the request from happening - instr_if_data_req_o = 1'b0; - fetch_gnt_o = 1'b1; - ierr_valid_n = 1'b1; - // throw a page fault - fetch_ex_n = {INSTR_ACCESS_FAULT, fetch_vaddr_i, 1'b1}; - end + instr_if_data_req_o = fetch_req_i; + // we got an access error + if (iaccess_err) begin + // immediately grant a fetch which threw an exception, and stop the request from happening + instr_if_data_req_o = 1'b0; + fetch_gnt_o = 1'b1; + ierr_valid_n = 1'b1; + // throw a page fault + fetch_ex_n = {INSTR_ACCESS_FAULT, fetch_vaddr_i, 1'b1}; + end end else 
// --------- // ITLB Miss @@ -261,52 +264,88 @@ module mmu #( // the fetch is valid if we either got an error in the previous cycle or the I$ gave us a valid signal. fetch_valid_o = instr_if_data_rvalid_i || ierr_valid_q; end + // ---------- + // Registers + // ---------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if(~rst_ni) begin + ierr_valid_q <= 1'b0; + fetch_ex_q <= '0; + end else begin + ierr_valid_q <= ierr_valid_n; + fetch_ex_q <= fetch_ex_n; + end + end //----------------------- // Data Interface //----------------------- + logic [63:0] lsu_vaddr_n, lsu_vaddr_q; + pte_t dtlb_pte_n, dtlb_pte_q; + exception misaligned_ex_n, misaligned_ex_q; + logic lsu_req_n, lsu_req_q; + logic lsu_is_store_n, lsu_is_store_q; + logic dtlb_hit_n, dtlb_hit_q; + logic dtlb_is_2M_n, dtlb_is_2M_q; + logic dtlb_is_1G_n, dtlb_is_1G_q; + + // check if we need to do translation or if we are always ready (e.g.: we are not translating anything) + assign lsu_dtlb_hit_o = (en_ld_st_translation_i) ? 
dtlb_lu_hit : 1'b1; + // The data interface is simpler and only consists of a request/response interface always_comb begin : data_interface - lsu_paddr_o = lsu_vaddr_i; - lsu_valid_o = lsu_req_i; - lsu_exception_o = misaligned_ex_i; + // save request and DTLB response + lsu_vaddr_n = lsu_vaddr_i; + lsu_req_n = lsu_req_i; + misaligned_ex_n = misaligned_ex_i; + dtlb_pte_n = dtlb_content; + dtlb_hit_n = dtlb_lu_hit; + lsu_is_store_n = lsu_is_store_i; + dtlb_is_2M_n = dtlb_is_2M; + dtlb_is_1G_n = dtlb_is_1G; + + lsu_paddr_o = lsu_vaddr_q; + lsu_valid_o = lsu_req_q; + lsu_exception_o = misaligned_ex_q; + // Check if the User flag is set, then we may only access it in supervisor mode // if SUM is enabled - daccess_err = (ld_st_priv_lvl_i == PRIV_LVL_S && !sum_i && dtlb_content.u) || // SUM is not set and we are trying to access a user page in supervisor mode - (ld_st_priv_lvl_i == PRIV_LVL_U && !dtlb_content.u); // this is not a user page but we are in user mode and trying to access it + daccess_err = (ld_st_priv_lvl_i == PRIV_LVL_S && !sum_i && dtlb_pte_q.u) || // SUM is not set and we are trying to access a user page in supervisor mode + (ld_st_priv_lvl_i == PRIV_LVL_U && !dtlb_pte_q.u); // this is not a user page but we are in user mode and trying to access it // translation is enabled and no misaligned exception occurred - if (en_ld_st_translation_i && !misaligned_ex_i.valid) begin + if (en_ld_st_translation_i && !misaligned_ex_q.valid) begin lsu_valid_o = 1'b0; // 4K page - lsu_paddr_o = {dtlb_content.ppn, lsu_vaddr_i[11:0]}; + lsu_paddr_o = {dtlb_pte_q.ppn, lsu_vaddr_q[11:0]}; // Mega page - if (dtlb_is_2M) begin - lsu_paddr_o[20:12] = lsu_vaddr_i[20:12]; + if (dtlb_is_2M_q) begin + lsu_paddr_o[20:12] = lsu_vaddr_q[20:12]; end // Giga page - if (dtlb_is_1G) begin - lsu_paddr_o[29:12] = lsu_vaddr_i[29:12]; + if (dtlb_is_1G_q) begin + lsu_paddr_o[29:12] = lsu_vaddr_q[29:12]; end // --------- // DTLB Hit // -------- - if (dtlb_lu_hit && lsu_req_i) begin + if 
(dtlb_hit_q && lsu_req_q) begin lsu_valid_o = 1'b1; // this is a store - if (lsu_is_store_i) begin + if (lsu_is_store_q) begin // check if the page is write-able and we are not violating privileges - if (!dtlb_content.w || daccess_err) begin - lsu_exception_o = {ST_ACCESS_FAULT, lsu_vaddr_i, 1'b1}; + if (!dtlb_pte_q.w || daccess_err) begin + lsu_exception_o = {ST_ACCESS_FAULT, lsu_vaddr_q, 1'b1}; end // check if the dirty flag is set - if (!dtlb_content.d) begin - lsu_exception_o = {STORE_PAGE_FAULT, lsu_vaddr_i, 1'b1}; + if (!dtlb_pte_q.d) begin + lsu_exception_o = {STORE_PAGE_FAULT, lsu_vaddr_q, 1'b1}; end // this is a load, check for sufficient access privileges end else if (daccess_err) begin - lsu_exception_o = {LD_ACCESS_FAULT, lsu_vaddr_i, 1'b1}; + lsu_exception_o = {LD_ACCESS_FAULT, lsu_vaddr_q, 1'b1}; end end else + // --------- // DTLB Miss // --------- @@ -317,7 +356,7 @@ module mmu #( // an error makes the translation valid lsu_valid_o = 1'b1; // the page table walker can only throw page faults - if (lsu_is_store_i) begin + if (lsu_is_store_q) begin lsu_exception_o = {STORE_PAGE_FAULT, {25'b0, update_vaddr}, 1'b1}; end else begin lsu_exception_o = {LOAD_PAGE_FAULT, {25'b0, update_vaddr}, 1'b1}; @@ -331,11 +370,23 @@ module mmu #( // ---------- always_ff @(posedge clk_i or negedge rst_ni) begin if(~rst_ni) begin - ierr_valid_q <= 1'b0; - fetch_ex_q <= '0; + lsu_vaddr_q <= '0; + lsu_req_q <= '0; + misaligned_ex_q <= '0; + dtlb_pte_q <= '0; + dtlb_hit_q <= '0; + lsu_is_store_q <= '0; + dtlb_is_2M_q <= '0; + dtlb_is_1G_q <= '0; end else begin - ierr_valid_q <= ierr_valid_n; - fetch_ex_q <= fetch_ex_n; + lsu_vaddr_q <= lsu_vaddr_n; + lsu_req_q <= lsu_req_n; + misaligned_ex_q <= misaligned_ex_n; + dtlb_pte_q <= dtlb_pte_n; + dtlb_hit_q <= dtlb_hit_n; + lsu_is_store_q <= lsu_is_store_n; + dtlb_is_2M_q <= dtlb_is_2M_n; + dtlb_is_1G_q <= dtlb_is_1G_n; end end endmodule \ No newline at end of file diff --git a/src/pcgen.sv b/src/pcgen.sv index 
9f6b21f80..80623583a 100644 --- a/src/pcgen.sv +++ b/src/pcgen.sv @@ -44,20 +44,22 @@ module pcgen ( ); logic [63:0] npc_n, npc_q; + // the PC was set to a new region by a higher priority input (e.g.: exception, debug, ctrl return from exception) + logic set_pc_n, set_pc_q; branchpredict_sbe branch_predict_btb; - - assign fetch_address_o = npc_q; + // branch-predict input register -> this path is critical + branchpredict resolved_branch_q; btb #( - .NR_ENTRIES(4096), - .BITS_SATURATION_COUNTER(2) + .NR_ENTRIES ( BTB_ENTRIES ), + .BITS_SATURATION_COUNTER ( BITS_SATURATION_COUNTER ) ) btb_i ( // Use the PC from last cycle to perform branch lookup for the current cycle .flush_i ( flush_bp_i ), .vpc_i ( npc_q ), - .branch_predict_i ( resolved_branch_i ), // update port + .branch_predict_i ( resolved_branch_q ), // update port .branch_predict_o ( branch_predict_btb ), // read port .* ); @@ -65,23 +67,42 @@ module pcgen ( // Next PC // ------------------- // next PC (NPC) can come from: - // 1. Exception - // 2. Return from exception - // 3. Predicted branch - // 4. Debug - // 5. Boot address + // 0. Default assignment + // 1. Branch Predict taken + // 2. Debug + // 3. Control flow change request + // 4. Exception + // 5. Return from exception + // 6. Pipeline Flush because of CSR side effects always_comb begin : npc_select + automatic logic [63:0] fetch_address = npc_q; + branch_predict_o = branch_predict_btb; fetch_valid_o = 1'b1; + // this tells us whether it is a consecutive PC or a completely new PC + set_pc_n = 1'b0; + + // keep the PC stable if IF by default + npc_n = npc_q; + // ------------------------------- + // 3. 
Control flow change request + // ------------------------------- + // check if had a mis-predict the cycle earlier and if we can reset the PC (e.g.: it was a predicted or consecutive PC + // which was set a cycle earlier) + if (resolved_branch_q.is_mispredict && !set_pc_q) begin + // we already got the correct target address + fetch_address = resolved_branch_q.target_address; + end // ------------------------------- // 0. Default assignment // ------------------------------- // default is a consecutive PC if (if_ready_i && fetch_enable_i) - npc_n = {npc_q[63:2], 2'b0} + 64'h4; - else // or keep the PC stable if IF is not ready - npc_n = npc_q; + // but operate on the current fetch address + npc_n = {fetch_address[63:2], 2'b0} + 64'h4; + + // we only need to stall the consecutive and predicted case since in any other case we will flush at least // the front-end which means that the IF stage will always be ready to accept a new request @@ -89,52 +110,53 @@ module pcgen ( // 1. Predict taken // ------------------------------- // only predict if the IF stage is ready, otherwise we might take the predicted PC away which will end in a endless loop - if (if_ready_i && branch_predict_btb.valid && branch_predict_btb.predict_taken) begin + // also check if we fetched on a half word (npc_q[1] == 1), it might be the case that we need the next 16 byte of the following instruction + // prediction could potentially prevent us from getting them + if (if_ready_i && branch_predict_btb.valid && branch_predict_btb.predict_taken && !fetch_address[1]) begin npc_n = branch_predict_btb.predict_address; end // ------------------------------- // 2. Debug // ------------------------------- - // ------------------------------- - // 3. Control flow change request - // ------------------------------- - if (resolved_branch_i.is_mispredict) begin - // we already got the correct target address - npc_n = resolved_branch_i.target_address; - end - // ------------------------------- // 4. 
Exception // ------------------------------- if (ex_i.valid) begin npc_n = trap_vector_base_i; branch_predict_o.valid = 1'b0; + set_pc_n = 1'b1; end // ------------------------------- // 5. Return from exception // ------------------------------- if (eret_i) begin - npc_n = epc_i; + npc_n = epc_i; + set_pc_n = 1'b1; end - // ------------------------------- - // 6. Pipeline Flush - // ------------------------------- + // ----------------------------------------------- + // 6. Pipeline Flush because of CSR side effects + // ----------------------------------------------- // On a pipeline flush start fetching from the next address // of the instruction in the commit stage if (flush_i) begin // we came here from a flush request of a CSR instruction, // as CSR instructions do not exist in a compressed form // we can unconditionally do PC + 4 here - npc_n = pc_commit_i + 64'h4; + npc_n = pc_commit_i + 64'h4; + set_pc_n = 1'b1; end // fetch enable if (!fetch_enable_i) begin fetch_valid_o = 1'b0; end + + // set fetch address + fetch_address_o = fetch_address; + end // ------------------- // Sequential Process @@ -142,9 +164,13 @@ module pcgen ( // PCGEN -> IF Pipeline Stage always_ff @(posedge clk_i or negedge rst_ni) begin if(~rst_ni) begin - npc_q <= boot_addr_i; + npc_q <= boot_addr_i; + set_pc_q <= 1'b0; + resolved_branch_q <= '0; end else begin - npc_q <= npc_n; + npc_q <= npc_n; + set_pc_q <= set_pc_n; + resolved_branch_q <= resolved_branch_i; end end diff --git a/src/regfile.sv b/src/regfile.sv index 3ee8674c3..70799dd85 100644 --- a/src/regfile.sv +++ b/src/regfile.sv @@ -147,9 +147,7 @@ module regfile mem[0] = '0; for (int unsigned k = 1; k < NUM_WORDS; k++) begin : w_WordIter - if (~rst_n) - mem[k] = '0; - else if (mem_clocks[k] == 1'b1) + if (mem_clocks[k] == 1'b1) mem[k] = wdata_a_q; end end diff --git a/src/scoreboard.sv b/src/scoreboard.sv index 5b375a181..6fee6a568 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -21,15 +21,14 @@ import 
ariane_pkg::*; module scoreboard #( - parameter int NR_ENTRIES = 8, - parameter int NR_WB_PORTS = 1 + parameter int unsigned NR_ENTRIES = 8, + parameter int unsigned NR_WB_PORTS = 1 ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low - output logic full_o, // We can't take anymore data input logic flush_i, // flush whole scoreboard - input logic flush_unissued_instr_i, + input logic unresolved_branch_i, // we have an unresolved branch // list of clobbered registers to issue stage output fu_t [31:0] rd_clobber_o, @@ -63,7 +62,7 @@ module scoreboard #( input exception [NR_WB_PORTS-1:0] ex_i, // exception from a functional unit (e.g.: ld/st exception, divide by zero) input logic [NR_WB_PORTS-1:0] wb_valid_i // data in is valid ); - localparam BITS_ENTRIES = $clog2(NR_ENTRIES); + localparam int unsigned BITS_ENTRIES = $clog2(NR_ENTRIES); // this is the FIFO struct of the issue queue struct packed { @@ -78,7 +77,7 @@ module scoreboard #( // the issue queue is full don't issue any new instructions assign issue_full = (issue_cnt_q == NR_ENTRIES-1); - assign full_o = issue_full; + // output commit instruction directly assign commit_instr_o = mem_q[commit_pointer_q].sbe; @@ -87,9 +86,12 @@ module scoreboard #( issue_instr_o = decoded_instr_i; // make sure we assign the correct trans ID issue_instr_o.trans_id = issue_pointer_q; - issue_instr_valid_o = ~issue_full && decoded_instr_valid_i && !flush_unissued_instr_i; - decoded_instr_ack_o = issue_ack_i; + // we are ready if we are not full and don't have any unresolved branches, but it can be + // the case that we have an unresolved branch which is cleared in that cycle (resolved_branch_i == 1) + issue_instr_valid_o = decoded_instr_valid_i && !unresolved_branch_i && !issue_full; + decoded_instr_ack_o = issue_ack_i && !issue_full; end + // maintain a FIFO with issued instructions // keep track of all issued instructions always_comb begin : issue_fifo @@ -99,11 +101,11 @@ module scoreboard #( 
commit_pointer_n = commit_pointer_q; issue_pointer_n = issue_pointer_q; - // if we got a acknowledge from the FIFO, put this scoreboard entry in the queue - if (issue_ack_i) begin + // if we got a acknowledge from the issue stage, put this scoreboard entry in the queue + if (decoded_instr_valid_i && decoded_instr_ack_o) begin + // the decoded instruction we put in there is valid (1st bit) // increase the issue counter issue_cnt++; - // the decoded instruction we put in there is valid (1st bit) mem_n[issue_pointer_q] = {1'b1, decoded_instr_i}; // advance issue pointer issue_pointer_n = issue_pointer_q + 1'b1; @@ -112,7 +114,7 @@ module scoreboard #( // ------------ // Write Back // ------------ - for (int i = 0; i < NR_WB_PORTS; i++) begin + for (int unsigned i = 0; i < NR_WB_PORTS; i++) begin // check if this instruction was issued (e.g.: it could happen after a flush that there is still // something in the pipeline e.g. an incomplete memory operation) if (wb_valid_i[i] && mem_n[trans_id_i[i]].issued) begin @@ -141,7 +143,7 @@ module scoreboard #( // Flush // ------ if (flush_i) begin - for (int i = 0; i < NR_ENTRIES; i++) begin + for (int unsigned i = 0; i < NR_ENTRIES; i++) begin // set all valid flags for all entries to zero mem_n[i].issued = 1'b0; mem_n[i].sbe.valid = 1'b0; @@ -163,7 +165,7 @@ module scoreboard #( always_comb begin : clobber_output rd_clobber_o = '{default: NONE}; // check for all valid entries and set the clobber register accordingly - for (int i = 0; i < NR_ENTRIES; i++) begin + for (int unsigned i = 0; i < NR_ENTRIES; i++) begin if (mem_q[i].issued) begin // output the functional unit which is going to clobber this register rd_clobber_o[mem_q[i].sbe.rd] = mem_q[i].sbe.fu; @@ -183,7 +185,7 @@ module scoreboard #( rs1_valid_o = 1'b0; rs2_valid_o = 1'b0; - for (int i = 0; i < NR_ENTRIES; i++) begin + for (int unsigned i = 0; i < NR_ENTRIES; i++) begin // only consider this entry if it is valid if (mem_q[i].issued) begin // look at the 
appropriate fields and look whether there was an @@ -203,7 +205,7 @@ module scoreboard #( // ----------- // provide a direct combinational path from WB a.k.a forwarding // make sure that we are not forwarding a result that got an exception - for (int j = 0; j < NR_WB_PORTS; j++) begin + for (int unsigned j = 0; j < NR_WB_PORTS; j++) begin if (mem_q[trans_id_i[j]].sbe.rd == rs1_i && wb_valid_i[j] && ~ex_i[j].valid) begin rs1_o = wdata_i[j]; rs1_valid_o = wb_valid_i[j]; @@ -231,7 +233,7 @@ module scoreboard #( commit_pointer_q <= '0; issue_pointer_q <= '0; end else begin - mem_q <= mem_n; + mem_q <= mem_n; issue_cnt_q <= issue_cnt_n; commit_pointer_q <= commit_pointer_n; issue_pointer_q <= issue_pointer_n; diff --git a/src/store_queue.sv b/src/store_buffer.sv similarity index 85% rename from src/store_queue.sv rename to src/store_buffer.sv index e4b92812a..9b3ad755f 100644 --- a/src/store_queue.sv +++ b/src/store_buffer.sv @@ -18,17 +18,15 @@ // University of Bologna. // -module store_queue ( +module store_buffer ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low input logic flush_i, // if we flush we need to pause the transactions on the memory // otherwise we will run in a deadlock with the memory arbiter output logic no_st_pending_o, // non-speculative queue is empty (e.g.: everything is committed to the memory hierarchy) - output logic [63:0] paddr_o, // physical address of the valid store - output logic [63:0] data_o, // data at the given address - output logic valid_o, // committed data is valid - output logic [7:0] be_o, // byte enable set + input logic [11:0] page_offset_i, + output logic page_offset_matches_o, input logic commit_i, // commit the instruction which was placed there most recently output logic ready_o, // the store queue is ready to accept a new request @@ -74,12 +72,6 @@ module store_queue ( logic is_speculative; // set if the entry isn't committed yet } commit_queue_n, commit_queue_q; - // we can directly output 
the commit entry since we have just one element in the "queue" - assign paddr_o = commit_queue_q.address; - assign data_o = commit_queue_q.data; - assign be_o = commit_queue_q.be; - assign valid_o = commit_queue_q.valid; - // those signals can directly be output to the memory assign address_index_o = commit_queue_q.address[11:0]; // if we got a new request we already saved the tag from the previous cycle @@ -153,6 +145,34 @@ module store_queue ( end + // ------------------ + // Address Checker + // ------------------ + // The load should return the data stored by the most recent store to the + // same physical address. The most direct way to implement this is to + // maintain physical addresses in the store buffer. + + // Of course, there are other micro-architectural techniques to accomplish + // the same thing: you can interlock and wait for the store buffer to + // drain if the load VA matches any store VA modulo the page size (i.e. + // bits 11:0). As a special case, it is correct to bypass if the full VA + // matches, and no younger stores' VAs match in bits 11:0. 
+ // + // checks if the requested load is in the store buffer + // page offsets are virtually and physically the same + always_comb begin : address_checker + page_offset_matches_o = 1'b0; + // check if the LSBs are identical and the entry is valid + if ((page_offset_i[11:3] == commit_queue_q.address[11:3]) && commit_queue_q.valid) begin + page_offset_matches_o = 1'b1; + end + + if ((page_offset_i[11:3] == paddr_i[11:3]) && valid_i) begin + page_offset_matches_o = 1'b1; + end + end + + // registers always_ff @(posedge clk_i or negedge rst_ni) begin : proc_ if(~rst_ni) begin diff --git a/src/store_unit.sv b/src/store_unit.sv index 53a323dc6..ea81187b1 100644 --- a/src/store_unit.sv +++ b/src/store_unit.sv @@ -24,12 +24,9 @@ module store_unit ( input logic flush_i, output logic no_st_pending_o, // store unit input port - input fu_op operator_i, - input logic [TRANS_ID_BITS-1:0] trans_id_i, input logic valid_i, - input logic [63:0] vaddr_i, - input logic [7:0] be_i, - input logic [63:0] data_i, + input lsu_ctrl_t lsu_ctrl_i, + output logic pop_st_o, input logic commit_i, // store unit output port output logic valid_o, @@ -41,8 +38,8 @@ module store_unit ( output logic translation_req_o, // request address translation output logic [63:0] vaddr_o, // virtual address out input logic [63:0] paddr_i, // physical address in - input logic translation_valid_i, input exception ex_i, + input logic dtlb_hit_i, // will be one in the same cycle translation_req was asserted if it hits // address checker input logic [11:0] page_offset_i, output logic page_offset_matches_o, @@ -60,108 +57,162 @@ module store_unit ( ); assign result_o = 64'b0; - logic [63:0] st_buffer_paddr; // physical address for store - logic [63:0] st_buffer_data; // store buffer data out - logic [63:0] st_data; // aligned data to store buffer - logic [7:0] st_buffer_be; - logic st_buffer_valid; + enum logic [1:0] {IDLE, VALID_STORE, WAIT_TRANSLATION, WAIT_STORE_READY} NS, CS; + // store buffer control signals 
logic st_ready; logic st_valid; - assign vaddr_o = vaddr_i; - // --------------- - // Store Control - // --------------- + // keep the data and the byte enable for the second cycle (after address translation) + logic [63:0] st_data_n, st_data_q; + logic [7:0] st_be_n, st_be_q; + logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; + + // output assignments + assign vaddr_o = lsu_ctrl_i.vaddr; // virtual address + assign trans_id_o = trans_id_q; // transaction id from previous cycle + always_comb begin : store_control translation_req_o = 1'b0; - valid_o = 1'b0; ready_o = 1'b1; - trans_id_o = trans_id_i; - ex_o = ex_i; + valid_o = 1'b0; st_valid = 1'b0; - // we got a valid store - if (valid_i) begin - // first do address translation, we need to do it in the first cycle since we want to share the MMU - // between the load and the store unit. But we only know that when a new request arrives that we are not using - // it at the same time. - translation_req_o = 1'b1; - // check if translation was valid and we have space in the store buffer - // otherwise simply stall - if (translation_valid_i && st_ready) begin + pop_st_o = 1'b0; + ex_o = ex_i; + trans_id_n = lsu_ctrl_i.trans_id; + NS = CS; + + case (CS) + // we got a valid store + IDLE: begin + if (valid_i) begin + + NS = VALID_STORE; + translation_req_o = 1'b1; + + // check if translation was valid and we have space in the store buffer + // otherwise simply stall + if (!dtlb_hit_i) begin + NS = WAIT_TRANSLATION; + end + + if (!st_ready) begin + NS = WAIT_STORE_READY; + end + end + end + + VALID_STORE: begin + ready_o = 1'b0; valid_o = 1'b1; // post this store to the store buffer - st_valid = 1'b1; - // translation was not successful - stall here - end else begin + if (!flush_i) + st_valid = 1'b1; + + pop_st_o = 1'b1; + + // // we have another request + // if (valid_i) begin + + // translation_req_o = 1'b1; + + // if (!dtlb_hit_i) begin + // NS = WAIT_TRANSLATION; + // end + + // if (!st_ready) begin + // NS = 
WAIT_STORE_READY; + // end + // // if we do not have another request go back to idle + // end else begin + // NS = IDLE; + // end + NS = IDLE; + end + + // the store queue is currently full + WAIT_STORE_READY: begin + ready_o = 1'b0; + // keep the translation request high + translation_req_o = 1'b1; + + if (st_ready && dtlb_hit_i) begin + NS = VALID_STORE; + end + end + + // we didn't receive a valid translation, wait for one + // but we know that the store queue is not full as we could only have landed here if + // it wasn't full + WAIT_TRANSLATION: begin ready_o = 1'b0; + translation_req_o = 1'b1; + + if (dtlb_hit_i) begin + NS = VALID_STORE; + end end - // ----------------- - // Access Exception - // ----------------- - // we got an address translation exception (access rights) - if (ex_i.valid) begin - // result is valid - valid_o = 1'b1; - // do not store this - st_valid = 1'b0; - // we are ready if we got this exception - ready_o = 1'b1; - end + endcase + + // ----------------- + // Access Exception + // ----------------- + // we got an address translation exception (access rights, misaligned or page fault) + if (ex_i.valid && (CS != IDLE)) begin + // the only difference is that we do not want to store this request + st_valid = 1'b0; + NS = IDLE; + valid_o = 1'b1; end + + if (flush_i) + NS = IDLE; end + // ----------- // Re-aligner // ----------- // re-align the write data to comply with the address offset always_comb begin - st_data = data_i; - case (vaddr_i[2:0]) - 3'b000: st_data = data_i; - 3'b001: st_data = {data_i[55:0], data_i[63:56]}; - 3'b010: st_data = {data_i[47:0], data_i[63:48]}; - 3'b011: st_data = {data_i[39:0], data_i[63:40]}; - 3'b100: st_data = {data_i[31:0], data_i[63:32]}; - 3'b101: st_data = {data_i[23:0], data_i[63:24]}; - 3'b110: st_data = {data_i[15:0], data_i[63:16]}; - 3'b111: st_data = {data_i[7:0], data_i[63:8]}; + st_be_n = lsu_ctrl_i.be; + st_data_n = lsu_ctrl_i.data; + case (lsu_ctrl_i.vaddr[2:0]) + 3'b000: st_data_n = 
lsu_ctrl_i.data; + 3'b001: st_data_n = {lsu_ctrl_i.data[55:0], lsu_ctrl_i.data[63:56]}; + 3'b010: st_data_n = {lsu_ctrl_i.data[47:0], lsu_ctrl_i.data[63:48]}; + 3'b011: st_data_n = {lsu_ctrl_i.data[39:0], lsu_ctrl_i.data[63:40]}; + 3'b100: st_data_n = {lsu_ctrl_i.data[31:0], lsu_ctrl_i.data[63:32]}; + 3'b101: st_data_n = {lsu_ctrl_i.data[23:0], lsu_ctrl_i.data[63:24]}; + 3'b110: st_data_n = {lsu_ctrl_i.data[15:0], lsu_ctrl_i.data[63:16]}; + 3'b111: st_data_n = {lsu_ctrl_i.data[7:0], lsu_ctrl_i.data[63:8]}; endcase end // --------------- // Store Queue // --------------- - store_queue store_queue_i ( + store_buffer store_buffer_i ( // store queue write port .valid_i ( st_valid ), - .data_i ( st_data ), - // store buffer in - .paddr_o ( st_buffer_paddr ), - .data_o ( st_buffer_data ), - .valid_o ( st_buffer_valid ), - .be_o ( st_buffer_be ), + .data_i ( st_data_q ), + .be_i ( st_be_q ), + // store buffer out .ready_o ( st_ready ), .* ); - // ------------------ - // Address Checker - // ------------------ - // The load should return the data stored by the most recent store to the - // same physical address. The most direct way to implement this is to - // maintain physical addresses in the store buffer. - - // Of course, there are other micro-architectural techniques to accomplish - // the same thing: you can interlock and wait for the store buffer to - // drain if the load VA matches any store VA modulo the page size (i.e. - // bits 11:0). As a special case, it is correct to bypass if the full VA - // matches, and no younger stores' VAs match in bits 11:0. 
- // - // checks if the requested load is in the store buffer - // page offsets are virtually and physically the same - always_comb begin : address_checker - page_offset_matches_o = 1'b0; - // check if the LSBs are identical and the entry is valid - if ((vaddr_i[11:3] == st_buffer_paddr[11:3]) && st_buffer_valid) begin - page_offset_matches_o = 1'b1; + // --------------- + // Registers + // --------------- + always_ff @(posedge clk_i or negedge rst_ni) begin + if(~rst_ni) begin + CS <= IDLE; + st_be_q <= '0; + st_data_q <= '0; + trans_id_q <= '0; + end else begin + CS <= NS; + st_be_q <= st_be_n; + st_data_q <= st_data_n; + trans_id_q <= trans_id_n; end end diff --git a/src/util/exception_trace_item.svh b/src/util/exception_trace_item.svh index 2048d51df..fd6a43079 100755 --- a/src/util/exception_trace_item.svh +++ b/src/util/exception_trace_item.svh @@ -38,7 +38,7 @@ class exception_trace_item; INSTR_PAGE_FAULT: this.cause = "Instruction Page Fault"; LOAD_PAGE_FAULT: this.cause = "Load Page Fault"; STORE_PAGE_FAULT: this.cause = "Store Page Fault"; - default: cause = "Interrupt"; + default: this.cause = "Interrupt"; endcase this.tval = tval; diff --git a/src/util/instruction_trace_item.svh b/src/util/instruction_trace_item.svh index 41dd8d768..48fba7cb0 100755 --- a/src/util/instruction_trace_item.svh +++ b/src/util/instruction_trace_item.svh @@ -26,13 +26,12 @@ class instruction_trace_item; logic [63:0] reg_file [32]; logic [4:0] read_regs [$]; logic [4:0] result_regs [$]; + logic [63:0] imm; logic [63:0] result; logic [63:0] paddr; - logic [63:0] paddr_queue [$]; - logic [63:0] vaddr; - logic [63:0] vaddr_queue [$]; + // constructor creating a new instruction trace item, e.g.: a single instruction with all relevant information - function new (time simtime, longint unsigned cycle, scoreboard_entry sbe, logic [31:0] instr, logic [63:0] reg_file [32], logic [63:0] result, logic [63:0] vaddr, logic [63:0] paddr); + function new (time simtime, longint unsigned cycle, 
scoreboard_entry sbe, logic [31:0] instr, logic [63:0] reg_file [32], logic [63:0] result, logic [63:0] paddr); this.simtime = simtime; this.cycle = cycle; this.pc = sbe.pc; @@ -40,7 +39,6 @@ class instruction_trace_item; this.instr = instr; this.reg_file = reg_file; this.result = result; - this.vaddr = vaddr; this.paddr = paddr; endfunction // convert register address to ABI compatible form @@ -198,11 +196,17 @@ class instruction_trace_item; if (read_regs[i] != 0) s = $sformatf("%s %-4s:%16x", s, regAddrToStr(read_regs[i]), reg_file[read_regs[i]]); end - // if we got a physical address also display address translation - foreach (paddr_queue[i]) begin - s = $sformatf("%s VA: %x PA: %x", s, this.vaddr, paddr_queue[i]); - end - + casex (instr) + // check if the instruction was a load or store + INSTR_STORE: begin + logic [63:0] vaddress = reg_file[read_regs[1]] + this.imm; + s = $sformatf("%s VA: %x PA: %x", s, vaddress, this.paddr); + end + INSTR_LOAD: begin + logic [63:0] vaddress = reg_file[read_regs[0]] + this.imm; + s = $sformatf("%s VA: %x PA: %x", s, vaddress, this.paddr); + end + endcase return s; endfunction @@ -307,7 +311,9 @@ class instruction_trace_item; result_regs.push_back(sbe.rd); read_regs.push_back(sbe.rs1); - paddr_queue.push_back(paddr); + // save the immediate for calculating the virtual address + this.imm = sbe.result; + return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); endfunction @@ -322,9 +328,10 @@ class instruction_trace_item; default: return printMnemonic("INVALID"); endcase - read_regs.push_back(sbe.rs1); read_regs.push_back(sbe.rs2); - paddr_queue.push_back(paddr); + read_regs.push_back(sbe.rs1); + // save the immediate for calculating the virtual address + this.imm = sbe.result; return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); diff --git a/src/util/instruction_tracer.svh b/src/util/instruction_tracer.svh 
index a1fff4037..798665557 100755 --- a/src/util/instruction_tracer.svh +++ b/src/util/instruction_tracer.svh @@ -36,16 +36,21 @@ class instruction_tracer; int f; // address mapping // contains mappings of the form vaddr <-> paddr - struct { - logic [63:0] vaddr; - logic [63:0] paddr; - } store_mapping[$], load_mapping[$], address_mapping; + logic [63:0] store_mapping[$], load_mapping[$], address_mapping; function new(virtual instruction_tracer_if tracer_if); + this.tracer_if = tracer_if; - f = $fopen("output.txt","w"); endfunction : new + function void create_file(logic [5:0] cluster_id, logic [3:0] core_id); + string fn; + $sformat(fn, "trace_core_%h_%h.log", cluster_id, core_id); + $display("[TRACER] Output filename is: %s", fn); + + this.f = $fopen(fn,"w"); + endfunction : create_file + task trace(); fetch_entry decode_instruction, issue_instruction, issue_commit_instruction; scoreboard_entry commit_instruction; @@ -82,22 +87,13 @@ class instruction_tracer; // -------------------- // Address Translation // -------------------- - if (tracer_if.pck.translation_valid) begin - // put it in the store mapping queue if it is a store - if (tracer_if.pck.is_store && tracer_if.pck.st_ready) begin - store_mapping.push_back('{ - vaddr: tracer_if.pck.vaddr, - paddr: tracer_if.pck.paddr - }); - // or else put it in the load mapping - end else if (!tracer_if.pck.is_store && tracer_if.pck.ld_ready) begin - load_mapping.push_back('{ - vaddr: tracer_if.pck.vaddr, - paddr: tracer_if.pck.paddr - }); - end + if (tracer_if.pck.st_valid) begin + store_mapping.push_back(tracer_if.pck.st_paddr); end + if (tracer_if.pck.ld_valid && !tracer_if.pck.ld_kill) begin + load_mapping.push_back(tracer_if.pck.ld_paddr); + end // -------------- // Commit // -------------- @@ -115,9 +111,9 @@ class instruction_tracer; // check if the write back is valid, if not we need to source the result from the register file // as the most recent version of this register will be there. 
if (tracer_if.pck.we) begin - printInstr(issue_sbe, issue_commit_instruction.instruction, tracer_if.pck.wdata, address_mapping.vaddr, address_mapping.paddr); + printInstr(issue_sbe, issue_commit_instruction.instruction, tracer_if.pck.wdata, address_mapping); end else - printInstr(issue_sbe, issue_commit_instruction.instruction, reg_file[commit_instruction.rd], address_mapping.vaddr, address_mapping.paddr); + printInstr(issue_sbe, issue_commit_instruction.instruction, reg_file[commit_instruction.rd], address_mapping); end // -------------- @@ -149,6 +145,7 @@ class instruction_tracer; end endtask + // flush all decoded instructions function void flushDecode (); decode_queue = {}; @@ -165,8 +162,8 @@ class instruction_tracer; load_mapping = {}; endfunction; - function void printInstr(scoreboard_entry sbe, logic [63:0] instr, logic [63:0] result, logic [63:0] vaddr, logic [63:0] paddr); - instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.reg_file, result, vaddr, paddr); + function void printInstr(scoreboard_entry sbe, logic [63:0] instr, logic [63:0] result, logic [63:0] paddr); + instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.reg_file, result, paddr); // print instruction to console string print_instr = iti.printInstr(); $display(print_instr); diff --git a/src/util/instruction_tracer_if.sv b/src/util/instruction_tracer_if.sv index c805037b0..b42d8ac3a 100755 --- a/src/util/instruction_tracer_if.sv +++ b/src/util/instruction_tracer_if.sv @@ -39,19 +39,23 @@ interface instruction_tracer_if ( // commit stage scoreboard_entry commit_instr; // commit instruction logic commit_ack; + // address translation - logic translation_valid; - logic [63:0] vaddr; - logic [63:0] paddr; - logic is_store; - logic st_ready; - logic ld_ready; + // stores + logic st_valid; + logic [63:0] st_paddr; + // loads + logic ld_valid; + logic ld_kill; + logic [63:0] ld_paddr; + // exceptions exception exception; // the tracer just has a passive interface we 
do not drive anything with it clocking pck @(posedge clk); input rstn, flush_unissued, flush, fetch, fetch_valid, fetch_ack, issue_ack, issue_sbe, waddr, - wdata, we, commit_instr, commit_ack, translation_valid, vaddr, paddr, is_store, st_ready, ld_ready, exception; + st_valid, st_paddr, ld_valid, ld_kill, ld_paddr, + wdata, we, commit_instr, commit_ack, exception; endclocking endinterface diff --git a/tb/agents/scoreboard_if/scoreboard_if.sv b/tb/agents/scoreboard_if/scoreboard_if.sv index bfe9ee073..40fe73722 100644 --- a/tb/agents/scoreboard_if/scoreboard_if.sv +++ b/tb/agents/scoreboard_if/scoreboard_if.sv @@ -13,7 +13,6 @@ import ariane_pkg::*; interface scoreboard_if #(parameter int NR_WB_PORTS = 1)(input clk); - wire full; wire flush; wire [31:0][$bits(fu_t)-1:0] rd_clobber; wire [4:0] rs1_address; @@ -26,6 +25,7 @@ interface scoreboard_if #(parameter int NR_WB_PORTS = 1)(input clk); wire commit_ack; scoreboard_entry decoded_instr; wire decoded_instr_valid; + wire decoded_instr_ack; scoreboard_entry issue_instr; wire issue_instr_valid; wire issue_ack; @@ -38,12 +38,12 @@ interface scoreboard_if #(parameter int NR_WB_PORTS = 1)(input clk); clocking mck @(posedge clk); default input #1 output #5; // save timing output flush, rs1_address, rs2_address, commit_ack, decoded_instr, decoded_instr_valid, issue_ack, trans_id, wdata, ex, wb_valid; - input full, rd_clobber, rs1, rs1_valid, rs2, rs2_valid, commit_instr, issue_instr, issue_instr_valid; + input rd_clobber, rs1, rs1_valid, rs2, rs2_valid, commit_instr, issue_instr, issue_instr_valid, decoded_instr_ack; endclocking // Scoreboard interface configured in passive mode (-> monitor) clocking pck @(posedge clk); input flush, rs1_address, rs2_address, commit_ack, decoded_instr, decoded_instr_valid, issue_ack, trans_id, wdata, ex, wb_valid, - full, rd_clobber, rs1, rs1_valid, rs2, rs2_valid, commit_instr, issue_instr, issue_instr_valid; + rd_clobber, rs1, rs1_valid, rs2, rs2_valid, commit_instr, issue_instr, 
issue_instr_valid, decoded_instr_ack; endclocking modport master (clocking mck); diff --git a/tb/agents/store_queue_if/store_queue_if.sv b/tb/agents/store_queue_if/store_queue_if.sv index 8b5d0cc8d..82d01235a 100755 --- a/tb/agents/store_queue_if/store_queue_if.sv +++ b/tb/agents/store_queue_if/store_queue_if.sv @@ -29,10 +29,9 @@ interface store_queue_if ); wire flush; - wire [ADDRESS_SIZE-1:0] check_paddr; - wire [DATA_WIDTH-1:0] check_data; - wire valid; - wire [DATA_WIDTH/8-1:0] check_be; + wire no_st_pending; + wire [11:0] page_offset; + wire page_offset_matches; wire commit; wire ready; wire store_valid; @@ -41,14 +40,15 @@ interface store_queue_if wire [DATA_WIDTH/8-1:0] store_be; clocking mck @(posedge clk); - output flush, commit, valid, store_paddr, store_data, store_be, store_valid; - input check_paddr, check_data, check_be, ready; + output flush, commit, store_valid, page_offset, store_paddr, store_data, store_be; + input ready, page_offset_matches, no_st_pending; endclocking clocking pck @(posedge clk); - input flush, check_paddr, check_data, valid, check_be, commit, ready, store_valid, store_paddr, store_data, store_be; + input flush, commit, ready, page_offset, page_offset_matches, store_valid, store_paddr, + store_data, store_be, no_st_pending; endclocking endinterface diff --git a/tb/agents/store_queue_if/store_queue_if_driver.svh b/tb/agents/store_queue_if/store_queue_if_driver.svh index 040e05f74..777bbf8da 100644 --- a/tb/agents/store_queue_if/store_queue_if_driver.svh +++ b/tb/agents/store_queue_if/store_queue_if_driver.svh @@ -58,7 +58,7 @@ class store_queue_if_driver extends uvm_driver #(store_queue_if_seq_item); seq_item_port.item_done(); // fork off a commit task // commit a couple of cycles later - @(m_vif.mck iff m_vif.pck.valid) + @(m_vif.mck iff m_vif.pck.store_valid) fork commit_block: begin sem.get(1); diff --git a/tb/core_tb.sv b/tb/core_tb.sv index fe6fab0ad..0255457ed 100644 --- a/tb/core_tb.sv +++ b/tb/core_tb.sv @@ -19,17 +19,17 
@@ // import ariane_pkg::*; +import uvm_pkg::*; +import core_lib_pkg::*; `define DRAM_BASE 64'h80000000 + module core_tb; import "DPI-C" function chandle read_elf(string fn); import "DPI-C" function longint unsigned get_symbol_address(string symb); import "DPI-C" function longint unsigned get_symbol_size(string symb); - import uvm_pkg::*; - import core_lib_pkg::*; - logic clk_i; logic rst_ni; logic rtc_i; @@ -190,7 +190,8 @@ module core_tb; // initialize .bss bss_address = get_symbol_address(".bss"); bss_size = get_symbol_size(".bss"); - $display("Symbol Address: %x, Symbol Size: %x, Address: %x", ((bss_address - `DRAM_BASE) >> 3), bss_size, address); + // `uvm_info("Core Test", $sformatf(".bss address: %x, .bss size: %x, .tohost address: %x", ((bss_address - `DRAM_BASE) >> 3), bss_size, address), UVM_LOW) + // the section should be aligned on a double word boundary for (int i = 0; i < bss_size/8; i++) begin core_mem_i.ram_i.mem[((bss_address - `DRAM_BASE) >> 3) + i] = 64'b0; diff --git a/tb/scoreboard_tb.sv b/tb/scoreboard_tb.sv index fff661d9f..fb6309865 100644 --- a/tb/scoreboard_tb.sv +++ b/tb/scoreboard_tb.sv @@ -19,33 +19,37 @@ module scoreboard_tb; scoreboard_if #(.NR_WB_PORTS(1) ) scoreboard_if (clk); scoreboard #( - .NR_WB_PORTS ( 1 ), - .NR_ENTRIES ( NR_SB_ENTRIES ) + .NR_WB_PORTS ( 1 ), + .NR_ENTRIES ( NR_SB_ENTRIES ) ) dut ( - .clk_i ( clk ), - .rst_ni ( rst_ni ), - .full_o ( scoreboard_if.full ), - .flush_i ( scoreboard_if.flush ), - .rd_clobber_o ( scoreboard_if.rd_clobber ), - .rs1_i ( scoreboard_if.rs1_address ), - .rs1_o ( scoreboard_if.rs1 ), - .rs1_valid_o ( scoreboard_if.rs1_valid ), - .rs2_i ( scoreboard_if.rs2_address ), - .rs2_o ( scoreboard_if.rs2 ), - .rs2_valid_o ( scoreboard_if.rs2_valid ), - .commit_instr_o ( scoreboard_if.commit_instr ), - .commit_ack_i ( scoreboard_if.commit_ack ), - .decoded_instr_i ( scoreboard_if.decoded_instr ), - .decoded_instr_valid_i( scoreboard_if.decoded_instr_valid ), - .issue_instr_o ( 
scoreboard_if.issue_instr ), - .issue_instr_valid_o ( scoreboard_if.issue_instr_valid ), - .issue_ack_i ( scoreboard_if.issue_ack ), - .trans_id_i ( scoreboard_if.trans_id ), - .wdata_i ( scoreboard_if.wdata ), - .ex_i ( scoreboard_if.ex ), - .wb_valid_i ( scoreboard_if.wb_valid ) + .clk_i ( clk ), + .rst_ni ( rst_ni ), + .flush_i ( scoreboard_if.flush ), + .rd_clobber_o ( scoreboard_if.rd_clobber ), + .rs1_i ( scoreboard_if.rs1_address ), + .rs1_o ( scoreboard_if.rs1 ), + .rs1_valid_o ( scoreboard_if.rs1_valid ), + .rs2_i ( scoreboard_if.rs2_address ), + .rs2_o ( scoreboard_if.rs2 ), + .rs2_valid_o ( scoreboard_if.rs2_valid ), + + .commit_instr_o ( scoreboard_if.commit_instr ), + .commit_ack_i ( scoreboard_if.commit_ack ), + + .decoded_instr_i ( scoreboard_if.decoded_instr ), + .decoded_instr_valid_i ( scoreboard_if.decoded_instr_valid ), + .decoded_instr_ack_o ( scoreboard_if.decoded_instr_ack ), + + .issue_instr_o ( scoreboard_if.issue_instr ), + .issue_instr_valid_o ( scoreboard_if.issue_instr_valid ), + .issue_ack_i ( scoreboard_if.issue_ack ), + + .trans_id_i ( scoreboard_if.trans_id ), + .wdata_i ( scoreboard_if.wdata ), + .ex_i ( scoreboard_if.ex ), + .wb_valid_i ( scoreboard_if.wb_valid ) ); initial begin @@ -86,13 +90,10 @@ module scoreboard_tb; @(scoreboard_if.mck); // if we are not full load another instruction - if (scoreboard_if.full == 1'b0) begin scoreboard_if.mck.decoded_instr <= Scoreboard::randomize_scoreboard(); scoreboard_if.mck.decoded_instr_valid <= 1'b1; - end else begin + @(scoreboard_if.mck iff scoreboard_if.mck.decoded_instr_ack == 1'b1) scoreboard_if.mck.decoded_instr_valid <= 1'b0; - end - end end diff --git a/tb/store_queue_tb.sv b/tb/store_queue_tb.sv index b64275679..959317d5f 100644 --- a/tb/store_queue_tb.sv +++ b/tb/store_queue_tb.sv @@ -29,31 +29,31 @@ module store_queue_tb; dcache_if slave(clk); store_queue_if store_queue(clk); - store_queue dut ( - .clk_i ( clk ), - .rst_ni ( rst_ni ), - .flush_i ( store_queue.flush ), - 
.paddr_o ( store_queue.check_paddr ), - .data_o ( store_queue.check_data ), - .valid_o ( store_queue.valid ), - .be_o ( store_queue.check_be ), - .commit_i ( store_queue.commit ), - .ready_o ( store_queue.ready ), - .valid_i ( store_queue.store_valid && store_queue.ready ), - .paddr_i ( store_queue.store_paddr ), - .data_i ( store_queue.store_data ), - .be_i ( store_queue.store_be ), + store_buffer dut ( + .clk_i ( clk ), + .rst_ni ( rst_ni ), + .flush_i ( store_queue.flush ), - .address_index_o ( slave.address_index ), - .address_tag_o ( slave.address_tag ), - .data_wdata_o ( slave.data_wdata ), - .data_req_o ( slave.data_req ), - .data_we_o ( slave.data_we ), - .data_be_o ( slave.data_be ), - .kill_req_o ( slave.kill_req ), - .tag_valid_o ( slave.tag_valid ), - .data_gnt_i ( slave.data_gnt & slave.data_req ), - .data_rvalid_i ( slave.data_rvalid ) + .no_st_pending_o (), + .page_offset_i ( store_queue.page_offset ), + .page_offset_matches_o ( store_queue.page_offset_matches ), + .commit_i ( store_queue.commit ), + .ready_o ( store_queue.ready ), + .valid_i ( store_queue.store_valid && store_queue.ready ), + .paddr_i ( store_queue.store_paddr ), + .data_i ( store_queue.store_data ), + .be_i ( store_queue.store_be ), + + .address_index_o ( slave.address_index ), + .address_tag_o ( slave.address_tag ), + .data_wdata_o ( slave.data_wdata ), + .data_req_o ( slave.data_req ), + .data_we_o ( slave.data_we ), + .data_be_o ( slave.data_be ), + .kill_req_o ( slave.kill_req ), + .tag_valid_o ( slave.tag_valid ), + .data_gnt_i ( slave.data_gnt & slave.data_req ), + .data_rvalid_i ( slave.data_rvalid ) ); initial begin diff --git a/tb/wave/wave_core.do b/tb/wave/wave_core.do index 4289d8933..ccec7a11e 100644 --- a/tb/wave/wave_core.do +++ b/tb/wave/wave_core.do @@ -1,28 +1,40 @@ add wave -noupdate -group core /core_tb/dut/* + add wave -noupdate -group pcgen_stage -group btb /core_tb/dut/pcgen_i/btb_i/* add wave -noupdate -group pcgen_stage /core_tb/dut/pcgen_i/* + add wave 
-noupdate -group if_stage -group fetch_fifo /core_tb/dut/if_stage_i/fetch_fifo_i/* add wave -noupdate -group if_stage /core_tb/dut/if_stage_i/* -add wave -noupdate -group id_stage -group scoreboard /core_tb/dut/id_stage_i/scoreboard_i/* + add wave -noupdate -group id_stage -group decoder /core_tb/dut/id_stage_i/decoder_i/* -add wave -noupdate -group id_stage -group issue_read_operands /core_tb/dut/id_stage_i/issue_read_operands_i/* add wave -noupdate -group id_stage /core_tb/dut/id_stage_i/* + +add wave -noupdate -group issue_stage -group scoreboard /core_tb/dut/issue_stage_i/scoreboard_i/* +add wave -noupdate -group issue_stage -group issue_read_operands /core_tb/dut/issue_stage_i/issue_read_operands_i/* +add wave -noupdate -group issue_stage /core_tb/dut/issue_stage_i/* + add wave -noupdate -group ex_stage -group alu /core_tb/dut/ex_stage_i/alu_i/* + +add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/* +add wave -noupdate -group ex_stage -group lsu -group lsu_bypass /core_tb/dut/ex_stage_i/lsu_i/lsu_bypass_i/* add wave -noupdate -group ex_stage -group lsu -group mmu /core_tb/dut/ex_stage_i/lsu_i/mmu_i/* add wave -noupdate -group ex_stage -group lsu -group mmu -group itlb /core_tb/dut/ex_stage_i/lsu_i/mmu_i/itlb_i/* add wave -noupdate -group ex_stage -group lsu -group mmu -group dtlb /core_tb/dut/ex_stage_i/lsu_i/mmu_i/dtlb_i/* add wave -noupdate -group ex_stage -group lsu -group mmu -group ptw /core_tb/dut/ex_stage_i/lsu_i/mmu_i/ptw_i/* -add wave -noupdate -group ex_stage -group lsu /core_tb/dut/ex_stage_i/lsu_i/* add wave -noupdate -group ex_stage -group lsu -group mem_arbiter /core_tb/dut/ex_stage_i/lsu_i/dcache_arbiter_i/* add wave -noupdate -group ex_stage -group lsu -group mem_arbiter -group arbiter_fifo /core_tb/dut/ex_stage_i/lsu_i/dcache_arbiter_i/fifo_i/* add wave -noupdate -group ex_stage -group lsu -group store_unit /core_tb/dut/ex_stage_i/lsu_i/store_unit_i/* -add wave -noupdate -group ex_stage -group lsu -group store_unit 
-group store_queue /core_tb/dut/ex_stage_i/lsu_i/store_unit_i/store_queue_i/* +add wave -noupdate -group ex_stage -group lsu -group store_unit -group store_buffer /core_tb/dut/ex_stage_i/lsu_i/store_unit_i/store_buffer_i/* add wave -noupdate -group ex_stage -group lsu -group load_unit /core_tb/dut/ex_stage_i/lsu_i/load_unit_i/* -add wave -noupdate -group ex_stage -group lsu -group load_unit -group fifo /core_tb/dut/ex_stage_i/lsu_i/load_unit_i/fifo_i/* add wave -noupdate -group ex_stage -group lsu -group lsu_arbiter /core_tb/dut/ex_stage_i/lsu_i/lsu_arbiter_i/* + add wave -noupdate -group ex_stage -group branch_unit /core_tb/dut/ex_stage_i/branch_unit_i/* + add wave -noupdate -group ex_stage -group csr_buffer /core_tb/dut/ex_stage_i/csr_buffer_i/* add wave -noupdate -group ex_stage /core_tb/dut/ex_stage_i/* + add wave -noupdate -group commit_stage /core_tb/dut/commit_stage_i/* + add wave -noupdate -group csr_file /core_tb/dut/csr_regfile_i/* + add wave -noupdate -group controller /core_tb/dut/controller_i/*