LSU: bundle request signals into lsu_ctrl_t and add a second request buffer

Summary of what the hunks below do:
  * include/ariane_pkg.svh: add the packed struct lsu_ctrl_t
    (valid, vaddr, data, be, fu, operator, trans_id).
  * src/load_unit.sv, src/store_unit.sv: replace the individual operator /
    trans_id / vaddr (and, for the store unit, be / data) inputs with a
    single lsu_ctrl_i port and read the fields from it.
  * src/lsu.sv: replace the per-field signals and their _n/_q registers with
    lsu_ctrl / lsu_ctrl_n / lsu_ctrl_q, add a second register pair
    lsu_ctrl_nn / lsu_ctrl_qq, gate lsu_ready_o with !lsu_ctrl_qq.valid and
    clear both valid bits on flush_i.
  * src/lsu_arbiter.sv: WIDTH 2 -> 4.
  * src/scoreboard.sv: issue_instr_valid_o is additionally gated by !issue_full.
  * src/btb.sv, src/pcgen.sv: whitespace-only changes.

NOTE(review): this text was recovered from a copy in which all line breaks
had been collapsed. One record per line has been restored and blank context
lines were placed so that every hunk matches its header line counts, but
leading indentation, column alignment and the exact position of blank
context lines are inferred. Apply with --ignore-whitespace if context does
not match. The whitespace-only hunks in src/pcgen.sv could not be recovered
exactly; the spacing shown there is a placeholder.

NOTE(review): in the visible hunks load_unit keeps its "be_i" input while
the instantiation in src/lsu.sv drops the .be_i connection and the only
visible use is switched to lsu_ctrl_i.be. Confirm whether the port is still
needed.

diff --git a/include/ariane_pkg.svh b/include/ariane_pkg.svh
index 69dd7a571..8465360a2 100644
--- a/include/ariane_pkg.svh
+++ b/include/ariane_pkg.svh
@@ -91,6 +91,16 @@ package ariane_pkg;
                                // LSU functions
                                LD, SD, LW, LWU, SW, LH, LHU, SH, LB, SB, LBU
                              } fu_op;
+
+    typedef struct packed {
+        logic                     valid;
+        logic [63:0]              vaddr;
+        logic [63:0]              data;
+        logic [7:0]               be;
+        fu_t                      fu;
+        fu_op                     operator;
+        logic [TRANS_ID_BITS-1:0] trans_id;
+    } lsu_ctrl_t;
     // ---------------
     // IF/ID Stage
     // ---------------
diff --git a/src/btb.sv b/src/btb.sv
index a25876f5c..e68caaf40 100644
--- a/src/btb.sv
+++ b/src/btb.sv
@@ -48,6 +48,7 @@ module btb #(
     logic [BITS_SATURATION_COUNTER-1:0] saturation_counter;
     // branch-predict input register
     branchpredict branch_predict_q;
+
     // get actual index positions
     // we ignore the 0th bit since all instructions are aligned on
     // a half word boundary
diff --git a/src/load_unit.sv b/src/load_unit.sv
index 086d46284..19c7bf86d 100644
--- a/src/load_unit.sv
+++ b/src/load_unit.sv
@@ -23,10 +23,8 @@ module load_unit (
     input  logic                     rst_ni,    // Asynchronous reset active low
     input  logic                     flush_i,
     // load unit input port
-    input  fu_op                     operator_i,
-    input  logic [TRANS_ID_BITS-1:0] trans_id_i,
     input  logic                     valid_i,
-    input  logic [63:0]              vaddr_i,
+    input  lsu_ctrl_t                lsu_ctrl_i,
     input  logic [7:0]               be_i,
     // load unit output port
     output logic                     valid_o,
@@ -66,16 +64,16 @@ module load_unit (
     } load_data_n, load_data_q, in_data;
 
     // page offset is defined as the lower 12 bits, feed through for address checker
-    assign page_offset_o = vaddr_i[11:0];
+    assign page_offset_o = lsu_ctrl_i.vaddr[11:0];
     // feed-through the virtual address for VA translation
-    assign vaddr_o = vaddr_i;
+    assign vaddr_o = lsu_ctrl_i.vaddr;
     // this is a read-only interface so set the write enable to 0
     assign data_we_o = 1'b0;
     // compose the queue data, control is handled in the FSM
-    assign in_data = {trans_id_i, vaddr_i[2:0], operator_i};
+    assign in_data = {lsu_ctrl_i.trans_id, lsu_ctrl_i.vaddr[2:0], lsu_ctrl_i.operator};
     // output address
     // we can now output the lower 12 bit as the index to the cache
-    assign address_index_o = vaddr_i[11:0];
+    assign address_index_o = lsu_ctrl_i.vaddr[11:0];
     // translation from last cycle, again: control is handled in the FSM
     assign address_tag_o = paddr_i[55:12];
     // directly output an exception
@@ -94,7 +92,7 @@ module load_unit (
         // tag control
         kill_req_o = 1'b0;
         tag_valid_o = 1'b0;
-        data_be_o = be_i;
+        data_be_o = lsu_ctrl_i.be;
 
         case (CS)
             IDLE: begin
diff --git a/src/lsu.sv b/src/lsu.sv
index e7497937f..15243b295 100644
--- a/src/lsu.sv
+++ b/src/lsu.sv
@@ -85,22 +85,11 @@ module lsu #(
     // --------------------------------------
     // those are the signals which are always correct
     // e.g.: they keep the value in the stall case
-    logic                     valid;
-    logic [63:0]              vaddr;
-    logic [63:0]              data;
-    logic [7:0]               be;
-    fu_t                      fu;
-    fu_op                     operator;
-    logic [TRANS_ID_BITS-1:0] trans_id;
+    lsu_ctrl_t lsu_ctrl;
+
     // registered address in case of a necessary stall
-    logic                     valid_n, valid_q;
-    logic [63:0]              vaddr_n, vaddr_q;
-    logic [63:0]              data_n, data_q;
-    fu_t                      fu_n, fu_q;
-    fu_op                     operator_n, operator_q;
-    logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q;
-    logic [7:0]               be_n, be_q;
-    // ------------------------------
+    lsu_ctrl_t lsu_ctrl_n, lsu_ctrl_q, lsu_ctrl_nn, lsu_ctrl_qq;
+
     // Address Generation Unit (AGU)
     // ------------------------------
     // virtual address as calculated by the AGU in the first cycle
@@ -220,12 +209,9 @@ module lsu #(
     // Store Unit
     // ------------------
     store_unit store_unit_i (
-        .operator_i            ( operator ),
-        .trans_id_i            ( trans_id ),
         .valid_i               ( st_valid_i ),
-        .vaddr_i               ( vaddr ),
-        .be_i                  ( be ),
-        .data_i                ( data ),
+        .lsu_ctrl_i            ( lsu_ctrl ),
+
         .valid_o               ( st_valid ),
         .ready_o               ( st_ready_o ),
         .trans_id_o            ( st_trans_id ),
@@ -258,11 +244,9 @@ module lsu #(
     // Load Unit
     // ------------------
     load_unit load_unit_i (
-        .operator_i            ( operator ),
-        .trans_id_i            ( trans_id ),
         .valid_i               ( ld_valid_i ),
-        .vaddr_i               ( vaddr ),
-        .be_i                  ( be ),
+        .lsu_ctrl_i            ( lsu_ctrl ),
+
         .valid_o               ( ld_valid ),
         .ready_o               ( ld_ready_o ),
         .trans_id_o            ( ld_trans_id ),
@@ -320,8 +304,9 @@ module lsu #(
     // ------------------
     always_comb begin : lsu_control
         // the LSU is ready if both, stores and loads are ready because we do not know
-        // which of the two we are getting
-        lsu_ready_o = ld_ready_o && st_ready_o;
+        // which unit we need for the instruction we get
+        // additionally it might be the case that we still have one instruction in the buffer, check for that
+        lsu_ready_o = ld_ready_o && st_ready_o && !lsu_ctrl_qq.valid;
     end
 
     // determine whether this is a load or store
@@ -334,16 +319,16 @@ module lsu #(
         mmu_vaddr = 64'b0;
 
         // check the operator to activate the right functional unit accordingly
-        unique case (fu)
+        unique case (lsu_ctrl.fu)
             // all loads go here
             LOAD: begin
-                ld_valid_i = valid;
+                ld_valid_i = lsu_ctrl.valid;
                 translation_req = ld_translation_req;
                 mmu_vaddr = ld_vaddr;
             end
             // all stores go here
             STORE: begin
-                st_valid_i = valid;
+                st_valid_i = lsu_ctrl.valid;
                 translation_req = st_translation_req;
                 mmu_vaddr = st_vaddr;
             end
@@ -442,63 +427,57 @@ module lsu #(
 
         if (data_misaligned) begin
 
-            if (fu == LOAD) begin
+            if (lsu_ctrl.fu == LOAD) begin
                 misaligned_exception = {
                     LD_ADDR_MISALIGNED,
-                    vaddr,
+                    lsu_ctrl.vaddr,
                     1'b1
                 };
-            end else if (fu == STORE) begin
+            end else if (lsu_ctrl.fu == STORE) begin
                 misaligned_exception = {
                     ST_ADDR_MISALIGNED,
-                    vaddr,
+                    lsu_ctrl.vaddr,
                     1'b1
                 };
             end
         end
     end
-
+    // ------------------
+    // Input Select
+    // ------------------
     // this process selects the input based on the current state of the LSU
     // it can either be feed-through from the issue stage or from the internal registers
     always_comb begin : input_select
         // if we are stalling use the values we saved
-        if (lsu_ready_o) begin
-            valid     = lsu_valid_i;
-            vaddr     = vaddr_i;
-            data      = operand_b_i;
-            fu        = fu_i;
-            operator  = operator_i;
-            trans_id  = trans_id_i;
-            be        = be_i;
+        if (lsu_ctrl_qq.valid && ld_ready_o && st_ready_o) begin
+            lsu_ctrl = lsu_ctrl_qq;
+        end else if (lsu_ready_o) begin
+            lsu_ctrl = {lsu_valid_i, vaddr_i, operand_b_i, be_i, fu_i, operator_i, trans_id_i};
         end else begin // otherwise bypass them
-            valid     = valid_q;
-            vaddr     = vaddr_q;
-            data      = data_q;
-            fu        = fu_q;
-            operator  = operator_q;
-            trans_id  = trans_id_q;
-            be        = be_q;
+            lsu_ctrl = lsu_ctrl_q;
         end
     end
 
     // 1st register stage
     always_comb begin : register_stage
-        valid_n     = valid_q;
-        vaddr_n     = vaddr_q;
-        data_n      = data_q;
-        fu_n        = fu_q;
-        operator_n  = operator_q;
-        trans_id_n  = trans_id_q;
-        be_n        = be_q;
+        lsu_ctrl_n  = lsu_ctrl_q;
+        lsu_ctrl_nn = lsu_ctrl_qq;
+        // if we are not ready it might be the case that we get another request from the issue stage
+        if (!lsu_ready_o && lsu_valid_i) begin
+            lsu_ctrl_nn = {lsu_valid_i, vaddr_i, operand_b_i, be_i, fu_i, operator_i, trans_id_i};
+        end
+        // if both units are ready, invalidate the buffer flag
+        if (ld_ready_o && st_ready_o) begin
+            lsu_ctrl_nn.valid = 1'b0;
+        end
         // get new input data
         if (lsu_ready_o) begin
-             valid_n     = lsu_valid_i;
-             vaddr_n     = vaddr_i;
-             data_n      = operand_b_i;
-             fu_n        = fu_i;
-             operator_n  = operator_i;
-             trans_id_n  = trans_id_i;
-             be_n        = be_i;
+            lsu_ctrl_n = {lsu_valid_i, vaddr_i, operand_b_i, be_i, fu_i, operator_i, trans_id_i};
+        end
+
+        if (flush_i) begin
+            lsu_ctrl_nn.valid = 1'b0;
+            lsu_ctrl_n.valid = 1'b0;
         end
     end
 
@@ -506,22 +485,12 @@ module lsu #(
     always_ff @(posedge clk_i or negedge rst_ni) begin
         if (~rst_ni) begin
             // 1st LSU stage
-            valid_q      <= 1'b0;
-            vaddr_q      <= 64'b0;
-            data_q       <= 64'b0;
-            fu_q         <= NONE;
-            operator_q   <= ADD;
-            trans_id_q   <= '{default: 0};
-            be_q         <= 8'b0;
+            lsu_ctrl_q  <= '0;
+            lsu_ctrl_qq <= '0;
         end else begin
             // 1st LSU stage
-            valid_q      <= valid_n;
-            vaddr_q      <= vaddr_n;
-            data_q       <= data_n;
-            fu_q         <= fu_n;
-            operator_q   <= operator_n;
-            trans_id_q   <= trans_id_n;
-            be_q         <= be_n;
+            lsu_ctrl_q  <= lsu_ctrl_n;
+            lsu_ctrl_qq <= lsu_ctrl_nn;
         end
     end
 
diff --git a/src/lsu_arbiter.sv b/src/lsu_arbiter.sv
index 8e62facb4..76a5894a9 100644
--- a/src/lsu_arbiter.sv
+++ b/src/lsu_arbiter.sv
@@ -43,7 +43,7 @@ module lsu_arbiter (
     // it unconditionally posts the result on its output ports and expects it to be consumed.
 
     // 4 entries is enough to unconditionally post loads and stores since we can only have two outstanding loads
-    localparam int WIDTH = 2;
+    localparam int WIDTH = 4;
 
     // queue pointer
     logic [$clog2(WIDTH)-1:0] read_pointer_n, read_pointer_q;
diff --git a/src/pcgen.sv b/src/pcgen.sv
index 63d67cc4b..aa4858b2e 100644
--- a/src/pcgen.sv
+++ b/src/pcgen.sv
@@ -81,9 +81,9 @@ module pcgen (
         // -------------------------------
         // default is a consecutive PC
         if (if_ready_i && fetch_enable_i)
-            npc_n       = {npc_q[63:2], 2'b0}  + 64'h4;
+            npc_n = {npc_q[63:2], 2'b0}  + 64'h4;
         else // or keep the PC stable if IF is not ready
-            npc_n       = npc_q;
+            npc_n = npc_q;
 
         // we only need to stall the consecutive and predicted case since in any other case we will flush at least
         // the front-end which means that the IF stage will always be ready to accept a new request
@@ -105,7 +105,7 @@ module pcgen (
         // -------------------------------
         if (resolved_branch_i.is_mispredict) begin
             // we already got the correct target address
-            npc_n       = resolved_branch_i.target_address;
+            npc_n = resolved_branch_i.target_address;
         end
 
         // -------------------------------
diff --git a/src/scoreboard.sv b/src/scoreboard.sv
index d02f6173a..6fee6a568 100644
--- a/src/scoreboard.sv
+++ b/src/scoreboard.sv
@@ -88,7 +88,7 @@ module scoreboard #(
         issue_instr_o.trans_id = issue_pointer_q;
         // we are ready if we are not full and don't have any unresolved branches, but it can be
         // the case that we have an unresolved branch which is cleared in that cycle (resolved_branch_i == 1)
-        issue_instr_valid_o = decoded_instr_valid_i && !unresolved_branch_i;
+        issue_instr_valid_o = decoded_instr_valid_i && !unresolved_branch_i && !issue_full;
         decoded_instr_ack_o = issue_ack_i && !issue_full;
     end
 
diff --git a/src/store_unit.sv b/src/store_unit.sv
index bf9330b2b..71c28b1c9 100644
--- a/src/store_unit.sv
+++ b/src/store_unit.sv
@@ -24,12 +24,8 @@ module store_unit (
     input  logic                     flush_i,
     output logic                     no_st_pending_o,
     // store unit input port
-    input  fu_op                     operator_i,
-    input  logic [TRANS_ID_BITS-1:0] trans_id_i,
     input  logic                     valid_i,
-    input  logic [63:0]              vaddr_i,
-    input  logic [7:0]               be_i,
-    input  logic [63:0]              data_i,
+    input  lsu_ctrl_t                lsu_ctrl_i,
     input  logic                     commit_i,
     // store unit output port
     output logic                     valid_o,
@@ -75,7 +71,7 @@ module store_unit (
     logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q;
 
     // output assignments
-    assign vaddr_o    = vaddr_i; // virtual address
+    assign vaddr_o    = lsu_ctrl_i.vaddr; // virtual address
     assign trans_id_o = trans_id_q; // transaction id from previous cycle
 
     always_comb begin : store_control
@@ -84,7 +80,7 @@ module store_unit (
         valid_o          = 1'b0;
         st_valid         = 1'b0;
         ex_o             = ex_i;
-        trans_id_n       = trans_id_i;
+        trans_id_n       = lsu_ctrl_i.trans_id;
         NS               = CS;
 
         case (CS)
@@ -175,17 +171,17 @@ module store_unit (
     // -----------
     // re-align the write data to comply with the address offset
     always_comb begin
-        st_be_n   = be_i;
-        st_data_n = data_i;
-        case (vaddr_i[2:0])
-            3'b000: st_data_n = data_i;
-            3'b001: st_data_n = {data_i[55:0], data_i[63:56]};
-            3'b010: st_data_n = {data_i[47:0], data_i[63:48]};
-            3'b011: st_data_n = {data_i[39:0], data_i[63:40]};
-            3'b100: st_data_n = {data_i[31:0], data_i[63:32]};
-            3'b101: st_data_n = {data_i[23:0], data_i[63:24]};
-            3'b110: st_data_n = {data_i[15:0], data_i[63:16]};
-            3'b111: st_data_n = {data_i[7:0],  data_i[63:8]};
+        st_be_n   = lsu_ctrl_i.be;
+        st_data_n = lsu_ctrl_i.data;
+        case (lsu_ctrl_i.vaddr[2:0])
+            3'b000: st_data_n = lsu_ctrl_i.data;
+            3'b001: st_data_n = {lsu_ctrl_i.data[55:0], lsu_ctrl_i.data[63:56]};
+            3'b010: st_data_n = {lsu_ctrl_i.data[47:0], lsu_ctrl_i.data[63:48]};
+            3'b011: st_data_n = {lsu_ctrl_i.data[39:0], lsu_ctrl_i.data[63:40]};
+            3'b100: st_data_n = {lsu_ctrl_i.data[31:0], lsu_ctrl_i.data[63:32]};
+            3'b101: st_data_n = {lsu_ctrl_i.data[23:0], lsu_ctrl_i.data[63:24]};
+            3'b110: st_data_n = {lsu_ctrl_i.data[15:0], lsu_ctrl_i.data[63:16]};
+            3'b111: st_data_n = {lsu_ctrl_i.data[7:0],  lsu_ctrl_i.data[63:8]};
         endcase
     end
     // ---------------
@@ -238,11 +234,11 @@ module store_unit (
     always_comb begin : address_checker
         page_offset_matches_o = 1'b0;
         // check if the LSBs are identical and the entry is valid
-        if ((vaddr_i[11:3] == st_buffer_paddr[11:3]) && st_buffer_valid) begin
+        if ((lsu_ctrl_i.vaddr[11:3] == st_buffer_paddr[11:3]) && st_buffer_valid) begin
             page_offset_matches_o = 1'b1;
         end
 
-        if ((vaddr_i[11:3] == paddr_i[11:3]) && (CS == VALID_STORE)) begin
+        if ((lsu_ctrl_i.vaddr[11:3] == paddr_i[11:3]) && (CS == VALID_STORE)) begin
             page_offset_matches_o = 1'b1;
         end
     end