diff --git a/src/ariane.sv b/src/ariane.sv index 6c5a115a5..ec5f29e1e 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -54,6 +54,7 @@ module ariane output logic data_if_data_req_o, output logic data_if_data_we_o, output logic [7:0] data_if_data_be_o, + output logic [1:0] data_if_tag_status_o, input logic data_if_data_gnt_i, input logic data_if_data_rvalid_i, input logic [63:0] data_if_data_rdata_i, diff --git a/src/ex_stage.sv b/src/ex_stage.sv index d65454fda..a0be87cc6 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -90,6 +90,7 @@ module ex_stage #( output logic data_if_data_req_o, output logic data_if_data_we_o, output logic [7:0] data_if_data_be_o, + output logic [1:0] data_if_tag_status_o, input logic data_if_data_gnt_i, input logic data_if_data_rvalid_i, input logic [63:0] data_if_data_rdata_i, diff --git a/src/fetch_fifo.sv b/src/fetch_fifo.sv index 232f050e5..56cb73b24 100644 --- a/src/fetch_fifo.sv +++ b/src/fetch_fifo.sv @@ -26,8 +26,8 @@ module fetch_fifo // control signals input logic flush_i, // clears the contents of the FIFO -> quasi reset // branch prediction at in_addr_i address, as this is an address and not PC it can be the case - // that we have two compressed instruction (or one compressed instruction and one unaligned instruction) so we need - // keep two prediction inputs: [c1|c0] <- prediction for c1 and c0 + // that we have two compressed instructions (or one compressed instruction and one unaligned instruction) so we + // only predict on one entry and discard (or keep) the other depending on its position and prediction.
input branchpredict_sbe branch_predict_i, input logic [63:0] in_addr_i, input logic [31:0] in_rdata_i, diff --git a/src/lsu.sv b/src/lsu.sv index d56e7a062..52b639ea0 100644 --- a/src/lsu.sv +++ b/src/lsu.sv @@ -65,6 +65,7 @@ module lsu #( output logic data_if_data_req_o, output logic data_if_data_we_o, output logic [7:0] data_if_data_be_o, + output logic [1:0] data_if_tag_status_o, input logic data_if_data_gnt_i, input logic data_if_data_rvalid_i, input logic [63:0] data_if_data_rdata_i, @@ -72,6 +73,9 @@ module lsu #( output exception lsu_exception_o // to WB, signal exception status LD/ST exception ); + // tag status + enum logic [1:0] { WAIT_TRANSLATION, ABORT_TRANSLATION, VALID_TRANSLATION, NOT_IMPL } tag_status; + mem_if ptw_if(clk_i); // byte enable based on operation to perform // data is misaligned @@ -81,17 +85,14 @@ module lsu #( // virtual address as calculated by the AGU in the first cycle logic [63:0] vaddr_i; - // stall signal e.g.: do not update registers from above - logic stall; // gets the data from the register logic get_from_register; - // those are the signals which are always correct // e.g.: they keep the value in the stall case - logic [63:0] vaddr; - logic [63:0] data; - logic [7:0] be; - fu_op operator; + logic [63:0] vaddr; + logic [63:0] data; + logic [7:0] be; + fu_op operator; logic [TRANS_ID_BITS-1:0] trans_id; // registered address in case of a necessary stall @@ -99,6 +100,8 @@ module lsu #( logic [63:0] data_q; fu_op operator_q; logic [TRANS_ID_BITS-1:0] trans_id_q; + // stall signal e.g.: do not update registers from above + logic stall; // for ld/st address checker logic [63:0] st_buffer_paddr; // physical address for store @@ -167,37 +170,38 @@ module lsu #( // MMU e.g.: TLBs/PTW // ------------------- mmu #( - .INSTR_TLB_ENTRIES ( 16 ), - .DATA_TLB_ENTRIES ( 16 ), - .ASID_WIDTH ( ASID_WIDTH ) + .INSTR_TLB_ENTRIES ( 16 ), + .DATA_TLB_ENTRIES ( 16 ), + .ASID_WIDTH ( ASID_WIDTH ) ) mmu_i ( - .lsu_req_i ( translation_req ), - 
.lsu_vaddr_i ( vaddr ), - .lsu_valid_o ( translation_valid ), - .lsu_paddr_o ( paddr_o ), - // connecting PTW to D$ IF (aka mem arbiter) - .data_if_address_o ( address_i [0] ), - .data_if_data_wdata_o ( data_wdata_i [0] ), - .data_if_data_req_o ( data_req_i [0] ), - .data_if_data_we_o ( data_we_i [0] ), - .data_if_data_be_o ( data_be_i [0] ), - .data_if_data_gnt_i ( data_gnt_o [0] ), - .data_if_data_rvalid_i ( data_rvalid_o [0] ), - .data_if_data_rdata_i ( data_rdata_o [0] ), + .lsu_req_i ( translation_req ), + .lsu_vaddr_i ( vaddr ), + .lsu_valid_o ( translation_valid ), + .lsu_paddr_o ( paddr_o ), + // connecting PTW to D$ IF (aka mem arbiter) + .data_if_address_o ( address_i [0] ), + .data_if_data_wdata_o ( data_wdata_i [0] ), + .data_if_data_req_o ( data_req_i [0] ), + .data_if_data_we_o ( data_we_i [0] ), + .data_if_data_be_o ( data_be_i [0] ), + .data_if_data_gnt_i ( data_gnt_o [0] ), + .data_if_data_rvalid_i ( data_rvalid_o [0] ), + .data_if_data_rdata_i ( data_rdata_o [0] ), .* ); // ------------------ // Address Checker // ------------------ - logic address_match; + logic page_offset_match; // checks if the requested load is in the store buffer + // page offsets are virtually and physically the same always_comb begin : address_checker - address_match = 1'b0; - // as a beginning the uppermost bits are identical and the entry is valid - if (translation_valid & (paddr_o[63:3] == st_buffer_paddr[63:3]) & st_buffer_valid) begin + page_offset_match = 1'b0; + // check if the LSBs are identical and the entry is valid + if ((paddr_o[11:3] == st_buffer_paddr[11:3]) & st_buffer_valid) begin // TODO: implement propperly, this is overly pessimistic - address_match = 1'b1; + page_offset_match = 1'b1; end end @@ -210,16 +214,16 @@ module lsu #( always_comb begin : lsu_control // default assignment NS = CS; - lsu_trans_id_o = trans_id; - lsu_ready_o = 1'b1; + lsu_trans_id_o = trans_id; + lsu_ready_o = 1'b1; // is the store valid e.g.: can we put it in the store buffer -
st_valid = 1'b0; + st_valid = 1'b0; // as a default we are not requesting on the read interface - data_req_i[1] = 1'b0; + data_req_i[1] = 1'b0; // request the address translation - translation_req = 1'b0; + translation_req = 1'b0; // as a default we don't stall - stall = 1'b0; + stall = 1'b0; // as a default we won't take the operands from the internal // registers get_from_register = 1'b0; @@ -227,166 +231,6 @@ module lsu #( // we need to give the valid result even to stores lsu_valid_o = 1'b0; unique case (CS) - // we can freely accept new request - IDLE: begin - // First of all we distinguish between load and stores - // 1. for loads we need to wait until they can happen - // 2. stores can be placed in the store buffer if it is empty - // in any case we need to do address translation beforehand - // LOAD - if (op == LD_OP & lsu_valid_i) begin - translation_req = 1'b1; - // we can never handle a load in a single cycle - // but at least on a tlb hit we can output it to the memory - if (translation_valid) begin - // check if the address is in the store buffer otherwise we need - // to wait until the store buffer has cleared its entry - if (~address_match) begin - // lets request this read - data_req_i[1] = 1'b1; - // we already got a grant here so lets wait for the rvalid - if (data_gnt_o[1]) begin - NS = LOAD_WAIT_RVALID; - end else begin // we didn't get a grant so wait for it in a separate stage - NS = LOAD_WAIT_GNT; - end - end - end else begin// otherwise we need to wait for the translation - NS = LOAD_WAIT_TRANSLATION; - end - - lsu_ready_o = 1'b0; - - // STORE - end else if (op == ST_OP & lsu_valid_i) begin - translation_req = 1'b1; - // we can handle this store in this cycle if - // a. the storebuffer is not full - // b. 
the TLB was a hit - if (st_ready & translation_valid) begin - NS = IDLE; - // and commit to the store buffer - st_valid = 1'b1; - lsu_valid_o = 1'b1; - // make a dummy writeback so we - // tell the scoreboard that we processed the instruction accordingly - - end else begin - // otherwise we are not able to process new requests, we wait for and ad - lsu_ready_o = 1'b0; - NS = STORE; - end - end - end - // we wait here until the store buffer becomes ready again - STORE: begin - translation_req = 1'b1; - // we are here because we weren't able to finish either the translation - // or the store buffer was not ready. Wait here for both events. - // this gets the data from the flipflop - get_from_register = 1'b1; - if (st_ready & translation_valid) begin - // we can accept a new request if we are here - // but first lets commit to the store buffer - st_valid = 1'b1; - // go back to the IDLE state - NS = IDLE; - // and tell the scoreboard that the result is valid - lsu_valid_o = 1'b1; - end else begin // we can't accept a new request and stay in the store state - stall = 1'b1; - end - // we are not ready to accept new requests in this state. 
- lsu_ready_o = 1'b0; - end - // we wait here for the translation to finish - LOAD_WAIT_TRANSLATION: begin - translation_req = 1'b1; - // get everything from the registers - get_from_register = 1'b1; - // and stall - stall = 1'b1; - // we can't accept new data - lsu_ready_o = 1'b0; - // wait here for the translation to be valid and request the data - // also be sure that the address doesn't match with the one in the store buffer - if (translation_valid & ~address_match) begin - // lets request this read - data_req_i[1] = 1'b1; - // we already got a grant here so lets wait for the rvalid - if (data_gnt_o[1]) begin - NS = LOAD_WAIT_RVALID; - end else begin // we didn't get a grant so wait for it in a separate stage - NS = LOAD_WAIT_GNT; - end - end - end - // we wait here for the grant to happen - LOAD_WAIT_GNT: begin - translation_req = 1'b1; - // we can't accept new data - lsu_ready_o = 1'b0; - // get everything from the registers - get_from_register = 1'b1; - // and stall - stall = 1'b1; - // lets request this read - data_req_i[1] = 1'b1; - // wait for the grant - if (data_gnt_o[1]) begin - NS = LOAD_WAIT_RVALID; - end - end - // we wait here for the rvalid to happen - LOAD_WAIT_RVALID: begin - // we got an rvalid, query for new data - if (data_rvalid_o[1]) begin - translation_req = 1'b1; - // output the correct transaction_id since we don't use the get from register signal here - lsu_trans_id_o = trans_id_q; - // we got a rvalid so we can accept a new store/load request - lsu_ready_o = 1'b1; - // the result is valid if we got the rvalid - lsu_valid_o = 1'b1; - // did we get a new request? 
- // essentially the same part as in IDLE but we can't accept a new store - // as the store could immediately be performed and we would collide on the - // trans id part (e.g.: a structural hazard) - // if (op == LD_OP & lsu_valid_i) begin - // translation_req = 1'b1; - // // we can never handle a load in a single cycle - // // but at least on a tlb hit we can output it to the memory - // if (translation_valid) begin - // // check if the address is in the store buffer otherwise we need - // // to wait until the store buffer has cleared its entry - // if (~address_match) begin - // // lets request this read - // data_req_i[1] = 1'b1; - // // we already got a grant here so lets wait for the rvalid - // if (data_gnt_o[1]) begin - // NS = LOAD_WAIT_RVALID; - // end else begin // we didn't get a grant so wait for it in a separate stage - // NS = LOAD_WAIT_GNT; - // end - // end - // end else begin// otherwise we need to wait for the translation - // NS = LOAD_WAIT_TRANSLATION; - // end - // // STORE - // end else if (op == ST_OP & lsu_valid_i) begin - // NS = STORE; - // end else begin - NS = IDLE; - // end - - end else begin - // and stall - stall = 1'b1; - // we can't accept new data - lsu_ready_o = 1'b0; - end - end default:; endcase end @@ -593,7 +437,7 @@ module lsu #( // it can either be feedthrough from the issue stage or from the internal register always_comb begin : input_select // if we are stalling use the values we saved - if (get_from_register) begin + if (lsu_ready_o) begin /* NOTE(review): polarity looks inverted relative to the comment above; the removed FSM set get_from_register only in stalling states, where lsu_ready_o was 0. Confirm whether ~lsu_ready_o or a dedicated stall flag is intended. */ vaddr = vaddr_q; data = data_q; operator = operator_q; @@ -616,7 +460,7 @@ module lsu #( CS <= IDLE; end else begin CS <= NS; - if (~stall) begin + if (lsu_ready_o) begin vaddr_q <= vaddr_i; data_q <= operand_b_i; operator_q <= operator_i; diff --git a/src/mem_arbiter.sv b/src/mem_arbiter.sv index f82ca6ddb..588fb63d4 100644 --- a/src/mem_arbiter.sv +++ b/src/mem_arbiter.sv @@ -46,13 +46,13 @@ module mem_arbiter #( ); localparam DATA_WIDTH = $clog2(NR_PORTS); - logic full_o; - logic
empty_o; + logic full_o; + logic empty_o; logic [DATA_WIDTH-1:0] data_i; - logic push_i; + logic push_i; logic [DATA_WIDTH-1:0] data_o; - logic pop_i; - logic single_element_o; + logic pop_i; + logic single_element_o; // essentially wait for the queue to be empty // or we just got a grant -> this means we issued a memory request in this cycle // although we are ready if we only got a single element in the queue and an rvalid diff --git a/src/mmu.sv b/src/mmu.sv index 9ddfb83d8..3d9812417 100644 --- a/src/mmu.sv +++ b/src/mmu.sv @@ -74,6 +74,7 @@ module mmu #( // assignments necessary to use interfaces here // only done for the few signals of the instruction interface logic [63:0] fetch_paddr; + logic fetch_req; assign instr_if_data_req_o = fetch_req; assign instr_if_address_o = fetch_paddr; diff --git a/src/tlb.sv b/src/tlb.sv index c464a9227..4abc26056 100644 --- a/src/tlb.sv +++ b/src/tlb.sv @@ -78,8 +78,7 @@ module tlb #( for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin // first level match, this may be a giga page, check the ASID flags as well - if (tags_q[i].valid && (lu_asid_i == tags_q[i].asid) - && vpn2 == tags_q[i].vpn2) begin + if (tags_q[i].valid && lu_asid_i == tags_q[i].asid && vpn2 == tags_q[i].vpn2) begin // second level if (tags_q[i].is_1G) begin lu_is_1G_o = 1'b1; @@ -91,7 +90,7 @@ module tlb #( // this could be a 2 mega page hit or a 4 kB hit // output accordingly if (tags_q[i].is_2M || vpn0 == tags_q[i].vpn0) begin - lu_is_2M_o = tags_q[i].is_1G; + lu_is_2M_o = tags_q[i].is_2M; lu_content_o = content_q[i]; lu_hit_o = 1'b1; lu_hit[i] = 1'b1; diff --git a/tb/core_tb.sv b/tb/core_tb.sv index 3d3bf4f05..3e680368b 100644 --- a/tb/core_tb.sv +++ b/tb/core_tb.sv @@ -45,6 +45,7 @@ module core_tb; .data_if_data_req_o ( data_if.data_req ), .data_if_data_we_o ( data_if.data_we ), .data_if_data_be_o ( data_if.data_be ), + .data_if_tag_status_o ( ), .data_if_data_gnt_i ( data_if.data_gnt ), .data_if_data_rvalid_i ( data_if.data_rvalid ), 
.data_if_data_rdata_i ( data_if.data_rdata ),