diff --git a/docs/architecture.md b/docs/architecture.md index 8bf736d4d..2fae29808 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -55,18 +55,20 @@ The scoreboard is implemented as a circular buffer with two pointers. The first The FU are not supposed to have inter-unit dependencies for the moment, e.g.: every FU must be able to perform its operation independently of every other unit. The following interface is proposed to keep maximum interoperability. A minimum set of port definitions would be: -| **Signal** | **Direction** | **Description** | -| ---------- | ------------- | --------------------------------------------- | -| clk_i | Input | Global clock signal | -| rst_ni | Input | Reset the functional unit to a specific state | -| operator_i | Input | Operation to perform | -| operand_ai | Input | Operand A | -| operand_bi | Input | Operand B | -| result_o | Output | Result Output | -| valid_i | Input | Data is valid from ID/Scoreboard | -| ready_o | Output | Ready signal to ID/Scoreboard | -| ready_i | Input | Ready signal from WB/Scoreboard | -| valid_o | Output | Data is valid to WB/Scoreboard | +| **Signal** | **Direction** | **Description** | +|-------------|---------------|--------------------------------------------------| +| clk_i | Input | Global clock signal | +| rst_ni | Input | Reset the functional unit to a specific state | +| operator_i | Input | Operation to perform | +| operand_a_i | Input | Operand A | +| operand_b_i | Input | Operand B | +| result_o | Output | Result Output | +| valid_i | Input | Data is valid from ID/Scoreboard | +| ready_o | Output | Ready signal to ID/Scoreboard | +| valid_o | Output | Data is valid to WB/Scoreboard | +| trans_id_i | Input | Transaction ID for the operation to perform | +| trans_id_o | Output | Transaction ID at which to write back the result | + TODO: Details about comparisons and branches. 
diff --git a/lsu.sv b/lsu.sv index bfc8e116e..50d398693 100644 --- a/lsu.sv +++ b/lsu.sv @@ -61,34 +61,50 @@ module lsu #( ); mem_if ptw_if(clk_i); // byte enable based on operation to perform - logic [7:0] data_be_o; // data is misaligned logic data_misaligned; assign lsu_valid_o = 1'b0; enum { IDLE, STORE, LOAD_WAIT_TRANSLATION, LOAD_WAIT_GNT, LOAD_WAIT_RVALID } CS, NS; - // virtual address as calculated by the AGU + // virtual address as calculated by the AGU in the first cycle + logic [63:0] vaddr_i; + // stall signal e.g.: do not update registers from above + logic stall; + // gets the data from the register + logic get_from_register; + + // those are the signals which are always correct + // e.g.: they keep the value in the stall case logic [63:0] vaddr; + logic [63:0] data; + logic [7:0] be; + fu_op operator; + logic [TRANS_ID_BITS-1:0] trans_id; + + // registered address in case of a necessary stall + logic [63:0] vaddr_q; + logic [63:0] data_q; + fu_op operator_q; + logic [TRANS_ID_BITS-1:0] trans_id_q; // for ld/st address checker - logic [63:0] st_buffer_paddr; // physical address for store - logic [63:0] st_buffer_data; // store buffer data out - logic [7:0] st_buffer_be; - logic st_buffer_valid; - + logic [63:0] st_buffer_paddr; // physical address for store + logic [63:0] st_buffer_data; // store buffer data out + logic [7:0] st_buffer_be; + logic st_buffer_valid; // store buffer control signals logic st_ready; logic st_valid; // from MMU logic translation_req, translation_valid; - logic [63:0] lsu_paddr_o; + logic [63:0] paddr_o; // ------------------------------ // Address Generation Unit (AGU) // ------------------------------ - assign vaddr = imm_i + operand_a_i; - assign data_if.address = vaddr; + assign vaddr_i = imm_i + operand_a_i; + assign data_if.address = vaddr_i; // --------------- // Memory Arbiter @@ -127,20 +143,24 @@ module lsu #( .* ); - // connecting PTW to D$ (aka mem arbiter) - assign address_i[0] = ptw_if.address; + // connecting 
PTW to D$ IF (aka mem arbiter) + assign address_i [0] = ptw_if.address; assign data_wdata_i[0] = ptw_if.data_wdata; - assign data_req_i[0] = ptw_if.data_req; - assign data_we_i [0] = ptw_if.data_we; - assign data_be_i [0] = ptw_if.data_be; + assign data_req_i [0] = ptw_if.data_req; + assign data_we_i [0] = ptw_if.data_we; + assign data_be_i [0] = ptw_if.data_be; assign ptw_if.data_rvalid = data_rvalid_o[0]; assign ptw_if.data_rdata = data_rdata_o[0]; - // connect the load logic to the memory arbiter - assign address_i [1] = lsu_paddr_o; - // this is a read only interface - assign data_we_i [1] = 1'b0; - // assign data_be_i [1] = + // connect the load logic to the memory arbiter + assign address_i [1] = paddr_o; + // this is a read only interface + assign data_we_i [1] = 1'b0; + assign data_wdata_i [1] = 1'b0; + assign data_be_i [1] = be; + logic [63:0] rdata; + // data coming from arbiter interface 1 + assign rdata = data_rdata_o[1]; // ------------------- // MMU e.g.: TLBs/PTW // ------------------- @@ -152,11 +172,25 @@ module lsu #( .lsu_req_i ( translation_req ), .lsu_vaddr_i ( vaddr ), .lsu_valid_o ( translation_valid ), - .lsu_paddr_o ( lsu_paddr_o ), + .lsu_paddr_o ( paddr_o ), .data_if ( ptw_if ), .* ); + // ------------------ + // Address Checker + // ------------------ + logic address_match; + // checks if the requested load is in the store buffer + always_comb begin : address_checker + address_match = 1'b0; + // as a first approximation: match when the upper address bits [63:3] are identical and the entry is valid + if (translation_valid & (paddr_o[63:3] == st_buffer_paddr[63:3]) & st_buffer_valid) begin + // TODO: implement properly, this is overly pessimistic + address_match = 1'b1; + end + end + // ------------------ // LSU Control // ------------------ @@ -166,6 +200,7 @@ module lsu #( always_comb begin : lsu_control // default assignment NS = CS; + lsu_trans_id_o = trans_id; lsu_ready_o = 1'b1; // is the store valid e.g.: can we put it in the store buffer st_valid = 1'b0; @@
-173,9 +208,12 @@ module lsu #( data_req_i[1] = 1'b0; // request the address translation translation_req = 1'b0; - // lsu_valid_i; - // lsu_trans_id_i - // lsu_trans_id_o + // as a default we don't stall + stall = 1'b0; + // as a default we won't take the operands from the internal + // registers + get_from_register = 1'b0; + unique case (CS) // we can freely accept new request IDLE: begin @@ -183,57 +221,155 @@ module lsu #( // 1. for loads we need to wait until they can happen // 2. stores can be placed in the store buffer if it is empty // in any case we need to do address translation beforehand - if (op == LD) begin + // LOAD + if (op == LD & lsu_valid_i) begin translation_req = 1'b1; // we can never handle a load in a single cycle // but at least on a tlb hit we can output it to the memory if (translation_valid) begin - // lets request this read - data_req_i[1] = 1'b1; - // we already got a grant here so lets wait for the rvalid - if (data_gnt_o[1]) begin - NS = LOAD_WAIT_RVALID; - end else begin // we didn't get a grant so wait for it in a separate stage - NS = LOAD_WAIT_GNT; + // check if the address is in the store buffer otherwise we need + // to wait until the store buffer has cleared its entry + if (~address_match) begin + // lets request this read + data_req_i[1] = 1'b1; + // we already got a grant here so lets wait for the rvalid + if (data_gnt_o[1]) begin + NS = LOAD_WAIT_RVALID; + end else begin // we didn't get a grant so wait for it in a separate stage + NS = LOAD_WAIT_GNT; + end end end else begin// otherwise we need to wait for the translation NS = LOAD_WAIT_TRANSLATION; end + lsu_ready_o = 1'b0; - end else if (op == ST) begin + + // STORE + end else if (op == ST & lsu_valid_i) begin translation_req = 1'b1; - // we can handle this store in a single cycle if + // we can handle this store in this cycle if // a. the storebuffer is not full // b. 
the TLB was a hit - if (st_ready && translation_valid) begin + if (st_ready & translation_valid) begin NS = IDLE; // and commit to the store buffer st_valid = 1'b1; // make a dummy writeback so we // tell the scoreboard that we processed the instruction accordingly - lsu_trans_id_o = lsu_trans_id_i; + end else begin - // otherwise we are not able to process new requests + // otherwise we are not able to process new requests, so we wait in the STORE state lsu_ready_o = 1'b0; NS = STORE; end end end - + // we wait here until the store buffer becomes ready again STORE: begin - + translation_req = 1'b1; + // we are here because we weren't able to finish either the translation + // or the store buffer was not ready. Wait here for both events. + // this gets the data from the flipflop + get_from_register = 1'b1; + if (st_ready & translation_valid) begin + // we can accept a new request if we are here + // but first let's commit to the store buffer + st_valid = 1'b1; + // go back to the IDLE state + NS = IDLE; + end else begin // we can't accept a new request and stay in the store state + stall = 1'b1; + end + // we are not ready to accept new requests in this state.
+ lsu_ready_o = 1'b0; end - + // we wait here for the translation to finish LOAD_WAIT_TRANSLATION: begin - + translation_req = 1'b1; + // get everything from the registers + get_from_register = 1'b1; + // and stall + stall = 1'b1; + // we can't accept new data + lsu_ready_o = 1'b0; + // wait here for the translation to be valid and request the data + // also be sure that the address doesn't match with the one in the store buffer + if (translation_valid & ~address_match) begin + // lets request this read + data_req_i[1] = 1'b1; + // we already got a grant here so lets wait for the rvalid + if (data_gnt_o[1]) begin + NS = LOAD_WAIT_RVALID; + end else begin // we didn't get a grant so wait for it in a separate stage + NS = LOAD_WAIT_GNT; + end + end end - + // we wait here for the grant to happen LOAD_WAIT_GNT: begin - + translation_req = 1'b1; + // we can't accept new data + lsu_ready_o = 1'b0; + // get everything from the registers + get_from_register = 1'b1; + // and stall + stall = 1'b1; + // lets request this read + data_req_i[1] = 1'b1; + // wait for the grant + if (data_gnt_o[1]) begin + NS = LOAD_WAIT_RVALID; + end end - + // we wait here for the rvalid to happen LOAD_WAIT_RVALID: begin + // we got an rvalid, query for new data + if (data_rvalid_o[1]) begin + translation_req = 1'b1; + // output the correct transaction_id since we don't use the get from register signal here + lsu_trans_id_o = trans_id_q; + // we got a rvalid so we can accept a new store/load request + lsu_ready_o = 1'b1; + // did we get a new request? 
+ // essentially the same part as in IDLE but we can't accept a new store + // as the store could immediately be performed and we would collide on the + // trans id part (e.g.: a structural hazard) + if (op == LD & lsu_valid_i) begin + translation_req = 1'b1; + // we can never handle a load in a single cycle + // but at least on a tlb hit we can output it to the memory + if (translation_valid) begin + // check if the address is in the store buffer otherwise we need + // to wait until the store buffer has cleared its entry + if (~address_match) begin + // lets request this read + data_req_i[1] = 1'b1; + // we already got a grant here so lets wait for the rvalid + if (data_gnt_o[1]) begin + NS = LOAD_WAIT_RVALID; + end else begin // we didn't get a grant so wait for it in a separate stage + NS = LOAD_WAIT_GNT; + end + end + end else begin// otherwise we need to wait for the translation + NS = LOAD_WAIT_TRANSLATION; + end + + lsu_ready_o = 1'b0; + + // STORE + end else if (op == ST & lsu_valid_i) begin + NS = STORE; + end + + end else begin + // and stall + stall = 1'b1; + // we can't accept new data + lsu_ready_o = 1'b0; + end end endcase end @@ -254,17 +390,18 @@ module lsu #( // Store Queue // --------------- store_queue store_queue_i ( + // store queue write port + .valid_i ( st_valid ), + .paddr_i ( paddr_o ), + .data_i ( data ), + .be_i ( be ), + // store buffer in .paddr_o ( st_buffer_paddr ), .data_o ( st_buffer_data ), .valid_o ( st_buffer_valid ), .be_o ( st_buffer_be ), .ready_o ( st_ready ), - .valid_i ( st_valid ), - .paddr_i ( lsu_paddr_o ), - .data_i ( operand_b_i ), - .be_i ( data_be_o ), - .address_o ( address_i [2] ), .data_wdata_o ( data_wdata_i [2] ), .data_req_o ( data_req_i [2] ), @@ -282,50 +419,51 @@ module lsu #( always_comb begin : byte_enable // we can generate the byte enable from the virtual address since the last // 12 bit are the same anyway - case (operator_i) + // and we can always generate the byte enable from the address at hand + 
case (operator) LD, SD: // double word case (vaddr[2:0]) - 3'b000: data_be_o = 8'b1111_1111; - 3'b001: data_be_o = 8'b1111_1110; - 3'b010: data_be_o = 8'b1111_1100; - 3'b011: data_be_o = 8'b1111_1000; - 3'b100: data_be_o = 8'b1111_0000; - 3'b101: data_be_o = 8'b1110_0000; - 3'b110: data_be_o = 8'b1100_0000; - 3'b111: data_be_o = 8'b1000_0000; + 3'b000: be = 8'b1111_1111; + // 3'b001: be = 8'b1111_1110; + // 3'b010: be = 8'b1111_1100; + // 3'b011: be = 8'b1111_1000; + // 3'b100: be = 8'b1111_0000; + // 3'b101: be = 8'b1110_0000; + // 3'b110: be = 8'b1100_0000; + // 3'b111: be = 8'b1000_0000; endcase LW, LWU, SW: // word case (vaddr[2:0]) - 3'b000: data_be_o = 8'b0000_1111; - 3'b001: data_be_o = 8'b0001_1110; - 3'b010: data_be_o = 8'b0011_1100; - 3'b011: data_be_o = 8'b0111_1000; - 3'b100: data_be_o = 8'b1111_0000; - 3'b101: data_be_o = 8'b1110_0000; - 3'b110: data_be_o = 8'b1100_0000; - 3'b111: data_be_o = 8'b1000_0000; + 3'b000: be = 8'b0000_1111; + 3'b001: be = 8'b0001_1110; + 3'b010: be = 8'b0011_1100; + 3'b011: be = 8'b0111_1000; + 3'b100: be = 8'b1111_0000; + // 3'b101: be = 8'b1110_0000; + // 3'b110: be = 8'b1100_0000; + // 3'b111: be = 8'b1000_0000; endcase LH, LHU, SH: // half word case (vaddr[2:0]) - 3'b000: data_be_o = 8'b0000_0011; - 3'b001: data_be_o = 8'b0000_0110; - 3'b010: data_be_o = 8'b0000_1100; - 3'b011: data_be_o = 8'b0001_1000; - 3'b100: data_be_o = 8'b0011_0000; - 3'b101: data_be_o = 8'b0110_0000; - 3'b110: data_be_o = 8'b1100_0000; - 3'b111: data_be_o = 8'b1000_0000; + 3'b000: be = 8'b0000_0011; + 3'b001: be = 8'b0000_0110; + 3'b010: be = 8'b0000_1100; + 3'b011: be = 8'b0001_1000; + 3'b100: be = 8'b0011_0000; + 3'b101: be = 8'b0110_0000; + 3'b110: be = 8'b1100_0000; + // 3'b111: be = 8'b1000_0000; endcase LB, LBU, SB: // byte case (vaddr[2:0]) - 3'b000: data_be_o = 8'b0000_0001; - 3'b001: data_be_o = 8'b0000_0010; - 3'b010: data_be_o = 8'b0000_0100; - 3'b011: data_be_o = 8'b0000_1000; - 3'b100: data_be_o = 8'b0001_0000; - 3'b101: data_be_o = 
8'b0010_0000; - 3'b110: data_be_o = 8'b0100_0000; - 3'b111: data_be_o = 8'b1000_0000; + 3'b000: be = 8'b0000_0001; + 3'b001: be = 8'b0000_0010; + 3'b010: be = 8'b0000_0100; + 3'b011: be = 8'b0000_1000; + 3'b100: be = 8'b0001_0000; + 3'b101: be = 8'b0010_0000; + 3'b110: be = 8'b0100_0000; + 3'b111: be = 8'b1000_0000; endcase endcase end @@ -333,9 +471,7 @@ module lsu #( // --------------- // Sign Extend // --------------- - logic [63:0] rdata_ext; - // data coming from arbiter interface 1 - assign rdata_ext = data_rdata_o[1]; + logic [63:0] rdata_d_ext; // sign extension for double words, actually only misaligned assembly logic [63:0] rdata_w_ext; // sign extension for words logic [63:0] rdata_h_ext; // sign extension for half words @@ -344,7 +480,7 @@ module lsu #( // double words always_comb begin : sign_extend_double_word case (vaddr[2:0]) - 3'b000: rdata_d_ext = operand_b_i[63:0]; + 3'b000: rdata_d_ext = rdata[63:0]; // this is for misaligned accesse only // 3'b001: rdata_d_ext = {data_rdata_i[7:0], rdata_q[63:8]}; // 3'b010: rdata_d_ext = {data_rdata_i[15:0], rdata_q[63:16]}; @@ -359,11 +495,11 @@ module lsu #( // sign extension for words always_comb begin : sign_extend_word case (vaddr[2:0]) - 3'b000: rdata_w_ext = (operator_i == LW) ? {{32{rdata_ext[31]}}, rdata_ext[31:0]} : {32'h0, rdata_ext[31:0]}; - 3'b001: rdata_w_ext = (operator_i == LW) ? {{32{rdata_ext[39]}}, rdata_ext[39:8]} : {32'h0, rdata_ext[39:8]}; - 3'b010: rdata_w_ext = (operator_i == LW) ? {{32{rdata_ext[47]}}, rdata_ext[47:16]} : {32'h0, rdata_ext[47:16]}; - 3'b011: rdata_w_ext = (operator_i == LW) ? {{32{rdata_ext[55]}}, rdata_ext[55:24]} : {32'h0, rdata_ext[55:24]}; - 3'b100: rdata_w_ext = (operator_i == LW) ? {{32{rdata_ext[63]}}, rdata_ext[63:32]} : {32'h0, rdata_ext[63:32]}; + 3'b000: rdata_w_ext = (operator == LW) ? {{32{rdata[31]}}, rdata[31:0]} : {32'h0, rdata[31:0]}; + 3'b001: rdata_w_ext = (operator == LW) ? 
{{32{rdata[39]}}, rdata[39:8]} : {32'h0, rdata[39:8]}; + 3'b010: rdata_w_ext = (operator == LW) ? {{32{rdata[47]}}, rdata[47:16]} : {32'h0, rdata[47:16]}; + 3'b011: rdata_w_ext = (operator == LW) ? {{32{rdata[55]}}, rdata[55:24]} : {32'h0, rdata[55:24]}; + 3'b100: rdata_w_ext = (operator == LW) ? {{32{rdata[63]}}, rdata[63:32]} : {32'h0, rdata[63:32]}; // miss-aligned access // 3'b101: rdata_w_ext = (data_sign_ext_q) ? {{32{data_rdata_i[7]}}, data_rdata_i[7:0], rdata_q[63:40]} : {32'h0, data_rdata_i[7:0], rdata_q[63:40]}; // 3'b110: rdata_w_ext = (data_sign_ext_q) ? {{32{data_rdata_i[15]}}, data_rdata_i[15:0], rdata_q[63:48]} : {32'h0, data_rdata_i[15:0], rdata_q[63:48]}; @@ -374,13 +510,13 @@ module lsu #( // sign extension for half words always_comb begin : sign_extend_half_word case (vaddr[2:0]) - 3'b000: rdata_h_ext = (operator_i == LH) ? {{48{rdata_ext[15]}}, rdata_ext[15:0]} : {48'h0, rdata_ext[15:0]}; - 3'b001: rdata_h_ext = (operator_i == LH) ? {{48{rdata_ext[23]}}, rdata_ext[23:8]} : {48'h0, rdata_ext[23:8]}; - 3'b010: rdata_h_ext = (operator_i == LH) ? {{48{rdata_ext[31]}}, rdata_ext[31:16]} : {48'h0, rdata_ext[31:16]}; - 3'b011: rdata_h_ext = (operator_i == LH) ? {{48{rdata_ext[39]}}, rdata_ext[39:24]} : {48'h0, rdata_ext[39:24]}; - 3'b100: rdata_h_ext = (operator_i == LH) ? {{48{rdata_ext[47]}}, rdata_ext[47:32]} : {48'h0, rdata_ext[47:32]}; - 3'b101: rdata_h_ext = (operator_i == LH) ? {{48{rdata_ext[55]}}, rdata_ext[55:40]} : {48'h0, rdata_ext[55:40]}; - 3'b110: rdata_h_ext = (operator_i == LH) ? {{48{rdata_ext[63]}}, rdata_ext[63:48]} : {48'h0, rdata_ext[63:48]}; + 3'b000: rdata_h_ext = (operator == LH) ? {{48{rdata[15]}}, rdata[15:0]} : {48'h0, rdata[15:0]}; + 3'b001: rdata_h_ext = (operator == LH) ? {{48{rdata[23]}}, rdata[23:8]} : {48'h0, rdata[23:8]}; + 3'b010: rdata_h_ext = (operator == LH) ? {{48{rdata[31]}}, rdata[31:16]} : {48'h0, rdata[31:16]}; + 3'b011: rdata_h_ext = (operator == LH) ? 
{{48{rdata[39]}}, rdata[39:24]} : {48'h0, rdata[39:24]}; + 3'b100: rdata_h_ext = (operator == LH) ? {{48{rdata[47]}}, rdata[47:32]} : {48'h0, rdata[47:32]}; + 3'b101: rdata_h_ext = (operator == LH) ? {{48{rdata[55]}}, rdata[55:40]} : {48'h0, rdata[55:40]}; + 3'b110: rdata_h_ext = (operator == LH) ? {{48{rdata[63]}}, rdata[63:48]} : {48'h0, rdata[63:48]}; // miss-aligned access // 3'b111: rdata_h_ext = (data_sign_ext_q) ? {{48{data_rdata_i[7]}}, data_rdata_i[7:0], rdata_q[31:24]} : {48'h0, data_rdata_i[7:0], rdata_q[31:24]}; endcase @@ -388,19 +524,19 @@ module lsu #( always_comb begin : sign_extend_byte case (vaddr[2:0]) - 3'b000: rdata_b_ext = (operator_i == LB) ? {{56{rdata_ext[7]}}, rdata_ext[7:0]} : {56'h0, rdata_ext[7:0]}; - 3'b001: rdata_b_ext = (operator_i == LB) ? {{56{rdata_ext[15]}}, rdata_ext[15:8]} : {56'h0, rdata_ext[15:8]}; - 3'b010: rdata_b_ext = (operator_i == LB) ? {{56{rdata_ext[23]}}, rdata_ext[23:16]} : {56'h0, rdata_ext[23:16]}; - 3'b011: rdata_b_ext = (operator_i == LB) ? {{56{rdata_ext[31]}}, rdata_ext[31:24]} : {56'h0, rdata_ext[31:24]}; - 3'b100: rdata_b_ext = (operator_i == LB) ? {{56{rdata_ext[39]}}, rdata_ext[39:32]} : {56'h0, rdata_ext[39:32]}; - 3'b101: rdata_b_ext = (operator_i == LB) ? {{56{rdata_ext[47]}}, rdata_ext[47:40]} : {56'h0, rdata_ext[47:40]}; - 3'b110: rdata_b_ext = (operator_i == LB) ? {{56{rdata_ext[55]}}, rdata_ext[55:48]} : {56'h0, rdata_ext[55:48]}; - 3'b111: rdata_b_ext = (operator_i == LB) ? {{56{rdata_ext[63]}}, rdata_ext[63:56]} : {56'h0, rdata_ext[63:56]}; + 3'b000: rdata_b_ext = (operator == LB) ? {{56{rdata[7]}}, rdata[7:0]} : {56'h0, rdata[7:0]}; + 3'b001: rdata_b_ext = (operator == LB) ? {{56{rdata[15]}}, rdata[15:8]} : {56'h0, rdata[15:8]}; + 3'b010: rdata_b_ext = (operator == LB) ? {{56{rdata[23]}}, rdata[23:16]} : {56'h0, rdata[23:16]}; + 3'b011: rdata_b_ext = (operator == LB) ? {{56{rdata[31]}}, rdata[31:24]} : {56'h0, rdata[31:24]}; + 3'b100: rdata_b_ext = (operator == LB) ? 
{{56{rdata[39]}}, rdata[39:32]} : {56'h0, rdata[39:32]}; + 3'b101: rdata_b_ext = (operator == LB) ? {{56{rdata[47]}}, rdata[47:40]} : {56'h0, rdata[47:40]}; + 3'b110: rdata_b_ext = (operator == LB) ? {{56{rdata[55]}}, rdata[55:48]} : {56'h0, rdata[55:48]}; + 3'b111: rdata_b_ext = (operator == LB) ? {{56{rdata[63]}}, rdata[63:56]} : {56'h0, rdata[63:56]}; endcase // case (rdata_offset_q) end always_comb begin - case (operator_i) + case (operator) LD: lsu_result_o = rdata_d_ext; LW, LWU: lsu_result_o = rdata_w_ext; LH, LHU: lsu_result_o = rdata_h_ext; @@ -413,23 +549,24 @@ module lsu #( // ------------------ // misaligned detector // page fault, privilege exception + // we can detect a misaligned exception immediately always_comb begin : exception_control data_misaligned = 1'b0; if(lsu_valid_i) begin case (operator_i) LD, SD: begin // double word - if(vaddr[2:0] != 3'b000) + if(vaddr_i[2:0] != 3'b000) data_misaligned = 1'b1; end LW, LWU, SW: begin // word - if(vaddr[2] == 1'b1 && vaddr[2:0] != 3'b100) + if(vaddr_i[2] == 1'b1 && vaddr_i[2:0] != 3'b100) data_misaligned = 1'b1; end LH, LHU, SH: begin // half word - if(vaddr[2:0] == 3'b111) + if(vaddr_i[2:0] == 3'b111) data_misaligned = 1'b1; end // byte -> is always aligned default:; @@ -437,13 +574,66 @@ module lsu #( end end - // registers - always_ff @(posedge clk_i or negedge rst_ni) begin - if(~rst_ni) begin - CS <= IDLE; - end else begin - CS <= NS; + // this process selects the input based on the current state of the LSU + // it can either be feedthrough from the issue stage or from the internal register + always_comb begin : input_select + // if we are stalling use the values we saved + if (get_from_register) begin + vaddr = vaddr_q; + data = data_q; + operator = operator_q; + trans_id = trans_id_q; + end else begin // otherwise pass them directly through + vaddr = vaddr_i; + data = operand_b_i; + operator = operator_i; + trans_id = lsu_trans_id_i; end end + // registers + always_ff @(posedge clk_i or negedge 
rst_ni) begin + if (~rst_ni) begin + vaddr_q <= 64'b0; + data_q <= 64'b0; + operator_q <= ADD; + trans_id_q <= '{default: 0}; + CS <= IDLE; + end else begin + CS <= NS; + if (~stall) begin + vaddr_q <= vaddr_i; + data_q <= operand_b_i; + operator_q <= operator_i; + trans_id_q <= lsu_trans_id_i; + end + end + end + + // // ------------ + // // Assertions + // // ------------ + + // // make sure there is no new request when the old one is not yet completely done + // // i.e. it should not be possible to get a grant without an rvalid for the + // // last request + // `ifndef VERILATOR + // assert property ( + // @(posedge clk) ((CS == WAIT_RVALID) && (data_gnt_i == 1'b1)) |-> (data_rvalid_i == 1'b1) ) + // else begin $error("data grant without rvalid"); $stop(); end + + // // there should be no rvalid when we are in IDLE + // assert property ( + // @(posedge clk) (CS == IDLE) |-> (data_rvalid_i == 1'b0) ) + // else begin $error("Received rvalid while in IDLE state"); $stop(); end + + // // assert that errors are only sent at the same time as grant or rvalid + // assert property ( @(posedge clk) (data_err_i) |-> (data_gnt_i || data_rvalid_i) ) + // else begin $error("Error without data grant or rvalid"); $stop(); end + + // // assert that the address does not contain X when request is sent + // assert property ( @(posedge clk) (data_req_o) |-> (!$isunknown(data_addr_o)) ) + // else begin $error("address contains X when request is set"); $stop(); end + // `endif + endmodule \ No newline at end of file diff --git a/mmu.sv b/mmu.sv index 298abbaf7..985bb8be2 100644 --- a/mmu.sv +++ b/mmu.sv @@ -233,7 +233,7 @@ assign iaccess_err = fetch_req_i & ( end end if (ptw_active & walking_instr) begin - // On error play through fetch with error signaled with valid + // On error pass through fetch with error signaled with valid fetch_gnt_o = ptw_error; ierr_valid_n = ptw_error; // signal valid/error on next cycle end diff --git a/store_queue.sv b/store_queue.sv old mode 100755 new 
mode 100644