mirror of
https://github.com/openhwgroup/cva6.git
synced 2025-04-23 13:47:13 -04:00
Start re-partitioning LSU
This commit is contained in:
parent
5d5400282c
commit
69c846a50c
3 changed files with 230 additions and 318 deletions
|
@ -0,0 +1,95 @@
|
|||
import ariane_pkg::*;
|
||||
|
||||
// -----------------------------------------------------------------------------
// Load Unit (work in progress)
//
// Picks the addressed byte / half word / word / double word out of the 64-bit
// read data coming from memory and sign- or zero-extends it to 64 bit.
// The byte lane is selected with the three LSBs of vaddr_i, the extension mode
// with operator_i.
//
// NOTE(review): in this revision only result_o is driven. valid_o, ready_o,
// trans_id_o, translation_req_o, vaddr_o, page_offset_o and the outputs of the
// memory interface have no driver yet.
// -----------------------------------------------------------------------------
module load_unit (
    input  logic                     clk_i,    // Clock
    input  logic                     rst_ni,   // Asynchronous reset active low
    // load unit input port
    // operation to perform; typed as fu_op so that the comparisons against
    // LW/LWU/LH/LHU/LB/LBU below are made on the complete opcode
    input  fu_op                     operator_i,
    input  logic                     valid_i,
    input  logic [63:0]              vaddr_i,
    input  logic [7:0]               be_i,
    // load unit output port
    output logic                     valid_o,
    output logic                     ready_o,
    output logic [TRANS_ID_BITS-1:0] trans_id_o,
    output logic [63:0]              result_o,
    // MMU -> Address Translation
    output logic                     translation_req_o,   // request address translation
    output logic [63:0]              vaddr_o,             // virtual address out
    input  logic [63:0]              paddr_i,             // physical address in
    input  logic                     translation_valid_i,
    // address checker
    output logic [11:0]              page_offset_o,
    input  logic                     page_offset_matches_i,
    // memory interface
    output logic [63:0]              address_o,
    output logic [63:0]              data_wdata_o,
    output logic                     data_req_o,
    output logic                     data_we_o,
    output logic [7:0]               data_be_o,
    output logic [1:0]               data_tag_status_o,
    input  logic                     data_gnt_i,
    input  logic                     data_rvalid_i,
    input  logic [63:0]              data_rdata_i
);

    // ---------------
    // Sign Extend
    // ---------------
    logic [63:0] rdata_d_ext; // double word: read data is passed through unchanged
    logic [63:0] rdata_w_ext; // extended word
    logic [63:0] rdata_h_ext; // extended half word
    logic [63:0] rdata_b_ext; // extended byte

    // double words need neither lane selection nor extension
    assign rdata_d_ext = data_rdata_i[63:0];

    // words: LW sign-extends, every other operator zero-extends.
    // Offsets 5..7 have no arm of their own and fall back to the lowest word,
    // exactly like offset 0.
    always_comb begin : sign_extend_word
        case (vaddr_i[2:0])
            3'b001:  rdata_w_ext = (operator_i == LW) ? {{32{data_rdata_i[39]}}, data_rdata_i[39:8]}  : {32'h0, data_rdata_i[39:8]};
            3'b010:  rdata_w_ext = (operator_i == LW) ? {{32{data_rdata_i[47]}}, data_rdata_i[47:16]} : {32'h0, data_rdata_i[47:16]};
            3'b011:  rdata_w_ext = (operator_i == LW) ? {{32{data_rdata_i[55]}}, data_rdata_i[55:24]} : {32'h0, data_rdata_i[55:24]};
            3'b100:  rdata_w_ext = (operator_i == LW) ? {{32{data_rdata_i[63]}}, data_rdata_i[63:32]} : {32'h0, data_rdata_i[63:32]};
            default: rdata_w_ext = (operator_i == LW) ? {{32{data_rdata_i[31]}}, data_rdata_i[31:0]}  : {32'h0, data_rdata_i[31:0]};
        endcase
    end

    // half words: LH sign-extends, every other operator zero-extends.
    // Offset 7 has no arm of its own and falls back to the lowest half word.
    always_comb begin : sign_extend_half_word
        case (vaddr_i[2:0])
            3'b001:  rdata_h_ext = (operator_i == LH) ? {{48{data_rdata_i[23]}}, data_rdata_i[23:8]}  : {48'h0, data_rdata_i[23:8]};
            3'b010:  rdata_h_ext = (operator_i == LH) ? {{48{data_rdata_i[31]}}, data_rdata_i[31:16]} : {48'h0, data_rdata_i[31:16]};
            3'b011:  rdata_h_ext = (operator_i == LH) ? {{48{data_rdata_i[39]}}, data_rdata_i[39:24]} : {48'h0, data_rdata_i[39:24]};
            3'b100:  rdata_h_ext = (operator_i == LH) ? {{48{data_rdata_i[47]}}, data_rdata_i[47:32]} : {48'h0, data_rdata_i[47:32]};
            3'b101:  rdata_h_ext = (operator_i == LH) ? {{48{data_rdata_i[55]}}, data_rdata_i[55:40]} : {48'h0, data_rdata_i[55:40]};
            3'b110:  rdata_h_ext = (operator_i == LH) ? {{48{data_rdata_i[63]}}, data_rdata_i[63:48]} : {48'h0, data_rdata_i[63:48]};
            default: rdata_h_ext = (operator_i == LH) ? {{48{data_rdata_i[15]}}, data_rdata_i[15:0]}  : {48'h0, data_rdata_i[15:0]};
        endcase
    end

    // bytes: LB sign-extends, every other operator zero-extends.
    // All eight byte lanes are reachable; the default arm serves offset 0.
    always_comb begin : sign_extend_byte
        case (vaddr_i[2:0])
            3'b001:  rdata_b_ext = (operator_i == LB) ? {{56{data_rdata_i[15]}}, data_rdata_i[15:8]}  : {56'h0, data_rdata_i[15:8]};
            3'b010:  rdata_b_ext = (operator_i == LB) ? {{56{data_rdata_i[23]}}, data_rdata_i[23:16]} : {56'h0, data_rdata_i[23:16]};
            3'b011:  rdata_b_ext = (operator_i == LB) ? {{56{data_rdata_i[31]}}, data_rdata_i[31:24]} : {56'h0, data_rdata_i[31:24]};
            3'b100:  rdata_b_ext = (operator_i == LB) ? {{56{data_rdata_i[39]}}, data_rdata_i[39:32]} : {56'h0, data_rdata_i[39:32]};
            3'b101:  rdata_b_ext = (operator_i == LB) ? {{56{data_rdata_i[47]}}, data_rdata_i[47:40]} : {56'h0, data_rdata_i[47:40]};
            3'b110:  rdata_b_ext = (operator_i == LB) ? {{56{data_rdata_i[55]}}, data_rdata_i[55:48]} : {56'h0, data_rdata_i[55:48]};
            3'b111:  rdata_b_ext = (operator_i == LB) ? {{56{data_rdata_i[63]}}, data_rdata_i[63:56]} : {56'h0, data_rdata_i[63:56]};
            default: rdata_b_ext = (operator_i == LB) ? {{56{data_rdata_i[7]}},  data_rdata_i[7:0]}   : {56'h0, data_rdata_i[7:0]};
        endcase
    end

    // result mux: select the extension that belongs to the access size;
    // everything that is not a word/half-word/byte load yields the double word
    always_comb begin : result_select
        case (operator_i)
            LW, LWU: result_o = rdata_w_ext;
            LH, LHU: result_o = rdata_h_ext;
            LB, LBU: result_o = rdata_b_ext;
            default: result_o = rdata_d_ext;
        endcase
    end

endmodule
|
369
src/lsu.sv
369
src/lsu.sv
|
@ -81,15 +81,11 @@ module lsu #(
|
|||
output exception lsu_exception_o // to WB, signal exception status LD/ST exception
|
||||
|
||||
);
|
||||
// byte enable based on operation to perform
|
||||
// data is misaligned
|
||||
logic data_misaligned;
|
||||
|
||||
enum logic [2:0] { IDLE, WAIT_ADDRESSCONFLICT, WAIT_STORE, WAIT_GNT, WAIT_RVALID, WAIT_FLUSH } CS, NS;
|
||||
|
||||
// virtual address as calculated by the AGU in the first cycle
|
||||
logic [63:0] vaddr_i;
|
||||
logic [7:0] be_i;
|
||||
// --------------------------------------
|
||||
// 1st register stage - (stall registers)
|
||||
// --------------------------------------
|
||||
// those are the signals which are always correct
|
||||
// e.g.: they keep the value in the stall case
|
||||
logic [63:0] vaddr;
|
||||
|
@ -97,36 +93,19 @@ module lsu #(
|
|||
logic [7:0] be;
|
||||
fu_op operator;
|
||||
logic [TRANS_ID_BITS-1:0] trans_id;
|
||||
logic lsu_valid;
|
||||
logic lsu_ready_q;
|
||||
// the page offset matches - we need to wait until the store buffer drains
|
||||
logic page_offset_match;
|
||||
|
||||
// registered address in case of a necessary stall
|
||||
logic [63:0] vaddr_q;
|
||||
logic [63:0] data_q;
|
||||
fu_op operator_q;
|
||||
logic [TRANS_ID_BITS-1:0] trans_id_q;
|
||||
logic [7:0] be_q;
|
||||
logic lsu_valid_q;
|
||||
|
||||
// for ld/st address checker
|
||||
logic [63:0] st_buffer_paddr; // physical address for store
|
||||
logic [63:0] st_buffer_data; // store buffer data out
|
||||
logic [7:0] st_buffer_be;
|
||||
logic st_buffer_valid;
|
||||
// store buffer control signals
|
||||
logic st_ready;
|
||||
logic st_valid;
|
||||
// from MMU
|
||||
logic translation_req;
|
||||
// registers at the end of MMU
|
||||
logic translation_valid_n, translation_valid_q;
|
||||
logic [63:0] paddr_n, paddr_q;
|
||||
|
||||
logic [63:0] vaddr_n, vaddr_q;
|
||||
logic [63:0] data_n, data_q;
|
||||
fu_op operator_n, operator_q;
|
||||
logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q;
|
||||
logic [7:0] be_n, be_q;
|
||||
logic stall_n, stall_q;
|
||||
// ------------------------------
|
||||
// Address Generation Unit (AGU)
|
||||
// ------------------------------
|
||||
// virtual address as calculated by the AGU in the first cycle
|
||||
logic [63:0] vaddr_i;
|
||||
logic [7:0] be_i;
|
||||
assign vaddr_i = $signed(imm_i) + $signed(operand_a_i);
|
||||
|
||||
// ---------------
|
||||
|
@ -142,7 +121,9 @@ module lsu #(
|
|||
logic [2:0] data_rvalid_o;
|
||||
logic [2:0][63:0] data_rdata_o;
|
||||
|
||||
// port 0 PTW, port 1 loads, port 2 stores
|
||||
// Port 0: PTW
|
||||
// Port 1: Load Unit
|
||||
// Port 2: Store Unit
|
||||
mem_arbiter mem_arbiter_i (
|
||||
// to D$
|
||||
.address_o ( data_if_address_o ),
|
||||
|
@ -154,8 +135,7 @@ module lsu #(
|
|||
.data_gnt_i ( data_if_data_gnt_i ),
|
||||
.data_rvalid_i ( data_if_data_rvalid_i ),
|
||||
.data_rdata_i ( data_if_data_rdata_i ),
|
||||
|
||||
// from PTW, Load logic and store queue
|
||||
// from PTW, Load Unit and Store Unit
|
||||
.address_i ( address_i ),
|
||||
.data_wdata_i ( data_wdata_i ),
|
||||
.data_req_i ( data_req_i ),
|
||||
|
@ -168,14 +148,6 @@ module lsu #(
|
|||
.*
|
||||
);
|
||||
|
||||
// this is a read only interface
|
||||
assign data_we_i [1] = 1'b0;
|
||||
assign data_wdata_i[1] = 64'b0;
|
||||
assign data_be_i [1] = be_i;
|
||||
|
||||
logic [63:0] rdata;
|
||||
// data coming from arbiter interface 1
|
||||
assign rdata = data_rdata_o[1];
|
||||
// -------------------
|
||||
// MMU e.g.: TLBs/PTW
|
||||
// -------------------
|
||||
|
@ -200,56 +172,13 @@ module lsu #(
|
|||
.data_if_data_rdata_i ( data_rdata_o [0] ),
|
||||
.*
|
||||
);
|
||||
|
||||
// ---------------
|
||||
// Store Queue
|
||||
// ---------------
|
||||
store_queue store_queue_i (
|
||||
// store queue write port
|
||||
.valid_i ( st_valid ),
|
||||
.paddr_i ( paddr_q ),
|
||||
.data_i ( data ),
|
||||
.be_i ( be ),
|
||||
// store buffer in
|
||||
.paddr_o ( st_buffer_paddr ),
|
||||
.data_o ( st_buffer_data ),
|
||||
.valid_o ( st_buffer_valid ),
|
||||
.be_o ( st_buffer_be ),
|
||||
.ready_o ( st_ready ),
|
||||
|
||||
.address_o ( address_i [2] ),
|
||||
.data_wdata_o ( data_wdata_i [2] ),
|
||||
.data_req_o ( data_req_i [2] ),
|
||||
.data_we_o ( data_we_i [2] ),
|
||||
.data_be_o ( data_be_i [2] ),
|
||||
.data_tag_status_o ( data_tag_status_i[2] ),
|
||||
.data_gnt_i ( data_gnt_o [2] ),
|
||||
.data_rvalid_i ( data_rvalid_o [2] ),
|
||||
.*
|
||||
);
|
||||
// ------------------
|
||||
// Store Unit
|
||||
// ------------------
|
||||
|
||||
// ------------------
|
||||
// Address Checker
|
||||
// Load Unit
|
||||
// ------------------
|
||||
// The load should return the data stored by the most recent store to the
|
||||
// same physical address. The most direct way to implement this is to
|
||||
// maintain physical addresses in the store buffer.
|
||||
|
||||
// Of course, there are other micro-architectural techniques to accomplish
|
||||
// the same thing: you can interlock and wait for the store buffer to
|
||||
// drain if the load VA matches any store VA modulo the page size (i.e.
|
||||
// bits 11:0). As a special case, it is correct to bypass if the full VA
|
||||
// matches, and no younger stores' VAs match in bits 11:0.
|
||||
//
|
||||
// checks if the requested load is in the store buffer
|
||||
// page offsets are virtually and physically the same
|
||||
always_comb begin : address_checker
|
||||
page_offset_match = 1'b0;
|
||||
// check if the LSBs are identical and the entry is valid
|
||||
if ((vaddr[11:3] == st_buffer_paddr[11:3]) && st_buffer_valid) begin
|
||||
page_offset_match = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
// ------------------
|
||||
// LSU Control (FSM)
|
||||
|
@ -259,148 +188,7 @@ module lsu #(
|
|||
// connect the load logic to the memory arbiter
|
||||
// assign address_i[1] = paddr_o;
|
||||
always_comb begin : lsu_control
|
||||
automatic logic make_request = 1'b0;
|
||||
// default assignment
|
||||
NS = CS;
|
||||
lsu_trans_id_o = trans_id;
|
||||
lsu_ready_o = 1'b1;
|
||||
// LSU result is valid
|
||||
// we need to give the valid result even to stores
|
||||
lsu_valid_o = data_rvalid_o[1];
|
||||
// is the store valid e.g.: can we put it in the store buffer
|
||||
st_valid = 1'b0;
|
||||
// as a default we are not requesting on the read interface
|
||||
data_req_i[1] = 1'b0;
|
||||
// request the address translation
|
||||
translation_req = 1'b0;
|
||||
// as a default let the translation be valid
|
||||
data_tag_status_i[1] = `VALID_TRANSLATION;
|
||||
// in cycle zero output the virtual address, if we do not do VA translation than this is the last
|
||||
// address we output if we have it enabled we output the tag field in the next cycle, along with
|
||||
// the tag ready signal
|
||||
address_i[1] = vaddr;
|
||||
// as a default we are not making a new request
|
||||
make_request = 1'b0;
|
||||
|
||||
unique case (CS)
|
||||
IDLE: begin
|
||||
if (lsu_valid) begin
|
||||
make_request = 1'b1;
|
||||
end
|
||||
end
|
||||
|
||||
WAIT_STORE: begin
|
||||
// as a default lets say we are not ready
|
||||
lsu_ready_o = 1'b0;
|
||||
// check for a valid translation and a place in the store buffer
|
||||
// but only check for a valid translation if we actually enabled translation
|
||||
if (enable_translation_i) begin
|
||||
if (translation_valid_q) begin
|
||||
st_valid = 1'b1;
|
||||
lsu_ready_o = 1'b1;
|
||||
lsu_valid_o = 1'b1;
|
||||
// we can make a new request here
|
||||
if (lsu_valid_i)
|
||||
make_request = 1'b1;
|
||||
end
|
||||
end else begin
|
||||
st_valid = 1'b1;
|
||||
lsu_ready_o = 1'b1;
|
||||
lsu_valid_o = 1'b1;
|
||||
// we can make a new request here
|
||||
if (lsu_valid_i)
|
||||
make_request = 1'b1;
|
||||
end
|
||||
|
||||
// we can make a new request here!!
|
||||
end
|
||||
// we are waiting for the grant in this stage
|
||||
WAIT_GNT: begin
|
||||
// we are never ready if we are waiting for a grant
|
||||
lsu_ready_o = 1'b0;
|
||||
// got a grant wait for the rvalid do the same as if we got it x cycles earlier
|
||||
if (data_gnt_o[1]) begin
|
||||
NS = WAIT_RVALID;
|
||||
end
|
||||
end
|
||||
// we are waiting for the rvalid in here, the reason we came here is that we already got a data grant
|
||||
// so depending on whether we do address translation we need to send the tag here or simply wait for the rvalid
|
||||
WAIT_RVALID: begin
|
||||
lsu_ready_o = 1'b0;
|
||||
// we are doing address translation
|
||||
if (enable_translation_i) begin
|
||||
// translation from the previous cycle was valid
|
||||
if (translation_valid_q) begin
|
||||
lsu_ready_o = 1'b1;
|
||||
// output the physical address
|
||||
address_i[1] = paddr_q;
|
||||
// we can make a new request here
|
||||
if (lsu_valid_i)
|
||||
make_request = 1'b1;
|
||||
end else begin // TODO: Abort in case of access
|
||||
data_tag_status_i[1] = `WAIT_TRANSLATION;
|
||||
end
|
||||
// we do not need address translation, we can simply wait for the rvalid
|
||||
end else if (lsu_valid) begin
|
||||
// we can make a new request here
|
||||
make_request = 1'b1;
|
||||
lsu_ready_o = 1'b1;
|
||||
end
|
||||
end
|
||||
// we've got a flush request but have an outstanding rvalid, wait for it
|
||||
WAIT_FLUSH: begin
|
||||
lsu_ready_o = 1'b0;
|
||||
// wait for the rvalid to arrive
|
||||
if (data_rvalid_o[1])
|
||||
NS = IDLE;
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
// ------------------
|
||||
// New Request Logic
|
||||
// ------------------
|
||||
if (make_request) begin
|
||||
// if we got a valid request and page offsets don't match we can continue
|
||||
if (!page_offset_match) begin
|
||||
// we have something to do - so lets request this address translation, if address translation is not enabled
|
||||
// this will simply tell the MMU to bypass our VA (= PA)
|
||||
translation_req = 1'b1;
|
||||
// check if this operation is a load or store
|
||||
// it is a LOAD
|
||||
if (op == LD_OP) begin
|
||||
|
||||
data_req_i[1] = 1'b1; // request this address
|
||||
// if address translation is enabled wait for the tag in second (or n-th) cycle
|
||||
if (enable_translation_i)
|
||||
data_tag_status_i[1] = `WAIT_TRANSLATION; // wait for the second part of the address
|
||||
|
||||
// we've got a grant, we can proceed as normal
|
||||
if (data_gnt_o[1]) begin
|
||||
NS = WAIT_RVALID;
|
||||
end else begin
|
||||
// we need to wait for the grant to arrive before proceeding
|
||||
NS = WAIT_GNT;
|
||||
end
|
||||
end
|
||||
// a store does not need to pass the address conflict check because it can't conflict
|
||||
end else if (op == ST_OP) begin
|
||||
// A store can pass through if the store buffer is not full
|
||||
if (st_ready) begin
|
||||
translation_req = 1'b1;
|
||||
// e.g.: if the address was valid
|
||||
NS = WAIT_STORE;
|
||||
end else begin
|
||||
lsu_ready_o = 1'b0;
|
||||
end
|
||||
// we've got an address conflict wait until we resolved it
|
||||
end else begin
|
||||
lsu_ready_o = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
if (flush_i && CS == WAIT_RVALID && !data_rvalid_o[1]) begin
|
||||
NS = WAIT_FLUSH;
|
||||
end
|
||||
end
|
||||
|
||||
// determine whether this is a load or store
|
||||
|
@ -417,18 +205,18 @@ module lsu #(
|
|||
|
||||
|
||||
// ---------------
|
||||
// Byte Enable - TODO: Find a more beautiful way to accomplish this functionality
|
||||
// Byte Enable
|
||||
// ---------------
|
||||
always_comb begin : byte_enable
|
||||
be_i = 8'b0;
|
||||
// we can generate the byte enable from the virtual address since the last
|
||||
// 12 bit are the same anyway
|
||||
// and we can always generate the byte enable from the address at hand
|
||||
case (operator)
|
||||
case (operator_i)
|
||||
LD, SD: // double word
|
||||
be_i = 8'b1111_1111;
|
||||
LW, LWU, SW: // word
|
||||
case (vaddr[2:0])
|
||||
case (vaddr_i[2:0])
|
||||
3'b000: be_i = 8'b0000_1111;
|
||||
3'b001: be_i = 8'b0001_1110;
|
||||
3'b010: be_i = 8'b0011_1100;
|
||||
|
@ -437,7 +225,7 @@ module lsu #(
|
|||
default:;
|
||||
endcase
|
||||
LH, LHU, SH: // half word
|
||||
case (vaddr[2:0])
|
||||
case (vaddr_i[2:0])
|
||||
3'b000: be_i = 8'b0000_0011;
|
||||
3'b001: be_i = 8'b0000_0110;
|
||||
3'b010: be_i = 8'b0000_1100;
|
||||
|
@ -448,7 +236,7 @@ module lsu #(
|
|||
default:;
|
||||
endcase
|
||||
LB, LBU, SB: // byte
|
||||
case (vaddr[2:0])
|
||||
case (vaddr_i[2:0])
|
||||
3'b000: be_i = 8'b0000_0001;
|
||||
3'b001: be_i = 8'b0000_0010;
|
||||
3'b010: be_i = 8'b0000_0100;
|
||||
|
@ -463,65 +251,6 @@ module lsu #(
|
|||
endcase
|
||||
end
|
||||
|
||||
// ---------------
|
||||
// Sign Extend
|
||||
// ---------------
|
||||
logic [63:0] rdata_d_ext; // sign extension for double words, actually only misaligned assembly
|
||||
logic [63:0] rdata_w_ext; // sign extension for words
|
||||
logic [63:0] rdata_h_ext; // sign extension for half words
|
||||
logic [63:0] rdata_b_ext; // sign extension for bytes
|
||||
|
||||
// double words
|
||||
always_comb begin : sign_extend_double_word
|
||||
rdata_d_ext = rdata[63:0];
|
||||
end
|
||||
|
||||
// sign extension for words
|
||||
always_comb begin : sign_extend_word
|
||||
case (vaddr[2:0])
|
||||
default: rdata_w_ext = (operator_i == LW) ? {{32{rdata[31]}}, rdata[31:0]} : {32'h0, rdata[31:0]};
|
||||
3'b001: rdata_w_ext = (operator_i == LW) ? {{32{rdata[39]}}, rdata[39:8]} : {32'h0, rdata[39:8]};
|
||||
3'b010: rdata_w_ext = (operator_i == LW) ? {{32{rdata[47]}}, rdata[47:16]} : {32'h0, rdata[47:16]};
|
||||
3'b011: rdata_w_ext = (operator_i == LW) ? {{32{rdata[55]}}, rdata[55:24]} : {32'h0, rdata[55:24]};
|
||||
3'b100: rdata_w_ext = (operator_i == LW) ? {{32{rdata[63]}}, rdata[63:32]} : {32'h0, rdata[63:32]};
|
||||
endcase
|
||||
end
|
||||
|
||||
// sign extension for half words
|
||||
always_comb begin : sign_extend_half_word
|
||||
case (vaddr[2:0])
|
||||
default: rdata_h_ext = (operator == LH) ? {{48{rdata[15]}}, rdata[15:0]} : {48'h0, rdata[15:0]};
|
||||
3'b001: rdata_h_ext = (operator == LH) ? {{48{rdata[23]}}, rdata[23:8]} : {48'h0, rdata[23:8]};
|
||||
3'b010: rdata_h_ext = (operator == LH) ? {{48{rdata[31]}}, rdata[31:16]} : {48'h0, rdata[31:16]};
|
||||
3'b011: rdata_h_ext = (operator == LH) ? {{48{rdata[39]}}, rdata[39:24]} : {48'h0, rdata[39:24]};
|
||||
3'b100: rdata_h_ext = (operator == LH) ? {{48{rdata[47]}}, rdata[47:32]} : {48'h0, rdata[47:32]};
|
||||
3'b101: rdata_h_ext = (operator == LH) ? {{48{rdata[55]}}, rdata[55:40]} : {48'h0, rdata[55:40]};
|
||||
3'b110: rdata_h_ext = (operator == LH) ? {{48{rdata[63]}}, rdata[63:48]} : {48'h0, rdata[63:48]};
|
||||
endcase
|
||||
end
|
||||
|
||||
always_comb begin : sign_extend_byte
|
||||
case (vaddr[2:0])
|
||||
default: rdata_b_ext = (operator == LB) ? {{56{rdata[7]}}, rdata[7:0]} : {56'h0, rdata[7:0]};
|
||||
3'b001: rdata_b_ext = (operator == LB) ? {{56{rdata[15]}}, rdata[15:8]} : {56'h0, rdata[15:8]};
|
||||
3'b010: rdata_b_ext = (operator == LB) ? {{56{rdata[23]}}, rdata[23:16]} : {56'h0, rdata[23:16]};
|
||||
3'b011: rdata_b_ext = (operator == LB) ? {{56{rdata[31]}}, rdata[31:24]} : {56'h0, rdata[31:24]};
|
||||
3'b100: rdata_b_ext = (operator == LB) ? {{56{rdata[39]}}, rdata[39:32]} : {56'h0, rdata[39:32]};
|
||||
3'b101: rdata_b_ext = (operator == LB) ? {{56{rdata[47]}}, rdata[47:40]} : {56'h0, rdata[47:40]};
|
||||
3'b110: rdata_b_ext = (operator == LB) ? {{56{rdata[55]}}, rdata[55:48]} : {56'h0, rdata[55:48]};
|
||||
3'b111: rdata_b_ext = (operator == LB) ? {{56{rdata[63]}}, rdata[63:56]} : {56'h0, rdata[63:56]};
|
||||
endcase
|
||||
end
|
||||
|
||||
always_comb begin
|
||||
case (operator)
|
||||
LW, LWU: lsu_result_o = rdata_w_ext;
|
||||
LH, LHU: lsu_result_o = rdata_h_ext;
|
||||
LB, LBU: lsu_result_o = rdata_b_ext;
|
||||
default: lsu_result_o = rdata_d_ext;
|
||||
endcase
|
||||
end
|
||||
|
||||
// ------------------
|
||||
// Exception Control
|
||||
// ------------------
|
||||
|
@ -579,53 +308,57 @@ module lsu #(
|
|||
// it can either be feedthrough from the issue stage or from the internal register
|
||||
always_comb begin : input_select
|
||||
// if we are stalling use the values we saved
|
||||
if (!lsu_ready_q) begin
|
||||
if (stall_q) begin
|
||||
vaddr = vaddr_q;
|
||||
data = data_q;
|
||||
operator = operator_q;
|
||||
trans_id = trans_id_q;
|
||||
be = be_q;
|
||||
lsu_valid = lsu_valid_q;
|
||||
end else begin // otherwise bypass them
|
||||
vaddr = vaddr_i;
|
||||
data = operand_b_i;
|
||||
operator = operator_i;
|
||||
trans_id = trans_id_i;
|
||||
be = be_i;
|
||||
lsu_valid = lsu_valid_i;
|
||||
end
|
||||
end
|
||||
// 1st register stage
|
||||
always_comb begin : register_stage
|
||||
vaddr_n = vaddr_q;
|
||||
data_n = data_q;
|
||||
operator_n = operator_q;
|
||||
trans_id_n = trans_id_q;
|
||||
be_n = be_q;
|
||||
stall_n = 1'b1;
|
||||
// get new input data
|
||||
if (lsu_ready_o) begin
|
||||
vaddr_n = vaddr_i;
|
||||
data_n = operand_b_i;
|
||||
operator_n = operator_i;
|
||||
trans_id_n = trans_id_i;
|
||||
be_n = be_i;
|
||||
stall_n = 1'b0;
|
||||
end
|
||||
end
|
||||
|
||||
// registers
|
||||
always_ff @(posedge clk_i or negedge rst_ni) begin
|
||||
if (~rst_ni) begin
|
||||
CS <= IDLE;
|
||||
lsu_ready_q <= 1'b1;
|
||||
// 1st LSU stage
|
||||
vaddr_q <= 64'b0;
|
||||
data_q <= 64'b0;
|
||||
operator_q <= ADD;
|
||||
trans_id_q <= '{default: 0};
|
||||
be_q <= 8'b0;
|
||||
lsu_valid_q <= 1'b0;
|
||||
// 2nd LSU stage (after MMU)
|
||||
paddr_q <= 64'b0;
|
||||
translation_valid_q <= 1'b0;
|
||||
stall_q <= 1'b0;
|
||||
end else begin
|
||||
CS <= NS;
|
||||
lsu_ready_q <= lsu_ready_o;
|
||||
// 1st LSU stage
|
||||
if (lsu_ready_o) begin
|
||||
vaddr_q <= vaddr_i;
|
||||
data_q <= operand_b_i;
|
||||
operator_q <= operator_i;
|
||||
trans_id_q <= trans_id_i;
|
||||
be_q <= be_i;
|
||||
lsu_valid_q <= lsu_valid_i;
|
||||
end
|
||||
// 2nd LSU stage (after MMU)
|
||||
paddr_q <= paddr_n;
|
||||
translation_valid_q <= translation_valid_n;
|
||||
vaddr_q <= vaddr_n;
|
||||
data_q <= data_n;
|
||||
operator_q <= operator_n;
|
||||
trans_id_q <= trans_id_n;
|
||||
be_q <= be_n;
|
||||
stall_q <= stall_n;
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -0,0 +1,84 @@
|
|||
import ariane_pkg::*;
|
||||
|
||||
// -----------------------------------------------------------------------------
// Store Unit (work in progress)
//
// Wraps the store queue and the address checker which reports whether an
// address hits the 8-byte-aligned page offset (bits 11:3) of the entry the
// store queue currently presents on its buffer output.
//
// NOTE(review): in this revision only result_o and page_offset_matches_o are
// driven by this module itself. valid_o, ready_o, trans_id_o,
// translation_req_o and vaddr_o have no driver yet; the memory interface
// outputs are presumably picked up by the store queue through the implicit
// `.*` connection - confirm against the store_queue port list.
// -----------------------------------------------------------------------------
module store_unit (
    input  logic                     clk_i,    // Clock
    input  logic                     rst_ni,   // Asynchronous reset active low
    // store unit input port
    input  fu_op                     operator_i, // operation to perform (same type as in the LSU)
    input  logic                     valid_i,
    input  logic [63:0]              vaddr_i,
    input  logic [7:0]               be_i,
    input  logic                     commit_i,
    // store unit output port
    output logic                     valid_o,
    output logic                     ready_o,
    output logic [TRANS_ID_BITS-1:0] trans_id_o,
    output logic [63:0]              result_o,
    // MMU -> Address Translation
    output logic                     translation_req_o,   // request address translation
    output logic [63:0]              vaddr_o,             // virtual address out
    input  logic [63:0]              paddr_i,             // physical address in
    input  logic                     translation_valid_i,
    // address checker
    input  logic [11:0]              page_offset_i,
    output logic                     page_offset_matches_o,
    // memory interface
    output logic [63:0]              address_o,
    output logic [63:0]              data_wdata_o,
    output logic                     data_req_o,
    output logic                     data_we_o,
    output logic [7:0]               data_be_o,
    output logic [1:0]               data_tag_status_o,
    input  logic                     data_gnt_i,
    input  logic                     data_rvalid_i
);
    // a store does not produce a result value
    assign result_o = 64'b0;

    logic [63:0] st_buffer_paddr;  // physical address for store
    logic [63:0] st_buffer_data;   // store buffer data out
    logic [7:0]  st_buffer_be;
    logic        st_buffer_valid;
    // store buffer control signals
    logic        st_ready;
    logic        st_valid;

    // Write-port operands of the store queue. They are declared explicitly so
    // that the connections below do not create implicit 1-bit nets.
    // TODO: none of the three (nor st_valid) has a driver in this module yet.
    logic [63:0] paddr_q;
    logic [63:0] data;
    logic [7:0]  be;

    // ---------------
    // Store Queue
    // ---------------
    store_queue store_queue_i (
        // store queue write port
        .valid_i           ( st_valid            ),
        .paddr_i           ( paddr_q             ),
        .data_i            ( data                ),
        .be_i              ( be                  ),
        // store buffer in
        .paddr_o           ( st_buffer_paddr     ),
        .data_o            ( st_buffer_data      ),
        .valid_o           ( st_buffer_valid     ),
        .be_o              ( st_buffer_be        ),
        .ready_o           ( st_ready            ),
        // every remaining port is connected by name
        .*
    );

    // ------------------
    // Address Checker
    // ------------------
    // The load should return the data stored by the most recent store to the
    // same physical address. The most direct way to implement this is to
    // maintain physical addresses in the store buffer.

    // Of course, there are other micro-architectural techniques to accomplish
    // the same thing: you can interlock and wait for the store buffer to
    // drain if the load VA matches any store VA modulo the page size (i.e.
    // bits 11:0). As a special case, it is correct to bypass if the full VA
    // matches, and no younger stores' VAs match in bits 11:0.
    //
    // checks if the requested load is in the store buffer
    // page offsets are virtually and physically the same
    //
    // NOTE(review): the comparison uses vaddr_i while the page_offset_i port
    // is unused - presumably the offset to check is meant to arrive on
    // page_offset_i; confirm before wiring this unit up.
    always_comb begin : address_checker
        page_offset_matches_o = 1'b0;
        // check if the LSBs are identical and the entry is valid
        if ((vaddr_i[11:3] == st_buffer_paddr[11:3]) && st_buffer_valid) begin
            page_offset_matches_o = 1'b1;
        end
    end

endmodule
|
Loading…
Add table
Add a link
Reference in a new issue