New LSU control, enabling separate index and tag

This commit is contained in:
Florian Zaruba 2017-05-18 17:18:41 +02:00
parent d7d7f8cd52
commit 3d4252e738

View file

@ -18,6 +18,14 @@
//
import ariane_pkg::*;
// ---------------
// D$ Tag Status
// ---------------
// 2-bit status codes that the LSU control logic drives onto data_tag_status_i[1].
// Driven while a load still waits for its address translation.
`define WAIT_TRANSLATION 2'b00
// Default status driven by the LSU control logic.
`define VALID_TRANSLATION 2'b01
// NOTE(review): presumably reports an aborted translation; not driven anywhere in the visible code - confirm.
`define ABORT_TRANSLATION 2'b10
// Remaining encoding; not driven anywhere in the visible code.
`define NOT_IMPL 2'b11
module lsu #(
parameter int ASID_WIDTH = 1
)(
@ -78,12 +86,11 @@ module lsu #(
// data is misaligned
logic data_misaligned;
enum logic [2:0] { IDLE, STORE, LOAD_WAIT_TRANSLATION, LOAD_WAIT_GNT, LOAD_WAIT_RVALID } CS, NS;
enum logic [2:0] { IDLE, WAIT_ADDRESSCONFLICT, WAIT_STORE, WAIT_GNT, WAIT_RVALID, WAIT_FLUSH } CS, NS;
// virtual address as calculated by the AGU in the first cycle
logic [63:0] vaddr_i;
// gets the data from the register
logic get_from_register;
logic [7:0] be_i;
// those are the signals which are always correct
// e.g.: they keep the value in the stall case
logic [63:0] vaddr;
@ -91,14 +98,18 @@ module lsu #(
logic [7:0] be;
fu_op operator;
logic [TRANS_ID_BITS-1:0] trans_id;
logic lsu_valid;
logic lsu_ready_q;
// the page offset matches - we need to wait until the store buffer drains
logic page_offset_match;
// registered address in case of a necessary stall
logic [63:0] vaddr_q;
logic [63:0] data_q;
fu_op operator_q;
logic [TRANS_ID_BITS-1:0] trans_id_q;
// stall signal e.g.: do not update registers from above
logic stall;
logic [7:0] be_q;
logic lsu_valid_q;
// for ld/st address checker
logic [63:0] st_buffer_paddr; // physical address for store
@ -109,8 +120,10 @@ module lsu #(
logic st_ready;
logic st_valid;
// from MMU
logic translation_req, translation_valid;
logic [63:0] paddr_o;
logic translation_req;
// registers at the end of MMU
logic translation_valid_n, translation_valid_q;
logic [63:0] paddr_n, paddr_q;
// ------------------------------
// Address Generation Unit (AGU)
@ -156,12 +169,11 @@ module lsu #(
.*
);
// connect the load logic to the memory arbiter
assign address_i [1] = paddr_o;
// this is a read only interface
assign data_we_i [1] = 1'b0;
assign data_wdata_i [1] = 64'b0;
assign data_be_i [1] = be;
assign data_we_i [1] = 1'b0;
assign data_wdata_i[1] = 64'b0;
assign data_be_i [1] = be_i;
logic [63:0] rdata;
// data coming from arbiter interface 1
assign rdata = data_rdata_o[1];
@ -169,14 +181,14 @@ module lsu #(
// MMU e.g.: TLBs/PTW
// -------------------
mmu #(
.INSTR_TLB_ENTRIES ( 16 ),
.DATA_TLB_ENTRIES ( 16 ),
.ASID_WIDTH ( ASID_WIDTH )
.INSTR_TLB_ENTRIES ( 16 ),
.DATA_TLB_ENTRIES ( 16 ),
.ASID_WIDTH ( ASID_WIDTH )
) mmu_i (
.lsu_req_i ( translation_req ),
.lsu_vaddr_i ( vaddr ),
.lsu_valid_o ( translation_valid ),
.lsu_paddr_o ( paddr_o ),
.lsu_req_i ( translation_req ),
.lsu_vaddr_i ( vaddr ),
.lsu_valid_o ( translation_valid_n ),
.lsu_paddr_o ( paddr_n ),
// connecting PTW to D$ IF (aka mem arbiter)
.data_if_address_o ( address_i [0] ),
.data_if_data_wdata_o ( data_wdata_i [0] ),
@ -189,70 +201,13 @@ module lsu #(
.*
);
// ------------------
// Address Checker
// ------------------
logic page_offset_match;
// checks if the requested load is in the store buffer
// page offsets are virtually and physically the same
always_comb begin : address_checker
    // Conservative conflict check: only the double-word index inside the page
    // (address bits 11:3) is compared against the valid store buffer entry.
    // TODO: implement properly, this is overly pessimistic
    if (st_buffer_valid & (st_buffer_paddr[11:3] == paddr_o[11:3])) begin
        page_offset_match = 1'b1;
    end else begin
        page_offset_match = 1'b0;
    end
end
// ------------------
// LSU Control
// ------------------
// is the operation a load or store or nothing of relevance for the LSU
enum logic [1:0] { NONE, LD_OP, ST_OP } op;
always_comb begin : lsu_control
    // ---- state: stay where we are unless a case item says otherwise
    NS                = CS;

    // ---- issue-side handshake
    // ready for a new operation by default, no result to hand back
    // (stores need to report a valid result as well)
    lsu_ready_o       = 1'b1;
    lsu_valid_o       = 1'b0;
    lsu_trans_id_o    = trans_id;

    // ---- memory side
    // nothing is requested on the read interface, no translation is
    // requested and nothing is pushed into the store buffer
    data_req_i[1]     = 1'b0;
    translation_req   = 1'b0;
    st_valid          = 1'b0;

    // ---- operand handling
    // no stall, operands are not taken from the internal registers
    stall             = 1'b0;
    get_from_register = 1'b0;

    // no state-specific behaviour: every state keeps the defaults above
    unique case (CS)
        default: ;
    endcase
end
// determine whether this is a load or store
always_comb begin : which_op
    // Classify the incoming operator into load / store / not-for-the-LSU.
    if (operator_i inside {LD, LW, LWU, LH, LHU, LB, LBU}) begin
        // any load flavour
        op = LD_OP;
    end else if (operator_i inside {SD, SW, SH, SB}) begin
        // any store flavour
        op = ST_OP;
    end else begin
        // nothing the LSU has to handle
        op = NONE;
    end
end
// ---------------
// Store Queue
// ---------------
store_queue store_queue_i (
// store queue write port
.valid_i ( st_valid ),
.paddr_i ( paddr_o ),
.paddr_i ( paddr_q ),
.data_i ( data ),
.be_i ( be ),
// store buffer in
@ -272,57 +227,248 @@ module lsu #(
.*
);
// ------------------
// Address Checker
// ------------------
// The load should return the data stored by the most recent store to the
// same physical address. The most direct way to implement this is to
// maintain physical addresses in the store buffer.
// Of course, there are other micro-architectural techniques to accomplish
// the same thing: you can interlock and wait for the store buffer to
// drain if the load VA matches any store VA modulo the page size (i.e.
// bits 11:0). As a special case, it is correct to bypass if the full VA
// matches, and no younger stores' VAs match in bits 11:0.
//
// checks if the requested load is in the store buffer
// page offsets are virtually and physically the same
always_comb begin : address_checker
    // Conservative conflict check on the page offset: bits 11:3 of the
    // virtual address are compared against the same bits of the valid
    // store buffer entry.
    // TODO: implement properly, this is overly pessimistic
    if (st_buffer_valid & (st_buffer_paddr[11:3] == vaddr[11:3])) begin
        page_offset_match = 1'b1;
    end else begin
        page_offset_match = 1'b0;
    end
end
// ------------------
// LSU Control (FSM)
// ------------------
// is the operation a load or store or nothing of relevance for the LSU
enum logic [1:0] { NONE, LD_OP, ST_OP } op;
// connect the load logic to the memory arbiter
// assign address_i[1] = paddr_o;
// LSU control FSM, combinational part.
// From the current state CS it computes the next state NS and drives
// lsu_trans_id_o, lsu_ready_o, lsu_valid_o, st_valid, data_req_i[1],
// translation_req, data_tag_status_i[1] and address_i[1].
//
// NOTE(review): in the visible code only WAIT_FLUSH ever moves back to IDLE;
// WAIT_STORE and WAIT_RVALID keep their state when no new request follows.
// Confirm whether that is intended.
always_comb begin : lsu_control
    automatic logic make_request = 1'b0;
    automatic logic is_load      = 1'b0;
    automatic logic is_store     = 1'b0;

    // Classify the currently selected operation. `operator` is a fu_op, so it
    // has to be matched against the fu_op encodings; LD_OP/ST_OP belong to the
    // separate `op` enum and must not be compared against it.
    is_load  = operator inside {LD, LW, LWU, LH, LHU, LB, LBU};
    is_store = operator inside {SD, SW, SH, SB};

    // default assignment
    NS                   = CS;
    lsu_trans_id_o       = trans_id;
    lsu_ready_o          = 1'b1;
    // LSU result is valid
    // we need to give the valid result even to stores
    lsu_valid_o          = 1'b0;
    // is the store valid e.g.: can we put it in the store buffer
    st_valid             = 1'b0;
    // as a default we are not requesting on the read interface
    data_req_i[1]        = 1'b0;
    // request the address translation
    translation_req      = 1'b0;
    // as a default let the translation be valid
    data_tag_status_i[1] = `VALID_TRANSLATION;
    // In cycle zero output the virtual address. If we do not do VA translation
    // then this is the last address we output; if translation is enabled we
    // output the tag field in the next cycle, along with the tag ready signal.
    address_i[1]         = vaddr;
    // as a default we are not making a new request
    make_request         = 1'b0;

    unique case (CS)
        IDLE: begin
            if (lsu_valid) begin
                make_request = 1'b1;
            end
        end

        WAIT_STORE: begin
            // as a default lets say we are not ready
            lsu_ready_o = 1'b0;
            // check for a valid translation and a place in the store buffer
            // but only check for a valid translation if we actually enabled translation
            if (enable_translation_i) begin
                if (translation_valid_q) begin
                    st_valid    = 1'b1;
                    lsu_ready_o = 1'b1;
                    lsu_valid_o = 1'b1;
                    // we can make a new request here
                    if (lsu_valid_i)
                        make_request = 1'b1;
                end
            end else begin
                st_valid    = 1'b1;
                lsu_ready_o = 1'b1;
                lsu_valid_o = 1'b1;
                // we can make a new request here
                if (lsu_valid_i)
                    make_request = 1'b1;
            end
        end

        // we are waiting for the grant in this stage
        WAIT_GNT: begin
            // we are never ready if we are waiting for a grant
            lsu_ready_o = 1'b0;
            // got a grant: wait for the rvalid, same as if we got it x cycles earlier
            if (data_gnt_o[1]) begin
                NS = WAIT_RVALID;
            end
        end

        // We already got a data grant. Depending on whether we do address
        // translation we need to send the tag here or simply wait for the rvalid.
        WAIT_RVALID: begin
            lsu_ready_o = 1'b0;
            // we are doing address translation
            if (enable_translation_i) begin
                // translation from the previous cycle was valid
                if (translation_valid_q) begin
                    // output the physical address
                    address_i[1] = paddr_q;
                    // wait for the rvalid
                    if (data_rvalid_o[1]) begin
                        lsu_ready_o = 1'b1;
                        lsu_valid_o = 1'b1;
                        // we can make a new request here
                        if (lsu_valid_i)
                            make_request = 1'b1;
                    end
                end else begin
                    data_tag_status_i[1] = `WAIT_TRANSLATION;
                end
            // we do not need address translation, we can simply wait for the
            // rvalid of the load port (index 1, as everywhere else in this block)
            end else if (data_rvalid_o[1]) begin
                // the load has completed: report the result and accept a new
                // operation, exactly as in the translation-enabled branch
                lsu_ready_o = 1'b1;
                lsu_valid_o = 1'b1;
                // we can make a new request here
                if (lsu_valid_i)
                    make_request = 1'b1;
            end
        end

        // we've got a flush request but have an outstanding rvalid, wait for it
        WAIT_FLUSH: begin
            lsu_ready_o = 1'b0;
            // wait for the rvalid to arrive
            if (data_rvalid_o[1])
                NS = IDLE;
        end

        default:;
    endcase

    // ------------------
    // New Request Logic
    // ------------------
    if (make_request) begin
        if (is_load) begin
            // a load may only proceed if its page offset does not match the store buffer
            if (!page_offset_match) begin
                // request the address translation, if address translation is not
                // enabled this will simply tell the MMU to bypass our VA (= PA)
                translation_req = 1'b1;
                // request this address
                data_req_i[1]   = 1'b1;
                // if address translation is enabled wait for the tag in second (or n-th) cycle
                if (enable_translation_i)
                    data_tag_status_i[1] = `WAIT_TRANSLATION;
                // with a grant we can proceed as normal, otherwise wait for it
                if (data_gnt_o[1]) begin
                    NS = WAIT_RVALID;
                end else begin
                    NS = WAIT_GNT;
                end
            end else begin
                // we've got an address conflict, wait until it is resolved
                lsu_ready_o = 1'b0;
            end
        // a store does not need to pass the address conflict check because it can't conflict
        end else if (is_store) begin
            // a store can pass through if the store buffer is not full
            if (st_ready) begin
                // WAIT_STORE checks translation_valid_q, so request the translation now
                translation_req = 1'b1;
                NS = WAIT_STORE;
            end else begin
                lsu_ready_o = 1'b0;
            end
        end
    end

    // a flush while a read is still outstanding: the rvalid must be drained first
    if (flush_i && CS == WAIT_RVALID && !data_rvalid_o[1]) begin
        NS = WAIT_FLUSH;
    end
end
// determine whether this is a load or store
always_comb begin : which_op
    // Classify the incoming operator into load / store / not-for-the-LSU.
    if (operator_i inside {LD, LW, LWU, LH, LHU, LB, LBU}) begin
        // any load flavour
        op = LD_OP;
    end else if (operator_i inside {SD, SW, SH, SB}) begin
        // any store flavour
        op = ST_OP;
    end else begin
        // nothing the LSU has to handle
        op = NONE;
    end
end
// ---------------
// Byte Enable - TODO: Find a more beautiful way to accomplish this functionality
// ---------------
// Builds the 8-bit byte-enable mask of an access: one bit per byte lane,
// selected by the access size (taken from `operator`) and by the byte offset
// vaddr[2:0]. Offsets that have no explicit case item keep the all-zero mask.
// NOTE(review): every case item below is listed twice, once assigning `be` and
// once assigning `be_i`. This looks like the pre- and post-change lines of a
// diff rendered together - confirm which target is the current one.
always_comb begin : byte_enable
// default: no byte lane enabled
be = 8'b0;
be_i = 8'b0;
// we can generate the byte enable from the virtual address since the last
// 12 bit are the same anyway
// and we can always generate the byte enable from the address at hand
case (operator)
LD, SD: // double word
// all eight byte lanes
be = 8'b1111_1111;
be_i = 8'b1111_1111;
LW, LWU, SW: // word
// four consecutive lanes starting at the byte offset; offsets 5..7 have no entry
case (vaddr[2:0])
3'b000: be = 8'b0000_1111;
3'b001: be = 8'b0001_1110;
3'b010: be = 8'b0011_1100;
3'b011: be = 8'b0111_1000;
3'b100: be = 8'b1111_0000;
3'b000: be_i = 8'b0000_1111;
3'b001: be_i = 8'b0001_1110;
3'b010: be_i = 8'b0011_1100;
3'b011: be_i = 8'b0111_1000;
3'b100: be_i = 8'b1111_0000;
default:;
endcase
LH, LHU, SH: // half word
// two consecutive lanes starting at the byte offset; offset 7 has no entry
case (vaddr[2:0])
3'b000: be = 8'b0000_0011;
3'b001: be = 8'b0000_0110;
3'b010: be = 8'b0000_1100;
3'b011: be = 8'b0001_1000;
3'b100: be = 8'b0011_0000;
3'b101: be = 8'b0110_0000;
3'b110: be = 8'b1100_0000;
3'b000: be_i = 8'b0000_0011;
3'b001: be_i = 8'b0000_0110;
3'b010: be_i = 8'b0000_1100;
3'b011: be_i = 8'b0001_1000;
3'b100: be_i = 8'b0011_0000;
3'b101: be_i = 8'b0110_0000;
3'b110: be_i = 8'b1100_0000;
default:;
endcase
LB, LBU, SB: // byte
// exactly one lane, every offset is covered
case (vaddr[2:0])
3'b000: be = 8'b0000_0001;
3'b001: be = 8'b0000_0010;
3'b010: be = 8'b0000_0100;
3'b011: be = 8'b0000_1000;
3'b100: be = 8'b0001_0000;
3'b101: be = 8'b0010_0000;
3'b110: be = 8'b0100_0000;
3'b111: be = 8'b1000_0000;
3'b000: be_i = 8'b0000_0001;
3'b001: be_i = 8'b0000_0010;
3'b010: be_i = 8'b0000_0100;
3'b011: be_i = 8'b0000_1000;
3'b100: be_i = 8'b0001_0000;
3'b101: be_i = 8'b0010_0000;
3'b110: be_i = 8'b0100_0000;
3'b111: be_i = 8'b1000_0000;
endcase
// any other operator: no byte lane enabled
default:
be = 8'b0;
be_i = 8'b0;
endcase
end
// ---------------
// Sign Extend
// ---------------
logic [63:0] rdata_d_ext; // sign extension for double words, actually only misaligned assembly
logic [63:0] rdata_w_ext; // sign extension for words
logic [63:0] rdata_h_ext; // sign extension for half words
@ -336,11 +482,11 @@ module lsu #(
// sign extension for words
// Picks the 32-bit word that starts at byte offset vaddr[2:0] out of rdata and
// widens it to 64 bit: the upper half replicates the word's top bit when the
// operator is LW and is zero otherwise. Offsets without an explicit entry
// (including offset 0) take the default item, i.e. rdata[31:0].
// NOTE(review): each case item appears twice, once testing `operator` and once
// testing `operator_i`. This looks like the pre- and post-change lines of a
// diff rendered together - confirm which operand is the current one.
always_comb begin : sign_extend_word
case (vaddr[2:0])
default: rdata_w_ext = (operator == LW) ? {{32{rdata[31]}}, rdata[31:0]} : {32'h0, rdata[31:0]};
3'b001: rdata_w_ext = (operator == LW) ? {{32{rdata[39]}}, rdata[39:8]} : {32'h0, rdata[39:8]};
3'b010: rdata_w_ext = (operator == LW) ? {{32{rdata[47]}}, rdata[47:16]} : {32'h0, rdata[47:16]};
3'b011: rdata_w_ext = (operator == LW) ? {{32{rdata[55]}}, rdata[55:24]} : {32'h0, rdata[55:24]};
3'b100: rdata_w_ext = (operator == LW) ? {{32{rdata[63]}}, rdata[63:32]} : {32'h0, rdata[63:32]};
default: rdata_w_ext = (operator_i == LW) ? {{32{rdata[31]}}, rdata[31:0]} : {32'h0, rdata[31:0]};
3'b001: rdata_w_ext = (operator_i == LW) ? {{32{rdata[39]}}, rdata[39:8]} : {32'h0, rdata[39:8]};
3'b010: rdata_w_ext = (operator_i == LW) ? {{32{rdata[47]}}, rdata[47:16]} : {32'h0, rdata[47:16]};
3'b011: rdata_w_ext = (operator_i == LW) ? {{32{rdata[55]}}, rdata[55:24]} : {32'h0, rdata[55:24]};
3'b100: rdata_w_ext = (operator_i == LW) ? {{32{rdata[63]}}, rdata[63:32]} : {32'h0, rdata[63:32]};
endcase
end
@ -436,35 +582,53 @@ module lsu #(
// it can either be feedthrough from the issue stage or from the internal register
// Selects the operands the LSU works on: either the registered copies (*_q)
// or the values arriving directly at the inputs (vaddr_i, operand_b_i,
// operator_i, trans_id_i, be_i, lsu_valid_i).
// NOTE(review): two selection structures are listed here, one keyed on
// lsu_ready_o and one keyed on !lsu_ready_q, and the begin/end pairs do not
// balance as shown. This looks like the pre- and post-change lines of a diff
// rendered together - confirm which one is current.
always_comb begin : input_select
// if we are stalling use the values we saved
// first listed variant: the registered values are taken while lsu_ready_o is set
if (lsu_ready_o) begin
vaddr = vaddr_q;
data = data_q;
operator = operator_q;
trans_id = trans_id_q;
end else begin // otherwise pass them directly through
vaddr = vaddr_i;
data = operand_b_i;
operator = operator_i;
trans_id = trans_id_i;
// second listed variant: the registered values are taken while the
// registered ready flag lsu_ready_q is low; be and lsu_valid are selected too
if (!lsu_ready_q) begin
vaddr = vaddr_q;
data = data_q;
operator = operator_q;
trans_id = trans_id_q;
be = be_q;
lsu_valid = lsu_valid_q;
end else begin // otherwise bypass them
vaddr = vaddr_i;
data = operand_b_i;
operator = operator_i;
trans_id = trans_id_i;
be = be_i;
lsu_valid = lsu_valid_i;
end
end
// registers
// State and pipeline registers of the LSU, clocked on the rising edge of clk_i
// and reset when rst_ni is low (negedge rst_ni is in the sensitivity list).
// NOTE(review): several assignments are listed twice (e.g. CS, vaddr_q). This
// looks like the pre- and post-change lines of a diff rendered together; the
// duplicates assign the same value.
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
// reset values: FSM in IDLE, ready flag set, all stage registers cleared
vaddr_q <= 64'b0;
data_q <= 64'b0;
operator_q <= ADD;
trans_id_q <= '{default: 0};
CS <= IDLE;
CS <= IDLE;
lsu_ready_q <= 1'b1;
// 1st LSU stage
vaddr_q <= 64'b0;
data_q <= 64'b0;
operator_q <= ADD;
trans_id_q <= '{default: 0};
be_q <= 8'b0;
lsu_valid_q <= 1'b0;
// 2nd LSU stage (after MMU)
paddr_q <= 64'b0;
translation_valid_q <= 1'b0;
end else begin
// the FSM state and the registered ready flag advance every cycle
CS <= NS;
CS <= NS;
lsu_ready_q <= lsu_ready_o;
// 1st LSU stage
// the inputs are only captured while the LSU reports ready, otherwise the
// previously captured values are held
if (lsu_ready_o) begin
vaddr_q <= vaddr_i;
data_q <= operand_b_i;
operator_q <= operator_i;
trans_id_q <= trans_id_i;
vaddr_q <= vaddr_i;
data_q <= operand_b_i;
operator_q <= operator_i;
trans_id_q <= trans_id_i;
be_q <= be_i;
lsu_valid_q <= lsu_valid_i;
end
// 2nd LSU stage (after MMU)
// the MMU outputs (paddr_n, translation_valid_n) are registered unconditionally
paddr_q <= paddr_n;
translation_valid_q <= translation_valid_n;
end
end