Rename store_queue to store_buffer

This commit is contained in:
Florian Zaruba 2017-06-25 16:46:53 +02:00
parent 5452af2bd4
commit faec717541
6 changed files with 24 additions and 249 deletions

View file

@ -1,197 +0,0 @@
// Author: Florian Zaruba, ETH Zurich
// Date: 25.04.2017
// Description: Store queue persists store requests and pushes them to memory
// if they are no longer speculative
//
// Copyright (C) 2017 ETH Zurich, University of Bologna
// All rights reserved.
//
// This code is under development and not yet released to the public.
// Until it is released, the code is under the copyright of ETH Zurich and
// the University of Bologna, and may contain confidential and/or unpublished
// work. Any reuse/redistribution is strictly forbidden without written
// permission from ETH Zurich.
//
// Bug fixes and contributions will eventually be released under the
// SolderPad open hardware license in the context of the PULP platform
// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the
// University of Bologna.
//
// Single-entry store queue in front of the data cache.
// An incoming store is latched as a speculative entry; once commit_i is seen
// (in the same or a later cycle) the entry is issued as a write request on the
// D$ interface and evicted when the request is granted. A flush drops an
// entry that is still speculative.
module store_queue (
input logic clk_i, // Clock
input logic rst_ni, // Asynchronous reset active low
input logic flush_i, // if we flush we need to pause the transactions on the memory
// otherwise we will run into a deadlock with the memory arbiter
output logic no_st_pending_o, // no committed store is waiting: the entry is either invalid or still speculative
input logic [11:0] page_offset_i, // page offset to check against the buffered and the incoming store address
output logic page_offset_matches_o, // bits [11:3] of page_offset_i match the valid entry or the incoming store
input logic commit_i, // commit the instruction which was placed there most recently
output logic ready_o, // the store queue is ready to accept a new request
// it is only ready if it can unconditionally commit the instruction, e.g.:
// the commit buffer needs to be empty
input logic valid_i, // this is a valid store
input logic [63:0] paddr_i, // physical address of store which needs to be placed in the queue
input logic [63:0] data_i, // data which is placed in the queue
input logic [7:0] be_i, // byte enable in
// D$ interface
output logic [11:0] address_index_o, // bits [11:0] of the stored address, driven together with the request
output logic [43:0] address_tag_o, // bits [55:12] of the stored address, driven the cycle after the grant
output logic [63:0] data_wdata_o, // store data of the entry
output logic data_req_o, // request a write of the entry
output logic data_we_o, // write enable, constantly high
output logic [7:0] data_be_o, // byte enable of the entry
output logic kill_req_o, // constantly low, stores are never killed
output logic tag_valid_o, // address_tag_o is valid (one cycle after a granted request)
input logic data_gnt_i, // request was granted, the entry can be evicted
input logic data_rvalid_i // currently unused, see the note in store_if
);
// we need to keep the tag portion of the address for a cycle later
logic [43:0] address_tag_n, address_tag_q;
logic tag_valid_n, tag_valid_q;
// Conceptually the store queue has two parts:
// 1. Speculative queue
// 2. Commit queue which is non-speculative, e.g.: the store will definitely happen.
// For simplicity we keep just a single entry instead of a real queue.
// Should this turn out to be a bottleneck the capacity can still be increased,
// at the cost of increased area and worse timing since all committed addresses
// need to be checked for potential aliasing.
//
// In the current implementation both parts are represented by a single entry,
// differentiated by the is_speculative flag.
struct packed {
logic [63:0] address;
logic [63:0] data;
logic [7:0] be;
logic valid; // entry is valid
logic is_speculative; // set if the entry isn't committed yet
} commit_queue_n, commit_queue_q;
// those signals can directly be output to the memory
assign address_index_o = commit_queue_q.address[11:0];
// the tag was saved when the request was granted, so it is output one cycle after the index
assign address_tag_o = address_tag_q;
assign data_wdata_o = commit_queue_q.data;
assign data_be_o = commit_queue_q.be;
assign tag_valid_o = tag_valid_q;
// we will never kill a request in the store buffer since we already know that the translation is valid
// e.g.: a kill request will only be necessary if we are not sure if the requested memory address will result in a TLB fault
assign kill_req_o = 1'b0;
// no store is pending if we don't have any committed data, e.g.: the entry is either not valid or it is
// speculative (it can be flushed)
assign no_st_pending_o = !commit_queue_q.valid || commit_queue_q.is_speculative;
// memory interface
// NOTE: statement order matters in this block, later assignments to
// commit_queue_n override earlier ones (evict -> commit -> new entry -> flush).
always_comb begin : store_if
// we can accept a new request if the entry is empty or if it is being
// granted in this cycle: a grant implies that the value was not speculative anymore and that we
// do not need to save the values anymore since the memory already processed them
automatic logic ready = !commit_queue_q.valid || data_gnt_i;
// towards the LSU we additionally refuse new stores while flushing
ready_o = ready && !flush_i;
// defaults: hold the state, the tag valid flag is a single cycle pulse
address_tag_n = address_tag_q;
commit_queue_n = commit_queue_q;
tag_valid_n = 1'b0;
data_we_o = 1'b1; // we will always write in the store queue
data_req_o = 1'b0;
// there should be no commit when we are flushing
if (!flush_i) begin
// if the entry in the commit queue is valid and not speculative anymore
// we can issue this instruction
// we can issue it as soon as the commit_i goes high or any number of cycles later
// by looking at the is_speculative flag
if (commit_queue_q.valid && (!commit_queue_q.is_speculative || commit_i)) begin
data_req_o = 1'b1;
if (data_gnt_i) begin
// we can evict it from the commit buffer
commit_queue_n.valid = 1'b0;
// save the tag portion (bits [55:12] of the address)
address_tag_n = commit_queue_q.address[55:12];
// signal a valid tag the cycle afterwards
tag_valid_n = 1'b1;
end
end
// we ignore the rvalid signal for now as we assume that the store
// happened
end
// shift the store request from the speculative buffer
// to the non-speculative
if (commit_i) begin
commit_queue_n.is_speculative = 1'b0;
end
// LSU interface
// we are ready to accept a new entry and the input data is valid
// a new entry always starts out as speculative, this overrides the commit above
// NOTE(review): this uses the internal ready, which is not gated by flush_i;
// confirm that valid_i can not be asserted while flushing
if (ready && valid_i) begin
commit_queue_n.address = paddr_i;
commit_queue_n.data = data_i;
commit_queue_n.be = be_i;
commit_queue_n.valid = 1'b1;
commit_queue_n.is_speculative = 1'b1;
end
// when we flush evict the speculative store
// the decision is taken on the registered flag, i.e. the entry currently stored
if (flush_i && commit_queue_q.is_speculative) begin
commit_queue_n.valid = 1'b0;
end
end
// ------------------
// Address Checker
// ------------------
// The load should return the data stored by the most recent store to the
// same physical address. The most direct way to implement this is to
// maintain physical addresses in the store buffer.
// Of course, there are other micro-architectural techniques to accomplish
// the same thing: you can interlock and wait for the store buffer to
// drain if the load VA matches any store VA modulo the page size (i.e.
// bits 11:0). As a special case, it is correct to bypass if the full VA
// matches, and no younger stores' VAs match in bits 11:0.
//
// checks if the requested load is in the store buffer
// page offsets are virtually and physically the same
// only bits [11:3] are compared, i.e. the check is done on 8 byte granularity
always_comb begin : address_checker
page_offset_matches_o = 1'b0;
// check if the page offset is identical to the stored one and the entry is valid
if ((page_offset_i[11:3] == commit_queue_q.address[11:3]) && commit_queue_q.valid) begin
page_offset_matches_o = 1'b1;
end
// also check against the store which is arriving in this cycle
if ((page_offset_i[11:3] == paddr_i[11:3]) && valid_i) begin
page_offset_matches_o = 1'b1;
end
end
// registers
always_ff @(posedge clk_i or negedge rst_ni) begin : proc_
if(~rst_ni) begin
address_tag_q <= 'b0;
tag_valid_q <= 1'b0;
commit_queue_q <= '{default: 0};
end else begin
commit_queue_q <= commit_queue_n;
tag_valid_q <= tag_valid_n;
address_tag_q <= address_tag_n;
end
end
`ifndef SYNTHESIS
`ifndef verilator
// assert that commit is never set while we are flushing, this would be counter-intuitive
// as flush and commit are decided in the same stage
// the check is only active while the reset is released (rst_ni high)
assert property (
@(posedge clk_i) rst_ni && flush_i |-> !commit_i)
else $error ("You are trying to commit and flush in the same cycle");
`endif
`endif
endmodule

View file

@ -190,7 +190,7 @@ module store_unit (
// ---------------
// Store Queue
// ---------------
store_queue store_queue_i (
store_buffer store_buffer_i (
// store queue write port
.valid_i ( st_valid ),
.data_i ( st_data_q ),

View file

@ -26,13 +26,12 @@ class instruction_trace_item;
logic [63:0] reg_file [32];
logic [4:0] read_regs [$];
logic [4:0] result_regs [$];
logic [63:0] imm;
logic [63:0] result;
logic [63:0] paddr;
logic [63:0] paddr_queue [$];
logic [63:0] vaddr;
logic [63:0] vaddr_queue [$];
// constructor creating a new instruction trace item, e.g.: a single instruction with all relevant information
function new (time simtime, longint unsigned cycle, scoreboard_entry sbe, logic [31:0] instr, logic [63:0] reg_file [32], logic [63:0] result, logic [63:0] vaddr, logic [63:0] paddr);
function new (time simtime, longint unsigned cycle, scoreboard_entry sbe, logic [31:0] instr, logic [63:0] reg_file [32], logic [63:0] result, logic [63:0] paddr);
this.simtime = simtime;
this.cycle = cycle;
this.pc = sbe.pc;
@ -40,7 +39,6 @@ class instruction_trace_item;
this.instr = instr;
this.reg_file = reg_file;
this.result = result;
this.vaddr = vaddr;
this.paddr = paddr;
endfunction
// convert register address to ABI compatible form
@ -198,11 +196,13 @@ class instruction_trace_item;
if (read_regs[i] != 0)
s = $sformatf("%s %-4s:%16x", s, regAddrToStr(read_regs[i]), reg_file[read_regs[i]]);
end
// if we got a physical address also display address translation
foreach (paddr_queue[i]) begin
s = $sformatf("%s VA: %x PA: %x", s, this.vaddr, paddr_queue[i]);
end
casex (instr)
// check if the instruction was a load or store
INSTR_LOAD, INSTR_STORE: begin
logic [63:0] vaddress = reg_file[read_regs[0]] + this.imm;
s = $sformatf("%s VA: %x PA: %x", s, vaddress, this.paddr);
end
endcase
return s;
endfunction
@ -305,9 +305,11 @@ class instruction_trace_item;
default: return printMnemonic("INVALID");
endcase
result_regs.push_back(sbe.rd);
read_regs.push_back(sbe.rs1);
paddr_queue.push_back(paddr);
result_regs.push_back(sbe.rd);
// save the immediate for calculating the virtual address
this.imm = sbe.result;
return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1));
endfunction
@ -324,7 +326,8 @@ class instruction_trace_item;
read_regs.push_back(sbe.rs1);
read_regs.push_back(sbe.rs2);
paddr_queue.push_back(paddr);
// save the immediate for calculating the virtual address
this.imm = sbe.result;
return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1));

View file

@ -36,10 +36,7 @@ class instruction_tracer;
int f;
// address mapping
// contains mappings of the form vaddr <-> paddr
struct {
logic [63:0] vaddr;
logic [63:0] paddr;
} store_mapping[$], load_mapping[$], address_mapping;
logic [63:0] store_mapping[$], load_mapping[$], address_mapping;
function new(virtual instruction_tracer_if tracer_if);
@ -90,23 +87,6 @@ class instruction_tracer;
// --------------------
// Address Translation
// --------------------
// we've got a valid translation
if (tracer_if.pck.translation_req && tracer_if.pck.translation_valid) begin
// put it in the store mapping queue if it is a store
if (tracer_if.pck.is_store && tracer_if.pck.st_ready) begin
store_mapping.push_back('{
vaddr: tracer_if.pck.vaddr,
paddr: get_paddr(tracer_if.pck.vaddr, tracer_if.pck.pte, tracer_if.pck.is_2M, tracer_if.pck.is_1G)
});
// or else put it in the load mapping
end else if (!tracer_if.pck.is_store && tracer_if.pck.ld_ready) begin
load_mapping.push_back('{
vaddr: tracer_if.pck.vaddr,
paddr: get_paddr(tracer_if.pck.vaddr, tracer_if.pck.pte, tracer_if.pck.is_2M, tracer_if.pck.is_1G)
});
end
end
// --------------
// Commit
@ -125,9 +105,9 @@ class instruction_tracer;
// check if the write back is valid, if not we need to source the result from the register file
// as the most recent version of this register will be there.
if (tracer_if.pck.we) begin
printInstr(issue_sbe, issue_commit_instruction.instruction, tracer_if.pck.wdata, address_mapping.vaddr, address_mapping.paddr);
printInstr(issue_sbe, issue_commit_instruction.instruction, tracer_if.pck.wdata, address_mapping);
end else
printInstr(issue_sbe, issue_commit_instruction.instruction, reg_file[commit_instruction.rd], address_mapping.vaddr, address_mapping.paddr);
printInstr(issue_sbe, issue_commit_instruction.instruction, reg_file[commit_instruction.rd], address_mapping);
end
// --------------
@ -159,17 +139,6 @@ class instruction_tracer;
end
endtask
// Calculate the physical address given the values retrieved from the TLB.
//   vaddr : virtual address which was translated
//   pte   : page table entry of the translation, only pte.ppn is read
//   is_2M : the translation is a 2 MiB page, vaddr[20:0] is the page offset
//   is_1G : the translation is a 1 GiB page, vaddr[29:0] is the page offset
// Returns the upper PPN bits concatenated with the page offset for the page
// size; without either flag a 4 KiB page is assumed (offset vaddr[11:0]).
function logic [63:0] get_paddr (logic [63:0] vaddr, pte_t pte, logic is_2M, logic is_1G);
    if (is_2M)
        return {pte.ppn[43:9], vaddr[20:0]};
    // this branch used to test is_2M a second time, which made the
    // 1 GiB case unreachable and fell through to the 4 KiB calculation
    if (is_1G)
        return {pte.ppn[43:18], vaddr[29:0]};
    return {pte.ppn, vaddr[11:0]};
endfunction;
// flush all decoded instructions
function void flushDecode ();
@ -187,8 +156,8 @@ class instruction_tracer;
load_mapping = {};
endfunction;
function void printInstr(scoreboard_entry sbe, logic [63:0] instr, logic [63:0] result, logic [63:0] vaddr, logic [63:0] paddr);
instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.reg_file, result, vaddr, paddr);
function void printInstr(scoreboard_entry sbe, logic [63:0] instr, logic [63:0] result, logic [63:0] paddr);
instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.reg_file, result, paddr);
// print instruction to console
string print_instr = iti.printInstr();
$display(print_instr);

View file

@ -29,7 +29,7 @@ module store_queue_tb;
dcache_if slave(clk);
store_queue_if store_queue(clk);
store_queue dut (
store_buffer dut (
.clk_i ( clk ),
.rst_ni ( rst_ni ),
.flush_i ( store_queue.flush ),

View file

@ -24,7 +24,7 @@ add wave -noupdate -group ex_stage -group lsu -group mmu -group ptw /core_tb/dut
add wave -noupdate -group ex_stage -group lsu -group mem_arbiter /core_tb/dut/ex_stage_i/lsu_i/dcache_arbiter_i/*
add wave -noupdate -group ex_stage -group lsu -group mem_arbiter -group arbiter_fifo /core_tb/dut/ex_stage_i/lsu_i/dcache_arbiter_i/fifo_i/*
add wave -noupdate -group ex_stage -group lsu -group store_unit /core_tb/dut/ex_stage_i/lsu_i/store_unit_i/*
add wave -noupdate -group ex_stage -group lsu -group store_unit -group store_queue /core_tb/dut/ex_stage_i/lsu_i/store_unit_i/store_queue_i/*
add wave -noupdate -group ex_stage -group lsu -group store_unit -group store_buffer /core_tb/dut/ex_stage_i/lsu_i/store_unit_i/store_buffer_i/*
add wave -noupdate -group ex_stage -group lsu -group load_unit /core_tb/dut/ex_stage_i/lsu_i/load_unit_i/*
add wave -noupdate -group ex_stage -group lsu -group lsu_arbiter /core_tb/dut/ex_stage_i/lsu_i/lsu_arbiter_i/*