mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Adding support for cache flush and writeback
Crediting Yi-Lin Tsai's original work at https://github.com/richardyilin/GPU_writeback
This commit is contained in:
parent
a5bde3693f
commit
f5014e8975
17 changed files with 727 additions and 271 deletions
|
@ -99,6 +99,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
.MREQ_SIZE (`L2_MREQ_SIZE),
|
||||
.TAG_WIDTH (L2_TAG_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`L2_WRITEBACK),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2),
|
||||
|
|
|
@ -535,6 +535,11 @@
|
|||
`define DCACHE_NUM_WAYS 1
|
||||
`endif
|
||||
|
||||
// Enable Cache Writeback
|
||||
`ifndef DCACHE_WRITEBACK
|
||||
`define DCACHE_WRITEBACK 1
|
||||
`endif
|
||||
|
||||
// LMEM Configurable Knobs ////////////////////////////////////////////////////
|
||||
|
||||
`ifndef LMEM_DISABLE
|
||||
|
@ -594,6 +599,11 @@
|
|||
`define L2_NUM_WAYS 2
|
||||
`endif
|
||||
|
||||
// Enable Cache Writeback
|
||||
`ifndef L2_WRITEBACK
|
||||
`define L2_WRITEBACK 1
|
||||
`endif
|
||||
|
||||
// L3cache Configurable Knobs /////////////////////////////////////////////////
|
||||
|
||||
// Cache Size
|
||||
|
@ -635,6 +645,11 @@
|
|||
`define L3_NUM_WAYS 4
|
||||
`endif
|
||||
|
||||
// Enable Cache Writeback
|
||||
`ifndef L3_WRITEBACK
|
||||
`define L3_WRITEBACK 1
|
||||
`endif
|
||||
|
||||
// ISA Extensions /////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef EXT_A_ENABLE
|
||||
|
|
|
@ -149,6 +149,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`DCACHE_WRITEBACK),
|
||||
.NC_ENABLE (1),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2)
|
||||
|
|
|
@ -83,6 +83,7 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
.MREQ_SIZE (`L3_MREQ_SIZE),
|
||||
.TAG_WIDTH (L2_MEM_TAG_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`L3_WRITEBACK),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.CORE_OUT_BUF (2),
|
||||
.MEM_OUT_BUF (2),
|
||||
|
|
109
hw/rtl/cache/VX_bank_flush.sv
vendored
Normal file
109
hw/rtl/cache/VX_bank_flush.sv
vendored
Normal file
|
@ -0,0 +1,109 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// VX_bank_flush
//
// Per-bank line sequencer used for two purposes:
//   * after reset, it walks every line index once while asserting
//     `flush_out_init` (STATE_INIT);
//   * on a flush request (`flush_in_valid` while `mshr_empty`), it walks every
//     line index -- and, when WRITEBACK is enabled, every way of every line --
//     presenting one (line, way) beat at a time on the flush_out_* handshake
//     (STATE_FLUSH).
//
// `flush_in_ready` is a registered one-cycle pulse raised the cycle after the
// last flush beat has been accepted by the consumer.
//
// NOTE(review): CACHE_SIZE / LINE_SIZE / NUM_BANKS are not referenced directly
// in this module; presumably they feed the `CS_LINE_SEL_BITS / `CS_WAY_SEL_BITS
// macros from VX_cache_define.vh -- confirm against that header.
module VX_bank_flush #(
    // Size of cache in bytes
    parameter CACHE_SIZE = 1024,
    // Size of line inside a bank in bytes
    parameter LINE_SIZE  = 64,
    // Number of banks
    parameter NUM_BANKS  = 1,
    // Number of associative ways
    parameter NUM_WAYS   = 1,
    // Enable cache writeback
    parameter WRITEBACK  = 0
) (
    input  wire clk,
    input  wire reset,

    // flush request handshake (request side)
    input  wire flush_in_valid,
    output wire flush_in_ready,

    // line walk outputs
    output wire flush_out_init,                         // high while in the post-reset init walk
    output wire flush_out_valid,                        // high while a flush beat is being presented
    output wire [`CS_LINE_SEL_BITS-1:0] flush_out_line, // line index of the current beat
    output wire [NUM_WAYS-1:0] flush_out_way,           // one-hot way (writeback) or all ways
    input  wire flush_out_ready,                        // consumer accepts the current flush beat

    // a flush request is only started when this is high
    input  wire mshr_empty
);
    // The counter holds the line index in its low bits; with WRITEBACK the way
    // index is appended in the upper bits so each way is visited separately.
    localparam CTR_WIDTH = `CS_LINE_SEL_BITS + (WRITEBACK ? `CS_WAY_SEL_BITS : 0);

    localparam STATE_IDLE  = 2'd0;
    localparam STATE_INIT  = 2'd1;
    localparam STATE_FLUSH = 2'd2;

    reg [CTR_WIDTH-1:0] counter_r;
    reg [1:0] state_r, state_n;
    reg flush_in_ready_r, flush_in_ready_n;

    // next-state logic
    always @(*) begin
        state_n = state_r;
        flush_in_ready_n = 0;
        case (state_r)
            STATE_INIT: begin
                // the init walk only covers the line index range
                if (counter_r == ((2 ** `CS_LINE_SEL_BITS)-1)) begin
                    state_n = STATE_IDLE;
                end
            end
            STATE_FLUSH: begin
                // Leave the flush walk only once the final beat has actually
                // been accepted; otherwise a stalled consumer would cause the
                // last (line, way) to be dropped and completion to be
                // signalled too early.
                if ((counter_r == ((2 ** CTR_WIDTH)-1)) && flush_out_ready) begin
                    state_n = STATE_IDLE;
                    flush_in_ready_n = 1;
                end
            end
            // STATE_IDLE (also catches the unused encoding 2'd3)
            default: begin
                if (flush_in_valid && mshr_empty) begin
                    state_n = STATE_FLUSH;
                end
            end
        endcase
    end

    // state and counter registers
    always @(posedge clk) begin
        if (reset) begin
            state_r          <= STATE_INIT;
            counter_r        <= '0;
            flush_in_ready_r <= '0;
        end else begin
            state_r          <= state_n;
            flush_in_ready_r <= flush_in_ready_n;
            if (state_r != STATE_IDLE) begin
                // init advances every cycle; flush advances on accepted beats
                if ((state_r == STATE_INIT) || flush_out_ready) begin
                    counter_r <= counter_r + CTR_WIDTH'(1);
                end
            end else begin
                // keep the counter cleared so the next walk starts at zero
                counter_r <= '0;
            end
        end
    end

    assign flush_in_ready  = flush_in_ready_r;

    assign flush_out_init  = (state_r == STATE_INIT);

    assign flush_out_valid = (state_r == STATE_FLUSH);
    assign flush_out_line  = counter_r[`CS_LINE_SEL_BITS-1:0];

    if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin : g_flush_way_sel
        // decode the way index held in the upper counter bits into a one-hot mask
        reg [NUM_WAYS-1:0] flush_out_way_r;
        always @(*) begin
            flush_out_way_r = '0;
            flush_out_way_r[counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]] = 1;
        end
        assign flush_out_way = flush_out_way_r;
    end else begin : g_flush_way_all
        // no per-way walk: select all ways at once
        assign flush_out_way = {NUM_WAYS{1'b1}};
    end

endmodule
|
198
hw/rtl/cache/VX_cache.sv
vendored
198
hw/rtl/cache/VX_cache.sv
vendored
|
@ -42,6 +42,9 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -67,6 +70,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
|
||||
`STATIC_ASSERT(WRITE_ENABLE || !WRITEBACK, ("invalid parameter"))
|
||||
|
||||
localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS);
|
||||
localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS);
|
||||
|
@ -78,36 +82,46 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS);
|
||||
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH;
|
||||
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1;
|
||||
localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
|
||||
|
||||
localparam CORE_REQ_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1);
|
||||
localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1);
|
||||
|
||||
localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 2 : 0;
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_write_miss_per_bank;
|
||||
wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
|
||||
`endif
|
||||
|
||||
wire [NUM_REQS-1:0] core_req_valid;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
|
||||
wire [NUM_REQS-1:0] core_req_rw;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
|
||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
|
||||
wire [NUM_REQS-1:0] core_req_ready;
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (WORD_SIZE),
|
||||
.TAG_WIDTH (TAG_WIDTH)
|
||||
) core_bus2_if[NUM_REQS]();
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_valid[i] = core_bus_if[i].req_valid;
|
||||
assign core_req_rw[i] = core_bus_if[i].req_data.rw;
|
||||
assign core_req_byteen[i] = core_bus_if[i].req_data.byteen;
|
||||
assign core_req_addr[i] = core_bus_if[i].req_data.addr;
|
||||
assign core_req_data[i] = core_bus_if[i].req_data.data;
|
||||
assign core_req_tag[i] = core_bus_if[i].req_data.tag;
|
||||
assign core_bus_if[i].req_ready = core_req_ready[i];
|
||||
`UNUSED_VAR (core_bus_if[i].req_data.atype)
|
||||
end
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_valid;
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_fire;
|
||||
|
||||
// this reset relay is required to sync with bank initialization
|
||||
`RESET_RELAY (flush_reset, reset);
|
||||
|
||||
VX_cache_flush #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
|
||||
) flush_unit (
|
||||
.clk (clk),
|
||||
.reset (flush_reset),
|
||||
.core_bus_in_if (core_bus_if),
|
||||
.core_bus_out_if (core_bus2_if),
|
||||
.bank_req_fire (per_bank_core_req_fire),
|
||||
.flush_valid (per_bank_flush_valid),
|
||||
.flush_ready (per_bank_flush_ready)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -131,9 +145,9 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.valid_in (core_rsp_valid_s[i]),
|
||||
.ready_in (core_rsp_ready_s[i]),
|
||||
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
|
||||
.data_out ({core_bus_if[i].rsp_data.data, core_bus_if[i].rsp_data.tag}),
|
||||
.valid_out (core_bus_if[i].rsp_valid),
|
||||
.ready_out (core_bus_if[i].rsp_ready)
|
||||
.data_out ({core_bus2_if[i].rsp_data.data, core_bus2_if[i].rsp_data.tag}),
|
||||
.valid_out (core_bus2_if[i].rsp_valid),
|
||||
.ready_out (core_bus2_if[i].rsp_ready)
|
||||
);
|
||||
end
|
||||
|
||||
|
@ -146,12 +160,15 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [LINE_SIZE-1:0] mem_req_byteen_s;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data_s;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
|
||||
wire mem_req_flush_s;
|
||||
wire mem_req_ready_s;
|
||||
|
||||
wire mem_bus_if_flush;
|
||||
|
||||
`RESET_RELAY (mem_req_reset, reset);
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH),
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
||||
.SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
|
@ -159,13 +176,13 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.reset (mem_req_reset),
|
||||
.valid_in (mem_req_valid_s),
|
||||
.ready_in (mem_req_ready_s),
|
||||
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s}),
|
||||
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag}),
|
||||
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s, mem_req_flush_s}),
|
||||
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag, mem_bus_if_flush}),
|
||||
.valid_out (mem_bus_if.req_valid),
|
||||
.ready_out (mem_bus_if.req_ready)
|
||||
);
|
||||
|
||||
assign mem_bus_if.req_data.atype = '0;
|
||||
assign mem_bus_if.req_data.atype = mem_bus_if_flush ? `ADDR_TYPE_WIDTH'(1 << `ADDR_TYPE_FLUSH) : '0;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -192,27 +209,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.ready_out (mem_rsp_ready_s)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] init_line_sel;
|
||||
wire init_enable;
|
||||
|
||||
// this reset relay is required to sync with bank initialization
|
||||
`RESET_RELAY (init_reset, reset);
|
||||
|
||||
VX_cache_init #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS)
|
||||
) cache_init (
|
||||
.clk (clk),
|
||||
.reset (init_reset),
|
||||
.addr_out (init_line_sel),
|
||||
.valid_out (init_enable)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
|
||||
wire [NUM_BANKS-1:0][`CS_LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
|
||||
|
@ -222,6 +219,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_flush;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
|
||||
|
@ -233,14 +231,16 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
|
||||
wire [NUM_BANKS-1:0][`CS_MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
|
||||
wire [NUM_BANKS-1:0][WORD_SEL_WIDTH-1:0] per_bank_mem_req_wsel;
|
||||
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_flush;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
|
||||
|
||||
assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_mem_req_ready;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign mem_rsp_ready_s = per_bank_mem_rsp_ready;
|
||||
end else begin
|
||||
|
@ -249,12 +249,33 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
// Bank requests dispatch
|
||||
|
||||
wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in;
|
||||
wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out;
|
||||
wire [NUM_REQS-1:0] core_req_valid;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
|
||||
wire [NUM_REQS-1:0] core_req_rw;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
|
||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
|
||||
wire [NUM_REQS-1:0] core_req_flush;
|
||||
wire [NUM_REQS-1:0] core_req_ready;
|
||||
|
||||
wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr;
|
||||
wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] core_req_bid;
|
||||
wire [NUM_REQS-1:0][WORD_SEL_WIDTH-1:0] core_req_wsel;
|
||||
|
||||
wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in;
|
||||
wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_valid[i] = core_bus2_if[i].req_valid;
|
||||
assign core_req_rw[i] = core_bus2_if[i].req_data.rw;
|
||||
assign core_req_byteen[i] = core_bus2_if[i].req_data.byteen;
|
||||
assign core_req_addr[i] = core_bus2_if[i].req_data.addr;
|
||||
assign core_req_data[i] = core_bus2_if[i].req_data.data;
|
||||
assign core_req_tag[i] = core_bus2_if[i].req_data.tag;
|
||||
assign core_req_flush[i] = core_bus2_if[i].req_data.atype[`ADDR_TYPE_FLUSH];
|
||||
assign core_bus2_if[i].req_ready = core_req_ready[i];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (WORDS_PER_LINE > 1) begin
|
||||
assign core_req_wsel[i] = core_req_addr[i][0 +: WORD_SEL_BITS];
|
||||
|
@ -279,7 +300,9 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
core_req_wsel[i],
|
||||
core_req_byteen[i],
|
||||
core_req_data[i],
|
||||
core_req_tag[i]};
|
||||
core_req_tag[i],
|
||||
core_req_flush[i]
|
||||
};
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
@ -288,12 +311,12 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
`RESET_RELAY (req_xbar_reset, reset);
|
||||
|
||||
VX_stream_xbar #(
|
||||
VX_stream_xbar #(
|
||||
.NUM_INPUTS (NUM_REQS),
|
||||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
.DATAW (CORE_REQ_DATAW),
|
||||
.PERF_CTR_BITS (`PERF_CTR_BITS),
|
||||
.OUT_BUF ((NUM_REQS > 4) ? 2 : 0)
|
||||
.OUT_BUF (REQ_XBAR_BUF)
|
||||
) req_xbar (
|
||||
.clk (clk),
|
||||
.reset (req_xbar_reset),
|
||||
|
@ -319,11 +342,13 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
per_bank_core_req_wsel[i],
|
||||
per_bank_core_req_byteen[i],
|
||||
per_bank_core_req_data[i],
|
||||
per_bank_core_req_tag[i]} = core_req_data_out[i];
|
||||
per_bank_core_req_tag[i],
|
||||
per_bank_core_req_flush[i]
|
||||
} = core_req_data_out[i];
|
||||
end
|
||||
|
||||
// Banks access
|
||||
for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks
|
||||
for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
|
||||
wire curr_bank_mem_rsp_valid;
|
||||
|
||||
|
@ -348,6 +373,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF),
|
||||
|
@ -371,6 +397,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.core_req_data (per_bank_core_req_data[bank_id]),
|
||||
.core_req_tag (per_bank_core_req_tag[bank_id]),
|
||||
.core_req_idx (per_bank_core_req_idx[bank_id]),
|
||||
.core_req_flush (per_bank_core_req_flush[bank_id]),
|
||||
.core_req_ready (per_bank_core_req_ready[bank_id]),
|
||||
|
||||
// Core response
|
||||
|
@ -384,10 +411,10 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.mem_req_valid (per_bank_mem_req_valid[bank_id]),
|
||||
.mem_req_addr (curr_bank_mem_req_addr),
|
||||
.mem_req_rw (per_bank_mem_req_rw[bank_id]),
|
||||
.mem_req_wsel (per_bank_mem_req_wsel[bank_id]),
|
||||
.mem_req_byteen (per_bank_mem_req_byteen[bank_id]),
|
||||
.mem_req_data (per_bank_mem_req_data[bank_id]),
|
||||
.mem_req_id (per_bank_mem_req_id[bank_id]),
|
||||
.mem_req_flush (per_bank_mem_req_flush[bank_id]),
|
||||
.mem_req_ready (per_bank_mem_req_ready[bank_id]),
|
||||
|
||||
// Memory response
|
||||
|
@ -396,9 +423,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
|
||||
.mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),
|
||||
|
||||
// initialization
|
||||
.init_enable (init_enable),
|
||||
.init_line_sel (init_line_sel)
|
||||
.flush_valid (per_bank_flush_valid[bank_id]),
|
||||
.flush_ready (per_bank_flush_ready[bank_id])
|
||||
);
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
|
@ -446,31 +472,33 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire mem_req_valid_p;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_addr_p;
|
||||
wire mem_req_rw_p;
|
||||
wire [WORD_SEL_WIDTH-1:0] mem_req_wsel_p;
|
||||
wire [WORD_SIZE-1:0] mem_req_byteen_p;
|
||||
wire [`CS_WORD_WIDTH-1:0] mem_req_data_p;
|
||||
wire [LINE_SIZE-1:0] mem_req_byteen_p;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data_p;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_p;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mem_req_id_p;
|
||||
wire mem_req_flush_p;
|
||||
wire mem_req_ready_p;
|
||||
|
||||
// Memory request arbitration
|
||||
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + `CS_WORD_WIDTH)-1:0] data_in;
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign data_in[i] = {per_bank_mem_req_addr[i],
|
||||
per_bank_mem_req_rw[i],
|
||||
per_bank_mem_req_wsel[i],
|
||||
per_bank_mem_req_byteen[i],
|
||||
per_bank_mem_req_data[i],
|
||||
per_bank_mem_req_id[i]};
|
||||
assign data_in[i] = {
|
||||
per_bank_mem_req_addr[i],
|
||||
per_bank_mem_req_rw[i],
|
||||
per_bank_mem_req_byteen[i],
|
||||
per_bank_mem_req_data[i],
|
||||
per_bank_mem_req_id[i],
|
||||
per_bank_mem_req_flush[i]
|
||||
};
|
||||
end
|
||||
|
||||
`RESET_RELAY (mem_arb_reset, reset);
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1),
|
||||
.ARBITER ("F")
|
||||
) mem_req_arb (
|
||||
.clk (clk),
|
||||
|
@ -478,7 +506,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.valid_in (per_bank_mem_req_valid),
|
||||
.ready_in (per_bank_mem_req_ready),
|
||||
.data_in (data_in),
|
||||
.data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_wsel_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p}),
|
||||
.data_out ({mem_req_addr_p, mem_req_rw_p, mem_req_byteen_p, mem_req_data_p, mem_req_id_p, mem_req_flush_p}),
|
||||
.valid_out (mem_req_valid_p),
|
||||
.ready_out (mem_req_ready_p),
|
||||
`UNUSED_PIN (sel_out)
|
||||
|
@ -496,31 +524,15 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
assign mem_req_valid_s = mem_req_valid_p;
|
||||
assign mem_req_addr_s = mem_req_addr_p;
|
||||
assign mem_req_tag_s = mem_req_tag_p;
|
||||
assign mem_req_flush_s = mem_req_flush_p;
|
||||
assign mem_req_ready_p = mem_req_ready_s;
|
||||
|
||||
if (WRITE_ENABLE != 0) begin
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
reg [LINE_SIZE-1:0] mem_req_byteen_r;
|
||||
reg [`CS_LINE_WIDTH-1:0] mem_req_data_r;
|
||||
|
||||
always @(*) begin
|
||||
mem_req_byteen_r = '0;
|
||||
mem_req_data_r = 'x;
|
||||
mem_req_byteen_r[mem_req_wsel_p * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p;
|
||||
mem_req_data_r[mem_req_wsel_p * `CS_WORD_WIDTH +: `CS_WORD_WIDTH] = mem_req_data_p;
|
||||
end
|
||||
assign mem_req_rw_s = mem_req_rw_p;
|
||||
assign mem_req_byteen_s = mem_req_byteen_r;
|
||||
assign mem_req_data_s = mem_req_data_r;
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_wsel_p)
|
||||
assign mem_req_rw_s = mem_req_rw_p;
|
||||
assign mem_req_byteen_s = mem_req_byteen_p;
|
||||
assign mem_req_data_s = mem_req_data_p;
|
||||
end
|
||||
assign mem_req_rw_s = mem_req_rw_p;
|
||||
assign mem_req_byteen_s = mem_req_byteen_p;
|
||||
assign mem_req_data_s = mem_req_data_p;
|
||||
end else begin
|
||||
`UNUSED_VAR (mem_req_byteen_p)
|
||||
`UNUSED_VAR (mem_req_wsel_p)
|
||||
`UNUSED_VAR (mem_req_data_p)
|
||||
`UNUSED_VAR (mem_req_rw_p)
|
||||
|
||||
|
@ -554,7 +566,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [NUM_REQS-1:0] perf_crsp_stall_per_req;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign perf_crsp_stall_per_req[i] = core_bus_if[i].rsp_valid && ~core_bus_if[i].rsp_ready;
|
||||
assign perf_crsp_stall_per_req[i] = core_bus2_if[i].rsp_valid && ~core_bus2_if[i].rsp_ready;
|
||||
end
|
||||
|
||||
`POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_req);
|
||||
|
|
292
hw/rtl/cache/VX_cache_bank.sv
vendored
292
hw/rtl/cache/VX_cache_bank.sv
vendored
|
@ -41,6 +41,9 @@ module VX_cache_bank #(
|
|||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -69,12 +72,13 @@ module VX_cache_bank #(
|
|||
// Core Request
|
||||
input wire core_req_valid,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire core_req_rw,
|
||||
input wire [WORD_SEL_WIDTH-1:0] core_req_wsel,
|
||||
input wire [WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [`CS_WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [TAG_WIDTH-1:0] core_req_tag,
|
||||
input wire [REQ_SEL_WIDTH-1:0] core_req_idx,
|
||||
input wire core_req_rw, // write enable
|
||||
input wire [WORD_SEL_WIDTH-1:0] core_req_wsel, // select the word in a cacheline, e.g. word size = 4 bytes, cacheline size = 64 bytes, it should have log(64/4)= 4 bits
|
||||
input wire [WORD_SIZE-1:0] core_req_byteen,// which bytes in data to write
|
||||
input wire [`CS_WORD_WIDTH-1:0] core_req_data, // data to be written
|
||||
input wire [TAG_WIDTH-1:0] core_req_tag, // identifier of the request (request id)
|
||||
input wire [REQ_SEL_WIDTH-1:0] core_req_idx, // index of the request in the core request array
|
||||
input wire core_req_flush, // flush enable
|
||||
output wire core_req_ready,
|
||||
|
||||
// Core Response
|
||||
|
@ -88,10 +92,10 @@ module VX_cache_bank #(
|
|||
output wire mem_req_valid,
|
||||
output wire [`CS_LINE_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire mem_req_rw,
|
||||
output wire [WORD_SEL_WIDTH-1:0] mem_req_wsel,
|
||||
output wire [WORD_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`CS_WORD_WIDTH-1:0] mem_req_data,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id,
|
||||
output wire [LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id, // index of the head entry in the mshr
|
||||
output wire mem_req_flush,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
|
@ -100,9 +104,9 @@ module VX_cache_bank #(
|
|||
input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// initialization
|
||||
input wire init_enable,
|
||||
input wire [`CS_LINE_SEL_BITS-1:0] init_line_sel
|
||||
// flush
|
||||
input wire flush_valid,
|
||||
output wire flush_ready
|
||||
);
|
||||
|
||||
localparam PIPELINE_STAGES = 2;
|
||||
|
@ -128,23 +132,56 @@ module VX_cache_bank #(
|
|||
wire [MSHR_ADDR_WIDTH-1:0] replay_id;
|
||||
wire replay_ready;
|
||||
|
||||
wire is_init_st0;
|
||||
wire is_flush_st0, is_flush_st1;
|
||||
wire [NUM_WAYS-1:0] flush_way_st0;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
|
||||
wire rw_st0, rw_st1;
|
||||
wire [WORD_SEL_WIDTH-1:0] wsel_st0, wsel_st1;
|
||||
wire [WORD_SIZE-1:0] byteen_st0, byteen_st1;
|
||||
wire [REQ_SEL_WIDTH-1:0] req_idx_st0, req_idx_st1;
|
||||
wire [TAG_WIDTH-1:0] tag_st0, tag_st1;
|
||||
wire rw_sel, rw_st0, rw_st1;
|
||||
wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1;
|
||||
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
|
||||
wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1;
|
||||
wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1;
|
||||
wire [`CS_WORD_WIDTH-1:0] read_data_st1;
|
||||
wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0, mshr_id_st0, mshr_id_st1;
|
||||
wire valid_sel, valid_st0, valid_st1;
|
||||
wire is_init_st0;
|
||||
wire is_creq_st0, is_creq_st1;
|
||||
wire is_fill_st0, is_fill_st1;
|
||||
wire is_replay_st0, is_replay_st1;
|
||||
wire creq_flush_st0, creq_flush_st1;
|
||||
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
|
||||
wire [NUM_WAYS-1:0] tag_matches_st0, tag_matches_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1;
|
||||
wire mshr_pending_st0, mshr_pending_st1;
|
||||
wire mshr_empty;
|
||||
|
||||
wire line_flush_valid;
|
||||
wire line_flush_init;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_flush_sel;
|
||||
wire [NUM_WAYS-1:0] line_flush_way;
|
||||
wire line_flush_ready;
|
||||
|
||||
// flush unit
|
||||
VX_bank_flush #(
|
||||
.CACHE_SIZE (CACHE_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.NUM_WAYS (NUM_WAYS),
|
||||
.WRITEBACK (WRITEBACK)
|
||||
) flush_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.flush_in_valid (flush_valid),
|
||||
.flush_in_ready (flush_ready),
|
||||
.flush_out_init (line_flush_init),
|
||||
.flush_out_valid (line_flush_valid),
|
||||
.flush_out_line (line_flush_sel),
|
||||
.flush_out_way (line_flush_way),
|
||||
.flush_out_ready (line_flush_ready),
|
||||
.mshr_empty (mshr_empty)
|
||||
);
|
||||
|
||||
wire rdw_hazard_st0;
|
||||
reg rdw_hazard_st1;
|
||||
|
@ -154,47 +191,50 @@ module VX_cache_bank #(
|
|||
// inputs arbitration:
|
||||
// mshr replay has highest priority to maximize utilization since there is no miss.
|
||||
// handle memory responses next to prevent deadlock with potential memory request from a miss.
|
||||
wire replay_grant = ~init_enable;
|
||||
// flush has precedence over core requests to ensure that the cache is in a consistent state.
|
||||
wire replay_grant = ~line_flush_init;
|
||||
wire replay_enable = replay_grant && replay_valid;
|
||||
|
||||
wire fill_grant = ~init_enable && ~replay_enable;
|
||||
wire fill_grant = ~line_flush_init && ~replay_enable;
|
||||
wire fill_enable = fill_grant && mem_rsp_valid;
|
||||
|
||||
wire creq_grant = ~init_enable && ~replay_enable && ~fill_enable;
|
||||
wire flush_grant = ~line_flush_init && ~replay_enable && ~fill_enable;
|
||||
wire flush_enable = flush_grant && line_flush_valid;
|
||||
|
||||
wire creq_grant = ~line_flush_init && ~replay_enable && ~fill_enable && ~flush_enable;
|
||||
wire creq_enable = creq_grant && core_req_valid;
|
||||
|
||||
assign replay_ready = replay_grant
|
||||
&& ~rdw_hazard_st0
|
||||
&& ~pipe_stall;
|
||||
&& ~rdw_hazard_st0
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign mem_rsp_ready = fill_grant
|
||||
&& ~pipe_stall;
|
||||
|
||||
assign core_req_ready = creq_grant
|
||||
&& ~mreq_queue_alm_full
|
||||
&& ~mshr_alm_full
|
||||
&& ~pipe_stall;
|
||||
assign line_flush_ready = flush_grant
|
||||
&& ~mreq_queue_alm_full
|
||||
&& ~pipe_stall;
|
||||
|
||||
wire init_fire = init_enable;
|
||||
assign core_req_ready = creq_grant
|
||||
&& ~mreq_queue_alm_full
|
||||
&& ~mshr_alm_full
|
||||
&& ~pipe_stall;
|
||||
|
||||
wire init_fire = line_flush_init;
|
||||
wire replay_fire = replay_valid && replay_ready;
|
||||
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
|
||||
wire flush_fire = line_flush_valid && line_flush_ready;
|
||||
wire core_req_fire = core_req_valid && core_req_ready;
|
||||
|
||||
wire [TAG_WIDTH-1:0] mshr_creq_tag = replay_enable ? replay_tag : core_req_tag;
|
||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
|
||||
assign rw_sel = replay_valid ? replay_rw : core_req_rw;
|
||||
assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen;
|
||||
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
|
||||
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
|
||||
assign tag_sel = replay_valid ? replay_tag : core_req_tag;
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign req_uuid_sel = mshr_creq_tag[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign req_uuid_sel = 0;
|
||||
end
|
||||
|
||||
`UNUSED_VAR (mshr_creq_tag)
|
||||
|
||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || core_req_fire;
|
||||
|
||||
assign addr_sel = init_enable ? `CS_LINE_ADDR_WIDTH'(init_line_sel) :
|
||||
(replay_valid ? replay_addr :
|
||||
(mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||
assign addr_sel = (line_flush_init | line_flush_valid) ? `CS_LINE_ADDR_WIDTH'(line_flush_sel) :
|
||||
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||
|
||||
if (WRITE_ENABLE) begin
|
||||
assign data_sel[`CS_WORD_WIDTH-1:0] = replay_valid ? replay_data : (mem_rsp_valid ? mem_rsp_data[`CS_WORD_WIDTH-1:0] : core_req_data);
|
||||
|
@ -204,32 +244,24 @@ module VX_cache_bank #(
|
|||
`UNUSED_VAR (replay_data)
|
||||
end
|
||||
for (genvar i = `CS_WORD_WIDTH; i < `CS_LINE_WIDTH; ++i) begin
|
||||
assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words od data_sel
|
||||
assign data_sel[i] = mem_rsp_data[i]; // only the memory response fills the upper words of data_sel
|
||||
end
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign req_uuid_sel = tag_sel[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign req_uuid_sel = 0;
|
||||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({
|
||||
valid_sel,
|
||||
init_enable,
|
||||
replay_enable,
|
||||
fill_enable,
|
||||
creq_enable,
|
||||
addr_sel,
|
||||
data_sel,
|
||||
replay_valid ? replay_rw : core_req_rw,
|
||||
replay_valid ? replay_byteen : core_req_byteen,
|
||||
replay_valid ? replay_wsel : core_req_wsel,
|
||||
replay_valid ? replay_idx : core_req_idx,
|
||||
replay_valid ? replay_tag : core_req_tag,
|
||||
replay_id
|
||||
}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_creq_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
.data_in ({valid_sel, line_flush_init, replay_enable, fill_enable, flush_enable, creq_enable, core_req_flush, line_flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
);
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
|
@ -238,15 +270,18 @@ module VX_cache_bank #(
|
|||
assign req_uuid_st0 = 0;
|
||||
end
|
||||
|
||||
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
|
||||
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
||||
wire do_init_st0 = valid_st0 && is_init_st0;
|
||||
wire do_flush_st0 = valid_st0 && is_flush_st0;
|
||||
wire do_creq_rd_st0 = valid_st0 && is_creq_st0 && ~rw_st0;
|
||||
wire do_replay_rd_st0 = valid_st0 && is_replay_st0 && ~rw_st0;
|
||||
wire do_fill_st0 = valid_st0 && is_fill_st0;
|
||||
wire do_lookup_st0 = valid_st0 && ~(is_fill_st0 || is_init_st0);
|
||||
|
||||
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
|
||||
|
||||
wire [NUM_WAYS-1:0] tag_matches_st0, tag_matches_st1;
|
||||
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
|
||||
|
||||
wire [NUM_WAYS-1:0] repl_way_st0;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] repl_tag_st0;
|
||||
|
||||
`RESET_RELAY (tag_reset, reset);
|
||||
|
||||
|
@ -267,26 +302,33 @@ module VX_cache_bank #(
|
|||
|
||||
.stall (pipe_stall),
|
||||
|
||||
// read/Fill
|
||||
// init/fill/lookup/flush
|
||||
.init (do_init_st0 || do_flush_st0),
|
||||
.fill (do_fill_st0),
|
||||
.lookup (do_lookup_st0),
|
||||
.line_addr (addr_st0),
|
||||
.fill (do_fill_st0),
|
||||
.init (do_init_st0),
|
||||
.way_sel (way_sel_st0),
|
||||
.tag_matches(tag_matches_st0)
|
||||
.tag_matches(tag_matches_st0),
|
||||
|
||||
// replacement
|
||||
.repl_way (repl_way_st0),
|
||||
.repl_tag (repl_tag_st0)
|
||||
);
|
||||
|
||||
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
|
||||
|
||||
assign way_sel_st0 = is_fill_st0 ? repl_way_st0 : flush_way_st0;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_r_st0 = (is_fill_st0 || is_flush_st0) ? {repl_tag_st0, addr_st0[`CS_LINE_SEL_BITS-1:0]} : addr_st0;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + NUM_WAYS + 1),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + NUM_WAYS + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({valid_st0, is_replay_st0, is_fill_st0, is_creq_st0, rw_st0, addr_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, tag_matches_st0, way_sel_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_replay_st1, is_fill_st1, is_creq_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, tag_matches_st1, way_sel_st1, mshr_pending_st1})
|
||||
.data_in ({valid_st0, is_flush_st0, is_replay_st0, is_fill_st0, is_creq_st0, creq_flush_st0, rw_st0, addr_r_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, tag_matches_st0, way_sel_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_flush_st1, is_replay_st1, is_fill_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, tag_matches_st1, way_sel_st1, mshr_pending_st1})
|
||||
);
|
||||
|
||||
// we have a tag hit
|
||||
|
@ -298,8 +340,10 @@ module VX_cache_bank #(
|
|||
assign req_uuid_st1 = 0;
|
||||
end
|
||||
|
||||
wire do_creq_rd_st1 = valid_st1 && is_creq_st1 && ~rw_st1;
|
||||
wire do_creq_wr_st1 = valid_st1 && is_creq_st1 && rw_st1;
|
||||
wire is_read_st1 = is_creq_st1 && ~rw_st1;
|
||||
wire is_write_st1 = is_creq_st1 && rw_st1;
|
||||
wire do_creq_rd_st1 = valid_st1 && is_read_st1;
|
||||
wire do_creq_wr_st1 = valid_st1 && is_write_st1;
|
||||
wire do_fill_st1 = valid_st1 && is_fill_st1;
|
||||
wire do_replay_rd_st1 = valid_st1 && is_replay_st1 && ~rw_st1;
|
||||
wire do_replay_wr_st1 = valid_st1 && is_replay_st1 && rw_st1;
|
||||
|
@ -310,20 +354,41 @@ module VX_cache_bank #(
|
|||
wire do_write_hit_st1 = do_creq_wr_st1 && is_hit_st1;
|
||||
wire do_write_miss_st1= do_creq_wr_st1 && ~is_hit_st1;
|
||||
|
||||
wire do_flush_st1 = valid_st1 && is_flush_st1;
|
||||
|
||||
`UNUSED_VAR (do_write_miss_st1)
|
||||
|
||||
// ensure an mshr replay always gets a hit
|
||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("runtime error: invalid mshr replay"));
|
||||
|
||||
// detect BRAM's read-during-write hazard
|
||||
assign rdw_hazard_st0 = do_fill_st0; // after a fill
|
||||
assign rdw_hazard_st0 = do_fill_st0; // stall cycle after a fill
|
||||
wire rdw_write_st1 = do_write_hit_st1 || do_replay_wr_st1;
|
||||
// st0 operations that read the data store: core/replay reads, plus (only when
// the cache is configured as writeback) fills and flushes.
// The WRITEBACK guard must be a conjunction: with "!WRITEBACK ||" this term is
// constantly 1 in write-through mode, so every st0 operation that matches the
// address of an st1 write would raise the read-during-write hazard.
wire rdw_read_st0 = do_creq_rd_st0 || do_replay_rd_st0
                 || (WRITEBACK && (do_flush_st0 || do_fill_st0)); // a writeback also does a data read
|
||||
always @(posedge clk) begin // after a write to same address
|
||||
rdw_hazard_st1 <= (do_creq_rd_st0 && do_write_hit_st1 && (addr_st0 == addr_st1))
|
||||
rdw_hazard_st1 <= (rdw_read_st0 && rdw_write_st1 && (addr_st0 == addr_st1))
|
||||
&& ~rdw_hazard_st1; // invalidate if pipeline stalled to avoid repeats
|
||||
end
|
||||
|
||||
wire [`CS_WORD_WIDTH-1:0] write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0];
|
||||
wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}};
|
||||
wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1;
|
||||
wire [LINE_SIZE-1:0] write_byteen_st1;
|
||||
|
||||
wire [`CS_LINE_WIDTH-1:0] dirty_data_st1;
|
||||
wire [LINE_SIZE-1:0] dirty_byteen_st1;
|
||||
wire dirty_valid_st1;
|
||||
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
reg [LINE_SIZE-1:0] write_byteen_r;
|
||||
always @(*) begin
|
||||
write_byteen_r = '0;
|
||||
write_byteen_r[wsel_st1 * WORD_SIZE +: WORD_SIZE] = byteen_st1;
|
||||
end
|
||||
assign write_byteen_st1 = write_byteen_r;
|
||||
end else begin
|
||||
assign write_byteen_st1 = byteen_st1;
|
||||
end
|
||||
|
||||
`RESET_RELAY (data_reset, reset);
|
||||
|
||||
|
@ -336,6 +401,7 @@ module VX_cache_bank #(
|
|||
.NUM_WAYS (NUM_WAYS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH)
|
||||
) cache_data (
|
||||
.clk (clk),
|
||||
|
@ -347,14 +413,18 @@ module VX_cache_bank #(
|
|||
|
||||
.read (do_read_hit_st1 || do_replay_rd_st1),
|
||||
.fill (do_fill_st1),
|
||||
.flush (do_flush_st1),
|
||||
.write (do_write_hit_st1 || do_replay_wr_st1),
|
||||
.way_sel (way_sel_st1 | tag_matches_st1),
|
||||
.line_addr (addr_st1),
|
||||
.wsel (wsel_st1),
|
||||
.byteen (byteen_st1),
|
||||
.fill_data (fill_data_st1),
|
||||
.write_data (write_data_st1),
|
||||
.read_data (read_data_st1)
|
||||
.write_byteen(write_byteen_st1),
|
||||
.read_data (read_data_st1),
|
||||
.dirty_valid(dirty_valid_st1),
|
||||
.dirty_data (dirty_data_st1),
|
||||
.dirty_byteen(dirty_byteen_st1)
|
||||
);
|
||||
|
||||
wire [MSHR_SIZE-1:0] mshr_lookup_pending_st0;
|
||||
|
@ -362,7 +432,17 @@ module VX_cache_bank #(
|
|||
wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall;
|
||||
wire mshr_lookup_st0 = mshr_allocate_st0;
|
||||
wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall;
|
||||
wire mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
|
||||
|
||||
// release allocated mshr entry if we had a hit
|
||||
wire mshr_release_st1;
|
||||
if (WRITEBACK) begin
|
||||
assign mshr_release_st1 = is_hit_st1;
|
||||
end else begin
|
||||
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address
|
||||
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content
|
||||
// this can happen when writes are sent late, when the fill was already in flight.
|
||||
assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
|
||||
end
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (MSHR_SIZE)
|
||||
|
@ -371,7 +451,7 @@ module VX_cache_bank #(
|
|||
.reset (reset),
|
||||
.incr (core_req_fire),
|
||||
.decr (replay_fire || (mshr_finalize_st1 && mshr_release_st1)),
|
||||
`UNUSED_PIN (empty),
|
||||
.empty (mshr_empty),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
.full (mshr_alm_full),
|
||||
`UNUSED_PIN (alm_full),
|
||||
|
@ -437,7 +517,7 @@ module VX_cache_bank #(
|
|||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin
|
||||
assign lookup_matches[i] = mshr_lookup_pending_st0[i]
|
||||
&& (i != mshr_alloc_id_st0) // exclude current mshr id
|
||||
&& ~mshr_lookup_rw_st0[i]; // exclude write requests
|
||||
&& (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough
|
||||
end
|
||||
assign mshr_pending_st0 = (| lookup_matches);
|
||||
|
||||
|
@ -475,29 +555,38 @@ module VX_cache_bank #(
|
|||
// schedule memory request
|
||||
|
||||
wire mreq_queue_push, mreq_queue_pop, mreq_queue_empty;
|
||||
wire [`CS_WORD_WIDTH-1:0] mreq_queue_data;
|
||||
wire [WORD_SIZE-1:0] mreq_queue_byteen;
|
||||
wire [WORD_SEL_WIDTH-1:0] mreq_queue_wsel;
|
||||
wire [`CS_LINE_WIDTH-1:0] mreq_queue_data;
|
||||
wire [LINE_SIZE-1:0] mreq_queue_byteen;
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mreq_queue_id;
|
||||
wire mreq_queue_rw;
|
||||
wire mreq_queue_flush;
|
||||
|
||||
assign mreq_queue_push = (do_read_miss_st1 && ~mshr_pending_st1)
|
||||
|| do_creq_wr_st1;
|
||||
wire is_evict_st1 = (is_fill_st1 || is_flush_st1) && dirty_valid_st1;
|
||||
wire do_writeback_st1 = valid_st1 && is_evict_st1;
|
||||
`UNUSED_VAR (do_writeback_st1)
|
||||
|
||||
if (WRITEBACK) begin
|
||||
assign mreq_queue_push = ((do_read_miss_st1 || do_write_miss_st1) && ~mshr_pending_st1)
|
||||
|| do_writeback_st1;
|
||||
end else begin
|
||||
`UNUSED_VAR (dirty_valid_st1)
|
||||
assign mreq_queue_push = (do_read_miss_st1 && ~mshr_pending_st1)
|
||||
|| do_creq_wr_st1;
|
||||
end
|
||||
|
||||
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
|
||||
|
||||
assign mreq_queue_rw = WRITE_ENABLE && rw_st1;
|
||||
assign mreq_queue_rw = WRITE_ENABLE && (WRITEBACK ? is_evict_st1 : rw_st1);
|
||||
assign mreq_queue_addr = addr_st1;
|
||||
assign mreq_queue_id = mshr_id_st1;
|
||||
assign mreq_queue_wsel = wsel_st1;
|
||||
assign mreq_queue_byteen = byteen_st1;
|
||||
assign mreq_queue_data = write_data_st1;
|
||||
assign mreq_queue_data = is_write_st1 ? write_data_st1 : dirty_data_st1;
|
||||
assign mreq_queue_byteen = is_write_st1 ? write_byteen_st1 : dirty_byteen_st1;
|
||||
assign mreq_queue_flush = creq_flush_st1;
|
||||
|
||||
`RESET_RELAY (mreq_queue_reset, reset);
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + `CS_WORD_WIDTH),
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1),
|
||||
.DEPTH (MREQ_SIZE),
|
||||
.ALM_FULL (MREQ_SIZE-PIPELINE_STAGES),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
|
@ -506,8 +595,8 @@ module VX_cache_bank #(
|
|||
.reset (mreq_queue_reset),
|
||||
.push (mreq_queue_push),
|
||||
.pop (mreq_queue_pop),
|
||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_wsel, mreq_queue_data}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_wsel, mem_req_data}),
|
||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_data, mreq_queue_flush}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_data, mem_req_flush}),
|
||||
.empty (mreq_queue_empty),
|
||||
.alm_full (mreq_queue_alm_full),
|
||||
`UNUSED_PIN (full),
|
||||
|
@ -527,15 +616,12 @@ module VX_cache_bank #(
|
|||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
wire crsp_queue_fire = crsp_queue_valid && crsp_queue_ready;
|
||||
wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid)
|
||||
&& ~(replay_fire || mem_rsp_fire || core_req_fire);
|
||||
// Debug-trace only: at least one request source is valid, but none of them was
// accepted this cycle. The flush source must be tested with flush_fire
// (line_flush_valid && line_flush_ready); using line_flush_valid here would
// hide every stall that happens while a flush request is waiting.
wire pipeline_stall = (replay_valid || mem_rsp_valid || core_req_valid || line_flush_valid)
                   && ~(replay_fire || mem_rsp_fire || core_req_fire || flush_fire);
|
||||
always @(posedge clk) begin
|
||||
if (pipeline_stall) begin
|
||||
`TRACE(3, ("%d: *** %s stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full));
|
||||
end
|
||||
if (init_enable) begin
|
||||
`TRACE(2, ("%d: %s init: addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(init_line_sel, BANK_ID)));
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(2, ("%d: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data));
|
||||
end
|
||||
|
@ -552,8 +638,10 @@ module VX_cache_bank #(
|
|||
`TRACE(2, ("%d: %s core-rd-rsp: addr=0x%0h, tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(addr_st1, BANK_ID), crsp_queue_tag, crsp_queue_idx, crsp_queue_data, req_uuid_st1));
|
||||
end
|
||||
if (mreq_queue_push) begin
|
||||
if (do_creq_wr_st1)
|
||||
if (do_creq_wr_st1 && !WRITEBACK)
|
||||
`TRACE(2, ("%d: %s writethrough: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1));
|
||||
else if (do_writeback_st1)
|
||||
`TRACE(2, ("%d: %s writeback: addr=0x%0h, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1));
|
||||
else
|
||||
`TRACE(2, ("%d: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1));
|
||||
end
|
||||
|
|
4
hw/rtl/cache/VX_cache_cluster.sv
vendored
4
hw/rtl/cache/VX_cache_cluster.sv
vendored
|
@ -46,6 +46,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -151,6 +154,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.MRSQ_SIZE (MRSQ_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (ARB_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
|
|
89
hw/rtl/cache/VX_cache_data.sv
vendored
89
hw/rtl/cache/VX_cache_data.sv
vendored
|
@ -28,6 +28,8 @@ module VX_cache_data #(
|
|||
parameter WORD_SIZE = 1,
|
||||
// Enable cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0
|
||||
) (
|
||||
|
@ -42,52 +44,90 @@ module VX_cache_data #(
|
|||
|
||||
input wire read,
|
||||
input wire fill,
|
||||
input wire flush,
|
||||
input wire write,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
|
||||
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] wsel,
|
||||
input wire [WORD_SIZE-1:0] byteen,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data,
|
||||
input wire [`CS_WORD_WIDTH-1:0] write_data,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] write_data,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen,
|
||||
input wire [NUM_WAYS-1:0] way_sel,
|
||||
|
||||
output wire [`CS_WORD_WIDTH-1:0] read_data
|
||||
output wire [`CS_WORD_WIDTH-1:0] read_data,
|
||||
output wire dirty_valid,
|
||||
output wire [`CS_LINE_WIDTH-1:0] dirty_data,
|
||||
output wire [LINE_SIZE-1:0] dirty_byteen
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_PARAM (WORD_SIZE)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (stall)
|
||||
`UNUSED_VAR (line_addr)
|
||||
`UNUSED_VAR (read)
|
||||
`UNUSED_VAR (flush)
|
||||
|
||||
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
|
||||
|
||||
if (WRITEBACK) begin
|
||||
reg [`CS_LINES_PER_BANK * NUM_WAYS-1:0][LINE_SIZE-1:0] dirty_bytes_r;
|
||||
reg [`CS_LINES_PER_BANK * NUM_WAYS-1:0] dirty_blocks_r;
|
||||
|
||||
wire [`CLOG2(`CS_LINES_PER_BANK * NUM_WAYS)-1:0] way_addr;
|
||||
if (NUM_WAYS > 1) begin
|
||||
assign way_addr = {line_sel, way_idx};
|
||||
end else begin
|
||||
assign way_addr = line_sel;
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (fill) begin
|
||||
dirty_bytes_r[way_addr] <= '0;
|
||||
end else if (write) begin
|
||||
dirty_bytes_r[way_addr] <= dirty_bytes_r[way_addr] | write_byteen;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
dirty_blocks_r <= '0;
|
||||
end else begin
|
||||
if (fill) begin
|
||||
dirty_blocks_r[way_addr] <= 0;
|
||||
end else if (write) begin
|
||||
dirty_blocks_r[way_addr] <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign dirty_byteen = dirty_bytes_r[way_addr];
|
||||
assign dirty_valid = dirty_blocks_r[way_addr];
|
||||
end else begin
|
||||
assign dirty_byteen = '0;
|
||||
assign dirty_valid = 0;
|
||||
end
|
||||
|
||||
// order the data layout to perform ways multiplexing last.
|
||||
// this allows converting way index to binary in parallel with BRAM read.
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata;
|
||||
wire [BYTEENW-1:0] wren;
|
||||
|
||||
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
|
||||
reg [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] wdata_r;
|
||||
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_r;
|
||||
|
||||
always @(*) begin
|
||||
wdata_r = {`CS_WORDS_PER_LINE{write_data}};
|
||||
wren_r = '0;
|
||||
wren_r[wsel] = byteen;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
assign wdata[i] = (fill || !WRITE_ENABLE) ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{write_data[i]}};
|
||||
end
|
||||
|
||||
// order the data layout to perform ways multiplexing last
|
||||
// this allows performing onehot encoding of the way index in parallel with BRAM read.
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
assign wdata[i] = fill ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{wdata_r[i]}};
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||
assign wren_w[i][j] = (fill ? {WORD_SIZE{1'b1}} : wren_r[i])
|
||||
& {WORD_SIZE{((NUM_WAYS == 1) || way_sel[j])}};
|
||||
assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
|
||||
& {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
|
||||
end
|
||||
end
|
||||
assign wren = wren_w;
|
||||
end else begin
|
||||
`UNUSED_VAR (write)
|
||||
`UNUSED_VAR (byteen)
|
||||
`UNUSED_VAR (write_byteen)
|
||||
`UNUSED_VAR (write_data)
|
||||
assign wdata = fill_data;
|
||||
assign wren = fill;
|
||||
|
@ -123,28 +163,35 @@ module VX_cache_data #(
|
|||
);
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
|
||||
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
assign per_way_rdata = rdata[wsel];
|
||||
end else begin
|
||||
`UNUSED_VAR (wsel)
|
||||
assign per_way_rdata = rdata;
|
||||
end
|
||||
|
||||
assign read_data = per_way_rdata[way_idx];
|
||||
|
||||
`UNUSED_VAR (stall)
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] dirty_data_w;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||
assign dirty_data_w[j][i] = rdata[i][j];
|
||||
end
|
||||
end
|
||||
assign dirty_data = dirty_data_w[way_idx];
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data));
|
||||
end
|
||||
if (flush && ~stall) begin
|
||||
`TRACE(3, ("%d: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b, byteen=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_valid, dirty_byteen));
|
||||
end
|
||||
if (read && ~stall) begin
|
||||
`TRACE(3, ("%d: %s read: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, read_data, req_uuid));
|
||||
end
|
||||
if (write && ~stall) begin
|
||||
`TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, byteen, write_data, req_uuid));
|
||||
`TRACE(3, ("%d: %s write: addr=0x%0h, way=%b, blk_addr=%0d, byteen=%b, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, write_byteen, write_data, req_uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
2
hw/rtl/cache/VX_cache_define.vh
vendored
2
hw/rtl/cache/VX_cache_define.vh
vendored
|
@ -50,7 +50,7 @@
|
|||
`define CS_TAG_SEL_ADDR_START (1+`CS_LINE_SEL_ADDR_END)
|
||||
`define CS_TAG_SEL_ADDR_END (`CS_WORD_ADDR_WIDTH-1)
|
||||
|
||||
`define CS_LINE_TAG_ADDR(x) x[`CS_LINE_ADDR_WIDTH-1 : `CS_LINE_SEL_BITS]
|
||||
`define CS_LINE_ADDR_TAG(x) x[`CS_LINE_ADDR_WIDTH-1 : `CS_LINE_SEL_BITS]
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
|
154
hw/rtl/cache/VX_cache_flush.sv
vendored
Normal file
154
hw/rtl/cache/VX_cache_flush.sv
vendored
Normal file
|
@ -0,0 +1,154 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// Cache flush controller.
//
// Sits between the core request ports (core_bus_in_if) and the cache
// (core_bus_out_if). While any input port presents a valid request whose
// atype[`ADDR_TYPE_FLUSH] bit is set, all request ports are blocked and the
// following sequence runs:
//   IDLE  -> WAIT  : (only if BANK_SEL_LATENCY != 0) wait until the pending
//                    counter reports no requests between the output ports
//                    and the banks
//   WAIT  -> FLUSH : assert flush_valid to every bank and accumulate
//                    flush_ready into flush_done
//   FLUSH -> DONE  : once every bank has reported ready, release only the
//                    ports that carried a flush request
//   DONE  -> IDLE  : once each released port has seen req_ready on the
//                    output side
// Responses are passed through unchanged.
module VX_cache_flush #(
    // Number of Word requests per cycle
    parameter NUM_REQS  = 4,
    // Number of banks
    parameter NUM_BANKS = 1,
    // Bank select latency
    parameter BANK_SEL_LATENCY = 1
) (
    input wire clk,
    input wire reset,
    VX_mem_bus_if.slave  core_bus_in_if [NUM_REQS],
    VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
    input wire [NUM_BANKS-1:0]  bank_req_fire,
    output wire [NUM_BANKS-1:0] flush_valid,
    input wire [NUM_BANKS-1:0]  flush_ready
);
    localparam STATE_IDLE  = 0;
    localparam STATE_WAIT  = 1;
    localparam STATE_FLUSH = 2;
    localparam STATE_DONE  = 3;

    // track in-flight core requests

    wire no_inflight_reqs;

    if (BANK_SEL_LATENCY != 0) begin

        localparam NUM_REQS_W  = `CLOG2(NUM_REQS+1);
        localparam NUM_BANKS_W = `CLOG2(NUM_BANKS+1);

        // requests accepted on the output side this cycle
        wire [NUM_REQS-1:0] core_bus_out_fire;
        for (genvar i = 0; i < NUM_REQS; ++i) begin
            assign core_bus_out_fire[i] = core_bus_out_if[i].req_valid && core_bus_out_if[i].req_ready;
        end

        wire [NUM_REQS_W-1:0]  core_bus_out_cnt;
        wire [NUM_BANKS_W-1:0] bank_req_cnt;

        `POP_COUNT(core_bus_out_cnt, core_bus_out_fire);
        `POP_COUNT(bank_req_cnt, bank_req_fire);
        `UNUSED_VAR (core_bus_out_cnt)

        // counts requests that left the output ports but have not fired at a bank yet
        VX_pending_size #(
            .SIZE  (BANK_SEL_LATENCY * NUM_BANKS),
            .INCRW (NUM_BANKS_W),
            .DECRW (NUM_BANKS_W)
        ) pending_size (
            .clk   (clk),
            .reset (reset),
            .incr  (NUM_BANKS_W'(core_bus_out_cnt)),
            .decr  (bank_req_cnt),
            .empty (no_inflight_reqs),
            `UNUSED_PIN (alm_empty),
            `UNUSED_PIN (full),
            `UNUSED_PIN (alm_full),
            `UNUSED_PIN (size)
        );

    end else begin
        // STATE_WAIT is never entered when BANK_SEL_LATENCY == 0 (IDLE goes
        // straight to FLUSH), so this value does not drive any transition.
        assign no_inflight_reqs = 0;
        `UNUSED_VAR (bank_req_fire)
    end

    reg [1:0] state, state_n;
    reg [NUM_BANKS-1:0] flush_done, flush_done_n;

    // per-port: a valid request tagged as a flush is waiting at the input
    wire [NUM_REQS-1:0] flush_req_mask;
    for (genvar i = 0; i < NUM_REQS; ++i) begin
        assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.atype[`ADDR_TYPE_FLUSH];
    end
    wire flush_req_enable = (| flush_req_mask);

    reg [NUM_REQS-1:0] lock_released, lock_released_n;

    // request path: blocked while a flush request is present, unless this
    // port has been explicitly released
    for (genvar i = 0; i < NUM_REQS; ++i) begin
        wire input_enable = ~flush_req_enable || lock_released[i];
        assign core_bus_out_if[i].req_valid = core_bus_in_if[i].req_valid && input_enable;
        assign core_bus_out_if[i].req_data  = core_bus_in_if[i].req_data;
        assign core_bus_in_if[i].req_ready  = core_bus_out_if[i].req_ready && input_enable;
    end

    // response path: straight pass-through
    for (genvar i = 0; i < NUM_REQS; ++i) begin
        assign core_bus_in_if[i].rsp_valid  = core_bus_out_if[i].rsp_valid;
        assign core_bus_in_if[i].rsp_data   = core_bus_out_if[i].rsp_data;
        assign core_bus_out_if[i].rsp_ready = core_bus_in_if[i].rsp_ready;
    end

    wire [NUM_REQS-1:0] core_bus_out_ready;
    for (genvar i = 0; i < NUM_REQS; ++i) begin
        assign core_bus_out_ready[i] = core_bus_out_if[i].req_ready;
    end

    // next-state logic
    always @(*) begin
        state_n = state;
        flush_done_n = flush_done;
        lock_released_n = lock_released;
        case (state)
            STATE_IDLE: begin
                if (flush_req_enable) begin
                    state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT : STATE_FLUSH;
                end
            end
            STATE_WAIT: begin
                // wait for requests already issued to reach the banks
                if (no_inflight_reqs) begin
                    state_n = STATE_FLUSH;
                end
            end
            STATE_FLUSH: begin
                // remember every bank that has reported ready so far
                flush_done_n = flush_done | flush_ready;
                // the flush is complete only when ALL banks have reported;
                // comparing against zero would exit before any bank finished
                // and would never exit once a single bank had reported
                if (flush_done_n == {NUM_BANKS{1'b1}}) begin
                    state_n = STATE_DONE;
                    // clear the bookkeeping so the next flush starts fresh
                    flush_done_n = '0;
                    // release only the ports that carried a flush request
                    lock_released_n = flush_req_mask;
                end
            end
            STATE_DONE: begin
                // drop each release once the output side is ready on that port
                lock_released_n = lock_released & ~core_bus_out_ready;
                if (lock_released_n == 0) begin
                    state_n = STATE_IDLE;
                end
            end
        endcase
    end

    always @(posedge clk) begin
        if (reset) begin
            state         <= STATE_IDLE;
            flush_done    <= '0;
            lock_released <= '0;
        end else begin
            state         <= state_n;
            flush_done    <= flush_done_n;
            lock_released <= lock_released_n;
        end
    end

    // the flush request is broadcast to all banks for the whole FLUSH state
    assign flush_valid = {NUM_BANKS{state == STATE_FLUSH}};

endmodule
|
1
hw/rtl/cache/VX_cache_init.sv
vendored
1
hw/rtl/cache/VX_cache_init.sv
vendored
|
@ -13,6 +13,7 @@
|
|||
|
||||
`include "VX_cache_define.vh"
|
||||
|
||||
// cache flush unit
|
||||
module VX_cache_init #(
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 1024,
|
||||
|
|
62
hw/rtl/cache/VX_cache_tags.sv
vendored
62
hw/rtl/cache/VX_cache_tags.sv
vendored
|
@ -38,45 +38,63 @@ module VX_cache_tags #(
|
|||
|
||||
input wire stall,
|
||||
|
||||
// read/fill
|
||||
// init/fill/lookup
|
||||
input wire init,
|
||||
input wire fill,
|
||||
input wire lookup,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
|
||||
input wire fill,
|
||||
input wire init,
|
||||
output wire [NUM_WAYS-1:0] way_sel,
|
||||
output wire [NUM_WAYS-1:0] tag_matches
|
||||
output wire [NUM_WAYS-1:0] tag_matches,
|
||||
|
||||
// replacement
|
||||
output wire [NUM_WAYS-1:0] repl_way,
|
||||
output wire [`CS_TAG_SEL_BITS-1:0] repl_tag
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
`UNUSED_VAR (lookup)
|
||||
|
||||
// valid, tag
|
||||
localparam TAG_WIDTH = 1 + `CS_TAG_SEL_BITS;
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_TAG_ADDR(line_addr);
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
|
||||
wire [NUM_WAYS-1:0] read_valid;
|
||||
|
||||
if (NUM_WAYS > 1) begin
|
||||
reg [NUM_WAYS-1:0] repl_way;
|
||||
reg [NUM_WAYS-1:0] repl_way_r;
|
||||
// cyclic assignment of replacement way
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
repl_way <= 1;
|
||||
repl_way_r <= 1;
|
||||
end else if (~stall) begin // hold the value on stalls prevent filling different slots twice
|
||||
repl_way <= {repl_way[NUM_WAYS-2:0], repl_way[NUM_WAYS-1]};
|
||||
repl_way_r <= {repl_way_r[NUM_WAYS-2:0], repl_way_r[NUM_WAYS-1]};
|
||||
end
|
||||
end
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
assign way_sel[i] = fill && repl_way[i];
|
||||
end
|
||||
|
||||
assign repl_way = repl_way_r;
|
||||
|
||||
VX_onehot_mux #(
|
||||
.DATAW (`CS_TAG_SEL_BITS),
|
||||
.N (NUM_WAYS)
|
||||
) repl_tag_sel (
|
||||
.data_in (read_tag),
|
||||
.sel_in (repl_way_r),
|
||||
.data_out (repl_tag)
|
||||
);
|
||||
end else begin
|
||||
`UNUSED_VAR (stall)
|
||||
assign way_sel = fill;
|
||||
assign repl_way = 1'b1;
|
||||
assign repl_tag = read_tag;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
wire [`CS_TAG_SEL_BITS-1:0] read_tag;
|
||||
wire read_valid;
|
||||
|
||||
wire do_fill = fill && repl_way[i];
|
||||
wire do_write = init || do_fill;
|
||||
wire line_valid = ~init;
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (TAG_WIDTH),
|
||||
|
@ -85,27 +103,29 @@ module VX_cache_tags #(
|
|||
) tag_store (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
.write (way_sel[i] || init),
|
||||
.write (do_write),
|
||||
`UNUSED_PIN (wren),
|
||||
.addr (line_sel),
|
||||
.wdata ({~init, line_tag}),
|
||||
.rdata ({read_valid, read_tag})
|
||||
.wdata ({line_valid, line_tag}),
|
||||
.rdata ({read_valid[i], read_tag[i]})
|
||||
);
|
||||
end
|
||||
|
||||
assign tag_matches[i] = read_valid && (line_tag == read_tag);
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
assign tag_matches[i] = read_valid[i] && (line_tag == read_tag[i]);
|
||||
end
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag));
|
||||
`TRACE(3, ("%d: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), repl_way, line_sel, line_tag));
|
||||
end
|
||||
if (init) begin
|
||||
`TRACE(3, ("%d: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel));
|
||||
end
|
||||
if (lookup && ~stall) begin
|
||||
if (tag_matches != 0) begin
|
||||
`TRACE(3, ("%d: %s hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, line_tag, req_uuid));
|
||||
`TRACE(3, ("%d: %s hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid));
|
||||
end else begin
|
||||
`TRACE(3, ("%d: %s miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid));
|
||||
end
|
||||
|
|
54
hw/rtl/cache/VX_cache_wrap.sv
vendored
54
hw/rtl/cache/VX_cache_wrap.sv
vendored
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -23,20 +23,20 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
|
||||
|
||||
// Size of cache in bytes
|
||||
parameter CACHE_SIZE = 4096,
|
||||
parameter CACHE_SIZE = 4096,
|
||||
// Size of line inside a bank in bytes
|
||||
parameter LINE_SIZE = 64,
|
||||
parameter LINE_SIZE = 64,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = 4,
|
||||
|
||||
// Core Response Queue Size
|
||||
parameter CRSQ_SIZE = 2,
|
||||
// Miss Reservation Queue Size
|
||||
parameter MSHR_SIZE = 8,
|
||||
parameter MSHR_SIZE = 8,
|
||||
// Memory Response Queue Size
|
||||
parameter MRSQ_SIZE = 0,
|
||||
// Memory Request Queue Size
|
||||
|
@ -45,6 +45,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
// Make the cache writeable
|
||||
parameter WRITE_ENABLE = 1,
|
||||
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
|
@ -63,7 +66,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
// Memory request output buffer
|
||||
parameter MEM_OUT_BUF = 0
|
||||
) (
|
||||
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
|
@ -80,7 +83,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
|
||||
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
|
||||
localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
|
||||
|
||||
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
|
||||
|
@ -98,7 +101,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
) mem_bus_cache_if();
|
||||
|
||||
if (NC_OR_BYPASS) begin
|
||||
|
||||
|
||||
`RESET_RELAY (nc_bypass_reset, reset);
|
||||
|
||||
VX_cache_bypass #(
|
||||
|
@ -108,13 +111,13 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.PASSTHRU (PASSTHRU),
|
||||
.NC_ENABLE (PASSTHRU ? 0 : NC_ENABLE),
|
||||
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.LINE_SIZE (LINE_SIZE),
|
||||
|
||||
.CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH),
|
||||
.CORE_ADDR_WIDTH (`CS_WORD_ADDR_WIDTH),
|
||||
.CORE_TAG_WIDTH (TAG_WIDTH),
|
||||
|
||||
.MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH),
|
||||
|
||||
.MEM_ADDR_WIDTH (`CS_MEM_ADDR_WIDTH),
|
||||
.MEM_TAG_IN_WIDTH (CACHE_MEM_TAG_WIDTH),
|
||||
.MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH),
|
||||
|
||||
|
@ -132,15 +135,15 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.mem_bus_in_if (mem_bus_cache_if),
|
||||
.mem_bus_out_if (mem_bus_if)
|
||||
);
|
||||
|
||||
|
||||
end else begin
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
`ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]);
|
||||
`ASSIGN_VX_MEM_BUS_IF (core_bus_cache_if[i], core_bus_if[i]);
|
||||
end
|
||||
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_cache_if);
|
||||
end
|
||||
end
|
||||
|
||||
if (PASSTHRU != 0) begin
|
||||
|
||||
|
@ -152,7 +155,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
assign core_bus_cache_if[i].rsp_valid = 0;
|
||||
assign core_bus_cache_if[i].rsp_data = '0;
|
||||
`UNUSED_VAR (core_bus_cache_if[i].rsp_ready)
|
||||
end
|
||||
end
|
||||
|
||||
assign mem_bus_cache_if.req_valid = 0;
|
||||
assign mem_bus_cache_if.req_data = '0;
|
||||
|
@ -183,6 +186,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.MRSQ_SIZE (MRSQ_SIZE),
|
||||
.MREQ_SIZE (MREQ_SIZE),
|
||||
.WRITE_ENABLE (WRITE_ENABLE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
|
||||
|
@ -195,8 +199,8 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
.core_bus_if (core_bus_cache_if),
|
||||
.mem_bus_if (mem_bus_cache_if)
|
||||
);
|
||||
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
|
@ -225,9 +229,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
end
|
||||
if (core_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s core-rd-rsp: tag=0x%0h, req_idx=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, core_bus_if[i].rsp_data.tag, i, core_bus_if[i].rsp_data.data, core_rsp_uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [`UP(UUID_WIDTH)-1:0] mem_req_uuid;
|
||||
wire [`UP(UUID_WIDTH)-1:0] mem_rsp_uuid;
|
||||
|
@ -246,17 +250,17 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
always @(posedge clk) begin
|
||||
if (mem_req_fire) begin
|
||||
if (mem_bus_if.req_data.rw)
|
||||
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
||||
`TRACE(1, ("%d: %s mem-wr-req: addr=0x%0h, tag=0x%0h, byteen=%b, data=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_bus_if.req_data.byteen, mem_bus_if.req_data.data, mem_req_uuid));
|
||||
else
|
||||
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
`TRACE(1, ("%d: %s mem-rd-req: addr=0x%0h, tag=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, `TO_FULL_ADDR(mem_bus_if.req_data.addr), mem_bus_if.req_data.tag, mem_req_uuid));
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
|
||||
`TRACE(1, ("%d: %s mem-rd-rsp: tag=0x%0h, data=0x%0h (#%0d)\n",
|
||||
$time, INSTANCE_ID, mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data, mem_rsp_uuid));
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -181,7 +181,7 @@ module VX_alu_int #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in ({br_enable, br_wid, br_taken, br_dest}),
|
||||
.data_in ({br_enable, br_wid, br_taken, br_dest}),
|
||||
.data_out ({branch_ctl_if.valid, branch_ctl_if.wid, branch_ctl_if.taken, branch_ctl_if.dest})
|
||||
);
|
||||
|
||||
|
|
|
@ -269,7 +269,6 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
`endif
|
||||
.DATAW (`XLEN * `NUM_THREADS),
|
||||
.SIZE (PER_BANK_REGS * PER_ISSUE_WARPS),
|
||||
.ADDR_MIN ((b == 0) ? PER_ISSUE_WARPS : 0),
|
||||
.WRENW (BYTEENW),
|
||||
.NO_RWCHECK (1)
|
||||
) gpr_ram (
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
//`TRACING_OFF
|
||||
module VX_pending_size #(
|
||||
parameter SIZE = 1,
|
||||
parameter INCRW = 1,
|
||||
|
@ -32,8 +32,8 @@ module VX_pending_size #(
|
|||
output wire alm_full,
|
||||
output wire [SIZEW-1:0] size
|
||||
);
|
||||
`STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter"))
|
||||
`STATIC_ASSERT(DECRW <= SIZEW, ("invalid parameter"))
|
||||
`STATIC_ASSERT(INCRW <= SIZEW, ("invalid parameter: %d vs %d", INCRW, SIZEW))
|
||||
`STATIC_ASSERT(DECRW <= SIZEW, ("invalid parameter: %d vs %d", DECRW, SIZEW))
|
||||
localparam ADDRW = `LOG2UP(SIZE);
|
||||
|
||||
reg empty_r, alm_empty_r;
|
||||
|
@ -53,8 +53,8 @@ module VX_pending_size #(
|
|||
full_r <= 0;
|
||||
size_r <= '0;
|
||||
end else begin
|
||||
`ASSERT((incr >= decr) || (size_n >= size_r), ("runtime error: counter overflow"));
|
||||
`ASSERT((incr <= decr) || (size_n <= size_r), ("runtime error: counter underflow"));
|
||||
`ASSERT((SIZEW'(incr) >= SIZEW'(decr)) || (size_n >= size_r), ("runtime error: counter overflow"));
|
||||
`ASSERT((SIZEW'(incr) <= SIZEW'(decr)) || (size_n <= size_r), ("runtime error: counter underflow"));
|
||||
size_r <= size_n;
|
||||
empty_r <= (size_n == SIZEW'(0));
|
||||
alm_empty_r <= (size_n == SIZEW'(ALM_EMPTY));
|
||||
|
@ -127,4 +127,4 @@ module VX_pending_size #(
|
|||
assign full = full_r;
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
//`TRACING_ON
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue