// Mirror of https://github.com/vortexgpgpu/vortex.git
// Synced 2025-04-24 05:47:35 -04:00
// 761 lines, 33 KiB, SystemVerilog
`include "VX_cache_define.vh"

// VX_cache: top level of a multi-bank cache.
//
// Structure visible in this module:
//   - an optional VX_nc_bypass stage (NC_ENABLE) between the external ports
//     and the cache proper,
//   - VX_core_req_bank_sel, which hands core requests to NUM_BANKS VX_bank
//     instances, and VX_core_rsp_merge, which collects their responses,
//   - a VX_stream_arbiter that funnels per-bank memory requests into a
//     single memory request stream,
//   - VX_skid_buffer / VX_elastic_buffer stages on the core-response,
//     memory-request and memory-response paths,
//   - a VX_flush_ctrl whose outputs are wired to every bank,
//   - performance counters, compiled in only when PERF_ENABLE is defined.
module VX_cache #(
    // Cache instance id; forwarded to the bank selector, banks and response merger
    parameter CACHE_ID                      = 0,

    // Number of Word requests per cycle
    parameter NUM_REQS                      = 4,

    // Size of cache in bytes
    parameter CACHE_SIZE                    = 16384,
    // Size of line inside a bank in bytes
    parameter CACHE_LINE_SIZE               = 64,
    // Number of banks
    parameter NUM_BANKS                     = NUM_REQS,
    // Number of ports per banks
    parameter NUM_PORTS                     = 1,
    // Size of a word in bytes
    parameter WORD_SIZE                     = 4,

    // Core Request Queue Size
    parameter CREQ_SIZE                     = 0,
    // Core Response Queue Size
    parameter CRSQ_SIZE                     = 2,
    // Miss Reserv Queue Knob (its $clog2 gives the request-id width used in memory tags)
    parameter MSHR_SIZE                     = 8,
    // Memory Response Queue Size
    parameter MRSQ_SIZE                     = 0,
    // Memory Request Queue Size
    parameter MREQ_SIZE                     = 4,

    // Enable cache writeable
    parameter WRITE_ENABLE                  = 1,

    // Core request tag size
    parameter CORE_TAG_WIDTH                = $clog2(MSHR_SIZE),

    // Size of tag id in core request tag
    // (0 selects the per-request response buffers further down in this module)
    parameter CORE_TAG_ID_BITS              = CORE_TAG_WIDTH,

    // Memory request tag size
    parameter MEM_TAG_WIDTH                 = (32 - $clog2(CACHE_LINE_SIZE)),

    // Bank offset from beginning of index range
    parameter BANK_ADDR_OFFSET              = 0,

    // Enable bypass for non-cacheable addresses
    parameter NC_ENABLE                     = 0,

    // Width of the word-select field (derived from the `WORD_SELECT_BITS macro via `UP)
    parameter WORD_SELECT_BITS              = `UP(`WORD_SELECT_BITS)
 ) (
    `SCOPE_IO_VX_cache

    // PERF
`ifdef PERF_ENABLE
    VX_perf_cache_if.master perf_cache_if,
`endif

    input wire clk,
    input wire reset,

    // Core request
    input wire [NUM_REQS-1:0]                           core_req_valid,
    input wire [NUM_REQS-1:0]                           core_req_rw,
    input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0]     core_req_addr,
    input wire [NUM_REQS-1:0][WORD_SIZE-1:0]            core_req_byteen,
    input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0]          core_req_data,
    input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0]       core_req_tag,
    output wire [NUM_REQS-1:0]                          core_req_ready,

    // Core response
    output wire [`CORE_RSP_TAGS-1:0]                    core_rsp_valid,
    output wire [NUM_REQS-1:0]                          core_rsp_tmask,
    output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0]         core_rsp_data,
    output wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
    input  wire [`CORE_RSP_TAGS-1:0]                    core_rsp_ready,

    // Memory request
    output wire                                         mem_req_valid,
    output wire                                         mem_req_rw,
    output wire [CACHE_LINE_SIZE-1:0]                   mem_req_byteen,
    output wire [`MEM_ADDR_WIDTH-1:0]                   mem_req_addr,
    output wire [`CACHE_LINE_WIDTH-1:0]                 mem_req_data,
    output wire [MEM_TAG_WIDTH-1:0]                     mem_req_tag,
    input  wire                                         mem_req_ready,

    // Memory response
    input  wire                                         mem_rsp_valid,
    input  wire [`CACHE_LINE_WIDTH-1:0]                 mem_rsp_data,
    input  wire [MEM_TAG_WIDTH-1:0]                     mem_rsp_tag,
    output wire                                         mem_rsp_ready
);

// Elaboration-time configuration checks.
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
`STATIC_ASSERT(NUM_PORTS <= NUM_BANKS, ("invalid value"))

// Width of the per-bank request id carried in memory tags.
localparam MSHR_ADDR_WIDTH    = $clog2(MSHR_SIZE);

// Internal memory tag: bank-select bits followed by the request id.
localparam MEM_TAG_IN_WIDTH   = `BANK_SELECT_BITS + MSHR_ADDR_WIDTH;

// Core tag widths seen by the banks; NC_ENABLE (0 or 1) is subtracted from
// the external width.
localparam CORE_TAG_X_WIDTH   = CORE_TAG_WIDTH - NC_ENABLE;
localparam CORE_TAG_ID_X_BITS = (CORE_TAG_ID_BITS == 0) ? CORE_TAG_ID_BITS : (CORE_TAG_ID_BITS - NC_ENABLE);

`ifdef PERF_ENABLE
// One event bit per bank, driven by the VX_bank instances.
wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
wire [NUM_BANKS-1:0] perf_write_miss_per_bank;
wire [NUM_BANKS-1:0] perf_mshr_stall_per_bank;
`endif

///////////////////////////////////////////////////////////////////////////

// Memory request output stage.
// The *_sb signals are concatenated, passed through a VX_skid_buffer and
// unpacked onto the module's mem_req_* ports. PASSTHRU is set when the cache
// has a single bank.
wire                         mem_req_valid_sb;
wire                         mem_req_rw_sb;
wire [CACHE_LINE_SIZE-1:0]   mem_req_byteen_sb;
wire [`MEM_ADDR_WIDTH-1:0]   mem_req_addr_sb;
wire [`CACHE_LINE_WIDTH-1:0] mem_req_data_sb;
wire [MEM_TAG_WIDTH-1:0]     mem_req_tag_sb;
wire                         mem_req_ready_sb;

VX_skid_buffer #(
    .DATAW    (1+CACHE_LINE_SIZE+`MEM_ADDR_WIDTH+`CACHE_LINE_WIDTH+MEM_TAG_WIDTH),
    .PASSTHRU (1 == NUM_BANKS)
) mem_req_sbuf (
    .clk       (clk),
    .reset     (reset),

    // upstream side
    .valid_in  (mem_req_valid_sb),
    .data_in   ({mem_req_rw_sb, mem_req_byteen_sb, mem_req_addr_sb, mem_req_data_sb, mem_req_tag_sb}),
    .ready_in  (mem_req_ready_sb),

    // downstream side (module ports)
    .valid_out (mem_req_valid),
    .data_out  ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag}),
    .ready_out (mem_req_ready)
);

///////////////////////////////////////////////////////////////////////////

// Core response output stage.
// With CORE_TAG_ID_BITS != 0 a single VX_skid_buffer carries the whole
// response (thread mask, all data words, tag); otherwise each request lane
// gets its own buffer with its own valid/ready pair.
wire [`CORE_RSP_TAGS-1:0]                     core_rsp_valid_sb;
wire [NUM_REQS-1:0]                           core_rsp_tmask_sb;
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0]          core_rsp_data_sb;
wire [`CORE_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_sb;
wire [`CORE_RSP_TAGS-1:0]                     core_rsp_ready_sb;

if (CORE_TAG_ID_BITS != 0) begin
    VX_skid_buffer #(
        .DATAW    (NUM_REQS + NUM_REQS*`WORD_WIDTH + CORE_TAG_WIDTH),
        .PASSTHRU (1 == NUM_BANKS)
    ) core_rsp_sbuf (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (core_rsp_valid_sb),
        .data_in   ({core_rsp_tmask_sb, core_rsp_data_sb, core_rsp_tag_sb}),
        .ready_in  (core_rsp_ready_sb),
        .valid_out (core_rsp_valid),
        .data_out  ({core_rsp_tmask, core_rsp_data, core_rsp_tag}),
        .ready_out (core_rsp_ready)
    );
end else begin
    for (genvar i = 0; i < NUM_REQS; i++) begin
        VX_skid_buffer #(
            .DATAW    (1 + `WORD_WIDTH + CORE_TAG_WIDTH),
            .PASSTHRU (1 == NUM_BANKS)
        ) core_rsp_sbuf (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (core_rsp_valid_sb[i]),
            .data_in   ({core_rsp_tmask_sb[i], core_rsp_data_sb[i], core_rsp_tag_sb[i]}),
            .ready_in  (core_rsp_ready_sb[i]),
            .valid_out (core_rsp_valid[i]),
            .data_out  ({core_rsp_tmask[i], core_rsp_data[i], core_rsp_tag[i]}),
            .ready_out (core_rsp_ready[i])
        );
    end
end

///////////////////////////////////////////////////////////////////////////

// Per-port memory request payload (the *_p signals) is expanded here into the
// line-wide byte-enable and data fed to the memory request buffer.
wire [NUM_PORTS-1:0][WORD_SIZE-1:0]        mem_req_byteen_p;
wire [NUM_PORTS-1:0]                       mem_req_pmask_p;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel_p;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0]      mem_req_data_p;
wire                                       mem_req_rw_p;

if (WRITE_ENABLE) begin
    // The read/write flag is forwarded unchanged in both sub-cases below.
    assign mem_req_rw_sb = mem_req_rw_p;

    if (`WORDS_PER_LINE > 1) begin
        reg [CACHE_LINE_SIZE-1:0]   mem_req_byteen_r;
        reg [`CACHE_LINE_WIDTH-1:0] mem_req_data_r;

        // Place each active port's word and byte-enables at the slot chosen
        // by its word-select. Unselected byte-enables stay 0; unselected data
        // is left as 'x. With a single port the port mask is ignored.
        always @(*) begin
            mem_req_byteen_r = 0;
            mem_req_data_r   = 'x;
            for (integer p = 0; p < NUM_PORTS; ++p) begin
                if ((NUM_PORTS == 1) || mem_req_pmask_p[p]) begin
                    mem_req_byteen_r[mem_req_wsel_p[p] * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p[p];
                    mem_req_data_r[mem_req_wsel_p[p] * `WORD_WIDTH +: `WORD_WIDTH] = mem_req_data_p[p];
                end
            end
        end

        assign mem_req_byteen_sb = mem_req_byteen_r;
        assign mem_req_data_sb   = mem_req_data_r;
    end else begin
        // One word per line: the per-port payload is used directly.
        `UNUSED_VAR (mem_req_pmask_p)
        `UNUSED_VAR (mem_req_wsel_p)
        assign mem_req_byteen_sb = mem_req_byteen_p;
        assign mem_req_data_sb   = mem_req_data_p;
    end
end else begin
    // Cache is not writeable: rw is tied to 0, write payload is don't-care.
    `UNUSED_VAR (mem_req_byteen_p)
    `UNUSED_VAR (mem_req_pmask_p)
    `UNUSED_VAR (mem_req_wsel_p)
    `UNUSED_VAR (mem_req_data_p)
    `UNUSED_VAR (mem_req_rw_p)

    assign mem_req_rw_sb     = 0;
    assign mem_req_byteen_sb = 'x;
    assign mem_req_data_sb   = 'x;
end

///////////////////////////////////////////////////////////////////////////

// Cache-side ("_c") views of the four external interfaces. They are driven by
// VX_nc_bypass when NC_ENABLE is set, and wired straight to the external /
// buffered signals otherwise.

// Core request
wire [NUM_REQS-1:0]                        core_req_valid_c;
wire [NUM_REQS-1:0]                        core_req_rw_c;
wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0]  core_req_addr_c;
wire [NUM_REQS-1:0][WORD_SIZE-1:0]         core_req_byteen_c;
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0]       core_req_data_c;
wire [NUM_REQS-1:0][CORE_TAG_X_WIDTH-1:0]  core_req_tag_c;
wire [NUM_REQS-1:0]                        core_req_ready_c;

// Core response
wire [`CORE_RSP_TAGS-1:0]                       core_rsp_valid_c;
wire [NUM_REQS-1:0]                             core_rsp_tmask_c;
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0]            core_rsp_data_c;
wire [`CORE_RSP_TAGS-1:0][CORE_TAG_X_WIDTH-1:0] core_rsp_tag_c;
wire [`CORE_RSP_TAGS-1:0]                       core_rsp_ready_c;

// Memory request
wire                                       mem_req_valid_c;
wire                                       mem_req_rw_c;
wire [`MEM_ADDR_WIDTH-1:0]                 mem_req_addr_c;
wire [NUM_PORTS-1:0]                       mem_req_pmask_c;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0]        mem_req_byteen_c;
wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0] mem_req_wsel_c;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0]      mem_req_data_c;
wire [MEM_TAG_IN_WIDTH-1:0]                mem_req_tag_c;
wire                                       mem_req_ready_c;

// Memory response
wire                         mem_rsp_valid_c;
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_c;
wire [MEM_TAG_IN_WIDTH-1:0]  mem_rsp_tag_c;
wire                         mem_rsp_ready_c;

if (NC_ENABLE) begin
    VX_nc_bypass #(
        .NUM_PORTS         (NUM_PORTS),
        .NUM_REQS          (NUM_REQS),
        .NUM_RSP_TAGS      (`CORE_RSP_TAGS),
        .NC_TAG_BIT        (0),

        .CORE_ADDR_WIDTH   (`WORD_ADDR_WIDTH),
        .CORE_DATA_SIZE    (WORD_SIZE),
        .CORE_TAG_IN_WIDTH (CORE_TAG_WIDTH),

        .MEM_ADDR_WIDTH    (`MEM_ADDR_WIDTH),
        .MEM_DATA_SIZE     (CACHE_LINE_SIZE),
        .MEM_TAG_IN_WIDTH  (MEM_TAG_IN_WIDTH),
        .MEM_TAG_OUT_WIDTH (MEM_TAG_WIDTH)
    ) nc_bypass (
        .clk                (clk),
        .reset              (reset),

        // Core request in (module ports)
        .core_req_valid_in  (core_req_valid),
        .core_req_rw_in     (core_req_rw),
        .core_req_byteen_in (core_req_byteen),
        .core_req_addr_in   (core_req_addr),
        .core_req_data_in   (core_req_data),
        .core_req_tag_in    (core_req_tag),
        .core_req_ready_in  (core_req_ready),

        // Core request out (towards the bank selector)
        .core_req_valid_out (core_req_valid_c),
        .core_req_rw_out    (core_req_rw_c),
        .core_req_byteen_out(core_req_byteen_c),
        .core_req_addr_out  (core_req_addr_c),
        .core_req_data_out  (core_req_data_c),
        .core_req_tag_out   (core_req_tag_c),
        .core_req_ready_out (core_req_ready_c),

        // Core response in (from the response merger)
        .core_rsp_valid_in  (core_rsp_valid_c),
        .core_rsp_tmask_in  (core_rsp_tmask_c),
        .core_rsp_data_in   (core_rsp_data_c),
        .core_rsp_tag_in    (core_rsp_tag_c),
        .core_rsp_ready_in  (core_rsp_ready_c),

        // Core response out (towards the core response buffer)
        .core_rsp_valid_out (core_rsp_valid_sb),
        .core_rsp_tmask_out (core_rsp_tmask_sb),
        .core_rsp_data_out  (core_rsp_data_sb),
        .core_rsp_tag_out   (core_rsp_tag_sb),
        .core_rsp_ready_out (core_rsp_ready_sb),

        // Memory request in (from the bank arbiter)
        .mem_req_valid_in   (mem_req_valid_c),
        .mem_req_rw_in      (mem_req_rw_c),
        .mem_req_addr_in    (mem_req_addr_c),
        .mem_req_pmask_in   (mem_req_pmask_c),
        .mem_req_byteen_in  (mem_req_byteen_c),
        .mem_req_wsel_in    (mem_req_wsel_c),
        .mem_req_data_in    (mem_req_data_c),
        .mem_req_tag_in     (mem_req_tag_c),
        .mem_req_ready_in   (mem_req_ready_c),

        // Memory request out (towards line packing / request buffer)
        .mem_req_valid_out  (mem_req_valid_sb),
        .mem_req_addr_out   (mem_req_addr_sb),
        .mem_req_rw_out     (mem_req_rw_p),
        .mem_req_pmask_out  (mem_req_pmask_p),
        .mem_req_byteen_out (mem_req_byteen_p),
        .mem_req_wsel_out   (mem_req_wsel_p),
        .mem_req_data_out   (mem_req_data_p),
        .mem_req_tag_out    (mem_req_tag_sb),
        .mem_req_ready_out  (mem_req_ready_sb),

        // Memory response in (module ports)
        .mem_rsp_valid_in   (mem_rsp_valid),
        .mem_rsp_data_in    (mem_rsp_data),
        .mem_rsp_tag_in     (mem_rsp_tag),
        .mem_rsp_ready_in   (mem_rsp_ready),

        // Memory response out (towards the memory response queue)
        .mem_rsp_valid_out  (mem_rsp_valid_c),
        .mem_rsp_data_out   (mem_rsp_data_c),
        .mem_rsp_tag_out    (mem_rsp_tag_c),
        .mem_rsp_ready_out  (mem_rsp_ready_c)
    );
end else begin
    // No bypass: every interface is a straight connection.

    // Core request
    assign core_req_valid_c  = core_req_valid;
    assign core_req_rw_c     = core_req_rw;
    assign core_req_addr_c   = core_req_addr;
    assign core_req_byteen_c = core_req_byteen;
    assign core_req_data_c   = core_req_data;
    assign core_req_tag_c    = core_req_tag;
    assign core_req_ready    = core_req_ready_c;

    // Core response
    assign core_rsp_valid_sb = core_rsp_valid_c;
    assign core_rsp_tmask_sb = core_rsp_tmask_c;
    assign core_rsp_data_sb  = core_rsp_data_c;
    assign core_rsp_tag_sb   = core_rsp_tag_c;
    assign core_rsp_ready_c  = core_rsp_ready_sb;

    // Memory request
    // NOTE(review): mem_req_tag_c is MEM_TAG_IN_WIDTH bits while
    // mem_req_tag_sb is MEM_TAG_WIDTH bits; the assignment relies on the
    // implicit width adjustment.
    assign mem_req_valid_sb  = mem_req_valid_c;
    assign mem_req_addr_sb   = mem_req_addr_c;
    assign mem_req_rw_p      = mem_req_rw_c;
    assign mem_req_pmask_p   = mem_req_pmask_c;
    assign mem_req_byteen_p  = mem_req_byteen_c;
    assign mem_req_wsel_p    = mem_req_wsel_c;
    assign mem_req_data_p    = mem_req_data_c;
    assign mem_req_tag_sb    = mem_req_tag_c;
    assign mem_req_ready_c   = mem_req_ready_sb;

    // Memory response
    // NOTE(review): same implicit width adjustment for mem_rsp_tag -> mem_rsp_tag_c.
    assign mem_rsp_valid_c   = mem_rsp_valid;
    assign mem_rsp_data_c    = mem_rsp_data;
    assign mem_rsp_tag_c     = mem_rsp_tag;
    assign mem_rsp_ready     = mem_rsp_ready_c;
end

///////////////////////////////////////////////////////////////////////////

// Memory response queue.
// Incoming {tag, line data} pairs go through a VX_elastic_buffer of
// MRSQ_SIZE entries; the *_qual signals are the buffer's output side.
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_qual;
wire [MEM_TAG_IN_WIDTH-1:0]  mem_rsp_tag_qual;

wire mrsq_out_valid, mrsq_out_ready;

// Reset for the queue, produced by the RESET_RELAY macro.
`RESET_RELAY (mrsq_reset);

VX_elastic_buffer #(
    .DATAW   (MEM_TAG_IN_WIDTH + `CACHE_LINE_WIDTH),
    .SIZE    (MRSQ_SIZE),
    .OUT_REG (MRSQ_SIZE > 2)
) mem_rsp_queue (
    .clk       (clk),
    .reset     (mrsq_reset),

    // input side
    .valid_in  (mem_rsp_valid_c),
    .data_in   ({mem_rsp_tag_c, mem_rsp_data_c}),
    .ready_in  (mem_rsp_ready_c),

    // output side
    .valid_out (mrsq_out_valid),
    .data_out  ({mem_rsp_tag_qual, mem_rsp_data_qual}),
    .ready_out (mrsq_out_ready)
);

`UNUSED_VAR (mem_rsp_tag_c)

///////////////////////////////////////////////////////////////////////////

// Flush controller: produces a line address and an enable that are wired to
// the flush inputs of every bank.
wire [`LINE_SELECT_BITS-1:0] flush_addr;
wire                         flush_enable;

// Reset for the flush controller, produced by the RESET_RELAY macro.
`RESET_RELAY (flush_reset);

VX_flush_ctrl #(
    .CACHE_SIZE      (CACHE_SIZE),
    .CACHE_LINE_SIZE (CACHE_LINE_SIZE),
    .NUM_BANKS       (NUM_BANKS)
) flush_ctrl (
    .clk       (clk),
    .reset     (flush_reset),
    .valid_out (flush_enable),
    .addr_out  (flush_addr)
);

///////////////////////////////////////////////////////////////////////////

// Per-bank buses. Index [bank] selects the bank, [port] the port inside it.

// Core request, bank selector -> banks
wire [NUM_BANKS-1:0]                                        per_bank_core_req_valid;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0]                         per_bank_core_req_pmask;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SELECT_BITS-1:0]   per_bank_core_req_wsel;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0]          per_bank_core_req_byteen;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0]        per_bank_core_req_data;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0]         per_bank_core_req_tid;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0]   per_bank_core_req_tag;
wire [NUM_BANKS-1:0]                                        per_bank_core_req_rw;
wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0]                  per_bank_core_req_addr;
wire [NUM_BANKS-1:0]                                        per_bank_core_req_ready;

// Core response, banks -> response merger
wire [NUM_BANKS-1:0]                                        per_bank_core_rsp_valid;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0]                         per_bank_core_rsp_pmask;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0]        per_bank_core_rsp_data;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`REQS_BITS-1:0]         per_bank_core_rsp_tid;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0]   per_bank_core_rsp_tag;
wire [NUM_BANKS-1:0]                                        per_bank_core_rsp_ready;

// Memory request, banks -> memory request arbiter
wire [NUM_BANKS-1:0]                                        per_bank_mem_req_valid;
wire [NUM_BANKS-1:0]                                        per_bank_mem_req_rw;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0]                         per_bank_mem_req_pmask;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0]          per_bank_mem_req_byteen;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SELECT_BITS-1:0]   per_bank_mem_req_wsel;
wire [NUM_BANKS-1:0][`MEM_ADDR_WIDTH-1:0]                   per_bank_mem_req_addr;
wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0]                   per_bank_mem_req_id;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`WORD_WIDTH-1:0]        per_bank_mem_req_data;
wire [NUM_BANKS-1:0]                                        per_bank_mem_req_ready;

// Memory response ready, one bit per bank
wire [NUM_BANKS-1:0]                                        per_bank_mem_rsp_ready;

// The memory response queue is popped when the bank picked by the response
// tag (via `MEM_TAG_TO_BANK_ID) is ready; with a single bank that bank's
// ready is used directly.
if (NUM_BANKS == 1) begin
    assign mrsq_out_ready = per_bank_mem_rsp_ready;
end else begin
    assign mrsq_out_ready = per_bank_mem_rsp_ready[`MEM_TAG_TO_BANK_ID(mem_rsp_tag_qual)];
end

// Distributes the NUM_REQS core requests onto the per-bank request buses.
VX_core_req_bank_sel #(
    .CACHE_ID        (CACHE_ID),
    .CACHE_LINE_SIZE (CACHE_LINE_SIZE),
    .NUM_BANKS       (NUM_BANKS),
    .NUM_PORTS       (NUM_PORTS),
    .WORD_SIZE       (WORD_SIZE),
    .NUM_REQS        (NUM_REQS),
    .CORE_TAG_WIDTH  (CORE_TAG_X_WIDTH),
    .BANK_ADDR_OFFSET(BANK_ADDR_OFFSET)
) core_req_bank_sel (
    .clk        (clk),
    .reset      (reset),
`ifdef PERF_ENABLE
    .bank_stalls(perf_cache_if.bank_stalls),
`endif
    // Core-side request
    .core_req_valid          (core_req_valid_c),
    .core_req_rw             (core_req_rw_c),
    .core_req_addr           (core_req_addr_c),
    .core_req_byteen         (core_req_byteen_c),
    .core_req_data           (core_req_data_c),
    .core_req_tag            (core_req_tag_c),
    .core_req_ready          (core_req_ready_c),

    // Bank-side request
    .per_bank_core_req_valid (per_bank_core_req_valid),
    .per_bank_core_req_pmask (per_bank_core_req_pmask),
    .per_bank_core_req_rw    (per_bank_core_req_rw),
    .per_bank_core_req_addr  (per_bank_core_req_addr),
    .per_bank_core_req_wsel  (per_bank_core_req_wsel),
    .per_bank_core_req_byteen(per_bank_core_req_byteen),
    .per_bank_core_req_data  (per_bank_core_req_data),
    .per_bank_core_req_tag   (per_bank_core_req_tag),
    .per_bank_core_req_tid   (per_bank_core_req_tid),
    .per_bank_core_req_ready (per_bank_core_req_ready)
);

///////////////////////////////////////////////////////////////////////////

// One VX_bank per bank index. Inside each iteration the curr_bank_* wires are
// the slice of the per_bank_* buses that belongs to bank i.
for (genvar i = 0; i < NUM_BANKS; i++) begin
    // Core request: slice of the bank selector outputs for this bank
    wire                                        curr_bank_core_req_valid  = per_bank_core_req_valid[i];
    wire [NUM_PORTS-1:0]                        curr_bank_core_req_pmask  = per_bank_core_req_pmask[i];
    wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0]  curr_bank_core_req_wsel   = per_bank_core_req_wsel[i];
    wire [NUM_PORTS-1:0][WORD_SIZE-1:0]         curr_bank_core_req_byteen = per_bank_core_req_byteen[i];
    wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0]       curr_bank_core_req_data   = per_bank_core_req_data[i];
    wire [NUM_PORTS-1:0][`REQS_BITS-1:0]        curr_bank_core_req_tid    = per_bank_core_req_tid[i];
    wire [NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0]  curr_bank_core_req_tag    = per_bank_core_req_tag[i];
    wire                                        curr_bank_core_req_rw     = per_bank_core_req_rw[i];
    wire [`LINE_ADDR_WIDTH-1:0]                 curr_bank_core_req_addr   = per_bank_core_req_addr[i];
    wire                                        curr_bank_core_req_ready;

    // Core response: driven by the bank, ready comes from the merger
    wire                                        curr_bank_core_rsp_valid;
    wire [NUM_PORTS-1:0]                        curr_bank_core_rsp_pmask;
    wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0]       curr_bank_core_rsp_data;
    wire [NUM_PORTS-1:0][`REQS_BITS-1:0]        curr_bank_core_rsp_tid;
    wire [NUM_PORTS-1:0][CORE_TAG_X_WIDTH-1:0]  curr_bank_core_rsp_tag;
    wire                                        curr_bank_core_rsp_ready  = per_bank_core_rsp_ready[i];

    // Memory request: driven by the bank, ready comes from the arbiter
    wire                                        curr_bank_mem_req_valid;
    wire                                        curr_bank_mem_req_rw;
    wire [NUM_PORTS-1:0]                        curr_bank_mem_req_pmask;
    wire [NUM_PORTS-1:0][WORD_SIZE-1:0]         curr_bank_mem_req_byteen;
    wire [NUM_PORTS-1:0][WORD_SELECT_BITS-1:0]  curr_bank_mem_req_wsel;
    wire [`LINE_ADDR_WIDTH-1:0]                 curr_bank_mem_req_addr;
    wire [MSHR_ADDR_WIDTH-1:0]                  curr_bank_mem_req_id;
    wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0]       curr_bank_mem_req_data;
    wire                                        curr_bank_mem_req_ready   = per_bank_mem_req_ready[i];

    // Memory response: id and data are shared by all banks, valid is per bank
    wire                                        curr_bank_mem_rsp_valid;
    wire [MSHR_ADDR_WIDTH-1:0]                  curr_bank_mem_rsp_id      = `MEM_TAG_TO_REQ_ID(mem_rsp_tag_qual);
    wire [`CACHE_LINE_WIDTH-1:0]                curr_bank_mem_rsp_data    = mem_rsp_data_qual;
    wire                                        curr_bank_mem_rsp_ready;

    // Core request handshake back to the bank selector
    assign per_bank_core_req_ready[i] = curr_bank_core_req_ready;

    // Core response towards the merger
    assign per_bank_core_rsp_valid[i] = curr_bank_core_rsp_valid;
    assign per_bank_core_rsp_pmask[i] = curr_bank_core_rsp_pmask;
    assign per_bank_core_rsp_tid  [i] = curr_bank_core_rsp_tid;
    assign per_bank_core_rsp_tag  [i] = curr_bank_core_rsp_tag;
    assign per_bank_core_rsp_data [i] = curr_bank_core_rsp_data;

    // Memory request towards the arbiter
    assign per_bank_mem_req_valid[i]  = curr_bank_mem_req_valid;
    assign per_bank_mem_req_rw[i]     = curr_bank_mem_req_rw;
    assign per_bank_mem_req_pmask[i]  = curr_bank_mem_req_pmask;
    assign per_bank_mem_req_byteen[i] = curr_bank_mem_req_byteen;
    assign per_bank_mem_req_wsel[i]   = curr_bank_mem_req_wsel;
    // The bank emits a line address; with several banks it is converted to a
    // memory address for bank i through `LINE_TO_MEM_ADDR.
    if (NUM_BANKS == 1) begin
        assign per_bank_mem_req_addr[i] = curr_bank_mem_req_addr;
    end else begin
        assign per_bank_mem_req_addr[i] = `LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, i);
    end
    assign per_bank_mem_req_id[i]     = curr_bank_mem_req_id;
    assign per_bank_mem_req_data[i]   = curr_bank_mem_req_data;

    // Memory response: with several banks, only the bank whose index matches
    // the tag's bank id sees the response as valid.
    if (NUM_BANKS == 1) begin
        assign curr_bank_mem_rsp_valid = mrsq_out_valid;
    end else begin
        assign curr_bank_mem_rsp_valid = mrsq_out_valid && (`MEM_TAG_TO_BANK_ID(mem_rsp_tag_qual) == i);
    end
    assign per_bank_mem_rsp_ready[i] = curr_bank_mem_rsp_ready;

    // Per-bank reset, produced by the RESET_RELAY macro.
    `RESET_RELAY (bank_reset);

    VX_bank #(
        .BANK_ID            (i),
        .CACHE_ID           (CACHE_ID),
        .CACHE_SIZE         (CACHE_SIZE),
        .CACHE_LINE_SIZE    (CACHE_LINE_SIZE),
        .NUM_BANKS          (NUM_BANKS),
        .NUM_PORTS          (NUM_PORTS),
        .WORD_SIZE          (WORD_SIZE),
        .NUM_REQS           (NUM_REQS),
        .CREQ_SIZE          (CREQ_SIZE),
        .CRSQ_SIZE          (CRSQ_SIZE),
        .MSHR_SIZE          (MSHR_SIZE),
        .MREQ_SIZE          (MREQ_SIZE),
        .WRITE_ENABLE       (WRITE_ENABLE),
        .CORE_TAG_WIDTH     (CORE_TAG_X_WIDTH),
        .BANK_ADDR_OFFSET   (BANK_ADDR_OFFSET)
    ) bank (
        `SCOPE_BIND_VX_cache_bank(i)

        .clk                (clk),
        .reset              (bank_reset),

    `ifdef PERF_ENABLE
        .perf_read_misses   (perf_read_miss_per_bank[i]),
        .perf_write_misses  (perf_write_miss_per_bank[i]),
        .perf_mshr_stalls   (perf_mshr_stall_per_bank[i]),
    `endif

        // Core request
        .core_req_valid     (curr_bank_core_req_valid),
        .core_req_pmask     (curr_bank_core_req_pmask),
        .core_req_rw        (curr_bank_core_req_rw),
        .core_req_byteen    (curr_bank_core_req_byteen),
        .core_req_addr      (curr_bank_core_req_addr),
        .core_req_wsel      (curr_bank_core_req_wsel),
        .core_req_data      (curr_bank_core_req_data),
        .core_req_tag       (curr_bank_core_req_tag),
        .core_req_tid       (curr_bank_core_req_tid),
        .core_req_ready     (curr_bank_core_req_ready),

        // Core response
        .core_rsp_valid     (curr_bank_core_rsp_valid),
        .core_rsp_pmask     (curr_bank_core_rsp_pmask),
        .core_rsp_tid       (curr_bank_core_rsp_tid),
        .core_rsp_data      (curr_bank_core_rsp_data),
        .core_rsp_tag       (curr_bank_core_rsp_tag),
        .core_rsp_ready     (curr_bank_core_rsp_ready),

        // Memory request
        .mem_req_valid      (curr_bank_mem_req_valid),
        .mem_req_rw         (curr_bank_mem_req_rw),
        .mem_req_pmask      (curr_bank_mem_req_pmask),
        .mem_req_byteen     (curr_bank_mem_req_byteen),
        .mem_req_wsel       (curr_bank_mem_req_wsel),
        .mem_req_addr       (curr_bank_mem_req_addr),
        .mem_req_id         (curr_bank_mem_req_id),
        .mem_req_data       (curr_bank_mem_req_data),
        .mem_req_ready      (curr_bank_mem_req_ready),

        // Memory response
        .mem_rsp_valid      (curr_bank_mem_rsp_valid),
        .mem_rsp_id         (curr_bank_mem_rsp_id),
        .mem_rsp_data       (curr_bank_mem_rsp_data),
        .mem_rsp_ready      (curr_bank_mem_rsp_ready),

        // flush
        .flush_enable       (flush_enable),
        .flush_addr         (flush_addr)
    );
end

// Collects the per-bank core responses into the cache-side core response
// interface (core_rsp_*_c).
VX_core_rsp_merge #(
    .CACHE_ID         (CACHE_ID),
    .NUM_BANKS        (NUM_BANKS),
    .NUM_PORTS        (NUM_PORTS),
    .WORD_SIZE        (WORD_SIZE),
    .NUM_REQS         (NUM_REQS),
    .CORE_TAG_WIDTH   (CORE_TAG_X_WIDTH),
    .CORE_TAG_ID_BITS (CORE_TAG_ID_X_BITS)
) core_rsp_merge (
    .clk                     (clk),
    .reset                   (reset),

    // Bank side
    .per_bank_core_rsp_valid (per_bank_core_rsp_valid),
    .per_bank_core_rsp_pmask (per_bank_core_rsp_pmask),
    .per_bank_core_rsp_data  (per_bank_core_rsp_data),
    .per_bank_core_rsp_tag   (per_bank_core_rsp_tag),
    .per_bank_core_rsp_tid   (per_bank_core_rsp_tid),
    .per_bank_core_rsp_ready (per_bank_core_rsp_ready),

    // Core side
    .core_rsp_valid          (core_rsp_valid_c),
    .core_rsp_tmask          (core_rsp_tmask_c),
    .core_rsp_tag            (core_rsp_tag_c),
    .core_rsp_data           (core_rsp_data_c),
    .core_rsp_ready          (core_rsp_ready_c)
);

// Memory request arbitration.
// Each bank's request fields are packed into one vector, a VX_stream_arbiter
// selects one bank's vector, and the fields are unpacked into the cache-side
// memory request (mem_req_*_c).

// Packed width: address + request id + rw + per-port {pmask, byteen, wsel, data}.
localparam MREQ_ARB_DATAW = `MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + NUM_PORTS * (1 + WORD_SIZE + WORD_SELECT_BITS + `WORD_WIDTH);

wire [NUM_BANKS-1:0][MREQ_ARB_DATAW-1:0] data_in;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
    assign data_in[i] = {per_bank_mem_req_addr[i],
                         per_bank_mem_req_id[i],
                         per_bank_mem_req_rw[i],
                         per_bank_mem_req_pmask[i],
                         per_bank_mem_req_byteen[i],
                         per_bank_mem_req_wsel[i],
                         per_bank_mem_req_data[i]};
end

// Request id of the bank that won arbitration.
wire [MSHR_ADDR_WIDTH-1:0] mem_req_id;

// Reset for the arbiter, produced by the RESET_RELAY macro.
`RESET_RELAY (mreq_reset);

VX_stream_arbiter #(
    .NUM_REQS (NUM_BANKS),
    .DATAW    (MREQ_ARB_DATAW),
    .TYPE     ("R")
) mem_req_arb (
    .clk       (clk),
    .reset     (mreq_reset),

    // from the banks
    .valid_in  (per_bank_mem_req_valid),
    .data_in   (data_in),
    .ready_in  (per_bank_mem_req_ready),

    // selected request
    .valid_out (mem_req_valid_c),
    .data_out  ({mem_req_addr_c, mem_req_id, mem_req_rw_c, mem_req_pmask_c, mem_req_byteen_c, mem_req_wsel_c, mem_req_data_c}),
    .ready_out (mem_req_ready_c)
);

// Memory tag: the request id alone for a single bank; otherwise the bank id
// taken from the request address (`MEM_ADDR_TO_BANK_ID) concatenated above
// the request id.
if (NUM_BANKS == 1) begin
    assign mem_req_tag_c = MEM_TAG_IN_WIDTH'(mem_req_id);
end else begin
    assign mem_req_tag_c = MEM_TAG_IN_WIDTH'({`MEM_ADDR_TO_BANK_ID(mem_req_addr_c), mem_req_id});
end

`ifdef PERF_ENABLE
// Performance counters (only compiled when PERF_ENABLE is defined).
// Each cycle the event masks below are pop-counted and the counts are added
// to free-running counters that are exported through perf_cache_if.

// Per cycle: core reads, core writes, core response stalls.
// All three count bits of NUM_REQS-wide masks, hence the NUM_REQS-based width.
// perf_crsp_stall_per_cycle is declared exactly once, here: a second
// declaration of the same name in this scope is an illegal redeclaration.
wire [$clog2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
wire [$clog2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
wire [$clog2(NUM_REQS+1)-1:0] perf_crsp_stall_per_cycle;

// A request lane counts when its cache-side valid and ready are both high.
// NOTE(review): the rw qualifier uses the external core_req_rw rather than
// core_req_rw_c; confirm VX_nc_bypass forwards rw unchanged.
wire [NUM_REQS-1:0] perf_core_reads_per_mask  = core_req_valid_c & core_req_ready_c & ~core_req_rw;
wire [NUM_REQS-1:0] perf_core_writes_per_mask = core_req_valid_c & core_req_ready_c & core_req_rw;

// Per cycle: read misses, write misses, mshr stalls (one event bit per bank).
wire [$clog2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
wire [$clog2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
wire [$clog2(NUM_BANKS+1)-1:0] perf_mshr_stall_per_cycle;

`POP_COUNT(perf_core_reads_per_cycle,  perf_core_reads_per_mask);
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_mask);
`POP_COUNT(perf_read_miss_per_cycle,   perf_read_miss_per_bank);
`POP_COUNT(perf_write_miss_per_cycle,  perf_write_miss_per_bank);
`POP_COUNT(perf_mshr_stall_per_cycle,  perf_mshr_stall_per_bank);

// Core response stalls: response valid but not accepted.
// With a shared response tag every lane set in the thread mask counts as
// stalled; otherwise each lane has its own valid/ready pair.
if (CORE_TAG_ID_BITS != 0) begin
    wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_tmask & {NUM_REQS{core_rsp_valid && ~core_rsp_ready}};
    `POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
end else begin
    wire [NUM_REQS-1:0] perf_crsp_stall_per_mask = core_rsp_valid & ~core_rsp_ready;
    `POP_COUNT(perf_crsp_stall_per_cycle, perf_crsp_stall_per_mask);
end

// Memory request stall: request valid at the module port but not accepted.
wire perf_mem_stall_per_cycle = mem_req_valid & ~mem_req_ready;

reg [`PERF_CTR_BITS-1:0] perf_core_reads;
reg [`PERF_CTR_BITS-1:0] perf_core_writes;
reg [`PERF_CTR_BITS-1:0] perf_read_misses;
reg [`PERF_CTR_BITS-1:0] perf_write_misses;
reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls;
reg [`PERF_CTR_BITS-1:0] perf_mem_stalls;
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;

// Accumulate the per-cycle counts; all counters clear on reset.
always @(posedge clk) begin
    if (reset) begin
        perf_core_reads   <= 0;
        perf_core_writes  <= 0;
        perf_read_misses  <= 0;
        perf_write_misses <= 0;
        perf_mshr_stalls  <= 0;
        perf_mem_stalls   <= 0;
        perf_crsp_stalls  <= 0;
    end else begin
        perf_core_reads   <= perf_core_reads   + `PERF_CTR_BITS'(perf_core_reads_per_cycle);
        perf_core_writes  <= perf_core_writes  + `PERF_CTR_BITS'(perf_core_writes_per_cycle);
        perf_read_misses  <= perf_read_misses  + `PERF_CTR_BITS'(perf_read_miss_per_cycle);
        perf_write_misses <= perf_write_misses + `PERF_CTR_BITS'(perf_write_miss_per_cycle);
        perf_mshr_stalls  <= perf_mshr_stalls  + `PERF_CTR_BITS'(perf_mshr_stall_per_cycle);
        perf_mem_stalls   <= perf_mem_stalls   + `PERF_CTR_BITS'(perf_mem_stall_per_cycle);
        perf_crsp_stalls  <= perf_crsp_stalls  + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
    end
end

assign perf_cache_if.reads        = perf_core_reads;
assign perf_cache_if.writes       = perf_core_writes;
assign perf_cache_if.read_misses  = perf_read_misses;
assign perf_cache_if.write_misses = perf_write_misses;
assign perf_cache_if.mshr_stalls  = perf_mshr_stalls;
assign perf_cache_if.mem_stalls   = perf_mem_stalls;
assign perf_cache_if.crsp_stalls  = perf_crsp_stalls;
`endif

endmodule