fixed memory flags propagation through the cache hierarchy

This commit is contained in:
Blaise Tine 2024-10-05 13:46:10 -07:00
parent 83badaac86
commit 2eeb2ac532
19 changed files with 279 additions and 229 deletions

View file

@ -142,8 +142,8 @@ cache()
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test cache ways
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test cache banking
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
@ -154,10 +154,10 @@ cache()
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
# test writeback
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --app=mstress
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --app=mstress
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress
CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=simx --app=mstress
CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
# cache clustering
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=4 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=4 --warps=1 --threads=2

View file

@ -100,6 +100,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
.WRITEBACK (`L2_WRITEBACK),
.DIRTY_BYTES (`L2_WRITEBACK),
.UUID_WIDTH (`UUID_WIDTH),
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
.CORE_OUT_BUF (3),
.MEM_OUT_BUF (3),
.NC_ENABLE (1),

View file

@ -564,12 +564,8 @@
// Cache Size
`ifndef L2_CACHE_SIZE
`ifdef ALTERA_S10
`define L2_CACHE_SIZE 2097152
`else
`define L2_CACHE_SIZE 1048576
`endif
`endif
// Number of Banks
`ifndef L2_NUM_BANKS
@ -610,11 +606,7 @@
// Cache Size
`ifndef L3_CACHE_SIZE
`ifdef ALTERA_S10
`define L3_CACHE_SIZE 2097152
`else
`define L3_CACHE_SIZE 1048576
`endif
`endif
// Number of Banks

View file

@ -100,6 +100,7 @@ module VX_socket import VX_gpu_pkg::*; #(
.MRSQ_SIZE (`ICACHE_MRSQ_SIZE),
.MREQ_SIZE (`ICACHE_MREQ_SIZE),
.TAG_WIDTH (ICACHE_TAG_WIDTH),
.FLAGS_WIDTH (0),
.UUID_WIDTH (`UUID_WIDTH),
.WRITE_ENABLE (0),
.NC_ENABLE (0),
@ -146,6 +147,7 @@ module VX_socket import VX_gpu_pkg::*; #(
.MREQ_SIZE (`DCACHE_WRITEBACK ? `DCACHE_MSHR_SIZE : `DCACHE_MREQ_SIZE),
.TAG_WIDTH (DCACHE_TAG_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
.WRITE_ENABLE (1),
.WRITEBACK (`DCACHE_WRITEBACK),
.DIRTY_BYTES (`DCACHE_WRITEBACK),

View file

@ -86,6 +86,7 @@ module Vortex import VX_gpu_pkg::*; (
.WRITEBACK (`L3_WRITEBACK),
.DIRTY_BYTES (`L3_WRITEBACK),
.UUID_WIDTH (`UUID_WIDTH),
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
.CORE_OUT_BUF (3),
.MEM_OUT_BUF (3),
.NC_ENABLE (1),

View file

@ -118,8 +118,8 @@ module VX_bank_flush #(
.N (`CS_WAY_SEL_BITS),
.D (NUM_WAYS)
) ctr_decoder (
.data_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]),
.valid_in (1'b1),
.sel_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]),
.data_in (1'b1),
.data_out (flush_way)
);
end else begin : g_flush_way_all

View file

@ -54,6 +54,9 @@ module VX_cache import VX_gpu_pkg::*; #(
// core request tag size
parameter TAG_WIDTH = UUID_WIDTH + 1,
// core request flags
parameter FLAGS_WIDTH = 0,
// Core response output register
parameter CORE_OUT_BUF = 0,
@ -90,7 +93,7 @@ module VX_cache import VX_gpu_pkg::*; #(
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS);
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1;
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + `UP(FLAGS_WIDTH);
localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
localparam BANK_MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH;
@ -206,13 +209,13 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [LINE_SIZE-1:0] mem_req_byteen;
wire [`CS_LINE_WIDTH-1:0] mem_req_data;
wire [MEM_TAG_WIDTH-1:0] mem_req_tag;
wire mem_req_flush;
wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flags;
wire mem_req_ready;
wire mem_req_flush_b;
wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flush_b;
VX_elastic_buffer #(
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)),
.SIZE (MEM_REQ_REG_DISABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
) mem_req_buf (
@ -220,13 +223,18 @@ module VX_cache import VX_gpu_pkg::*; #(
.reset (reset),
.valid_in (mem_req_valid),
.ready_in (mem_req_ready),
.data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flush}),
.data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flags}),
.data_out ({mem_bus_tmp_if.req_data.rw, mem_bus_tmp_if.req_data.byteen, mem_bus_tmp_if.req_data.addr, mem_bus_tmp_if.req_data.data, mem_bus_tmp_if.req_data.tag, mem_req_flush_b}),
.valid_out (mem_bus_tmp_if.req_valid),
.ready_out (mem_bus_tmp_if.req_ready)
);
assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? `MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0;
if (FLAGS_WIDTH != 0) begin : g_mem_req_flags
assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b;
end else begin : g_no_mem_req_flags
assign mem_bus_tmp_if.req_data.flags = '0;
`UNUSED_VAR (mem_req_flush_b)
end
if (WRITE_ENABLE) begin : g_mem_bus_if
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if);
@ -244,7 +252,7 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx;
wire [NUM_BANKS-1:0] per_bank_core_req_flush;
wire [NUM_BANKS-1:0][`UP(FLAGS_WIDTH)-1:0] per_bank_core_req_flags;
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
@ -259,7 +267,7 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen;
wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data;
wire [NUM_BANKS-1:0][BANK_MEM_TAG_WIDTH-1:0] per_bank_mem_req_tag;
wire [NUM_BANKS-1:0] per_bank_mem_req_flush;
wire [NUM_BANKS-1:0][`UP(FLAGS_WIDTH)-1:0] per_bank_mem_req_flags;
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
@ -276,7 +284,7 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
wire [NUM_REQS-1:0] core_req_flush;
wire [NUM_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] core_req_flags;
wire [NUM_REQS-1:0] core_req_ready;
wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr;
@ -293,7 +301,7 @@ module VX_cache import VX_gpu_pkg::*; #(
assign core_req_addr[i] = core_bus2_if[i].req_data.addr;
assign core_req_data[i] = core_bus2_if[i].req_data.data;
assign core_req_tag[i] = core_bus2_if[i].req_data.tag;
assign core_req_flush[i] = core_bus2_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH];
assign core_req_flags[i] = `UP(FLAGS_WIDTH)'(core_bus2_if[i].req_data.flags);
assign core_bus2_if[i].req_ready = core_req_ready[i];
end
@ -325,7 +333,7 @@ module VX_cache import VX_gpu_pkg::*; #(
core_req_byteen[i],
core_req_data[i],
core_req_tag[i],
core_req_flush[i]
core_req_flags[i]
};
end
@ -366,7 +374,7 @@ module VX_cache import VX_gpu_pkg::*; #(
per_bank_core_req_byteen[i],
per_bank_core_req_data[i],
per_bank_core_req_tag[i],
per_bank_core_req_flush[i]
per_bank_core_req_flags[i]
} = core_req_data_out[i];
end
@ -393,6 +401,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.WRITEBACK (WRITEBACK),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.FLAGS_WIDTH (FLAGS_WIDTH),
.CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(CORE_OUT_BUF)),
.MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(MEM_OUT_BUF))
) bank (
@ -414,7 +423,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.core_req_data (per_bank_core_req_data[bank_id]),
.core_req_tag (per_bank_core_req_tag[bank_id]),
.core_req_idx (per_bank_core_req_idx[bank_id]),
.core_req_flush (per_bank_core_req_flush[bank_id]),
.core_req_flags (per_bank_core_req_flags[bank_id]),
.core_req_ready (per_bank_core_req_ready[bank_id]),
// Core response
@ -431,7 +440,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.mem_req_byteen (per_bank_mem_req_byteen[bank_id]),
.mem_req_data (per_bank_mem_req_data[bank_id]),
.mem_req_tag (per_bank_mem_req_tag[bank_id]),
.mem_req_flush (per_bank_mem_req_flush[bank_id]),
.mem_req_flags (per_bank_mem_req_flags[bank_id]),
.mem_req_ready (per_bank_mem_req_ready[bank_id]),
// Memory response
@ -487,7 +496,7 @@ module VX_cache import VX_gpu_pkg::*; #(
// Memory request arbitration
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1)-1:0] data_in;
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + `UP(FLAGS_WIDTH))-1:0] data_in;
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_data_in
assign data_in[i] = {
@ -496,7 +505,7 @@ module VX_cache import VX_gpu_pkg::*; #(
per_bank_mem_req_byteen[i],
per_bank_mem_req_data[i],
per_bank_mem_req_tag[i],
per_bank_mem_req_flush[i]
per_bank_mem_req_flags[i]
};
end
@ -504,7 +513,7 @@ module VX_cache import VX_gpu_pkg::*; #(
VX_stream_arb #(
.NUM_INPUTS (NUM_BANKS),
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1),
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)),
.ARBITER ("R")
) mem_req_arb (
.clk (clk),
@ -512,7 +521,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.valid_in (per_bank_mem_req_valid),
.ready_in (per_bank_mem_req_ready),
.data_in (data_in),
.data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flush}),
.data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flags}),
.valid_out (mem_req_valid),
.ready_out (mem_req_ready),
`UNUSED_PIN (sel_out)

View file

@ -53,6 +53,9 @@ module VX_cache_bank #(
// core request tag size
parameter TAG_WIDTH = UUID_WIDTH + 1,
// core request flags
parameter FLAGS_WIDTH = 0,
// Core response output register
parameter CORE_OUT_REG = 0,
@ -82,7 +85,7 @@ module VX_cache_bank #(
input wire [`CS_WORD_WIDTH-1:0] core_req_data, // data to be written
input wire [TAG_WIDTH-1:0] core_req_tag, // identifier of the request (request id)
input wire [REQ_SEL_WIDTH-1:0] core_req_idx, // index of the request in the core request array
input wire core_req_flush, // flush enable
input wire [`UP(FLAGS_WIDTH)-1:0] core_req_flags,
output wire core_req_ready,
// Core Response
@ -99,7 +102,7 @@ module VX_cache_bank #(
output wire [LINE_SIZE-1:0] mem_req_byteen,
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
output wire mem_req_flush,
output wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flags,
input wire mem_req_ready,
// Memory response
@ -143,22 +146,25 @@ module VX_cache_bank #(
wire [NUM_WAYS-1:0] flush_way_st0;
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
wire [`CS_LINE_SEL_BITS-1:0] line_sel_st0, line_sel_st1;
wire [`CS_LINE_SEL_BITS-1:0] line_idx_st0, line_idx_st1;
wire [`CS_TAG_SEL_BITS-1:0] line_tag_st0, line_tag_st1;
wire rw_sel, rw_st0, rw_st1;
wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1;
wire [WORD_SEL_WIDTH-1:0] word_idx_sel, word_idx_st0, word_idx_st1;
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1;
wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1;
wire [`CS_WORD_WIDTH-1:0] write_data_st0, write_data_st1;
wire [`CS_WORD_WIDTH-1:0] read_data_st1;
wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1;
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0, mshr_id_st0, mshr_id_st1;
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1;
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0;
wire valid_sel, valid_st0, valid_st1;
wire is_creq_st0, is_creq_st1;
wire is_fill_st0, is_fill_st1;
wire is_replay_st0, is_replay_st1;
wire creq_flush_sel, creq_flush_st0, creq_flush_st1;
wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1;
wire evict_dirty_st0, evict_dirty_st1;
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
wire [NUM_WAYS-1:0] way_idx_st0, way_idx_st1;
wire [NUM_WAYS-1:0] tag_matches_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1;
@ -264,11 +270,11 @@ module VX_cache_bank #(
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
assign rw_sel = replay_valid ? replay_rw : core_req_rw;
assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen;
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
assign word_idx_sel= replay_valid ? replay_wsel : core_req_wsel;
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
assign tag_sel = (init_valid | flush_valid) ? (flush_valid ? flush_tag : '0) :
(replay_valid ? replay_tag : (mem_rsp_valid ? mem_rsp_tag_s : core_req_tag));
assign creq_flush_sel = core_req_valid && core_req_flush;
assign flags_sel = core_req_valid ? core_req_flags : '0;
assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
@ -294,14 +300,14 @@ module VX_cache_bank #(
end
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (~pipe_stall),
.data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
.data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, flags_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, word_idx_sel, req_idx_sel, tag_sel, replay_id}),
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, flags_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, replay_id_st0})
);
if (UUID_WIDTH != 0) begin : g_req_uuid_st0
@ -321,9 +327,10 @@ module VX_cache_bank #(
wire do_cache_wr_st0 = do_creq_wr_st0 || do_replay_wr_st0;
wire do_lookup_st0 = do_cache_rd_st0 || do_cache_wr_st0;
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
assign write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
assign line_sel_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
assign line_tag_st0 = `CS_LINE_ADDR_TAG(addr_st0);
wire [NUM_WAYS-1:0] evict_way_st0;
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0;
@ -353,7 +360,9 @@ module VX_cache_bank #(
.write (do_cache_wr_st0),
.lookup (do_lookup_st0),
.line_addr (addr_st0),
.way_sel (flush_way_st0),
.way_idx (flush_way_st0),
// tag matches
.tag_matches(tag_matches_st0),
// replacement
@ -362,29 +371,29 @@ module VX_cache_bank #(
.evict_tag (evict_tag_st0)
);
wire [`CS_LINE_ADDR_WIDTH-1:0] addr2_st0;
wire [`CS_TAG_SEL_BITS-1:0] line_tag2_st0;
wire is_flush2_st0 = WRITEBACK && is_flush_st0;
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
assign way_sel_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0;
assign way_idx_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0;
assign addr2_st0 = (is_fill_st0 || is_flush2_st0) ? {evict_tag_st0, line_sel_st0} : addr_st0;
assign line_tag2_st0 = (is_fill_st0 || is_flush2_st0) ? evict_tag_st0 : line_tag_st0;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (~pipe_stall),
.data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, creq_flush_st0, rw_st0, addr2_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, evict_dirty_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, evict_dirty_st1, mshr_pending_st1})
.data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, rw_st0, flags_st0, line_tag2_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_idx_st0, evict_dirty_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, rw_st1, flags_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_idx_st1, evict_dirty_st1, mshr_pending_st1})
);
// we have a tag hit
wire is_hit_st1 = (| way_sel_st1);
wire is_hit_st1 = (| way_idx_st1);
if (UUID_WIDTH != 0) begin : g_req_uuid_st1
assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH];
@ -413,9 +422,7 @@ module VX_cache_bank #(
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
assign line_sel_st1 = addr_st1[`CS_LINE_SEL_BITS-1:0];
`UNUSED_VAR (do_write_miss_st1)
assign addr_st1 = {line_tag_st1, line_idx_st1};
// ensure mshr replay always gets a hit
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time))
@ -426,28 +433,16 @@ module VX_cache_bank #(
assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceding write
always @(posedge clk) begin
// stall reads following writes to same line address
rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_sel_st0 == line_sel_st1)
rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_idx_st0 == line_idx_st1)
&& ~rdw_hazard3_st1; // release pipeline stall
end
wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}};
assign write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0];
wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1;
wire [LINE_SIZE-1:0] write_byteen_st1;
wire [`CS_LINE_WIDTH-1:0] dirty_data_st1;
wire [LINE_SIZE-1:0] dirty_byteen_st1;
if (`CS_WORDS_PER_LINE > 1) begin : g_write_byteen_st1_wsel
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen_w;
always @(*) begin
write_byteen_w = '0;
write_byteen_w[wsel_st1] = byteen_st1;
end
assign write_byteen_st1 = write_byteen_w;
end else begin : g_write_byteen_st1
assign write_byteen_st1 = byteen_st1;
end
VX_cache_data #(
.INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)),
.BANK_ID (BANK_ID),
@ -473,12 +468,12 @@ module VX_cache_bank #(
.fill (do_fill_st1),
.flush (do_flush_st1),
.write (do_cache_wr_st1),
.way_sel (way_sel_st1),
.way_idx (way_idx_st1),
.line_addr (addr_st1),
.wsel (wsel_st1),
.word_idx (word_idx_st1),
.fill_data (fill_data_st1),
.write_data (write_data_st1),
.write_byteen(write_byteen_st1),
.write_byteen(byteen_st1),
.read_data (read_data_st1),
.dirty_data (dirty_data_st1),
.dirty_byteen(dirty_byteen_st1)
@ -488,13 +483,14 @@ module VX_cache_bank #(
wire [MSHR_SIZE-1:0] mshr_lookup_rw_st0;
wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall;
wire mshr_lookup_st0 = mshr_allocate_st0;
wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall;
// release allocated mshr entry if we had a hit
wire mshr_release_st1;
if (WRITEBACK) begin : g_mshr_release_st1
if (WRITEBACK) begin : g_mshr_release
assign mshr_release_st1 = is_hit_st1;
end else begin : g_mshr_release_st1_ro
end else begin : g_mshr_release_ro
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content
// this can happen when writes are sent late, when the fill was already in flight.
@ -548,7 +544,7 @@ module VX_cache_bank #(
.allocate_valid (mshr_allocate_st0),
.allocate_addr (addr_st0),
.allocate_rw (rw_st0),
.allocate_data ({wsel_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}),
.allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}),
.allocate_id (mshr_alloc_id_st0),
.allocate_prev (mshr_prev_st0),
`UNUSED_PIN (allocate_ready),
@ -571,7 +567,7 @@ module VX_cache_bank #(
wire [MSHR_SIZE-1:0] lookup_matches;
for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_lookup_matches
assign lookup_matches[i] = mshr_lookup_pending_st0[i]
&& (i != mshr_alloc_id_st0) // exclude current mshr id
&& (i != mshr_id_st0) // exclude current mshr id
&& (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough
end
assign mshr_pending_st0 = (| lookup_matches);
@ -613,7 +609,7 @@ module VX_cache_bank #(
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
wire [MEM_TAG_WIDTH-1:0] mreq_queue_tag;
wire mreq_queue_rw;
wire mreq_queue_flush;
wire [`UP(FLAGS_WIDTH)-1:0] mreq_queue_flags;
wire is_fill_or_flush_st1 = is_fill_st1 || is_flush_st1;
wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1;
@ -629,6 +625,7 @@ module VX_cache_bank #(
|| do_writeback_st1)
&& ~rdw_hazard3_st1;
end else begin : g_mreq_queue_push_ro
`UNUSED_VAR (do_write_miss_st1)
`UNUSED_VAR (do_writeback_st1)
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|| do_creq_wr_st1)
@ -637,7 +634,7 @@ module VX_cache_bank #(
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
assign mreq_queue_addr = addr_st1;
assign mreq_queue_flush = creq_flush_st1;
assign mreq_queue_flags = flags_st1;
if (WRITE_ENABLE) begin : g_mreq_queue
if (WRITEBACK) begin : g_writeback
@ -645,9 +642,18 @@ module VX_cache_bank #(
assign mreq_queue_data = dirty_data_st1;
assign mreq_queue_byteen = is_fill_or_flush_st1 ? dirty_byteen_st1 : '1;
end else begin : g_writethrough
wire [LINE_SIZE-1:0] line_byteen;
VX_decoder #(
.N (`CS_WORD_SEL_BITS),
.M (WORD_SIZE)
) byteen_dec (
.sel_in (word_idx_st1),
.data_in (byteen_st1),
.data_out (line_byteen)
);
assign mreq_queue_rw = rw_st1;
assign mreq_queue_data = write_data_st1;
assign mreq_queue_byteen = rw_st1 ? write_byteen_st1 : '1;
assign mreq_queue_data = {`CS_WORDS_PER_LINE{write_data_st1}};
assign mreq_queue_byteen = rw_st1 ? line_byteen : '1;
`UNUSED_VAR (is_fill_or_flush_st1)
`UNUSED_VAR (dirty_data_st1)
`UNUSED_VAR (dirty_byteen_st1)
@ -667,17 +673,17 @@ module VX_cache_bank #(
end
VX_fifo_queue #(
.DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
.DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)),
.DEPTH (MREQ_SIZE),
.ALM_FULL (MREQ_SIZE-PIPELINE_STAGES),
.ALM_FULL (MREQ_SIZE - PIPELINE_STAGES),
.OUT_REG (MEM_OUT_REG)
) mem_req_queue (
.clk (clk),
.reset (reset),
.push (mreq_queue_push),
.pop (mreq_queue_pop),
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flush}),
.data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flush}),
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flags}),
.data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flags}),
.empty (mreq_queue_empty),
.alm_full (mreq_queue_alm_full),
`UNUSED_PIN (full),

View file

@ -58,6 +58,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
// core request tag size
parameter TAG_WIDTH = UUID_WIDTH + 1,
// core request flags
parameter FLAGS_WIDTH = 0,
// enable bypass for non-cacheable addresses
parameter NC_ENABLE = 0,
@ -156,6 +159,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
.DIRTY_BYTES (DIRTY_BYTES),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (ARB_TAG_WIDTH),
.FLAGS_WIDTH (FLAGS_WIDTH),
.TAG_SEL_IDX (TAG_SEL_IDX),
.CORE_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : CORE_OUT_BUF),
.MEM_OUT_BUF ((NUM_CACHES > 1) ? 2 : MEM_OUT_BUF),

View file

@ -50,11 +50,11 @@ module VX_cache_data #(
input wire flush,
input wire write,
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] wsel,
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx,
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data,
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] write_data,
input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen,
input wire [NUM_WAYS-1:0] way_sel,
input wire [`CS_WORD_WIDTH-1:0] write_data,
input wire [WORD_SIZE-1:0] write_byteen,
input wire [NUM_WAYS-1:0] way_idx,
output wire [`CS_WORD_WIDTH-1:0] read_data,
output wire [`CS_LINE_WIDTH-1:0] dirty_data,
output wire [LINE_SIZE-1:0] dirty_byteen
@ -68,132 +68,144 @@ module VX_cache_data #(
`UNUSED_VAR (read)
`UNUSED_VAR (flush)
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
localparam BYTEENW = (WRITE_ENABLE != 0) ? LINE_SIZE : 1;
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx_bin;
wire [`CS_LINE_SEL_BITS-1:0] line_idx;
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
assign line_idx = line_addr[`CS_LINE_SEL_BITS-1:0];
VX_encoder #(
.N (NUM_WAYS)
) way_idx_enc (
.data_in (way_idx),
.data_out (way_idx_bin),
`UNUSED_PIN (valid_out)
);
if (WRITEBACK) begin : g_dirty_data
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata;
VX_transpose #(
.DATAW (`CS_WORD_WIDTH),
.N (`CS_WORDS_PER_LINE),
.M (NUM_WAYS)
) transpose (
.data_in (line_rdata),
.data_out (transposed_rdata)
);
assign dirty_data = transposed_rdata[way_idx];
assign dirty_data = line_rdata[way_idx_bin];
end else begin : g_dirty_data_0
assign dirty_data = '0;
end
if (DIRTY_BYTES) begin : g_dirty_byteen
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata;
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata;
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bs_rdata;
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bs_wdata;
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_bs_wdata
wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]);
assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]);
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j
wire [WORD_SIZE-1:0] word_mask = {WORD_SIZE{(WORD_SIZE == 1) || (word_idx == j)}};
wire [WORD_SIZE-1:0] wdata = write ? (bs_rdata[i][j] | (write_byteen & word_mask)) : ((fill || flush) ? '0 : bs_rdata[i][j]);
assign bs_wdata[i][j] = init ? '0 : (way_idx[i] ? wdata : bs_rdata[i][j]);
end
end
wire bs_read = write || fill || flush;
wire bs_write = init || write || fill || flush;
VX_sp_ram #(
.DATAW (LINE_SIZE * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK)
) byteen_store (
.clk (clk),
.reset (reset),
.read (write || fill || flush),
.write (init || write || fill || flush),
.read (bs_read && ~stall),
.write (bs_write && ~stall),
.wren (1'b1),
.addr (line_sel),
.addr (line_idx),
.wdata (bs_wdata),
.rdata (bs_rdata)
);
assign dirty_byteen = bs_rdata[way_idx];
assign dirty_byteen = bs_rdata[way_idx_bin];
end else begin : g_dirty_byteen_0
assign dirty_byteen = '1;
end
// order the data layout to perform ways multiplexing last.
// this allows converting way index to binary in parallel with BRAM read access and way selection.
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
wire [BYTEENW-1:0] line_wren;
wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
wire [BYTEENW-1:0] line_wren;
wire line_write;
wire line_read;
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin : g_line_wdata
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_i
for (genvar j = 0; j < NUM_WAYS; ++j) begin : g_j
assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i];
assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
& {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
wire way_en = (NUM_WAYS == 1) || way_idx[i];
if (WRITE_ENABLE != 0) begin : g_line_data
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_w;
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j
wire word_en = (WORD_SIZE == 1) || (word_idx == j);
assign line_wdata[j] = fill ? fill_data[j] : write_data;
assign wren_w[j] = {WORD_SIZE{fill}} | (write_byteen & {WORD_SIZE{word_en}});
end
assign line_wren = wren_w;
assign line_write = (fill || write) && way_en;
if (WRITEBACK) begin : g_line_read_wb
assign line_read = (read || fill || flush);
end else begin : g_line_read_wt
assign line_read = read;
end
end else begin : g_line_data_ro
`UNUSED_VAR (write)
`UNUSED_VAR (write_byteen)
`UNUSED_VAR (write_data)
assign line_wdata = fill_data;
assign line_wren = 1'b1;
assign line_write = fill && way_en;
assign line_read = read;
end
assign line_wren = wren_w;
end else begin : g_line_wdata_ro
`UNUSED_VAR (write)
`UNUSED_VAR (write_byteen)
`UNUSED_VAR (write_data)
assign line_wdata = fill_data;
assign line_wren = fill;
VX_sp_ram #(
.DATAW (`CS_LINE_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.WRENW (BYTEENW),
.NO_RWCHECK (1),
.RW_ASSERT (1)
) data_store (
.clk (clk),
.reset (reset),
.read (line_read && ~stall),
.write (line_write && ~stall),
.wren (line_wren),
.addr (line_idx),
.wdata (line_wdata),
.rdata (line_rdata[i])
);
end
VX_encoder #(
.N (NUM_WAYS)
) way_enc (
.data_in (way_sel),
.data_out (way_idx),
`UNUSED_PIN (valid_out)
);
wire line_read = (read && ~stall)
|| (WRITEBACK && (fill || flush));
wire line_write = write || fill;
VX_sp_ram #(
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK),
.WRENW (BYTEENW),
.NO_RWCHECK (1),
.RW_ASSERT (1)
) data_store (
.clk (clk),
.reset (reset),
.read (line_read),
.write (line_write),
.wren (line_wren),
.addr (line_sel),
.wdata (line_wdata),
.rdata (line_rdata)
);
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
if (`CS_WORDS_PER_LINE > 1) begin : g_per_way_rdata_wsel
assign per_way_rdata = line_rdata[wsel];
end else begin : g_per_way_rdata
`UNUSED_VAR (wsel)
assign per_way_rdata = line_rdata;
if (`CS_WORDS_PER_LINE > 1) begin : g_read_data
// order the data layout to perform ways multiplexing last.
// this allows converting way index to binary in parallel with BRAM read access and way selection.
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata;
VX_transpose #(
.DATAW (`CS_WORD_WIDTH),
.N (NUM_WAYS),
.M (`CS_WORDS_PER_LINE)
) transpose (
.data_in (line_rdata),
.data_out (transposed_rdata)
);
assign read_data = transposed_rdata[word_idx][way_idx_bin];
end else begin : g_read_data_1w
`UNUSED_VAR (word_idx)
assign read_data = line_rdata[way_idx_bin];
end
assign read_data = per_way_rdata[way_idx];
`ifdef DBG_TRACE_CACHE
always @(posedge clk) begin
if (fill && ~stall) begin
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data))
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data))
end
if (flush && ~stall) begin
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data))
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data))
end
if (read && ~stall) begin
`TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid))
`TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid))
end
if (write && ~stall) begin
`TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid))
`TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid))
end
end
`endif

View file

@ -47,7 +47,7 @@ module VX_cache_tags #(
input wire write,
input wire lookup,
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
input wire [NUM_WAYS-1:0] way_sel,
input wire [NUM_WAYS-1:0] way_idx,
output wire [NUM_WAYS-1:0] tag_matches,
// eviction
@ -62,7 +62,7 @@ module VX_cache_tags #(
// valid, dirty, tag
localparam TAG_WIDTH = 1 + WRITEBACK + `CS_TAG_SEL_BITS;
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
wire [`CS_LINE_SEL_BITS-1:0] line_idx = line_addr[`CS_LINE_SEL_BITS-1:0];
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
@ -80,7 +80,7 @@ module VX_cache_tags #(
end
end
assign evict_way = fill ? evict_way_r : way_sel;
assign evict_way = fill ? evict_way_r : way_idx;
VX_onehot_mux #(
.DATAW (`CS_TAG_SEL_BITS),
@ -103,7 +103,7 @@ module VX_cache_tags #(
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store
wire do_fill = fill_s && evict_way[i];
wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode
wire do_flush = flush_s && (!WRITEBACK || way_idx[i]); // flush the whole line in writethrough mode
wire do_write = WRITEBACK && write && tag_matches[i];
wire line_read = (WRITEBACK && (fill_s || flush_s));
@ -130,10 +130,10 @@ module VX_cache_tags #(
) tag_store (
.clk (clk),
.reset (reset),
.read (line_read),
.write (line_write),
.read (line_read && ~stall),
.write (line_write && ~stall),
.wren (1'b1),
.addr (line_sel),
.addr (line_idx),
.wdata (line_wdata),
.rdata (line_rdata)
);
@ -146,29 +146,29 @@ module VX_cache_tags #(
assign evict_dirty = | (read_dirty & evict_way);
`ifdef DBG_TRACE_CACHE
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel};
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_idx};
always @(posedge clk) begin
if (fill && ~stall) begin
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)))
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)))
end
if (init) begin
`TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel))
`TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx))
end
if (flush && ~stall) begin
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty))
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty))
end
if (lookup && ~stall) begin
if (tag_matches != 0) begin
if (write) begin
`TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid))
`TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid))
end else begin
`TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid))
`TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid))
end
end else begin
if (write) begin
`TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid))
`TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid))
end else begin
`TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid))
`TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid))
end
end
end

View file

@ -57,6 +57,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
// core request tag size
parameter TAG_WIDTH = UUID_WIDTH + 1,
// core request flags
parameter FLAGS_WIDTH = 0,
// enable bypass for non-cacheable addresses
parameter NC_ENABLE = 0,
@ -175,6 +178,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
.DIRTY_BYTES (DIRTY_BYTES),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.FLAGS_WIDTH (FLAGS_WIDTH),
.CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
.MEM_OUT_BUF (NC_OR_BYPASS ? 1 : MEM_OUT_BUF)
) cache (

View file

@ -69,8 +69,8 @@ module VX_cyclic_arbiter #(
.N (LOG_NUM_REQS),
.D (NUM_REQS)
) grant_decoder (
.data_in (grant_index),
.valid_in (1'b1),
.sel_in (grant_index),
.data_in (1'b1),
.data_out (grant_onehot_w)
);

View file

@ -18,25 +18,30 @@
`TRACING_OFF
// VX_decoder: places an M-bit input into one of D slots of data_out and
// zeroes every other slot.
// NOTE(review): this listing is a diff view, so the removed and the added
// lines of the change appear back to back (two defaults for N, two port
// namings, two 'shift' declarations). The comments describe each line as written.
module VX_decoder #(
// number of select bits (default in the first of the two listed versions)
parameter N = 1,
// number of select bits (default in the second listed version); N == 0 takes the passthrough branch below
parameter N = 0,
// width in bits of each output slot
parameter M = 1,
// implementation choice: 1 = procedural indexed assignment, otherwise shift expression
parameter MODEL = 0,
// number of output slots; defaults to 2^N
parameter D = 1 << N
) (
// first listed port naming: data_in is the N-bit slot select, valid_in the M-bit payload
input wire [N-1:0] data_in,
input wire [M-1:0] valid_in,
// second listed port naming: sel_in is the slot select, data_in the M-bit payload
// presumably `UP(N) clamps the width to at least 1 so the port stays legal when N == 0 -- TODO confirm against the macro definition
input wire [`UP(N)-1:0] sel_in,
input wire [M-1:0] data_in,
// D slots of M bits each
output wire [D-1:0][M-1:0] data_out
);
// per-slot mask: all ones in the selected slot, zeros elsewhere
logic [D-1:0][M-1:0] shift;
if (MODEL == 1) begin : g_model1
always @(*) begin
// clear every slot, then set all M bits of the selected slot
shift = '0;
shift[data_in] = {M{1'b1}};
// decoding logic is only generated when there is at least one select bit
if (N != 0) begin : g_decoder
// per-slot mask: all ones in the selected slot, zeros elsewhere
logic [D-1:0][M-1:0] shift;
if (MODEL == 1) begin : g_model1
always @(*) begin
// clear every slot, then set all M bits of the slot indexed by sel_in
shift = '0;
shift[sel_in] = {M{1'b1}};
end
end else begin : g_model0
// M ones, widened to D*M bits, shifted left by sel_in slots of M bits
assign shift = ((D*M)'({M{1'b1}})) << (sel_in * M);
end
end else begin : g_model0
// M ones, widened to D*M bits, shifted left by data_in slots of M bits
assign shift = ((D*M)'({M{1'b1}})) << (data_in * M);
// replicate the payload into every slot and keep only the masked (selected) one
assign data_out = {D{data_in}} & shift;
end else begin : g_passthru
// N == 0: the select input is not used and the payload is forwarded unchanged
`UNUSED_VAR (sel_in)
assign data_out = data_in;
end
// replicate the payload into every slot and keep only the masked (selected) one
assign data_out = {D{valid_in}} & shift;
endmodule
`TRACING_ON

View file

@ -104,8 +104,8 @@ module VX_mem_adapter #(
.N (D),
.M (SRC_DATA_WIDTH/8)
) req_be_dec (
.data_in (req_idx),
.valid_in (mem_req_byteen_in),
.sel_in (req_idx),
.data_in (mem_req_byteen_in),
.data_out (mem_req_byteen_out_w)
);
@ -113,8 +113,8 @@ module VX_mem_adapter #(
.N (D),
.M (SRC_DATA_WIDTH)
) req_data_dec (
.data_in (req_idx),
.valid_in (mem_req_data_in),
.sel_in (req_idx),
.data_in (mem_req_data_in),
.data_out (mem_req_data_out_w)
);

View file

@ -18,7 +18,7 @@ module VX_mem_coalescer #(
parameter `STRING INSTANCE_ID = "",
parameter NUM_REQS = 1,
parameter ADDR_WIDTH = 32,
parameter FLAGS_WIDTH = 1,
parameter FLAGS_WIDTH = 0,
parameter DATA_IN_SIZE = 4,
parameter DATA_OUT_SIZE = 64,
parameter TAG_WIDTH = 8,
@ -43,7 +43,7 @@ module VX_mem_coalescer #(
input wire [NUM_REQS-1:0] in_req_mask,
input wire [NUM_REQS-1:0][DATA_IN_SIZE-1:0] in_req_byteen,
input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr,
input wire [NUM_REQS-1:0][FLAGS_WIDTH-1:0] in_req_flags,
input wire [NUM_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] in_req_flags,
input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data,
input wire [TAG_WIDTH-1:0] in_req_tag,
output wire in_req_ready,
@ -61,7 +61,7 @@ module VX_mem_coalescer #(
output wire [OUT_REQS-1:0] out_req_mask,
output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen,
output wire [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr,
output wire [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags,
output wire [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] out_req_flags,
output wire [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data,
output wire [OUT_TAG_WIDTH-1:0] out_req_tag,
input wire out_req_ready,
@ -92,7 +92,7 @@ module VX_mem_coalescer #(
logic out_req_rw_r, out_req_rw_n;
logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags_r, out_req_flags_n;
logic [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] out_req_flags_r, out_req_flags_n;
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
@ -110,7 +110,7 @@ module VX_mem_coalescer #(
logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n;
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n;
logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] seed_flags_r, seed_flags_n;
logic [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] seed_flags_r, seed_flags_n;
logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n;
logic [NUM_REQS-1:0] req_rem_mask_r, req_rem_mask_n;
@ -139,7 +139,7 @@ module VX_mem_coalescer #(
assign addr_base[j] = in_req_addr[DATA_RATIO * i + j][ADDR_WIDTH-1:DATA_RATIO_W];
end
wire [DATA_RATIO-1:0][FLAGS_WIDTH-1:0] req_flags;
wire [DATA_RATIO-1:0][`UP(FLAGS_WIDTH)-1:0] req_flags;
for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_req_flags
assign req_flags[j] = in_req_flags[DATA_RATIO * i + j];
end
@ -221,7 +221,7 @@ module VX_mem_coalescer #(
end
VX_pipe_register #(
.DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + FLAGS_WIDTH + OUT_ADDR_WIDTH + FLAGS_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH),
.DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + `UP(FLAGS_WIDTH) + OUT_ADDR_WIDTH + `UP(FLAGS_WIDTH) + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH),
.RESETW (1 + NUM_REQS + 1),
.INIT_VALUE ({1'b0, {NUM_REQS{1'b1}}, 1'b0})
) pipe_reg (
@ -270,7 +270,12 @@ module VX_mem_coalescer #(
assign out_req_mask = out_req_mask_r;
assign out_req_byteen = out_req_byteen_r;
assign out_req_addr = out_req_addr_r;
assign out_req_flags = out_req_flags_r;
if (FLAGS_WIDTH != 0) begin : g_out_req_flags
assign out_req_flags = out_req_flags_r;
end else begin : g_out_req_flags_0
`UNUSED_VAR (out_req_flags_r)
assign out_req_flags = '0;
end
assign out_req_data = out_req_data_r;
assign out_req_tag = out_req_tag_r;

View file

@ -21,7 +21,7 @@ module VX_mem_scheduler #(
parameter WORD_SIZE = 4,
parameter LINE_SIZE = WORD_SIZE,
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
parameter FLAGS_WIDTH = 1,
parameter FLAGS_WIDTH = 0,
parameter TAG_WIDTH = 8,
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
parameter CORE_QUEUE_SIZE= 8,
@ -50,7 +50,7 @@ module VX_mem_scheduler #(
input wire [CORE_REQS-1:0] core_req_mask,
input wire [CORE_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] core_req_addr,
input wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] core_req_flags,
input wire [CORE_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] core_req_flags,
input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data,
input wire [TAG_WIDTH-1:0] core_req_tag,
output wire core_req_ready,
@ -72,7 +72,7 @@ module VX_mem_scheduler #(
output wire [MEM_CHANNELS-1:0] mem_req_mask,
output wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen,
output wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags,
output wire [MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags,
output wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data,
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
@ -112,7 +112,7 @@ module VX_mem_scheduler #(
wire reqq_rw;
wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen;
wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr;
wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags;
wire [CORE_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] reqq_flags;
wire [CORE_REQS-1:0][WORD_WIDTH-1:0] reqq_data;
wire [REQQ_TAG_WIDTH-1:0] reqq_tag;
wire reqq_ready;
@ -122,7 +122,7 @@ module VX_mem_scheduler #(
wire reqq_rw_s;
wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s;
wire [MERGED_REQS-1:0][MEM_ADDR_WIDTH-1:0] reqq_addr_s;
wire [MERGED_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags_s;
wire [MERGED_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] reqq_flags_s;
wire [MERGED_REQS-1:0][LINE_WIDTH-1:0] reqq_data_s;
wire [MERGED_TAG_WIDTH-1:0] reqq_tag_s;
wire reqq_ready_s;
@ -132,7 +132,7 @@ module VX_mem_scheduler #(
wire mem_req_rw_s;
wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_s;
wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_s;
wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_s;
wire [MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_s;
wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_s;
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
wire mem_req_ready_s;
@ -167,7 +167,7 @@ module VX_mem_scheduler #(
end
VX_elastic_buffer #(
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + FLAGS_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH),
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + `UP(FLAGS_WIDTH) + WORD_WIDTH) + REQQ_TAG_WIDTH),
.SIZE (CORE_QUEUE_SIZE),
.OUT_REG (1)
) req_queue (
@ -297,7 +297,7 @@ module VX_mem_scheduler #(
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b;
wire [BATCH_SEL_WIDTH-1:0] req_batch_idx;
@ -385,8 +385,10 @@ module VX_mem_scheduler #(
assign reqq_ready_s = req_sent_all;
wire [MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_u;
VX_elastic_buffer #(
.DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + FLAGS_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH),
.DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + `UP(FLAGS_WIDTH) + LINE_WIDTH) + MEM_TAG_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
) mem_req_buf (
@ -395,11 +397,18 @@ module VX_mem_scheduler #(
.valid_in (mem_req_valid_s),
.ready_in (mem_req_ready_s),
.data_in ({mem_req_mask_s, mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_flags_s, mem_req_data_s, mem_req_tag_s}),
.data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_flags, mem_req_data, mem_req_tag}),
.data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_flags_u, mem_req_data, mem_req_tag}),
.valid_out (mem_req_valid),
.ready_out (mem_req_ready)
);
if (FLAGS_WIDTH != 0) begin : g_mem_req_flags
assign mem_req_flags = mem_req_flags_u;
end else begin : g_mem_req_flags_0
`UNUSED_VAR (mem_req_flags_u)
assign mem_req_flags = '0;
end
// Handle memory responses ////////////////////////////////////////////////
reg [CORE_QUEUE_SIZE-1:0][CORE_REQS-1:0] rsp_rem_mask;

View file

@ -484,8 +484,8 @@ module VX_rr_arbiter #(
.N (LOG_NUM_REQS),
.D (NUM_REQS)
) grant_decoder (
.data_in (grant_index),
.valid_in (grant_valid),
.sel_in (grant_index),
.data_in (grant_valid),
.data_out (grant_onehot)
);

View file

@ -68,8 +68,8 @@ module VX_stream_xbar #(
.N (OUT_WIDTH),
.D (NUM_OUTPUTS)
) sel_in_decoder (
.data_in (sel_in[i]),
.valid_in (valid_in[i]),
.sel_in (sel_in[i]),
.data_in (valid_in[i]),
.data_out (per_output_valid_in[i])
);
assign ready_in[i] = | per_output_ready_in_w[i];
@ -141,8 +141,8 @@ module VX_stream_xbar #(
.N (OUT_WIDTH),
.D (NUM_OUTPUTS)
) sel_in_decoder (
.data_in (sel_in[0]),
.valid_in (valid_in[0]),
.sel_in (sel_in[0]),
.data_in (valid_in[0]),
.data_out (valid_out_w)
);