mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
fixed memory flags propagation through the cache hierarchy
This commit is contained in:
parent
83badaac86
commit
2eeb2ac532
19 changed files with 279 additions and 229 deletions
|
@ -142,8 +142,8 @@ cache()
|
|||
CONFIGS="-DL1_LINE_SIZE=$XSIZE -DLMEM_DISABLE" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test cache ways
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_NUM_WAYS=8 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
CONFIGS="-DICACHE_NUM_WAYS=4 -DDCACHE_NUM_WAYS=8" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test cache banking
|
||||
CONFIGS="-DLMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx
|
||||
|
@ -154,10 +154,10 @@ cache()
|
|||
CONFIGS="-DDCACHE_NUM_BANKS=2" ./ci/blackbox.sh --driver=simx --app=sgemmx
|
||||
|
||||
# test writeback
|
||||
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
||||
CONFIGS="-DDCACHE_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --app=mstress
|
||||
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
|
||||
CONFIGS="-DSOCKET_SIZE=1 -DDCACHE_WRITEBACK=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
|
||||
CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=rtlsim --app=mstress
|
||||
CONFIGS="-DDCACHE_WRITEBACK=1 -DDCACHE_NUM_WAYS=4" ./ci/blackbox.sh --driver=simx --app=mstress
|
||||
CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=rtlsim --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
|
||||
CONFIGS="-DSOCKET_SIZE=1 -DL2_WRITEBACK=1 -DL3_WRITEBACK=1" ./ci/blackbox.sh --driver=simx --cores=2 --clusters=2 --l2cache --l3cache --app=mstress
|
||||
|
||||
# cache clustering
|
||||
CONFIGS="-DSOCKET_SIZE=4 -DNUM_DCACHES=4 -DNUM_ICACHES=2" ./ci/blackbox.sh --driver=rtlsim --app=sgemmx --cores=4 --warps=1 --threads=2
|
||||
|
|
|
@ -100,6 +100,7 @@ module VX_cluster import VX_gpu_pkg::*; #(
|
|||
.WRITEBACK (`L2_WRITEBACK),
|
||||
.DIRTY_BYTES (`L2_WRITEBACK),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
|
||||
.CORE_OUT_BUF (3),
|
||||
.MEM_OUT_BUF (3),
|
||||
.NC_ENABLE (1),
|
||||
|
|
|
@ -564,12 +564,8 @@
|
|||
|
||||
// Cache Size
|
||||
`ifndef L2_CACHE_SIZE
|
||||
`ifdef ALTERA_S10
|
||||
`define L2_CACHE_SIZE 2097152
|
||||
`else
|
||||
`define L2_CACHE_SIZE 1048576
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// Number of Banks
|
||||
`ifndef L2_NUM_BANKS
|
||||
|
@ -610,11 +606,7 @@
|
|||
|
||||
// Cache Size
|
||||
`ifndef L3_CACHE_SIZE
|
||||
`ifdef ALTERA_S10
|
||||
`define L3_CACHE_SIZE 2097152
|
||||
`else
|
||||
`define L3_CACHE_SIZE 1048576
|
||||
`endif
|
||||
`endif
|
||||
|
||||
// Number of Banks
|
||||
|
|
|
@ -100,6 +100,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
.MRSQ_SIZE (`ICACHE_MRSQ_SIZE),
|
||||
.MREQ_SIZE (`ICACHE_MREQ_SIZE),
|
||||
.TAG_WIDTH (ICACHE_TAG_WIDTH),
|
||||
.FLAGS_WIDTH (0),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.WRITE_ENABLE (0),
|
||||
.NC_ENABLE (0),
|
||||
|
@ -146,6 +147,7 @@ module VX_socket import VX_gpu_pkg::*; #(
|
|||
.MREQ_SIZE (`DCACHE_WRITEBACK ? `DCACHE_MSHR_SIZE : `DCACHE_MREQ_SIZE),
|
||||
.TAG_WIDTH (DCACHE_TAG_WIDTH),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
|
||||
.WRITE_ENABLE (1),
|
||||
.WRITEBACK (`DCACHE_WRITEBACK),
|
||||
.DIRTY_BYTES (`DCACHE_WRITEBACK),
|
||||
|
|
|
@ -86,6 +86,7 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
.WRITEBACK (`L3_WRITEBACK),
|
||||
.DIRTY_BYTES (`L3_WRITEBACK),
|
||||
.UUID_WIDTH (`UUID_WIDTH),
|
||||
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
|
||||
.CORE_OUT_BUF (3),
|
||||
.MEM_OUT_BUF (3),
|
||||
.NC_ENABLE (1),
|
||||
|
|
4
hw/rtl/cache/VX_bank_flush.sv
vendored
4
hw/rtl/cache/VX_bank_flush.sv
vendored
|
@ -118,8 +118,8 @@ module VX_bank_flush #(
|
|||
.N (`CS_WAY_SEL_BITS),
|
||||
.D (NUM_WAYS)
|
||||
) ctr_decoder (
|
||||
.data_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]),
|
||||
.valid_in (1'b1),
|
||||
.sel_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]),
|
||||
.data_in (1'b1),
|
||||
.data_out (flush_way)
|
||||
);
|
||||
end else begin : g_flush_way_all
|
||||
|
|
45
hw/rtl/cache/VX_cache.sv
vendored
45
hw/rtl/cache/VX_cache.sv
vendored
|
@ -54,6 +54,9 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// core request flags
|
||||
parameter FLAGS_WIDTH = 0,
|
||||
|
||||
// Core response output register
|
||||
parameter CORE_OUT_BUF = 0,
|
||||
|
||||
|
@ -90,7 +93,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS);
|
||||
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1;
|
||||
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + `UP(FLAGS_WIDTH);
|
||||
localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
|
||||
localparam BANK_MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH;
|
||||
|
||||
|
@ -206,13 +209,13 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [LINE_SIZE-1:0] mem_req_byteen;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_flush;
|
||||
wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flags;
|
||||
wire mem_req_ready;
|
||||
|
||||
wire mem_req_flush_b;
|
||||
wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flush_b;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)),
|
||||
.SIZE (MEM_REQ_REG_DISABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
|
@ -220,13 +223,18 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.reset (reset),
|
||||
.valid_in (mem_req_valid),
|
||||
.ready_in (mem_req_ready),
|
||||
.data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flush}),
|
||||
.data_in ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data, mem_req_tag, mem_req_flags}),
|
||||
.data_out ({mem_bus_tmp_if.req_data.rw, mem_bus_tmp_if.req_data.byteen, mem_bus_tmp_if.req_data.addr, mem_bus_tmp_if.req_data.data, mem_bus_tmp_if.req_data.tag, mem_req_flush_b}),
|
||||
.valid_out (mem_bus_tmp_if.req_valid),
|
||||
.ready_out (mem_bus_tmp_if.req_ready)
|
||||
);
|
||||
|
||||
assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b ? `MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0;
|
||||
if (FLAGS_WIDTH != 0) begin : g_mem_req_flags
|
||||
assign mem_bus_tmp_if.req_data.flags = mem_req_flush_b;
|
||||
end else begin : g_no_mem_req_flags
|
||||
assign mem_bus_tmp_if.req_data.flags = '0;
|
||||
`UNUSED_VAR (mem_req_flush_b)
|
||||
end
|
||||
|
||||
if (WRITE_ENABLE) begin : g_mem_bus_if
|
||||
`ASSIGN_VX_MEM_BUS_IF (mem_bus_if, mem_bus_tmp_if);
|
||||
|
@ -244,7 +252,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_req_data;
|
||||
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag;
|
||||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_flush;
|
||||
wire [NUM_BANKS-1:0][`UP(FLAGS_WIDTH)-1:0] per_bank_core_req_flags;
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
|
||||
|
@ -259,7 +267,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0][BANK_MEM_TAG_WIDTH-1:0] per_bank_mem_req_tag;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_flush;
|
||||
wire [NUM_BANKS-1:0][`UP(FLAGS_WIDTH)-1:0] per_bank_mem_req_flags;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
|
||||
|
@ -276,7 +284,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
|
||||
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
|
||||
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
|
||||
wire [NUM_REQS-1:0] core_req_flush;
|
||||
wire [NUM_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] core_req_flags;
|
||||
wire [NUM_REQS-1:0] core_req_ready;
|
||||
|
||||
wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr;
|
||||
|
@ -293,7 +301,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
assign core_req_addr[i] = core_bus2_if[i].req_data.addr;
|
||||
assign core_req_data[i] = core_bus2_if[i].req_data.data;
|
||||
assign core_req_tag[i] = core_bus2_if[i].req_data.tag;
|
||||
assign core_req_flush[i] = core_bus2_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH];
|
||||
assign core_req_flags[i] = `UP(FLAGS_WIDTH)'(core_bus2_if[i].req_data.flags);
|
||||
assign core_bus2_if[i].req_ready = core_req_ready[i];
|
||||
end
|
||||
|
||||
|
@ -325,7 +333,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
core_req_byteen[i],
|
||||
core_req_data[i],
|
||||
core_req_tag[i],
|
||||
core_req_flush[i]
|
||||
core_req_flags[i]
|
||||
};
|
||||
end
|
||||
|
||||
|
@ -366,7 +374,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
per_bank_core_req_byteen[i],
|
||||
per_bank_core_req_data[i],
|
||||
per_bank_core_req_tag[i],
|
||||
per_bank_core_req_flush[i]
|
||||
per_bank_core_req_flags[i]
|
||||
} = core_req_data_out[i];
|
||||
end
|
||||
|
||||
|
@ -393,6 +401,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.WRITEBACK (WRITEBACK),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.FLAGS_WIDTH (FLAGS_WIDTH),
|
||||
.CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(CORE_OUT_BUF)),
|
||||
.MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) bank (
|
||||
|
@ -414,7 +423,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.core_req_data (per_bank_core_req_data[bank_id]),
|
||||
.core_req_tag (per_bank_core_req_tag[bank_id]),
|
||||
.core_req_idx (per_bank_core_req_idx[bank_id]),
|
||||
.core_req_flush (per_bank_core_req_flush[bank_id]),
|
||||
.core_req_flags (per_bank_core_req_flags[bank_id]),
|
||||
.core_req_ready (per_bank_core_req_ready[bank_id]),
|
||||
|
||||
// Core response
|
||||
|
@ -431,7 +440,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.mem_req_byteen (per_bank_mem_req_byteen[bank_id]),
|
||||
.mem_req_data (per_bank_mem_req_data[bank_id]),
|
||||
.mem_req_tag (per_bank_mem_req_tag[bank_id]),
|
||||
.mem_req_flush (per_bank_mem_req_flush[bank_id]),
|
||||
.mem_req_flags (per_bank_mem_req_flags[bank_id]),
|
||||
.mem_req_ready (per_bank_mem_req_ready[bank_id]),
|
||||
|
||||
// Memory response
|
||||
|
@ -487,7 +496,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
// Memory request arbitration
|
||||
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1)-1:0] data_in;
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + `UP(FLAGS_WIDTH))-1:0] data_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin : g_data_in
|
||||
assign data_in[i] = {
|
||||
|
@ -496,7 +505,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
per_bank_mem_req_byteen[i],
|
||||
per_bank_mem_req_data[i],
|
||||
per_bank_mem_req_tag[i],
|
||||
per_bank_mem_req_flush[i]
|
||||
per_bank_mem_req_flags[i]
|
||||
};
|
||||
end
|
||||
|
||||
|
@ -504,7 +513,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1),
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)),
|
||||
.ARBITER ("R")
|
||||
) mem_req_arb (
|
||||
.clk (clk),
|
||||
|
@ -512,7 +521,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.valid_in (per_bank_mem_req_valid),
|
||||
.ready_in (per_bank_mem_req_ready),
|
||||
.data_in (data_in),
|
||||
.data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flush}),
|
||||
.data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flags}),
|
||||
.valid_out (mem_req_valid),
|
||||
.ready_out (mem_req_ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
|
|
114
hw/rtl/cache/VX_cache_bank.sv
vendored
114
hw/rtl/cache/VX_cache_bank.sv
vendored
|
@ -53,6 +53,9 @@ module VX_cache_bank #(
|
|||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// core request flags
|
||||
parameter FLAGS_WIDTH = 0,
|
||||
|
||||
// Core response output register
|
||||
parameter CORE_OUT_REG = 0,
|
||||
|
||||
|
@ -82,7 +85,7 @@ module VX_cache_bank #(
|
|||
input wire [`CS_WORD_WIDTH-1:0] core_req_data, // data to be written
|
||||
input wire [TAG_WIDTH-1:0] core_req_tag, // identifier of the request (request id)
|
||||
input wire [REQ_SEL_WIDTH-1:0] core_req_idx, // index of the request in the core request array
|
||||
input wire core_req_flush, // flush enable
|
||||
input wire [`UP(FLAGS_WIDTH)-1:0] core_req_flags,
|
||||
output wire core_req_ready,
|
||||
|
||||
// Core Response
|
||||
|
@ -99,7 +102,7 @@ module VX_cache_bank #(
|
|||
output wire [LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire mem_req_flush,
|
||||
output wire [`UP(FLAGS_WIDTH)-1:0] mem_req_flags,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
|
@ -143,22 +146,25 @@ module VX_cache_bank #(
|
|||
wire [NUM_WAYS-1:0] flush_way_st0;
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr_sel, addr_st0, addr_st1;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel_st0, line_sel_st1;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_idx_st0, line_idx_st1;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag_st0, line_tag_st1;
|
||||
wire rw_sel, rw_st0, rw_st1;
|
||||
wire [WORD_SEL_WIDTH-1:0] wsel_sel, wsel_st0, wsel_st1;
|
||||
wire [WORD_SEL_WIDTH-1:0] word_idx_sel, word_idx_st0, word_idx_st1;
|
||||
wire [WORD_SIZE-1:0] byteen_sel, byteen_st0, byteen_st1;
|
||||
wire [REQ_SEL_WIDTH-1:0] req_idx_sel, req_idx_st0, req_idx_st1;
|
||||
wire [TAG_WIDTH-1:0] tag_sel, tag_st0, tag_st1;
|
||||
wire [`CS_WORD_WIDTH-1:0] write_data_st0, write_data_st1;
|
||||
wire [`CS_WORD_WIDTH-1:0] read_data_st1;
|
||||
wire [`CS_LINE_WIDTH-1:0] data_sel, data_st0, data_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0, mshr_id_st0, mshr_id_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_st0, mshr_id_st1;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] replay_id_st0;
|
||||
wire valid_sel, valid_st0, valid_st1;
|
||||
wire is_creq_st0, is_creq_st1;
|
||||
wire is_fill_st0, is_fill_st1;
|
||||
wire is_replay_st0, is_replay_st1;
|
||||
wire creq_flush_sel, creq_flush_st0, creq_flush_st1;
|
||||
wire [`UP(FLAGS_WIDTH)-1:0] flags_sel, flags_st0, flags_st1;
|
||||
wire evict_dirty_st0, evict_dirty_st1;
|
||||
wire [NUM_WAYS-1:0] way_sel_st0, way_sel_st1;
|
||||
wire [NUM_WAYS-1:0] way_idx_st0, way_idx_st1;
|
||||
wire [NUM_WAYS-1:0] tag_matches_st0;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1;
|
||||
|
@ -264,11 +270,11 @@ module VX_cache_bank #(
|
|||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
|
||||
assign rw_sel = replay_valid ? replay_rw : core_req_rw;
|
||||
assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen;
|
||||
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
|
||||
assign word_idx_sel= replay_valid ? replay_wsel : core_req_wsel;
|
||||
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
|
||||
assign tag_sel = (init_valid | flush_valid) ? (flush_valid ? flush_tag : '0) :
|
||||
(replay_valid ? replay_tag : (mem_rsp_valid ? mem_rsp_tag_s : core_req_tag));
|
||||
assign creq_flush_sel = core_req_valid && core_req_flush;
|
||||
assign flags_sel = core_req_valid ? core_req_flags : '0;
|
||||
|
||||
assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
|
||||
(replay_valid ? replay_addr : (mem_rsp_valid ? mem_rsp_addr : core_req_addr));
|
||||
|
@ -294,14 +300,14 @@ module VX_cache_bank #(
|
|||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + NUM_WAYS + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + 1 + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH),
|
||||
.RESETW (1)
|
||||
) pipe_reg0 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, creq_flush_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, wsel_sel, req_idx_sel, tag_sel, replay_id}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, creq_flush_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
.data_in ({valid_sel, init_valid, replay_enable, fill_enable, flush_enable, creq_enable, flags_sel, flush_way, addr_sel, data_sel, rw_sel, byteen_sel, word_idx_sel, req_idx_sel, tag_sel, replay_id}),
|
||||
.data_out ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush_st0, is_creq_st0, flags_st0, flush_way_st0, addr_st0, data_st0, rw_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, replay_id_st0})
|
||||
);
|
||||
|
||||
if (UUID_WIDTH != 0) begin : g_req_uuid_st0
|
||||
|
@ -321,9 +327,10 @@ module VX_cache_bank #(
|
|||
wire do_cache_wr_st0 = do_creq_wr_st0 || do_replay_wr_st0;
|
||||
wire do_lookup_st0 = do_cache_rd_st0 || do_cache_wr_st0;
|
||||
|
||||
wire [`CS_WORD_WIDTH-1:0] write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
|
||||
assign write_data_st0 = data_st0[`CS_WORD_WIDTH-1:0];
|
||||
|
||||
assign line_sel_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
|
||||
assign line_idx_st0 = addr_st0[`CS_LINE_SEL_BITS-1:0];
|
||||
assign line_tag_st0 = `CS_LINE_ADDR_TAG(addr_st0);
|
||||
|
||||
wire [NUM_WAYS-1:0] evict_way_st0;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] evict_tag_st0;
|
||||
|
@ -353,7 +360,9 @@ module VX_cache_bank #(
|
|||
.write (do_cache_wr_st0),
|
||||
.lookup (do_lookup_st0),
|
||||
.line_addr (addr_st0),
|
||||
.way_sel (flush_way_st0),
|
||||
.way_idx (flush_way_st0),
|
||||
|
||||
// tag matches
|
||||
.tag_matches(tag_matches_st0),
|
||||
|
||||
// replacement
|
||||
|
@ -362,29 +371,29 @@ module VX_cache_bank #(
|
|||
.evict_tag (evict_tag_st0)
|
||||
);
|
||||
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] addr2_st0;
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag2_st0;
|
||||
|
||||
wire is_flush2_st0 = WRITEBACK && is_flush_st0;
|
||||
|
||||
assign mshr_id_st0 = is_creq_st0 ? mshr_alloc_id_st0 : replay_id_st0;
|
||||
|
||||
assign way_sel_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0;
|
||||
assign way_idx_st0 = (is_fill_st0 || is_flush2_st0) ? evict_way_st0 : tag_matches_st0;
|
||||
|
||||
assign addr2_st0 = (is_fill_st0 || is_flush2_st0) ? {evict_tag_st0, line_sel_st0} : addr_st0;
|
||||
assign line_tag2_st0 = (is_fill_st0 || is_flush2_st0) ? evict_tag_st0 : line_tag_st0;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `CS_LINE_ADDR_WIDTH + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, creq_flush_st0, rw_st0, addr2_st0, data_st0, byteen_st0, wsel_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_sel_st0, evict_dirty_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, creq_flush_st1, rw_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_sel_st1, evict_dirty_st1, mshr_pending_st1})
|
||||
.data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, rw_st0, flags_st0, line_tag2_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_idx_st0, evict_dirty_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, rw_st1, flags_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_idx_st1, evict_dirty_st1, mshr_pending_st1})
|
||||
);
|
||||
|
||||
// we have a tag hit
|
||||
wire is_hit_st1 = (| way_sel_st1);
|
||||
wire is_hit_st1 = (| way_idx_st1);
|
||||
|
||||
if (UUID_WIDTH != 0) begin : g_req_uuid_st1
|
||||
assign req_uuid_st1 = tag_st1[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
|
@ -413,9 +422,7 @@ module VX_cache_bank #(
|
|||
wire do_cache_rd_st1 = do_read_hit_st1 || do_replay_rd_st1;
|
||||
wire do_cache_wr_st1 = do_write_hit_st1 || do_replay_wr_st1;
|
||||
|
||||
assign line_sel_st1 = addr_st1[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
`UNUSED_VAR (do_write_miss_st1)
|
||||
assign addr_st1 = {line_tag_st1, line_idx_st1};
|
||||
|
||||
// ensure an mshr replay always gets a hit
|
||||
`RUNTIME_ASSERT (~(valid_st1 && is_replay_st1) || is_hit_st1, ("%t: missed mshr replay", $time))
|
||||
|
@ -426,28 +433,16 @@ module VX_cache_bank #(
|
|||
assign rdw_hazard2_sel = WRITEBACK && do_cache_wr_st0; // a writeback can evict any preceding write
|
||||
always @(posedge clk) begin
|
||||
// stall reads following writes to same line address
|
||||
rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_sel_st0 == line_sel_st1)
|
||||
rdw_hazard3_st1 <= do_cache_rd_st0 && do_cache_wr_st1 && (line_idx_st0 == line_idx_st1)
|
||||
&& ~rdw_hazard3_st1; // release pipeline stall
|
||||
end
|
||||
|
||||
wire [`CS_LINE_WIDTH-1:0] write_data_st1 = {`CS_WORDS_PER_LINE{data_st1[`CS_WORD_WIDTH-1:0]}};
|
||||
assign write_data_st1 = data_st1[`CS_WORD_WIDTH-1:0];
|
||||
wire [`CS_LINE_WIDTH-1:0] fill_data_st1 = data_st1;
|
||||
wire [LINE_SIZE-1:0] write_byteen_st1;
|
||||
|
||||
wire [`CS_LINE_WIDTH-1:0] dirty_data_st1;
|
||||
wire [LINE_SIZE-1:0] dirty_byteen_st1;
|
||||
|
||||
if (`CS_WORDS_PER_LINE > 1) begin : g_write_byteen_st1_wsel
|
||||
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen_w;
|
||||
always @(*) begin
|
||||
write_byteen_w = '0;
|
||||
write_byteen_w[wsel_st1] = byteen_st1;
|
||||
end
|
||||
assign write_byteen_st1 = write_byteen_w;
|
||||
end else begin : g_write_byteen_st1
|
||||
assign write_byteen_st1 = byteen_st1;
|
||||
end
|
||||
|
||||
VX_cache_data #(
|
||||
.INSTANCE_ID ($sformatf("%s-data", INSTANCE_ID)),
|
||||
.BANK_ID (BANK_ID),
|
||||
|
@ -473,12 +468,12 @@ module VX_cache_bank #(
|
|||
.fill (do_fill_st1),
|
||||
.flush (do_flush_st1),
|
||||
.write (do_cache_wr_st1),
|
||||
.way_sel (way_sel_st1),
|
||||
.way_idx (way_idx_st1),
|
||||
.line_addr (addr_st1),
|
||||
.wsel (wsel_st1),
|
||||
.word_idx (word_idx_st1),
|
||||
.fill_data (fill_data_st1),
|
||||
.write_data (write_data_st1),
|
||||
.write_byteen(write_byteen_st1),
|
||||
.write_byteen(byteen_st1),
|
||||
.read_data (read_data_st1),
|
||||
.dirty_data (dirty_data_st1),
|
||||
.dirty_byteen(dirty_byteen_st1)
|
||||
|
@ -488,13 +483,14 @@ module VX_cache_bank #(
|
|||
wire [MSHR_SIZE-1:0] mshr_lookup_rw_st0;
|
||||
wire mshr_allocate_st0 = valid_st0 && is_creq_st0 && ~pipe_stall;
|
||||
wire mshr_lookup_st0 = mshr_allocate_st0;
|
||||
|
||||
wire mshr_finalize_st1 = valid_st1 && is_creq_st1 && ~pipe_stall;
|
||||
|
||||
// release allocated mshr entry if we had a hit
|
||||
wire mshr_release_st1;
|
||||
if (WRITEBACK) begin : g_mshr_release_st1
|
||||
if (WRITEBACK) begin : g_mshr_release
|
||||
assign mshr_release_st1 = is_hit_st1;
|
||||
end else begin : g_mshr_release_st1_ro
|
||||
end else begin : g_mshr_release_ro
|
||||
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address
|
||||
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content
|
||||
// this can happen when writes are sent late, when the fill was already in flight.
|
||||
|
@ -548,7 +544,7 @@ module VX_cache_bank #(
|
|||
.allocate_valid (mshr_allocate_st0),
|
||||
.allocate_addr (addr_st0),
|
||||
.allocate_rw (rw_st0),
|
||||
.allocate_data ({wsel_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}),
|
||||
.allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}),
|
||||
.allocate_id (mshr_alloc_id_st0),
|
||||
.allocate_prev (mshr_prev_st0),
|
||||
`UNUSED_PIN (allocate_ready),
|
||||
|
@ -571,7 +567,7 @@ module VX_cache_bank #(
|
|||
wire [MSHR_SIZE-1:0] lookup_matches;
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_lookup_matches
|
||||
assign lookup_matches[i] = mshr_lookup_pending_st0[i]
|
||||
&& (i != mshr_alloc_id_st0) // exclude current mshr id
|
||||
&& (i != mshr_id_st0) // exclude current mshr id
|
||||
&& (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough
|
||||
end
|
||||
assign mshr_pending_st0 = (| lookup_matches);
|
||||
|
@ -613,7 +609,7 @@ module VX_cache_bank #(
|
|||
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
|
||||
wire [MEM_TAG_WIDTH-1:0] mreq_queue_tag;
|
||||
wire mreq_queue_rw;
|
||||
wire mreq_queue_flush;
|
||||
wire [`UP(FLAGS_WIDTH)-1:0] mreq_queue_flags;
|
||||
|
||||
wire is_fill_or_flush_st1 = is_fill_st1 || is_flush_st1;
|
||||
wire do_fill_or_flush_st1 = valid_st1 && is_fill_or_flush_st1;
|
||||
|
@ -629,6 +625,7 @@ module VX_cache_bank #(
|
|||
|| do_writeback_st1)
|
||||
&& ~rdw_hazard3_st1;
|
||||
end else begin : g_mreq_queue_push_ro
|
||||
`UNUSED_VAR (do_write_miss_st1)
|
||||
`UNUSED_VAR (do_writeback_st1)
|
||||
assign mreq_queue_push = ((do_read_miss_st1 && ~mshr_pending_st1)
|
||||
|| do_creq_wr_st1)
|
||||
|
@ -637,7 +634,7 @@ module VX_cache_bank #(
|
|||
|
||||
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
|
||||
assign mreq_queue_addr = addr_st1;
|
||||
assign mreq_queue_flush = creq_flush_st1;
|
||||
assign mreq_queue_flags = flags_st1;
|
||||
|
||||
if (WRITE_ENABLE) begin : g_mreq_queue
|
||||
if (WRITEBACK) begin : g_writeback
|
||||
|
@ -645,9 +642,18 @@ module VX_cache_bank #(
|
|||
assign mreq_queue_data = dirty_data_st1;
|
||||
assign mreq_queue_byteen = is_fill_or_flush_st1 ? dirty_byteen_st1 : '1;
|
||||
end else begin : g_writethrough
|
||||
wire [LINE_SIZE-1:0] line_byteen;
|
||||
VX_decoder #(
|
||||
.N (`CS_WORD_SEL_BITS),
|
||||
.M (WORD_SIZE)
|
||||
) byteen_dec (
|
||||
.sel_in (word_idx_st1),
|
||||
.data_in (byteen_st1),
|
||||
.data_out (line_byteen)
|
||||
);
|
||||
assign mreq_queue_rw = rw_st1;
|
||||
assign mreq_queue_data = write_data_st1;
|
||||
assign mreq_queue_byteen = rw_st1 ? write_byteen_st1 : '1;
|
||||
assign mreq_queue_data = {`CS_WORDS_PER_LINE{write_data_st1}};
|
||||
assign mreq_queue_byteen = rw_st1 ? line_byteen : '1;
|
||||
`UNUSED_VAR (is_fill_or_flush_st1)
|
||||
`UNUSED_VAR (dirty_data_st1)
|
||||
`UNUSED_VAR (dirty_byteen_st1)
|
||||
|
@ -667,17 +673,17 @@ module VX_cache_bank #(
|
|||
end
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)),
|
||||
.DEPTH (MREQ_SIZE),
|
||||
.ALM_FULL (MREQ_SIZE-PIPELINE_STAGES),
|
||||
.ALM_FULL (MREQ_SIZE - PIPELINE_STAGES),
|
||||
.OUT_REG (MEM_OUT_REG)
|
||||
) mem_req_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (mreq_queue_push),
|
||||
.pop (mreq_queue_pop),
|
||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flush}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flush}),
|
||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flags}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flags}),
|
||||
.empty (mreq_queue_empty),
|
||||
.alm_full (mreq_queue_alm_full),
|
||||
`UNUSED_PIN (full),
|
||||
|
|
4
hw/rtl/cache/VX_cache_cluster.sv
vendored
4
hw/rtl/cache/VX_cache_cluster.sv
vendored
|
@ -58,6 +58,9 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// core request flags
|
||||
parameter FLAGS_WIDTH = 0,
|
||||
|
||||
// enable bypass for non-cacheable addresses
|
||||
parameter NC_ENABLE = 0,
|
||||
|
||||
|
@ -156,6 +159,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (ARB_TAG_WIDTH),
|
||||
.FLAGS_WIDTH (FLAGS_WIDTH),
|
||||
.TAG_SEL_IDX (TAG_SEL_IDX),
|
||||
.CORE_OUT_BUF ((NUM_INPUTS != NUM_CACHES) ? 2 : CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF ((NUM_CACHES > 1) ? 2 : MEM_OUT_BUF),
|
||||
|
|
182
hw/rtl/cache/VX_cache_data.sv
vendored
182
hw/rtl/cache/VX_cache_data.sv
vendored
|
@ -50,11 +50,11 @@ module VX_cache_data #(
|
|||
input wire flush,
|
||||
input wire write,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
|
||||
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] wsel,
|
||||
input wire [`UP(`CS_WORD_SEL_BITS)-1:0] word_idx,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] fill_data,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] write_data,
|
||||
input wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] write_byteen,
|
||||
input wire [NUM_WAYS-1:0] way_sel,
|
||||
input wire [`CS_WORD_WIDTH-1:0] write_data,
|
||||
input wire [WORD_SIZE-1:0] write_byteen,
|
||||
input wire [NUM_WAYS-1:0] way_idx,
|
||||
output wire [`CS_WORD_WIDTH-1:0] read_data,
|
||||
output wire [`CS_LINE_WIDTH-1:0] dirty_data,
|
||||
output wire [LINE_SIZE-1:0] dirty_byteen
|
||||
|
@ -68,132 +68,144 @@ module VX_cache_data #(
|
|||
`UNUSED_VAR (read)
|
||||
`UNUSED_VAR (flush)
|
||||
|
||||
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
|
||||
localparam BYTEENW = (WRITE_ENABLE != 0) ? LINE_SIZE : 1;
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
|
||||
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx_bin;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_idx;
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_rdata;
|
||||
wire [`LOG2UP(NUM_WAYS)-1:0] way_idx;
|
||||
assign line_idx = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
VX_encoder #(
|
||||
.N (NUM_WAYS)
|
||||
) way_idx_enc (
|
||||
.data_in (way_idx),
|
||||
.data_out (way_idx_bin),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
if (WRITEBACK) begin : g_dirty_data
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata;
|
||||
VX_transpose #(
|
||||
.DATAW (`CS_WORD_WIDTH),
|
||||
.N (`CS_WORDS_PER_LINE),
|
||||
.M (NUM_WAYS)
|
||||
) transpose (
|
||||
.data_in (line_rdata),
|
||||
.data_out (transposed_rdata)
|
||||
);
|
||||
assign dirty_data = transposed_rdata[way_idx];
|
||||
assign dirty_data = line_rdata[way_idx_bin];
|
||||
end else begin : g_dirty_data_0
|
||||
assign dirty_data = '0;
|
||||
end
|
||||
|
||||
if (DIRTY_BYTES) begin : g_dirty_byteen
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_rdata;
|
||||
wire [NUM_WAYS-1:0][LINE_SIZE-1:0] bs_wdata;
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bs_rdata;
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] bs_wdata;
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_bs_wdata
|
||||
wire [LINE_SIZE-1:0] wdata = write ? (bs_rdata[i] | write_byteen) : ((fill || flush) ? '0 : bs_rdata[i]);
|
||||
assign bs_wdata[i] = init ? '0 : (way_sel[i] ? wdata : bs_rdata[i]);
|
||||
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j
|
||||
wire [WORD_SIZE-1:0] word_mask = {WORD_SIZE{(WORD_SIZE == 1) || (word_idx == j)}};
|
||||
wire [WORD_SIZE-1:0] wdata = write ? (bs_rdata[i][j] | (write_byteen & word_mask)) : ((fill || flush) ? '0 : bs_rdata[i][j]);
|
||||
assign bs_wdata[i][j] = init ? '0 : (way_idx[i] ? wdata : bs_rdata[i][j]);
|
||||
end
|
||||
end
|
||||
|
||||
wire bs_read = write || fill || flush;
|
||||
wire bs_write = init || write || fill || flush;
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (LINE_SIZE * NUM_WAYS),
|
||||
.SIZE (`CS_LINES_PER_BANK)
|
||||
) byteen_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (write || fill || flush),
|
||||
.write (init || write || fill || flush),
|
||||
.read (bs_read && ~stall),
|
||||
.write (bs_write && ~stall),
|
||||
.wren (1'b1),
|
||||
.addr (line_sel),
|
||||
.addr (line_idx),
|
||||
.wdata (bs_wdata),
|
||||
.rdata (bs_rdata)
|
||||
);
|
||||
|
||||
assign dirty_byteen = bs_rdata[way_idx];
|
||||
assign dirty_byteen = bs_rdata[way_idx_bin];
|
||||
end else begin : g_dirty_byteen_0
|
||||
assign dirty_byteen = '1;
|
||||
end
|
||||
|
||||
// order the data layout to perform ways multiplexing last.
|
||||
// this allows converting the way index to binary in parallel with BRAM read access and way selection.
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_data_store
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
|
||||
wire [BYTEENW-1:0] line_wren;
|
||||
wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] line_wdata;
|
||||
wire [BYTEENW-1:0] line_wren;
|
||||
wire line_write;
|
||||
wire line_read;
|
||||
|
||||
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin : g_line_wdata
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin : g_i
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin : g_j
|
||||
assign line_wdata[i][j] = (fill || !WRITE_ENABLE) ? fill_data[i] : write_data[i];
|
||||
assign wren_w[i][j] = ((fill || !WRITE_ENABLE) ? {WORD_SIZE{1'b1}} : write_byteen[i])
|
||||
& {WORD_SIZE{(way_sel[j] || (NUM_WAYS == 1))}};
|
||||
wire way_en = (NUM_WAYS == 1) || way_idx[i];
|
||||
|
||||
if (WRITE_ENABLE != 0) begin : g_line_data
|
||||
wire [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_w;
|
||||
for (genvar j = 0; j < `CS_WORDS_PER_LINE; ++j) begin : g_j
|
||||
wire word_en = (WORD_SIZE == 1) || (word_idx == j);
|
||||
assign line_wdata[j] = fill ? fill_data[j] : write_data;
|
||||
assign wren_w[j] = {WORD_SIZE{fill}} | (write_byteen & {WORD_SIZE{word_en}});
|
||||
end
|
||||
assign line_wren = wren_w;
|
||||
assign line_write = (fill || write) && way_en;
|
||||
if (WRITEBACK) begin : g_line_read_wb
|
||||
assign line_read = (read || fill || flush);
|
||||
end else begin : g_line_read_wt
|
||||
assign line_read = read;
|
||||
end
|
||||
end else begin : g_line_data_ro
|
||||
`UNUSED_VAR (write)
|
||||
`UNUSED_VAR (write_byteen)
|
||||
`UNUSED_VAR (write_data)
|
||||
assign line_wdata = fill_data;
|
||||
assign line_wren = 1'b1;
|
||||
assign line_write = fill && way_en;
|
||||
assign line_read = read;
|
||||
end
|
||||
assign line_wren = wren_w;
|
||||
end else begin : g_line_wdata_ro
|
||||
`UNUSED_VAR (write)
|
||||
`UNUSED_VAR (write_byteen)
|
||||
`UNUSED_VAR (write_data)
|
||||
assign line_wdata = fill_data;
|
||||
assign line_wren = fill;
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (`CS_LINE_WIDTH),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.WRENW (BYTEENW),
|
||||
.NO_RWCHECK (1),
|
||||
.RW_ASSERT (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (line_read && ~stall),
|
||||
.write (line_write && ~stall),
|
||||
.wren (line_wren),
|
||||
.addr (line_idx),
|
||||
.wdata (line_wdata),
|
||||
.rdata (line_rdata[i])
|
||||
);
|
||||
end
|
||||
|
||||
VX_encoder #(
|
||||
.N (NUM_WAYS)
|
||||
) way_enc (
|
||||
.data_in (way_sel),
|
||||
.data_out (way_idx),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
wire line_read = (read && ~stall)
|
||||
|| (WRITEBACK && (fill || flush));
|
||||
|
||||
wire line_write = write || fill;
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.WRENW (BYTEENW),
|
||||
.NO_RWCHECK (1),
|
||||
.RW_ASSERT (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (line_read),
|
||||
.write (line_write),
|
||||
.wren (line_wren),
|
||||
.addr (line_sel),
|
||||
.wdata (line_wdata),
|
||||
.rdata (line_rdata)
|
||||
);
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
|
||||
if (`CS_WORDS_PER_LINE > 1) begin : g_per_way_rdata_wsel
|
||||
assign per_way_rdata = line_rdata[wsel];
|
||||
end else begin : g_per_way_rdata
|
||||
`UNUSED_VAR (wsel)
|
||||
assign per_way_rdata = line_rdata;
|
||||
if (`CS_WORDS_PER_LINE > 1) begin : g_read_data
|
||||
// order the data layout to perform ways multiplexing last.
|
||||
// this allows converting the way index to binary in parallel with BRAM read access and way selection.
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] transposed_rdata;
|
||||
VX_transpose #(
|
||||
.DATAW (`CS_WORD_WIDTH),
|
||||
.N (NUM_WAYS),
|
||||
.M (`CS_WORDS_PER_LINE)
|
||||
) transpose (
|
||||
.data_in (line_rdata),
|
||||
.data_out (transposed_rdata)
|
||||
);
|
||||
assign read_data = transposed_rdata[word_idx][way_idx_bin];
|
||||
end else begin : g_read_data_1w
|
||||
`UNUSED_VAR (word_idx)
|
||||
assign read_data = line_rdata[way_idx_bin];
|
||||
end
|
||||
assign read_data = per_way_rdata[way_idx];
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, fill_data))
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data))
|
||||
end
|
||||
if (flush && ~stall) begin
|
||||
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, dirty_byteen, dirty_data))
|
||||
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data))
|
||||
end
|
||||
if (read && ~stall) begin
|
||||
`TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, read_data, req_uuid))
|
||||
`TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid))
|
||||
end
|
||||
if (write && ~stall) begin
|
||||
`TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_sel, line_sel, wsel, write_byteen[wsel], write_data[wsel], req_uuid))
|
||||
`TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
30
hw/rtl/cache/VX_cache_tags.sv
vendored
30
hw/rtl/cache/VX_cache_tags.sv
vendored
|
@ -47,7 +47,7 @@ module VX_cache_tags #(
|
|||
input wire write,
|
||||
input wire lookup,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] line_addr,
|
||||
input wire [NUM_WAYS-1:0] way_sel,
|
||||
input wire [NUM_WAYS-1:0] way_idx,
|
||||
output wire [NUM_WAYS-1:0] tag_matches,
|
||||
|
||||
// eviction
|
||||
|
@ -62,7 +62,7 @@ module VX_cache_tags #(
|
|||
// valid, dirty, tag
|
||||
localparam TAG_WIDTH = 1 + WRITEBACK + `CS_TAG_SEL_BITS;
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_sel = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_idx = line_addr[`CS_LINE_SEL_BITS-1:0];
|
||||
wire [`CS_TAG_SEL_BITS-1:0] line_tag = `CS_LINE_ADDR_TAG(line_addr);
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_TAG_SEL_BITS-1:0] read_tag;
|
||||
|
@ -80,7 +80,7 @@ module VX_cache_tags #(
|
|||
end
|
||||
end
|
||||
|
||||
assign evict_way = fill ? evict_way_r : way_sel;
|
||||
assign evict_way = fill ? evict_way_r : way_idx;
|
||||
|
||||
VX_onehot_mux #(
|
||||
.DATAW (`CS_TAG_SEL_BITS),
|
||||
|
@ -103,7 +103,7 @@ module VX_cache_tags #(
|
|||
for (genvar i = 0; i < NUM_WAYS; ++i) begin : g_tag_store
|
||||
|
||||
wire do_fill = fill_s && evict_way[i];
|
||||
wire do_flush = flush_s && (!WRITEBACK || way_sel[i]); // flush the whole line in writethrough mode
|
||||
wire do_flush = flush_s && (!WRITEBACK || way_idx[i]); // flush the whole line in writethrough mode
|
||||
wire do_write = WRITEBACK && write && tag_matches[i];
|
||||
|
||||
wire line_read = (WRITEBACK && (fill_s || flush_s));
|
||||
|
@ -130,10 +130,10 @@ module VX_cache_tags #(
|
|||
) tag_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.read (line_read),
|
||||
.write (line_write),
|
||||
.read (line_read && ~stall),
|
||||
.write (line_write && ~stall),
|
||||
.wren (1'b1),
|
||||
.addr (line_sel),
|
||||
.addr (line_idx),
|
||||
.wdata (line_wdata),
|
||||
.rdata (line_rdata)
|
||||
);
|
||||
|
@ -146,29 +146,29 @@ module VX_cache_tags #(
|
|||
assign evict_dirty = | (read_dirty & evict_way);
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_sel};
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_idx};
|
||||
always @(posedge clk) begin
|
||||
if (fill && ~stall) begin
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_sel, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)))
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)))
|
||||
end
|
||||
if (init) begin
|
||||
`TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel))
|
||||
`TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx))
|
||||
end
|
||||
if (flush && ~stall) begin
|
||||
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_sel, line_sel, evict_dirty))
|
||||
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty))
|
||||
end
|
||||
if (lookup && ~stall) begin
|
||||
if (tag_matches != 0) begin
|
||||
if (write) begin
|
||||
`TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid))
|
||||
`TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid))
|
||||
end else begin
|
||||
`TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_sel, line_tag, req_uuid))
|
||||
`TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid))
|
||||
end
|
||||
end else begin
|
||||
if (write) begin
|
||||
`TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid))
|
||||
`TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid))
|
||||
end else begin
|
||||
`TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_sel, line_tag, req_uuid))
|
||||
`TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
4
hw/rtl/cache/VX_cache_wrap.sv
vendored
4
hw/rtl/cache/VX_cache_wrap.sv
vendored
|
@ -57,6 +57,9 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
// core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
|
||||
// core request flags
|
||||
parameter FLAGS_WIDTH = 0,
|
||||
|
||||
// enable bypass for non-cacheable addresses
|
||||
parameter NC_ENABLE = 0,
|
||||
|
||||
|
@ -175,6 +178,7 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
.DIRTY_BYTES (DIRTY_BYTES),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.FLAGS_WIDTH (FLAGS_WIDTH),
|
||||
.CORE_OUT_BUF (NC_OR_BYPASS ? 1 : CORE_OUT_BUF),
|
||||
.MEM_OUT_BUF (NC_OR_BYPASS ? 1 : MEM_OUT_BUF)
|
||||
) cache (
|
||||
|
|
|
@ -69,8 +69,8 @@ module VX_cyclic_arbiter #(
|
|||
.N (LOG_NUM_REQS),
|
||||
.D (NUM_REQS)
|
||||
) grant_decoder (
|
||||
.data_in (grant_index),
|
||||
.valid_in (1'b1),
|
||||
.sel_in (grant_index),
|
||||
.data_in (1'b1),
|
||||
.data_out (grant_onehot_w)
|
||||
);
|
||||
|
||||
|
|
|
@ -18,25 +18,30 @@
|
|||
|
||||
`TRACING_OFF
|
||||
// VX_decoder: routes an M-bit payload into one of D output slots.
// In the sel_in/data_in form below, data_out is D copies of data_in ANDed with a
// mask ("shift") that has M ones in the slot indexed by sel_in, so every other
// slot reads as zero. When N == 0 the payload is passed straight through.
// NOTE(review): this listing appears to be a rendered diff, so lines using the
// earlier port names (data_in as the index, valid_in as the payload) sit next to
// their sel_in/data_in replacements -- confirm against the checked-in file.
module VX_decoder #(
|
||||
// N: number of select bits (two default values are listed: 1 and 0)
parameter N = 1,
|
||||
parameter N = 0,
|
||||
// M: payload width in bits, i.e. the width of one output slot
parameter M = 1,
|
||||
// MODEL: 1 selects the indexed-assignment implementation, any other value the shift-based one
parameter MODEL = 0,
|
||||
// D: number of output slots, defaults to 2^N
parameter D = 1 << N
|
||||
) (
|
||||
input wire [N-1:0] data_in,
|
||||
input wire [M-1:0] valid_in,
|
||||
// sel_in: index of the output slot to drive
// NOTE(review): `UP(N) presumably clamps the width to at least 1 so the port stays legal when N == 0 -- confirm macro definition
input wire [`UP(N)-1:0] sel_in,
|
||||
// data_in: M-bit payload copied into the selected slot
input wire [M-1:0] data_in,
|
||||
// data_out: D slots of M bits each
output wire [D-1:0][M-1:0] data_out
|
||||
);
|
||||
logic [D-1:0][M-1:0] shift;
|
||||
if (MODEL == 1) begin : g_model1
|
||||
always @(*) begin
|
||||
shift = '0;
|
||||
shift[data_in] = {M{1'b1}};
|
||||
// decoding is only generated when there is at least one select bit
if (N != 0) begin : g_decoder
|
||||
// shift: slot mask, all ones in the selected slot and zero elsewhere
logic [D-1:0][M-1:0] shift;
|
||||
if (MODEL == 1) begin : g_model1
|
||||
always @(*) begin
|
||||
// clear every slot, then set all M bits of the slot indexed by sel_in
shift = '0;
|
||||
shift[sel_in] = {M{1'b1}};
|
||||
end
|
||||
end else begin : g_model0
|
||||
// M ones widened to D*M bits, then moved left by sel_in slots of M bits each
assign shift = ((D*M)'({M{1'b1}})) << (sel_in * M);
|
||||
end
|
||||
end else begin : g_model0
|
||||
assign shift = ((D*M)'({M{1'b1}})) << (data_in * M);
|
||||
// replicate the payload into every slot and keep only the selected one
assign data_out = {D{data_in}} & shift;
|
||||
end else begin : g_passthru
|
||||
// N == 0: sel_in is not used; the payload is forwarded unchanged
`UNUSED_VAR (sel_in)
|
||||
assign data_out = data_in;
|
||||
end
|
||||
assign data_out = {D{valid_in}} & shift;
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
|
@ -104,8 +104,8 @@ module VX_mem_adapter #(
|
|||
.N (D),
|
||||
.M (SRC_DATA_WIDTH/8)
|
||||
) req_be_dec (
|
||||
.data_in (req_idx),
|
||||
.valid_in (mem_req_byteen_in),
|
||||
.sel_in (req_idx),
|
||||
.data_in (mem_req_byteen_in),
|
||||
.data_out (mem_req_byteen_out_w)
|
||||
);
|
||||
|
||||
|
@ -113,8 +113,8 @@ module VX_mem_adapter #(
|
|||
.N (D),
|
||||
.M (SRC_DATA_WIDTH)
|
||||
) req_data_dec (
|
||||
.data_in (req_idx),
|
||||
.valid_in (mem_req_data_in),
|
||||
.sel_in (req_idx),
|
||||
.data_in (mem_req_data_in),
|
||||
.data_out (mem_req_data_out_w)
|
||||
);
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ module VX_mem_coalescer #(
|
|||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_REQS = 1,
|
||||
parameter ADDR_WIDTH = 32,
|
||||
parameter FLAGS_WIDTH = 1,
|
||||
parameter FLAGS_WIDTH = 0,
|
||||
parameter DATA_IN_SIZE = 4,
|
||||
parameter DATA_OUT_SIZE = 64,
|
||||
parameter TAG_WIDTH = 8,
|
||||
|
@ -43,7 +43,7 @@ module VX_mem_coalescer #(
|
|||
input wire [NUM_REQS-1:0] in_req_mask,
|
||||
input wire [NUM_REQS-1:0][DATA_IN_SIZE-1:0] in_req_byteen,
|
||||
input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr,
|
||||
input wire [NUM_REQS-1:0][FLAGS_WIDTH-1:0] in_req_flags,
|
||||
input wire [NUM_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] in_req_flags,
|
||||
input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data,
|
||||
input wire [TAG_WIDTH-1:0] in_req_tag,
|
||||
output wire in_req_ready,
|
||||
|
@ -61,7 +61,7 @@ module VX_mem_coalescer #(
|
|||
output wire [OUT_REQS-1:0] out_req_mask,
|
||||
output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen,
|
||||
output wire [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr,
|
||||
output wire [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags,
|
||||
output wire [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] out_req_flags,
|
||||
output wire [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data,
|
||||
output wire [OUT_TAG_WIDTH-1:0] out_req_tag,
|
||||
input wire out_req_ready,
|
||||
|
@ -92,7 +92,7 @@ module VX_mem_coalescer #(
|
|||
logic out_req_rw_r, out_req_rw_n;
|
||||
logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
|
||||
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
|
||||
logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags_r, out_req_flags_n;
|
||||
logic [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] out_req_flags_r, out_req_flags_n;
|
||||
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
|
||||
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
|
||||
logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
|
||||
|
@ -110,7 +110,7 @@ module VX_mem_coalescer #(
|
|||
|
||||
logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n;
|
||||
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n;
|
||||
logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] seed_flags_r, seed_flags_n;
|
||||
logic [OUT_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] seed_flags_r, seed_flags_n;
|
||||
logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n;
|
||||
logic [NUM_REQS-1:0] req_rem_mask_r, req_rem_mask_n;
|
||||
|
||||
|
@ -139,7 +139,7 @@ module VX_mem_coalescer #(
|
|||
assign addr_base[j] = in_req_addr[DATA_RATIO * i + j][ADDR_WIDTH-1:DATA_RATIO_W];
|
||||
end
|
||||
|
||||
wire [DATA_RATIO-1:0][FLAGS_WIDTH-1:0] req_flags;
|
||||
wire [DATA_RATIO-1:0][`UP(FLAGS_WIDTH)-1:0] req_flags;
|
||||
for (genvar j = 0; j < DATA_RATIO; ++j) begin : g_req_flags
|
||||
assign req_flags[j] = in_req_flags[DATA_RATIO * i + j];
|
||||
end
|
||||
|
@ -221,7 +221,7 @@ module VX_mem_coalescer #(
|
|||
end
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + FLAGS_WIDTH + OUT_ADDR_WIDTH + FLAGS_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH),
|
||||
.DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + `UP(FLAGS_WIDTH) + OUT_ADDR_WIDTH + `UP(FLAGS_WIDTH) + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH),
|
||||
.RESETW (1 + NUM_REQS + 1),
|
||||
.INIT_VALUE ({1'b0, {NUM_REQS{1'b1}}, 1'b0})
|
||||
) pipe_reg (
|
||||
|
@ -270,7 +270,12 @@ module VX_mem_coalescer #(
|
|||
assign out_req_mask = out_req_mask_r;
|
||||
assign out_req_byteen = out_req_byteen_r;
|
||||
assign out_req_addr = out_req_addr_r;
|
||||
assign out_req_flags = out_req_flags_r;
|
||||
if (FLAGS_WIDTH != 0) begin : g_out_req_flags
|
||||
assign out_req_flags = out_req_flags_r;
|
||||
end else begin : g_out_req_flags_0
|
||||
`UNUSED_VAR (out_req_flags_r)
|
||||
assign out_req_flags = '0;
|
||||
end
|
||||
assign out_req_data = out_req_data_r;
|
||||
assign out_req_tag = out_req_tag_r;
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ module VX_mem_scheduler #(
|
|||
parameter WORD_SIZE = 4,
|
||||
parameter LINE_SIZE = WORD_SIZE,
|
||||
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
|
||||
parameter FLAGS_WIDTH = 1,
|
||||
parameter FLAGS_WIDTH = 0,
|
||||
parameter TAG_WIDTH = 8,
|
||||
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
|
||||
parameter CORE_QUEUE_SIZE= 8,
|
||||
|
@ -50,7 +50,7 @@ module VX_mem_scheduler #(
|
|||
input wire [CORE_REQS-1:0] core_req_mask,
|
||||
input wire [CORE_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] core_req_flags,
|
||||
input wire [CORE_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] core_req_flags,
|
||||
input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [TAG_WIDTH-1:0] core_req_tag,
|
||||
output wire core_req_ready,
|
||||
|
@ -72,7 +72,7 @@ module VX_mem_scheduler #(
|
|||
output wire [MEM_CHANNELS-1:0] mem_req_mask,
|
||||
output wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags,
|
||||
output wire [MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags,
|
||||
output wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire mem_req_ready,
|
||||
|
@ -112,7 +112,7 @@ module VX_mem_scheduler #(
|
|||
wire reqq_rw;
|
||||
wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen;
|
||||
wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr;
|
||||
wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags;
|
||||
wire [CORE_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] reqq_flags;
|
||||
wire [CORE_REQS-1:0][WORD_WIDTH-1:0] reqq_data;
|
||||
wire [REQQ_TAG_WIDTH-1:0] reqq_tag;
|
||||
wire reqq_ready;
|
||||
|
@ -122,7 +122,7 @@ module VX_mem_scheduler #(
|
|||
wire reqq_rw_s;
|
||||
wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s;
|
||||
wire [MERGED_REQS-1:0][MEM_ADDR_WIDTH-1:0] reqq_addr_s;
|
||||
wire [MERGED_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags_s;
|
||||
wire [MERGED_REQS-1:0][`UP(FLAGS_WIDTH)-1:0] reqq_flags_s;
|
||||
wire [MERGED_REQS-1:0][LINE_WIDTH-1:0] reqq_data_s;
|
||||
wire [MERGED_TAG_WIDTH-1:0] reqq_tag_s;
|
||||
wire reqq_ready_s;
|
||||
|
@ -132,7 +132,7 @@ module VX_mem_scheduler #(
|
|||
wire mem_req_rw_s;
|
||||
wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_s;
|
||||
wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_s;
|
||||
wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_s;
|
||||
wire [MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_s;
|
||||
wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_s;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
|
||||
wire mem_req_ready_s;
|
||||
|
@ -167,7 +167,7 @@ module VX_mem_scheduler #(
|
|||
end
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + FLAGS_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH),
|
||||
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + `UP(FLAGS_WIDTH) + WORD_WIDTH) + REQQ_TAG_WIDTH),
|
||||
.SIZE (CORE_QUEUE_SIZE),
|
||||
.OUT_REG (1)
|
||||
) req_queue (
|
||||
|
@ -297,7 +297,7 @@ module VX_mem_scheduler #(
|
|||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b;
|
||||
|
||||
wire [BATCH_SEL_WIDTH-1:0] req_batch_idx;
|
||||
|
@ -385,8 +385,10 @@ module VX_mem_scheduler #(
|
|||
|
||||
assign reqq_ready_s = req_sent_all;
|
||||
|
||||
wire [MEM_CHANNELS-1:0][`UP(FLAGS_WIDTH)-1:0] mem_req_flags_u;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + FLAGS_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH),
|
||||
.DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + `UP(FLAGS_WIDTH) + LINE_WIDTH) + MEM_TAG_WIDTH),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
|
@ -395,11 +397,18 @@ module VX_mem_scheduler #(
|
|||
.valid_in (mem_req_valid_s),
|
||||
.ready_in (mem_req_ready_s),
|
||||
.data_in ({mem_req_mask_s, mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_flags_s, mem_req_data_s, mem_req_tag_s}),
|
||||
.data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_flags, mem_req_data, mem_req_tag}),
|
||||
.data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_flags_u, mem_req_data, mem_req_tag}),
|
||||
.valid_out (mem_req_valid),
|
||||
.ready_out (mem_req_ready)
|
||||
);
|
||||
|
||||
if (FLAGS_WIDTH != 0) begin : g_mem_req_flags
|
||||
assign mem_req_flags = mem_req_flags_u;
|
||||
end else begin : g_mem_req_flags_0
|
||||
`UNUSED_VAR (mem_req_flags_u)
|
||||
assign mem_req_flags = '0;
|
||||
end
|
||||
|
||||
// Handle memory responses ////////////////////////////////////////////////
|
||||
|
||||
reg [CORE_QUEUE_SIZE-1:0][CORE_REQS-1:0] rsp_rem_mask;
|
||||
|
|
|
@ -484,8 +484,8 @@ module VX_rr_arbiter #(
|
|||
.N (LOG_NUM_REQS),
|
||||
.D (NUM_REQS)
|
||||
) grant_decoder (
|
||||
.data_in (grant_index),
|
||||
.valid_in (grant_valid),
|
||||
.sel_in (grant_index),
|
||||
.data_in (grant_valid),
|
||||
.data_out (grant_onehot)
|
||||
);
|
||||
|
||||
|
|
|
@ -68,8 +68,8 @@ module VX_stream_xbar #(
|
|||
.N (OUT_WIDTH),
|
||||
.D (NUM_OUTPUTS)
|
||||
) sel_in_decoder (
|
||||
.data_in (sel_in[i]),
|
||||
.valid_in (valid_in[i]),
|
||||
.sel_in (sel_in[i]),
|
||||
.data_in (valid_in[i]),
|
||||
.data_out (per_output_valid_in[i])
|
||||
);
|
||||
assign ready_in[i] = | per_output_ready_in_w[i];
|
||||
|
@ -141,8 +141,8 @@ module VX_stream_xbar #(
|
|||
.N (OUT_WIDTH),
|
||||
.D (NUM_OUTPUTS)
|
||||
) sel_in_decoder (
|
||||
.data_in (sel_in[0]),
|
||||
.valid_in (valid_in[0]),
|
||||
.sel_in (sel_in[0]),
|
||||
.data_in (valid_in[0]),
|
||||
.data_out (valid_out_w)
|
||||
);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue