mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Add UUID support to memory transactions
This commit is contained in:
parent
ae24264a2a
commit
bb9ae8576d
11 changed files with 121 additions and 63 deletions
|
@ -264,14 +264,14 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks) \
|
||||
(`CLOG2(mshr_size) + `CLOG2(num_banks))
|
||||
`define CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width) \
|
||||
(uuid_width + `CLOG2(mshr_size) + `CLOG2(num_banks))
|
||||
|
||||
`define CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width) \
|
||||
(`CLOG2(num_reqs) + `CLOG2(line_size / word_size) + tag_width)
|
||||
|
||||
`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width) \
|
||||
(`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), `CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width)) + 1)
|
||||
`define CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, uuid_width) \
|
||||
(`MAX(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width), `CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, tag_width)) + 1)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -281,14 +281,14 @@
|
|||
`define CACHE_CLUSTER_MEM_ARB_TAG(tag_width, num_caches) \
|
||||
(tag_width + `ARB_SEL_BITS(`UP(num_caches), 1))
|
||||
|
||||
`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks), num_caches)
|
||||
`define CACHE_CLUSTER_MEM_TAG_WIDTH(mshr_size, num_banks, num_caches, uuid_width) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_MEM_TAG_WIDTH(mshr_size, num_banks, uuid_width), num_caches)
|
||||
|
||||
`define CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_BYPASS_TAG_WIDTH(num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches)
|
||||
|
||||
`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, num_inputs, num_caches) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches)), num_caches)
|
||||
`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, tag_width, num_inputs, num_caches, uuid_width) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches), uuid_width), num_caches)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
|
|
@ -166,7 +166,7 @@ package VX_gpu_pkg;
|
|||
|
||||
// Memory request tag bits
|
||||
`ifdef ICACHE_ENABLE
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES);
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_MEM_TAG_WIDTH(`ICACHE_MSHR_SIZE, 1, `NUM_ICACHES, `UUID_WIDTH);
|
||||
`else
|
||||
localparam ICACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(1, ICACHE_LINE_SIZE, ICACHE_WORD_SIZE, ICACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_ICACHES);
|
||||
`endif
|
||||
|
@ -197,7 +197,7 @@ package VX_gpu_pkg;
|
|||
|
||||
// Memory request tag bits
|
||||
`ifdef DCACHE_ENABLE
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_NC_MEM_TAG_WIDTH(`DCACHE_MSHR_SIZE, `DCACHE_NUM_BANKS, DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES, `UUID_WIDTH);
|
||||
`else
|
||||
localparam DCACHE_MEM_TAG_WIDTH = `CACHE_CLUSTER_BYPASS_MEM_TAG_WIDTH(DCACHE_NUM_REQS, DCACHE_LINE_SIZE, DCACHE_WORD_SIZE, DCACHE_TAG_WIDTH, `SOCKET_SIZE, `NUM_DCACHES);
|
||||
`endif
|
||||
|
@ -226,7 +226,7 @@ package VX_gpu_pkg;
|
|||
|
||||
// Memory request tag bits
|
||||
`ifdef L2_ENABLE
|
||||
localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
|
||||
localparam L2_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L2_MSHR_SIZE, `L2_NUM_BANKS, L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH, `UUID_WIDTH);
|
||||
`else
|
||||
localparam L2_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L2_NUM_REQS, `L2_LINE_SIZE, L2_WORD_SIZE, L2_TAG_WIDTH);
|
||||
`endif
|
||||
|
@ -247,7 +247,7 @@ package VX_gpu_pkg;
|
|||
|
||||
// Memory request tag bits
|
||||
`ifdef L3_ENABLE
|
||||
localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
|
||||
localparam L3_MEM_TAG_WIDTH = `CACHE_NC_MEM_TAG_WIDTH(`L3_MSHR_SIZE, `L3_NUM_BANKS, L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH, `UUID_WIDTH);
|
||||
`else
|
||||
localparam L3_MEM_TAG_WIDTH = `CACHE_BYPASS_TAG_WIDTH(L3_NUM_REQS, `L3_LINE_SIZE, L3_WORD_SIZE, L3_TAG_WIDTH);
|
||||
`endif
|
||||
|
|
|
@ -196,16 +196,19 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
end
|
||||
|
||||
`ifdef DBG_TRACE_MEM
|
||||
wire [`UUID_WIDTH-1:0] mem_req_uuid = mem_req_tag[`VX_MEM_TAG_WIDTH-1 -: `UUID_WIDTH];
|
||||
wire [`UUID_WIDTH-1:0] mem_rsp_uuid = mem_rsp_tag[`VX_MEM_TAG_WIDTH-1 -: `UUID_WIDTH];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (mem_req_fire) begin
|
||||
if (mem_req_rw) begin
|
||||
`TRACE(1, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data))
|
||||
`TRACE(1, ("%t: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%h data=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data, mem_req_uuid))
|
||||
end else begin
|
||||
`TRACE(1, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen))
|
||||
`TRACE(1, ("%t: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%h (#%0d)\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_uuid))
|
||||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h\n", $time, mem_rsp_tag, mem_rsp_data))
|
||||
`TRACE(1, ("%t: MEM Rd Rsp: tag=0x%0h, data=0x%h (#%0d)\n", $time, mem_rsp_tag, mem_rsp_data, mem_rsp_uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -82,9 +82,11 @@ module Vortex_axi import VX_gpu_pkg::*; #(
|
|||
// Status
|
||||
output wire busy
|
||||
);
|
||||
localparam MIN_TAG_WIDTH = `VX_MEM_TAG_WIDTH - `UUID_WIDTH;
|
||||
|
||||
`STATIC_ASSERT((AXI_DATA_WIDTH == `VX_MEM_DATA_WIDTH), ("invalid memory data size: current=%0d, expected=%0d", AXI_DATA_WIDTH, `VX_MEM_DATA_WIDTH))
|
||||
`STATIC_ASSERT((AXI_ADDR_WIDTH >= `MEM_ADDR_WIDTH), ("invalid memory address size: current=%0d, expected=%0d", AXI_ADDR_WIDTH, `VX_MEM_ADDR_WIDTH))
|
||||
//`STATIC_ASSERT((AXI_TID_WIDTH >= `VX_MEM_TAG_WIDTH), ("invalid memory tag size: current=%0d, expected=%0d", AXI_TID_WIDTH, `VX_MEM_TAG_WIDTH))
|
||||
`STATIC_ASSERT((AXI_TID_WIDTH >= MIN_TAG_WIDTH), ("invalid memory tag size: current=%0d, expected=%0d", AXI_TID_WIDTH, MIN_TAG_WIDTH))
|
||||
|
||||
wire mem_req_valid;
|
||||
wire mem_req_rw;
|
||||
|
|
|
@ -227,12 +227,12 @@ module VX_afu_wrap #(
|
|||
.dcr_wr_data (dcr_wr_data)
|
||||
);
|
||||
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_w [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_w [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_awaddr_u [C_M_AXI_MEM_NUM_BANKS];
|
||||
wire [`MEM_ADDR_WIDTH-1:0] m_axi_mem_araddr_u [C_M_AXI_MEM_NUM_BANKS];
|
||||
|
||||
for (genvar i = 0; i < C_M_AXI_MEM_NUM_BANKS; ++i) begin
|
||||
assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_w[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]);
|
||||
assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_w[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]);
|
||||
assign m_axi_mem_awaddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_awaddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]);
|
||||
assign m_axi_mem_araddr_a[i] = C_M_AXI_MEM_ADDR_WIDTH'(m_axi_mem_araddr_u[i]) + C_M_AXI_MEM_ADDR_WIDTH'(mem_base[i]);
|
||||
end
|
||||
|
||||
`SCOPE_IO_SWITCH (2)
|
||||
|
@ -250,7 +250,7 @@ module VX_afu_wrap #(
|
|||
|
||||
.m_axi_awvalid (m_axi_mem_awvalid_a),
|
||||
.m_axi_awready (m_axi_mem_awready_a),
|
||||
.m_axi_awaddr (m_axi_mem_awaddr_w),
|
||||
.m_axi_awaddr (m_axi_mem_awaddr_u),
|
||||
.m_axi_awid (m_axi_mem_awid_a),
|
||||
.m_axi_awlen (m_axi_mem_awlen_a),
|
||||
`UNUSED_PIN (m_axi_awsize),
|
||||
|
@ -274,7 +274,7 @@ module VX_afu_wrap #(
|
|||
|
||||
.m_axi_arvalid (m_axi_mem_arvalid_a),
|
||||
.m_axi_arready (m_axi_mem_arready_a),
|
||||
.m_axi_araddr (m_axi_mem_araddr_w),
|
||||
.m_axi_araddr (m_axi_mem_araddr_u),
|
||||
.m_axi_arid (m_axi_mem_arid_a),
|
||||
.m_axi_arlen (m_axi_mem_arlen_a),
|
||||
`UNUSED_PIN (m_axi_arsize),
|
||||
|
|
54
hw/rtl/cache/VX_cache.sv
vendored
54
hw/rtl/cache/VX_cache.sv
vendored
|
@ -83,7 +83,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
localparam REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS);
|
||||
localparam WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS);
|
||||
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
|
||||
localparam MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
|
||||
localparam MEM_TAG_WIDTH = `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH);
|
||||
localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE;
|
||||
localparam WORD_WIDTH = WORD_SIZE * 8;
|
||||
localparam WORD_SEL_BITS = `CLOG2(WORDS_PER_LINE);
|
||||
|
@ -92,6 +92,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
localparam LINE_ADDR_WIDTH = (`CS_WORD_ADDR_WIDTH - BANK_SEL_BITS - WORD_SEL_BITS);
|
||||
localparam CORE_REQ_DATAW = LINE_ADDR_WIDTH + 1 + WORD_SEL_WIDTH + WORD_SIZE + WORD_WIDTH + TAG_WIDTH + 1;
|
||||
localparam CORE_RSP_DATAW = WORD_WIDTH + TAG_WIDTH;
|
||||
localparam BANK_MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH;
|
||||
|
||||
localparam CORE_RSP_REG_DISABLE = (NUM_BANKS != 1) || (NUM_REQS != 1);
|
||||
localparam MEM_REQ_REG_DISABLE = (NUM_BANKS != 1);
|
||||
|
@ -110,6 +111,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
) core_bus2_if[NUM_REQS]();
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_begin;
|
||||
wire [`UP(UUID_WIDTH)-1:0] flush_uuid;
|
||||
wire [NUM_BANKS-1:0] per_bank_flush_end;
|
||||
|
||||
wire [NUM_BANKS-1:0] per_bank_core_req_fire;
|
||||
|
@ -117,6 +119,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
VX_cache_flush #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.UUID_WIDTH(UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
|
||||
) flush_unit (
|
||||
.clk (clk),
|
||||
|
@ -125,6 +129,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.core_bus_out_if (core_bus2_if),
|
||||
.bank_req_fire (per_bank_core_req_fire),
|
||||
.flush_begin (per_bank_flush_begin),
|
||||
.flush_uuid (flush_uuid),
|
||||
.flush_end (per_bank_flush_end)
|
||||
);
|
||||
|
||||
|
@ -182,6 +187,17 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.ready_out (mem_rsp_ready_s)
|
||||
);
|
||||
|
||||
wire [BANK_MEM_TAG_WIDTH-1:0] bank_mem_rsp_tag;
|
||||
wire [`UP(`CS_BANK_SEL_BITS)-1:0] mem_rsp_bank_id;
|
||||
|
||||
if (NUM_BANKS > 1) begin
|
||||
assign bank_mem_rsp_tag = mem_rsp_tag_s[MEM_TAG_WIDTH-1:`CS_BANK_SEL_BITS];
|
||||
assign mem_rsp_bank_id = mem_rsp_tag_s[`CS_BANK_SEL_BITS-1:0];
|
||||
end else begin
|
||||
assign bank_mem_rsp_tag = mem_rsp_tag_s;
|
||||
assign mem_rsp_bank_id = 0;
|
||||
end
|
||||
|
||||
// Memory request buffering
|
||||
|
||||
wire mem_req_valid;
|
||||
|
@ -190,7 +206,6 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [LINE_SIZE-1:0] mem_req_byteen;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_data;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mem_req_id;
|
||||
wire mem_req_flush;
|
||||
wire mem_req_ready;
|
||||
|
||||
|
@ -243,7 +258,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
|
||||
wire [NUM_BANKS-1:0][LINE_SIZE-1:0] per_bank_mem_req_byteen;
|
||||
wire [NUM_BANKS-1:0][`CS_LINE_WIDTH-1:0] per_bank_mem_req_data;
|
||||
wire [NUM_BANKS-1:0][MSHR_ADDR_WIDTH-1:0] per_bank_mem_req_id;
|
||||
wire [NUM_BANKS-1:0][BANK_MEM_TAG_WIDTH-1:0] per_bank_mem_req_tag;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_flush;
|
||||
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
|
||||
|
||||
|
@ -251,11 +266,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
assign per_bank_core_req_fire = per_bank_core_req_valid & per_bank_mem_req_ready;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign mem_rsp_ready_s = per_bank_mem_rsp_ready;
|
||||
end else begin
|
||||
assign mem_rsp_ready_s = per_bank_mem_rsp_ready[`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s)];
|
||||
end
|
||||
assign mem_rsp_ready_s = per_bank_mem_rsp_ready[mem_rsp_bank_id];
|
||||
|
||||
// Bank requests dispatch
|
||||
|
||||
|
@ -359,13 +370,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
// Banks access
|
||||
for (genvar bank_id = 0; bank_id < NUM_BANKS; ++bank_id) begin : banks
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
|
||||
wire curr_bank_mem_rsp_valid;
|
||||
|
||||
if (NUM_BANKS == 1) begin
|
||||
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s;
|
||||
end else begin
|
||||
assign curr_bank_mem_rsp_valid = mem_rsp_valid_s && (`CS_MEM_TAG_TO_BANK_ID(mem_rsp_tag_s) == bank_id);
|
||||
end
|
||||
wire curr_bank_mem_rsp_valid = mem_rsp_valid_s && (mem_rsp_bank_id == bank_id);
|
||||
|
||||
VX_cache_bank #(
|
||||
.BANK_ID (bank_id),
|
||||
|
@ -421,17 +427,19 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.mem_req_rw (per_bank_mem_req_rw[bank_id]),
|
||||
.mem_req_byteen (per_bank_mem_req_byteen[bank_id]),
|
||||
.mem_req_data (per_bank_mem_req_data[bank_id]),
|
||||
.mem_req_id (per_bank_mem_req_id[bank_id]),
|
||||
.mem_req_tag (per_bank_mem_req_tag[bank_id]),
|
||||
.mem_req_flush (per_bank_mem_req_flush[bank_id]),
|
||||
.mem_req_ready (per_bank_mem_req_ready[bank_id]),
|
||||
|
||||
// Memory response
|
||||
.mem_rsp_valid (curr_bank_mem_rsp_valid),
|
||||
.mem_rsp_data (mem_rsp_data_s),
|
||||
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
|
||||
.mem_rsp_tag (bank_mem_rsp_tag),
|
||||
.mem_rsp_ready (per_bank_mem_rsp_ready[bank_id]),
|
||||
|
||||
// Flush request
|
||||
.flush_begin (per_bank_flush_begin[bank_id]),
|
||||
.flush_uuid (flush_uuid),
|
||||
.flush_end (per_bank_flush_end[bank_id])
|
||||
);
|
||||
|
||||
|
@ -476,7 +484,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
// Memory request arbitration
|
||||
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + MSHR_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + 1)-1:0] data_in;
|
||||
wire [NUM_BANKS-1:0][(`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1)-1:0] data_in;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign data_in[i] = {
|
||||
|
@ -484,14 +492,16 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
per_bank_mem_req_rw[i],
|
||||
per_bank_mem_req_byteen[i],
|
||||
per_bank_mem_req_data[i],
|
||||
per_bank_mem_req_id[i],
|
||||
per_bank_mem_req_tag[i],
|
||||
per_bank_mem_req_flush[i]
|
||||
};
|
||||
end
|
||||
|
||||
wire [BANK_MEM_TAG_WIDTH-1:0] bank_mem_req_tag;
|
||||
|
||||
VX_stream_arb #(
|
||||
.NUM_INPUTS (NUM_BANKS),
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + MSHR_ADDR_WIDTH + 1),
|
||||
.DATAW (`CS_MEM_ADDR_WIDTH + 1 + LINE_SIZE + `CS_LINE_WIDTH + BANK_MEM_TAG_WIDTH + 1),
|
||||
.ARBITER ("R")
|
||||
) mem_req_arb (
|
||||
.clk (clk),
|
||||
|
@ -499,7 +509,7 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.valid_in (per_bank_mem_req_valid),
|
||||
.ready_in (per_bank_mem_req_ready),
|
||||
.data_in (data_in),
|
||||
.data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, mem_req_id, mem_req_flush}),
|
||||
.data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data, bank_mem_req_tag, mem_req_flush}),
|
||||
.valid_out (mem_req_valid),
|
||||
.ready_out (mem_req_ready),
|
||||
`UNUSED_PIN (sel_out)
|
||||
|
@ -507,9 +517,9 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
if (NUM_BANKS > 1) begin
|
||||
wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr);
|
||||
assign mem_req_tag = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id});
|
||||
assign mem_req_tag = MEM_TAG_WIDTH'({bank_mem_req_tag, mem_req_bank_id});
|
||||
end else begin
|
||||
assign mem_req_tag = MEM_TAG_WIDTH'(mem_req_id);
|
||||
assign mem_req_tag = MEM_TAG_WIDTH'(bank_mem_req_tag);
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
|
|
47
hw/rtl/cache/VX_cache_bank.sv
vendored
47
hw/rtl/cache/VX_cache_bank.sv
vendored
|
@ -60,6 +60,7 @@ module VX_cache_bank #(
|
|||
parameter MEM_OUT_REG = 0,
|
||||
|
||||
parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE),
|
||||
parameter MEM_TAG_WIDTH = UUID_WIDTH + MSHR_ADDR_WIDTH,
|
||||
parameter REQ_SEL_WIDTH = `UP(`CS_REQ_SEL_BITS),
|
||||
parameter WORD_SEL_WIDTH = `UP(`CS_WORD_SEL_BITS)
|
||||
) (
|
||||
|
@ -97,18 +98,19 @@ module VX_cache_bank #(
|
|||
output wire mem_req_rw,
|
||||
output wire [LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [`CS_LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id, // index of the head entry in the mshr
|
||||
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
output wire mem_req_flush,
|
||||
input wire mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire mem_rsp_valid,
|
||||
input wire [`CS_LINE_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id,
|
||||
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// flush
|
||||
input wire flush_begin,
|
||||
input wire [`UP(UUID_WIDTH)-1:0] flush_uuid,
|
||||
output wire flush_end
|
||||
);
|
||||
|
||||
|
@ -241,12 +243,30 @@ module VX_cache_bank #(
|
|||
wire flush_fire = flush_valid && flush_ready;
|
||||
wire core_req_fire = core_req_valid && core_req_ready;
|
||||
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mem_rsp_id = mem_rsp_tag[MSHR_ADDR_WIDTH-1:0];
|
||||
|
||||
wire [TAG_WIDTH-1:0] mem_rsp_tag_s;
|
||||
if (TAG_WIDTH > MEM_TAG_WIDTH) begin
|
||||
assign mem_rsp_tag_s = {mem_rsp_tag, (TAG_WIDTH-MEM_TAG_WIDTH)'(1'b0)};
|
||||
end else begin
|
||||
assign mem_rsp_tag_s = mem_rsp_tag[MEM_TAG_WIDTH-1 -: TAG_WIDTH];
|
||||
`UNUSED_VAR (mem_rsp_tag)
|
||||
end
|
||||
|
||||
wire [TAG_WIDTH-1:0] flush_tag;
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign flush_tag = {flush_uuid, (TAG_WIDTH-UUID_WIDTH)'(1'b0)};
|
||||
end else begin
|
||||
assign flush_tag = '0;
|
||||
end
|
||||
|
||||
assign valid_sel = init_fire || replay_fire || mem_rsp_fire || flush_fire || core_req_fire;
|
||||
assign rw_sel = replay_valid ? replay_rw : core_req_rw;
|
||||
assign byteen_sel = replay_valid ? replay_byteen : core_req_byteen;
|
||||
assign wsel_sel = replay_valid ? replay_wsel : core_req_wsel;
|
||||
assign req_idx_sel = replay_valid ? replay_idx : core_req_idx;
|
||||
assign tag_sel = replay_valid ? replay_tag : core_req_tag;
|
||||
assign tag_sel = (init_valid | flush_valid) ? (flush_valid ? flush_tag : '0) :
|
||||
(replay_valid ? replay_tag : (mem_rsp_valid ? mem_rsp_tag_s : core_req_tag));
|
||||
assign creq_flush_sel = core_req_valid && core_req_flush;
|
||||
|
||||
assign addr_sel = (init_valid | flush_valid) ? `CS_LINE_ADDR_WIDTH'(flush_sel) :
|
||||
|
@ -587,7 +607,7 @@ module VX_cache_bank #(
|
|||
wire [`CS_LINE_WIDTH-1:0] mreq_queue_data;
|
||||
wire [LINE_SIZE-1:0] mreq_queue_byteen;
|
||||
wire [`CS_LINE_ADDR_WIDTH-1:0] mreq_queue_addr;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mreq_queue_id;
|
||||
wire [MEM_TAG_WIDTH-1:0] mreq_queue_tag;
|
||||
wire mreq_queue_rw;
|
||||
wire mreq_queue_flush;
|
||||
|
||||
|
@ -613,7 +633,6 @@ module VX_cache_bank #(
|
|||
|
||||
assign mreq_queue_pop = mem_req_valid && mem_req_ready;
|
||||
assign mreq_queue_addr = addr_st1;
|
||||
assign mreq_queue_id = mshr_id_st1;
|
||||
assign mreq_queue_flush = creq_flush_st1;
|
||||
|
||||
if (WRITE_ENABLE) begin
|
||||
|
@ -637,8 +656,14 @@ module VX_cache_bank #(
|
|||
`UNUSED_VAR (dirty_byteen_st1)
|
||||
end
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign mreq_queue_tag = {req_uuid_st1, mshr_id_st1};
|
||||
end else begin
|
||||
assign mreq_queue_tag = mshr_id_st1;
|
||||
end
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + 1),
|
||||
.DATAW (1 + `CS_LINE_ADDR_WIDTH + LINE_SIZE + `CS_LINE_WIDTH + MEM_TAG_WIDTH + 1),
|
||||
.DEPTH (MREQ_SIZE),
|
||||
.ALM_FULL (MREQ_SIZE-PIPELINE_STAGES),
|
||||
.OUT_REG (MEM_OUT_REG)
|
||||
|
@ -647,8 +672,8 @@ module VX_cache_bank #(
|
|||
.reset (reset),
|
||||
.push (mreq_queue_push),
|
||||
.pop (mreq_queue_pop),
|
||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_id, mreq_queue_byteen, mreq_queue_data, mreq_queue_flush}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_id, mem_req_byteen, mem_req_data, mem_req_flush}),
|
||||
.data_in ({mreq_queue_rw, mreq_queue_addr, mreq_queue_byteen, mreq_queue_data, mreq_queue_tag, mreq_queue_flush}),
|
||||
.data_out ({mem_req_rw, mem_req_addr, mem_req_byteen, mem_req_data, mem_req_tag, mem_req_flush}),
|
||||
.empty (mreq_queue_empty),
|
||||
.alm_full (mreq_queue_alm_full),
|
||||
`UNUSED_PIN (full),
|
||||
|
@ -675,7 +700,7 @@ module VX_cache_bank #(
|
|||
`TRACE(3, ("%t: *** %s stall: crsq=%b, mreq=%b, mshr=%b, rdw1=%b, rdw2=%b, rdw3=%b\n", $time, INSTANCE_ID, crsp_queue_stall, mreq_queue_alm_full, mshr_alm_full, rdw_hazard1_sel, rdw_hazard2_sel, rdw_hazard3_st1))
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data))
|
||||
`TRACE(2, ("%t: %s fill-rsp: addr=0x%0h, mshr_id=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data, req_uuid_sel))
|
||||
end
|
||||
if (replay_fire) begin
|
||||
`TRACE(2, ("%t: %s mshr-pop: addr=0x%0h, tag=0x%0h, req_idx=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(replay_addr, BANK_ID), replay_tag, replay_idx, req_uuid_sel))
|
||||
|
@ -694,9 +719,9 @@ module VX_cache_bank #(
|
|||
if (do_creq_wr_st1 && !WRITEBACK) begin
|
||||
`TRACE(2, ("%t: %s writethrough: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
|
||||
end else if (do_writeback_st1) begin
|
||||
`TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data))
|
||||
`TRACE(2, ("%t: %s writeback: addr=0x%0h, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_byteen, mreq_queue_data, req_uuid_st1))
|
||||
end else begin
|
||||
`TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mreq_queue_id, req_uuid_st1))
|
||||
`TRACE(2, ("%t: %s fill-req: addr=0x%0h, mshr_id=%0d (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(mreq_queue_addr, BANK_ID), mshr_id_st1, req_uuid_st1))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
4
hw/rtl/cache/VX_cache_cluster.sv
vendored
4
hw/rtl/cache/VX_cache_cluster.sv
vendored
|
@ -82,8 +82,8 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
|
|||
localparam PASSTHRU = (NUM_UNITS == 0);
|
||||
localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES);
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH, UUID_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH));
|
||||
|
||||
`STATIC_ASSERT(NUM_INPUTS >= NUM_CACHES, ("invalid parameter"))
|
||||
|
||||
|
|
1
hw/rtl/cache/VX_cache_define.vh
vendored
1
hw/rtl/cache/VX_cache_define.vh
vendored
|
@ -57,7 +57,6 @@
|
|||
`define CS_LINE_TO_MEM_ADDR(x, i) {x, `CS_BANK_SEL_BITS'(i)}
|
||||
`define CS_MEM_ADDR_TO_BANK_ID(x) x[0 +: `CS_BANK_SEL_BITS]
|
||||
`define CS_MEM_TAG_TO_REQ_ID(x) x[MSHR_ADDR_WIDTH-1:0]
|
||||
`define CS_MEM_TAG_TO_BANK_ID(x) x[MSHR_ADDR_WIDTH +: `CS_BANK_SEL_BITS]
|
||||
|
||||
`define CS_LINE_TO_FULL_ADDR(x, i) {x, (`XLEN-$bits(x))'(i << (`XLEN-$bits(x)-`CS_BANK_SEL_BITS))}
|
||||
`define CS_MEM_TO_FULL_ADDR(x) {x, (`XLEN-$bits(x))'(0)}
|
||||
|
|
20
hw/rtl/cache/VX_cache_flush.sv
vendored
20
hw/rtl/cache/VX_cache_flush.sv
vendored
|
@ -18,6 +18,10 @@ module VX_cache_flush #(
|
|||
parameter NUM_REQS = 4,
|
||||
// Number of banks
|
||||
parameter NUM_BANKS = 1,
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
// Core request tag size
|
||||
parameter TAG_WIDTH = UUID_WIDTH + 1,
|
||||
// Bank select latency
|
||||
parameter BANK_SEL_LATENCY = 1
|
||||
) (
|
||||
|
@ -27,6 +31,7 @@ module VX_cache_flush #(
|
|||
VX_mem_bus_if.master core_bus_out_if [NUM_REQS],
|
||||
input wire [NUM_BANKS-1:0] bank_req_fire,
|
||||
output wire [NUM_BANKS-1:0] flush_begin,
|
||||
output wire [`UP(UUID_WIDTH)-1:0] flush_uuid,
|
||||
input wire [NUM_BANKS-1:0] flush_end
|
||||
);
|
||||
localparam STATE_IDLE = 0;
|
||||
|
@ -88,6 +93,7 @@ module VX_cache_flush #(
|
|||
wire flush_req_enable = (| flush_req_mask);
|
||||
|
||||
reg [NUM_REQS-1:0] lock_released, lock_released_n;
|
||||
reg [`UP(UUID_WIDTH)-1:0] flush_uuid_r, flush_uuid_n;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
wire input_enable = ~flush_req_enable || lock_released[i];
|
||||
|
@ -102,8 +108,14 @@ module VX_cache_flush #(
|
|||
assign core_bus_out_if[i].rsp_ready = core_bus_in_if[i].rsp_ready;
|
||||
end
|
||||
|
||||
reg [NUM_REQS-1:0][`UP(UUID_WIDTH)-1:0] core_bus_out_uuid;
|
||||
wire [NUM_REQS-1:0] core_bus_out_ready;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign core_bus_out_uuid[i] = core_bus_in_if[i].req_data.tag[TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign core_bus_out_uuid[i] = 0;
|
||||
end
|
||||
assign core_bus_out_ready[i] = core_bus_out_if[i].req_ready;
|
||||
end
|
||||
|
||||
|
@ -111,10 +123,16 @@ module VX_cache_flush #(
|
|||
state_n = state;
|
||||
flush_done_n = flush_done;
|
||||
lock_released_n = lock_released;
|
||||
flush_uuid_n = flush_uuid_r;
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (flush_req_enable) begin
|
||||
state_n = (BANK_SEL_LATENCY != 0) ? STATE_WAIT1 : STATE_FLUSH;
|
||||
for (integer i = NUM_REQS-1; i >= 0; --i) begin
|
||||
if (flush_req_mask[i]) begin
|
||||
flush_uuid_n = core_bus_out_uuid[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
STATE_WAIT1: begin
|
||||
|
@ -158,8 +176,10 @@ module VX_cache_flush #(
|
|||
flush_done <= flush_done_n;
|
||||
lock_released <= lock_released_n;
|
||||
end
|
||||
flush_uuid_r <= flush_uuid_n;
|
||||
end
|
||||
|
||||
assign flush_begin = {NUM_BANKS{state == STATE_FLUSH}};
|
||||
assign flush_uuid = flush_uuid_r;
|
||||
|
||||
endmodule
|
||||
|
|
9
hw/rtl/cache/VX_cache_wrap.sv
vendored
9
hw/rtl/cache/VX_cache_wrap.sv
vendored
|
@ -84,12 +84,11 @@ module VX_cache_wrap import VX_gpu_pkg::*; #(
|
|||
|
||||
`STATIC_ASSERT(NUM_BANKS == (1 << `CLOG2(NUM_BANKS)), ("invalid parameter"))
|
||||
|
||||
localparam MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE);
|
||||
localparam CACHE_MEM_TAG_WIDTH = MSHR_ADDR_WIDTH + `CS_BANK_SEL_BITS;
|
||||
localparam CACHE_MEM_TAG_WIDTH = `CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, UUID_WIDTH);
|
||||
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
|
||||
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH) :
|
||||
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, TAG_WIDTH, UUID_WIDTH) :
|
||||
CACHE_MEM_TAG_WIDTH);
|
||||
|
||||
localparam NC_OR_BYPASS = (NC_ENABLE || PASSTHRU);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue