memory request flags refactoring

This commit is contained in:
Blaise Tine 2024-08-06 19:05:22 -07:00
parent e86eeab8ea
commit bddf276335
23 changed files with 140 additions and 137 deletions

View file

@ -303,10 +303,10 @@
`define L1_ENABLE
`endif
`define ADDR_TYPE_FLUSH 0
`define ADDR_TYPE_IO 1
`define ADDR_TYPE_LOCAL 2 // should be last since optional
`define ADDR_TYPE_WIDTH (`ADDR_TYPE_LOCAL + `LMEM_ENABLED)
// Bit positions within the per-request memory flags vector (the "flags" field of req_data).
// Each value is used as a bit index, e.g. flags[MEM_REQ_FLAG_FLUSH] or (1 << MEM_REQ_FLAG_FLUSH).
`define MEM_REQ_FLAG_FLUSH 0
`define MEM_REQ_FLAG_IO 1
`define MEM_REQ_FLAG_LOCAL 2 // should be last since optional
// Width of the flags vector: index of the LOCAL bit plus LMEM_ENABLED, so the LOCAL bit
// is only part of the vector when local memory is enabled.
// NOTE(review): assumes LMEM_ENABLED evaluates to 0 or 1 -- confirm against its definition.
`define MEM_REQ_FLAGS_WIDTH (`MEM_REQ_FLAG_LOCAL + `LMEM_ENABLED)
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
`define VX_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH - `CLOG2(`L3_LINE_SIZE))
@ -364,7 +364,7 @@
assign dst.req_data.rw = src.req_data.rw; \
assign dst.req_data.byteen = src.req_data.byteen; \
assign dst.req_data.addr = src.req_data.addr; \
assign dst.req_data.atype = src.req_data.atype; \
assign dst.req_data.flags = src.req_data.flags; \
assign dst.req_data.data = src.req_data.data; \
if (TD != TS) \
assign dst.req_data.tag = {src.req_data.tag, {(TD-TS){1'b0}}}; \

View file

@ -109,7 +109,7 @@ module Vortex import VX_gpu_pkg::*; (
assign mem_req_data = mem_bus_if.req_data.data;
assign mem_req_tag = mem_bus_if.req_data.tag;
assign mem_bus_if.req_ready = mem_req_ready;
`UNUSED_VAR (mem_bus_if.req_data.atype)
`UNUSED_VAR (mem_bus_if.req_data.flags)
assign mem_bus_if.rsp_valid = mem_rsp_valid;
assign mem_bus_if.rsp_data.data = mem_rsp_data;

View file

@ -517,8 +517,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.mem_rsp_ready_out (cci_vx_mem_bus_if[1].rsp_ready)
);
assign cci_vx_mem_bus_if[1].req_data.atype = '0;
`UNUSED_VAR (cci_vx_mem_bus_if[1].req_data.atype)
assign cci_vx_mem_bus_if[1].req_data.flags = '0;
`UNUSED_VAR (cci_vx_mem_bus_if[1].req_data.flags)
//--
@ -570,8 +570,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.mem_rsp_ready_out (cci_vx_mem_bus_if[0].rsp_ready)
);
assign cci_vx_mem_bus_if[0].req_data.atype = '0;
`UNUSED_VAR (cci_vx_mem_bus_if[0].req_data.atype)
assign cci_vx_mem_bus_if[0].req_data.flags = '0;
`UNUSED_VAR (cci_vx_mem_bus_if[0].req_data.flags)
//--
VX_mem_bus_if #(
@ -639,8 +639,8 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.avs_readdatavalid(avs_readdatavalid)
);
assign mem_bus_if[0].req_data.atype = '0;
`UNUSED_VAR (mem_bus_if[0].req_data.atype)
assign mem_bus_if[0].req_data.flags = '0;
`UNUSED_VAR (mem_bus_if[0].req_data.flags)
// CCI-P Read Request ///////////////////////////////////////////////////////////

View file

@ -185,7 +185,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.ready_out (mem_bus_if.req_ready)
);
assign mem_bus_if.req_data.atype = mem_bus_if_flush ? `ADDR_TYPE_WIDTH'(1 << `ADDR_TYPE_FLUSH) : '0;
assign mem_bus_if.req_data.flags = mem_bus_if_flush ? `MEM_REQ_FLAGS_WIDTH'(1 << `MEM_REQ_FLAG_FLUSH) : '0;
///////////////////////////////////////////////////////////////////////////
@ -273,7 +273,7 @@ module VX_cache import VX_gpu_pkg::*; #(
assign core_req_addr[i] = core_bus2_if[i].req_data.addr;
assign core_req_data[i] = core_bus2_if[i].req_data.data;
assign core_req_tag[i] = core_bus2_if[i].req_data.tag;
assign core_req_flush[i] = core_bus2_if[i].req_data.atype[`ADDR_TYPE_FLUSH];
assign core_req_flush[i] = core_bus2_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH];
assign core_bus2_if[i].req_ready = core_req_ready[i];
end

View file

@ -56,7 +56,7 @@ module VX_cache_bypass #(
localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1);
localparam REQ_SEL_BITS = `CLOG2(NUM_REQS);
localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `ADDR_TYPE_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH;
localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH;
localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE;
localparam WSEL_BITS = `CLOG2(WORDS_PER_LINE);
@ -80,7 +80,7 @@ module VX_cache_bypass #(
if (PASSTHRU != 0) begin
assign core_req_nc_idxs[i] = 1'b1;
end else if (NC_ENABLE) begin
assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO];
assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.flags[`MEM_REQ_FLAG_IO];
end else begin
assign core_req_nc_idxs[i] = 1'b0;
end
@ -113,7 +113,7 @@ module VX_cache_bypass #(
wire mem_req_out_rw;
wire [LINE_SIZE-1:0] mem_req_out_byteen;
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_out_addr;
wire [`ADDR_TYPE_WIDTH-1:0] mem_req_out_atype;
wire [`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_out_flags;
wire [`CS_LINE_WIDTH-1:0] mem_req_out_data;
wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag;
wire mem_req_out_ready;
@ -121,7 +121,7 @@ module VX_cache_bypass #(
wire core_req_nc_sel_rw;
wire [WORD_SIZE-1:0] core_req_nc_sel_byteen;
wire [CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr;
wire [`ADDR_TYPE_WIDTH-1:0] core_req_nc_sel_atype;
wire [`MEM_REQ_FLAGS_WIDTH-1:0] core_req_nc_sel_flags;
wire [CORE_DATA_WIDTH-1:0] core_req_nc_sel_data;
wire [CORE_TAG_WIDTH-1:0] core_req_nc_sel_tag;
@ -131,7 +131,7 @@ module VX_cache_bypass #(
core_bus_in_if[i].req_data.rw,
core_bus_in_if[i].req_data.byteen,
core_bus_in_if[i].req_data.addr,
core_bus_in_if[i].req_data.atype,
core_bus_in_if[i].req_data.flags,
core_bus_in_if[i].req_data.data,
core_bus_in_if[i].req_data.tag
};
@ -141,7 +141,7 @@ module VX_cache_bypass #(
core_req_nc_sel_rw,
core_req_nc_sel_byteen,
core_req_nc_sel_addr,
core_req_nc_sel_atype,
core_req_nc_sel_flags,
core_req_nc_sel_data,
core_req_nc_sel_tag
} = core_req_nc_mux_in[core_req_nc_idx];
@ -151,7 +151,7 @@ module VX_cache_bypass #(
assign mem_req_out_valid = mem_bus_in_if.req_valid || core_req_nc_valid;
assign mem_req_out_rw = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.rw : core_req_nc_sel_rw;
assign mem_req_out_addr = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.addr : core_req_nc_sel_addr[WSEL_BITS +: MEM_ADDR_WIDTH];
assign mem_req_out_atype = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.atype : core_req_nc_sel_atype;
assign mem_req_out_flags = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.flags : core_req_nc_sel_flags;
wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass;
@ -218,7 +218,7 @@ module VX_cache_bypass #(
assign mem_bus_in_if.req_ready = mem_req_out_ready;
VX_elastic_buffer #(
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `ADDR_TYPE_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH),
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH),
.SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
) mem_req_buf (
@ -226,8 +226,8 @@ module VX_cache_bypass #(
.reset (reset),
.valid_in (mem_req_out_valid),
.ready_in (mem_req_out_ready),
.data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}),
.data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.atype, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}),
.data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_flags, mem_req_out_data, mem_req_out_tag}),
.data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.flags, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}),
.valid_out (mem_bus_out_if.req_valid),
.ready_out (mem_bus_out_if.req_ready)
);

View file

@ -83,7 +83,7 @@ module VX_cache_flush #(
wire [NUM_REQS-1:0] flush_req_mask;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.atype[`ADDR_TYPE_FLUSH];
assign flush_req_mask[i] = core_bus_in_if[i].req_valid && core_bus_in_if[i].req_data.flags[`MEM_REQ_FLAG_FLUSH];
end
wire flush_req_enable = (| flush_req_mask);

View file

@ -75,7 +75,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
input wire [NUM_REQS-1:0] core_req_rw,
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] core_req_atype,
input wire [NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] core_req_flags,
input wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data,
input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag,
output wire [NUM_REQS-1:0] core_req_ready,
@ -117,7 +117,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
assign core_bus_if[i].req_data.rw = core_req_rw[i];
assign core_bus_if[i].req_data.byteen = core_req_byteen[i];
assign core_bus_if[i].req_data.addr = core_req_addr[i];
assign core_bus_if[i].req_data.atype = core_req_atype[i];
assign core_bus_if[i].req_data.flags = core_req_flags[i];
assign core_bus_if[i].req_data.data = core_req_data[i];
assign core_bus_if[i].req_data.tag = core_req_tag[i];
assign core_req_ready[i] = core_bus_if[i].req_ready;
@ -139,7 +139,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
assign mem_req_data = mem_bus_if.req_data.data;
assign mem_req_tag = mem_bus_if.req_data.tag;
assign mem_bus_if.req_ready = mem_req_ready;
`UNUSED_VAR (mem_bus_if.req_data.atype)
`UNUSED_VAR (mem_bus_if.req_data.flags)
// Memory response
assign mem_bus_if.rsp_valid = mem_rsp_valid;

View file

@ -250,7 +250,7 @@ module VX_core import VX_gpu_pkg::*; #(
.DATA_IN_SIZE (LSU_WORD_SIZE),
.DATA_OUT_SIZE (DCACHE_WORD_SIZE),
.ADDR_WIDTH (LSU_ADDR_WIDTH),
.ATYPE_WIDTH (`ADDR_TYPE_WIDTH),
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
.TAG_WIDTH (LSU_TAG_WIDTH),
.UUID_WIDTH (`UUID_WIDTH),
.QUEUE_SIZE (`LSUQ_OUT_SIZE)
@ -264,7 +264,7 @@ module VX_core import VX_gpu_pkg::*; #(
.in_req_rw (lsu_dcache_if[i].req_data.rw),
.in_req_byteen (lsu_dcache_if[i].req_data.byteen),
.in_req_addr (lsu_dcache_if[i].req_data.addr),
.in_req_atype (lsu_dcache_if[i].req_data.atype),
.in_req_flags (lsu_dcache_if[i].req_data.flags),
.in_req_data (lsu_dcache_if[i].req_data.data),
.in_req_tag (lsu_dcache_if[i].req_data.tag),
.in_req_ready (lsu_dcache_if[i].req_ready),
@ -282,7 +282,7 @@ module VX_core import VX_gpu_pkg::*; #(
.out_req_rw (dcache_coalesced_if.req_data.rw),
.out_req_byteen (dcache_coalesced_if.req_data.byteen),
.out_req_addr (dcache_coalesced_if.req_data.addr),
.out_req_atype (dcache_coalesced_if.req_data.atype),
.out_req_flags (dcache_coalesced_if.req_data.flags),
.out_req_data (dcache_coalesced_if.req_data.data),
.out_req_tag (dcache_coalesced_if.req_data.tag),
.out_req_ready (dcache_coalesced_if.req_ready),

View file

@ -32,7 +32,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
output wire [DCACHE_NUM_REQS-1:0] dcache_req_rw,
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] dcache_req_byteen,
output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] dcache_req_addr,
output wire [DCACHE_NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] dcache_req_atype,
output wire [DCACHE_NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] dcache_req_flags,
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_req_data,
output wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] dcache_req_tag,
input wire [DCACHE_NUM_REQS-1:0] dcache_req_ready,
@ -96,7 +96,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
assign dcache_req_rw[i] = dcache_bus_if[i].req_data.rw;
assign dcache_req_byteen[i] = dcache_bus_if[i].req_data.byteen;
assign dcache_req_addr[i] = dcache_bus_if[i].req_data.addr;
assign dcache_req_atype[i] = dcache_bus_if[i].req_data.atype;
assign dcache_req_flags[i] = dcache_bus_if[i].req_data.flags;
assign dcache_req_data[i] = dcache_bus_if[i].req_data.data;
assign dcache_req_tag[i] = dcache_bus_if[i].req_data.tag;
assign dcache_bus_if[i].req_ready = dcache_req_ready[i];
@ -119,7 +119,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
assign icache_req_data = icache_bus_if.req_data.data;
assign icache_req_tag = icache_bus_if.req_data.tag;
assign icache_bus_if.req_ready = icache_req_ready;
`UNUSED_VAR (icache_bus_if.req_data.atype)
`UNUSED_VAR (icache_bus_if.req_data.flags)
assign icache_bus_if.rsp_valid = icache_rsp_valid;
assign icache_bus_if.rsp_data.tag = icache_rsp_tag;

View file

@ -61,7 +61,8 @@ module VX_dispatch import VX_gpu_pkg::*; #(
.DATAW (DATAW),
.SIZE (2),
.OUT_REG (2), // 2-cycle EB for area reduction
.LUTRAM (1)
.LUTRAM (1),
.MAX_FANOUT (`MAX_FANOUT * 64)
) buffer (
.clk (clk),
.reset (buffer_reset),

View file

@ -116,7 +116,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
.ready_out (icache_bus_if.req_ready)
);
assign icache_bus_if.req_data.atype = '0;
assign icache_bus_if.req_data.flags = '0;
assign icache_bus_if.req_data.rw = 0;
assign icache_bus_if.req_data.byteen = 4'b1111;
assign icache_bus_if.req_data.data = '0;

View file

@ -29,7 +29,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
`STATIC_ASSERT(`IS_DIVISBLE((1 << `LMEM_LOG_SIZE), `MEM_BLOCK_SIZE), ("invalid parameter"))
`STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter"))
localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `ADDR_TYPE_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH;
localparam REQ_DATAW = `NUM_LSU_LANES + 1 + `NUM_LSU_LANES * (LSU_WORD_SIZE + LSU_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH;
localparam RSP_DATAW = `NUM_LSU_LANES + `NUM_LSU_LANES * (LSU_WORD_SIZE * 8) + LSU_TAG_WIDTH;
localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(LSU_WORD_SIZE);
@ -45,7 +45,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
wire [`NUM_LSU_LANES-1:0] is_addr_local_mask;
for (genvar j = 0; j < `NUM_LSU_LANES; ++j) begin
assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.atype[j][`ADDR_TYPE_LOCAL];
assign is_addr_local_mask[j] = lsu_mem_in_if[i].req_data.flags[j][`MEM_REQ_FLAG_LOCAL];
end
wire is_addr_global = | (lsu_mem_in_if[i].req_data.mask & ~is_addr_local_mask);
@ -67,7 +67,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
lsu_mem_in_if[i].req_data.rw,
lsu_mem_in_if[i].req_data.byteen,
lsu_mem_in_if[i].req_data.addr,
lsu_mem_in_if[i].req_data.atype,
lsu_mem_in_if[i].req_data.flags,
lsu_mem_in_if[i].req_data.data,
lsu_mem_in_if[i].req_data.tag
}),
@ -78,7 +78,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
lsu_mem_out_if[i].req_data.rw,
lsu_mem_out_if[i].req_data.byteen,
lsu_mem_out_if[i].req_data.addr,
lsu_mem_out_if[i].req_data.atype,
lsu_mem_out_if[i].req_data.flags,
lsu_mem_out_if[i].req_data.data,
lsu_mem_out_if[i].req_data.tag
}),
@ -98,7 +98,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
lsu_mem_in_if[i].req_data.rw,
lsu_mem_in_if[i].req_data.byteen,
lsu_mem_in_if[i].req_data.addr,
lsu_mem_in_if[i].req_data.atype,
lsu_mem_in_if[i].req_data.flags,
lsu_mem_in_if[i].req_data.data,
lsu_mem_in_if[i].req_data.tag
}),
@ -109,7 +109,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
lsu_switch_if[i].req_data.rw,
lsu_switch_if[i].req_data.byteen,
lsu_switch_if[i].req_data.addr,
lsu_switch_if[i].req_data.atype,
lsu_switch_if[i].req_data.flags,
lsu_switch_if[i].req_data.data,
lsu_switch_if[i].req_data.tag
}),

View file

@ -29,7 +29,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #(
VX_mem_bus_if.master mem_bus_if [NUM_LANES]
);
localparam REQ_ADDR_WIDTH = `MEM_ADDR_WIDTH - `CLOG2(DATA_SIZE);
localparam REQ_DATA_WIDTH = 1 + DATA_SIZE + REQ_ADDR_WIDTH + `ADDR_TYPE_WIDTH + DATA_SIZE * 8;
localparam REQ_DATA_WIDTH = 1 + DATA_SIZE + REQ_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + DATA_SIZE * 8;
localparam RSP_DATA_WIDTH = DATA_SIZE * 8;
// handle request unpacking
@ -46,7 +46,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #(
lsu_mem_if.req_data.rw,
lsu_mem_if.req_data.byteen[i],
lsu_mem_if.req_data.addr[i],
lsu_mem_if.req_data.atype[i],
lsu_mem_if.req_data.flags[i],
lsu_mem_if.req_data.data[i]
};
end
@ -57,7 +57,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #(
mem_bus_if[i].req_data.rw,
mem_bus_if[i].req_data.byteen,
mem_bus_if[i].req_data.addr,
mem_bus_if[i].req_data.atype,
mem_bus_if[i].req_data.flags,
mem_bus_if[i].req_data.data
} = req_data_out[i];
assign mem_bus_if[i].req_data.tag = req_tag_out[i];

View file

@ -65,19 +65,19 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
// memory request flags calculation (flush, I/O, and optional local-memory bits)
wire [NUM_LANES-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_atype;
wire [NUM_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags;
for (genvar i = 0; i < NUM_LANES; ++i) begin
wire [MEM_ADDRW-1:0] block_addr = full_addr[i][MEM_ASHIFT +: MEM_ADDRW];
// is I/O address
wire [MEM_ADDRW-1:0] io_addr_start = MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT);
wire [MEM_ADDRW-1:0] io_addr_end = MEM_ADDRW'(`XLEN'(`IO_END_ADDR) >> MEM_ASHIFT);
assign mem_req_atype[i][`ADDR_TYPE_FLUSH] = req_is_fence;
assign mem_req_atype[i][`ADDR_TYPE_IO] = (block_addr >= io_addr_start) && (block_addr < io_addr_end);
assign mem_req_flags[i][`MEM_REQ_FLAG_FLUSH] = req_is_fence;
assign mem_req_flags[i][`MEM_REQ_FLAG_IO] = (block_addr >= io_addr_start) && (block_addr < io_addr_end);
`ifdef LMEM_ENABLE
// is local memory address
wire [MEM_ADDRW-1:0] lmem_addr_start = MEM_ADDRW'(`XLEN'(`LMEM_BASE_ADDR) >> MEM_ASHIFT);
wire [MEM_ADDRW-1:0] lmem_addr_end = MEM_ADDRW'((`XLEN'(`LMEM_BASE_ADDR) + `XLEN'(1 << `LMEM_LOG_SIZE)) >> MEM_ASHIFT);
assign mem_req_atype[i][`ADDR_TYPE_LOCAL] = (block_addr >= lmem_addr_start) && (block_addr < lmem_addr_end);
assign mem_req_flags[i][`MEM_REQ_FLAG_LOCAL] = (block_addr >= lmem_addr_start) && (block_addr < lmem_addr_end);
`endif
end
@ -300,7 +300,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
wire [NUM_LANES-1:0] lsu_mem_req_mask;
wire [NUM_LANES-1:0][LSU_WORD_SIZE-1:0] lsu_mem_req_byteen;
wire [NUM_LANES-1:0][LSU_ADDR_WIDTH-1:0] lsu_mem_req_addr;
wire [NUM_LANES-1:0][`ADDR_TYPE_WIDTH-1:0] lsu_mem_req_atype;
wire [NUM_LANES-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] lsu_mem_req_flags;
wire [NUM_LANES-1:0][(LSU_WORD_SIZE*8)-1:0] lsu_mem_req_data;
wire [LSU_TAG_WIDTH-1:0] lsu_mem_req_tag;
wire lsu_mem_req_ready;
@ -320,7 +320,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
.WORD_SIZE (LSU_WORD_SIZE),
.LINE_SIZE (LSU_WORD_SIZE),
.ADDR_WIDTH (LSU_ADDR_WIDTH),
.ATYPE_WIDTH (`ADDR_TYPE_WIDTH),
.FLAGS_WIDTH (`MEM_REQ_FLAGS_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.CORE_QUEUE_SIZE (`LSUQ_IN_SIZE),
.MEM_QUEUE_SIZE (`LSUQ_OUT_SIZE),
@ -338,7 +338,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
.core_req_mask (mem_req_mask),
.core_req_byteen(mem_req_byteen),
.core_req_addr (mem_req_addr),
.core_req_atype (mem_req_atype),
.core_req_flags (mem_req_flags),
.core_req_data (mem_req_data),
.core_req_tag (mem_req_tag),
.core_req_ready (mem_req_ready),
@ -360,7 +360,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
.mem_req_mask (lsu_mem_req_mask),
.mem_req_byteen (lsu_mem_req_byteen),
.mem_req_addr (lsu_mem_req_addr),
.mem_req_atype (lsu_mem_req_atype),
.mem_req_flags (lsu_mem_req_flags),
.mem_req_data (lsu_mem_req_data),
.mem_req_tag (lsu_mem_req_tag),
.mem_req_ready (lsu_mem_req_ready),
@ -378,7 +378,7 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
assign lsu_mem_if.req_data.rw = lsu_mem_req_rw;
assign lsu_mem_if.req_data.byteen = lsu_mem_req_byteen;
assign lsu_mem_if.req_data.addr = lsu_mem_req_addr;
assign lsu_mem_if.req_data.atype = lsu_mem_req_atype;
assign lsu_mem_if.req_data.flags = lsu_mem_req_flags;
assign lsu_mem_if.req_data.data = lsu_mem_req_data;
assign lsu_mem_if.req_data.tag = lsu_mem_req_tag;
assign lsu_mem_req_ready = lsu_mem_if.req_ready;
@ -513,16 +513,16 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
if (mem_req_rw) begin
`TRACE(1, ("%d: %s Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
`TRACE(1, (", atype="));
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
`TRACE(1, (", flags="));
`TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES);
`TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen));
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES);
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if.data.uuid));
end else begin
`TRACE(1, ("%d: %s Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, INSTANCE_ID, execute_if.data.wid, {execute_if.data.PC, 1'b0}, mem_req_mask));
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
`TRACE(1, (", atype="));
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
`TRACE(1, (", flags="));
`TRACE_ARRAY1D(1, "%b", mem_req_flags, NUM_LANES);
`TRACE(1, (", byteen=0x%0h, rd=%0d, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if.data.rd, mem_req_tag, execute_if.data.uuid));
end
end

View file

@ -183,7 +183,8 @@ module VX_operands import VX_gpu_pkg::*; #(
VX_pipe_register #(
.DATAW (1 + NUM_SRC_REGS * REGS_DATAW + NUM_BANKS + NUM_BANKS * REGS_DATAW + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH),
.RESETW (1 + NUM_SRC_REGS * REGS_DATAW)
.RESETW (1 + NUM_SRC_REGS * REGS_DATAW),
.MAX_FANOUT (`MAX_FANOUT * 64)
) pipe_reg2 (
.clk (clk),
.reset (pipe2_reset),
@ -205,7 +206,8 @@ module VX_operands import VX_gpu_pkg::*; #(
.DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
.LUTRAM (1)
.LUTRAM (1),
.MAX_FANOUT (`MAX_FANOUT * 64)
) out_buf (
.clk (clk),
.reset (reset),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -16,18 +16,18 @@
interface VX_lsu_mem_if #(
parameter NUM_LANES = 1,
parameter DATA_SIZE = 1,
parameter ATYPE_WIDTH= `ADDR_TYPE_WIDTH,
parameter FLAGS_WIDTH= `MEM_REQ_FLAGS_WIDTH,
parameter TAG_WIDTH = 1,
parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
parameter ADDR_WIDTH = MEM_ADDR_WIDTH - `CLOG2(DATA_SIZE)
) ();
typedef struct packed {
logic rw;
logic rw;
logic [NUM_LANES-1:0] mask;
logic [NUM_LANES-1:0][DATA_SIZE-1:0] byteen;
logic [NUM_LANES-1:0][ADDR_WIDTH-1:0] addr;
logic [NUM_LANES-1:0][ATYPE_WIDTH-1:0] atype;
logic [NUM_LANES-1:0][FLAGS_WIDTH-1:0] flags;
logic [NUM_LANES-1:0][DATA_SIZE*8-1:0] data;
logic [TAG_WIDTH-1:0] tag;
} req_data_t;

View file

@ -18,7 +18,7 @@ module VX_mem_coalescer #(
parameter `STRING INSTANCE_ID = "",
parameter NUM_REQS = 1,
parameter ADDR_WIDTH = 32,
parameter ATYPE_WIDTH = 1,
parameter FLAGS_WIDTH = 1,
parameter DATA_IN_SIZE = 4,
parameter DATA_OUT_SIZE = 64,
parameter TAG_WIDTH = 8,
@ -43,7 +43,7 @@ module VX_mem_coalescer #(
input wire [NUM_REQS-1:0] in_req_mask,
input wire [NUM_REQS-1:0][DATA_IN_SIZE-1:0] in_req_byteen,
input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr,
input wire [NUM_REQS-1:0][ATYPE_WIDTH-1:0] in_req_atype,
input wire [NUM_REQS-1:0][FLAGS_WIDTH-1:0] in_req_flags,
input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data,
input wire [TAG_WIDTH-1:0] in_req_tag,
output wire in_req_ready,
@ -61,7 +61,7 @@ module VX_mem_coalescer #(
output wire [OUT_REQS-1:0] out_req_mask,
output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen,
output wire [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr,
output wire [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype,
output wire [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags,
output wire [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data,
output wire [OUT_TAG_WIDTH-1:0] out_req_tag,
input wire out_req_ready,
@ -93,7 +93,7 @@ module VX_mem_coalescer #(
logic out_req_rw_r, out_req_rw_n;
logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] out_req_flags_r, out_req_flags_n;
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
@ -111,7 +111,7 @@ module VX_mem_coalescer #(
logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n;
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n;
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] seed_atype_r, seed_atype_n;
logic [OUT_REQS-1:0][FLAGS_WIDTH-1:0] seed_flags_r, seed_flags_n;
logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n;
logic [NUM_REQS-1:0] processed_mask_r, processed_mask_n;
@ -144,7 +144,7 @@ module VX_mem_coalescer #(
for (genvar i = 0; i < OUT_REQS; ++i) begin
assign seed_addr_n[i] = in_addr_base[seed_idx[i]];
assign seed_atype_n[i] = in_req_atype[seed_idx[i]];
assign seed_flags_n[i] = in_req_flags[seed_idx[i]];
end
for (genvar i = 0; i < OUT_REQS; ++i) begin
@ -188,7 +188,7 @@ module VX_mem_coalescer #(
out_req_mask_n = out_req_mask_r;
out_req_rw_n = out_req_rw_r;
out_req_addr_n = out_req_addr_r;
out_req_atype_n = out_req_atype_r;
out_req_flags_n = out_req_flags_r;
out_req_byteen_n = out_req_byteen_r;
out_req_data_n = out_req_data_r;
out_req_tag_n = out_req_tag_r;
@ -211,7 +211,7 @@ module VX_mem_coalescer #(
out_req_mask_n = batch_valid_r;
out_req_rw_n = in_req_rw;
out_req_addr_n = seed_addr_r;
out_req_atype_n = seed_atype_r;
out_req_flags_n = seed_flags_r;
out_req_byteen_n= req_byteen_merged;
out_req_data_n = req_data_merged;
out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr};
@ -230,14 +230,14 @@ module VX_mem_coalescer #(
end
VX_pipe_register #(
.DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + ATYPE_WIDTH + OUT_ADDR_WIDTH + ATYPE_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH),
.DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + FLAGS_WIDTH + OUT_ADDR_WIDTH + FLAGS_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH) + OUT_TAG_WIDTH),
.RESETW (1 + NUM_REQS + 1)
) pipe_reg (
.clk (clk),
.reset (reset),
.enable (1'b1),
.data_in ({state_n, processed_mask_n, out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_atype_n, out_req_addr_n, out_req_atype_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}),
.data_out ({state_r, processed_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_atype_r, out_req_addr_r, out_req_atype_r, out_req_byteen_r, out_req_data_r, out_req_tag_r})
.data_in ({state_n, processed_mask_n, out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_flags_n, out_req_addr_n, out_req_flags_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}),
.data_out ({state_r, processed_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_flags_r, out_req_addr_r, out_req_flags_r, out_req_byteen_r, out_req_data_r, out_req_tag_r})
);
wire out_rsp_fire = out_rsp_valid && out_rsp_ready;
@ -278,7 +278,7 @@ module VX_mem_coalescer #(
assign out_req_mask = out_req_mask_r;
assign out_req_byteen = out_req_byteen_r;
assign out_req_addr = out_req_addr_r;
assign out_req_atype = out_req_atype_r;
assign out_req_flags = out_req_flags_r;
assign out_req_data = out_req_data_r;
assign out_req_tag = out_req_tag_r;
@ -350,8 +350,8 @@ module VX_mem_coalescer #(
if (out_req_rw) begin
`TRACE(1, ("%d: %s-out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask));
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
`TRACE(1, (", atype="));
`TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS);
`TRACE(1, (", flags="));
`TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS);
`TRACE(1, (", byteen="));
`TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS);
`TRACE(1, (", data="));
@ -359,8 +359,8 @@ module VX_mem_coalescer #(
end else begin
`TRACE(1, ("%d: %s-out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask));
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
`TRACE(1, (", atype="));
`TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS);
`TRACE(1, (", flags="));
`TRACE_ARRAY1D(1, "%b", out_req_flags, OUT_REQS);
end
`TRACE(1, (", offset="));
`TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS);

View file

@ -21,7 +21,7 @@ module VX_mem_scheduler #(
parameter WORD_SIZE = 4,
parameter LINE_SIZE = WORD_SIZE,
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
parameter ATYPE_WIDTH = 1,
parameter FLAGS_WIDTH = 1,
parameter TAG_WIDTH = 8,
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
parameter CORE_QUEUE_SIZE= 8,
@ -50,7 +50,7 @@ module VX_mem_scheduler #(
input wire [CORE_REQS-1:0] core_req_mask,
input wire [CORE_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] core_req_addr,
input wire [CORE_REQS-1:0][ATYPE_WIDTH-1:0] core_req_atype,
input wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] core_req_flags,
input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data,
input wire [TAG_WIDTH-1:0] core_req_tag,
output wire core_req_ready,
@ -72,7 +72,7 @@ module VX_mem_scheduler #(
output wire [MEM_CHANNELS-1:0] mem_req_mask,
output wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen,
output wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype,
output wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags,
output wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data,
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
@ -113,7 +113,7 @@ module VX_mem_scheduler #(
wire reqq_rw;
wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen;
wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr;
wire [CORE_REQS-1:0][ATYPE_WIDTH-1:0] reqq_atype;
wire [CORE_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags;
wire [CORE_REQS-1:0][WORD_WIDTH-1:0] reqq_data;
wire [REQQ_TAG_WIDTH-1:0] reqq_tag;
wire reqq_ready;
@ -123,7 +123,7 @@ module VX_mem_scheduler #(
wire reqq_rw_s;
wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s;
wire [MERGED_REQS-1:0][MEM_ADDR_WIDTH-1:0] reqq_addr_s;
wire [MERGED_REQS-1:0][ATYPE_WIDTH-1:0] reqq_atype_s;
wire [MERGED_REQS-1:0][FLAGS_WIDTH-1:0] reqq_flags_s;
wire [MERGED_REQS-1:0][LINE_WIDTH-1:0] reqq_data_s;
wire [MERGED_TAG_WIDTH-1:0] reqq_tag_s;
wire reqq_ready_s;
@ -133,7 +133,7 @@ module VX_mem_scheduler #(
wire mem_req_rw_s;
wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_s;
wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_s;
wire [MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype_s;
wire [MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_s;
wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_s;
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
wire mem_req_ready_s;
@ -168,7 +168,7 @@ module VX_mem_scheduler #(
end
VX_elastic_buffer #(
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + ATYPE_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH),
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + FLAGS_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH),
.SIZE (CORE_QUEUE_SIZE),
.OUT_REG (1)
) req_queue (
@ -176,8 +176,8 @@ module VX_mem_scheduler #(
.reset (reset),
.valid_in (reqq_valid_in),
.ready_in (reqq_ready_in),
.data_in ({core_req_rw, core_req_mask, core_req_byteen, core_req_addr, core_req_atype, core_req_data, reqq_tag_u}),
.data_out ({reqq_rw, reqq_mask, reqq_byteen, reqq_addr, reqq_atype, reqq_data, reqq_tag}),
.data_in ({core_req_rw, core_req_mask, core_req_byteen, core_req_addr, core_req_flags, core_req_data, reqq_tag_u}),
.data_out ({reqq_rw, reqq_mask, reqq_byteen, reqq_addr, reqq_flags, reqq_data, reqq_tag}),
.valid_out(reqq_valid),
.ready_out(reqq_ready)
);
@ -231,7 +231,7 @@ module VX_mem_scheduler #(
.DATA_IN_SIZE (WORD_SIZE),
.DATA_OUT_SIZE (LINE_SIZE),
.ADDR_WIDTH (ADDR_WIDTH),
.ATYPE_WIDTH (ATYPE_WIDTH),
.FLAGS_WIDTH (FLAGS_WIDTH),
.TAG_WIDTH (REQQ_TAG_WIDTH),
.UUID_WIDTH (UUID_WIDTH),
.QUEUE_SIZE (MEM_QUEUE_SIZE)
@ -245,7 +245,7 @@ module VX_mem_scheduler #(
.in_req_rw (reqq_rw),
.in_req_byteen (reqq_byteen),
.in_req_addr (reqq_addr),
.in_req_atype (reqq_atype),
.in_req_flags (reqq_flags),
.in_req_data (reqq_data),
.in_req_tag (reqq_tag),
.in_req_ready (reqq_ready),
@ -263,7 +263,7 @@ module VX_mem_scheduler #(
.out_req_rw (reqq_rw_s),
.out_req_byteen (reqq_byteen_s),
.out_req_addr (reqq_addr_s),
.out_req_atype (reqq_atype_s),
.out_req_flags (reqq_flags_s),
.out_req_data (reqq_data_s),
.out_req_tag (reqq_tag_s),
.out_req_ready (reqq_ready_s),
@ -283,7 +283,7 @@ module VX_mem_scheduler #(
assign reqq_rw_s = reqq_rw;
assign reqq_byteen_s= reqq_byteen;
assign reqq_addr_s = reqq_addr;
assign reqq_atype_s = reqq_atype;
assign reqq_flags_s = reqq_flags;
assign reqq_data_s = reqq_data;
assign reqq_tag_s = reqq_tag;
assign reqq_ready = reqq_ready_s;
@ -301,7 +301,7 @@ module VX_mem_scheduler #(
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][FLAGS_WIDTH-1:0] mem_req_flags_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b;
wire [BATCH_SEL_WIDTH-1:0] req_batch_idx;
@ -313,13 +313,13 @@ module VX_mem_scheduler #(
assign mem_req_mask_b[i][j] = reqq_mask_s[r];
assign mem_req_byteen_b[i][j] = reqq_byteen_s[r];
assign mem_req_addr_b[i][j] = reqq_addr_s[r];
assign mem_req_atype_b[i][j] = reqq_atype_s[r];
assign mem_req_flags_b[i][j] = reqq_flags_s[r];
assign mem_req_data_b[i][j] = reqq_data_s[r];
end else begin
assign mem_req_mask_b[i][j] = 0;
assign mem_req_byteen_b[i][j] = '0;
assign mem_req_addr_b[i][j] = '0;
assign mem_req_atype_b[i][j] = '0;
assign mem_req_flags_b[i][j] = '0;
assign mem_req_data_b[i][j] = '0;
end
end
@ -329,7 +329,7 @@ module VX_mem_scheduler #(
assign mem_req_rw_s = reqq_rw_s;
assign mem_req_byteen_s = mem_req_byteen_b[req_batch_idx];
assign mem_req_addr_s = mem_req_addr_b[req_batch_idx];
assign mem_req_atype_s = mem_req_atype_b[req_batch_idx];
assign mem_req_flags_s = mem_req_flags_b[req_batch_idx];
assign mem_req_data_s = mem_req_data_b[req_batch_idx];
if (MEM_BATCHES != 1) begin
@ -390,7 +390,7 @@ module VX_mem_scheduler #(
assign reqq_ready_s = req_sent_all;
VX_elastic_buffer #(
.DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + ATYPE_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH),
.DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + FLAGS_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
) mem_req_buf (
@ -398,8 +398,8 @@ module VX_mem_scheduler #(
.reset (reset),
.valid_in (mem_req_valid_s),
.ready_in (mem_req_ready_s),
.data_in ({mem_req_mask_s, mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_atype_s, mem_req_data_s, mem_req_tag_s}),
.data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_atype, mem_req_data, mem_req_tag}),
.data_in ({mem_req_mask_s, mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_flags_s, mem_req_data_s, mem_req_tag_s}),
.data_out ({mem_req_mask, mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_flags, mem_req_data, mem_req_tag}),
.valid_out (mem_req_valid),
.ready_out (mem_req_ready)
);

View file

@ -80,7 +80,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
wire [NUM_REQS-1:0][BANK_ADDR_WIDTH-1:0] req_bank_addr;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign req_bank_addr[i] = mem_bus_if[i].req_data.addr[BANK_SEL_BITS +: BANK_ADDR_WIDTH];
`UNUSED_VAR (mem_bus_if[i].req_data.atype)
`UNUSED_VAR (mem_bus_if[i].req_data.flags)
end
// bank requests dispatch

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -17,10 +17,10 @@ module VX_local_mem_top import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
// Size of local memory in bytes
parameter SIZE = (1024*16*8),
parameter SIZE = (1024*16*8),
// Number of word requests per cycle
parameter NUM_REQS = 4,
parameter NUM_REQS = 4,
// Number of banks
parameter NUM_BANKS = 4,
@ -34,7 +34,7 @@ module VX_local_mem_top import VX_gpu_pkg::*; #(
// Request tag size
parameter TAG_WIDTH = 16
) (
) (
input wire clk,
input wire reset,
@ -43,7 +43,7 @@ module VX_local_mem_top import VX_gpu_pkg::*; #(
input wire [NUM_REQS-1:0] mem_req_rw,
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] mem_req_byteen,
input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] mem_req_addr,
input wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_atype,
input wire [NUM_REQS-1:0][`MEM_REQ_FLAGS_WIDTH-1:0] mem_req_flags,
input wire [NUM_REQS-1:0][WORD_SIZE*8-1:0] mem_req_data,
input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] mem_req_tag,
output wire [NUM_REQS-1:0] mem_req_ready,
@ -65,7 +65,7 @@ module VX_local_mem_top import VX_gpu_pkg::*; #(
assign mem_bus_if[i].req_data.rw = mem_req_rw[i];
assign mem_bus_if[i].req_data.byteen = mem_req_byteen[i];
assign mem_bus_if[i].req_data.addr = mem_req_addr[i];
assign mem_bus_if[i].req_data.atype = mem_req_atype[i];
assign mem_bus_if[i].req_data.flags = mem_req_flags[i];
assign mem_bus_if[i].req_data.data = mem_req_data[i];
assign mem_bus_if[i].req_data.tag = mem_req_tag[i];
assign mem_req_ready[i] = mem_bus_if[i].req_ready;
@ -86,9 +86,9 @@ module VX_local_mem_top import VX_gpu_pkg::*; #(
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.ADDR_WIDTH (ADDR_WIDTH),
.UUID_WIDTH (UUID_WIDTH),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH)
) local_mem (
) local_mem (
.clk (clk),
.reset (reset),
.mem_bus_if (mem_bus_if)

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -13,14 +13,14 @@
`include "VX_define.vh"
module VX_mem_arb #(
parameter NUM_INPUTS = 1,
module VX_mem_arb #(
parameter NUM_INPUTS = 1,
parameter NUM_OUTPUTS = 1,
parameter DATA_SIZE = 1,
parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
parameter ADDR_WIDTH = (MEM_ADDR_WIDTH-`CLOG2(DATA_SIZE)),
parameter TAG_WIDTH = 1,
parameter TAG_SEL_IDX = 0,
parameter TAG_WIDTH = 1,
parameter TAG_SEL_IDX = 0,
parameter REQ_OUT_BUF = 0,
parameter RSP_OUT_BUF = 0,
parameter `STRING ARBITER = "R"
@ -30,10 +30,10 @@ module VX_mem_arb #(
VX_mem_bus_if.slave bus_in_if [NUM_INPUTS],
VX_mem_bus_if.master bus_out_if [NUM_OUTPUTS]
);
);
localparam DATA_WIDTH = (8 * DATA_SIZE);
localparam LOG_NUM_REQS = `ARB_SEL_BITS(NUM_INPUTS, NUM_OUTPUTS);
localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `ADDR_TYPE_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
localparam RSP_DATAW = TAG_WIDTH + DATA_WIDTH;
`STATIC_ASSERT ((NUM_INPUTS >= NUM_OUTPUTS), ("invalid parameter"))
@ -53,14 +53,14 @@ module VX_mem_arb #(
bus_in_if[i].req_data.rw,
bus_in_if[i].req_data.byteen,
bus_in_if[i].req_data.addr,
bus_in_if[i].req_data.atype,
bus_in_if[i].req_data.flags,
bus_in_if[i].req_data.data,
bus_in_if[i].req_data.tag
};
assign bus_in_if[i].req_ready = req_ready_in[i];
end
VX_stream_arb #(
VX_stream_arb #(
.NUM_INPUTS (NUM_INPUTS),
.NUM_OUTPUTS (NUM_OUTPUTS),
.DATAW (REQ_DATAW),
@ -80,7 +80,7 @@ module VX_mem_arb #(
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
wire [TAG_WIDTH-1:0] req_tag_out;
VX_bits_insert #(
VX_bits_insert #(
.N (TAG_WIDTH),
.S (LOG_NUM_REQS),
.POS (TAG_SEL_IDX)
@ -94,8 +94,8 @@ module VX_mem_arb #(
bus_out_if[i].req_data.rw,
bus_out_if[i].req_data.byteen,
bus_out_if[i].req_data.addr,
bus_out_if[i].req_data.atype,
bus_out_if[i].req_data.data,
bus_out_if[i].req_data.flags,
bus_out_if[i].req_data.data,
req_tag_out
} = req_data_out[i];
assign req_ready_out[i] = bus_out_if[i].req_ready;
@ -117,7 +117,7 @@ module VX_mem_arb #(
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
wire [TAG_WIDTH-1:0] rsp_tag_out;
VX_bits_remove #(
VX_bits_remove #(
.N (TAG_WIDTH + LOG_NUM_REQS),
.S (LOG_NUM_REQS),
.POS (TAG_SEL_IDX)
@ -135,7 +135,7 @@ module VX_mem_arb #(
end else begin
assign rsp_sel_in[i] = '0;
end
end
end
VX_stream_switch #(
.NUM_INPUTS (NUM_OUTPUTS),
@ -155,11 +155,11 @@ module VX_mem_arb #(
);
end else begin
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
assign rsp_valid_in[i] = bus_out_if[i].rsp_valid;
assign rsp_data_in[i] = {
bus_out_if[i].rsp_data.tag,
bus_out_if[i].rsp_data.tag,
bus_out_if[i].rsp_data.data
};
assign bus_out_if[i].rsp_ready = rsp_ready_in[i];
@ -184,11 +184,11 @@ module VX_mem_arb #(
);
end
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
assign bus_in_if[i].rsp_valid = rsp_valid_out[i];
assign {
bus_in_if[i].rsp_data.tag,
bus_in_if[i].rsp_data.tag,
bus_in_if[i].rsp_data.data
} = rsp_data_out[i];
assign rsp_ready_out[i] = bus_in_if[i].rsp_ready;

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,7 +15,7 @@
interface VX_mem_bus_if #(
parameter DATA_SIZE = 1,
parameter ATYPE_WIDTH= `ADDR_TYPE_WIDTH,
parameter FLAGS_WIDTH= `MEM_REQ_FLAGS_WIDTH,
parameter TAG_WIDTH = 1,
parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
parameter ADDR_WIDTH = MEM_ADDR_WIDTH - `CLOG2(DATA_SIZE)
@ -25,7 +25,7 @@ interface VX_mem_bus_if #(
logic rw;
logic [DATA_SIZE-1:0] byteen;
logic [ADDR_WIDTH-1:0] addr;
logic [ATYPE_WIDTH-1:0] atype;
logic [FLAGS_WIDTH-1:0] flags;
logic [DATA_SIZE*8-1:0] data;
logic [TAG_WIDTH-1:0] tag;
} req_data_t;

View file

@ -31,7 +31,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
VX_mem_bus_if.master bus_out_if [NUM_REQS]
);
localparam DATA_WIDTH = (8 * DATA_SIZE);
localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `ADDR_TYPE_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
localparam RSP_DATAW = TAG_WIDTH + DATA_WIDTH;
// handle requests ////////////////////////////////////////////////////////