mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
extending memory interface with address type
This commit is contained in:
parent
dc19d25bcc
commit
6556e8c66d
16 changed files with 155 additions and 58 deletions
|
@ -282,6 +282,10 @@
|
|||
`define L1_ENABLE
|
||||
`endif
|
||||
|
||||
`define ADDR_TYPE_IO 0
|
||||
`define ADDR_TYPE_LOCAL 1
|
||||
`define ADDR_TYPE_WIDTH (`LMEM_ENABLED + 1)
|
||||
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
|
||||
`define VX_MEM_ADDR_WIDTH (`MEM_ADDR_WIDTH - `CLOG2(`L3_LINE_SIZE))
|
||||
`define VX_MEM_DATA_WIDTH (`L3_LINE_SIZE * 8)
|
||||
|
@ -333,6 +337,7 @@
|
|||
assign dst.req_data.rw = src.req_data.rw; \
|
||||
assign dst.req_data.byteen = src.req_data.byteen; \
|
||||
assign dst.req_data.addr = src.req_data.addr; \
|
||||
assign dst.req_data.atype = src.req_data.atype; \
|
||||
assign dst.req_data.data = src.req_data.data; \
|
||||
if (TD != TS) \
|
||||
assign dst.req_data.tag = {src.req_data.tag, {(TD-TS){1'b0}}}; \
|
||||
|
|
|
@ -102,6 +102,7 @@ module Vortex import VX_gpu_pkg::*; (
|
|||
assign mem_req_data = mem_bus_if.req_data.data;
|
||||
assign mem_req_tag = mem_bus_if.req_data.tag;
|
||||
assign mem_bus_if.req_ready = mem_req_ready;
|
||||
`UNUSED_VAR (mem_bus_if.req_data.atype)
|
||||
|
||||
assign mem_bus_if.rsp_valid = mem_rsp_valid;
|
||||
assign mem_bus_if.rsp_data.data = mem_rsp_data;
|
||||
|
|
|
@ -514,6 +514,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.mem_rsp_tag_out (cci_vx_mem_bus_if[1].rsp_data.tag),
|
||||
.mem_rsp_ready_out (cci_vx_mem_bus_if[1].rsp_ready)
|
||||
);
|
||||
|
||||
assign cci_vx_mem_bus_if[1].req_data.atype = '0;
|
||||
`UNUSED_VAR (cci_vx_mem_bus_if[1].req_data.atype)
|
||||
|
||||
//--
|
||||
|
||||
|
@ -563,6 +566,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.mem_rsp_ready_out (cci_vx_mem_bus_if[0].rsp_ready)
|
||||
);
|
||||
|
||||
assign cci_vx_mem_bus_if[0].req_data.atype = '0;
|
||||
`UNUSED_VAR (cci_vx_mem_bus_if[0].req_data.atype)
|
||||
|
||||
//--
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (LMEM_DATA_SIZE),
|
||||
|
@ -631,6 +637,9 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
|
|||
.avs_readdatavalid(avs_readdatavalid)
|
||||
);
|
||||
|
||||
assign mem_bus_if[0].req_data.atype = '0;
|
||||
`UNUSED_VAR (mem_bus_if[0].req_data.atype)
|
||||
|
||||
// CCI-P Read Request ///////////////////////////////////////////////////////////
|
||||
|
||||
reg [CCI_ADDR_WIDTH-1:0] cci_mem_wr_req_ctr;
|
||||
|
|
10
hw/rtl/cache/VX_cache.sv
vendored
10
hw/rtl/cache/VX_cache.sv
vendored
|
@ -100,13 +100,14 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_valid[i] = core_bus_if[i].req_valid;
|
||||
assign core_req_addr[i] = core_bus_if[i].req_data.addr;
|
||||
assign core_req_rw[i] = core_bus_if[i].req_data.rw;
|
||||
assign core_req_byteen[i] = core_bus_if[i].req_data.byteen;
|
||||
assign core_req_addr[i] = core_bus_if[i].req_data.addr;
|
||||
assign core_req_data[i] = core_bus_if[i].req_data.data;
|
||||
assign core_req_tag[i] = core_bus_if[i].req_data.tag;
|
||||
assign core_bus_if[i].req_ready = core_req_ready[i];
|
||||
end
|
||||
`UNUSED_VAR (core_bus_if[i].req_data.atype)
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -163,6 +164,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.valid_out (mem_bus_if.req_valid),
|
||||
.ready_out (mem_bus_if.req_ready)
|
||||
);
|
||||
|
||||
assign mem_bus_if.req_data.atype = '0;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
@ -288,7 +291,8 @@ module VX_cache import VX_gpu_pkg::*; #(
|
|||
.NUM_INPUTS (NUM_REQS),
|
||||
.NUM_OUTPUTS (NUM_BANKS),
|
||||
.DATAW (CORE_REQ_DATAW),
|
||||
.PERF_CTR_BITS (`PERF_CTR_BITS)
|
||||
.PERF_CTR_BITS (`PERF_CTR_BITS),
|
||||
.OUT_BUF ((NUM_REQS > 4) ? 2 : 0)
|
||||
) req_xbar (
|
||||
.clk (clk),
|
||||
.reset (req_xbar_reset),
|
||||
|
|
27
hw/rtl/cache/VX_cache_bypass.sv
vendored
27
hw/rtl/cache/VX_cache_bypass.sv
vendored
|
@ -56,7 +56,7 @@ module VX_cache_bypass #(
|
|||
localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1);
|
||||
|
||||
localparam REQ_SEL_BITS = `CLOG2(NUM_REQS);
|
||||
localparam MUX_DATAW = CORE_TAG_WIDTH + CORE_DATA_WIDTH + WORD_SIZE + CORE_ADDR_WIDTH + 1;
|
||||
localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `ADDR_TYPE_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH;
|
||||
|
||||
localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE;
|
||||
localparam WSEL_BITS = `CLOG2(WORDS_PER_LINE);
|
||||
|
@ -65,9 +65,6 @@ module VX_cache_bypass #(
|
|||
localparam MEM_TAG_ID_BITS = REQ_SEL_BITS + WSEL_BITS + CORE_TAG_ID_BITS;
|
||||
localparam MEM_TAG_BYPASS_BITS = UUID_WIDTH + MEM_TAG_ID_BITS;
|
||||
|
||||
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
||||
localparam MEM_ADDRW = `MEM_ADDR_WIDTH - MEM_ASHIFT;
|
||||
|
||||
`STATIC_ASSERT(0 == (`IO_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
|
||||
// handle core requests ///////////////////////////////////////////////////
|
||||
|
@ -83,8 +80,7 @@ module VX_cache_bypass #(
|
|||
if (PASSTHRU != 0) begin
|
||||
assign core_req_nc_idxs[i] = 1'b1;
|
||||
end else if (NC_ENABLE) begin
|
||||
wire [MEM_ADDRW-1:0] block_addr = core_bus_in_if[i].req_data.addr[CORE_ADDR_WIDTH-1 -: MEM_ADDRW];
|
||||
assign core_req_nc_idxs[i] = (block_addr >= MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT));
|
||||
assign core_req_nc_idxs[i] = core_bus_in_if[i].req_data.atype[`ADDR_TYPE_IO];
|
||||
end else begin
|
||||
assign core_req_nc_idxs[i] = 1'b0;
|
||||
end
|
||||
|
@ -118,15 +114,17 @@ module VX_cache_bypass #(
|
|||
wire mem_req_out_rw;
|
||||
wire [LINE_SIZE-1:0] mem_req_out_byteen;
|
||||
wire [`CS_MEM_ADDR_WIDTH-1:0] mem_req_out_addr;
|
||||
wire [`ADDR_TYPE_WIDTH-1:0] mem_req_out_atype;
|
||||
wire [`CS_LINE_WIDTH-1:0] mem_req_out_data;
|
||||
wire [MEM_TAG_OUT_WIDTH-1:0] mem_req_out_tag;
|
||||
wire mem_req_out_ready;
|
||||
|
||||
wire [CORE_TAG_WIDTH-1:0] core_req_nc_sel_tag;
|
||||
wire [CORE_DATA_WIDTH-1:0] core_req_nc_sel_data;
|
||||
|
||||
wire core_req_nc_sel_rw;
|
||||
wire [WORD_SIZE-1:0] core_req_nc_sel_byteen;
|
||||
wire [CORE_ADDR_WIDTH-1:0] core_req_nc_sel_addr;
|
||||
wire core_req_nc_sel_rw;
|
||||
wire [`ADDR_TYPE_WIDTH-1:0] core_req_nc_sel_atype;
|
||||
wire [CORE_DATA_WIDTH-1:0] core_req_nc_sel_data;
|
||||
wire [CORE_TAG_WIDTH-1:0] core_req_nc_sel_tag;
|
||||
|
||||
wire [NUM_REQS-1:0][MUX_DATAW-1:0] core_req_nc_mux_in;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
|
@ -134,6 +132,7 @@ module VX_cache_bypass #(
|
|||
core_bus_in_if[i].req_data.rw,
|
||||
core_bus_in_if[i].req_data.byteen,
|
||||
core_bus_in_if[i].req_data.addr,
|
||||
core_bus_in_if[i].req_data.atype,
|
||||
core_bus_in_if[i].req_data.data,
|
||||
core_bus_in_if[i].req_data.tag
|
||||
};
|
||||
|
@ -143,6 +142,7 @@ module VX_cache_bypass #(
|
|||
core_req_nc_sel_rw,
|
||||
core_req_nc_sel_byteen,
|
||||
core_req_nc_sel_addr,
|
||||
core_req_nc_sel_atype,
|
||||
core_req_nc_sel_data,
|
||||
core_req_nc_sel_tag
|
||||
} = core_req_nc_mux_in[core_req_nc_idx];
|
||||
|
@ -152,6 +152,7 @@ module VX_cache_bypass #(
|
|||
assign mem_req_out_valid = mem_bus_in_if.req_valid || core_req_nc_valid;
|
||||
assign mem_req_out_rw = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.rw : core_req_nc_sel_rw;
|
||||
assign mem_req_out_addr = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.addr : core_req_nc_sel_addr[WSEL_BITS +: MEM_ADDR_WIDTH];
|
||||
assign mem_req_out_atype = mem_bus_in_if.req_valid ? mem_bus_in_if.req_data.atype : core_req_nc_sel_atype;
|
||||
|
||||
wire [MEM_TAG_ID_BITS-1:0] mem_req_tag_id_bypass;
|
||||
|
||||
|
@ -218,7 +219,7 @@ module VX_cache_bypass #(
|
|||
assign mem_bus_in_if.req_ready = mem_req_out_ready;
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH),
|
||||
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `ADDR_TYPE_WIDTH + `CS_LINE_WIDTH + MEM_TAG_OUT_WIDTH),
|
||||
.SIZE ((!DIRECT_PASSTHRU) ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
|
@ -226,8 +227,8 @@ module VX_cache_bypass #(
|
|||
.reset (reset),
|
||||
.valid_in (mem_req_out_valid),
|
||||
.ready_in (mem_req_out_ready),
|
||||
.data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_data, mem_req_out_tag}),
|
||||
.data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}),
|
||||
.data_in ({mem_req_out_rw, mem_req_out_byteen, mem_req_out_addr, mem_req_out_atype, mem_req_out_data, mem_req_out_tag}),
|
||||
.data_out ({mem_bus_out_if.req_data.rw, mem_bus_out_if.req_data.byteen, mem_bus_out_if.req_data.addr, mem_bus_out_if.req_data.atype, mem_bus_out_if.req_data.data, mem_bus_out_if.req_data.tag}),
|
||||
.valid_out (mem_bus_out_if.req_valid),
|
||||
.ready_out (mem_bus_out_if.req_ready)
|
||||
);
|
||||
|
|
3
hw/rtl/cache/VX_cache_top.sv
vendored
3
hw/rtl/cache/VX_cache_top.sv
vendored
|
@ -69,6 +69,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
input wire [NUM_REQS-1:0] core_req_rw,
|
||||
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire [NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] core_req_atype,
|
||||
input wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag,
|
||||
output wire [NUM_REQS-1:0] core_req_ready,
|
||||
|
@ -110,6 +111,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
assign core_bus_if[i].req_data.rw = core_req_rw[i];
|
||||
assign core_bus_if[i].req_data.byteen = core_req_byteen[i];
|
||||
assign core_bus_if[i].req_data.addr = core_req_addr[i];
|
||||
assign core_bus_if[i].req_data.atype = core_req_atype[i];
|
||||
assign core_bus_if[i].req_data.data = core_req_data[i];
|
||||
assign core_bus_if[i].req_data.tag = core_req_tag[i];
|
||||
assign core_req_ready[i] = core_bus_if[i].req_ready;
|
||||
|
@ -131,6 +133,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
|
|||
assign mem_req_data = mem_bus_if.req_data.data;
|
||||
assign mem_req_tag = mem_bus_if.req_data.tag;
|
||||
assign mem_bus_if.req_ready = mem_req_ready;
|
||||
`UNUSED_VAR (mem_bus_if.req_data.atype)
|
||||
|
||||
// Memory response
|
||||
assign mem_bus_if.rsp_valid = mem_rsp_valid;
|
||||
|
|
|
@ -32,6 +32,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
output wire [DCACHE_NUM_REQS-1:0] dcache_req_rw,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE-1:0] dcache_req_byteen,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_ADDR_WIDTH-1:0] dcache_req_addr,
|
||||
output wire [DCACHE_NUM_REQS-1:0][`ADDR_TYPE_WIDTH-1:0] dcache_req_atype,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_WORD_SIZE*8-1:0] dcache_req_data,
|
||||
output wire [DCACHE_NUM_REQS-1:0][DCACHE_TAG_WIDTH-1:0] dcache_req_tag,
|
||||
input wire [DCACHE_NUM_REQS-1:0] dcache_req_ready,
|
||||
|
@ -100,6 +101,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
assign dcache_req_rw[i] = dcache_bus_if[i].req_data.rw;
|
||||
assign dcache_req_byteen[i] = dcache_bus_if[i].req_data.byteen;
|
||||
assign dcache_req_addr[i] = dcache_bus_if[i].req_data.addr;
|
||||
assign dcache_req_atype[i] = dcache_bus_if[i].req_data.atype;
|
||||
assign dcache_req_data[i] = dcache_bus_if[i].req_data.data;
|
||||
assign dcache_req_tag[i] = dcache_bus_if[i].req_data.tag;
|
||||
assign dcache_bus_if[i].req_ready = dcache_req_ready[i];
|
||||
|
@ -122,6 +124,7 @@ module VX_core_top import VX_gpu_pkg::*; #(
|
|||
assign icache_req_data = icache_bus_if.req_data.data;
|
||||
assign icache_req_tag = icache_bus_if.req_data.tag;
|
||||
assign icache_bus_if.req_ready = icache_req_ready;
|
||||
`UNUSED_VAR (icache_bus_if.req_data.atype)
|
||||
|
||||
assign icache_bus_if.rsp_valid = icache_rsp_valid;
|
||||
assign icache_bus_if.rsp_data.tag = icache_rsp_tag;
|
||||
|
|
|
@ -113,6 +113,7 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
.ready_out (icache_bus_if.req_ready)
|
||||
);
|
||||
|
||||
assign icache_bus_if.req_data.atype = '0;
|
||||
assign icache_bus_if.req_data.rw = 0;
|
||||
assign icache_bus_if.req_data.byteen = 4'b1111;
|
||||
assign icache_bus_if.req_data.data = '0;
|
||||
|
|
|
@ -30,10 +30,6 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
`STATIC_ASSERT(0 == (`LMEM_BASE_ADDR % (1 << `LMEM_LOG_SIZE)), ("invalid parameter"))
|
||||
|
||||
localparam LMEM_ADDR_WIDTH = `LMEM_LOG_SIZE - `CLOG2(DCACHE_WORD_SIZE);
|
||||
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
||||
localparam MEM_ADDRW = `MEM_ADDR_WIDTH - MEM_ASHIFT;
|
||||
localparam LMEM_START_B = MEM_ADDRW'(`XLEN'(`LMEM_BASE_ADDR) >> MEM_ASHIFT);
|
||||
localparam LMEM_END_B = MEM_ADDRW'((`XLEN'(`LMEM_BASE_ADDR) + (1 << `LMEM_LOG_SIZE)) >> MEM_ASHIFT);
|
||||
|
||||
VX_mem_bus_if #(
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
|
@ -47,11 +43,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
`RESET_RELAY (switch_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
|
||||
wire [MEM_ADDRW-1:0] block_addr = dcache_bus_in_if[i].req_data.addr[DCACHE_ADDR_WIDTH-1 -: MEM_ADDRW];
|
||||
wire bus_sel = (block_addr >= LMEM_START_B) && (block_addr < LMEM_END_B);
|
||||
|
||||
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
|
||||
VX_mem_switch #(
|
||||
.NUM_REQS (2),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
|
@ -62,7 +54,7 @@ module VX_lmem_unit import VX_gpu_pkg::*; #(
|
|||
) lmem_switch (
|
||||
.clk (clk),
|
||||
.reset (switch_reset),
|
||||
.bus_sel (bus_sel),
|
||||
.bus_sel (dcache_bus_in_if[i].req_data.atype[`ADDR_TYPE_LOCAL]),
|
||||
.bus_in_if (dcache_bus_in_if[i]),
|
||||
.bus_out_if (switch_out_bus_if[i * 2 +: 2])
|
||||
);
|
||||
|
|
|
@ -37,6 +37,8 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam LSUQ_SIZEW = `LOG2UP(`LSUQ_IN_SIZE);
|
||||
localparam REQ_ASHIFT = `CLOG2(WORD_SIZE);
|
||||
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
||||
localparam MEM_ADDRW = `MEM_ADDR_WIDTH - MEM_ASHIFT;
|
||||
|
||||
// tag_id = wid + PC + rd + op_type + align + pid + pkt_addr
|
||||
localparam TAG_ID_WIDTH = `NW_WIDTH + `XLEN + `NR_BITS + `INST_LSU_BITS + (NUM_LANES * (REQ_ASHIFT)) + PID_WIDTH + LSUQ_SIZEW;
|
||||
|
@ -88,6 +90,22 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
assign full_addr[i] = execute_if[block_idx].data.rs1_data[i][`XLEN-1:0] + execute_if[block_idx].data.imm;
|
||||
end
|
||||
|
||||
// address type calculation
|
||||
|
||||
wire [NUM_LANES-1:0][`ADDR_TYPE_WIDTH-1:0] mem_req_atype;
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
wire [MEM_ADDRW-1:0] block_addr = full_addr[i][MEM_ASHIFT +: MEM_ADDRW];
|
||||
// is I/O address
|
||||
wire [MEM_ADDRW-1:0] io_addr_start = MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT);
|
||||
assign mem_req_atype[i][`ADDR_TYPE_IO] = (block_addr >= io_addr_start);
|
||||
`ifdef LMEM_ENABLE
|
||||
// is local memory address
|
||||
wire [MEM_ADDRW-1:0] lmem_addr_start = MEM_ADDRW'(`XLEN'(`LMEM_BASE_ADDR) >> MEM_ASHIFT);
|
||||
wire [MEM_ADDRW-1:0] lmem_addr_end = MEM_ADDRW'((`XLEN'(`LMEM_BASE_ADDR) + `XLEN'(1 << `LMEM_LOG_SIZE)) >> MEM_ASHIFT);
|
||||
assign mem_req_atype[i][`ADDR_TYPE_LOCAL] = (block_addr >= lmem_addr_start) && (block_addr < lmem_addr_end);
|
||||
`endif
|
||||
end
|
||||
|
||||
wire mem_req_empty;
|
||||
wire st_rsp_ready;
|
||||
wire lsu_valid, lsu_ready;
|
||||
|
@ -277,6 +295,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
wire [DCACHE_CHANNELS-1:0] cache_req_rw;
|
||||
wire [DCACHE_CHANNELS-1:0][DCACHE_WORD_SIZE-1:0] cache_req_byteen;
|
||||
wire [DCACHE_CHANNELS-1:0][DCACHE_ADDR_WIDTH-1:0] cache_req_addr;
|
||||
wire [DCACHE_CHANNELS-1:0][`ADDR_TYPE_WIDTH-1:0] cache_req_atype;
|
||||
wire [DCACHE_CHANNELS-1:0][(DCACHE_WORD_SIZE*8)-1:0] cache_req_data;
|
||||
wire [DCACHE_CHANNELS-1:0][DCACHE_TAG_WIDTH-1:0] cache_req_tag;
|
||||
wire [DCACHE_CHANNELS-1:0] cache_req_ready;
|
||||
|
@ -294,6 +313,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
.WORD_SIZE (WORD_SIZE),
|
||||
.LINE_SIZE (DCACHE_WORD_SIZE),
|
||||
.ADDR_WIDTH (ADDR_WIDTH),
|
||||
.ATYPE_WIDTH (`ADDR_TYPE_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.CORE_QUEUE_SIZE (`LSUQ_IN_SIZE),
|
||||
.MEM_QUEUE_SIZE (`LSUQ_OUT_SIZE),
|
||||
|
@ -310,6 +330,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
.core_req_mask (mem_req_mask),
|
||||
.core_req_byteen(mem_req_byteen),
|
||||
.core_req_addr (mem_req_addr),
|
||||
.core_req_atype (mem_req_atype),
|
||||
.core_req_data (mem_req_data),
|
||||
.core_req_tag (mem_req_tag),
|
||||
.core_req_ready (mem_req_ready),
|
||||
|
@ -330,6 +351,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
.mem_req_rw (cache_req_rw),
|
||||
.mem_req_byteen (cache_req_byteen),
|
||||
.mem_req_addr (cache_req_addr),
|
||||
.mem_req_atype (cache_req_atype),
|
||||
.mem_req_data (cache_req_data),
|
||||
.mem_req_tag (cache_req_tag),
|
||||
.mem_req_ready (cache_req_ready),
|
||||
|
@ -346,6 +368,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
assign cache_bus_if[block_idx * DCACHE_CHANNELS + i].req_data.rw = cache_req_rw[i];
|
||||
assign cache_bus_if[block_idx * DCACHE_CHANNELS + i].req_data.byteen = cache_req_byteen[i];
|
||||
assign cache_bus_if[block_idx * DCACHE_CHANNELS + i].req_data.addr = cache_req_addr[i];
|
||||
assign cache_bus_if[block_idx * DCACHE_CHANNELS + i].req_data.atype = cache_req_atype[i];
|
||||
assign cache_bus_if[block_idx * DCACHE_CHANNELS + i].req_data.data = cache_req_data[i];
|
||||
assign cache_bus_if[block_idx * DCACHE_CHANNELS + i].req_data.tag = cache_req_tag[i];
|
||||
assign cache_req_ready[i] = cache_bus_if[block_idx * DCACHE_CHANNELS + i].req_ready;
|
||||
|
@ -483,20 +506,24 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
if (mem_req_rw) begin
|
||||
`TRACE(1, ("%d: D$%0d Wr Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if[block_idx].data.wid, execute_if[block_idx].data.PC, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h, byteen=0x%0h, data=", mem_req_tag, mem_req_byteen));
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
|
||||
`TRACE(1, (", byteen=0x%0h, data=", mem_req_byteen));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES);
|
||||
`TRACE(1, (" (#%0d)\n", execute_if[block_idx].data.uuid));
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_req_tag, execute_if[block_idx].data.uuid));
|
||||
end else begin
|
||||
`TRACE(1, ("%d: D$%0d Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if[block_idx].data.wid, execute_if[block_idx].data.PC, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h, byteen=0x%0h, rd=%0d (#%0d)\n", mem_req_tag, mem_req_byteen, execute_if[block_idx].data.rd, execute_if[block_idx].data.uuid));
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", mem_req_atype, NUM_LANES);
|
||||
`TRACE(1, (", byteen=0x%0h, rd=%0d, tag=0x%0h (#%0d)\n", mem_req_byteen, execute_if[block_idx].data.rd, mem_req_tag, execute_if[block_idx].data.uuid));
|
||||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: D$%0d Rsp: wid=%0d, PC=0x%0h, tmask=%b, tag=0x%0h, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, CORE_ID, rsp_wid, rsp_pc, mem_rsp_mask, mem_rsp_tag, rsp_rd, mem_rsp_sop, mem_rsp_eop));
|
||||
`TRACE(1, ("%d: D$%0d Rsp: wid=%0d, PC=0x%0h, tmask=%b, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, CORE_ID, rsp_wid, rsp_pc, mem_rsp_mask, rsp_rd, mem_rsp_sop, mem_rsp_eop));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES);
|
||||
`TRACE(1, (" (#%0d)\n", rsp_uuid));
|
||||
`TRACE(1, (", tag=0x%0h (#%0d)\n", mem_rsp_tag, rsp_uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -18,6 +18,7 @@ module VX_mem_coalescer #(
|
|||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_REQS = 1,
|
||||
parameter ADDR_WIDTH = 32,
|
||||
parameter ATYPE_WIDTH = 1,
|
||||
parameter DATA_IN_SIZE = 4,
|
||||
parameter DATA_OUT_SIZE = 64,
|
||||
parameter TAG_WIDTH = 8,
|
||||
|
@ -42,6 +43,7 @@ module VX_mem_coalescer #(
|
|||
input wire [NUM_REQS-1:0] in_req_mask,
|
||||
input wire [NUM_REQS-1:0][DATA_IN_SIZE-1:0] in_req_byteen,
|
||||
input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr,
|
||||
input wire [NUM_REQS-1:0][ATYPE_WIDTH-1:0] in_req_atype,
|
||||
input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data,
|
||||
input wire [TAG_WIDTH-1:0] in_req_tag,
|
||||
output wire in_req_ready,
|
||||
|
@ -59,6 +61,7 @@ module VX_mem_coalescer #(
|
|||
output wire [OUT_REQS-1:0] out_req_mask,
|
||||
output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen,
|
||||
output wire [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr,
|
||||
output wire [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype,
|
||||
output wire [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data,
|
||||
output wire [OUT_TAG_WIDTH-1:0] out_req_tag,
|
||||
input wire out_req_ready,
|
||||
|
@ -91,6 +94,7 @@ module VX_mem_coalescer #(
|
|||
logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
|
||||
logic [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
|
||||
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
|
||||
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
|
||||
logic [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data_r, out_req_data_n;
|
||||
logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
|
||||
|
||||
|
@ -107,6 +111,7 @@ module VX_mem_coalescer #(
|
|||
|
||||
logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n;
|
||||
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n;
|
||||
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] seed_atype_r, seed_atype_n;
|
||||
logic [NUM_REQS-1:0] processed_mask_r, processed_mask_n;
|
||||
|
||||
wire [OUT_REQS-1:0][NUM_REQS_W-1:0] seed_idx;
|
||||
|
@ -141,10 +146,12 @@ module VX_mem_coalescer #(
|
|||
state_r <= state_n;
|
||||
out_req_valid_r <= out_req_valid_n;
|
||||
batch_valid_r <= batch_valid_n;
|
||||
seed_addr_r <= seed_addr_n;
|
||||
seed_addr_r <= seed_addr_n;
|
||||
seed_atype_r <= seed_atype_n;
|
||||
out_req_rw_r <= out_req_rw_n;
|
||||
out_req_mask_r <= out_req_mask_n;
|
||||
out_req_addr_r <= out_req_addr_n;
|
||||
out_req_atype_r <= out_req_atype_n;
|
||||
out_req_byteen_r <= out_req_byteen_n;
|
||||
out_req_data_r <= out_req_data_n;
|
||||
out_req_tag_r <= out_req_tag_n;
|
||||
|
@ -171,9 +178,11 @@ module VX_mem_coalescer #(
|
|||
state_n = state_r;
|
||||
out_req_valid_n = out_req_valid_r;
|
||||
seed_addr_n = seed_addr_r;
|
||||
seed_atype_n = seed_atype_r;
|
||||
out_req_rw_n = out_req_rw_r;
|
||||
out_req_mask_n = out_req_mask_r;
|
||||
out_req_addr_n = out_req_addr_r;
|
||||
out_req_atype_n = out_req_atype_r;
|
||||
out_req_byteen_n = out_req_byteen_r;
|
||||
out_req_data_n = out_req_data_r;
|
||||
out_req_tag_n = out_req_tag_r;
|
||||
|
@ -185,6 +194,7 @@ module VX_mem_coalescer #(
|
|||
// find the next seed address
|
||||
for (integer i = 0; i < OUT_REQS; ++i) begin
|
||||
seed_addr_n[i] = in_addr_base[seed_idx[i]];
|
||||
seed_atype_n[i] = in_req_atype[seed_idx[i]];
|
||||
end
|
||||
// wait for pending outgoing request to submit
|
||||
if (out_req_valid && out_req_ready) begin
|
||||
|
@ -220,6 +230,7 @@ module VX_mem_coalescer #(
|
|||
end
|
||||
out_req_mask_n[i] = batch_valid_r[i];
|
||||
out_req_addr_n[i] = seed_addr_r[i];
|
||||
out_req_atype_n[i]= seed_atype_r[i];
|
||||
end
|
||||
if (in_req_ready_n) begin
|
||||
processed_mask_n = '0;
|
||||
|
@ -262,15 +273,14 @@ module VX_mem_coalescer #(
|
|||
);
|
||||
`UNUSED_VAR (ibuf_empty)
|
||||
|
||||
assign out_req_valid = out_req_valid_r;
|
||||
assign out_req_rw = out_req_rw_r;
|
||||
for (genvar i = 0; i < OUT_REQS; ++i) begin
|
||||
assign out_req_mask[i] = out_req_mask_r[i];
|
||||
assign out_req_byteen[i] = out_req_byteen_r[i];
|
||||
assign out_req_addr[i] = out_req_addr_r[i];
|
||||
assign out_req_data[i] = out_req_data_r[i];
|
||||
end
|
||||
assign out_req_tag = out_req_tag_r;
|
||||
assign out_req_valid = out_req_valid_r;
|
||||
assign out_req_rw = out_req_rw_r;
|
||||
assign out_req_mask = out_req_mask_r;
|
||||
assign out_req_byteen = out_req_byteen_r;
|
||||
assign out_req_addr = out_req_addr_r;
|
||||
assign out_req_atype = out_req_atype_r;
|
||||
assign out_req_data = out_req_data_r;
|
||||
assign out_req_tag = out_req_tag_r;
|
||||
|
||||
assign in_req_ready = in_req_ready_n;
|
||||
|
||||
|
@ -341,14 +351,18 @@ module VX_mem_coalescer #(
|
|||
if (out_req_fire) begin
|
||||
if (out_req_rw) begin
|
||||
`TRACE(1, ("%d: %s-out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
|
||||
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS);
|
||||
`TRACE(1, (", byteen="));
|
||||
`TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS);
|
||||
`TRACE(1, (", data="));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS);
|
||||
end else begin
|
||||
`TRACE(1, ("%d: %s-out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
|
||||
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
|
||||
`TRACE(1, (", atype="));
|
||||
`TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS);
|
||||
end
|
||||
`TRACE(1, (", offset="));
|
||||
`TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS);
|
||||
|
|
|
@ -21,6 +21,7 @@ module VX_mem_scheduler #(
|
|||
parameter WORD_SIZE = 4,
|
||||
parameter LINE_SIZE = WORD_SIZE,
|
||||
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
|
||||
parameter ATYPE_WIDTH = 1,
|
||||
parameter TAG_WIDTH = 8,
|
||||
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
|
||||
parameter CORE_QUEUE_SIZE= 8,
|
||||
|
@ -48,6 +49,7 @@ module VX_mem_scheduler #(
|
|||
input wire [CORE_REQS-1:0] core_req_mask,
|
||||
input wire [CORE_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
|
||||
input wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] core_req_addr,
|
||||
input wire [CORE_REQS-1:0][ATYPE_WIDTH-1:0] core_req_atype,
|
||||
input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data,
|
||||
input wire [TAG_WIDTH-1:0] core_req_tag,
|
||||
output wire core_req_ready,
|
||||
|
@ -68,6 +70,7 @@ module VX_mem_scheduler #(
|
|||
output wire [MEM_CHANNELS-1:0] mem_req_rw,
|
||||
output wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype,
|
||||
output wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MEM_CHANNELS-1:0][MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire [MEM_CHANNELS-1:0] mem_req_ready,
|
||||
|
@ -108,6 +111,7 @@ module VX_mem_scheduler #(
|
|||
wire reqq_rw;
|
||||
wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen;
|
||||
wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr;
|
||||
wire [CORE_REQS-1:0][ATYPE_WIDTH-1:0] reqq_atype;
|
||||
wire [CORE_REQS-1:0][WORD_WIDTH-1:0] reqq_data;
|
||||
wire [REQQ_TAG_WIDTH-1:0] reqq_tag;
|
||||
wire reqq_ready;
|
||||
|
@ -117,6 +121,7 @@ module VX_mem_scheduler #(
|
|||
wire reqq_rw_s;
|
||||
wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s;
|
||||
wire [MERGED_REQS-1:0][MEM_ADDR_WIDTH-1:0] reqq_addr_s;
|
||||
wire [MERGED_REQS-1:0][ATYPE_WIDTH-1:0] reqq_atype_s;
|
||||
wire [MERGED_REQS-1:0][LINE_WIDTH-1:0] reqq_data_s;
|
||||
wire [MERGED_TAG_WIDTH-1:0] reqq_tag_s;
|
||||
wire reqq_ready_s;
|
||||
|
@ -126,6 +131,7 @@ module VX_mem_scheduler #(
|
|||
wire mem_req_rw_s;
|
||||
wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_s;
|
||||
wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_s;
|
||||
wire [MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype_s;
|
||||
wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_s;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
|
||||
wire [MEM_CHANNELS-1:0] mem_req_ready_s;
|
||||
|
@ -166,7 +172,7 @@ module VX_mem_scheduler #(
|
|||
end
|
||||
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH),
|
||||
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + ATYPE_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH),
|
||||
.SIZE (CORE_QUEUE_SIZE),
|
||||
.OUT_REG (1)
|
||||
) req_queue (
|
||||
|
@ -174,8 +180,8 @@ module VX_mem_scheduler #(
|
|||
.reset (reset),
|
||||
.valid_in (reqq_valid_in),
|
||||
.ready_in (reqq_ready_in),
|
||||
.data_in ({core_req_rw, core_req_mask, core_req_byteen, core_req_addr, core_req_data, reqq_tag_u}),
|
||||
.data_out ({reqq_rw, reqq_mask, reqq_byteen, reqq_addr, reqq_data, reqq_tag}),
|
||||
.data_in ({core_req_rw, core_req_mask, core_req_byteen, core_req_addr, core_req_atype, core_req_data, reqq_tag_u}),
|
||||
.data_out ({reqq_rw, reqq_mask, reqq_byteen, reqq_addr, reqq_atype, reqq_data, reqq_tag}),
|
||||
.valid_out(reqq_valid),
|
||||
.ready_out(reqq_ready)
|
||||
);
|
||||
|
@ -229,6 +235,7 @@ module VX_mem_scheduler #(
|
|||
.DATA_IN_SIZE (WORD_SIZE),
|
||||
.DATA_OUT_SIZE (LINE_SIZE),
|
||||
.ADDR_WIDTH (ADDR_WIDTH),
|
||||
.ATYPE_WIDTH (`ADDR_TYPE_WIDTH),
|
||||
.TAG_WIDTH (REQQ_TAG_WIDTH),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.QUEUE_SIZE (MEM_QUEUE_SIZE)
|
||||
|
@ -242,6 +249,7 @@ module VX_mem_scheduler #(
|
|||
.in_req_rw (reqq_rw),
|
||||
.in_req_byteen (reqq_byteen),
|
||||
.in_req_addr (reqq_addr),
|
||||
.in_req_atype (reqq_atype),
|
||||
.in_req_data (reqq_data),
|
||||
.in_req_tag (reqq_tag),
|
||||
.in_req_ready (reqq_ready),
|
||||
|
@ -259,6 +267,7 @@ module VX_mem_scheduler #(
|
|||
.out_req_rw (reqq_rw_s),
|
||||
.out_req_byteen (reqq_byteen_s),
|
||||
.out_req_addr (reqq_addr_s),
|
||||
.out_req_atype (reqq_atype_s),
|
||||
.out_req_data (reqq_data_s),
|
||||
.out_req_tag (reqq_tag_s),
|
||||
.out_req_ready (reqq_ready_s),
|
||||
|
@ -277,7 +286,8 @@ module VX_mem_scheduler #(
|
|||
assign reqq_mask_s = reqq_mask;
|
||||
assign reqq_rw_s = reqq_rw;
|
||||
assign reqq_byteen_s= reqq_byteen;
|
||||
assign reqq_addr_s = reqq_addr;
|
||||
assign reqq_addr_s = reqq_addr;
|
||||
assign reqq_atype_s = reqq_atype;
|
||||
assign reqq_data_s = reqq_data;
|
||||
assign reqq_tag_s = reqq_tag;
|
||||
assign reqq_ready = reqq_ready_s;
|
||||
|
@ -295,6 +305,7 @@ module VX_mem_scheduler #(
|
|||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b;
|
||||
|
||||
wire [BATCH_SEL_WIDTH-1:0] req_batch_idx;
|
||||
|
@ -306,11 +317,13 @@ module VX_mem_scheduler #(
|
|||
assign mem_req_mask_b[i][j] = reqq_mask_s[r];
|
||||
assign mem_req_byteen_b[i][j] = reqq_byteen_s[r];
|
||||
assign mem_req_addr_b[i][j] = reqq_addr_s[r];
|
||||
assign mem_req_atype_b[i][j] = reqq_atype_s[r];
|
||||
assign mem_req_data_b[i][j] = reqq_data_s[r];
|
||||
end else begin
|
||||
assign mem_req_mask_b[i][j] = 0;
|
||||
assign mem_req_byteen_b[i][j] = '0;
|
||||
assign mem_req_addr_b[i][j] = '0;
|
||||
assign mem_req_atype_b[i][j] = '0;
|
||||
assign mem_req_data_b[i][j] = '0;
|
||||
end
|
||||
end
|
||||
|
@ -320,6 +333,7 @@ module VX_mem_scheduler #(
|
|||
// rw is taken directly from the request queue output; it is not indexed by batch.
assign mem_req_rw_s = reqq_rw_s;
|
||||
// The remaining fields select the row of the [batch][channel] arrays addressed by req_batch_idx.
assign mem_req_byteen_s = mem_req_byteen_b[req_batch_idx];
|
||||
assign mem_req_addr_s = mem_req_addr_b[req_batch_idx];
|
||||
assign mem_req_atype_s = mem_req_atype_b[req_batch_idx];
|
||||
assign mem_req_data_s = mem_req_data_b[req_batch_idx];
|
||||
|
||||
reg [MEM_CHANNELS-1:0] batch_sent_mask;
|
||||
|
@ -393,7 +407,7 @@ module VX_mem_scheduler #(
|
|||
|
||||
for (genvar i = 0; i < MEM_CHANNELS; ++i) begin
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + LINE_SIZE + MEM_ADDR_WIDTH + LINE_WIDTH + MEM_TAG_WIDTH),
|
||||
.DATAW (1 + LINE_SIZE + MEM_ADDR_WIDTH + ATYPE_WIDTH + LINE_WIDTH + MEM_TAG_WIDTH),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
|
@ -401,8 +415,8 @@ module VX_mem_scheduler #(
|
|||
.reset (reset),
|
||||
.valid_in (mem_req_valid_s[i]),
|
||||
.ready_in (mem_req_ready_s[i]),
|
||||
.data_in ({mem_req_rw_s, mem_req_byteen_s[i], mem_req_addr_s[i], mem_req_data_s[i], mem_req_tag_s}),
|
||||
.data_out ({mem_req_rw[i], mem_req_byteen[i], mem_req_addr[i], mem_req_data[i], mem_req_tag[i]}),
|
||||
.data_in ({mem_req_rw_s, mem_req_byteen_s[i], mem_req_addr_s[i], mem_req_atype_s[i], mem_req_data_s[i], mem_req_tag_s}),
|
||||
.data_out ({mem_req_rw[i], mem_req_byteen[i], mem_req_addr[i], mem_req_atype[i], mem_req_data[i], mem_req_tag[i]}),
|
||||
.valid_out (mem_req_valid[i]),
|
||||
.ready_out (mem_req_ready[i])
|
||||
);
|
||||
|
|
|
@ -77,6 +77,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
wire [NUM_REQS-1:0][BANK_ADDR_WIDTH-1:0] req_bank_addr;
|
||||
// For each of the NUM_REQS request ports, derive the bank address from the request address.
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
    // Take BANK_ADDR_WIDTH bits of the address starting at bit position BANK_SEL_BITS.
assign req_bank_addr[i] = mem_bus_if[i].req_data.addr[BANK_SEL_BITS +: BANK_ADDR_WIDTH];
|
||||
    // atype is not otherwise referenced in this loop; `UNUSED_VAR presumably silences
    // the unused-signal lint warning - confirm against the macro definition.
`UNUSED_VAR (mem_bus_if[i].req_data.atype)
|
||||
end
|
||||
|
||||
// bank requests dispatch
|
||||
|
|
|
@ -33,7 +33,7 @@ module VX_mem_arb #(
|
|||
);
|
||||
// Data width in bits: DATA_SIZE scaled by 8.
localparam DATA_WIDTH = (8 * DATA_SIZE);
|
||||
localparam LOG_NUM_REQS = `ARB_SEL_BITS(NUM_INPUTS, NUM_OUTPUTS);
|
||||
// NOTE(review): REQ_DATAW is listed twice because this text is a rendered diff with the +/-
// markers stripped; presumably the first line is the removed version and the second the
// added one - confirm. The second form adds `ADDR_TYPE_WIDTH bits for the address type.
localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
|
||||
// NOTE(review): this uses the global `ADDR_TYPE_WIDTH macro, whereas VX_mem_bus_if exposes
// an ATYPE_WIDTH parameter; the packed width only matches the bus when both agree - confirm
// that no instance overrides ATYPE_WIDTH.
localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `ADDR_TYPE_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
|
||||
// Response payload width: tag plus data.
localparam RSP_DATAW = TAG_WIDTH + DATA_WIDTH;
|
||||
|
||||
// Elaboration-time check that there are at least as many inputs as outputs.
`STATIC_ASSERT ((NUM_INPUTS >= NUM_OUTPUTS), ("invalid parameter"))
|
||||
|
@ -49,7 +49,14 @@ module VX_mem_arb #(
|
|||
|
||||
// For each input port: expose valid, flatten the request fields into req_data_in[i],
// and return the ready signal to the port.
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
|
||||
assign req_valid_in[i] = bus_in_if[i].req_valid;
|
||||
// NOTE(review): req_data_in[i] is assigned twice below because this text is a rendered diff
// with the +/- markers stripped; presumably the one-line form (tag first, no atype) is the
// removed version and the multi-line form is its replacement - confirm.
assign req_data_in[i] = {bus_in_if[i].req_data.tag, bus_in_if[i].req_data.addr, bus_in_if[i].req_data.rw, bus_in_if[i].req_data.byteen, bus_in_if[i].req_data.data};
|
||||
// Field order, most-significant first: rw, byteen, addr, atype, data, tag.
// The unpacking concatenation on the output side must use the same order.
assign req_data_in[i] = {
|
||||
bus_in_if[i].req_data.rw,
|
||||
bus_in_if[i].req_data.byteen,
|
||||
bus_in_if[i].req_data.addr,
|
||||
bus_in_if[i].req_data.atype,
|
||||
bus_in_if[i].req_data.data,
|
||||
bus_in_if[i].req_data.tag
|
||||
};
|
||||
assign bus_in_if[i].req_ready = req_ready_in[i];
|
||||
end
|
||||
|
||||
|
@ -83,7 +90,14 @@ module VX_mem_arb #(
|
|||
.data_out (bus_out_if[i].req_data.tag)
|
||||
);
|
||||
// Drive the output port's request side from the req_*_out signals.
assign bus_out_if[i].req_valid = req_valid_out[i];
|
||||
// NOTE(review): two unpacking assigns appear because this text is a rendered diff with the
// +/- markers stripped; presumably the one-line form (tag first, no atype) is the removed
// version and the multi-line form is its replacement - confirm.
assign {req_tag_out, bus_out_if[i].req_data.addr, bus_out_if[i].req_data.rw, bus_out_if[i].req_data.byteen, bus_out_if[i].req_data.data} = req_data_out[i];
|
||||
// Split req_data_out[i] back into fields, most-significant first:
// rw, byteen, addr, atype, data, tag. The tag portion lands in req_tag_out rather than
// directly on bus_out_if[i].req_data.tag.
assign {
|
||||
bus_out_if[i].req_data.rw,
|
||||
bus_out_if[i].req_data.byteen,
|
||||
bus_out_if[i].req_data.addr,
|
||||
bus_out_if[i].req_data.atype,
|
||||
bus_out_if[i].req_data.data,
|
||||
req_tag_out
|
||||
} = req_data_out[i];
|
||||
assign req_ready_out[i] = bus_out_if[i].req_ready;
|
||||
end
|
||||
|
||||
|
@ -144,7 +158,10 @@ module VX_mem_arb #(
|
|||
|
||||
// For each output port: expose response valid, flatten {tag, data} into rsp_data_in[i],
// and return the response ready signal to the port.
for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin
|
||||
assign rsp_valid_in[i] = bus_out_if[i].rsp_valid;
|
||||
// NOTE(review): rsp_data_in[i] is assigned twice because this text is a rendered diff with
// the +/- markers stripped; both forms concatenate the same fields in the same order
// (tag, then data), so this looks like a formatting-only change - confirm.
assign rsp_data_in[i] = {bus_out_if[i].rsp_data.tag, bus_out_if[i].rsp_data.data};
|
||||
assign rsp_data_in[i] = {
|
||||
bus_out_if[i].rsp_data.tag,
|
||||
bus_out_if[i].rsp_data.data
|
||||
};
|
||||
assign bus_out_if[i].rsp_ready = rsp_ready_in[i];
|
||||
end
|
||||
|
||||
|
@ -170,7 +187,10 @@ module VX_mem_arb #(
|
|||
|
||||
// For each input port: drive response valid, split rsp_data_out[i] into {tag, data},
// and collect the port's response ready signal.
for (genvar i = 0; i < NUM_INPUTS; ++i) begin
|
||||
assign bus_in_if[i].rsp_valid = rsp_valid_out[i];
|
||||
// NOTE(review): the unpacking assign appears twice because this text is a rendered diff
// with the +/- markers stripped; both forms split into the same fields in the same order
// (tag, then data), so this looks like a formatting-only change - confirm.
assign {bus_in_if[i].rsp_data.tag, bus_in_if[i].rsp_data.data} = rsp_data_out[i];
|
||||
assign {
|
||||
bus_in_if[i].rsp_data.tag,
|
||||
bus_in_if[i].rsp_data.data
|
||||
} = rsp_data_out[i];
|
||||
assign rsp_ready_out[i] = bus_in_if[i].rsp_ready;
|
||||
end
|
||||
|
||||
|
|
|
@ -15,15 +15,17 @@
|
|||
|
||||
interface VX_mem_bus_if #(
|
||||
parameter DATA_SIZE = 1,
|
||||
parameter ATYPE_WIDTH= `ADDR_TYPE_WIDTH,
|
||||
parameter TAG_WIDTH = 1,
|
||||
parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
|
||||
parameter ADDR_WIDTH = MEM_ADDR_WIDTH - `CLOG2(DATA_SIZE)
|
||||
) ();
|
||||
|
||||
// Request payload of the memory bus as a packed struct; the first member listed occupies
// the most-significant bits.
typedef struct packed {
|
||||
// NOTE(review): `logic rw;` is listed twice because this text is a rendered diff with the
// +/- markers stripped; presumably a whitespace-only re-alignment, with a single rw member
// in the real file - confirm.
logic rw;
|
||||
logic rw;
|
||||
// DATA_SIZE byte-enable bits.
logic [DATA_SIZE-1:0] byteen;
|
||||
logic [ADDR_WIDTH-1:0] addr;
|
||||
// Address type, ATYPE_WIDTH bits, placed between addr and data.
logic [ATYPE_WIDTH-1:0] atype;
|
||||
// Payload data, DATA_SIZE*8 bits.
logic [DATA_SIZE*8-1:0] data;
|
||||
logic [TAG_WIDTH-1:0] tag;
|
||||
} req_data_t;
|
||||
|
|
|
@ -32,7 +32,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
|
|||
);
|
||||
// Address width after removing the `CLOG2(DATA_SIZE) low-order bits from MEM_ADDR_WIDTH.
localparam ADDR_WIDTH = (MEM_ADDR_WIDTH-`CLOG2(DATA_SIZE));
|
||||
// Data width in bits: DATA_SIZE scaled by 8.
localparam DATA_WIDTH = (8 * DATA_SIZE);
|
||||
// NOTE(review): REQ_DATAW is listed twice because this text is a rendered diff with the +/-
// markers stripped; presumably the first line is the removed version and the second, which
// adds `ADDR_TYPE_WIDTH bits for the address type, is its replacement - confirm.
localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
|
||||
localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `ADDR_TYPE_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
|
||||
// Response payload width: tag plus data.
localparam RSP_DATAW = TAG_WIDTH + DATA_WIDTH;
|
||||
|
||||
// handle requests ////////////////////////////////////////////////////////
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue