mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
texture unit dcache arbitration
This commit is contained in:
parent
6febdf7399
commit
124acfbf12
14 changed files with 606 additions and 115 deletions
|
@ -22,8 +22,8 @@ void kernel_body(int task_id, void* arg) {
|
|||
|
||||
for (uint32_t y = 0; y < _arg->tile_height; ++y) {
|
||||
for (uint32_t x = 0; x < _arg->tile_width; ++x) {
|
||||
int32_t u = (int32_t)(fu * (1<<28));
|
||||
int32_t v = (int32_t)(fv * (1<<28));
|
||||
int32_t u = (int32_t)(fu * (1<<20));
|
||||
int32_t v = (int32_t)(fv * (1<<20));
|
||||
dst_ptr[x] = vx_tex(0, u, v, 0);
|
||||
fu += _arg->deltaX;
|
||||
}
|
||||
|
|
|
@ -71,13 +71,13 @@ module VX_core #(
|
|||
//--
|
||||
|
||||
VX_dcache_core_req_if #(
|
||||
.NUM_REQS(`DNUM_REQUESTS),
|
||||
.LANES(`DNUM_REQUESTS),
|
||||
.WORD_SIZE(`DWORD_SIZE),
|
||||
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
|
||||
) dcache_core_req_if();
|
||||
|
||||
VX_dcache_core_rsp_if #(
|
||||
.NUM_REQS(`DNUM_REQUESTS),
|
||||
.LANES(`DNUM_REQUESTS),
|
||||
.WORD_SIZE(`DWORD_SIZE),
|
||||
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
|
||||
) dcache_core_rsp_if();
|
||||
|
|
118
hw/rtl/VX_dcache_arb.v
Normal file
118
hw/rtl/VX_dcache_arb.v
Normal file
|
@ -0,0 +1,118 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Single-word memory bus arbiter.
//
// Request side: every requester's fields are packed into one bundle, with the
// requester's own index placed directly below its tag, and the bundles are
// handed to a VX_stream_arbiter instance that drives the single output port.
//
// Response side: the low LOG_NUM_REQS bits of the returned tag choose the
// destination port of a VX_stream_demux instance; the tag bits above them are
// returned to the requester together with the data.
//
// With NUM_REQS == 1 the module is a plain wire-through and clk/reset are
// unused.
//
// Parameters:
//   NUM_REQS      - number of requester ports
//   DATA_WIDTH    - data payload width in bits
//   TAG_IN_WIDTH  - requester-side tag width
//   TAG_OUT_WIDTH - memory-side tag width; presumably expected to equal
//                   TAG_IN_WIDTH + LOG_NUM_REQS when NUM_REQS > 1 (not checked
//                   here - TODO confirm at the instantiation sites)
//   BUFFERED_REQ  - passed to VX_stream_arbiter as BUFFERED
//   BUFFERED_RSP  - passed to VX_stream_demux as BUFFERED
//   DATA_SIZE, ADDR_WIDTH, LOG_NUM_REQS - defaults derived from the above
module VX_mem_arb #(
    parameter NUM_REQS      = 1,
    parameter DATA_WIDTH    = 1,
    parameter TAG_IN_WIDTH  = 1,
    parameter TAG_OUT_WIDTH = 1,
    parameter BUFFERED_REQ  = 0,
    parameter BUFFERED_RSP  = 0,

    parameter DATA_SIZE     = (DATA_WIDTH / 8),
    parameter ADDR_WIDTH    = 32 - `CLOG2(DATA_SIZE),
    parameter LOG_NUM_REQS  = `CLOG2(NUM_REQS)
) (
    input wire clk,
    input wire reset,

    // requester side: one request port per requester
    input wire [NUM_REQS-1:0]                       req_valid_in,
    input wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0]     req_tag_in,
    input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0]       req_addr_in,
    input wire [NUM_REQS-1:0]                       req_rw_in,
    input wire [NUM_REQS-1:0][DATA_SIZE-1:0]        req_byteen_in,
    input wire [NUM_REQS-1:0][DATA_WIDTH-1:0]       req_data_in,
    output wire [NUM_REQS-1:0]                      req_ready_in,

    // memory side: the single arbitrated request port
    output wire                                     req_valid_out,
    output wire [TAG_OUT_WIDTH-1:0]                 req_tag_out,
    output wire [ADDR_WIDTH-1:0]                    req_addr_out,
    output wire                                     req_rw_out,
    output wire [DATA_SIZE-1:0]                     req_byteen_out,
    output wire [DATA_WIDTH-1:0]                    req_data_out,
    input wire                                      req_ready_out,

    // memory side: the single response port
    input wire                                      rsp_valid_in,
    input wire [TAG_OUT_WIDTH-1:0]                  rsp_tag_in,
    input wire [DATA_WIDTH-1:0]                     rsp_data_in,
    output wire                                     rsp_ready_in,

    // requester side: one response port per requester
    output wire [NUM_REQS-1:0]                      rsp_valid_out,
    output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0]    rsp_tag_out,
    output wire [NUM_REQS-1:0][DATA_WIDTH-1:0]      rsp_data_out,
    input wire [NUM_REQS-1:0]                       rsp_ready_out
);
    // widths of the packed request / response bundles
    localparam REQ_DATAW = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
    localparam RSP_DATAW = TAG_IN_WIDTH + DATA_WIDTH;

    if (NUM_REQS > 1) begin

        wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_bundle_in;

        for (genvar r = 0; r < NUM_REQS; r++) begin
            // the requester index sits in the tag's low bits so that the
            // response path can recover it from the returned tag
            assign req_bundle_in[r] = {
                req_tag_in[r], LOG_NUM_REQS'(r),
                req_addr_in[r],
                req_rw_in[r],
                req_byteen_in[r],
                req_data_in[r]
            };
        end

        VX_stream_arbiter #(
            .NUM_REQS (NUM_REQS),
            .DATAW    (REQ_DATAW),
            .BUFFERED (BUFFERED_REQ)
        ) req_arb (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (req_valid_in),
            .data_in   (req_bundle_in),
            .ready_in  (req_ready_in),
            .valid_out (req_valid_out),
            .data_out  ({req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}),
            .ready_out (req_ready_out)
        );

        ///////////////////////////////////////////////////////////////////////

        // low tag bits: destination requester; bits above: requester's own tag
        wire [LOG_NUM_REQS-1:0] rsp_route        = rsp_tag_in[LOG_NUM_REQS-1:0];
        wire [TAG_IN_WIDTH-1:0] rsp_tag_stripped = rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH];

        wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_bundle_out;

        for (genvar r = 0; r < NUM_REQS; r++) begin
            // bundle layout is {tag, data}
            assign rsp_tag_out[r]  = rsp_bundle_out[r][DATA_WIDTH +: TAG_IN_WIDTH];
            assign rsp_data_out[r] = rsp_bundle_out[r][DATA_WIDTH-1:0];
        end

        VX_stream_demux #(
            .NUM_REQS (NUM_REQS),
            .DATAW    (RSP_DATAW),
            .BUFFERED (BUFFERED_RSP)
        ) rsp_demux (
            .clk       (clk),
            .reset     (reset),
            .sel       (rsp_route),
            .valid_in  (rsp_valid_in),
            .data_in   ({rsp_tag_stripped, rsp_data_in}),
            .ready_in  (rsp_ready_in),
            .valid_out (rsp_valid_out),
            .data_out  (rsp_bundle_out),
            .ready_out (rsp_ready_out)
        );

    end else begin

        // single requester: nothing to arbitrate, wire straight through
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        assign req_valid_out  = req_valid_in;
        assign req_ready_in   = req_ready_out;
        assign req_tag_out    = req_tag_in;
        assign req_addr_out   = req_addr_in;
        assign req_rw_out     = req_rw_in;
        assign req_byteen_out = req_byteen_in;
        assign req_data_out   = req_data_in;

        assign rsp_valid_out  = rsp_valid_in;
        assign rsp_ready_in   = rsp_ready_out;
        assign rsp_tag_out    = rsp_tag_in;
        assign rsp_data_out   = rsp_data_in;

    end

endmodule
|
|
@ -283,8 +283,13 @@
|
|||
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
|
||||
|
||||
// Core request tag bits
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
`define LSU_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
|
||||
`define DCORE_TAG_WIDTH (`LSU_TAG_WIDTH+1)
|
||||
`else
|
||||
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
|
||||
|
||||
`endif
|
||||
|
||||
// DRAM request data bits
|
||||
`define DDRAM_LINE_WIDTH (`DCACHE_LINE_SIZE * 8)
|
||||
|
||||
|
|
|
@ -47,7 +47,74 @@ module VX_execute #(
|
|||
VX_fpu_to_csr_if fpu_to_csr_if();
|
||||
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
|
||||
VX_dcache_core_req_if #(
|
||||
.LANES(`NUM_THREADS),
|
||||
.WORD_SIZE(4),
|
||||
.CORE_TAG_WIDTH(`LSU_TAG_WIDTH)
|
||||
) tex_dcache_req_if();
|
||||
|
||||
VX_dcache_core_rsp_if #(
|
||||
.LANES(`NUM_THREADS),
|
||||
.WORD_SIZE(4),
|
||||
.CORE_TAG_WIDTH(`LSU_TAG_WIDTH)
|
||||
) tex_dcache_rsp_if();
|
||||
|
||||
VX_dcache_core_req_if #(
|
||||
.LANES(`NUM_THREADS),
|
||||
.WORD_SIZE(4),
|
||||
.CORE_TAG_WIDTH(`LSU_TAG_WIDTH)
|
||||
) lsu_dcache_req_if();
|
||||
|
||||
VX_dcache_core_rsp_if #(
|
||||
.LANES(`NUM_THREADS),
|
||||
.WORD_SIZE(4),
|
||||
.CORE_TAG_WIDTH(`LSU_TAG_WIDTH)
|
||||
) lsu_dcache_rsp_if();
|
||||
|
||||
VX_tex_csr_if tex_csr_if();
|
||||
|
||||
VX_tex_lsu_arb #(
|
||||
.NUM_REQS (2),
|
||||
.LANES (`NUM_THREADS),
|
||||
.WORD_SIZE (4),
|
||||
.TAG_IN_WIDTH (`LSU_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`DCORE_TAG_WIDTH)
|
||||
) tex_lsu_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Tex/LSU request
|
||||
.req_valid_in ({tex_dcache_req_if.valid, lsu_dcache_req_if.valid}),
|
||||
.req_rw_in ({tex_dcache_req_if.rw, lsu_dcache_req_if.rw}),
|
||||
.req_byteen_in ({tex_dcache_req_if.byteen, lsu_dcache_req_if.byteen}),
|
||||
.req_addr_in ({tex_dcache_req_if.addr, lsu_dcache_req_if.addr}),
|
||||
.req_data_in ({tex_dcache_req_if.data, lsu_dcache_req_if.data}),
|
||||
.req_tag_in ({tex_dcache_req_if.tag, lsu_dcache_req_if.tag}),
|
||||
.req_ready_in ({tex_dcache_req_if.ready, lsu_dcache_req_if.ready}),
|
||||
|
||||
// Dcache request
|
||||
.req_valid_out (dcache_req_if.valid),
|
||||
.req_rw_out (dcache_req_if.rw),
|
||||
.req_byteen_out (dcache_req_if.byteen),
|
||||
.req_addr_out (dcache_req_if.addr),
|
||||
.req_data_out (dcache_req_if.data),
|
||||
.req_tag_out (dcache_req_if.tag),
|
||||
.req_ready_out (dcache_req_if.ready),
|
||||
|
||||
// Tex/LSU response
|
||||
.rsp_valid_out ({tex_dcache_rsp_if.valid, lsu_dcache_rsp_if.valid}),
|
||||
.rsp_data_out ({tex_dcache_rsp_if.data, lsu_dcache_rsp_if.data}),
|
||||
.rsp_tag_out ({tex_dcache_rsp_if.tag, lsu_dcache_rsp_if.tag}),
|
||||
.rsp_ready_out ({tex_dcache_rsp_if.ready, lsu_dcache_rsp_if.ready}),
|
||||
|
||||
// Dcache response
|
||||
.rsp_valid_in (dcache_rsp_if.valid),
|
||||
.rsp_tag_in (dcache_rsp_if.tag),
|
||||
.rsp_data_in (dcache_rsp_if.data),
|
||||
.rsp_ready_in (dcache_rsp_if.ready)
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
wire[`NUM_WARPS-1:0] csr_pending;
|
||||
|
@ -63,105 +130,24 @@ module VX_execute #(
|
|||
.alu_commit_if (alu_commit_if)
|
||||
);
|
||||
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
|
||||
VX_dcache_core_req_if #(
|
||||
.NUM_REQS(`NUM_THREADS),
|
||||
.WORD_SIZE(4),
|
||||
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
|
||||
) tex_dcache_req_if();
|
||||
|
||||
VX_dcache_core_rsp_if #(
|
||||
.NUM_REQS(`NUM_THREADS),
|
||||
.WORD_SIZE(4),
|
||||
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
|
||||
) tex_dcache_rsp_if();
|
||||
|
||||
VX_dcache_core_req_if #(
|
||||
.NUM_REQS(`NUM_THREADS),
|
||||
.WORD_SIZE(4),
|
||||
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
|
||||
) lsu_dcache_req_if();
|
||||
|
||||
VX_dcache_core_rsp_if #(
|
||||
.NUM_REQS(`NUM_THREADS),
|
||||
.WORD_SIZE(4),
|
||||
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
|
||||
) lsu_dcache_rsp_if();
|
||||
|
||||
VX_mem_arb #(
|
||||
.NUM_REQS (2),
|
||||
.DATA_WIDTH (`WORD_WIDTH),
|
||||
.TAG_IN_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.TAG_OUT_WIDTH (`DCORE_TAG_WIDTH),
|
||||
.BUFFERED_REQ (0),
|
||||
.BUFFERED_RSP (0)
|
||||
) dcache_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Tex/LSU request
|
||||
.req_valid_in ({tex_dcache_req_if.valid, lsu_dcache_req_if.valid}),
|
||||
.req_rw_in ({tex_dcache_req_if.rw, lsu_dcache_req_if.rw}),
|
||||
.req_byteen_in ({tex_dcache_req_if.byteen, lsu_dcache_req_if.byteen}),
|
||||
.req_addr_in ({tex_dcache_req_if.addr, lsu_dcache_req_if.addr}),
|
||||
.req_data_in ({tex_dcache_req_if.data, lsu_dcache_req_if.data}),
|
||||
.req_tag_in ({tex_dcache_req_if.tag, lsu_dcache_req_if.tag}),
|
||||
.req_ready_in ({tex_dcache_req_if.ready, lsu_dcache_req_if.ready}),
|
||||
|
||||
// Dcache request
|
||||
.req_valid_out (dcache_req_if.valid),
|
||||
.req_rw_out (dcache_req_if.rw),
|
||||
.req_byteen_out (dcache_req_if.byteen),
|
||||
.req_addr_out (dcache_req_if.addr),
|
||||
.req_data_out (dcache_req_if.data),
|
||||
.req_tag_out (dcache_req_if.tag),
|
||||
.req_ready_out (dcache_req_if.ready),
|
||||
|
||||
// Tex/LSU response
|
||||
.rsp_valid_out ({tex_dcache_rsp_if.valid, lsu_dcache_rsp_if.valid}),
|
||||
.rsp_data_out ({tex_dcache_rsp_if.data, lsu_dcache_rsp_if.data}),
|
||||
.rsp_tag_out ({tex_dcache_rsp_if.tag, lsu_dcache_rsp_if.tag}),
|
||||
.rsp_ready_out ({tex_dcache_rsp_if.ready, lsu_dcache_rsp_if.ready}),
|
||||
|
||||
// Dcache response
|
||||
.rsp_valid_in (dcache_rsp_if.valid),
|
||||
.rsp_tag_in (dcache_rsp_if.tag),
|
||||
.rsp_data_in (dcache_rsp_if.data),
|
||||
.rsp_ready_in (dcache_rsp_if.ready)
|
||||
);
|
||||
|
||||
|
||||
VX_lsu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) lsu_unit (
|
||||
`SCOPE_BIND_VX_execute_lsu_unit
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`ifdef EXT_TEX_ENABLE
|
||||
.dcache_req_if (lsu_dcache_req_if),
|
||||
.dcache_rsp_if (lsu_dcache_rsp_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.ld_commit_if (ld_commit_if),
|
||||
.st_commit_if (st_commit_if)
|
||||
);
|
||||
|
||||
`else
|
||||
|
||||
VX_lsu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) lsu_unit (
|
||||
`SCOPE_BIND_VX_execute_lsu_unit
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
`else
|
||||
.dcache_req_if (dcache_req_if),
|
||||
.dcache_rsp_if (dcache_rsp_if),
|
||||
`endif
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.ld_commit_if (ld_commit_if),
|
||||
.st_commit_if (st_commit_if)
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
VX_csr_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) csr_unit (
|
||||
|
|
|
@ -41,30 +41,30 @@ module VX_mem_unit # (
|
|||
) dcache_dram_rsp_if(), icache_dram_rsp_if();
|
||||
|
||||
VX_dcache_core_req_if #(
|
||||
.NUM_REQS (`DNUM_REQUESTS),
|
||||
.LANES (`DNUM_REQUESTS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH)
|
||||
) dcache_req_if();
|
||||
|
||||
VX_dcache_core_rsp_if #(
|
||||
.NUM_REQS (`DNUM_REQUESTS),
|
||||
.LANES (`DNUM_REQUESTS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH)
|
||||
) dcache_rsp_if();
|
||||
|
||||
VX_dcache_core_req_if #(
|
||||
.NUM_REQS (`DNUM_REQUESTS),
|
||||
.LANES (`DNUM_REQUESTS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH)
|
||||
) smem_req_if();
|
||||
|
||||
VX_dcache_core_rsp_if #(
|
||||
.NUM_REQS (`DNUM_REQUESTS),
|
||||
.LANES (`DNUM_REQUESTS),
|
||||
.WORD_SIZE (`DWORD_SIZE),
|
||||
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH)
|
||||
) smem_rsp_if();
|
||||
|
||||
VX_databus_arb databus_arb (
|
||||
VX_smem_arb databus_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ module VX_pipeline #(
|
|||
//
|
||||
|
||||
VX_dcache_core_req_if #(
|
||||
.NUM_REQS(`NUM_THREADS),
|
||||
.LANES(`NUM_THREADS),
|
||||
.WORD_SIZE(4),
|
||||
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
|
||||
) dcache_core_req_if();
|
||||
|
@ -79,7 +79,7 @@ module VX_pipeline #(
|
|||
//
|
||||
|
||||
VX_dcache_core_rsp_if #(
|
||||
.NUM_REQS(`NUM_THREADS),
|
||||
.LANES(`NUM_THREADS),
|
||||
.WORD_SIZE(4),
|
||||
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
|
||||
) dcache_core_rsp_if();
|
||||
|
|
118
hw/rtl/VX_smem_arb.v
Normal file
118
hw/rtl/VX_smem_arb.v
Normal file
|
@ -0,0 +1,118 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Shared-memory / data-cache bus splitter.
//
// Request side: each of the `NUM_THREADS lanes of core_req_if is steered, by
// address, to either cache_req_if or smem_req_if. Both destinations are fed
// through their own VX_skid_buffer instance.
//
// Response side: when `SM_ENABLE is set, the cache and shared-memory responses
// are merged onto core_rsp_if by a 2-input VX_stream_arbiter; otherwise the
// cache response is wired straight to the core.
module VX_smem_arb (
    input wire clk,
    input wire reset,

    // request coming from the core
    VX_dcache_core_req_if   core_req_if,

    // requests going to the cache and to shared memory
    VX_dcache_core_req_if   cache_req_if,
    VX_dcache_core_req_if   smem_req_if,

    // responses coming from the cache and from shared memory
    VX_dcache_core_rsp_if   cache_rsp_if,
    VX_dcache_core_rsp_if   smem_rsp_if,

    // response going back to the core
    VX_dcache_core_rsp_if   core_rsp_if
);
    localparam SMEM_ASHIFT = `CLOG2(`SHARED_MEM_BASE_ADDR_ALIGN);
    localparam REQ_ASHIFT  = `CLOG2(`DWORD_SIZE);
    localparam REQ_ADDRW   = 32 - REQ_ASHIFT;
    // width of the word address once the sub-alignment bits are dropped
    localparam SMEM_PAGEW  = 32 - SMEM_ASHIFT;
    localparam REQ_DATAW   = REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
    localparam RSP_DATAW   = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;

    //
    // request steering (one pair of buffers per lane)
    //

    for (genvar t = 0; t < `NUM_THREADS; ++t) begin

        wire cache_buf_ready;
        wire smem_buf_ready;

        // request address at SHARED_MEM_BASE_ADDR_ALIGN granularity
        wire [SMEM_PAGEW-1:0] addr_page = core_req_if.addr[t][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT];

        // window is [SHARED_MEM_BASE_ADDR - SMEM_SIZE, SHARED_MEM_BASE_ADDR)
        wire in_smem_window = (addr_page >= SMEM_PAGEW'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> SMEM_ASHIFT))
                           && (addr_page <  SMEM_PAGEW'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));

        // only a valid request can be classified as a shared-memory access
        wire is_smem_addr = core_req_if.valid[t] && `SM_ENABLE && in_smem_window;

        // identical payload is offered to both buffers; valid_in picks one
        wire [REQ_DATAW-1:0] req_bundle = {core_req_if.addr[t], core_req_if.rw[t], core_req_if.byteen[t], core_req_if.data[t], core_req_if.tag[t]};

        wire to_cache = core_req_if.valid[t] && !is_smem_addr;
        wire to_smem  = core_req_if.valid[t] && is_smem_addr;

        VX_skid_buffer #(
            .DATAW (REQ_DATAW)
        ) cache_out_buffer (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (to_cache),
            .data_in   (req_bundle),
            .ready_in  (cache_buf_ready),
            .valid_out (cache_req_if.valid[t]),
            .data_out  ({cache_req_if.addr[t], cache_req_if.rw[t], cache_req_if.byteen[t], cache_req_if.data[t], cache_req_if.tag[t]}),
            .ready_out (cache_req_if.ready[t])
        );

        VX_skid_buffer #(
            .DATAW (REQ_DATAW)
        ) smem_out_buffer (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (to_smem),
            .data_in   (req_bundle),
            .ready_in  (smem_buf_ready),
            .valid_out (smem_req_if.valid[t]),
            .data_out  ({smem_req_if.addr[t], smem_req_if.rw[t], smem_req_if.byteen[t], smem_req_if.data[t], smem_req_if.tag[t]}),
            .ready_out (smem_req_if.ready[t])
        );

        // the core sees the ready of whichever buffer its request targets
        assign core_req_if.ready[t] = is_smem_addr ? smem_buf_ready : cache_buf_ready;
    end

    //
    // response merging
    //

    if (`SM_ENABLE ) begin

        wire [1:0][RSP_DATAW-1:0] rsp_bundle;
        wire [1:0]                rsp_pending;
        wire [1:0]                rsp_accept;

        wire                      merged_valid;
        wire [`NUM_THREADS-1:0]   merged_tmask;

        // source 0: data cache
        assign rsp_pending[0]     = (| cache_rsp_if.valid);
        assign rsp_bundle[0]      = {cache_rsp_if.valid, cache_rsp_if.data, cache_rsp_if.tag};
        assign cache_rsp_if.ready = rsp_accept[0];

        // source 1: shared memory
        assign rsp_pending[1]     = (| smem_rsp_if.valid) & `SM_ENABLE;
        assign rsp_bundle[1]      = {smem_rsp_if.valid, smem_rsp_if.data, smem_rsp_if.tag};
        assign smem_rsp_if.ready  = rsp_accept[1];

        VX_stream_arbiter #(
            .NUM_REQS (2),
            .DATAW    (RSP_DATAW),
            .BUFFERED (0)
        ) rsp_arb (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (rsp_pending),
            .data_in   (rsp_bundle),
            .ready_in  (rsp_accept),
            .valid_out (merged_valid),
            .data_out  ({merged_tmask, core_rsp_if.data, core_rsp_if.tag}),
            .ready_out (core_rsp_if.ready)
        );

        // per-lane valid = arbiter output valid gated by the carried lane mask
        assign core_rsp_if.valid = {`NUM_THREADS{merged_valid}} & merged_tmask;

    end else begin

        // no shared memory: cache responses go straight back to the core
        assign core_rsp_if.valid  = cache_rsp_if.valid;
        assign core_rsp_if.data   = cache_rsp_if.data;
        assign core_rsp_if.tag    = cache_rsp_if.tag;
        assign cache_rsp_if.ready = core_rsp_if.ready;

    end

endmodule
|
136
hw/rtl/VX_tex_cache_arb.v
Normal file
136
hw/rtl/VX_tex_cache_arb.v
Normal file
|
@ -0,0 +1,136 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Multi-lane data-cache request arbiter.
//
// Request side: one of NUM_REQS LANES-wide request groups is selected by a
// VX_rr_arbiter instance and forwarded as a whole. The index of the selected
// group is appended below every lane's tag.
//
// Response side: the low LOG_NUM_REQS bits of the returned tag pick the
// requester that receives the response. Routing is combinational and per lane
// so the LANES-wide valid mask reaches the requester intact, matching the
// response path of VX_tex_lsu_arb.
//
// With NUM_REQS == 1 the module is a plain wire-through and clk/reset are
// unused.
//
// Parameters:
//   NUM_REQS      - number of requesters
//   LANES         - lanes per request group
//   WORD_SIZE     - bytes per word (width of the per-lane byte enable)
//   TAG_IN_WIDTH  - requester-side tag width
//   TAG_OUT_WIDTH - cache-side tag width; presumably expected to equal
//                   TAG_IN_WIDTH + LOG_NUM_REQS when NUM_REQS > 1 (not checked
//                   here - TODO confirm at the instantiation sites)
//   LOG_NUM_REQS  - default derived from NUM_REQS
module VX_dcache_arb #(
    parameter NUM_REQS      = 1,
    parameter LANES         = 1,
    parameter WORD_SIZE     = 1,
    parameter TAG_IN_WIDTH  = 1,
    parameter TAG_OUT_WIDTH = 1,
    parameter LOG_NUM_REQS  = `CLOG2(NUM_REQS)
) (
    input wire clk,
    input wire reset,

    // input requests
    input wire [NUM_REQS-1:0][LANES-1:0]                        req_valid_in,
    input wire [NUM_REQS-1:0][LANES-1:0]                        req_rw_in,
    input wire [NUM_REQS-1:0][LANES-1:0][WORD_SIZE-1:0]         req_byteen_in,
    input wire [NUM_REQS-1:0][LANES-1:0][`WORD_ADDR_WIDTH-1:0]  req_addr_in,
    input wire [NUM_REQS-1:0][LANES-1:0][`WORD_WIDTH-1:0]       req_data_in,
    input wire [NUM_REQS-1:0][LANES-1:0][TAG_IN_WIDTH-1:0]      req_tag_in,
    output wire [NUM_REQS-1:0][LANES-1:0]                       req_ready_in,

    // output request
    output wire [LANES-1:0]                                     req_valid_out,
    output wire [LANES-1:0]                                     req_rw_out,
    output wire [LANES-1:0][WORD_SIZE-1:0]                      req_byteen_out,
    output wire [LANES-1:0][`WORD_ADDR_WIDTH-1:0]               req_addr_out,
    output wire [LANES-1:0][`WORD_WIDTH-1:0]                    req_data_out,
    output wire [LANES-1:0][TAG_OUT_WIDTH-1:0]                  req_tag_out,
    input wire [LANES-1:0]                                      req_ready_out,

    // input response
    input wire [LANES-1:0]                                      rsp_valid_in,
    input wire [LANES-1:0][`WORD_WIDTH-1:0]                     rsp_data_in,
    input wire [TAG_OUT_WIDTH-1:0]                              rsp_tag_in,
    output wire                                                 rsp_ready_in,

    // output responses
    output wire [NUM_REQS-1:0][LANES-1:0]                       rsp_valid_out,
    output wire [NUM_REQS-1:0][LANES-1:0][`WORD_WIDTH-1:0]      rsp_data_out,
    output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0]                rsp_tag_out,
    input wire [NUM_REQS-1:0]                                   rsp_ready_out
);
    // one requester's full request group: per lane {valid, tag, addr, rw, byteen, data}
    localparam REQ_DATAW = LANES * (1 + TAG_IN_WIDTH + `WORD_ADDR_WIDTH + 1 + WORD_SIZE + `WORD_WIDTH);

    if (NUM_REQS > 1) begin

        wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_merged_data_in;
        wire [NUM_REQS-1:0]                req_valid_in_any;

        for (genvar i = 0; i < NUM_REQS; i++) begin
            assign req_merged_data_in[i] = {req_valid_in[i], req_tag_in[i], req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
            // a requester competes as soon as any of its lanes is valid
            assign req_valid_in_any[i]   = (| req_valid_in[i]);
        end

        wire                    sel_valid;
        wire [LOG_NUM_REQS-1:0] sel_idx;
        wire [NUM_REQS-1:0]     sel_1hot;

        // the arbiter is enabled whenever at least one output lane is ready
        wire sel_enable = (| req_ready_out);

        VX_rr_arbiter #(
            .NUM_REQS    (NUM_REQS),
            .LOCK_ENABLE (1)
        ) sel_arb (
            .clk          (clk),
            .reset        (reset),
            .requests     (req_valid_in_any),
            .enable       (sel_enable),
            .grant_valid  (sel_valid),
            .grant_index  (sel_idx),
            .grant_onehot (sel_1hot)
        );

        wire [LANES-1:0]                   req_valid_out_unqual;
        wire [LANES-1:0][TAG_IN_WIDTH-1:0] req_tag_out_unqual;

        assign {req_valid_out_unqual, req_tag_out_unqual, req_addr_out, req_rw_out, req_byteen_out, req_data_out} = req_merged_data_in[sel_idx];

        // lane valids only count while the arbiter reports a valid grant
        assign req_valid_out = req_valid_out_unqual & {LANES{sel_valid}};

        for (genvar i = 0; i < LANES; i++) begin
            // requester index goes in the low tag bits for response routing
            assign req_tag_out[i] = {req_tag_out_unqual[i], sel_idx};
        end

        for (genvar i = 0; i < NUM_REQS; i++) begin
            // only the granted requester observes the downstream ready
            assign req_ready_in[i] = req_ready_out & {LANES{sel_1hot[i]}};
        end

        ///////////////////////////////////////////////////////////////////////

        wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[LOG_NUM_REQS-1:0];

        // steer the whole lane-valid mask to the selected requester
        reg [NUM_REQS-1:0][LANES-1:0] rsp_valid_out_unqual;
        always @(*) begin
            rsp_valid_out_unqual          = '0;
            rsp_valid_out_unqual[rsp_sel] = rsp_valid_in;
        end
        assign rsp_valid_out = rsp_valid_out_unqual;

        for (genvar i = 0; i < NUM_REQS; i++) begin
            // data and stripped tag are broadcast; valid qualifies the target
            assign rsp_data_out[i] = rsp_data_in;
            assign rsp_tag_out[i]  = rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH];
        end

        assign rsp_ready_in = rsp_ready_out[rsp_sel];

    end else begin

        // single requester: nothing to arbitrate, wire straight through
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        assign req_valid_out  = req_valid_in;
        assign req_tag_out    = req_tag_in;
        assign req_addr_out   = req_addr_in;
        assign req_rw_out     = req_rw_in;
        assign req_byteen_out = req_byteen_in;
        assign req_data_out   = req_data_in;
        assign req_ready_in   = req_ready_out;

        assign rsp_valid_out  = rsp_valid_in;
        assign rsp_tag_out    = rsp_tag_in;
        assign rsp_data_out   = rsp_data_in;
        assign rsp_ready_in   = rsp_ready_out;

    end

endmodule
|
128
hw/rtl/VX_tex_lsu_arb.v
Normal file
128
hw/rtl/VX_tex_lsu_arb.v
Normal file
|
@ -0,0 +1,128 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Texture-unit / LSU data-cache arbiter.
//
// Request side: a VX_rr_arbiter instance picks one of NUM_REQS LANES-wide
// request groups, which is then forwarded as a whole with the picked group's
// index appended below every lane's tag.
//
// Response side: the low LOG_NUM_REQS bits of the returned tag name the
// requester; the lane-valid mask is steered to that requester while data and
// the stripped tag are broadcast to all of them.
//
// With NUM_REQS == 1 the module is a plain wire-through and clk/reset are
// unused.
//
// Parameters:
//   NUM_REQS      - number of requesters
//   LANES         - lanes per request group
//   WORD_SIZE     - bytes per word (width of the per-lane byte enable)
//   TAG_IN_WIDTH  - requester-side tag width
//   TAG_OUT_WIDTH - cache-side tag width; presumably expected to equal
//                   TAG_IN_WIDTH + LOG_NUM_REQS when NUM_REQS > 1 (not checked
//                   here - TODO confirm at the instantiation sites)
//   LOG_NUM_REQS  - default derived from NUM_REQS
module VX_tex_lsu_arb #(
    parameter NUM_REQS      = 1,
    parameter LANES         = 1,
    parameter WORD_SIZE     = 1,
    parameter TAG_IN_WIDTH  = 1,
    parameter TAG_OUT_WIDTH = 1,
    parameter LOG_NUM_REQS  = `CLOG2(NUM_REQS)
) (
    input wire clk,
    input wire reset,

    // requester side: one LANES-wide request group per requester
    input wire [NUM_REQS-1:0][LANES-1:0]                        req_valid_in,
    input wire [NUM_REQS-1:0][LANES-1:0]                        req_rw_in,
    input wire [NUM_REQS-1:0][LANES-1:0][WORD_SIZE-1:0]         req_byteen_in,
    input wire [NUM_REQS-1:0][LANES-1:0][`WORD_ADDR_WIDTH-1:0]  req_addr_in,
    input wire [NUM_REQS-1:0][LANES-1:0][`WORD_WIDTH-1:0]       req_data_in,
    input wire [NUM_REQS-1:0][LANES-1:0][TAG_IN_WIDTH-1:0]      req_tag_in,
    output wire [NUM_REQS-1:0][LANES-1:0]                       req_ready_in,

    // cache side: the arbitrated request group
    output wire [LANES-1:0]                                     req_valid_out,
    output wire [LANES-1:0]                                     req_rw_out,
    output wire [LANES-1:0][WORD_SIZE-1:0]                      req_byteen_out,
    output wire [LANES-1:0][`WORD_ADDR_WIDTH-1:0]               req_addr_out,
    output wire [LANES-1:0][`WORD_WIDTH-1:0]                    req_data_out,
    output wire [LANES-1:0][TAG_OUT_WIDTH-1:0]                  req_tag_out,
    input wire [LANES-1:0]                                      req_ready_out,

    // cache side: the response
    input wire [LANES-1:0]                                      rsp_valid_in,
    input wire [LANES-1:0][`WORD_WIDTH-1:0]                     rsp_data_in,
    input wire [TAG_OUT_WIDTH-1:0]                              rsp_tag_in,
    output wire                                                 rsp_ready_in,

    // requester side: one response port per requester
    output wire [NUM_REQS-1:0][LANES-1:0]                       rsp_valid_out,
    output wire [NUM_REQS-1:0][LANES-1:0][`WORD_WIDTH-1:0]      rsp_data_out,
    output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0]                rsp_tag_out,
    input wire [NUM_REQS-1:0]                                   rsp_ready_out
);
    // one requester's full request group: per lane {valid, tag, addr, rw, byteen, data}
    localparam REQ_DATAW = LANES * (1 + TAG_IN_WIDTH + `WORD_ADDR_WIDTH + 1 + WORD_SIZE + `WORD_WIDTH);

    if (NUM_REQS > 1) begin

        wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_bundle;
        wire [NUM_REQS-1:0]                req_pending;

        for (genvar r = 0; r < NUM_REQS; r++) begin
            // a requester competes as soon as any of its lanes is valid
            assign req_pending[r] = (| req_valid_in[r]);
            assign req_bundle[r]  = {
                req_valid_in[r],
                req_tag_in[r],
                req_addr_in[r],
                req_rw_in[r],
                req_byteen_in[r],
                req_data_in[r]
            };
        end

        wire                    grant_valid;
        wire [LOG_NUM_REQS-1:0] grant_idx;
        wire [NUM_REQS-1:0]     grant_1hot;

        // the arbiter is enabled whenever at least one output lane is ready
        wire grant_enable = (| req_ready_out);

        VX_rr_arbiter #(
            .NUM_REQS    (NUM_REQS),
            .LOCK_ENABLE (1)
        ) sel_arb (
            .clk          (clk),
            .reset        (reset),
            .requests     (req_pending),
            .enable       (grant_enable),
            .grant_valid  (grant_valid),
            .grant_index  (grant_idx),
            .grant_onehot (grant_1hot)
        );

        // unpack the granted requester's bundle
        wire [REQ_DATAW-1:0]               granted_bundle = req_bundle[grant_idx];
        wire [LANES-1:0]                   granted_lane_valid;
        wire [LANES-1:0][TAG_IN_WIDTH-1:0] granted_lane_tag;

        assign {granted_lane_valid, granted_lane_tag, req_addr_out, req_rw_out, req_byteen_out, req_data_out} = granted_bundle;

        // lane valids only count while the arbiter reports a valid grant
        assign req_valid_out = {LANES{grant_valid}} & granted_lane_valid;

        for (genvar l = 0; l < LANES; l++) begin
            // requester index goes in the low tag bits for response routing
            assign req_tag_out[l] = {granted_lane_tag[l], grant_idx};
        end

        for (genvar r = 0; r < NUM_REQS; r++) begin
            // only the granted requester observes the downstream ready
            assign req_ready_in[r] = {LANES{grant_1hot[r]}} & req_ready_out;
        end

        ///////////////////////////////////////////////////////////////////////

        // low tag bits: destination requester; bits above: requester's own tag
        wire [LOG_NUM_REQS-1:0] rsp_dest         = rsp_tag_in[LOG_NUM_REQS-1:0];
        wire [TAG_IN_WIDTH-1:0] rsp_tag_stripped = rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH];

        // steer the whole lane-valid mask to the destination requester
        reg [NUM_REQS-1:0][LANES-1:0] rsp_valid_routed;
        always @(*) begin
            rsp_valid_routed           = '0;
            rsp_valid_routed[rsp_dest] = rsp_valid_in;
        end
        assign rsp_valid_out = rsp_valid_routed;

        for (genvar r = 0; r < NUM_REQS; r++) begin
            // data and stripped tag are broadcast; valid qualifies the target
            assign rsp_tag_out[r]  = rsp_tag_stripped;
            assign rsp_data_out[r] = rsp_data_in;
        end

        assign rsp_ready_in = rsp_ready_out[rsp_dest];

    end else begin

        // single requester: nothing to arbitrate, wire straight through
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        assign req_valid_out  = req_valid_in;
        assign req_ready_in   = req_ready_out;
        assign req_tag_out    = req_tag_in;
        assign req_addr_out   = req_addr_in;
        assign req_rw_out     = req_rw_in;
        assign req_byteen_out = req_byteen_in;
        assign req_data_out   = req_data_in;

        assign rsp_valid_out  = rsp_valid_in;
        assign rsp_ready_in   = rsp_ready_out;
        assign rsp_tag_out    = rsp_tag_in;
        assign rsp_data_out   = rsp_data_in;

    end

endmodule
|
|
@ -4,18 +4,18 @@
|
|||
`include "../cache/VX_cache_config.vh"
|
||||
|
||||
interface VX_dcache_core_req_if #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter LANES = 1,
|
||||
parameter WORD_SIZE = 1,
|
||||
parameter CORE_TAG_WIDTH = 1
|
||||
) ();
|
||||
|
||||
wire [NUM_REQS-1:0] valid;
|
||||
wire [NUM_REQS-1:0] rw;
|
||||
wire [NUM_REQS-1:0][WORD_SIZE-1:0] byteen;
|
||||
wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] addr;
|
||||
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] data;
|
||||
wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] tag;
|
||||
wire [NUM_REQS-1:0] ready;
|
||||
wire [LANES-1:0] valid;
|
||||
wire [LANES-1:0] rw;
|
||||
wire [LANES-1:0][WORD_SIZE-1:0] byteen;
|
||||
wire [LANES-1:0][`WORD_ADDR_WIDTH-1:0] addr;
|
||||
wire [LANES-1:0][`WORD_WIDTH-1:0] data;
|
||||
wire [LANES-1:0][CORE_TAG_WIDTH-1:0] tag;
|
||||
wire [LANES-1:0] ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -4,15 +4,15 @@
|
|||
`include "../cache/VX_cache_config.vh"
|
||||
|
||||
interface VX_dcache_core_rsp_if #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter LANES = 1,
|
||||
parameter WORD_SIZE = 1,
|
||||
parameter CORE_TAG_WIDTH = 1
|
||||
) ();
|
||||
|
||||
wire [NUM_REQS-1:0] valid;
|
||||
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] data;
|
||||
wire [CORE_TAG_WIDTH-1:0] tag;
|
||||
wire ready;
|
||||
wire [LANES-1:0] valid;
|
||||
wire [LANES-1:0][`WORD_WIDTH-1:0]data;
|
||||
wire [CORE_TAG_WIDTH-1:0] tag;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -198,7 +198,7 @@ module VX_tex_memory #(
|
|||
|
||||
// send store commit
|
||||
|
||||
wire is_store_rsp = req_valid && ~req_wb && req_sent_all;
|
||||
//wire is_store_rsp = req_valid && ~req_wb && req_sent_all;
|
||||
|
||||
// assign st_commit_if.valid = is_store_rsp;
|
||||
// assign st_commit_if.wid = req_wid;
|
||||
|
|
|
@ -12,12 +12,11 @@ module VX_tex_unit #(
|
|||
VX_tex_csr_if tex_csr_if,
|
||||
|
||||
// Outputs
|
||||
VX_tex_rsp_if tex_rsp_if
|
||||
VX_tex_rsp_if tex_rsp_if,
|
||||
|
||||
// Texture unit <-> Memory Unit
|
||||
VX_dcache_core_req_if dcache_req_if,
|
||||
VX_dcache_core_rsp_if dcache_rsp_if
|
||||
|
||||
);
|
||||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
@ -83,7 +82,8 @@ module VX_tex_unit #(
|
|||
// texture response
|
||||
`UNUSED_VAR (tex_req_if.u)
|
||||
`UNUSED_VAR (tex_req_if.v)
|
||||
`UNUSED_VAR (tex_req_if.lod_t)
|
||||
`UNUSED_VAR (tex_req_if.lod)
|
||||
`UNUSED_VAR (tex_req_if.t)
|
||||
|
||||
assign stall_in = stall_out;
|
||||
|
||||
|
@ -96,7 +96,7 @@ module VX_tex_unit #(
|
|||
assign rsp_data = {`NUM_THREADS{32'hFF0000FF}}; // dummy blue value
|
||||
|
||||
|
||||
//point sampling texel address computation
|
||||
/*//point sampling texel address computation
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign tex_req_if.u[i] = gpu_req_if.rs1_data[i];
|
||||
assign tex_req_if.v[i] = gpu_req_if.rs2_data[i];
|
||||
|
@ -108,7 +108,7 @@ module VX_tex_unit #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
);
|
||||
end
|
||||
end*/
|
||||
|
||||
// fifo/wait buffer for fragments and also to dcache
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue