texture unit dcache arbitration

This commit is contained in:
Blaise Tine 2021-03-18 14:23:53 -04:00
parent 6febdf7399
commit 124acfbf12
14 changed files with 606 additions and 115 deletions

View file

@ -22,8 +22,8 @@ void kernel_body(int task_id, void* arg) {
for (uint32_t y = 0; y < _arg->tile_height; ++y) {
for (uint32_t x = 0; x < _arg->tile_width; ++x) {
int32_t u = (int32_t)(fu * (1<<28));
int32_t v = (int32_t)(fv * (1<<28));
int32_t u = (int32_t)(fu * (1<<20));
int32_t v = (int32_t)(fv * (1<<20));
dst_ptr[x] = vx_tex(0, u, v, 0);
fu += _arg->deltaX;
}

View file

@ -71,13 +71,13 @@ module VX_core #(
//--
VX_dcache_core_req_if #(
.NUM_REQS(`DNUM_REQUESTS),
.LANES(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
) dcache_core_req_if();
VX_dcache_core_rsp_if #(
.NUM_REQS(`DNUM_REQUESTS),
.LANES(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
) dcache_core_rsp_if();

118
hw/rtl/VX_dcache_arb.v Normal file
View file

@ -0,0 +1,118 @@
`include "VX_define.vh"
// Arbitrates NUM_REQS single-word memory request streams onto one output
// stream and routes every response back to the requester that issued it.
//
// Request path : the requester index is appended below the incoming tag,
//                i.e. the outgoing tag is {req_tag_in[i], index}.
// Response path: the low LOG_NUM_REQS bits of rsp_tag_in pick the destination
//                and are stripped from the tag handed back to the requester.
//
// NOTE(review): for NUM_REQS > 1 this presumes
// TAG_OUT_WIDTH == TAG_IN_WIDTH + LOG_NUM_REQS - confirm at each instantiation.
module VX_mem_arb #(
    parameter NUM_REQS      = 1,
    parameter DATA_WIDTH    = 1,
    parameter TAG_IN_WIDTH  = 1,
    parameter TAG_OUT_WIDTH = 1,
    parameter BUFFERED_REQ  = 0,
    parameter BUFFERED_RSP  = 0,
    parameter DATA_SIZE     = (DATA_WIDTH / 8),
    parameter ADDR_WIDTH    = 32 - `CLOG2(DATA_SIZE),
    parameter LOG_NUM_REQS  = `CLOG2(NUM_REQS)
) (
    input wire clk,
    input wire reset,

    // input requests (one stream per requester)
    input wire [NUM_REQS-1:0]                       req_valid_in,
    input wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0]     req_tag_in,
    input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0]       req_addr_in,
    input wire [NUM_REQS-1:0]                       req_rw_in,
    input wire [NUM_REQS-1:0][DATA_SIZE-1:0]        req_byteen_in,
    input wire [NUM_REQS-1:0][DATA_WIDTH-1:0]       req_data_in,
    output wire [NUM_REQS-1:0]                      req_ready_in,

    // output request (single arbitrated stream)
    output wire                                     req_valid_out,
    output wire [TAG_OUT_WIDTH-1:0]                 req_tag_out,
    output wire [ADDR_WIDTH-1:0]                    req_addr_out,
    output wire                                     req_rw_out,
    output wire [DATA_SIZE-1:0]                     req_byteen_out,
    output wire [DATA_WIDTH-1:0]                    req_data_out,
    input wire                                      req_ready_out,

    // input response (single stream)
    input wire                                      rsp_valid_in,
    input wire [TAG_OUT_WIDTH-1:0]                  rsp_tag_in,
    input wire [DATA_WIDTH-1:0]                     rsp_data_in,
    output wire                                     rsp_ready_in,

    // output responses (one stream per requester)
    output wire [NUM_REQS-1:0]                      rsp_valid_out,
    output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0]    rsp_tag_out,
    output wire [NUM_REQS-1:0][DATA_WIDTH-1:0]      rsp_data_out,
    input wire [NUM_REQS-1:0]                       rsp_ready_out
);
    // Payload widths carried through the arbiter and the demux.
    localparam REQ_DATAW = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
    localparam RSP_DATAW = TAG_IN_WIDTH + DATA_WIDTH;

    if (NUM_REQS > 1) begin

        // ---------------------------- requests ------------------------------

        wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_bundle_in;
        wire [REQ_DATAW-1:0]               req_bundle_out;

        for (genvar r = 0; r < NUM_REQS; r++) begin
            // requester index goes into the low tag bits
            assign req_bundle_in[r] = {
                req_tag_in[r], LOG_NUM_REQS'(r),
                req_addr_in[r],
                req_rw_in[r],
                req_byteen_in[r],
                req_data_in[r]
            };
        end

        VX_stream_arbiter #(
            .NUM_REQS (NUM_REQS),
            .DATAW    (REQ_DATAW),
            .BUFFERED (BUFFERED_REQ)
        ) req_arb (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (req_valid_in),
            .data_in   (req_bundle_in),
            .ready_in  (req_ready_in),
            .valid_out (req_valid_out),
            .data_out  (req_bundle_out),
            .ready_out (req_ready_out)
        );

        assign {req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out} = req_bundle_out;

        // ---------------------------- responses -----------------------------

        // destination requester is encoded in the low tag bits
        wire [LOG_NUM_REQS-1:0] rsp_dest = rsp_tag_in[LOG_NUM_REQS-1:0];

        // response payload with the requester index removed from the tag
        wire [RSP_DATAW-1:0] rsp_bundle_in = {rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH], rsp_data_in};

        wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_bundle_out;

        for (genvar r = 0; r < NUM_REQS; r++) begin
            assign {rsp_tag_out[r], rsp_data_out[r]} = rsp_bundle_out[r];
        end

        VX_stream_demux #(
            .NUM_REQS (NUM_REQS),
            .DATAW    (RSP_DATAW),
            .BUFFERED (BUFFERED_RSP)
        ) rsp_demux (
            .clk       (clk),
            .reset     (reset),
            .sel       (rsp_dest),
            .valid_in  (rsp_valid_in),
            .data_in   (rsp_bundle_in),
            .ready_in  (rsp_ready_in),
            .valid_out (rsp_valid_out),
            .data_out  (rsp_bundle_out),
            .ready_out (rsp_ready_out)
        );

    end else begin

        // Single requester: plain wire-through, no state required.
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        // request direction
        assign req_valid_out  = req_valid_in;
        assign req_tag_out    = req_tag_in;
        assign req_addr_out   = req_addr_in;
        assign req_rw_out     = req_rw_in;
        assign req_byteen_out = req_byteen_in;
        assign req_data_out   = req_data_in;
        assign req_ready_in   = req_ready_out;

        // response direction
        assign rsp_valid_out  = rsp_valid_in;
        assign rsp_tag_out    = rsp_tag_in;
        assign rsp_data_out   = rsp_data_in;
        assign rsp_ready_in   = rsp_ready_out;

    end

endmodule

View file

@ -283,8 +283,13 @@
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
// Core request tag bits
`ifdef EXT_TEX_ENABLE
`define LSU_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
`define DCORE_TAG_WIDTH (`LSU_TAG_WIDTH+1)
`else
`define DCORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
`endif
// DRAM request data bits
`define DDRAM_LINE_WIDTH (`DCACHE_LINE_SIZE * 8)

View file

@ -47,7 +47,74 @@ module VX_execute #(
VX_fpu_to_csr_if fpu_to_csr_if();
`ifdef EXT_TEX_ENABLE
VX_dcache_core_req_if #(
.LANES(`NUM_THREADS),
.WORD_SIZE(4),
.CORE_TAG_WIDTH(`LSU_TAG_WIDTH)
) tex_dcache_req_if();
VX_dcache_core_rsp_if #(
.LANES(`NUM_THREADS),
.WORD_SIZE(4),
.CORE_TAG_WIDTH(`LSU_TAG_WIDTH)
) tex_dcache_rsp_if();
VX_dcache_core_req_if #(
.LANES(`NUM_THREADS),
.WORD_SIZE(4),
.CORE_TAG_WIDTH(`LSU_TAG_WIDTH)
) lsu_dcache_req_if();
VX_dcache_core_rsp_if #(
.LANES(`NUM_THREADS),
.WORD_SIZE(4),
.CORE_TAG_WIDTH(`LSU_TAG_WIDTH)
) lsu_dcache_rsp_if();
VX_tex_csr_if tex_csr_if();
VX_tex_lsu_arb #(
.NUM_REQS (2),
.LANES (`NUM_THREADS),
.WORD_SIZE (4),
.TAG_IN_WIDTH (`LSU_TAG_WIDTH),
.TAG_OUT_WIDTH (`DCORE_TAG_WIDTH)
) tex_lsu_arb (
.clk (clk),
.reset (reset),
// Tex/LSU request
.req_valid_in ({tex_dcache_req_if.valid, lsu_dcache_req_if.valid}),
.req_rw_in ({tex_dcache_req_if.rw, lsu_dcache_req_if.rw}),
.req_byteen_in ({tex_dcache_req_if.byteen, lsu_dcache_req_if.byteen}),
.req_addr_in ({tex_dcache_req_if.addr, lsu_dcache_req_if.addr}),
.req_data_in ({tex_dcache_req_if.data, lsu_dcache_req_if.data}),
.req_tag_in ({tex_dcache_req_if.tag, lsu_dcache_req_if.tag}),
.req_ready_in ({tex_dcache_req_if.ready, lsu_dcache_req_if.ready}),
// Dcache request
.req_valid_out (dcache_req_if.valid),
.req_rw_out (dcache_req_if.rw),
.req_byteen_out (dcache_req_if.byteen),
.req_addr_out (dcache_req_if.addr),
.req_data_out (dcache_req_if.data),
.req_tag_out (dcache_req_if.tag),
.req_ready_out (dcache_req_if.ready),
// Tex/LSU response
.rsp_valid_out ({tex_dcache_rsp_if.valid, lsu_dcache_rsp_if.valid}),
.rsp_data_out ({tex_dcache_rsp_if.data, lsu_dcache_rsp_if.data}),
.rsp_tag_out ({tex_dcache_rsp_if.tag, lsu_dcache_rsp_if.tag}),
.rsp_ready_out ({tex_dcache_rsp_if.ready, lsu_dcache_rsp_if.ready}),
// Dcache response
.rsp_valid_in (dcache_rsp_if.valid),
.rsp_tag_in (dcache_rsp_if.tag),
.rsp_data_in (dcache_rsp_if.data),
.rsp_ready_in (dcache_rsp_if.ready)
);
`endif
wire[`NUM_WARPS-1:0] csr_pending;
@ -63,105 +130,24 @@ module VX_execute #(
.alu_commit_if (alu_commit_if)
);
`ifdef EXT_TEX_ENABLE
VX_dcache_core_req_if #(
.NUM_REQS(`NUM_THREADS),
.WORD_SIZE(4),
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
) tex_dcache_req_if();
VX_dcache_core_rsp_if #(
.NUM_REQS(`NUM_THREADS),
.WORD_SIZE(4),
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
) tex_dcache_rsp_if();
VX_dcache_core_req_if #(
.NUM_REQS(`NUM_THREADS),
.WORD_SIZE(4),
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
) lsu_dcache_req_if();
VX_dcache_core_rsp_if #(
.NUM_REQS(`NUM_THREADS),
.WORD_SIZE(4),
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
) lsu_dcache_rsp_if();
VX_mem_arb #(
.NUM_REQS (2),
.DATA_WIDTH (`WORD_WIDTH),
.TAG_IN_WIDTH (`DCORE_TAG_WIDTH),
.TAG_OUT_WIDTH (`DCORE_TAG_WIDTH),
.BUFFERED_REQ (0),
.BUFFERED_RSP (0)
) dcache_arb (
.clk (clk),
.reset (reset),
// Tex/LSU request
.req_valid_in ({tex_dcache_req_if.valid, lsu_dcache_req_if.valid}),
.req_rw_in ({tex_dcache_req_if.rw, lsu_dcache_req_if.rw}),
.req_byteen_in ({tex_dcache_req_if.byteen, lsu_dcache_req_if.byteen}),
.req_addr_in ({tex_dcache_req_if.addr, lsu_dcache_req_if.addr}),
.req_data_in ({tex_dcache_req_if.data, lsu_dcache_req_if.data}),
.req_tag_in ({tex_dcache_req_if.tag, lsu_dcache_req_if.tag}),
.req_ready_in ({tex_dcache_req_if.ready, lsu_dcache_req_if.ready}),
// Dcache request
.req_valid_out (dcache_req_if.valid),
.req_rw_out (dcache_req_if.rw),
.req_byteen_out (dcache_req_if.byteen),
.req_addr_out (dcache_req_if.addr),
.req_data_out (dcache_req_if.data),
.req_tag_out (dcache_req_if.tag),
.req_ready_out (dcache_req_if.ready),
// Tex/LSU response
.rsp_valid_out ({tex_dcache_rsp_if.valid, lsu_dcache_rsp_if.valid}),
.rsp_data_out ({tex_dcache_rsp_if.data, lsu_dcache_rsp_if.data}),
.rsp_tag_out ({tex_dcache_rsp_if.tag, lsu_dcache_rsp_if.tag}),
.rsp_ready_out ({tex_dcache_rsp_if.ready, lsu_dcache_rsp_if.ready}),
// Dcache response
.rsp_valid_in (dcache_rsp_if.valid),
.rsp_tag_in (dcache_rsp_if.tag),
.rsp_data_in (dcache_rsp_if.data),
.rsp_ready_in (dcache_rsp_if.ready)
);
VX_lsu_unit #(
.CORE_ID(CORE_ID)
) lsu_unit (
`SCOPE_BIND_VX_execute_lsu_unit
.clk (clk),
.reset (reset),
`ifdef EXT_TEX_ENABLE
.dcache_req_if (lsu_dcache_req_if),
.dcache_rsp_if (lsu_dcache_rsp_if),
.lsu_req_if (lsu_req_if),
.ld_commit_if (ld_commit_if),
.st_commit_if (st_commit_if)
);
`else
VX_lsu_unit #(
.CORE_ID(CORE_ID)
) lsu_unit (
`SCOPE_BIND_VX_execute_lsu_unit
.clk (clk),
.reset (reset),
`else
.dcache_req_if (dcache_req_if),
.dcache_rsp_if (dcache_rsp_if),
`endif
.lsu_req_if (lsu_req_if),
.ld_commit_if (ld_commit_if),
.st_commit_if (st_commit_if)
);
`endif
VX_csr_unit #(
.CORE_ID(CORE_ID)
) csr_unit (

View file

@ -41,30 +41,30 @@ module VX_mem_unit # (
) dcache_dram_rsp_if(), icache_dram_rsp_if();
VX_dcache_core_req_if #(
.NUM_REQS (`DNUM_REQUESTS),
.LANES (`DNUM_REQUESTS),
.WORD_SIZE (`DWORD_SIZE),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH)
) dcache_req_if();
VX_dcache_core_rsp_if #(
.NUM_REQS (`DNUM_REQUESTS),
.LANES (`DNUM_REQUESTS),
.WORD_SIZE (`DWORD_SIZE),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH)
) dcache_rsp_if();
VX_dcache_core_req_if #(
.NUM_REQS (`DNUM_REQUESTS),
.LANES (`DNUM_REQUESTS),
.WORD_SIZE (`DWORD_SIZE),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH)
) smem_req_if();
VX_dcache_core_rsp_if #(
.NUM_REQS (`DNUM_REQUESTS),
.LANES (`DNUM_REQUESTS),
.WORD_SIZE (`DWORD_SIZE),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH)
) smem_rsp_if();
VX_databus_arb databus_arb (
VX_smem_arb databus_arb (
.clk (clk),
.reset (reset),

View file

@ -61,7 +61,7 @@ module VX_pipeline #(
//
VX_dcache_core_req_if #(
.NUM_REQS(`NUM_THREADS),
.LANES(`NUM_THREADS),
.WORD_SIZE(4),
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
) dcache_core_req_if();
@ -79,7 +79,7 @@ module VX_pipeline #(
//
VX_dcache_core_rsp_if #(
.NUM_REQS(`NUM_THREADS),
.LANES(`NUM_THREADS),
.WORD_SIZE(4),
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH)
) dcache_core_rsp_if();

118
hw/rtl/VX_smem_arb.v Normal file
View file

@ -0,0 +1,118 @@
`include "VX_define.vh"
// Splits the core's data-memory request stream between the data cache and
// the shared memory based on the request address, and merges the two
// response streams back into a single core response stream.
module VX_smem_arb (
    input wire clk,
    input wire reset,

    // request stream coming from the core
    VX_dcache_core_req_if   core_req_if,

    // request streams leaving towards the cache and the shared memory
    VX_dcache_core_req_if   cache_req_if,
    VX_dcache_core_req_if   smem_req_if,

    // response streams coming back from the cache and the shared memory
    VX_dcache_core_rsp_if   cache_rsp_if,
    VX_dcache_core_rsp_if   smem_rsp_if,

    // merged response stream returned to the core
    VX_dcache_core_rsp_if   core_rsp_if
);
    localparam SMEM_ASHIFT = `CLOG2(`SHARED_MEM_BASE_ADDR_ALIGN);
    localparam REQ_ASHIFT  = `CLOG2(`DWORD_SIZE);
    localparam REQ_ADDRW   = 32 - REQ_ASHIFT;
    // per-lane request payload: addr + rw + byteen + data + tag
    localparam REQ_DATAW   = REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
    // response payload: per-lane valid mask + per-lane data + shared tag
    localparam RSP_DATAW   = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;

    //
    // requests: each lane is steered independently
    //

    for (genvar t = 0; t < `NUM_THREADS; ++t) begin

        wire to_cache_ready;
        wire to_smem_ready;

        // The shared-memory window is [SHARED_MEM_BASE_ADDR - SMEM_SIZE,
        // SHARED_MEM_BASE_ADDR), compared at SHARED_MEM_BASE_ADDR_ALIGN
        // granularity on the word address.
        wire at_or_above_floor = (core_req_if.addr[t][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] >= (32-SMEM_ASHIFT)'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> SMEM_ASHIFT));
        wire below_ceiling     = (core_req_if.addr[t][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));

        wire is_smem_addr = core_req_if.valid[t] && `SM_ENABLE
                         && at_or_above_floor
                         && below_ceiling;

        // identical payload is presented to both buffers; only valid differs
        wire [REQ_DATAW-1:0] lane_req = {
            core_req_if.addr[t],
            core_req_if.rw[t],
            core_req_if.byteen[t],
            core_req_if.data[t],
            core_req_if.tag[t]
        };

        VX_skid_buffer #(
            .DATAW (REQ_DATAW)
        ) cache_out_buffer (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (core_req_if.valid[t] && !is_smem_addr),
            .data_in   (lane_req),
            .ready_in  (to_cache_ready),
            .valid_out (cache_req_if.valid[t]),
            .data_out  ({cache_req_if.addr[t], cache_req_if.rw[t], cache_req_if.byteen[t], cache_req_if.data[t], cache_req_if.tag[t]}),
            .ready_out (cache_req_if.ready[t])
        );

        VX_skid_buffer #(
            .DATAW (REQ_DATAW)
        ) smem_out_buffer (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (core_req_if.valid[t] && is_smem_addr),
            .data_in   (lane_req),
            .ready_in  (to_smem_ready),
            .valid_out (smem_req_if.valid[t]),
            .data_out  ({smem_req_if.addr[t], smem_req_if.rw[t], smem_req_if.byteen[t], smem_req_if.data[t], smem_req_if.tag[t]}),
            .ready_out (smem_req_if.ready[t])
        );

        // back-pressure comes from whichever buffer the lane is steered to
        assign core_req_if.ready[t] = is_smem_addr ? to_smem_ready : to_cache_ready;
    end

    //
    // responses
    //

    if (`SM_ENABLE ) begin

        // index 0 = cache, index 1 = shared memory
        wire [1:0][RSP_DATAW-1:0] src_rsp_data;
        wire [1:0]                src_rsp_valid;
        wire [1:0]                src_rsp_ready;

        wire                      merged_valid;
        wire [`NUM_THREADS-1:0]   merged_tmask;

        // the per-lane valid mask travels with the payload through the arbiter
        assign src_rsp_data[0]  = {cache_rsp_if.valid, cache_rsp_if.data, cache_rsp_if.tag};
        assign src_rsp_valid[0] = (| cache_rsp_if.valid);

        assign src_rsp_data[1]  = {smem_rsp_if.valid, smem_rsp_if.data, smem_rsp_if.tag};
        assign src_rsp_valid[1] = (| smem_rsp_if.valid) & `SM_ENABLE;

        VX_stream_arbiter #(
            .NUM_REQS (2),
            .DATAW    (RSP_DATAW),
            .BUFFERED (0)
        ) rsp_arb (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (src_rsp_valid),
            .data_in   (src_rsp_data),
            .ready_in  (src_rsp_ready),
            .valid_out (merged_valid),
            .data_out  ({merged_tmask, core_rsp_if.data, core_rsp_if.tag}),
            .ready_out (core_rsp_if.ready)
        );

        assign smem_rsp_if.ready  = src_rsp_ready[1];
        assign cache_rsp_if.ready = src_rsp_ready[0];

        // re-expand the arbitrated valid into the per-lane mask
        assign core_rsp_if.valid = {`NUM_THREADS{merged_valid}} & merged_tmask;

    end else begin

        // shared memory disabled: cache responses pass straight through
        assign core_rsp_if.valid  = cache_rsp_if.valid;
        assign core_rsp_if.tag    = cache_rsp_if.tag;
        assign core_rsp_if.data   = cache_rsp_if.data;
        assign cache_rsp_if.ready = core_rsp_if.ready;

    end

endmodule

136
hw/rtl/VX_tex_cache_arb.v Normal file
View file

@ -0,0 +1,136 @@
`include "VX_define.vh"
// Arbitrates NUM_REQS multi-lane dcache request streams onto a single
// multi-lane output stream and steers each response back to the requester
// that issued it.
//
// Request path : one requester at a time is selected by VX_rr_arbiter; all of
//                its lanes are forwarded together and the requester index is
//                appended below every lane's tag ({tag, index}).
// Response path: the low LOG_NUM_REQS bits of rsp_tag_in pick the destination;
//                they are stripped from the tag handed back to the requester.
//
// Fixes relative to the previous revision of this module:
//  - the parameter list was missing the comma after TAG_OUT_WIDTH;
//  - the response path instantiated VX_stream_demux with an undeclared
//    BUFFERED_RSP parameter and with LANES-wide valid vectors wired to its
//    valid ports; it now uses a combinational per-lane demux instead.
//
// NOTE(review): for NUM_REQS > 1 this presumes
// TAG_OUT_WIDTH == TAG_IN_WIDTH + LOG_NUM_REQS - confirm at each instantiation.
// NOTE(review): the file name shown for this module (VX_tex_cache_arb.v) does
// not match the module name - confirm which one is intended.
module VX_dcache_arb #(
    parameter NUM_REQS      = 1,
    parameter LANES         = 1,
    parameter WORD_SIZE     = 1,
    parameter TAG_IN_WIDTH  = 1,
    parameter TAG_OUT_WIDTH = 1,
    parameter LOG_NUM_REQS  = `CLOG2(NUM_REQS)
) (
    input wire clk,
    input wire reset,

    // input requests
    input wire [NUM_REQS-1:0][LANES-1:0]                        req_valid_in,
    input wire [NUM_REQS-1:0][LANES-1:0]                        req_rw_in,
    input wire [NUM_REQS-1:0][LANES-1:0][WORD_SIZE-1:0]         req_byteen_in,
    input wire [NUM_REQS-1:0][LANES-1:0][`WORD_ADDR_WIDTH-1:0]  req_addr_in,
    input wire [NUM_REQS-1:0][LANES-1:0][`WORD_WIDTH-1:0]       req_data_in,
    input wire [NUM_REQS-1:0][LANES-1:0][TAG_IN_WIDTH-1:0]      req_tag_in,
    output wire [NUM_REQS-1:0][LANES-1:0]                       req_ready_in,

    // output request
    output wire [LANES-1:0]                                     req_valid_out,
    output wire [LANES-1:0]                                     req_rw_out,
    output wire [LANES-1:0][WORD_SIZE-1:0]                      req_byteen_out,
    output wire [LANES-1:0][`WORD_ADDR_WIDTH-1:0]               req_addr_out,
    output wire [LANES-1:0][`WORD_WIDTH-1:0]                    req_data_out,
    output wire [LANES-1:0][TAG_OUT_WIDTH-1:0]                  req_tag_out,
    input wire [LANES-1:0]                                      req_ready_out,

    // input response
    input wire [LANES-1:0]                                      rsp_valid_in,
    input wire [LANES-1:0][`WORD_WIDTH-1:0]                     rsp_data_in,
    input wire [TAG_OUT_WIDTH-1:0]                              rsp_tag_in,
    output wire                                                 rsp_ready_in,

    // output responses
    output wire [NUM_REQS-1:0][LANES-1:0]                       rsp_valid_out,
    output wire [NUM_REQS-1:0][LANES-1:0][`WORD_WIDTH-1:0]      rsp_data_out,
    output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0]                rsp_tag_out,
    input wire [NUM_REQS-1:0]                                   rsp_ready_out
);
    // per-requester request payload: lane valids + tags + addrs + rw + byteen + data
    localparam REQ_DATAW = LANES * (1 + TAG_IN_WIDTH + `WORD_ADDR_WIDTH + 1 + WORD_SIZE + `WORD_WIDTH);

    if (NUM_REQS > 1) begin

        wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_merged_data_in;
        wire [NUM_REQS-1:0]                req_valid_in_any;

        for (genvar i = 0; i < NUM_REQS; i++) begin
            assign req_merged_data_in[i] = {req_valid_in[i], req_tag_in[i], req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
            // a requester competes as soon as any of its lanes is valid
            assign req_valid_in_any[i]   = (| req_valid_in[i]);
        end

        wire                    sel_valid;
        wire [LOG_NUM_REQS-1:0] sel_idx;
        wire [NUM_REQS-1:0]     sel_1hot;

        // the arbiter is enabled whenever at least one output lane is ready
        wire sel_enable = (| req_ready_out);

        VX_rr_arbiter #(
            .NUM_REQS    (NUM_REQS),
            .LOCK_ENABLE (1)
        ) sel_arb (
            .clk          (clk),
            .reset        (reset),
            .requests     (req_valid_in_any),
            .enable       (sel_enable),
            .grant_valid  (sel_valid),
            .grant_index  (sel_idx),
            .grant_onehot (sel_1hot)
        );

        wire [LANES-1:0]                   req_valid_out_unqual;
        wire [LANES-1:0][TAG_IN_WIDTH-1:0] req_tag_out_unqual;

        assign {req_valid_out_unqual, req_tag_out_unqual, req_addr_out, req_rw_out, req_byteen_out, req_data_out} = req_merged_data_in[sel_idx];

        // lane valids only count while the arbiter reports a valid grant
        assign req_valid_out = req_valid_out_unqual & {LANES{sel_valid}};

        for (genvar i = 0; i < LANES; i++) begin
            // requester index goes into the low tag bits of every lane
            assign req_tag_out[i] = {req_tag_out_unqual[i], sel_idx};
        end

        for (genvar i = 0; i < NUM_REQS; i++) begin
            // only the granted requester observes the output ready bits
            assign req_ready_in[i] = req_ready_out & {LANES{sel_1hot[i]}};
        end

        ///////////////////////////////////////////////////////////////////////

        // destination requester is encoded in the low tag bits
        wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[LOG_NUM_REQS-1:0];

        reg [NUM_REQS-1:0][LANES-1:0] rsp_valid_out_unqual;

        always @(*) begin
            // only the selected requester sees the lane valid mask
            rsp_valid_out_unqual          = '0;
            rsp_valid_out_unqual[rsp_sel] = rsp_valid_in;
        end

        assign rsp_valid_out = rsp_valid_out_unqual;

        for (genvar i = 0; i < NUM_REQS; i++) begin
            // data and stripped tag are broadcast; valid does the steering
            assign rsp_data_out[i] = rsp_data_in;
            assign rsp_tag_out[i]  = rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH];
        end

        assign rsp_ready_in = rsp_ready_out[rsp_sel];

    end else begin

        // Single requester: plain wire-through, no state required.
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        assign req_valid_out  = req_valid_in;
        assign req_tag_out    = req_tag_in;
        assign req_addr_out   = req_addr_in;
        assign req_rw_out     = req_rw_in;
        assign req_byteen_out = req_byteen_in;
        assign req_data_out   = req_data_in;
        assign req_ready_in   = req_ready_out;

        assign rsp_valid_out  = rsp_valid_in;
        assign rsp_tag_out    = rsp_tag_in;
        assign rsp_data_out   = rsp_data_in;
        assign rsp_ready_in   = rsp_ready_out;

    end

endmodule

128
hw/rtl/VX_tex_lsu_arb.v Normal file
View file

@ -0,0 +1,128 @@
`include "VX_define.vh"
// Shares one multi-lane dcache port between NUM_REQS multi-lane requesters
// (instantiated in this design for the texture unit and the LSU).
//
// Request path : VX_rr_arbiter picks one requester; all of its lanes are
//                forwarded together and the requester index is appended below
//                each lane's tag ({tag, index}).
// Response path: purely combinational; the low LOG_NUM_REQS bits of rsp_tag_in
//                choose which requester sees the lane valid mask, while data
//                and the stripped tag are broadcast to every requester.
//
// NOTE(review): for NUM_REQS > 1 this presumes
// TAG_OUT_WIDTH == TAG_IN_WIDTH + LOG_NUM_REQS - confirm at each instantiation.
module VX_tex_lsu_arb #(
    parameter NUM_REQS      = 1,
    parameter LANES         = 1,
    parameter WORD_SIZE     = 1,
    parameter TAG_IN_WIDTH  = 1,
    parameter TAG_OUT_WIDTH = 1,
    parameter LOG_NUM_REQS  = `CLOG2(NUM_REQS)
) (
    input wire clk,
    input wire reset,

    // input requests
    input wire [NUM_REQS-1:0][LANES-1:0]                        req_valid_in,
    input wire [NUM_REQS-1:0][LANES-1:0]                        req_rw_in,
    input wire [NUM_REQS-1:0][LANES-1:0][WORD_SIZE-1:0]         req_byteen_in,
    input wire [NUM_REQS-1:0][LANES-1:0][`WORD_ADDR_WIDTH-1:0]  req_addr_in,
    input wire [NUM_REQS-1:0][LANES-1:0][`WORD_WIDTH-1:0]       req_data_in,
    input wire [NUM_REQS-1:0][LANES-1:0][TAG_IN_WIDTH-1:0]      req_tag_in,
    output wire [NUM_REQS-1:0][LANES-1:0]                       req_ready_in,

    // output request
    output wire [LANES-1:0]                                     req_valid_out,
    output wire [LANES-1:0]                                     req_rw_out,
    output wire [LANES-1:0][WORD_SIZE-1:0]                      req_byteen_out,
    output wire [LANES-1:0][`WORD_ADDR_WIDTH-1:0]               req_addr_out,
    output wire [LANES-1:0][`WORD_WIDTH-1:0]                    req_data_out,
    output wire [LANES-1:0][TAG_OUT_WIDTH-1:0]                  req_tag_out,
    input wire [LANES-1:0]                                      req_ready_out,

    // input response
    input wire [LANES-1:0]                                      rsp_valid_in,
    input wire [LANES-1:0][`WORD_WIDTH-1:0]                     rsp_data_in,
    input wire [TAG_OUT_WIDTH-1:0]                              rsp_tag_in,
    output wire                                                 rsp_ready_in,

    // output responses
    output wire [NUM_REQS-1:0][LANES-1:0]                       rsp_valid_out,
    output wire [NUM_REQS-1:0][LANES-1:0][`WORD_WIDTH-1:0]      rsp_data_out,
    output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0]                rsp_tag_out,
    input wire [NUM_REQS-1:0]                                   rsp_ready_out
);
    // per-requester request payload: lane valids + tags + addrs + rw + byteen + data
    localparam REQ_DATAW = LANES * (1 + TAG_IN_WIDTH + `WORD_ADDR_WIDTH + 1 + WORD_SIZE + `WORD_WIDTH);

    if (NUM_REQS > 1) begin

        // ---------------------------- requests ------------------------------

        wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_bundle;
        wire [NUM_REQS-1:0]                req_pending;

        for (genvar r = 0; r < NUM_REQS; r++) begin
            // a requester competes as soon as any of its lanes is valid
            assign req_pending[r] = (| req_valid_in[r]);
            assign req_bundle[r]  = {
                req_valid_in[r],
                req_tag_in[r],
                req_addr_in[r],
                req_rw_in[r],
                req_byteen_in[r],
                req_data_in[r]
            };
        end

        wire                    grant_valid;
        wire [LOG_NUM_REQS-1:0] grant_idx;
        wire [NUM_REQS-1:0]     grant_1hot;

        // the arbiter is enabled whenever at least one output lane is ready
        wire arb_enable = (| req_ready_out);

        VX_rr_arbiter #(
            .NUM_REQS    (NUM_REQS),
            .LOCK_ENABLE (1)
        ) sel_arb (
            .clk          (clk),
            .reset        (reset),
            .requests     (req_pending),
            .enable       (arb_enable),
            .grant_valid  (grant_valid),
            .grant_index  (grant_idx),
            .grant_onehot (grant_1hot)
        );

        // payload of the currently granted requester
        wire [REQ_DATAW-1:0] granted_bundle = req_bundle[grant_idx];

        wire [LANES-1:0]                   granted_lane_valid;
        wire [LANES-1:0][TAG_IN_WIDTH-1:0] granted_lane_tag;

        assign {granted_lane_valid, granted_lane_tag, req_addr_out, req_rw_out, req_byteen_out, req_data_out} = granted_bundle;

        // lane valids only count while the arbiter reports a valid grant
        assign req_valid_out = {LANES{grant_valid}} & granted_lane_valid;

        for (genvar l = 0; l < LANES; l++) begin
            // requester index goes into the low tag bits of every lane
            assign req_tag_out[l] = {granted_lane_tag[l], grant_idx};
        end

        for (genvar r = 0; r < NUM_REQS; r++) begin
            // only the granted requester observes the output ready bits
            assign req_ready_in[r] = {LANES{grant_1hot[r]}} & req_ready_out;
        end

        // ---------------------------- responses -----------------------------

        // destination requester is encoded in the low tag bits
        wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[LOG_NUM_REQS-1:0];

        // tag with the requester index removed
        wire [TAG_IN_WIDTH-1:0] rsp_tag_stripped = rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH];

        reg [NUM_REQS-1:0][LANES-1:0] rsp_valid_steered;

        always @(*) begin
            // only the selected requester sees the lane valid mask
            rsp_valid_steered          = '0;
            rsp_valid_steered[rsp_sel] = rsp_valid_in;
        end

        assign rsp_valid_out = rsp_valid_steered;

        for (genvar r = 0; r < NUM_REQS; r++) begin
            // data and tag are broadcast; valid does the steering
            assign rsp_tag_out[r]  = rsp_tag_stripped;
            assign rsp_data_out[r] = rsp_data_in;
        end

        assign rsp_ready_in = rsp_ready_out[rsp_sel];

    end else begin

        // Single requester: plain wire-through, no state required.
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        // request direction
        assign req_valid_out  = req_valid_in;
        assign req_tag_out    = req_tag_in;
        assign req_addr_out   = req_addr_in;
        assign req_rw_out     = req_rw_in;
        assign req_byteen_out = req_byteen_in;
        assign req_data_out   = req_data_in;
        assign req_ready_in   = req_ready_out;

        // response direction
        assign rsp_valid_out  = rsp_valid_in;
        assign rsp_tag_out    = rsp_tag_in;
        assign rsp_data_out   = rsp_data_in;
        assign rsp_ready_in   = rsp_ready_out;

    end

endmodule

View file

@ -4,18 +4,18 @@
`include "../cache/VX_cache_config.vh"
interface VX_dcache_core_req_if #(
parameter NUM_REQS = 1,
parameter LANES = 1,
parameter WORD_SIZE = 1,
parameter CORE_TAG_WIDTH = 1
) ();
wire [NUM_REQS-1:0] valid;
wire [NUM_REQS-1:0] rw;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] byteen;
wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] addr;
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] data;
wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] tag;
wire [NUM_REQS-1:0] ready;
wire [LANES-1:0] valid;
wire [LANES-1:0] rw;
wire [LANES-1:0][WORD_SIZE-1:0] byteen;
wire [LANES-1:0][`WORD_ADDR_WIDTH-1:0] addr;
wire [LANES-1:0][`WORD_WIDTH-1:0] data;
wire [LANES-1:0][CORE_TAG_WIDTH-1:0] tag;
wire [LANES-1:0] ready;
endinterface

View file

@ -4,15 +4,15 @@
`include "../cache/VX_cache_config.vh"
interface VX_dcache_core_rsp_if #(
parameter NUM_REQS = 1,
parameter LANES = 1,
parameter WORD_SIZE = 1,
parameter CORE_TAG_WIDTH = 1
) ();
wire [NUM_REQS-1:0] valid;
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] data;
wire [CORE_TAG_WIDTH-1:0] tag;
wire ready;
wire [LANES-1:0] valid;
wire [LANES-1:0][`WORD_WIDTH-1:0]data;
wire [CORE_TAG_WIDTH-1:0] tag;
wire ready;
endinterface

View file

@ -198,7 +198,7 @@ module VX_tex_memory #(
// send store commit
wire is_store_rsp = req_valid && ~req_wb && req_sent_all;
//wire is_store_rsp = req_valid && ~req_wb && req_sent_all;
// assign st_commit_if.valid = is_store_rsp;
// assign st_commit_if.wid = req_wid;

View file

@ -12,12 +12,11 @@ module VX_tex_unit #(
VX_tex_csr_if tex_csr_if,
// Outputs
VX_tex_rsp_if tex_rsp_if
VX_tex_rsp_if tex_rsp_if,
// Texture unit <-> Memory Unit
VX_dcache_core_req_if dcache_req_if,
VX_dcache_core_rsp_if dcache_rsp_if
);
`UNUSED_PARAM (CORE_ID)
@ -83,7 +82,8 @@ module VX_tex_unit #(
// texture response
`UNUSED_VAR (tex_req_if.u)
`UNUSED_VAR (tex_req_if.v)
`UNUSED_VAR (tex_req_if.lod_t)
`UNUSED_VAR (tex_req_if.lod)
`UNUSED_VAR (tex_req_if.t)
assign stall_in = stall_out;
@ -96,7 +96,7 @@ module VX_tex_unit #(
assign rsp_data = {`NUM_THREADS{32'hFF0000FF}}; // dummy blue value
//point sampling texel address computation
/*//point sampling texel address computation
for (genvar i = 0; i < `NUM_THREADS; i++) begin
assign tex_req_if.u[i] = gpu_req_if.rs1_data[i];
assign tex_req_if.v[i] = gpu_req_if.rs2_data[i];
@ -108,7 +108,7 @@ module VX_tex_unit #(
.clk (clk),
.reset (reset),
);
end
end*/
// fifo/wait buffer for fragments and also to dcache