bug fixes

This commit is contained in:
tinebp 2025-02-21 05:56:52 -08:00
parent 983a848467
commit dccf5937ff
15 changed files with 387 additions and 272 deletions

View file

@ -36,6 +36,215 @@
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// ITF_TO_AOS: flattens an array of `count` interface instances into packed
// vectors. Declares prefix_valid, prefix_data (count x dataw) and
// prefix_ready in the enclosing scope; valid/data are driven from itf[i],
// and itf[i].ready is driven from prefix_ready[i].
// The generate loop is intentionally unnamed, so Verilator's GENUNNAMED
// lint is switched off around it and back on afterwards.
`define ITF_TO_AOS(itf, prefix, count, dataw) \
wire [(count)-1:0] prefix``_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_data; \
wire [(count)-1:0] prefix``_ready; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign prefix``_valid[i] = itf[i].valid; \
assign prefix``_data[i] = itf[i].data; \
assign itf[i].ready = prefix``_ready[i]; \
end \
/* verilator lint_on GENUNNAMED */
// AOS_TO_ITF: inverse direction of the packed-vector/interface bridge.
// Declares prefix_valid, prefix_data (count x dataw) and prefix_ready in the
// enclosing scope; itf[i].valid/data are driven from the vectors, and
// prefix_ready[i] is driven from itf[i].ready.
// The generate loop is unnamed, hence the GENUNNAMED lint guard.
`define AOS_TO_ITF(prefix, itf, count, dataw) \
wire [(count)-1:0] prefix``_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_data; \
wire [(count)-1:0] prefix``_ready; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign itf[i].valid = prefix``_valid[i]; \
assign itf[i].data = prefix``_data[i]; \
assign prefix``_ready[i] = itf[i].ready; \
end \
/* verilator lint_on GENUNNAMED */
// ITF_TO_AOS_V: valid/data-only variant of the interface-to-vector bridge.
// Declares prefix_valid and prefix_data (count x dataw) and drives them from
// itf[i].valid / itf[i].data. No ready signal is declared or connected.
// The generate loop is unnamed, hence the GENUNNAMED lint guard.
`define ITF_TO_AOS_V(itf, prefix, count, dataw) \
wire [(count)-1:0] prefix``_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_data; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign prefix``_valid[i] = itf[i].valid; \
assign prefix``_data[i] = itf[i].data; \
end \
/* verilator lint_on GENUNNAMED */
// AOS_TO_ITF_V: valid/data-only variant of the vector-to-interface bridge.
// Declares prefix_valid and prefix_data (count x dataw) and drives
// itf[i].valid / itf[i].data from them. No ready signal is declared or
// connected.
// The generate loop is unnamed, hence the GENUNNAMED lint guard.
`define AOS_TO_ITF_V(prefix, itf, count, dataw) \
wire [(count)-1:0] prefix``_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_data; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign itf[i].valid = prefix``_valid[i]; \
assign itf[i].data = prefix``_data[i]; \
end \
/* verilator lint_on GENUNNAMED */
// ITF_TO_AOS_REQ: request-channel version of ITF_TO_AOS.
// Declares prefix_req_valid, prefix_req_data (count x dataw) and
// prefix_req_ready; req_valid/req_data are driven from itf[i], and
// itf[i].req_ready is driven from prefix_req_ready[i].
// The generate loop is unnamed, hence the GENUNNAMED lint guard.
`define ITF_TO_AOS_REQ(itf, prefix, count, dataw) \
wire [(count)-1:0] prefix``_req_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_req_data; \
wire [(count)-1:0] prefix``_req_ready; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign prefix``_req_valid[i] = itf[i].req_valid; \
assign prefix``_req_data[i] = itf[i].req_data; \
assign itf[i].req_ready = prefix``_req_ready[i]; \
end \
/* verilator lint_on GENUNNAMED */
// AOS_TO_ITF_REQ: request-channel version of AOS_TO_ITF.
// Declares prefix_req_valid, prefix_req_data (count x dataw) and
// prefix_req_ready; itf[i].req_valid/req_data are driven from the vectors,
// and prefix_req_ready[i] is driven from itf[i].req_ready.
// The generate loop is unnamed, hence the GENUNNAMED lint guard.
`define AOS_TO_ITF_REQ(prefix, itf, count, dataw) \
wire [(count)-1:0] prefix``_req_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_req_data; \
wire [(count)-1:0] prefix``_req_ready; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign itf[i].req_valid = prefix``_req_valid[i]; \
assign itf[i].req_data = prefix``_req_data[i]; \
assign prefix``_req_ready[i] = itf[i].req_ready; \
end \
/* verilator lint_on GENUNNAMED */
// ITF_TO_AOS_REQ_V: request-channel, valid/data-only bridge.
// Declares prefix_req_valid and prefix_req_data (count x dataw) and drives
// them from itf[i].req_valid / itf[i].req_data. req_ready is not touched.
// The generate loop is unnamed, hence the GENUNNAMED lint guard.
`define ITF_TO_AOS_REQ_V(itf, prefix, count, dataw) \
wire [(count)-1:0] prefix``_req_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_req_data; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign prefix``_req_valid[i] = itf[i].req_valid; \
assign prefix``_req_data[i] = itf[i].req_data; \
end \
/* verilator lint_on GENUNNAMED */
// AOS_TO_ITF_REQ_V: request-channel, valid/data-only bridge in the
// vector-to-interface direction.
// Declares prefix_req_valid and prefix_req_data (count x dataw) and drives
// itf[i].req_valid / itf[i].req_data from them. req_ready is not touched.
// The generate loop is unnamed, hence the GENUNNAMED lint guard.
`define AOS_TO_ITF_REQ_V(prefix, itf, count, dataw) \
wire [(count)-1:0] prefix``_req_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_req_data; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign itf[i].req_valid = prefix``_req_valid[i]; \
assign itf[i].req_data = prefix``_req_data[i]; \
end \
/* verilator lint_on GENUNNAMED */
// ITF_TO_AOS_RSP: response-channel version of ITF_TO_AOS.
// Declares prefix_rsp_valid, prefix_rsp_data (count x dataw) and
// prefix_rsp_ready; rsp_valid/rsp_data are driven from itf[i], and
// itf[i].rsp_ready is driven from prefix_rsp_ready[i].
// The generate loop is unnamed, hence the GENUNNAMED lint guard.
`define ITF_TO_AOS_RSP(itf, prefix, count, dataw) \
wire [(count)-1:0] prefix``_rsp_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_rsp_data; \
wire [(count)-1:0] prefix``_rsp_ready; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign prefix``_rsp_valid[i] = itf[i].rsp_valid; \
assign prefix``_rsp_data[i] = itf[i].rsp_data; \
assign itf[i].rsp_ready = prefix``_rsp_ready[i]; \
end \
/* verilator lint_on GENUNNAMED */
// AOS_TO_ITF_RSP: response-channel version of AOS_TO_ITF.
// Declares prefix_rsp_valid, prefix_rsp_data (count x dataw) and
// prefix_rsp_ready; itf[i].rsp_valid/rsp_data are driven from the vectors,
// and prefix_rsp_ready[i] is driven from itf[i].rsp_ready.
//
// The ready vector must be declared as prefix_rsp_ready: that is the name
// the loop assigns to (a differently named declaration leaves the assigned
// net undeclared). The closing pragma re-enables GENUNNAMED so the
// suppression does not leak into code following the macro.
`define AOS_TO_ITF_RSP(prefix, itf, count, dataw) \
    wire [(count)-1:0] prefix``_rsp_valid; \
    wire [(count)-1:0][(dataw)-1:0] prefix``_rsp_data; \
    wire [(count)-1:0] prefix``_rsp_ready; \
    /* verilator lint_off GENUNNAMED */ \
    for (genvar i = 0; i < (count); ++i) begin \
        assign itf[i].rsp_valid = prefix``_rsp_valid[i]; \
        assign itf[i].rsp_data = prefix``_rsp_data[i]; \
        assign prefix``_rsp_ready[i] = itf[i].rsp_ready; \
    end \
    /* verilator lint_on GENUNNAMED */
// ITF_TO_AOS_RSP_V: response-channel, valid/data-only bridge.
// Declares prefix_rsp_valid and prefix_rsp_data (count x dataw) and drives
// them from itf[i].rsp_valid / itf[i].rsp_data. rsp_ready is not touched.
//
// The generate loop is unnamed, so GENUNNAMED is disabled around it and
// re-enabled afterwards; the suppression must not outlive the macro.
`define ITF_TO_AOS_RSP_V(itf, prefix, count, dataw) \
    wire [(count)-1:0] prefix``_rsp_valid; \
    wire [(count)-1:0][(dataw)-1:0] prefix``_rsp_data; \
    /* verilator lint_off GENUNNAMED */ \
    for (genvar i = 0; i < (count); ++i) begin \
        assign prefix``_rsp_valid[i] = itf[i].rsp_valid; \
        assign prefix``_rsp_data[i] = itf[i].rsp_data; \
    end \
    /* verilator lint_on GENUNNAMED */
// AOS_TO_ITF_RSP_V: response-channel, valid/data-only bridge in the
// vector-to-interface direction.
// Declares prefix_rsp_valid and prefix_rsp_data (count x dataw) and drives
// itf[i].rsp_valid / itf[i].rsp_data from them. rsp_ready is not touched.
//
// The generate loop is unnamed, so GENUNNAMED is disabled around it and
// re-enabled afterwards; the suppression must not outlive the macro.
`define AOS_TO_ITF_RSP_V(prefix, itf, count, dataw) \
    wire [(count)-1:0] prefix``_rsp_valid; \
    wire [(count)-1:0][(dataw)-1:0] prefix``_rsp_data; \
    /* verilator lint_off GENUNNAMED */ \
    for (genvar i = 0; i < (count); ++i) begin \
        assign itf[i].rsp_valid = prefix``_rsp_valid[i]; \
        assign itf[i].rsp_data = prefix``_rsp_data[i]; \
    end \
    /* verilator lint_on GENUNNAMED */
// REDUCE: folds the elements of __in with the binary operator __op and
// assigns the result to __out.
//   __op   - operator token pasted between operands (e.g. +, |, &)
//   __out  - net receiving the reduced value
//   __in   - indexable input; elements __in[0] .. __in[__n-1] are read
//   __n    - number of elements (elaboration-time constant)
//   __outw - width every operand is cast to before __op is applied
// For __n > 1 a combinational left-to-right chain is built; otherwise
// __out is simply __in[0] cast to __outw bits.
//
// __n is parenthesised so expression arguments (e.g. A+B) compare
// correctly. The generate blocks are unnamed, so GENUNNAMED is disabled
// around them and re-enabled at the end of the macro.
`define REDUCE(__op, __out, __in, __n, __outw) \
    /* verilator lint_off GENUNNAMED */ \
    if ((__n) > 1) begin \
        reg [(__outw)-1:0] result; \
        always @(*) begin \
            result = (__outw)'(__in[0]); \
            for (integer __i = 1; __i < (__n); __i++) begin \
                result = result __op (__outw)'(__in[__i]); \
            end \
        end \
        assign __out = result; \
    end else begin \
        assign __out = (__outw)'(__in[0]); \
    end \
    /* verilator lint_on GENUNNAMED */
// REDUCE_TREE: instantiates VX_reduce_tree to reduce __in into __out.
//   __op   - operator token; passed to the module as a string parameter
//   __out  - connected to data_out
//   __in   - connected to data_in
//   __n    - number of inputs (parameter N)
//   __outw - output data width (parameter DATAW_OUT)
//   __inw  - input data width (parameter DATAW_IN)
// The instance name is suffixed with the source line number. The macro has
// no trailing semicolon; the call site supplies it.
//
// The operator is stringified with the macro-quote escape so the actual
// argument text is substituted; a plain "..." literal would pass the
// formal's name verbatim, since arguments are not expanded inside ordinary
// string literals.
`define REDUCE_TREE(__op, __out, __in, __n, __outw, __inw) \
    VX_reduce_tree #( \
        .DATAW_IN  (__inw), \
        .DATAW_OUT (__outw), \
        .N         (__n), \
        .OP        (`"__op`") \
    ) reduce`__LINE__ ( \
        .data_in  (__in), \
        .data_out (__out) \
    )
// POP_COUNT_EX: instantiates VX_popcount over `in` and drives `out`.
//   out   - connected to data_out
//   in    - connected to data_in; its bit width ($bits) sets parameter N
//   model - forwarded to the MODEL parameter (meaning is defined by
//           VX_popcount, not visible here)
// The instance name is suffixed with the source line number. The macro has
// no trailing semicolon; the call site supplies it.
`define POP_COUNT_EX(out, in, model) \
VX_popcount #( \
.N ($bits(in)), \
.MODEL (model) \
) __pop_count_ex`__LINE__ ( \
.data_in (in), \
.data_out (out) \
)
// POP_COUNT: shorthand for POP_COUNT_EX with MODEL fixed to 1.
`define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1)
// CONCAT: assigns {left_in, right_in} to out, tolerating a zero-width side.
//   out      - destination net
//   left_in  - upper part; skipped when L == 0
//   right_in - lower part; skipped when R == 0
//   L, R     - elaboration-time widths of left_in / right_in
// When L is 0 only right_in is assigned; when R is 0 only left_in is
// assigned; otherwise the two are concatenated.
//
// L and R are parenthesised so expression arguments compare correctly. The
// generate blocks are unnamed, so GENUNNAMED is disabled around them and
// re-enabled at the end of the macro.
`define CONCAT(out, left_in, right_in, L, R) \
    /* verilator lint_off GENUNNAMED */ \
    if ((L) == 0) begin \
        assign out = right_in; \
    end else if ((R) == 0) begin \
        assign out = left_in; \
    end else begin \
        assign out = {left_in, right_in}; \
    end \
    /* verilator lint_on GENUNNAMED */
// BUFFER_EX: instantiates VX_pipe_register between src and dst.
//   dst     - connected to data_out; its bit width ($bits) sets DATAW
//   src     - connected to data_in
//   ena     - connected to the enable port
//   resetw  - forwarded to the RESETW parameter
//   latency - forwarded to the DEPTH parameter
// Relies on signals named `clk` and `reset` being visible at the call site.
// The instance name is suffixed with the source line number. The macro has
// no trailing semicolon; the call site supplies it.
`define BUFFER_EX(dst, src, ena, resetw, latency) \
VX_pipe_register #( \
.DATAW ($bits(dst)), \
.RESETW (resetw), \
.DEPTH (latency) \
) __buffer_ex`__LINE__ ( \
.clk (clk), \
.reset (reset), \
.enable (ena), \
.data_in (src), \
.data_out (dst) \
)
// BUFFER: shorthand for BUFFER_EX with enable tied to 1'b1, RESETW equal to
// the full width of dst, and a DEPTH of 1.
`define BUFFER(dst, src) `BUFFER_EX(dst, src, 1'b1, $bits(dst), 1)
// NEG_EDGE: instantiates VX_edge_trigger with POS=0 and INIT=0, feeding src
// into data_in and driving dst from data_out.
// NOTE(review): presumably POS=0 selects falling-edge detection - confirm
// against VX_edge_trigger.
// The module's reset port is tied to 1'b0, so the enclosing `reset` does not
// reset this instance; only `clk` is taken from the call site's scope.
// The instance name is suffixed with the source line number. The macro has
// no trailing semicolon; the call site supplies it.
`define NEG_EDGE(dst, src) \
VX_edge_trigger #( \
.POS (0), \
.INIT (0) \
) __neg_edge`__LINE__ ( \
.clk (clk), \
.reset (1'b0), \
.data_in (src), \
.data_out (dst) \
)
///////////////////////////////////////////////////////////////////////////////
`define REG_EXT_VAL(ext, type) 32'h1 `define REG_EXT_VAL(ext, type) 32'h1
//32'((1 << ((type == 1) ? ext[2:0] : ext[1:0]))-1) //32'((1 << ((type == 1) ? ext[2:0] : ext[1:0]))-1)
@ -71,56 +280,10 @@
`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, tag_width, num_inputs, num_caches, uuid_width) \ `define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, tag_width, num_inputs, num_caches, uuid_width) \
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches), uuid_width), num_caches) `CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches), uuid_width), num_caches)
`define TO_FULL_ADDR(x) {x, (`MEM_ADDR_WIDTH-$bits(x))'(0)} `define TO_FULL_ADDR(x) {x, (`MEM_ADDR_WIDTH-$bits(x))'(0)}
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
`define NEG_EDGE(dst, src) \
VX_edge_trigger #( \
.POS (0), \
.INIT (0) \
) __neg_edge`__LINE__ ( \
.clk (clk), \
.reset (1'b0), \
.data_in (src), \
.data_out (dst) \
)
`define BUFFER_EX(dst, src, ena, resetw, latency) \
VX_pipe_register #( \
.DATAW ($bits(dst)), \
.RESETW (resetw), \
.DEPTH (latency) \
) __buffer_ex`__LINE__ ( \
.clk (clk), \
.reset (reset), \
.enable (ena), \
.data_in (src), \
.data_out (dst) \
)
`define BUFFER(dst, src) `BUFFER_EX(dst, src, 1'b1, $bits(dst), 1)
`define POP_COUNT_EX(out, in, model) \
VX_popcount #( \
.N ($bits(in)), \
.MODEL (model) \
) __pop_count_ex`__LINE__ ( \
.data_in (in), \
.data_out (out) \
)
`define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1)
`define CONCAT(out, left_in, right_in, L, R) \
if (L == 0) begin : g_right`__LINE__ \
assign out = right_in; \
end else if (R == 0) begin : g_left`__LINE__ \
assign out = left_in; \
end else begin : g_concat`__LINE__ \
assign out = {left_in, right_in}; \
end
`define ASSIGN_VX_IF(dst, src) \ `define ASSIGN_VX_IF(dst, src) \
assign dst.valid = src.valid; \ assign dst.valid = src.valid; \
assign dst.data = src.data; \ assign dst.data = src.data; \
@ -149,50 +312,52 @@
assign dst.rsp_ready = src.rsp_ready assign dst.rsp_ready = src.rsp_ready
`define ASSIGN_VX_MEM_BUS_IF_EX(dst, src, TD, TS, UUID) \ `define ASSIGN_VX_MEM_BUS_IF_EX(dst, src, TD, TS, UUID) \
/* verilator lint_off GENUNNAMED */ \
assign dst.req_valid = src.req_valid; \ assign dst.req_valid = src.req_valid; \
assign dst.req_data.rw = src.req_data.rw; \ assign dst.req_data.rw = src.req_data.rw; \
assign dst.req_data.addr = src.req_data.addr; \ assign dst.req_data.addr = src.req_data.addr; \
assign dst.req_data.data = src.req_data.data; \ assign dst.req_data.data = src.req_data.data; \
assign dst.req_data.byteen = src.req_data.byteen; \ assign dst.req_data.byteen = src.req_data.byteen; \
assign dst.req_data.flags = src.req_data.flags; \ assign dst.req_data.flags = src.req_data.flags; \
if (TD != TS) begin : g_reg_tag_ne`__LINE__ \ if (TD != TS) begin \
if (UUID != 0) begin : g_uuid`__LINE__ \ if (UUID != 0) begin \
if (TD > TS) begin : g_td`__LINE__ \ if (TD > TS) begin \
assign dst.req_data.tag = {src.req_data.tag.uuid, {(TD-TS){1'b0}}, src.req_data.tag.value}; \ assign dst.req_data.tag = {src.req_data.tag.uuid, {(TD-TS){1'b0}}, src.req_data.tag.value}; \
end else begin : g_ts`__LINE__ \ end else begin \
assign dst.req_data.tag = {src.req_data.tag.uuid, src.req_data.tag.value[TD-UUID-1:0]}; \ assign dst.req_data.tag = {src.req_data.tag.uuid, src.req_data.tag.value[TD-UUID-1:0]}; \
end \ end \
end else begin : g_no_uuid`__LINE__ \ end else begin \
if (TD > TS) begin : g_td`__LINE__ \ if (TD > TS) begin \
assign dst.req_data.tag = {{(TD-TS){1'b0}}, src.req_data.tag}; \ assign dst.req_data.tag = {{(TD-TS){1'b0}}, src.req_data.tag}; \
end else begin : g_ts`__LINE__ \ end else begin \
assign dst.req_data.tag = src.req_data.tag[TD-1:0]; \ assign dst.req_data.tag = src.req_data.tag[TD-1:0]; \
end \ end \
end \ end \
end else begin : g_req_tag_eq`__LINE__ \ end else begin \
assign dst.req_data.tag = src.req_data.tag; \ assign dst.req_data.tag = src.req_data.tag; \
end \ end \
assign src.req_ready = dst.req_ready; \ assign src.req_ready = dst.req_ready; \
assign src.rsp_valid = dst.rsp_valid; \ assign src.rsp_valid = dst.rsp_valid; \
assign src.rsp_data.data = dst.rsp_data.data; \ assign src.rsp_data.data = dst.rsp_data.data; \
if (TD != TS) begin : g_rsp_tag_ne`__LINE__ \ if (TD != TS) begin \
if (UUID != 0) begin : g_uuid`__LINE__ \ if (UUID != 0) begin \
if (TD > TS) begin : g_td`__LINE__ \ if (TD > TS) begin \
assign src.rsp_data.tag = {dst.rsp_data.tag.uuid, dst.rsp_data.tag.value[TS-UUID-1:0]}; \ assign src.rsp_data.tag = {dst.rsp_data.tag.uuid, dst.rsp_data.tag.value[TS-UUID-1:0]}; \
end else begin : g_ts`__LINE__ \ end else begin \
assign src.rsp_data.tag = {dst.rsp_data.tag.uuid, {(TS-TD){1'b0}}, dst.rsp_data.tag.value}; \ assign src.rsp_data.tag = {dst.rsp_data.tag.uuid, {(TS-TD){1'b0}}, dst.rsp_data.tag.value}; \
end \ end \
end else begin : g_no_uuid`__LINE__ \ end else begin \
if (TD > TS) begin : g_td`__LINE__ \ if (TD > TS) begin \
assign src.rsp_data.tag = dst.rsp_data.tag[TS-1:0]; \ assign src.rsp_data.tag = dst.rsp_data.tag[TS-1:0]; \
end else begin : g_ts`__LINE__ \ end else begin \
assign src.rsp_data.tag = {{(TS-TD){1'b0}}, dst.rsp_data.tag}; \ assign src.rsp_data.tag = {{(TS-TD){1'b0}}, dst.rsp_data.tag}; \
end \ end \
end \ end \
end else begin : g_rsp_tag_eq`__LINE__ \ end else begin \
assign src.rsp_data.tag = dst.rsp_data.tag; \ assign src.rsp_data.tag = dst.rsp_data.tag; \
end \ end \
assign dst.rsp_ready = src.rsp_ready assign dst.rsp_ready = src.rsp_ready \
/* verilator lint_on GENUNNAMED */
`define INIT_VX_MEM_BUS_IF(itf) \ `define INIT_VX_MEM_BUS_IF(itf) \
assign itf.req_valid = 0; \ assign itf.req_valid = 0; \
@ -211,7 +376,8 @@
`UNUSED_VAR (itf.rsp_ready) `UNUSED_VAR (itf.rsp_ready)
`define BUFFER_DCR_BUS_IF(dst, src, ena, latency) \ `define BUFFER_DCR_BUS_IF(dst, src, ena, latency) \
if (latency != 0) begin : g_on_`__LINE__ \ /* verilator lint_off GENUNNAMED */ \
if (latency != 0) begin \
VX_pipe_register #( \ VX_pipe_register #( \
.DATAW (1 + VX_DCR_ADDR_WIDTH + VX_DCR_DATA_WIDTH), \ .DATAW (1 + VX_DCR_ADDR_WIDTH + VX_DCR_DATA_WIDTH), \
.DEPTH (latency) \ .DEPTH (latency) \
@ -222,23 +388,25 @@
.data_in ({src.write_valid && ena, src.write_addr, src.write_data}), \ .data_in ({src.write_valid && ena, src.write_addr, src.write_data}), \
.data_out ({dst.write_valid, dst.write_addr, dst.write_data}) \ .data_out ({dst.write_valid, dst.write_addr, dst.write_data}) \
); \ ); \
end else begin : g_off`__LINE__ \ end else begin \
assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid && ena, src.write_addr, src.write_data}; \ assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid && ena, src.write_addr, src.write_data}; \
end end \
/* verilator lint_on GENUNNAMED */
`define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \ `define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \
if (count > 1) begin : g_on`__LINE__ \ /* verilator lint_off GENUNNAMED */ \
wire [count-1:0][width-1:0] __reduce_add_i_field; \ if ((count) > 1) begin \
wire [width-1:0] __reduce_add_o_field; \ wire [(count)-1:0][(width)-1:0] __reduce_add_i_field; \
for (genvar __i = 0; __i < count; ++__i) begin : g_i`__LINE__ \ wire [(width)-1:0] __reduce_add_o_field; \
for (genvar __i = 0; __i < (count); ++__i) begin \
assign __reduce_add_i_field[__i] = src[__i].``field; \ assign __reduce_add_i_field[__i] = src[__i].``field; \
end \ end \
VX_reduce_tree #(.DATAW_IN(width), .N(count), .OP("+")) __reduce_add_field ( \ VX_reduce_tree #(.DATAW_IN(width), .N(count), .OP("+")) __reduce_add_field ( \
__reduce_add_i_field, \ __reduce_add_i_field, \
__reduce_add_o_field \ __reduce_add_o_field \
); \ ); \
if (reg_enable) begin : g_reg`__LINE__ \ if (reg_enable) begin \
reg [width-1:0] __reduce_add_r_field; \ reg [(width)-1:0] __reduce_add_r_field; \
always @(posedge clk) begin \ always @(posedge clk) begin \
if (reset) begin \ if (reset) begin \
__reduce_add_r_field <= '0; \ __reduce_add_r_field <= '0; \
@ -247,130 +415,25 @@
end \ end \
end \ end \
assign dst.``field = __reduce_add_r_field; \ assign dst.``field = __reduce_add_r_field; \
end else begin : g_no_reg`__LINE__ \ end else begin \
assign dst.``field = __reduce_add_o_field; \ assign dst.``field = __reduce_add_o_field; \
end \ end \
end else begin : g_off`__LINE__ \ end else begin \
assign dst.``field = src[0].``field; \ assign dst.``field = src[0].``field; \
end end \
/* verilator lint_on GENUNNAMED */
`define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \ `define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \
if (block_size != 1) begin : g_on`__LINE__ \ /* verilator lint_off GENUNNAMED */ \
if (block_size != `NUM_WARPS) begin : g_eq`__LINE__ \ if (block_size != 1) begin \
if (block_size != `NUM_WARPS) begin \
assign dst = {src[NW_WIDTH-1:`CLOG2(block_size)], `CLOG2(block_size)'(block_idx)}; \ assign dst = {src[NW_WIDTH-1:`CLOG2(block_size)], `CLOG2(block_size)'(block_idx)}; \
end else begin : g_ne`__LINE__ \ end else begin \
assign dst = NW_WIDTH'(block_idx); \ assign dst = NW_WIDTH'(block_idx); \
end \ end \
end else begin : g_off`__LINE__ \ end else begin \
assign dst = src; \ assign dst = src; \
end end \
/* verilator lint_on GENUNNAMED */
`define ITF_TO_AOS(itf, prefix, count, dataw) \
wire [count-1:0] prefix``_valid; \
wire [count-1:0][dataw-1:0] prefix``_data; \
wire [count-1:0] prefix``_ready; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign prefix``_valid[i] = itf[i].valid; \
assign prefix``_data[i] = itf[i].data; \
assign itf[i].ready = prefix``_ready[i]; \
end
`define AOS_TO_ITF(prefix, itf, count, dataw) \
wire [count-1:0] prefix``_valid; \
wire [count-1:0][dataw-1:0] prefix``_data; \
wire [count-1:0] prefix``_ready; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign itf[i].valid = prefix``_valid[i]; \
assign itf[i].data = prefix``_data[i]; \
assign prefix``_ready[i] = itf[i].ready; \
end
`define ITF_TO_AOS_V(itf, prefix, count, dataw) \
wire [count-1:0] prefix``_valid; \
wire [count-1:0][dataw-1:0] prefix``_data; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign prefix``_valid[i] = itf[i].valid; \
assign prefix``_data[i] = itf[i].data; \
end
`define AOS_TO_ITF_V(prefix, itf, count, dataw) \
wire [count-1:0] prefix``_valid; \
wire [count-1:0][dataw-1:0] prefix``_data; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign itf[i].valid = prefix``_valid[i]; \
assign itf[i].data = prefix``_data[i]; \
end
`define ITF_TO_AOS_REQ(itf, prefix, count, dataw) \
wire [count-1:0] prefix``_req_valid; \
wire [count-1:0][dataw-1:0] prefix``_req_data; \
wire [count-1:0] prefix``_req_ready; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign prefix``_req_valid[i] = itf[i].req_valid; \
assign prefix``_req_data[i] = itf[i].req_data; \
assign itf[i].req_ready = prefix``_req_ready[i]; \
end
`define AOS_TO_ITF_REQ(prefix, itf, count, dataw) \
wire [count-1:0] prefix``_req_valid; \
wire [count-1:0][dataw-1:0] prefix``_req_data; \
wire [count-1:0] prefix``_req_ready; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign itf[i].req_valid = prefix``_req_valid[i]; \
assign itf[i].req_data = prefix``_req_data[i]; \
assign prefix``_req_ready[i] = itf[i].req_ready; \
end
`define ITF_TO_AOS_REQ_V(itf, prefix, count, dataw) \
wire [count-1:0] prefix``_req_valid; \
wire [count-1:0][dataw-1:0] prefix``_req_data; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign prefix``_req_valid[i] = itf[i].req_valid; \
assign prefix``_req_data[i] = itf[i].req_data; \
end
`define AOS_TO_ITF_REQ_V(prefix, itf, count, dataw) \
wire [count-1:0] prefix``_req_valid; \
wire [count-1:0][dataw-1:0] prefix``_req_data; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign itf[i].req_valid = prefix``_req_valid[i]; \
assign itf[i].req_data = prefix``_req_data[i]; \
end
`define ITF_TO_AOS_RSP(itf, prefix, count, dataw) \
wire [count-1:0] prefix``_rsp_valid; \
wire [count-1:0][dataw-1:0] prefix``_rsp_data; \
wire [count-1:0] prefix``_rsp_ready; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign prefix``_rsp_valid[i] = itf[i].rsp_valid; \
assign prefix``_rsp_data[i] = itf[i].rsp_data; \
assign itf[i].rsp_ready = prefix``_rsp_ready[i]; \
end
`define AOS_TO_ITF_RSP(prefix, itf, count, dataw) \
wire [count-1:0] prefix``_rsp_valid; \
wire [count-1:0][dataw-1:0] prefix``_rsp_data; \
wire [count-1:0] prefix``_vready; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign itf[i].rsp_valid = prefix``_rsp_valid[i]; \
assign itf[i].rsp_data = prefix``_rsp_data[i]; \
assign prefix``_rsp_ready[i] = itf[i].rsp_ready; \
end
`define ITF_TO_AOS_RSP_V(itf, prefix, count, dataw) \
wire [count-1:0] prefix``_rsp_valid; \
wire [count-1:0][dataw-1:0] prefix``_rsp_data; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign prefix``_rsp_valid[i] = itf[i].rsp_valid; \
assign prefix``_rsp_data[i] = itf[i].rsp_data; \
end
`define AOS_TO_ITF_RSP_V(prefix, itf, count, dataw) \
wire [count-1:0] prefix``_rsp_valid; \
wire [count-1:0][dataw-1:0] prefix``_rsp_data; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign itf[i].rsp_valid = prefix``_rsp_valid[i]; \
assign itf[i].rsp_data = prefix``_rsp_data[i]; \
end
`endif // VX_DEFINE_VH `endif // VX_DEFINE_VH

View file

@ -14,7 +14,8 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_dispatch import VX_gpu_pkg::*; #( module VX_dispatch import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "" parameter `STRING INSTANCE_ID = "",
parameter ISSUE_ID = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -29,6 +30,7 @@ module VX_dispatch import VX_gpu_pkg::*; #(
VX_dispatch_if.master dispatch_if [NUM_EX_UNITS] VX_dispatch_if.master dispatch_if [NUM_EX_UNITS]
); );
`UNUSED_SPARAM (INSTANCE_ID) `UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (ISSUE_ID)
localparam DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN) + NT_WIDTH; localparam DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN) + NT_WIDTH;

View file

@ -41,7 +41,7 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS); localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS); localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS); localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam GPR_BANK_DATAW = `XLEN * `SIMD_WIDTH; localparam GPR_BANK_DATAW = `SIMD_WIDTH * `XLEN;
localparam GPR_BANK_SIZE = (PER_ISSUE_WARPS * NUM_REGS * SIMD_COUNT) / NUM_BANKS; localparam GPR_BANK_SIZE = (PER_ISSUE_WARPS * NUM_REGS * SIMD_COUNT) / NUM_BANKS;
localparam GPR_BANK_ADDRW = `CLOG2(GPR_BANK_SIZE); localparam GPR_BANK_ADDRW = `CLOG2(GPR_BANK_SIZE);
localparam BANKID_WIS_BITS = (BANK_SEL_BITS > 1 && ISSUE_WIS_BITS != 0) ? 1 : 0; localparam BANKID_WIS_BITS = (BANK_SEL_BITS > 1 && ISSUE_WIS_BITS != 0) ? 1 : 0;
@ -52,7 +52,7 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
localparam PER_BANK_REG_WIDTH = `UP(PER_BANK_REG_BITS); localparam PER_BANK_REG_WIDTH = `UP(PER_BANK_REG_BITS);
localparam GPR_REQ_DATAW = SRC_OPD_WIDTH + SIMD_IDX_W + PER_BANK_WIS_WIDTH + PER_BANK_REG_BITS; localparam GPR_REQ_DATAW = SRC_OPD_WIDTH + SIMD_IDX_W + PER_BANK_WIS_WIDTH + PER_BANK_REG_BITS;
localparam GPR_RSP_DATAW = SRC_OPD_WIDTH + `SIMD_WIDTH * `XLEN; localparam GPR_RSP_DATAW = SRC_OPD_WIDTH + `SIMD_WIDTH * `XLEN;
localparam BYTEENW = `SIMD_WIDTH * XLENB; localparam BYTEENW = GPR_BANK_DATAW / 8;
wire [NUM_REQS-1:0] gpr_req_valid, gpr_req_ready; wire [NUM_REQS-1:0] gpr_req_valid, gpr_req_ready;
wire [NUM_REQS-1:0][GPR_REQ_DATAW-1:0] gpr_req_data; wire [NUM_REQS-1:0][GPR_REQ_DATAW-1:0] gpr_req_data;
@ -133,7 +133,7 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
wire [BYTEENW-1:0] bank_wr_byteen; wire [BYTEENW-1:0] bank_wr_byteen;
for (genvar i = 0; i < `SIMD_WIDTH; ++i) begin : g_bank_wr_byteen for (genvar i = 0; i < `SIMD_WIDTH; ++i) begin : g_bank_wr_byteen
assign bank_wr_byteen[i*XLENB+:XLENB] = {XLENB{writeback_if.data.tmask[i]}}; assign bank_wr_byteen[i*XLENB +: XLENB] = {XLENB{writeback_if.data.tmask[i]}};
end end
for (genvar b = 0; b < NUM_BANKS; ++b) begin : g_bank_req_data for (genvar b = 0; b < NUM_BANKS; ++b) begin : g_bank_req_data
@ -180,7 +180,7 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
VX_pipe_buffer #( VX_pipe_buffer #(
.DATAW (REQ_SEL_WIDTH + 2) .DATAW (REQ_SEL_WIDTH + 2)
) pipe_reg1 ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (bank_req_valid[b]), .valid_in (bank_req_valid[b]),

View file

@ -14,7 +14,8 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_ibuffer import VX_gpu_pkg::*; #( module VX_ibuffer import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "" parameter `STRING INSTANCE_ID = "",
parameter ISSUE_ID = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -30,6 +31,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
VX_ibuffer_if.master ibuffer_if [PER_ISSUE_WARPS] VX_ibuffer_if.master ibuffer_if [PER_ISSUE_WARPS]
); );
`UNUSED_SPARAM (INSTANCE_ID) `UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (ISSUE_ID)
localparam NUM_OPDS = NUM_SRC_OPDS + 1; localparam NUM_OPDS = NUM_SRC_OPDS + 1;
localparam DATAW = UUID_WIDTH + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + NUM_OPDS + (REG_IDX_BITS * NUM_OPDS); localparam DATAW = UUID_WIDTH + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + NUM_OPDS + (REG_IDX_BITS * NUM_OPDS);

View file

@ -15,7 +15,7 @@
module VX_issue_slice import VX_gpu_pkg::*; #( module VX_issue_slice import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "", parameter `STRING INSTANCE_ID = "",
parameter ISSUE_ID = 0 parameter ISSUE_ID
) ( ) (
`SCOPE_IO_DECL `SCOPE_IO_DECL
@ -37,7 +37,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
VX_operands_if operands_if(); VX_operands_if operands_if();
VX_ibuffer #( VX_ibuffer #(
.INSTANCE_ID (`SFORMATF(("%s-ibuffer", INSTANCE_ID))) .INSTANCE_ID (`SFORMATF(("%s-ibuffer", INSTANCE_ID))),
.ISSUE_ID (ISSUE_ID)
) ibuffer ( ) ibuffer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -49,7 +50,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
); );
VX_scoreboard #( VX_scoreboard #(
.INSTANCE_ID (`SFORMATF(("%s-scoreboard", INSTANCE_ID))) .INSTANCE_ID (`SFORMATF(("%s-scoreboard", INSTANCE_ID))),
.ISSUE_ID (ISSUE_ID)
) scoreboard ( ) scoreboard (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -64,7 +66,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
); );
VX_operands #( VX_operands #(
.INSTANCE_ID (`SFORMATF(("%s-operands", INSTANCE_ID))) .INSTANCE_ID (`SFORMATF(("%s-operands", INSTANCE_ID))),
.ISSUE_ID (ISSUE_ID)
) operands ( ) operands (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -77,7 +80,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
); );
VX_dispatch #( VX_dispatch #(
.INSTANCE_ID (`SFORMATF(("%s-dispatch", INSTANCE_ID))) .INSTANCE_ID (`SFORMATF(("%s-dispatch", INSTANCE_ID))),
.ISSUE_ID (ISSUE_ID)
) dispatch ( ) dispatch (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -97,8 +101,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
`NEG_EDGE (reset_negedge, reset); `NEG_EDGE (reset_negedge, reset);
`SCOPE_TAP_EX (0, 2, 4, 3, ( `SCOPE_TAP_EX (0, 2, 4, 3, (
UUID_WIDTH + NW_WIDTH + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + 1 + NR_BITS * 4 + UUID_WIDTH + NW_WIDTH + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + 1 + NR_BITS * 4 +
UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + 1 + NR_BITS + (3 * `XLEN) + UUID_WIDTH + ISSUE_WIS_W + `SIMD_WIDTH + PC_BITS + EX_BITS + INST_OP_BITS + 1 + NR_BITS + (3 * `XLEN) +
UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + NR_BITS + (`NUM_THREADS * `XLEN) + 1 UUID_WIDTH + ISSUE_WIS_W + `SIMD_WIDTH + NR_BITS + (`SIMD_WIDTH * `XLEN) + 1
), { ), {
decode_if.valid, decode_if.valid,
decode_if.ready, decode_if.ready,
@ -165,11 +169,11 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
`TRACE(1, (", op=")) `TRACE(1, (", op="))
trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args); trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd)) `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd))
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `NUM_THREADS) `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `SIMD_WIDTH)
`TRACE(1, (", rs2_data=")) `TRACE(1, (", rs2_data="))
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS) `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `SIMD_WIDTH)
`TRACE(1, (", rs3_data=")) `TRACE(1, (", rs3_data="))
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS) `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `SIMD_WIDTH)
trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args); trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
`TRACE(1, (" (#%0d)\n", operands_if.data.uuid)) `TRACE(1, (" (#%0d)\n", operands_if.data.uuid))
end end

View file

@ -21,46 +21,45 @@ module VX_issue_top import VX_gpu_pkg::*; #(
input wire reset, input wire reset,
input wire decode_valid, input wire decode_valid,
input wire [UUID_WIDTH-1:0] decode_uuid, input wire [UUID_WIDTH-1:0] decode_uuid,
input wire [NW_WIDTH-1:0] decode_wid, input wire [NW_WIDTH-1:0] decode_wid,
input wire [`NUM_THREADS-1:0] decode_tmask, input wire [`NUM_THREADS-1:0] decode_tmask,
input wire [PC_BITS-1:0] decode_PC, input wire [PC_BITS-1:0] decode_PC,
input wire [EX_BITS-1:0] decode_ex_type, input wire [EX_BITS-1:0] decode_ex_type,
input wire [INST_OP_BITS-1:0] decode_op_type, input wire [INST_OP_BITS-1:0] decode_op_type,
input op_args_t decode_op_args, input op_args_t decode_op_args,
input wire decode_wb, input wire decode_wb,
input wire [NR_BITS-1:0] decode_rd, input wire [NR_BITS-1:0] decode_rd,
input wire [NR_BITS-1:0] decode_rs1, input wire [NR_BITS-1:0] decode_rs1,
input wire [NR_BITS-1:0] decode_rs2, input wire [NR_BITS-1:0] decode_rs2,
input wire [NR_BITS-1:0] decode_rs3, input wire [NR_BITS-1:0] decode_rs3,
output wire decode_ready, output wire decode_ready,
input wire writeback_valid[`ISSUE_WIDTH], input wire writeback_valid[`ISSUE_WIDTH],
input wire [UUID_WIDTH-1:0] writeback_uuid[`ISSUE_WIDTH], input wire [UUID_WIDTH-1:0] writeback_uuid[`ISSUE_WIDTH],
input wire [ISSUE_WIS_W-1:0] writeback_wis[`ISSUE_WIDTH], input wire [ISSUE_WIS_W-1:0] writeback_wis[`ISSUE_WIDTH],
input wire [`NUM_THREADS-1:0] writeback_tmask[`ISSUE_WIDTH], input wire [`NUM_THREADS-1:0] writeback_tmask[`ISSUE_WIDTH],
input wire [PC_BITS-1:0] writeback_PC[`ISSUE_WIDTH], input wire [PC_BITS-1:0] writeback_PC[`ISSUE_WIDTH],
input wire [NR_BITS-1:0] writeback_rd[`ISSUE_WIDTH], input wire [NR_BITS-1:0] writeback_rd[`ISSUE_WIDTH],
input wire [`NUM_THREADS-1:0][`XLEN-1:0] writeback_data[`ISSUE_WIDTH], input wire [`NUM_THREADS-1:0][`XLEN-1:0] writeback_data[`ISSUE_WIDTH],
input wire writeback_sop[`ISSUE_WIDTH], input wire writeback_sop[`ISSUE_WIDTH],
input wire writeback_eop[`ISSUE_WIDTH], input wire writeback_eop[`ISSUE_WIDTH],
output wire dispatch_valid[NUM_EX_UNITS * `ISSUE_WIDTH], output wire dispatch_valid[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [UUID_WIDTH-1:0] dispatch_uuid[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [UUID_WIDTH-1:0] dispatch_uuid[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [ISSUE_WIS_W-1:0] dispatch_wis[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [ISSUE_WIS_W-1:0] dispatch_wis[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [`NUM_THREADS-1:0] dispatch_tmask[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [`NUM_THREADS-1:0] dispatch_tmask[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [PC_BITS-1:0] dispatch_PC[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [PC_BITS-1:0] dispatch_PC[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [INST_ALU_BITS-1:0] dispatch_op_type[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [INST_ALU_BITS-1:0] dispatch_op_type[NUM_EX_UNITS * `ISSUE_WIDTH],
output op_args_t dispatch_op_args[NUM_EX_UNITS * `ISSUE_WIDTH], output op_args_t dispatch_op_args[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire dispatch_wb[NUM_EX_UNITS * `ISSUE_WIDTH], output wire dispatch_wb[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [NR_BITS-1:0] dispatch_rd[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [NR_BITS-1:0] dispatch_rd[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [NT_WIDTH-1:0] dispatch_tid[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [NT_WIDTH-1:0] dispatch_tid[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs1_data[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs1_data[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data[NUM_EX_UNITS * `ISSUE_WIDTH], output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data[NUM_EX_UNITS * `ISSUE_WIDTH],
input wire dispatch_ready[NUM_EX_UNITS * `ISSUE_WIDTH] input wire dispatch_ready[NUM_EX_UNITS * `ISSUE_WIDTH]
); );
VX_decode_if decode_if(); VX_decode_if decode_if();
VX_dispatch_if dispatch_if[NUM_EX_UNITS * `ISSUE_WIDTH](); VX_dispatch_if dispatch_if[NUM_EX_UNITS * `ISSUE_WIDTH]();
VX_writeback_if writeback_if[`ISSUE_WIDTH](); VX_writeback_if writeback_if[`ISSUE_WIDTH]();

View file

@ -21,7 +21,8 @@
`endif `endif
module VX_opc_unit import VX_gpu_pkg::*; #( module VX_opc_unit import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "" parameter `STRING INSTANCE_ID = "",
parameter ISSUE_ID = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -77,7 +78,9 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
wire [NR_BITS-1:0] rs1 = to_reg_number(staging_if.data.rs1); wire [NR_BITS-1:0] rs1 = to_reg_number(staging_if.data.rs1);
wire [NR_BITS-1:0] rs2 = to_reg_number(staging_if.data.rs2); wire [NR_BITS-1:0] rs2 = to_reg_number(staging_if.data.rs2);
wire [NR_BITS-1:0] rs3 = to_reg_number(staging_if.data.rs3); wire [NR_BITS-1:0] rs3 = to_reg_number(staging_if.data.rs3);
wire [NUM_SRC_OPDS-1:0][NR_BITS-1:0] src_regs = {rs3, rs2, rs1};
wire [NUM_SRC_OPDS-1:0][NR_BITS-1:0] src_regs;
assign src_regs = {rs3, rs2, rs1};
always @(*) begin always @(*) begin
state_n = state; state_n = state;
@ -164,7 +167,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
end end
end else begin end else begin
if (gpr_rsp_fire) begin if (gpr_rsp_fire) begin
opd_values[gpr_if.rsp_data.opd_id] <= gpr_if.rsp_data.value; opd_values[gpr_if.rsp_data.opd_id] <= gpr_if.rsp_data.data;
end end
end end
end end
@ -211,4 +214,38 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
.ready_out(operands_if.ready) .ready_out(operands_if.ready)
); );
`ifdef DBG_TRACE_PIPELINE
    // Pipeline debug tracing for this operand collector.
    // On every clock edge, emits one trace record for each of the following
    // events that is active in that cycle:
    //   - input   : scoreboard_if valid/ready handshake (instruction accepted)
    //   - gpr-req : gpr_if request valid/ready handshake
    //   - gpr-rsp : gpr_if response valid
    //   - output  : operands_if valid/ready handshake (instruction issued with operands)
    always @(posedge clk) begin
        if (scoreboard_if.valid && scoreboard_if.ready) begin
            // The PC is taken from scoreboard_if (the interface being traced here),
            // not from operands_if, which describes the instruction at the output side.
            `TRACE(1, ("%t: %s-input: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(scoreboard_if.data.wis, ISSUE_ID), {scoreboard_if.data.PC, 1'b0}))
            trace_ex_type(1, scoreboard_if.data.ex_type);
            `TRACE(1, (", op="))
            trace_ex_op(1, scoreboard_if.data.ex_type, scoreboard_if.data.op_type, scoreboard_if.data.op_args);
            `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d (#%0d)\n", scoreboard_if.data.tmask, scoreboard_if.data.wb, scoreboard_if.data.rd, scoreboard_if.data.rs1, scoreboard_if.data.rs2, scoreboard_if.data.rs3, scoreboard_if.data.uuid))
        end
        if (gpr_if.req_valid && gpr_if.req_ready) begin
            `TRACE(1, ("%t: %s-gpr-req: opd=%0d, wis=%0d, sid=%0d, reg=%0d\n", $time, INSTANCE_ID, gpr_if.req_data.opd_id, wis_to_wid(gpr_if.req_data.wis, ISSUE_ID), gpr_if.req_data.sid, gpr_if.req_data.reg_id))
        end
        if (gpr_if.rsp_valid) begin
            `TRACE(1, ("%t: %s-gpr-rsp: opd=%0d, data=", $time, INSTANCE_ID, gpr_if.rsp_data.opd_id))
            `TRACE_ARRAY1D(1, "0x%0h", gpr_if.rsp_data.data, `SIMD_WIDTH)
            `TRACE(1, ("\n"))
        end
        if (operands_if.valid && operands_if.ready) begin
            `TRACE(1, ("%t: %s-output: wid=%0d, sid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), operands_if.data.sid, {operands_if.data.PC, 1'b0}))
            trace_ex_type(1, operands_if.data.ex_type);
            `TRACE(1, (", op="))
            trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
            `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd))
            `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `SIMD_WIDTH)
            `TRACE(1, (", rs2_data="))
            `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `SIMD_WIDTH)
            `TRACE(1, (", rs3_data="))
            `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `SIMD_WIDTH)
            trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
            `TRACE(1, (" (#%0d)\n", operands_if.data.uuid))
        end
    end
`endif
endmodule endmodule

View file

@ -21,7 +21,8 @@
`endif `endif
module VX_operands import VX_gpu_pkg::*; #( module VX_operands import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "" parameter `STRING INSTANCE_ID = "",
parameter ISSUE_ID = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -34,8 +35,6 @@ module VX_operands import VX_gpu_pkg::*; #(
VX_scoreboard_if.slave scoreboard_if, VX_scoreboard_if.slave scoreboard_if,
VX_operands_if.master operands_if VX_operands_if.master operands_if
); );
`UNUSED_SPARAM (INSTANCE_ID)
localparam NUM_OPDS = NUM_SRC_OPDS + 1; localparam NUM_OPDS = NUM_SRC_OPDS + 1;
localparam SCB_DATAW = UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + NUM_OPDS + (REG_IDX_BITS * NUM_OPDS); localparam SCB_DATAW = UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + NUM_OPDS + (REG_IDX_BITS * NUM_OPDS);
localparam OPD_DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN); localparam OPD_DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN);
@ -66,9 +65,10 @@ module VX_operands import VX_gpu_pkg::*; #(
`UNUSED_PIN(sel_out) `UNUSED_PIN(sel_out)
); );
for (genvar i = 0; i < `NUM_OPCS; ++i) begin : g_opc_units for (genvar i = 0; i < `NUM_OPCS; ++i) begin : g_collectors
VX_opc_unit #( VX_opc_unit #(
.INSTANCE_ID (INSTANCE_ID) .INSTANCE_ID (`SFORMATF(("%s-collector%0d", INSTANCE_ID, i))),
.ISSUE_ID (ISSUE_ID)
) opc_unit ( ) opc_unit (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -101,12 +101,12 @@ module VX_operands import VX_gpu_pkg::*; #(
VX_writeback_if writeback_if_s(); VX_writeback_if writeback_if_s();
assign writeback_if_s.valid = writeback_if.valid && war_dp_check; assign writeback_if_s.valid = writeback_if.valid && war_dp_check;
assign writeback_if_s.data = writeback_if.data; assign writeback_if_s.data = writeback_if.data;
assign writeback_if.ready = war_dp_check; assign writeback_if.ready = war_dp_check;
`UNUSED_VAR (writeback_if_s.ready) `UNUSED_VAR (writeback_if_s.ready)
VX_gpr_unit #( VX_gpr_unit #(
.INSTANCE_ID (INSTANCE_ID), .INSTANCE_ID (`SFORMATF(("%s-gpr", INSTANCE_ID))),
.NUM_REQS (`NUM_OPCS), .NUM_REQS (`NUM_OPCS),
.NUM_BANKS (`NUM_GPR_BANKS) .NUM_BANKS (`NUM_GPR_BANKS)
) gpr_unit ( ) gpr_unit (

View file

@ -14,7 +14,8 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_scoreboard import VX_gpu_pkg::*; #( module VX_scoreboard import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "" parameter `STRING INSTANCE_ID = "",
parameter ISSUE_ID = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@ -30,6 +31,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
VX_scoreboard_if.master scoreboard_if VX_scoreboard_if.master scoreboard_if
); );
`UNUSED_SPARAM (INSTANCE_ID) `UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (ISSUE_ID)
`UNUSED_VAR (writeback_if.data.sop) `UNUSED_VAR (writeback_if.data.sop)
localparam NUM_OPDS = NUM_SRC_OPDS + 1; localparam NUM_OPDS = NUM_SRC_OPDS + 1;
@ -128,36 +130,34 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
&& (writeback_if.data.wis == ISSUE_WIS_W'(w)) && (writeback_if.data.wis == ISSUE_WIS_W'(w))
&& writeback_if.data.eop; && writeback_if.data.eop;
wire [REG_TYPES-1:0][31:0] ibf_rs1_mask, ibf_rs2_mask, ibf_rs3_mask, ibf_rd_mask; reg_idx_t [NUM_OPDS-1:0] ibf_opds, stg_opds;
wire [REG_TYPES-1:0][31:0] stg_rs1_mask, stg_rs2_mask, stg_rs3_mask, stg_rd_mask; assign ibf_opds = {ibuffer_if[w].data.rs3, ibuffer_if[w].data.rs2, ibuffer_if[w].data.rs1, ibuffer_if[w].data.rd};
assign stg_opds = {staging_if[w].data.rs3, staging_if[w].data.rs2, staging_if[w].data.rs1, staging_if[w].data.rd};
for (genvar i = 0; i < REG_TYPES; ++i) begin : g_opd_masks wire [NUM_OPDS-1:0] ibf_used_rs = {ibuffer_if[w].data.used_rs, ibuffer_if[w].data.wb};
assign ibf_rd_mask[i] = (`REG_EXT_VAL(ibuffer_if[w].data.rd.ext, i) << ibuffer_if[w].data.rd.id) & {32{ibuffer_if[w].data.wb && ibuffer_if[w].data.rd.rtype == i}}; wire [NUM_OPDS-1:0] stg_used_rs = {staging_if[w].data.used_rs, staging_if[w].data.wb};
assign ibf_rs1_mask[i] = (`REG_EXT_VAL(ibuffer_if[w].data.rs1.ext, i) << ibuffer_if[w].data.rs1.id) & {32{ibuffer_if[w].data.used_rs[0] && ibuffer_if[w].data.rs1.rtype == i}};
assign ibf_rs2_mask[i] = (`REG_EXT_VAL(ibuffer_if[w].data.rs2.ext, i) << ibuffer_if[w].data.rs2.id) & {32{ibuffer_if[w].data.used_rs[1] && ibuffer_if[w].data.rs2.rtype == i}};
assign ibf_rs3_mask[i] = (`REG_EXT_VAL(ibuffer_if[w].data.rs3.ext, i) << ibuffer_if[w].data.rs3.id) & {32{ibuffer_if[w].data.used_rs[2] && ibuffer_if[w].data.rs3.rtype == i}};
assign stg_rd_mask[i] = (`REG_EXT_VAL(staging_if[w].data.rd.ext, i) << staging_if[w].data.rd.id) & {32{staging_if[w].data.wb && staging_if[w].data.rd.rtype == i}}; wire [NUM_OPDS-1:0][REG_TYPES-1:0][31:0] ibf_opd_mask, stg_opd_mask;
assign stg_rs1_mask[i] = (`REG_EXT_VAL(staging_if[w].data.rs1.ext, i) << staging_if[w].data.rs1.id) & {32{staging_if[w].data.used_rs[0] && staging_if[w].data.rs1.rtype == i}};
assign stg_rs2_mask[i] = (`REG_EXT_VAL(staging_if[w].data.rs2.ext, i) << staging_if[w].data.rs2.id) & {32{staging_if[w].data.used_rs[1] && staging_if[w].data.rs2.rtype == i}}; for (genvar i = 0; i < NUM_OPDS; ++i) begin : g_opd_masks
assign stg_rs3_mask[i] = (`REG_EXT_VAL(staging_if[w].data.rs3.ext, i) << staging_if[w].data.rs3.id) & {32{staging_if[w].data.used_rs[2] && staging_if[w].data.rs3.rtype == i}}; for (genvar j = 0; j < REG_TYPES; ++j) begin : g_j
assign ibf_opd_mask[i][j] = (`REG_EXT_VAL(ibf_opds[i].ext, j) << ibf_opds[i].id) & {32{ibf_used_rs[i] && ibf_opds[i].rtype == j}};
assign stg_opd_mask[i][j] = (`REG_EXT_VAL(stg_opds[i].ext, j) << stg_opds[i].id) & {32{stg_used_rs[i] && stg_opds[i].rtype == j}};
end
end end
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
reg [NUM_REGS-1:0][EX_WIDTH-1:0] inuse_units; reg [NUM_REGS-1:0][EX_WIDTH-1:0] inuse_units;
reg [NUM_REGS-1:0][SFU_WIDTH-1:0] inuse_sfu; reg [NUM_REGS-1:0][SFU_WIDTH-1:0] inuse_sfu;
reg_idx_t [NUM_OPDS-1:0] stg_opds;
assign stg_opds = {staging_if[w].data.rs3, staging_if[w].data.rs2, staging_if[w].data.rs1, staging_if[w].data.rd};
always @(*) begin always @(*) begin
perf_inuse_units_per_cycle[w] = '0; perf_inuse_units_per_cycle[w] = '0;
perf_inuse_sfu_per_cycle[w] = '0; perf_inuse_sfu_per_cycle[w] = '0;
for (integer i = 0; i < NUM_OPDS; ++i) begin for (integer i = 0; i < NUM_OPDS; ++i) begin
if (staging_if[w].valid && operands_busy[i]) begin if (staging_if[w].valid && operands_busy[i]) begin
perf_inuse_units_per_cycle[w][inuse_units[stg_opds[i]]] = 1; perf_inuse_units_per_cycle[w][inuse_units[stg_opds[i].id]] = 1;
if (inuse_units[stg_opds[i]] == EX_SFU) begin if (inuse_units[stg_opds[i].id] == EX_SFU) begin
perf_inuse_sfu_per_cycle[w][inuse_sfu[stg_opds[i]]] = 1; perf_inuse_sfu_per_cycle[w][inuse_sfu[stg_opds[i].id]] = 1;
end end
end end
end end
@ -170,14 +170,14 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
inuse_regs_n[writeback_if.data.rd] = 0; inuse_regs_n[writeback_if.data.rd] = 0;
end end
if (staging_fire && staging_if[w].data.wb) begin if (staging_fire && staging_if[w].data.wb) begin
inuse_regs_n |= stg_rd_mask; inuse_regs_n |= stg_opd_mask[0];
end end
end end
wire [REG_TYPES-1:0][31:0] in_use_mask; wire [REG_TYPES-1:0][31:0] in_use_mask;
for (genvar i = 0; i < REG_TYPES; ++i) begin : g_in_use_mask for (genvar i = 0; i < REG_TYPES; ++i) begin : g_in_use_mask
wire [31:0] ibf_reg_mask = ibf_rs1_mask[i] | ibf_rs2_mask[i] | ibf_rs3_mask[i] | ibf_rd_mask[i]; wire [31:0] ibf_reg_mask = ibf_opd_mask[0][i] | ibf_opd_mask[1][i] | ibf_opd_mask[2][i] | ibf_opd_mask[3][i];
wire [31:0] stg_reg_mask = stg_rs1_mask[i] | stg_rs2_mask[i] | stg_rs3_mask[i] | stg_rd_mask[i]; wire [31:0] stg_reg_mask = stg_opd_mask[0][i] | stg_opd_mask[1][i] | stg_opd_mask[2][i] | stg_opd_mask[3][i];
wire [31:0] regs_mask = ibuffer_fire ? ibf_reg_mask : stg_reg_mask; wire [31:0] regs_mask = ibuffer_fire ? ibf_reg_mask : stg_reg_mask;
assign in_use_mask[i] = inuse_regs_n[i * 32 +: 32] & regs_mask; assign in_use_mask[i] = inuse_regs_n[i * 32 +: 32] & regs_mask;
end end
@ -187,11 +187,10 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
assign regs_busy[i] = (in_use_mask[i] != 0); assign regs_busy[i] = (in_use_mask[i] != 0);
end end
// per operand busy for (genvar i = 0; i < NUM_OPDS; ++i) begin : g_operands_busy
assign operands_busy[0] = (in_use_mask[staging_if[w].data.rd.rtype] & stg_rd_mask[staging_if[w].data.rd.rtype]) != 0; wire [REG_TYPE_BITS-1:0] rtype = stg_opds[i].rtype;
assign operands_busy[1] = (in_use_mask[staging_if[w].data.rs1.rtype] & stg_rs1_mask[staging_if[w].data.rs1.rtype]) != 0; assign operands_busy[i] = (in_use_mask[rtype] & stg_opd_mask[i][rtype]) != 0;
assign operands_busy[2] = (in_use_mask[staging_if[w].data.rs2.rtype] & stg_rs2_mask[staging_if[w].data.rs2.rtype]) != 0; end
assign operands_busy[3] = (in_use_mask[staging_if[w].data.rs3.rtype] & stg_rs3_mask[staging_if[w].data.rs3.rtype]) != 0;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin

View file

@ -16,7 +16,6 @@
interface VX_decode_if import VX_gpu_pkg::*; #( interface VX_decode_if import VX_gpu_pkg::*; #(
parameter NUM_WARPS = `NUM_WARPS parameter NUM_WARPS = `NUM_WARPS
); );
typedef struct packed { typedef struct packed {
logic [UUID_WIDTH-1:0] uuid; logic [UUID_WIDTH-1:0] uuid;
logic [`LOG2UP(NUM_WARPS)-1:0] wid; logic [`LOG2UP(NUM_WARPS)-1:0] wid;

View file

@ -24,7 +24,7 @@ interface VX_gpr_if import VX_gpu_pkg::*; ();
typedef struct packed { typedef struct packed {
logic [1:0] opd_id; logic [1:0] opd_id;
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] value; logic [`SIMD_WIDTH-1:0][`XLEN-1:0] data;
} rsp_data_t; } rsp_data_t;
logic req_valid; logic req_valid;

View file

@ -43,7 +43,7 @@
`ifdef QUARTUS `ifdef QUARTUS
`define RAM_ARRAY_WREN reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1]; `define RAM_ARRAY_WREN reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
`define RAM_WRITE_WREN `RAM_RESET_BLOCK \ `define RAM_WRITE_WREN `RAM_RESET_BLOCK \
if (write) begin \ if (write) begin \
for (integer i = 0; i < WRENW; ++i) begin \ for (integer i = 0; i < WRENW; ++i) begin \
if (wren[i]) begin \ if (wren[i]) begin \
@ -305,9 +305,22 @@ module VX_dp_ram #(
// simulation // simulation
reg [DATAW-1:0] ram [0:SIZE-1]; reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION `RAM_INITIALIZATION
// Merged write word: starts from the word currently stored at waddr and
// replaces only the WSELW-bit slices whose corresponding wren bit is set,
// leaving the remaining slices at their stored value.
reg [DATAW-1:0] wdata_n;
always @* begin
// default: keep the existing contents of the addressed word
wdata_n = ram[waddr];
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i]) begin
// slice i is write-enabled: take it from the incoming write data
wdata_n[i * WSELW +: WSELW] = wdata[i * WSELW +: WSELW];
end
end
end
always @(posedge clk) begin always @(posedge clk) begin
`RAM_WRITE_WREN `RAM_RESET_BLOCK
if (write) begin
ram[waddr] <= wdata_n;
end
end end
if (OUT_REG) begin : g_sync if (OUT_REG) begin : g_sync

View file

@ -62,8 +62,10 @@ module VX_stream_xpoint #(
valid_out_w = '0; valid_out_w = '0;
data_out_w = 'x; data_out_w = 'x;
for (integer i = 0; i < NUM_INPUTS; ++i) begin for (integer i = 0; i < NUM_INPUTS; ++i) begin
valid_out_w[sel_in[i]] = valid_in[i]; if (valid_in[i]) begin
data_out_w[sel_in[i]] = data_in[i]; valid_out_w[sel_in[i]] = 1;
data_out_w[sel_in[i]] = data_in[i];
end
end end
end end

View file

@ -46,7 +46,6 @@ module VX_local_mem import VX_gpu_pkg::*; #(
VX_mem_bus_if.slave mem_bus_if [NUM_REQS] VX_mem_bus_if.slave mem_bus_if [NUM_REQS]
); );
`UNUSED_SPARAM (INSTANCE_ID) `UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (UUID_WIDTH)
localparam REQ_SEL_BITS = `CLOG2(NUM_REQS); localparam REQ_SEL_BITS = `CLOG2(NUM_REQS);
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS); localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);

View file

@ -27,9 +27,6 @@ module VX_local_mem_top import VX_gpu_pkg::*; #(
// Size of a word in bytes // Size of a word in bytes
parameter WORD_SIZE = `XLEN/8, parameter WORD_SIZE = `XLEN/8,
// Request debug identifier
parameter UUID_WIDTH = 0,
// Request tag size // Request tag size
parameter TAG_WIDTH = 16, parameter TAG_WIDTH = 16,
@ -91,7 +88,6 @@ module VX_local_mem_top import VX_gpu_pkg::*; #(
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE), .WORD_SIZE (WORD_SIZE),
.ADDR_WIDTH (ADDR_WIDTH), .ADDR_WIDTH (ADDR_WIDTH),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH), .TAG_WIDTH (TAG_WIDTH),
.OUT_BUF (3) .OUT_BUF (3)
) local_mem ( ) local_mem (