mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 13:57:17 -04:00
bug fixes
This commit is contained in:
parent
983a848467
commit
dccf5937ff
15 changed files with 387 additions and 272 deletions
|
@ -36,6 +36,215 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define ITF_TO_AOS(itf, prefix, count, dataw) \
|
||||
wire [(count)-1:0] prefix``_valid; \
|
||||
wire [(count)-1:0][(dataw)-1:0] prefix``_data; \
|
||||
wire [(count)-1:0] prefix``_ready; \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
for (genvar i = 0; i < (count); ++i) begin \
|
||||
assign prefix``_valid[i] = itf[i].valid; \
|
||||
assign prefix``_data[i] = itf[i].data; \
|
||||
assign itf[i].ready = prefix``_ready[i]; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define AOS_TO_ITF(prefix, itf, count, dataw) \
|
||||
wire [(count)-1:0] prefix``_valid; \
|
||||
wire [(count)-1:0][(dataw)-1:0] prefix``_data; \
|
||||
wire [(count)-1:0] prefix``_ready; \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
for (genvar i = 0; i < (count); ++i) begin \
|
||||
assign itf[i].valid = prefix``_valid[i]; \
|
||||
assign itf[i].data = prefix``_data[i]; \
|
||||
assign prefix``_ready[i] = itf[i].ready; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define ITF_TO_AOS_V(itf, prefix, count, dataw) \
|
||||
wire [(count)-1:0] prefix``_valid; \
|
||||
wire [(count)-1:0][(dataw)-1:0] prefix``_data; \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
for (genvar i = 0; i < (count); ++i) begin \
|
||||
assign prefix``_valid[i] = itf[i].valid; \
|
||||
assign prefix``_data[i] = itf[i].data; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define AOS_TO_ITF_V(prefix, itf, count, dataw) \
|
||||
wire [(count)-1:0] prefix``_valid; \
|
||||
wire [(count)-1:0][(dataw)-1:0] prefix``_data; \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
for (genvar i = 0; i < (count); ++i) begin \
|
||||
assign itf[i].valid = prefix``_valid[i]; \
|
||||
assign itf[i].data = prefix``_data[i]; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define ITF_TO_AOS_REQ(itf, prefix, count, dataw) \
|
||||
wire [(count)-1:0] prefix``_req_valid; \
|
||||
wire [(count)-1:0][(dataw)-1:0] prefix``_req_data; \
|
||||
wire [(count)-1:0] prefix``_req_ready; \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
for (genvar i = 0; i < (count); ++i) begin \
|
||||
assign prefix``_req_valid[i] = itf[i].req_valid; \
|
||||
assign prefix``_req_data[i] = itf[i].req_data; \
|
||||
assign itf[i].req_ready = prefix``_req_ready[i]; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define AOS_TO_ITF_REQ(prefix, itf, count, dataw) \
|
||||
wire [(count)-1:0] prefix``_req_valid; \
|
||||
wire [(count)-1:0][(dataw)-1:0] prefix``_req_data; \
|
||||
wire [(count)-1:0] prefix``_req_ready; \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
for (genvar i = 0; i < (count); ++i) begin \
|
||||
assign itf[i].req_valid = prefix``_req_valid[i]; \
|
||||
assign itf[i].req_data = prefix``_req_data[i]; \
|
||||
assign prefix``_req_ready[i] = itf[i].req_ready; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define ITF_TO_AOS_REQ_V(itf, prefix, count, dataw) \
|
||||
wire [(count)-1:0] prefix``_req_valid; \
|
||||
wire [(count)-1:0][(dataw)-1:0] prefix``_req_data; \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
for (genvar i = 0; i < (count); ++i) begin \
|
||||
assign prefix``_req_valid[i] = itf[i].req_valid; \
|
||||
assign prefix``_req_data[i] = itf[i].req_data; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define AOS_TO_ITF_REQ_V(prefix, itf, count, dataw) \
|
||||
wire [(count)-1:0] prefix``_req_valid; \
|
||||
wire [(count)-1:0][(dataw)-1:0] prefix``_req_data; \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
for (genvar i = 0; i < (count); ++i) begin \
|
||||
assign itf[i].req_valid = prefix``_req_valid[i]; \
|
||||
assign itf[i].req_data = prefix``_req_data[i]; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define ITF_TO_AOS_RSP(itf, prefix, count, dataw) \
|
||||
wire [(count)-1:0] prefix``_rsp_valid; \
|
||||
wire [(count)-1:0][(dataw)-1:0] prefix``_rsp_data; \
|
||||
wire [(count)-1:0] prefix``_rsp_ready; \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
for (genvar i = 0; i < (count); ++i) begin \
|
||||
assign prefix``_rsp_valid[i] = itf[i].rsp_valid; \
|
||||
assign prefix``_rsp_data[i] = itf[i].rsp_data; \
|
||||
assign itf[i].rsp_ready = prefix``_rsp_ready[i]; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define AOS_TO_ITF_RSP(prefix, itf, count, dataw) \
|
||||
wire [(count)-1:0] prefix``_rsp_valid; \
|
||||
wire [(count)-1:0][(dataw)-1:0] prefix``_rsp_data; \
|
||||
wire [(count)-1:0] prefix``_rsp_ready; \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
for (genvar i = 0; i < (count); ++i) begin \
|
||||
assign itf[i].rsp_valid = prefix``_rsp_valid[i]; \
|
||||
assign itf[i].rsp_data = prefix``_rsp_data[i]; \
|
||||
assign prefix``_rsp_ready[i] = itf[i].rsp_ready; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define ITF_TO_AOS_RSP_V(itf, prefix, count, dataw) \
|
||||
wire [(count)-1:0] prefix``_rsp_valid; \
|
||||
wire [(count)-1:0][(dataw)-1:0] prefix``_rsp_data; \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
for (genvar i = 0; i < (count); ++i) begin \
|
||||
assign prefix``_rsp_valid[i] = itf[i].rsp_valid; \
|
||||
assign prefix``_rsp_data[i] = itf[i].rsp_data; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define AOS_TO_ITF_RSP_V(prefix, itf, count, dataw) \
|
||||
wire [(count)-1:0] prefix``_rsp_valid; \
|
||||
wire [(count)-1:0][(dataw)-1:0] prefix``_rsp_data; \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
for (genvar i = 0; i < (count); ++i) begin \
|
||||
assign itf[i].rsp_valid = prefix``_rsp_valid[i]; \
|
||||
assign itf[i].rsp_data = prefix``_rsp_data[i]; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define REDUCE(__op, __out, __in, __n, __outw) \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
if (__n > 1) begin \
|
||||
reg [(__outw)-1:0] result; \
|
||||
always @(*) begin \
|
||||
result = (__outw)'(__in[0]); \
|
||||
for (integer __i = 1; __i < __n; __i++) begin \
|
||||
result = result __op (__outw)'(__in[__i]); \
|
||||
end \
|
||||
end \
|
||||
assign __out = result; \
|
||||
end else begin \
|
||||
assign __out = (__outw)'(__in[0]); \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define REDUCE_TREE(__op, __out, __in, __n, __outw, __inw) \
|
||||
VX_reduce_tree #( \
|
||||
.DATAW_IN(__inw), \
|
||||
.DATAW_OUT(__outw), \
|
||||
.N(__n), \
|
||||
.OP(`"__op`") \
|
||||
) reduce`__LINE__ ( \
|
||||
.data_in(__in), \
|
||||
.data_out(__out) \
|
||||
)
|
||||
|
||||
`define POP_COUNT_EX(out, in, model) \
|
||||
VX_popcount #( \
|
||||
.N ($bits(in)), \
|
||||
.MODEL (model) \
|
||||
) __pop_count_ex`__LINE__ ( \
|
||||
.data_in (in), \
|
||||
.data_out (out) \
|
||||
)
|
||||
|
||||
`define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1)
|
||||
|
||||
`define CONCAT(out, left_in, right_in, L, R) \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
if (L == 0) begin \
|
||||
assign out = right_in; \
|
||||
end else if (R == 0) begin \
|
||||
assign out = left_in; \
|
||||
end else begin \
|
||||
assign out = {left_in, right_in}; \
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define BUFFER_EX(dst, src, ena, resetw, latency) \
|
||||
VX_pipe_register #( \
|
||||
.DATAW ($bits(dst)), \
|
||||
.RESETW (resetw), \
|
||||
.DEPTH (latency) \
|
||||
) __buffer_ex`__LINE__ ( \
|
||||
.clk (clk), \
|
||||
.reset (reset), \
|
||||
.enable (ena), \
|
||||
.data_in (src), \
|
||||
.data_out (dst) \
|
||||
)
|
||||
|
||||
`define BUFFER(dst, src) `BUFFER_EX(dst, src, 1'b1, $bits(dst), 1)
|
||||
|
||||
`define NEG_EDGE(dst, src) \
|
||||
VX_edge_trigger #( \
|
||||
.POS (0), \
|
||||
.INIT (0) \
|
||||
) __neg_edge`__LINE__ ( \
|
||||
.clk (clk), \
|
||||
.reset (1'b0), \
|
||||
.data_in (src), \
|
||||
.data_out (dst) \
|
||||
)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define REG_EXT_VAL(ext, type) 32'h1
|
||||
//32'((1 << ((type == 1) ? ext[2:0] : ext[1:0]))-1)
|
||||
|
||||
|
@ -71,56 +280,10 @@
|
|||
`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, tag_width, num_inputs, num_caches, uuid_width) \
|
||||
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches), uuid_width), num_caches)
|
||||
|
||||
`define TO_FULL_ADDR(x) {x, (`MEM_ADDR_WIDTH-$bits(x))'(0)}
|
||||
`define TO_FULL_ADDR(x) {x, (`MEM_ADDR_WIDTH-$bits(x))'(0)}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define NEG_EDGE(dst, src) \
|
||||
VX_edge_trigger #( \
|
||||
.POS (0), \
|
||||
.INIT (0) \
|
||||
) __neg_edge`__LINE__ ( \
|
||||
.clk (clk), \
|
||||
.reset (1'b0), \
|
||||
.data_in (src), \
|
||||
.data_out (dst) \
|
||||
)
|
||||
|
||||
`define BUFFER_EX(dst, src, ena, resetw, latency) \
|
||||
VX_pipe_register #( \
|
||||
.DATAW ($bits(dst)), \
|
||||
.RESETW (resetw), \
|
||||
.DEPTH (latency) \
|
||||
) __buffer_ex`__LINE__ ( \
|
||||
.clk (clk), \
|
||||
.reset (reset), \
|
||||
.enable (ena), \
|
||||
.data_in (src), \
|
||||
.data_out (dst) \
|
||||
)
|
||||
|
||||
`define BUFFER(dst, src) `BUFFER_EX(dst, src, 1'b1, $bits(dst), 1)
|
||||
|
||||
`define POP_COUNT_EX(out, in, model) \
|
||||
VX_popcount #( \
|
||||
.N ($bits(in)), \
|
||||
.MODEL (model) \
|
||||
) __pop_count_ex`__LINE__ ( \
|
||||
.data_in (in), \
|
||||
.data_out (out) \
|
||||
)
|
||||
|
||||
`define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1)
|
||||
|
||||
`define CONCAT(out, left_in, right_in, L, R) \
|
||||
if (L == 0) begin : g_right`__LINE__ \
|
||||
assign out = right_in; \
|
||||
end else if (R == 0) begin : g_left`__LINE__ \
|
||||
assign out = left_in; \
|
||||
end else begin : g_concat`__LINE__ \
|
||||
assign out = {left_in, right_in}; \
|
||||
end
|
||||
|
||||
`define ASSIGN_VX_IF(dst, src) \
|
||||
assign dst.valid = src.valid; \
|
||||
assign dst.data = src.data; \
|
||||
|
@ -149,50 +312,52 @@
|
|||
assign dst.rsp_ready = src.rsp_ready
|
||||
|
||||
`define ASSIGN_VX_MEM_BUS_IF_EX(dst, src, TD, TS, UUID) \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
assign dst.req_valid = src.req_valid; \
|
||||
assign dst.req_data.rw = src.req_data.rw; \
|
||||
assign dst.req_data.addr = src.req_data.addr; \
|
||||
assign dst.req_data.data = src.req_data.data; \
|
||||
assign dst.req_data.byteen = src.req_data.byteen; \
|
||||
assign dst.req_data.flags = src.req_data.flags; \
|
||||
if (TD != TS) begin : g_reg_tag_ne`__LINE__ \
|
||||
if (UUID != 0) begin : g_uuid`__LINE__ \
|
||||
if (TD > TS) begin : g_td`__LINE__ \
|
||||
if (TD != TS) begin \
|
||||
if (UUID != 0) begin \
|
||||
if (TD > TS) begin \
|
||||
assign dst.req_data.tag = {src.req_data.tag.uuid, {(TD-TS){1'b0}}, src.req_data.tag.value}; \
|
||||
end else begin : g_ts`__LINE__ \
|
||||
end else begin \
|
||||
assign dst.req_data.tag = {src.req_data.tag.uuid, src.req_data.tag.value[TD-UUID-1:0]}; \
|
||||
end \
|
||||
end else begin : g_no_uuid`__LINE__ \
|
||||
if (TD > TS) begin : g_td`__LINE__ \
|
||||
end else begin \
|
||||
if (TD > TS) begin \
|
||||
assign dst.req_data.tag = {{(TD-TS){1'b0}}, src.req_data.tag}; \
|
||||
end else begin : g_ts`__LINE__ \
|
||||
end else begin \
|
||||
assign dst.req_data.tag = src.req_data.tag[TD-1:0]; \
|
||||
end \
|
||||
end \
|
||||
end else begin : g_req_tag_eq`__LINE__ \
|
||||
end else begin \
|
||||
assign dst.req_data.tag = src.req_data.tag; \
|
||||
end \
|
||||
assign src.req_ready = dst.req_ready; \
|
||||
assign src.rsp_valid = dst.rsp_valid; \
|
||||
assign src.rsp_data.data = dst.rsp_data.data; \
|
||||
if (TD != TS) begin : g_rsp_tag_ne`__LINE__ \
|
||||
if (UUID != 0) begin : g_uuid`__LINE__ \
|
||||
if (TD > TS) begin : g_td`__LINE__ \
|
||||
if (TD != TS) begin \
|
||||
if (UUID != 0) begin \
|
||||
if (TD > TS) begin \
|
||||
assign src.rsp_data.tag = {dst.rsp_data.tag.uuid, dst.rsp_data.tag.value[TS-UUID-1:0]}; \
|
||||
end else begin : g_ts`__LINE__ \
|
||||
end else begin \
|
||||
assign src.rsp_data.tag = {dst.rsp_data.tag.uuid, {(TS-TD){1'b0}}, dst.rsp_data.tag.value}; \
|
||||
end \
|
||||
end else begin : g_no_uuid`__LINE__ \
|
||||
if (TD > TS) begin : g_td`__LINE__ \
|
||||
end else begin \
|
||||
if (TD > TS) begin \
|
||||
assign src.rsp_data.tag = dst.rsp_data.tag[TS-1:0]; \
|
||||
end else begin : g_ts`__LINE__ \
|
||||
end else begin \
|
||||
assign src.rsp_data.tag = {{(TS-TD){1'b0}}, dst.rsp_data.tag}; \
|
||||
end \
|
||||
end \
|
||||
end else begin : g_rsp_tag_eq`__LINE__ \
|
||||
end else begin \
|
||||
assign src.rsp_data.tag = dst.rsp_data.tag; \
|
||||
end \
|
||||
assign dst.rsp_ready = src.rsp_ready
|
||||
assign dst.rsp_ready = src.rsp_ready \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define INIT_VX_MEM_BUS_IF(itf) \
|
||||
assign itf.req_valid = 0; \
|
||||
|
@ -211,7 +376,8 @@
|
|||
`UNUSED_VAR (itf.rsp_ready)
|
||||
|
||||
`define BUFFER_DCR_BUS_IF(dst, src, ena, latency) \
|
||||
if (latency != 0) begin : g_on_`__LINE__ \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
if (latency != 0) begin \
|
||||
VX_pipe_register #( \
|
||||
.DATAW (1 + VX_DCR_ADDR_WIDTH + VX_DCR_DATA_WIDTH), \
|
||||
.DEPTH (latency) \
|
||||
|
@ -222,23 +388,25 @@
|
|||
.data_in ({src.write_valid && ena, src.write_addr, src.write_data}), \
|
||||
.data_out ({dst.write_valid, dst.write_addr, dst.write_data}) \
|
||||
); \
|
||||
end else begin : g_off`__LINE__ \
|
||||
end else begin \
|
||||
assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid && ena, src.write_addr, src.write_data}; \
|
||||
end
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \
|
||||
if (count > 1) begin : g_on`__LINE__ \
|
||||
wire [count-1:0][width-1:0] __reduce_add_i_field; \
|
||||
wire [width-1:0] __reduce_add_o_field; \
|
||||
for (genvar __i = 0; __i < count; ++__i) begin : g_i`__LINE__ \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
if ((count) > 1) begin \
|
||||
wire [(count)-1:0][(width)-1:0] __reduce_add_i_field; \
|
||||
wire [(width)-1:0] __reduce_add_o_field; \
|
||||
for (genvar __i = 0; __i < (count); ++__i) begin \
|
||||
assign __reduce_add_i_field[__i] = src[__i].``field; \
|
||||
end \
|
||||
VX_reduce_tree #(.DATAW_IN(width), .N(count), .OP("+")) __reduce_add_field ( \
|
||||
__reduce_add_i_field, \
|
||||
__reduce_add_o_field \
|
||||
); \
|
||||
if (reg_enable) begin : g_reg`__LINE__ \
|
||||
reg [width-1:0] __reduce_add_r_field; \
|
||||
if (reg_enable) begin \
|
||||
reg [(width)-1:0] __reduce_add_r_field; \
|
||||
always @(posedge clk) begin \
|
||||
if (reset) begin \
|
||||
__reduce_add_r_field <= '0; \
|
||||
|
@ -247,130 +415,25 @@
|
|||
end \
|
||||
end \
|
||||
assign dst.``field = __reduce_add_r_field; \
|
||||
end else begin : g_no_reg`__LINE__ \
|
||||
end else begin \
|
||||
assign dst.``field = __reduce_add_o_field; \
|
||||
end \
|
||||
end else begin : g_off`__LINE__ \
|
||||
end else begin \
|
||||
assign dst.``field = src[0].``field; \
|
||||
end
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \
|
||||
if (block_size != 1) begin : g_on`__LINE__ \
|
||||
if (block_size != `NUM_WARPS) begin : g_eq`__LINE__ \
|
||||
/* verilator lint_off GENUNNAMED */ \
|
||||
if (block_size != 1) begin \
|
||||
if (block_size != `NUM_WARPS) begin \
|
||||
assign dst = {src[NW_WIDTH-1:`CLOG2(block_size)], `CLOG2(block_size)'(block_idx)}; \
|
||||
end else begin : g_ne`__LINE__ \
|
||||
end else begin \
|
||||
assign dst = NW_WIDTH'(block_idx); \
|
||||
end \
|
||||
end else begin : g_off`__LINE__ \
|
||||
end else begin \
|
||||
assign dst = src; \
|
||||
end
|
||||
|
||||
`define ITF_TO_AOS(itf, prefix, count, dataw) \
|
||||
wire [count-1:0] prefix``_valid; \
|
||||
wire [count-1:0][dataw-1:0] prefix``_data; \
|
||||
wire [count-1:0] prefix``_ready; \
|
||||
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
|
||||
assign prefix``_valid[i] = itf[i].valid; \
|
||||
assign prefix``_data[i] = itf[i].data; \
|
||||
assign itf[i].ready = prefix``_ready[i]; \
|
||||
end
|
||||
|
||||
`define AOS_TO_ITF(prefix, itf, count, dataw) \
|
||||
wire [count-1:0] prefix``_valid; \
|
||||
wire [count-1:0][dataw-1:0] prefix``_data; \
|
||||
wire [count-1:0] prefix``_ready; \
|
||||
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
|
||||
assign itf[i].valid = prefix``_valid[i]; \
|
||||
assign itf[i].data = prefix``_data[i]; \
|
||||
assign prefix``_ready[i] = itf[i].ready; \
|
||||
end
|
||||
|
||||
`define ITF_TO_AOS_V(itf, prefix, count, dataw) \
|
||||
wire [count-1:0] prefix``_valid; \
|
||||
wire [count-1:0][dataw-1:0] prefix``_data; \
|
||||
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
|
||||
assign prefix``_valid[i] = itf[i].valid; \
|
||||
assign prefix``_data[i] = itf[i].data; \
|
||||
end
|
||||
|
||||
`define AOS_TO_ITF_V(prefix, itf, count, dataw) \
|
||||
wire [count-1:0] prefix``_valid; \
|
||||
wire [count-1:0][dataw-1:0] prefix``_data; \
|
||||
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
|
||||
assign itf[i].valid = prefix``_valid[i]; \
|
||||
assign itf[i].data = prefix``_data[i]; \
|
||||
end
|
||||
|
||||
`define ITF_TO_AOS_REQ(itf, prefix, count, dataw) \
|
||||
wire [count-1:0] prefix``_req_valid; \
|
||||
wire [count-1:0][dataw-1:0] prefix``_req_data; \
|
||||
wire [count-1:0] prefix``_req_ready; \
|
||||
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
|
||||
assign prefix``_req_valid[i] = itf[i].req_valid; \
|
||||
assign prefix``_req_data[i] = itf[i].req_data; \
|
||||
assign itf[i].req_ready = prefix``_req_ready[i]; \
|
||||
end
|
||||
|
||||
`define AOS_TO_ITF_REQ(prefix, itf, count, dataw) \
|
||||
wire [count-1:0] prefix``_req_valid; \
|
||||
wire [count-1:0][dataw-1:0] prefix``_req_data; \
|
||||
wire [count-1:0] prefix``_req_ready; \
|
||||
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
|
||||
assign itf[i].req_valid = prefix``_req_valid[i]; \
|
||||
assign itf[i].req_data = prefix``_req_data[i]; \
|
||||
assign prefix``_req_ready[i] = itf[i].req_ready; \
|
||||
end
|
||||
|
||||
`define ITF_TO_AOS_REQ_V(itf, prefix, count, dataw) \
|
||||
wire [count-1:0] prefix``_req_valid; \
|
||||
wire [count-1:0][dataw-1:0] prefix``_req_data; \
|
||||
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
|
||||
assign prefix``_req_valid[i] = itf[i].req_valid; \
|
||||
assign prefix``_req_data[i] = itf[i].req_data; \
|
||||
end
|
||||
|
||||
`define AOS_TO_ITF_REQ_V(prefix, itf, count, dataw) \
|
||||
wire [count-1:0] prefix``_req_valid; \
|
||||
wire [count-1:0][dataw-1:0] prefix``_req_data; \
|
||||
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
|
||||
assign itf[i].req_valid = prefix``_req_valid[i]; \
|
||||
assign itf[i].req_data = prefix``_req_data[i]; \
|
||||
end
|
||||
|
||||
`define ITF_TO_AOS_RSP(itf, prefix, count, dataw) \
|
||||
wire [count-1:0] prefix``_rsp_valid; \
|
||||
wire [count-1:0][dataw-1:0] prefix``_rsp_data; \
|
||||
wire [count-1:0] prefix``_rsp_ready; \
|
||||
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
|
||||
assign prefix``_rsp_valid[i] = itf[i].rsp_valid; \
|
||||
assign prefix``_rsp_data[i] = itf[i].rsp_data; \
|
||||
assign itf[i].rsp_ready = prefix``_rsp_ready[i]; \
|
||||
end
|
||||
|
||||
`define AOS_TO_ITF_RSP(prefix, itf, count, dataw) \
|
||||
wire [count-1:0] prefix``_rsp_valid; \
|
||||
wire [count-1:0][dataw-1:0] prefix``_rsp_data; \
|
||||
wire [count-1:0] prefix``_vready; \
|
||||
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
|
||||
assign itf[i].rsp_valid = prefix``_rsp_valid[i]; \
|
||||
assign itf[i].rsp_data = prefix``_rsp_data[i]; \
|
||||
assign prefix``_rsp_ready[i] = itf[i].rsp_ready; \
|
||||
end
|
||||
|
||||
`define ITF_TO_AOS_RSP_V(itf, prefix, count, dataw) \
|
||||
wire [count-1:0] prefix``_rsp_valid; \
|
||||
wire [count-1:0][dataw-1:0] prefix``_rsp_data; \
|
||||
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
|
||||
assign prefix``_rsp_valid[i] = itf[i].rsp_valid; \
|
||||
assign prefix``_rsp_data[i] = itf[i].rsp_data; \
|
||||
end
|
||||
|
||||
`define AOS_TO_ITF_RSP_V(prefix, itf, count, dataw) \
|
||||
wire [count-1:0] prefix``_rsp_valid; \
|
||||
wire [count-1:0][dataw-1:0] prefix``_rsp_data; \
|
||||
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
|
||||
assign itf[i].rsp_valid = prefix``_rsp_valid[i]; \
|
||||
assign itf[i].rsp_data = prefix``_rsp_data[i]; \
|
||||
end
|
||||
end \
|
||||
/* verilator lint_on GENUNNAMED */
|
||||
|
||||
`endif // VX_DEFINE_VH
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_dispatch import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter ISSUE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -29,6 +30,7 @@ module VX_dispatch import VX_gpu_pkg::*; #(
|
|||
VX_dispatch_if.master dispatch_if [NUM_EX_UNITS]
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (ISSUE_ID)
|
||||
|
||||
localparam DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN) + NT_WIDTH;
|
||||
|
||||
|
|
|
@ -41,7 +41,7 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
|
|||
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);
|
||||
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
|
||||
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
|
||||
localparam GPR_BANK_DATAW = `XLEN * `SIMD_WIDTH;
|
||||
localparam GPR_BANK_DATAW = `SIMD_WIDTH * `XLEN;
|
||||
localparam GPR_BANK_SIZE = (PER_ISSUE_WARPS * NUM_REGS * SIMD_COUNT) / NUM_BANKS;
|
||||
localparam GPR_BANK_ADDRW = `CLOG2(GPR_BANK_SIZE);
|
||||
localparam BANKID_WIS_BITS = (BANK_SEL_BITS > 1 && ISSUE_WIS_BITS != 0) ? 1 : 0;
|
||||
|
@ -52,7 +52,7 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
|
|||
localparam PER_BANK_REG_WIDTH = `UP(PER_BANK_REG_BITS);
|
||||
localparam GPR_REQ_DATAW = SRC_OPD_WIDTH + SIMD_IDX_W + PER_BANK_WIS_WIDTH + PER_BANK_REG_BITS;
|
||||
localparam GPR_RSP_DATAW = SRC_OPD_WIDTH + `SIMD_WIDTH * `XLEN;
|
||||
localparam BYTEENW = `SIMD_WIDTH * XLENB;
|
||||
localparam BYTEENW = GPR_BANK_DATAW / 8;
|
||||
|
||||
wire [NUM_REQS-1:0] gpr_req_valid, gpr_req_ready;
|
||||
wire [NUM_REQS-1:0][GPR_REQ_DATAW-1:0] gpr_req_data;
|
||||
|
@ -133,7 +133,7 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [BYTEENW-1:0] bank_wr_byteen;
|
||||
for (genvar i = 0; i < `SIMD_WIDTH; ++i) begin : g_bank_wr_byteen
|
||||
assign bank_wr_byteen[i*XLENB+:XLENB] = {XLENB{writeback_if.data.tmask[i]}};
|
||||
assign bank_wr_byteen[i*XLENB +: XLENB] = {XLENB{writeback_if.data.tmask[i]}};
|
||||
end
|
||||
|
||||
for (genvar b = 0; b < NUM_BANKS; ++b) begin : g_bank_req_data
|
||||
|
@ -180,7 +180,7 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_pipe_buffer #(
|
||||
.DATAW (REQ_SEL_WIDTH + 2)
|
||||
) pipe_reg1 (
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (bank_req_valid[b]),
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_ibuffer import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter ISSUE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -30,6 +31,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
|
|||
VX_ibuffer_if.master ibuffer_if [PER_ISSUE_WARPS]
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (ISSUE_ID)
|
||||
|
||||
localparam NUM_OPDS = NUM_SRC_OPDS + 1;
|
||||
localparam DATAW = UUID_WIDTH + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + NUM_OPDS + (REG_IDX_BITS * NUM_OPDS);
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
|
||||
module VX_issue_slice import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter ISSUE_ID = 0
|
||||
parameter ISSUE_ID
|
||||
) (
|
||||
`SCOPE_IO_DECL
|
||||
|
||||
|
@ -37,7 +37,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
|
|||
VX_operands_if operands_if();
|
||||
|
||||
VX_ibuffer #(
|
||||
.INSTANCE_ID (`SFORMATF(("%s-ibuffer", INSTANCE_ID)))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-ibuffer", INSTANCE_ID))),
|
||||
.ISSUE_ID (ISSUE_ID)
|
||||
) ibuffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -49,7 +50,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_scoreboard #(
|
||||
.INSTANCE_ID (`SFORMATF(("%s-scoreboard", INSTANCE_ID)))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-scoreboard", INSTANCE_ID))),
|
||||
.ISSUE_ID (ISSUE_ID)
|
||||
) scoreboard (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -64,7 +66,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_operands #(
|
||||
.INSTANCE_ID (`SFORMATF(("%s-operands", INSTANCE_ID)))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-operands", INSTANCE_ID))),
|
||||
.ISSUE_ID (ISSUE_ID)
|
||||
) operands (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -77,7 +80,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
|
|||
);
|
||||
|
||||
VX_dispatch #(
|
||||
.INSTANCE_ID (`SFORMATF(("%s-dispatch", INSTANCE_ID)))
|
||||
.INSTANCE_ID (`SFORMATF(("%s-dispatch", INSTANCE_ID))),
|
||||
.ISSUE_ID (ISSUE_ID)
|
||||
) dispatch (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -97,8 +101,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
|
|||
`NEG_EDGE (reset_negedge, reset);
|
||||
`SCOPE_TAP_EX (0, 2, 4, 3, (
|
||||
UUID_WIDTH + NW_WIDTH + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + 1 + NR_BITS * 4 +
|
||||
UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + 1 + NR_BITS + (3 * `XLEN) +
|
||||
UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + NR_BITS + (`NUM_THREADS * `XLEN) + 1
|
||||
UUID_WIDTH + ISSUE_WIS_W + `SIMD_WIDTH + PC_BITS + EX_BITS + INST_OP_BITS + 1 + NR_BITS + (3 * `XLEN) +
|
||||
UUID_WIDTH + ISSUE_WIS_W + `SIMD_WIDTH + NR_BITS + (`SIMD_WIDTH * `XLEN) + 1
|
||||
), {
|
||||
decode_if.valid,
|
||||
decode_if.ready,
|
||||
|
@ -165,11 +169,11 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
|
|||
`TRACE(1, (", op="))
|
||||
trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
|
||||
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `NUM_THREADS)
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `SIMD_WIDTH)
|
||||
`TRACE(1, (", rs2_data="))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS)
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `SIMD_WIDTH)
|
||||
`TRACE(1, (", rs3_data="))
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS)
|
||||
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `SIMD_WIDTH)
|
||||
trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
|
||||
`TRACE(1, (" (#%0d)\n", operands_if.data.uuid))
|
||||
end
|
||||
|
|
|
@ -21,46 +21,45 @@ module VX_issue_top import VX_gpu_pkg::*; #(
|
|||
input wire reset,
|
||||
|
||||
input wire decode_valid,
|
||||
input wire [UUID_WIDTH-1:0] decode_uuid,
|
||||
input wire [NW_WIDTH-1:0] decode_wid,
|
||||
input wire [UUID_WIDTH-1:0] decode_uuid,
|
||||
input wire [NW_WIDTH-1:0] decode_wid,
|
||||
input wire [`NUM_THREADS-1:0] decode_tmask,
|
||||
input wire [PC_BITS-1:0] decode_PC,
|
||||
input wire [EX_BITS-1:0] decode_ex_type,
|
||||
input wire [INST_OP_BITS-1:0] decode_op_type,
|
||||
input wire [PC_BITS-1:0] decode_PC,
|
||||
input wire [EX_BITS-1:0] decode_ex_type,
|
||||
input wire [INST_OP_BITS-1:0] decode_op_type,
|
||||
input op_args_t decode_op_args,
|
||||
input wire decode_wb,
|
||||
input wire [NR_BITS-1:0] decode_rd,
|
||||
input wire [NR_BITS-1:0] decode_rs1,
|
||||
input wire [NR_BITS-1:0] decode_rs2,
|
||||
input wire [NR_BITS-1:0] decode_rs3,
|
||||
input wire [NR_BITS-1:0] decode_rd,
|
||||
input wire [NR_BITS-1:0] decode_rs1,
|
||||
input wire [NR_BITS-1:0] decode_rs2,
|
||||
input wire [NR_BITS-1:0] decode_rs3,
|
||||
output wire decode_ready,
|
||||
|
||||
input wire writeback_valid[`ISSUE_WIDTH],
|
||||
input wire [UUID_WIDTH-1:0] writeback_uuid[`ISSUE_WIDTH],
|
||||
input wire [UUID_WIDTH-1:0] writeback_uuid[`ISSUE_WIDTH],
|
||||
input wire [ISSUE_WIS_W-1:0] writeback_wis[`ISSUE_WIDTH],
|
||||
input wire [`NUM_THREADS-1:0] writeback_tmask[`ISSUE_WIDTH],
|
||||
input wire [PC_BITS-1:0] writeback_PC[`ISSUE_WIDTH],
|
||||
input wire [NR_BITS-1:0] writeback_rd[`ISSUE_WIDTH],
|
||||
input wire [PC_BITS-1:0] writeback_PC[`ISSUE_WIDTH],
|
||||
input wire [NR_BITS-1:0] writeback_rd[`ISSUE_WIDTH],
|
||||
input wire [`NUM_THREADS-1:0][`XLEN-1:0] writeback_data[`ISSUE_WIDTH],
|
||||
input wire writeback_sop[`ISSUE_WIDTH],
|
||||
input wire writeback_eop[`ISSUE_WIDTH],
|
||||
|
||||
output wire dispatch_valid[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [UUID_WIDTH-1:0] dispatch_uuid[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [UUID_WIDTH-1:0] dispatch_uuid[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [ISSUE_WIS_W-1:0] dispatch_wis[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`NUM_THREADS-1:0] dispatch_tmask[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [PC_BITS-1:0] dispatch_PC[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [INST_ALU_BITS-1:0] dispatch_op_type[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [PC_BITS-1:0] dispatch_PC[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [INST_ALU_BITS-1:0] dispatch_op_type[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output op_args_t dispatch_op_args[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire dispatch_wb[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [NR_BITS-1:0] dispatch_rd[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [NT_WIDTH-1:0] dispatch_tid[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [NR_BITS-1:0] dispatch_rd[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [NT_WIDTH-1:0] dispatch_tid[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs1_data[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data[NUM_EX_UNITS * `ISSUE_WIDTH],
|
||||
input wire dispatch_ready[NUM_EX_UNITS * `ISSUE_WIDTH]
|
||||
input wire dispatch_ready[NUM_EX_UNITS * `ISSUE_WIDTH]
|
||||
);
|
||||
|
||||
VX_decode_if decode_if();
|
||||
VX_dispatch_if dispatch_if[NUM_EX_UNITS * `ISSUE_WIDTH]();
|
||||
VX_writeback_if writeback_if[`ISSUE_WIDTH]();
|
||||
|
|
|
@ -21,7 +21,8 @@
|
|||
`endif
|
||||
|
||||
module VX_opc_unit import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter ISSUE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -77,7 +78,9 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
wire [NR_BITS-1:0] rs1 = to_reg_number(staging_if.data.rs1);
|
||||
wire [NR_BITS-1:0] rs2 = to_reg_number(staging_if.data.rs2);
|
||||
wire [NR_BITS-1:0] rs3 = to_reg_number(staging_if.data.rs3);
|
||||
wire [NUM_SRC_OPDS-1:0][NR_BITS-1:0] src_regs = {rs3, rs2, rs1};
|
||||
|
||||
wire [NUM_SRC_OPDS-1:0][NR_BITS-1:0] src_regs;
|
||||
assign src_regs = {rs3, rs2, rs1};
|
||||
|
||||
always @(*) begin
|
||||
state_n = state;
|
||||
|
@ -164,7 +167,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
end
|
||||
end else begin
|
||||
if (gpr_rsp_fire) begin
|
||||
opd_values[gpr_if.rsp_data.opd_id] <= gpr_if.rsp_data.value;
|
||||
opd_values[gpr_if.rsp_data.opd_id] <= gpr_if.rsp_data.data;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -211,4 +214,38 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
|
|||
.ready_out(operands_if.ready)
|
||||
);
|
||||
|
||||
`ifdef DBG_TRACE_PIPELINE
    // Pipeline debug tracing for this operand-collector unit.
    // Compiled in only when DBG_TRACE_PIPELINE is defined. Every message is
    // prefixed with the simulation time and INSTANCE_ID; warp ids are derived
    // from the local warp-in-slice index via wis_to_wid(wis, ISSUE_ID).
    always @(posedge clk) begin
        // Instruction accepted from the scoreboard (valid/ready handshake).
        // All fields, including the PC, are taken from scoreboard_if so the
        // line describes the instruction that is entering the unit rather
        // than whatever currently sits on the output interface.
        if (scoreboard_if.valid && scoreboard_if.ready) begin
            `TRACE(1, ("%t: %s-input: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(scoreboard_if.data.wis, ISSUE_ID), {scoreboard_if.data.PC, 1'b0}))
            trace_ex_type(1, scoreboard_if.data.ex_type);
            `TRACE(1, (", op="))
            trace_ex_op(1, scoreboard_if.data.ex_type, scoreboard_if.data.op_type, scoreboard_if.data.op_args);
            `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d (#%0d)\n", scoreboard_if.data.tmask, scoreboard_if.data.wb, scoreboard_if.data.rd, scoreboard_if.data.rs1, scoreboard_if.data.rs2, scoreboard_if.data.rs3, scoreboard_if.data.uuid))
        end

        // Register-file read request accepted by the GPR unit.
        // The printed value is the converted warp id, so it is labelled "wid"
        // like the other trace lines in this block.
        if (gpr_if.req_valid && gpr_if.req_ready) begin
            `TRACE(1, ("%t: %s-gpr-req: opd=%0d, wid=%0d, sid=%0d, reg=%0d\n", $time, INSTANCE_ID, gpr_if.req_data.opd_id, wis_to_wid(gpr_if.req_data.wis, ISSUE_ID), gpr_if.req_data.sid, gpr_if.req_data.reg_id))
        end

        // Register-file read response; logged whenever rsp_valid is asserted
        // (no ready qualifier is applied to the response here).
        if (gpr_if.rsp_valid) begin
            `TRACE(1, ("%t: %s-gpr-rsp: opd=%0d, data=", $time, INSTANCE_ID, gpr_if.rsp_data.opd_id))
            `TRACE_ARRAY1D(1, "0x%0h", gpr_if.rsp_data.data, `SIMD_WIDTH)
            `TRACE(1, ("\n"))
        end

        // Collected operands handed off downstream (valid/ready handshake).
        if (operands_if.valid && operands_if.ready) begin
            `TRACE(1, ("%t: %s-output: wid=%0d, sid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), operands_if.data.sid, {operands_if.data.PC, 1'b0}))
            trace_ex_type(1, operands_if.data.ex_type);
            `TRACE(1, (", op="))
            trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
            `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd))
            `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `SIMD_WIDTH)
            `TRACE(1, (", rs2_data="))
            `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `SIMD_WIDTH)
            `TRACE(1, (", rs3_data="))
            `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `SIMD_WIDTH)
            trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
            `TRACE(1, (" (#%0d)\n", operands_if.data.uuid))
        end
    end
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -21,7 +21,8 @@
|
|||
`endif
|
||||
|
||||
module VX_operands import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter ISSUE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -34,8 +35,6 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
VX_scoreboard_if.slave scoreboard_if,
|
||||
VX_operands_if.master operands_if
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
|
||||
localparam NUM_OPDS = NUM_SRC_OPDS + 1;
|
||||
localparam SCB_DATAW = UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + NUM_OPDS + (REG_IDX_BITS * NUM_OPDS);
|
||||
localparam OPD_DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN);
|
||||
|
@ -66,9 +65,10 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
`UNUSED_PIN(sel_out)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < `NUM_OPCS; ++i) begin : g_opc_units
|
||||
for (genvar i = 0; i < `NUM_OPCS; ++i) begin : g_collectors
|
||||
VX_opc_unit #(
|
||||
.INSTANCE_ID (INSTANCE_ID)
|
||||
.INSTANCE_ID (`SFORMATF(("%s-collector%0d", INSTANCE_ID, i))),
|
||||
.ISSUE_ID (ISSUE_ID)
|
||||
) opc_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -101,12 +101,12 @@ module VX_operands import VX_gpu_pkg::*; #(
|
|||
|
||||
VX_writeback_if writeback_if_s();
|
||||
assign writeback_if_s.valid = writeback_if.valid && war_dp_check;
|
||||
assign writeback_if_s.data = writeback_if.data;
|
||||
assign writeback_if.ready = war_dp_check;
|
||||
assign writeback_if_s.data = writeback_if.data;
|
||||
assign writeback_if.ready = war_dp_check;
|
||||
`UNUSED_VAR (writeback_if_s.ready)
|
||||
|
||||
VX_gpr_unit #(
|
||||
.INSTANCE_ID (INSTANCE_ID),
|
||||
.INSTANCE_ID (`SFORMATF(("%s-gpr", INSTANCE_ID))),
|
||||
.NUM_REQS (`NUM_OPCS),
|
||||
.NUM_BANKS (`NUM_GPR_BANKS)
|
||||
) gpr_unit (
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_scoreboard import VX_gpu_pkg::*; #(
|
||||
parameter `STRING INSTANCE_ID = ""
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter ISSUE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -30,6 +31,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
VX_scoreboard_if.master scoreboard_if
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (ISSUE_ID)
|
||||
`UNUSED_VAR (writeback_if.data.sop)
|
||||
|
||||
localparam NUM_OPDS = NUM_SRC_OPDS + 1;
|
||||
|
@ -128,36 +130,34 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
&& (writeback_if.data.wis == ISSUE_WIS_W'(w))
|
||||
&& writeback_if.data.eop;
|
||||
|
||||
wire [REG_TYPES-1:0][31:0] ibf_rs1_mask, ibf_rs2_mask, ibf_rs3_mask, ibf_rd_mask;
|
||||
wire [REG_TYPES-1:0][31:0] stg_rs1_mask, stg_rs2_mask, stg_rs3_mask, stg_rd_mask;
|
||||
reg_idx_t [NUM_OPDS-1:0] ibf_opds, stg_opds;
|
||||
assign ibf_opds = {ibuffer_if[w].data.rs3, ibuffer_if[w].data.rs2, ibuffer_if[w].data.rs1, ibuffer_if[w].data.rd};
|
||||
assign stg_opds = {staging_if[w].data.rs3, staging_if[w].data.rs2, staging_if[w].data.rs1, staging_if[w].data.rd};
|
||||
|
||||
for (genvar i = 0; i < REG_TYPES; ++i) begin : g_opd_masks
|
||||
assign ibf_rd_mask[i] = (`REG_EXT_VAL(ibuffer_if[w].data.rd.ext, i) << ibuffer_if[w].data.rd.id) & {32{ibuffer_if[w].data.wb && ibuffer_if[w].data.rd.rtype == i}};
|
||||
assign ibf_rs1_mask[i] = (`REG_EXT_VAL(ibuffer_if[w].data.rs1.ext, i) << ibuffer_if[w].data.rs1.id) & {32{ibuffer_if[w].data.used_rs[0] && ibuffer_if[w].data.rs1.rtype == i}};
|
||||
assign ibf_rs2_mask[i] = (`REG_EXT_VAL(ibuffer_if[w].data.rs2.ext, i) << ibuffer_if[w].data.rs2.id) & {32{ibuffer_if[w].data.used_rs[1] && ibuffer_if[w].data.rs2.rtype == i}};
|
||||
assign ibf_rs3_mask[i] = (`REG_EXT_VAL(ibuffer_if[w].data.rs3.ext, i) << ibuffer_if[w].data.rs3.id) & {32{ibuffer_if[w].data.used_rs[2] && ibuffer_if[w].data.rs3.rtype == i}};
|
||||
wire [NUM_OPDS-1:0] ibf_used_rs = {ibuffer_if[w].data.used_rs, ibuffer_if[w].data.wb};
|
||||
wire [NUM_OPDS-1:0] stg_used_rs = {staging_if[w].data.used_rs, staging_if[w].data.wb};
|
||||
|
||||
assign stg_rd_mask[i] = (`REG_EXT_VAL(staging_if[w].data.rd.ext, i) << staging_if[w].data.rd.id) & {32{staging_if[w].data.wb && staging_if[w].data.rd.rtype == i}};
|
||||
assign stg_rs1_mask[i] = (`REG_EXT_VAL(staging_if[w].data.rs1.ext, i) << staging_if[w].data.rs1.id) & {32{staging_if[w].data.used_rs[0] && staging_if[w].data.rs1.rtype == i}};
|
||||
assign stg_rs2_mask[i] = (`REG_EXT_VAL(staging_if[w].data.rs2.ext, i) << staging_if[w].data.rs2.id) & {32{staging_if[w].data.used_rs[1] && staging_if[w].data.rs2.rtype == i}};
|
||||
assign stg_rs3_mask[i] = (`REG_EXT_VAL(staging_if[w].data.rs3.ext, i) << staging_if[w].data.rs3.id) & {32{staging_if[w].data.used_rs[2] && staging_if[w].data.rs3.rtype == i}};
|
||||
wire [NUM_OPDS-1:0][REG_TYPES-1:0][31:0] ibf_opd_mask, stg_opd_mask;
|
||||
|
||||
for (genvar i = 0; i < NUM_OPDS; ++i) begin : g_opd_masks
|
||||
for (genvar j = 0; j < REG_TYPES; ++j) begin : g_j
|
||||
assign ibf_opd_mask[i][j] = (`REG_EXT_VAL(ibf_opds[i].ext, j) << ibf_opds[i].id) & {32{ibf_used_rs[i] && ibf_opds[i].rtype == j}};
|
||||
assign stg_opd_mask[i][j] = (`REG_EXT_VAL(stg_opds[i].ext, j) << stg_opds[i].id) & {32{stg_used_rs[i] && stg_opds[i].rtype == j}};
|
||||
end
|
||||
end
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [NUM_REGS-1:0][EX_WIDTH-1:0] inuse_units;
|
||||
reg [NUM_REGS-1:0][SFU_WIDTH-1:0] inuse_sfu;
|
||||
|
||||
reg_idx_t [NUM_OPDS-1:0] stg_opds;
|
||||
assign stg_opds = {staging_if[w].data.rs3, staging_if[w].data.rs2, staging_if[w].data.rs1, staging_if[w].data.rd};
|
||||
|
||||
always @(*) begin
|
||||
perf_inuse_units_per_cycle[w] = '0;
|
||||
perf_inuse_sfu_per_cycle[w] = '0;
|
||||
for (integer i = 0; i < NUM_OPDS; ++i) begin
|
||||
if (staging_if[w].valid && operands_busy[i]) begin
|
||||
perf_inuse_units_per_cycle[w][inuse_units[stg_opds[i]]] = 1;
|
||||
if (inuse_units[stg_opds[i]] == EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[w][inuse_sfu[stg_opds[i]]] = 1;
|
||||
perf_inuse_units_per_cycle[w][inuse_units[stg_opds[i].id]] = 1;
|
||||
if (inuse_units[stg_opds[i].id] == EX_SFU) begin
|
||||
perf_inuse_sfu_per_cycle[w][inuse_sfu[stg_opds[i].id]] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -170,14 +170,14 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
inuse_regs_n[writeback_if.data.rd] = 0;
|
||||
end
|
||||
if (staging_fire && staging_if[w].data.wb) begin
|
||||
inuse_regs_n |= stg_rd_mask;
|
||||
inuse_regs_n |= stg_opd_mask[0];
|
||||
end
|
||||
end
|
||||
|
||||
wire [REG_TYPES-1:0][31:0] in_use_mask;
|
||||
for (genvar i = 0; i < REG_TYPES; ++i) begin : g_in_use_mask
|
||||
wire [31:0] ibf_reg_mask = ibf_rs1_mask[i] | ibf_rs2_mask[i] | ibf_rs3_mask[i] | ibf_rd_mask[i];
|
||||
wire [31:0] stg_reg_mask = stg_rs1_mask[i] | stg_rs2_mask[i] | stg_rs3_mask[i] | stg_rd_mask[i];
|
||||
wire [31:0] ibf_reg_mask = ibf_opd_mask[0][i] | ibf_opd_mask[1][i] | ibf_opd_mask[2][i] | ibf_opd_mask[3][i];
|
||||
wire [31:0] stg_reg_mask = stg_opd_mask[0][i] | stg_opd_mask[1][i] | stg_opd_mask[2][i] | stg_opd_mask[3][i];
|
||||
wire [31:0] regs_mask = ibuffer_fire ? ibf_reg_mask : stg_reg_mask;
|
||||
assign in_use_mask[i] = inuse_regs_n[i * 32 +: 32] & regs_mask;
|
||||
end
|
||||
|
@ -187,11 +187,10 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
|
|||
assign regs_busy[i] = (in_use_mask[i] != 0);
|
||||
end
|
||||
|
||||
// per operand busy
|
||||
assign operands_busy[0] = (in_use_mask[staging_if[w].data.rd.rtype] & stg_rd_mask[staging_if[w].data.rd.rtype]) != 0;
|
||||
assign operands_busy[1] = (in_use_mask[staging_if[w].data.rs1.rtype] & stg_rs1_mask[staging_if[w].data.rs1.rtype]) != 0;
|
||||
assign operands_busy[2] = (in_use_mask[staging_if[w].data.rs2.rtype] & stg_rs2_mask[staging_if[w].data.rs2.rtype]) != 0;
|
||||
assign operands_busy[3] = (in_use_mask[staging_if[w].data.rs3.rtype] & stg_rs3_mask[staging_if[w].data.rs3.rtype]) != 0;
|
||||
for (genvar i = 0; i < NUM_OPDS; ++i) begin : g_operands_busy
|
||||
wire [REG_TYPE_BITS-1:0] rtype = stg_opds[i].rtype;
|
||||
assign operands_busy[i] = (in_use_mask[rtype] & stg_opd_mask[i][rtype]) != 0;
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
interface VX_decode_if import VX_gpu_pkg::*; #(
|
||||
parameter NUM_WARPS = `NUM_WARPS
|
||||
);
|
||||
|
||||
typedef struct packed {
|
||||
logic [UUID_WIDTH-1:0] uuid;
|
||||
logic [`LOG2UP(NUM_WARPS)-1:0] wid;
|
||||
|
|
|
@ -24,7 +24,7 @@ interface VX_gpr_if import VX_gpu_pkg::*; ();
|
|||
|
||||
typedef struct packed {
|
||||
logic [1:0] opd_id;
|
||||
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] value;
|
||||
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] data;
|
||||
} rsp_data_t;
|
||||
|
||||
logic req_valid;
|
||||
|
|
|
@ -43,7 +43,7 @@
|
|||
|
||||
`ifdef QUARTUS
|
||||
`define RAM_ARRAY_WREN reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
|
||||
`define RAM_WRITE_WREN `RAM_RESET_BLOCK \
|
||||
`define RAM_WRITE_WREN `RAM_RESET_BLOCK \
|
||||
if (write) begin \
|
||||
for (integer i = 0; i < WRENW; ++i) begin \
|
||||
if (wren[i]) begin \
|
||||
|
@ -305,9 +305,22 @@ module VX_dp_ram #(
|
|||
// simulation
|
||||
reg [DATAW-1:0] ram [0:SIZE-1];
|
||||
`RAM_INITIALIZATION
|
||||
|
||||
reg [DATAW-1:0] wdata_n;
|
||||
always @* begin
|
||||
wdata_n = ram[waddr];
|
||||
for (integer i = 0; i < WRENW; ++i) begin
|
||||
if (wren[i]) begin
|
||||
wdata_n[i * WSELW +: WSELW] = wdata[i * WSELW +: WSELW];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
`RAM_WRITE_WREN
|
||||
`RAM_RESET_BLOCK
|
||||
if (write) begin
|
||||
ram[waddr] <= wdata_n;
|
||||
end
|
||||
end
|
||||
|
||||
if (OUT_REG) begin : g_sync
|
||||
|
|
|
@ -62,8 +62,10 @@ module VX_stream_xpoint #(
|
|||
valid_out_w = '0;
|
||||
data_out_w = 'x;
|
||||
for (integer i = 0; i < NUM_INPUTS; ++i) begin
|
||||
valid_out_w[sel_in[i]] = valid_in[i];
|
||||
data_out_w[sel_in[i]] = data_in[i];
|
||||
if (valid_in[i]) begin
|
||||
valid_out_w[sel_in[i]] = 1;
|
||||
data_out_w[sel_in[i]] = data_in[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -46,7 +46,6 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
VX_mem_bus_if.slave mem_bus_if [NUM_REQS]
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`UNUSED_PARAM (UUID_WIDTH)
|
||||
|
||||
localparam REQ_SEL_BITS = `CLOG2(NUM_REQS);
|
||||
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);
|
||||
|
|
|
@ -27,9 +27,6 @@ module VX_local_mem_top import VX_gpu_pkg::*; #(
|
|||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = `XLEN/8,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
||||
// Request tag size
|
||||
parameter TAG_WIDTH = 16,
|
||||
|
||||
|
@ -91,7 +88,6 @@ module VX_local_mem_top import VX_gpu_pkg::*; #(
|
|||
.NUM_BANKS (NUM_BANKS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
.ADDR_WIDTH (ADDR_WIDTH),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.TAG_WIDTH (TAG_WIDTH),
|
||||
.OUT_BUF (3)
|
||||
) local_mem (
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue