bug fixes

This commit is contained in:
tinebp 2025-02-21 05:56:52 -08:00
parent 983a848467
commit dccf5937ff
15 changed files with 387 additions and 272 deletions

View file

@ -36,6 +36,215 @@
///////////////////////////////////////////////////////////////////////////////
// ITF_TO_AOS: flattens an array of interface instances into packed arrays.
// Declares <prefix>_valid, <prefix>_data (dataw bits per entry) and
// <prefix>_ready, each with count entries. For every index i it copies
// itf[i].valid and itf[i].data into the arrays and drives itf[i].ready from
// <prefix>_ready[i]. The generate loop is unnamed, so the Verilator
// GENUNNAMED lint is disabled around it and re-enabled afterwards.
`define ITF_TO_AOS(itf, prefix, count, dataw) \
wire [(count)-1:0] prefix``_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_data; \
wire [(count)-1:0] prefix``_ready; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign prefix``_valid[i] = itf[i].valid; \
assign prefix``_data[i] = itf[i].data; \
assign itf[i].ready = prefix``_ready[i]; \
end \
/* verilator lint_on GENUNNAMED */
// AOS_TO_ITF: drives an array of interface instances from packed arrays.
// Declares <prefix>_valid, <prefix>_data (dataw bits per entry) and
// <prefix>_ready, each with count entries. For every index i it drives
// itf[i].valid and itf[i].data from the arrays and collects itf[i].ready
// into <prefix>_ready[i]. GENUNNAMED is suppressed for the unnamed loop only.
`define AOS_TO_ITF(prefix, itf, count, dataw) \
wire [(count)-1:0] prefix``_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_data; \
wire [(count)-1:0] prefix``_ready; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign itf[i].valid = prefix``_valid[i]; \
assign itf[i].data = prefix``_data[i]; \
assign prefix``_ready[i] = itf[i].ready; \
end \
/* verilator lint_on GENUNNAMED */
// ITF_TO_AOS_V: valid/data-only variant of ITF_TO_AOS (no ready signal).
// Declares <prefix>_valid and <prefix>_data and fills entry i from
// itf[i].valid and itf[i].data.
`define ITF_TO_AOS_V(itf, prefix, count, dataw) \
wire [(count)-1:0] prefix``_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_data; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign prefix``_valid[i] = itf[i].valid; \
assign prefix``_data[i] = itf[i].data; \
end \
/* verilator lint_on GENUNNAMED */
// AOS_TO_ITF_V: valid/data-only variant of AOS_TO_ITF (no ready signal).
// Declares <prefix>_valid and <prefix>_data and drives itf[i].valid and
// itf[i].data from entry i of those arrays.
`define AOS_TO_ITF_V(prefix, itf, count, dataw) \
wire [(count)-1:0] prefix``_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_data; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign itf[i].valid = prefix``_valid[i]; \
assign itf[i].data = prefix``_data[i]; \
end \
/* verilator lint_on GENUNNAMED */
// ITF_TO_AOS_REQ: request-channel version of ITF_TO_AOS.
// Declares <prefix>_req_valid, <prefix>_req_data and <prefix>_req_ready,
// copies itf[i].req_valid / itf[i].req_data into them, and drives
// itf[i].req_ready from <prefix>_req_ready[i].
`define ITF_TO_AOS_REQ(itf, prefix, count, dataw) \
wire [(count)-1:0] prefix``_req_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_req_data; \
wire [(count)-1:0] prefix``_req_ready; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign prefix``_req_valid[i] = itf[i].req_valid; \
assign prefix``_req_data[i] = itf[i].req_data; \
assign itf[i].req_ready = prefix``_req_ready[i]; \
end \
/* verilator lint_on GENUNNAMED */
// AOS_TO_ITF_REQ: request-channel version of AOS_TO_ITF.
// Declares <prefix>_req_valid, <prefix>_req_data and <prefix>_req_ready,
// drives itf[i].req_valid / itf[i].req_data from them, and collects
// itf[i].req_ready into <prefix>_req_ready[i].
`define AOS_TO_ITF_REQ(prefix, itf, count, dataw) \
wire [(count)-1:0] prefix``_req_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_req_data; \
wire [(count)-1:0] prefix``_req_ready; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign itf[i].req_valid = prefix``_req_valid[i]; \
assign itf[i].req_data = prefix``_req_data[i]; \
assign prefix``_req_ready[i] = itf[i].req_ready; \
end \
/* verilator lint_on GENUNNAMED */
// ITF_TO_AOS_REQ_V: request-channel, valid/data-only variant.
// Declares <prefix>_req_valid and <prefix>_req_data and fills entry i from
// itf[i].req_valid and itf[i].req_data; no ready signal is declared.
`define ITF_TO_AOS_REQ_V(itf, prefix, count, dataw) \
wire [(count)-1:0] prefix``_req_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_req_data; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign prefix``_req_valid[i] = itf[i].req_valid; \
assign prefix``_req_data[i] = itf[i].req_data; \
end \
/* verilator lint_on GENUNNAMED */
// AOS_TO_ITF_REQ_V: request-channel, valid/data-only variant.
// Declares <prefix>_req_valid and <prefix>_req_data and drives
// itf[i].req_valid and itf[i].req_data from entry i; no ready signal.
`define AOS_TO_ITF_REQ_V(prefix, itf, count, dataw) \
wire [(count)-1:0] prefix``_req_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_req_data; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign itf[i].req_valid = prefix``_req_valid[i]; \
assign itf[i].req_data = prefix``_req_data[i]; \
end \
/* verilator lint_on GENUNNAMED */
// ITF_TO_AOS_RSP: response-channel version of ITF_TO_AOS.
// Declares <prefix>_rsp_valid, <prefix>_rsp_data and <prefix>_rsp_ready,
// copies itf[i].rsp_valid / itf[i].rsp_data into them, and drives
// itf[i].rsp_ready from <prefix>_rsp_ready[i].
`define ITF_TO_AOS_RSP(itf, prefix, count, dataw) \
wire [(count)-1:0] prefix``_rsp_valid; \
wire [(count)-1:0][(dataw)-1:0] prefix``_rsp_data; \
wire [(count)-1:0] prefix``_rsp_ready; \
/* verilator lint_off GENUNNAMED */ \
for (genvar i = 0; i < (count); ++i) begin \
assign prefix``_rsp_valid[i] = itf[i].rsp_valid; \
assign prefix``_rsp_data[i] = itf[i].rsp_data; \
assign itf[i].rsp_ready = prefix``_rsp_ready[i]; \
end \
/* verilator lint_on GENUNNAMED */
// AOS_TO_ITF_RSP: response-channel version of AOS_TO_ITF.
// Declares <prefix>_rsp_valid, <prefix>_rsp_data and <prefix>_rsp_ready,
// drives itf[i].rsp_valid / itf[i].rsp_data from them, and collects
// itf[i].rsp_ready into <prefix>_rsp_ready[i].
// The ready vector is declared as <prefix>_rsp_ready so that it matches the
// net assigned inside the loop (it was previously declared as
// <prefix>_vready, leaving <prefix>_rsp_ready undeclared). The GENUNNAMED
// lint is re-enabled after the unnamed generate loop.
`define AOS_TO_ITF_RSP(prefix, itf, count, dataw) \
    wire [(count)-1:0] prefix``_rsp_valid; \
    wire [(count)-1:0][(dataw)-1:0] prefix``_rsp_data; \
    wire [(count)-1:0] prefix``_rsp_ready; \
    /* verilator lint_off GENUNNAMED */ \
    for (genvar i = 0; i < (count); ++i) begin \
        assign itf[i].rsp_valid = prefix``_rsp_valid[i]; \
        assign itf[i].rsp_data = prefix``_rsp_data[i]; \
        assign prefix``_rsp_ready[i] = itf[i].rsp_ready; \
    end \
    /* verilator lint_on GENUNNAMED */
// ITF_TO_AOS_RSP_V: response-channel, valid/data-only variant.
// Declares <prefix>_rsp_valid and <prefix>_rsp_data and fills entry i from
// itf[i].rsp_valid and itf[i].rsp_data; no ready signal is declared.
// The closing pragma re-enables GENUNNAMED so the suppression does not leak
// into the code that follows the macro expansion.
`define ITF_TO_AOS_RSP_V(itf, prefix, count, dataw) \
    wire [(count)-1:0] prefix``_rsp_valid; \
    wire [(count)-1:0][(dataw)-1:0] prefix``_rsp_data; \
    /* verilator lint_off GENUNNAMED */ \
    for (genvar i = 0; i < (count); ++i) begin \
        assign prefix``_rsp_valid[i] = itf[i].rsp_valid; \
        assign prefix``_rsp_data[i] = itf[i].rsp_data; \
    end \
    /* verilator lint_on GENUNNAMED */
// AOS_TO_ITF_RSP_V: response-channel, valid/data-only variant.
// Declares <prefix>_rsp_valid and <prefix>_rsp_data and drives
// itf[i].rsp_valid and itf[i].rsp_data from entry i; no ready signal.
// The closing pragma re-enables GENUNNAMED so the suppression does not leak
// into the code that follows the macro expansion.
`define AOS_TO_ITF_RSP_V(prefix, itf, count, dataw) \
    wire [(count)-1:0] prefix``_rsp_valid; \
    wire [(count)-1:0][(dataw)-1:0] prefix``_rsp_data; \
    /* verilator lint_off GENUNNAMED */ \
    for (genvar i = 0; i < (count); ++i) begin \
        assign itf[i].rsp_valid = prefix``_rsp_valid[i]; \
        assign itf[i].rsp_data = prefix``_rsp_data[i]; \
    end \
    /* verilator lint_on GENUNNAMED */
// REDUCE: folds the __n entries of __in with the binary operator __op and
// assigns the result to __out.
//   __op   : binary operator token placed between the accumulator and the
//            next element
//   __out  : destination net of the continuous assignment
//   __in   : indexable source; elements 0 .. __n-1 are read
//   __n    : element count; when it is not greater than 1 only __in[0] is
//            forwarded
//   __outw : result width; every element is cast to this width before use
// __n is parenthesized so that an expression argument keeps its intended
// precedence, and GENUNNAMED is re-enabled after the unnamed generate blocks.
`define REDUCE(__op, __out, __in, __n, __outw) \
    /* verilator lint_off GENUNNAMED */ \
    if ((__n) > 1) begin \
        reg [(__outw)-1:0] result; \
        always @(*) begin \
            result = (__outw)'(__in[0]); \
            for (integer __i = 1; __i < (__n); __i++) begin \
                result = result __op (__outw)'(__in[__i]); \
            end \
        end \
        assign __out = result; \
    end else begin \
        assign __out = (__outw)'(__in[0]); \
    end \
    /* verilator lint_on GENUNNAMED */
// REDUCE_TREE: instantiates VX_reduce_tree to reduce __n inputs of __inw bits
// into one __outw-bit output using the operator __op.
//   __op is passed as a bare token (no quotes); it is turned into the OP
//   string parameter with the `" macro-quote, because macro arguments are not
//   substituted inside an ordinary "..." string literal.
// The input width is bound to DATAW_IN, the parameter name used by the other
// VX_reduce_tree instantiation in this header.
// The instance name is made unique per use with the current line number.
`define REDUCE_TREE(__op, __out, __in, __n, __outw, __inw) \
    VX_reduce_tree #( \
        .DATAW_IN  (__inw), \
        .DATAW_OUT (__outw), \
        .N         (__n), \
        .OP        (`"__op`") \
    ) reduce`__LINE__ ( \
        .data_in  (__in), \
        .data_out (__out) \
    )
// POP_COUNT_EX: instantiates VX_popcount on the signal in and connects its
// result to out. The input width N is taken from $bits(in); model is passed
// through to the MODEL parameter. The instance name is made unique per use
// with the current line number.
`define POP_COUNT_EX(out, in, model) \
VX_popcount #( \
.N ($bits(in)), \
.MODEL (model) \
) __pop_count_ex`__LINE__ ( \
.data_in (in), \
.data_out (out) \
)
// POP_COUNT: shorthand for POP_COUNT_EX with MODEL fixed to 1.
`define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1)
// CONCAT: assigns out = {left_in, right_in}, tolerating a zero-width side.
//   L, R : widths of left_in and right_in; when L is 0 only right_in is
//          assigned, when R is 0 only left_in is assigned.
// L and R are parenthesized so expression arguments compare as a whole, and
// GENUNNAMED is re-enabled after the unnamed generate branches.
`define CONCAT(out, left_in, right_in, L, R) \
    /* verilator lint_off GENUNNAMED */ \
    if ((L) == 0) begin \
        assign out = right_in; \
    end else if ((R) == 0) begin \
        assign out = left_in; \
    end else begin \
        assign out = {left_in, right_in}; \
    end \
    /* verilator lint_on GENUNNAMED */
// BUFFER_EX: instantiates VX_pipe_register between src and dst.
//   ena     : connected to the enable port
//   resetw  : passed to the RESETW parameter
//   latency : passed to the DEPTH parameter
// DATAW is taken from $bits(dst). The macro connects signals named clk and
// reset, so both must exist in the scope where it is expanded.
`define BUFFER_EX(dst, src, ena, resetw, latency) \
VX_pipe_register #( \
.DATAW ($bits(dst)), \
.RESETW (resetw), \
.DEPTH (latency) \
) __buffer_ex`__LINE__ ( \
.clk (clk), \
.reset (reset), \
.enable (ena), \
.data_in (src), \
.data_out (dst) \
)
// BUFFER: BUFFER_EX with enable tied to 1'b1, RESETW = $bits(dst), DEPTH = 1.
`define BUFFER(dst, src) `BUFFER_EX(dst, src, 1'b1, $bits(dst), 1)
// NEG_EDGE: instantiates VX_edge_trigger with POS = 0 and INIT = 0, feeding
// src to data_in and dst from data_out. The reset port is tied to 1'b0 and
// the clock is taken from a signal named clk in the expanding scope.
// NOTE(review): POS = 0 presumably selects falling-edge detection, as the
// macro name suggests - confirm against VX_edge_trigger.
`define NEG_EDGE(dst, src) \
VX_edge_trigger #( \
.POS (0), \
.INIT (0) \
) __neg_edge`__LINE__ ( \
.clk (clk), \
.reset (1'b0), \
.data_in (src), \
.data_out (dst) \
)
///////////////////////////////////////////////////////////////////////////////
// REG_EXT_VAL: currently expands to the constant 32'h1; both arguments are
// ignored. The commented-out expression below is the ext/type-dependent
// formula that is disabled at the moment.
`define REG_EXT_VAL(ext, type) 32'h1
//32'((1 << ((type == 1) ? ext[2:0] : ext[1:0]))-1)
@ -71,56 +280,10 @@
`define CACHE_CLUSTER_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, tag_width, num_inputs, num_caches, uuid_width) \
`CACHE_CLUSTER_MEM_ARB_TAG(`CACHE_NC_MEM_TAG_WIDTH(mshr_size, num_banks, num_reqs, mem_ports, line_size, word_size, `CACHE_CLUSTER_CORE_ARB_TAG(tag_width, num_inputs, num_caches), uuid_width), num_caches)
`define TO_FULL_ADDR(x) {x, (`MEM_ADDR_WIDTH-$bits(x))'(0)}
`define TO_FULL_ADDR(x) {x, (`MEM_ADDR_WIDTH-$bits(x))'(0)}
///////////////////////////////////////////////////////////////////////////////
`define NEG_EDGE(dst, src) \
VX_edge_trigger #( \
.POS (0), \
.INIT (0) \
) __neg_edge`__LINE__ ( \
.clk (clk), \
.reset (1'b0), \
.data_in (src), \
.data_out (dst) \
)
`define BUFFER_EX(dst, src, ena, resetw, latency) \
VX_pipe_register #( \
.DATAW ($bits(dst)), \
.RESETW (resetw), \
.DEPTH (latency) \
) __buffer_ex`__LINE__ ( \
.clk (clk), \
.reset (reset), \
.enable (ena), \
.data_in (src), \
.data_out (dst) \
)
`define BUFFER(dst, src) `BUFFER_EX(dst, src, 1'b1, $bits(dst), 1)
`define POP_COUNT_EX(out, in, model) \
VX_popcount #( \
.N ($bits(in)), \
.MODEL (model) \
) __pop_count_ex`__LINE__ ( \
.data_in (in), \
.data_out (out) \
)
`define POP_COUNT(out, in) `POP_COUNT_EX(out, in, 1)
`define CONCAT(out, left_in, right_in, L, R) \
if (L == 0) begin : g_right`__LINE__ \
assign out = right_in; \
end else if (R == 0) begin : g_left`__LINE__ \
assign out = left_in; \
end else begin : g_concat`__LINE__ \
assign out = {left_in, right_in}; \
end
`define ASSIGN_VX_IF(dst, src) \
assign dst.valid = src.valid; \
assign dst.data = src.data; \
@ -149,50 +312,52 @@
assign dst.rsp_ready = src.rsp_ready
`define ASSIGN_VX_MEM_BUS_IF_EX(dst, src, TD, TS, UUID) \
/* verilator lint_off GENUNNAMED */ \
assign dst.req_valid = src.req_valid; \
assign dst.req_data.rw = src.req_data.rw; \
assign dst.req_data.addr = src.req_data.addr; \
assign dst.req_data.data = src.req_data.data; \
assign dst.req_data.byteen = src.req_data.byteen; \
assign dst.req_data.flags = src.req_data.flags; \
if (TD != TS) begin : g_reg_tag_ne`__LINE__ \
if (UUID != 0) begin : g_uuid`__LINE__ \
if (TD > TS) begin : g_td`__LINE__ \
if (TD != TS) begin \
if (UUID != 0) begin \
if (TD > TS) begin \
assign dst.req_data.tag = {src.req_data.tag.uuid, {(TD-TS){1'b0}}, src.req_data.tag.value}; \
end else begin : g_ts`__LINE__ \
end else begin \
assign dst.req_data.tag = {src.req_data.tag.uuid, src.req_data.tag.value[TD-UUID-1:0]}; \
end \
end else begin : g_no_uuid`__LINE__ \
if (TD > TS) begin : g_td`__LINE__ \
end else begin \
if (TD > TS) begin \
assign dst.req_data.tag = {{(TD-TS){1'b0}}, src.req_data.tag}; \
end else begin : g_ts`__LINE__ \
end else begin \
assign dst.req_data.tag = src.req_data.tag[TD-1:0]; \
end \
end \
end else begin : g_req_tag_eq`__LINE__ \
end else begin \
assign dst.req_data.tag = src.req_data.tag; \
end \
assign src.req_ready = dst.req_ready; \
assign src.rsp_valid = dst.rsp_valid; \
assign src.rsp_data.data = dst.rsp_data.data; \
if (TD != TS) begin : g_rsp_tag_ne`__LINE__ \
if (UUID != 0) begin : g_uuid`__LINE__ \
if (TD > TS) begin : g_td`__LINE__ \
if (TD != TS) begin \
if (UUID != 0) begin \
if (TD > TS) begin \
assign src.rsp_data.tag = {dst.rsp_data.tag.uuid, dst.rsp_data.tag.value[TS-UUID-1:0]}; \
end else begin : g_ts`__LINE__ \
end else begin \
assign src.rsp_data.tag = {dst.rsp_data.tag.uuid, {(TS-TD){1'b0}}, dst.rsp_data.tag.value}; \
end \
end else begin : g_no_uuid`__LINE__ \
if (TD > TS) begin : g_td`__LINE__ \
end else begin \
if (TD > TS) begin \
assign src.rsp_data.tag = dst.rsp_data.tag[TS-1:0]; \
end else begin : g_ts`__LINE__ \
end else begin \
assign src.rsp_data.tag = {{(TS-TD){1'b0}}, dst.rsp_data.tag}; \
end \
end \
end else begin : g_rsp_tag_eq`__LINE__ \
end else begin \
assign src.rsp_data.tag = dst.rsp_data.tag; \
end \
assign dst.rsp_ready = src.rsp_ready
assign dst.rsp_ready = src.rsp_ready \
/* verilator lint_off GENUNNAMED */
`define INIT_VX_MEM_BUS_IF(itf) \
assign itf.req_valid = 0; \
@ -211,7 +376,8 @@
`UNUSED_VAR (itf.rsp_ready)
`define BUFFER_DCR_BUS_IF(dst, src, ena, latency) \
if (latency != 0) begin : g_on_`__LINE__ \
/* verilator lint_off GENUNNAMED */ \
if (latency != 0) begin \
VX_pipe_register #( \
.DATAW (1 + VX_DCR_ADDR_WIDTH + VX_DCR_DATA_WIDTH), \
.DEPTH (latency) \
@ -222,23 +388,25 @@
.data_in ({src.write_valid && ena, src.write_addr, src.write_data}), \
.data_out ({dst.write_valid, dst.write_addr, dst.write_data}) \
); \
end else begin : g_off`__LINE__ \
end else begin \
assign {dst.write_valid, dst.write_addr, dst.write_data} = {src.write_valid && ena, src.write_addr, src.write_data}; \
end
end \
/* verilator lint_off GENUNNAMED */
`define PERF_COUNTER_ADD(dst, src, field, width, count, reg_enable) \
if (count > 1) begin : g_on`__LINE__ \
wire [count-1:0][width-1:0] __reduce_add_i_field; \
wire [width-1:0] __reduce_add_o_field; \
for (genvar __i = 0; __i < count; ++__i) begin : g_i`__LINE__ \
/* verilator lint_off GENUNNAMED */ \
if ((count) > 1) begin \
wire [(count)-1:0][(width)-1:0] __reduce_add_i_field; \
wire [(width)-1:0] __reduce_add_o_field; \
for (genvar __i = 0; __i < (count); ++__i) begin \
assign __reduce_add_i_field[__i] = src[__i].``field; \
end \
VX_reduce_tree #(.DATAW_IN(width), .N(count), .OP("+")) __reduce_add_field ( \
__reduce_add_i_field, \
__reduce_add_o_field \
); \
if (reg_enable) begin : g_reg`__LINE__ \
reg [width-1:0] __reduce_add_r_field; \
if (reg_enable) begin \
reg [(width)-1:0] __reduce_add_r_field; \
always @(posedge clk) begin \
if (reset) begin \
__reduce_add_r_field <= '0; \
@ -247,130 +415,25 @@
end \
end \
assign dst.``field = __reduce_add_r_field; \
end else begin : g_no_reg`__LINE__ \
end else begin \
assign dst.``field = __reduce_add_o_field; \
end \
end else begin : g_off`__LINE__ \
end else begin \
assign dst.``field = src[0].``field; \
end
end \
/* verilator lint_off GENUNNAMED */
`define ASSIGN_BLOCKED_WID(dst, src, block_idx, block_size) \
if (block_size != 1) begin : g_on`__LINE__ \
if (block_size != `NUM_WARPS) begin : g_eq`__LINE__ \
/* verilator lint_off GENUNNAMED */ \
if (block_size != 1) begin \
if (block_size != `NUM_WARPS) begin \
assign dst = {src[NW_WIDTH-1:`CLOG2(block_size)], `CLOG2(block_size)'(block_idx)}; \
end else begin : g_ne`__LINE__ \
end else begin \
assign dst = NW_WIDTH'(block_idx); \
end \
end else begin : g_off`__LINE__ \
end else begin \
assign dst = src; \
end
`define ITF_TO_AOS(itf, prefix, count, dataw) \
wire [count-1:0] prefix``_valid; \
wire [count-1:0][dataw-1:0] prefix``_data; \
wire [count-1:0] prefix``_ready; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign prefix``_valid[i] = itf[i].valid; \
assign prefix``_data[i] = itf[i].data; \
assign itf[i].ready = prefix``_ready[i]; \
end
`define AOS_TO_ITF(prefix, itf, count, dataw) \
wire [count-1:0] prefix``_valid; \
wire [count-1:0][dataw-1:0] prefix``_data; \
wire [count-1:0] prefix``_ready; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign itf[i].valid = prefix``_valid[i]; \
assign itf[i].data = prefix``_data[i]; \
assign prefix``_ready[i] = itf[i].ready; \
end
`define ITF_TO_AOS_V(itf, prefix, count, dataw) \
wire [count-1:0] prefix``_valid; \
wire [count-1:0][dataw-1:0] prefix``_data; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign prefix``_valid[i] = itf[i].valid; \
assign prefix``_data[i] = itf[i].data; \
end
`define AOS_TO_ITF_V(prefix, itf, count, dataw) \
wire [count-1:0] prefix``_valid; \
wire [count-1:0][dataw-1:0] prefix``_data; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign itf[i].valid = prefix``_valid[i]; \
assign itf[i].data = prefix``_data[i]; \
end
`define ITF_TO_AOS_REQ(itf, prefix, count, dataw) \
wire [count-1:0] prefix``_req_valid; \
wire [count-1:0][dataw-1:0] prefix``_req_data; \
wire [count-1:0] prefix``_req_ready; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign prefix``_req_valid[i] = itf[i].req_valid; \
assign prefix``_req_data[i] = itf[i].req_data; \
assign itf[i].req_ready = prefix``_req_ready[i]; \
end
`define AOS_TO_ITF_REQ(prefix, itf, count, dataw) \
wire [count-1:0] prefix``_req_valid; \
wire [count-1:0][dataw-1:0] prefix``_req_data; \
wire [count-1:0] prefix``_req_ready; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign itf[i].req_valid = prefix``_req_valid[i]; \
assign itf[i].req_data = prefix``_req_data[i]; \
assign prefix``_req_ready[i] = itf[i].req_ready; \
end
`define ITF_TO_AOS_REQ_V(itf, prefix, count, dataw) \
wire [count-1:0] prefix``_req_valid; \
wire [count-1:0][dataw-1:0] prefix``_req_data; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign prefix``_req_valid[i] = itf[i].req_valid; \
assign prefix``_req_data[i] = itf[i].req_data; \
end
`define AOS_TO_ITF_REQ_V(prefix, itf, count, dataw) \
wire [count-1:0] prefix``_req_valid; \
wire [count-1:0][dataw-1:0] prefix``_req_data; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign itf[i].req_valid = prefix``_req_valid[i]; \
assign itf[i].req_data = prefix``_req_data[i]; \
end
`define ITF_TO_AOS_RSP(itf, prefix, count, dataw) \
wire [count-1:0] prefix``_rsp_valid; \
wire [count-1:0][dataw-1:0] prefix``_rsp_data; \
wire [count-1:0] prefix``_rsp_ready; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign prefix``_rsp_valid[i] = itf[i].rsp_valid; \
assign prefix``_rsp_data[i] = itf[i].rsp_data; \
assign itf[i].rsp_ready = prefix``_rsp_ready[i]; \
end
`define AOS_TO_ITF_RSP(prefix, itf, count, dataw) \
wire [count-1:0] prefix``_rsp_valid; \
wire [count-1:0][dataw-1:0] prefix``_rsp_data; \
wire [count-1:0] prefix``_vready; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign itf[i].rsp_valid = prefix``_rsp_valid[i]; \
assign itf[i].rsp_data = prefix``_rsp_data[i]; \
assign prefix``_rsp_ready[i] = itf[i].rsp_ready; \
end
`define ITF_TO_AOS_RSP_V(itf, prefix, count, dataw) \
wire [count-1:0] prefix``_rsp_valid; \
wire [count-1:0][dataw-1:0] prefix``_rsp_data; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign prefix``_rsp_valid[i] = itf[i].rsp_valid; \
assign prefix``_rsp_data[i] = itf[i].rsp_data; \
end
`define AOS_TO_ITF_RSP_V(prefix, itf, count, dataw) \
wire [count-1:0] prefix``_rsp_valid; \
wire [count-1:0][dataw-1:0] prefix``_rsp_data; \
for (genvar i = 0; i < count; ++i) begin : g_i`__LINE__ \
assign itf[i].rsp_valid = prefix``_rsp_valid[i]; \
assign itf[i].rsp_data = prefix``_rsp_data[i]; \
end
end \
/* verilator lint_off GENUNNAMED */
`endif // VX_DEFINE_VH

View file

@ -14,7 +14,8 @@
`include "VX_define.vh"
module VX_dispatch import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = ""
parameter `STRING INSTANCE_ID = "",
parameter ISSUE_ID = 0
) (
input wire clk,
input wire reset,
@ -29,6 +30,7 @@ module VX_dispatch import VX_gpu_pkg::*; #(
VX_dispatch_if.master dispatch_if [NUM_EX_UNITS]
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (ISSUE_ID)
localparam DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN) + NT_WIDTH;

View file

@ -41,7 +41,7 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);
localparam BANK_SEL_BITS = `CLOG2(NUM_BANKS);
localparam BANK_SEL_WIDTH = `UP(BANK_SEL_BITS);
localparam GPR_BANK_DATAW = `XLEN * `SIMD_WIDTH;
localparam GPR_BANK_DATAW = `SIMD_WIDTH * `XLEN;
localparam GPR_BANK_SIZE = (PER_ISSUE_WARPS * NUM_REGS * SIMD_COUNT) / NUM_BANKS;
localparam GPR_BANK_ADDRW = `CLOG2(GPR_BANK_SIZE);
localparam BANKID_WIS_BITS = (BANK_SEL_BITS > 1 && ISSUE_WIS_BITS != 0) ? 1 : 0;
@ -52,7 +52,7 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
localparam PER_BANK_REG_WIDTH = `UP(PER_BANK_REG_BITS);
localparam GPR_REQ_DATAW = SRC_OPD_WIDTH + SIMD_IDX_W + PER_BANK_WIS_WIDTH + PER_BANK_REG_BITS;
localparam GPR_RSP_DATAW = SRC_OPD_WIDTH + `SIMD_WIDTH * `XLEN;
localparam BYTEENW = `SIMD_WIDTH * XLENB;
localparam BYTEENW = GPR_BANK_DATAW / 8;
wire [NUM_REQS-1:0] gpr_req_valid, gpr_req_ready;
wire [NUM_REQS-1:0][GPR_REQ_DATAW-1:0] gpr_req_data;
@ -133,7 +133,7 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
wire [BYTEENW-1:0] bank_wr_byteen;
for (genvar i = 0; i < `SIMD_WIDTH; ++i) begin : g_bank_wr_byteen
assign bank_wr_byteen[i*XLENB+:XLENB] = {XLENB{writeback_if.data.tmask[i]}};
assign bank_wr_byteen[i*XLENB +: XLENB] = {XLENB{writeback_if.data.tmask[i]}};
end
for (genvar b = 0; b < NUM_BANKS; ++b) begin : g_bank_req_data
@ -180,7 +180,7 @@ module VX_gpr_unit import VX_gpu_pkg::*; #(
VX_pipe_buffer #(
.DATAW (REQ_SEL_WIDTH + 2)
) pipe_reg1 (
) pipe_reg (
.clk (clk),
.reset (reset),
.valid_in (bank_req_valid[b]),

View file

@ -14,7 +14,8 @@
`include "VX_define.vh"
module VX_ibuffer import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = ""
parameter `STRING INSTANCE_ID = "",
parameter ISSUE_ID = 0
) (
input wire clk,
input wire reset,
@ -30,6 +31,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
VX_ibuffer_if.master ibuffer_if [PER_ISSUE_WARPS]
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (ISSUE_ID)
localparam NUM_OPDS = NUM_SRC_OPDS + 1;
localparam DATAW = UUID_WIDTH + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + NUM_OPDS + (REG_IDX_BITS * NUM_OPDS);

View file

@ -15,7 +15,7 @@
module VX_issue_slice import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter ISSUE_ID = 0
parameter ISSUE_ID
) (
`SCOPE_IO_DECL
@ -37,7 +37,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
VX_operands_if operands_if();
VX_ibuffer #(
.INSTANCE_ID (`SFORMATF(("%s-ibuffer", INSTANCE_ID)))
.INSTANCE_ID (`SFORMATF(("%s-ibuffer", INSTANCE_ID))),
.ISSUE_ID (ISSUE_ID)
) ibuffer (
.clk (clk),
.reset (reset),
@ -49,7 +50,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
);
VX_scoreboard #(
.INSTANCE_ID (`SFORMATF(("%s-scoreboard", INSTANCE_ID)))
.INSTANCE_ID (`SFORMATF(("%s-scoreboard", INSTANCE_ID))),
.ISSUE_ID (ISSUE_ID)
) scoreboard (
.clk (clk),
.reset (reset),
@ -64,7 +66,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
);
VX_operands #(
.INSTANCE_ID (`SFORMATF(("%s-operands", INSTANCE_ID)))
.INSTANCE_ID (`SFORMATF(("%s-operands", INSTANCE_ID))),
.ISSUE_ID (ISSUE_ID)
) operands (
.clk (clk),
.reset (reset),
@ -77,7 +80,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
);
VX_dispatch #(
.INSTANCE_ID (`SFORMATF(("%s-dispatch", INSTANCE_ID)))
.INSTANCE_ID (`SFORMATF(("%s-dispatch", INSTANCE_ID))),
.ISSUE_ID (ISSUE_ID)
) dispatch (
.clk (clk),
.reset (reset),
@ -97,8 +101,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
`NEG_EDGE (reset_negedge, reset);
`SCOPE_TAP_EX (0, 2, 4, 3, (
UUID_WIDTH + NW_WIDTH + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + 1 + NR_BITS * 4 +
UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + 1 + NR_BITS + (3 * `XLEN) +
UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + NR_BITS + (`NUM_THREADS * `XLEN) + 1
UUID_WIDTH + ISSUE_WIS_W + `SIMD_WIDTH + PC_BITS + EX_BITS + INST_OP_BITS + 1 + NR_BITS + (3 * `XLEN) +
UUID_WIDTH + ISSUE_WIS_W + `SIMD_WIDTH + NR_BITS + (`SIMD_WIDTH * `XLEN) + 1
), {
decode_if.valid,
decode_if.ready,
@ -165,11 +169,11 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
`TRACE(1, (", op="))
trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
`TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd))
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `NUM_THREADS)
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `SIMD_WIDTH)
`TRACE(1, (", rs2_data="))
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `NUM_THREADS)
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `SIMD_WIDTH)
`TRACE(1, (", rs3_data="))
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `NUM_THREADS)
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `SIMD_WIDTH)
trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
`TRACE(1, (" (#%0d)\n", operands_if.data.uuid))
end

View file

@ -21,46 +21,45 @@ module VX_issue_top import VX_gpu_pkg::*; #(
input wire reset,
input wire decode_valid,
input wire [UUID_WIDTH-1:0] decode_uuid,
input wire [NW_WIDTH-1:0] decode_wid,
input wire [UUID_WIDTH-1:0] decode_uuid,
input wire [NW_WIDTH-1:0] decode_wid,
input wire [`NUM_THREADS-1:0] decode_tmask,
input wire [PC_BITS-1:0] decode_PC,
input wire [EX_BITS-1:0] decode_ex_type,
input wire [INST_OP_BITS-1:0] decode_op_type,
input wire [PC_BITS-1:0] decode_PC,
input wire [EX_BITS-1:0] decode_ex_type,
input wire [INST_OP_BITS-1:0] decode_op_type,
input op_args_t decode_op_args,
input wire decode_wb,
input wire [NR_BITS-1:0] decode_rd,
input wire [NR_BITS-1:0] decode_rs1,
input wire [NR_BITS-1:0] decode_rs2,
input wire [NR_BITS-1:0] decode_rs3,
input wire [NR_BITS-1:0] decode_rd,
input wire [NR_BITS-1:0] decode_rs1,
input wire [NR_BITS-1:0] decode_rs2,
input wire [NR_BITS-1:0] decode_rs3,
output wire decode_ready,
input wire writeback_valid[`ISSUE_WIDTH],
input wire [UUID_WIDTH-1:0] writeback_uuid[`ISSUE_WIDTH],
input wire [UUID_WIDTH-1:0] writeback_uuid[`ISSUE_WIDTH],
input wire [ISSUE_WIS_W-1:0] writeback_wis[`ISSUE_WIDTH],
input wire [`NUM_THREADS-1:0] writeback_tmask[`ISSUE_WIDTH],
input wire [PC_BITS-1:0] writeback_PC[`ISSUE_WIDTH],
input wire [NR_BITS-1:0] writeback_rd[`ISSUE_WIDTH],
input wire [PC_BITS-1:0] writeback_PC[`ISSUE_WIDTH],
input wire [NR_BITS-1:0] writeback_rd[`ISSUE_WIDTH],
input wire [`NUM_THREADS-1:0][`XLEN-1:0] writeback_data[`ISSUE_WIDTH],
input wire writeback_sop[`ISSUE_WIDTH],
input wire writeback_eop[`ISSUE_WIDTH],
output wire dispatch_valid[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [UUID_WIDTH-1:0] dispatch_uuid[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [UUID_WIDTH-1:0] dispatch_uuid[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [ISSUE_WIS_W-1:0] dispatch_wis[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [`NUM_THREADS-1:0] dispatch_tmask[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [PC_BITS-1:0] dispatch_PC[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [INST_ALU_BITS-1:0] dispatch_op_type[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [PC_BITS-1:0] dispatch_PC[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [INST_ALU_BITS-1:0] dispatch_op_type[NUM_EX_UNITS * `ISSUE_WIDTH],
output op_args_t dispatch_op_args[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire dispatch_wb[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [NR_BITS-1:0] dispatch_rd[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [NT_WIDTH-1:0] dispatch_tid[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [NR_BITS-1:0] dispatch_rd[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [NT_WIDTH-1:0] dispatch_tid[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs1_data[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs2_data[NUM_EX_UNITS * `ISSUE_WIDTH],
output wire [`NUM_THREADS-1:0][`XLEN-1:0] dispatch_rs3_data[NUM_EX_UNITS * `ISSUE_WIDTH],
input wire dispatch_ready[NUM_EX_UNITS * `ISSUE_WIDTH]
input wire dispatch_ready[NUM_EX_UNITS * `ISSUE_WIDTH]
);
VX_decode_if decode_if();
VX_dispatch_if dispatch_if[NUM_EX_UNITS * `ISSUE_WIDTH]();
VX_writeback_if writeback_if[`ISSUE_WIDTH]();

View file

@ -21,7 +21,8 @@
`endif
module VX_opc_unit import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = ""
parameter `STRING INSTANCE_ID = "",
parameter ISSUE_ID = 0
) (
input wire clk,
input wire reset,
@ -77,7 +78,9 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
wire [NR_BITS-1:0] rs1 = to_reg_number(staging_if.data.rs1);
wire [NR_BITS-1:0] rs2 = to_reg_number(staging_if.data.rs2);
wire [NR_BITS-1:0] rs3 = to_reg_number(staging_if.data.rs3);
wire [NUM_SRC_OPDS-1:0][NR_BITS-1:0] src_regs = {rs3, rs2, rs1};
wire [NUM_SRC_OPDS-1:0][NR_BITS-1:0] src_regs;
assign src_regs = {rs3, rs2, rs1};
always @(*) begin
state_n = state;
@ -164,7 +167,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
end
end else begin
if (gpr_rsp_fire) begin
opd_values[gpr_if.rsp_data.opd_id] <= gpr_if.rsp_data.value;
opd_values[gpr_if.rsp_data.opd_id] <= gpr_if.rsp_data.data;
end
end
end
@ -211,4 +214,38 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
.ready_out(operands_if.ready)
);
`ifdef DBG_TRACE_PIPELINE
    // Pipeline tracing for the operand collector, compiled in only when
    // DBG_TRACE_PIPELINE is defined. One trace record is emitted per event:
    //   -input   : an instruction is accepted from the scoreboard interface
    //   -gpr-req : a register read request is accepted by the GPR interface
    //   -gpr-rsp : a register read response is valid
    //   -output  : an instruction with its operand data leaves on operands_if
    always @(posedge clk) begin
        if (scoreboard_if.valid && scoreboard_if.ready) begin
            // The PC is taken from scoreboard_if: this record describes the
            // incoming instruction, not the one currently on operands_if.
            `TRACE(1, ("%t: %s-input: wid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(scoreboard_if.data.wis, ISSUE_ID), {scoreboard_if.data.PC, 1'b0}))
            trace_ex_type(1, scoreboard_if.data.ex_type);
            `TRACE(1, (", op="))
            trace_ex_op(1, scoreboard_if.data.ex_type, scoreboard_if.data.op_type, scoreboard_if.data.op_args);
            `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d (#%0d)\n", scoreboard_if.data.tmask, scoreboard_if.data.wb, scoreboard_if.data.rd, scoreboard_if.data.rs1, scoreboard_if.data.rs2, scoreboard_if.data.rs3, scoreboard_if.data.uuid))
        end
        if (gpr_if.req_valid && gpr_if.req_ready) begin
            `TRACE(1, ("%t: %s-gpr-req: opd=%0d, wis=%0d, sid=%0d, reg=%0d\n", $time, INSTANCE_ID, gpr_if.req_data.opd_id, wis_to_wid(gpr_if.req_data.wis, ISSUE_ID), gpr_if.req_data.sid, gpr_if.req_data.reg_id))
        end
        if (gpr_if.rsp_valid) begin
            `TRACE(1, ("%t: %s-gpr-rsp: opd=%0d, data=", $time, INSTANCE_ID, gpr_if.rsp_data.opd_id))
            `TRACE_ARRAY1D(1, "0x%0h", gpr_if.rsp_data.data, `SIMD_WIDTH)
            `TRACE(1, ("\n"))
        end
        if (operands_if.valid && operands_if.ready) begin
            `TRACE(1, ("%t: %s-output: wid=%0d, sid=%0d, PC=0x%0h, ex=", $time, INSTANCE_ID, wis_to_wid(operands_if.data.wis, ISSUE_ID), operands_if.data.sid, {operands_if.data.PC, 1'b0}))
            trace_ex_type(1, operands_if.data.ex_type);
            `TRACE(1, (", op="))
            trace_ex_op(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
            `TRACE(1, (", tmask=%b, wb=%b, rd=%0d, rs1_data=", operands_if.data.tmask, operands_if.data.wb, operands_if.data.rd))
            `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs1_data, `SIMD_WIDTH)
            `TRACE(1, (", rs2_data="))
            `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `SIMD_WIDTH)
            `TRACE(1, (", rs3_data="))
            `TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `SIMD_WIDTH)
            trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
            `TRACE(1, (" (#%0d)\n", operands_if.data.uuid))
        end
    end
`endif
endmodule

View file

@ -21,7 +21,8 @@
`endif
module VX_operands import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = ""
parameter `STRING INSTANCE_ID = "",
parameter ISSUE_ID = 0
) (
input wire clk,
input wire reset,
@ -34,8 +35,6 @@ module VX_operands import VX_gpu_pkg::*; #(
VX_scoreboard_if.slave scoreboard_if,
VX_operands_if.master operands_if
);
`UNUSED_SPARAM (INSTANCE_ID)
localparam NUM_OPDS = NUM_SRC_OPDS + 1;
localparam SCB_DATAW = UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + NUM_OPDS + (REG_IDX_BITS * NUM_OPDS);
localparam OPD_DATAW = UUID_WIDTH + ISSUE_WIS_W + SIMD_IDX_W + `SIMD_WIDTH + PC_BITS + EX_BITS + INST_OP_BITS + INST_ARGS_BITS + 1 + NR_BITS + (NUM_SRC_OPDS * `SIMD_WIDTH * `XLEN);
@ -66,9 +65,10 @@ module VX_operands import VX_gpu_pkg::*; #(
`UNUSED_PIN(sel_out)
);
for (genvar i = 0; i < `NUM_OPCS; ++i) begin : g_opc_units
for (genvar i = 0; i < `NUM_OPCS; ++i) begin : g_collectors
VX_opc_unit #(
.INSTANCE_ID (INSTANCE_ID)
.INSTANCE_ID (`SFORMATF(("%s-collector%0d", INSTANCE_ID, i))),
.ISSUE_ID (ISSUE_ID)
) opc_unit (
.clk (clk),
.reset (reset),
@ -101,12 +101,12 @@ module VX_operands import VX_gpu_pkg::*; #(
VX_writeback_if writeback_if_s();
assign writeback_if_s.valid = writeback_if.valid && war_dp_check;
assign writeback_if_s.data = writeback_if.data;
assign writeback_if.ready = war_dp_check;
assign writeback_if_s.data = writeback_if.data;
assign writeback_if.ready = war_dp_check;
`UNUSED_VAR (writeback_if_s.ready)
VX_gpr_unit #(
.INSTANCE_ID (INSTANCE_ID),
.INSTANCE_ID (`SFORMATF(("%s-gpr", INSTANCE_ID))),
.NUM_REQS (`NUM_OPCS),
.NUM_BANKS (`NUM_GPR_BANKS)
) gpr_unit (

View file

@ -14,7 +14,8 @@
`include "VX_define.vh"
module VX_scoreboard import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = ""
parameter `STRING INSTANCE_ID = "",
parameter ISSUE_ID = 0
) (
input wire clk,
input wire reset,
@ -30,6 +31,7 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
VX_scoreboard_if.master scoreboard_if
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (ISSUE_ID)
`UNUSED_VAR (writeback_if.data.sop)
localparam NUM_OPDS = NUM_SRC_OPDS + 1;
@ -128,36 +130,34 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
&& (writeback_if.data.wis == ISSUE_WIS_W'(w))
&& writeback_if.data.eop;
wire [REG_TYPES-1:0][31:0] ibf_rs1_mask, ibf_rs2_mask, ibf_rs3_mask, ibf_rd_mask;
wire [REG_TYPES-1:0][31:0] stg_rs1_mask, stg_rs2_mask, stg_rs3_mask, stg_rd_mask;
reg_idx_t [NUM_OPDS-1:0] ibf_opds, stg_opds;
assign ibf_opds = {ibuffer_if[w].data.rs3, ibuffer_if[w].data.rs2, ibuffer_if[w].data.rs1, ibuffer_if[w].data.rd};
assign stg_opds = {staging_if[w].data.rs3, staging_if[w].data.rs2, staging_if[w].data.rs1, staging_if[w].data.rd};
for (genvar i = 0; i < REG_TYPES; ++i) begin : g_opd_masks
assign ibf_rd_mask[i] = (`REG_EXT_VAL(ibuffer_if[w].data.rd.ext, i) << ibuffer_if[w].data.rd.id) & {32{ibuffer_if[w].data.wb && ibuffer_if[w].data.rd.rtype == i}};
assign ibf_rs1_mask[i] = (`REG_EXT_VAL(ibuffer_if[w].data.rs1.ext, i) << ibuffer_if[w].data.rs1.id) & {32{ibuffer_if[w].data.used_rs[0] && ibuffer_if[w].data.rs1.rtype == i}};
assign ibf_rs2_mask[i] = (`REG_EXT_VAL(ibuffer_if[w].data.rs2.ext, i) << ibuffer_if[w].data.rs2.id) & {32{ibuffer_if[w].data.used_rs[1] && ibuffer_if[w].data.rs2.rtype == i}};
assign ibf_rs3_mask[i] = (`REG_EXT_VAL(ibuffer_if[w].data.rs3.ext, i) << ibuffer_if[w].data.rs3.id) & {32{ibuffer_if[w].data.used_rs[2] && ibuffer_if[w].data.rs3.rtype == i}};
wire [NUM_OPDS-1:0] ibf_used_rs = {ibuffer_if[w].data.used_rs, ibuffer_if[w].data.wb};
wire [NUM_OPDS-1:0] stg_used_rs = {staging_if[w].data.used_rs, staging_if[w].data.wb};
assign stg_rd_mask[i] = (`REG_EXT_VAL(staging_if[w].data.rd.ext, i) << staging_if[w].data.rd.id) & {32{staging_if[w].data.wb && staging_if[w].data.rd.rtype == i}};
assign stg_rs1_mask[i] = (`REG_EXT_VAL(staging_if[w].data.rs1.ext, i) << staging_if[w].data.rs1.id) & {32{staging_if[w].data.used_rs[0] && staging_if[w].data.rs1.rtype == i}};
assign stg_rs2_mask[i] = (`REG_EXT_VAL(staging_if[w].data.rs2.ext, i) << staging_if[w].data.rs2.id) & {32{staging_if[w].data.used_rs[1] && staging_if[w].data.rs2.rtype == i}};
assign stg_rs3_mask[i] = (`REG_EXT_VAL(staging_if[w].data.rs3.ext, i) << staging_if[w].data.rs3.id) & {32{staging_if[w].data.used_rs[2] && staging_if[w].data.rs3.rtype == i}};
wire [NUM_OPDS-1:0][REG_TYPES-1:0][31:0] ibf_opd_mask, stg_opd_mask;
// Per-operand, per-register-type masks for the instruction at the ibuffer
// output (ibf_*) and for the staged instruction (stg_*).
// Operand index i follows the packing of ibf_opds/stg_opds: 0 = rd, 1 = rs1,
// 2 = rs2, 3 = rs3; the matching enable bit is {used_rs, wb}[i].
// Each 32-bit mask is `REG_EXT_VAL(ext, j) shifted left by the operand's id,
// and is forced to zero unless the operand is enabled and its rtype equals j.
// NOTE(review): the exact bit pattern produced by `REG_EXT_VAL is defined
// outside this view - confirm against its definition.
for (genvar i = 0; i < NUM_OPDS; ++i) begin : g_opd_masks
for (genvar j = 0; j < REG_TYPES; ++j) begin : g_j
// mask for operand i within register type j (ibuffer-side instruction)
assign ibf_opd_mask[i][j] = (`REG_EXT_VAL(ibf_opds[i].ext, j) << ibf_opds[i].id) & {32{ibf_used_rs[i] && ibf_opds[i].rtype == j}};
// same computation for the staged instruction
assign stg_opd_mask[i][j] = (`REG_EXT_VAL(stg_opds[i].ext, j) << stg_opds[i].id) & {32{stg_used_rs[i] && stg_opds[i].rtype == j}};
end
end
`ifdef PERF_ENABLE
reg [NUM_REGS-1:0][EX_WIDTH-1:0] inuse_units;
reg [NUM_REGS-1:0][SFU_WIDTH-1:0] inuse_sfu;
reg_idx_t [NUM_OPDS-1:0] stg_opds;
assign stg_opds = {staging_if[w].data.rs3, staging_if[w].data.rs2, staging_if[w].data.rs1, staging_if[w].data.rd};
always @(*) begin
perf_inuse_units_per_cycle[w] = '0;
perf_inuse_sfu_per_cycle[w] = '0;
for (integer i = 0; i < NUM_OPDS; ++i) begin
if (staging_if[w].valid && operands_busy[i]) begin
perf_inuse_units_per_cycle[w][inuse_units[stg_opds[i]]] = 1;
if (inuse_units[stg_opds[i]] == EX_SFU) begin
perf_inuse_sfu_per_cycle[w][inuse_sfu[stg_opds[i]]] = 1;
perf_inuse_units_per_cycle[w][inuse_units[stg_opds[i].id]] = 1;
if (inuse_units[stg_opds[i].id] == EX_SFU) begin
perf_inuse_sfu_per_cycle[w][inuse_sfu[stg_opds[i].id]] = 1;
end
end
end
@ -170,14 +170,14 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
inuse_regs_n[writeback_if.data.rd] = 0;
end
if (staging_fire && staging_if[w].data.wb) begin
inuse_regs_n |= stg_rd_mask;
inuse_regs_n |= stg_opd_mask[0];
end
end
wire [REG_TYPES-1:0][31:0] in_use_mask;
for (genvar i = 0; i < REG_TYPES; ++i) begin : g_in_use_mask
wire [31:0] ibf_reg_mask = ibf_rs1_mask[i] | ibf_rs2_mask[i] | ibf_rs3_mask[i] | ibf_rd_mask[i];
wire [31:0] stg_reg_mask = stg_rs1_mask[i] | stg_rs2_mask[i] | stg_rs3_mask[i] | stg_rd_mask[i];
wire [31:0] ibf_reg_mask = ibf_opd_mask[0][i] | ibf_opd_mask[1][i] | ibf_opd_mask[2][i] | ibf_opd_mask[3][i];
wire [31:0] stg_reg_mask = stg_opd_mask[0][i] | stg_opd_mask[1][i] | stg_opd_mask[2][i] | stg_opd_mask[3][i];
wire [31:0] regs_mask = ibuffer_fire ? ibf_reg_mask : stg_reg_mask;
assign in_use_mask[i] = inuse_regs_n[i * 32 +: 32] & regs_mask;
end
@ -187,11 +187,10 @@ module VX_scoreboard import VX_gpu_pkg::*; #(
assign regs_busy[i] = (in_use_mask[i] != 0);
end
// per operand busy
assign operands_busy[0] = (in_use_mask[staging_if[w].data.rd.rtype] & stg_rd_mask[staging_if[w].data.rd.rtype]) != 0;
assign operands_busy[1] = (in_use_mask[staging_if[w].data.rs1.rtype] & stg_rs1_mask[staging_if[w].data.rs1.rtype]) != 0;
assign operands_busy[2] = (in_use_mask[staging_if[w].data.rs2.rtype] & stg_rs2_mask[staging_if[w].data.rs2.rtype]) != 0;
assign operands_busy[3] = (in_use_mask[staging_if[w].data.rs3.rtype] & stg_rs3_mask[staging_if[w].data.rs3.rtype]) != 0;
// One busy flag per operand of the staged instruction: operand i is reported
// busy when the in-use mask of its own register type overlaps that operand's
// register mask (stg_opd_mask[i]).
for (genvar i = 0; i < NUM_OPDS; ++i) begin : g_operands_busy
// register type of staged operand i; selects which mask slice to compare
wire [REG_TYPE_BITS-1:0] rtype = stg_opds[i].rtype;
assign operands_busy[i] = (in_use_mask[rtype] & stg_opd_mask[i][rtype]) != 0;
end
always @(posedge clk) begin
if (reset) begin

View file

@ -16,7 +16,6 @@
interface VX_decode_if import VX_gpu_pkg::*; #(
parameter NUM_WARPS = `NUM_WARPS
);
typedef struct packed {
logic [UUID_WIDTH-1:0] uuid;
logic [`LOG2UP(NUM_WARPS)-1:0] wid;

View file

@ -24,7 +24,7 @@ interface VX_gpr_if import VX_gpu_pkg::*; ();
typedef struct packed {
logic [1:0] opd_id;
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] value;
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] data;
} rsp_data_t;
logic req_valid;

View file

@ -43,7 +43,7 @@
`ifdef QUARTUS
`define RAM_ARRAY_WREN reg [WRENW-1:0][WSELW-1:0] ram [0:SIZE-1];
`define RAM_WRITE_WREN `RAM_RESET_BLOCK \
`define RAM_WRITE_WREN `RAM_RESET_BLOCK \
if (write) begin \
for (integer i = 0; i < WRENW; ++i) begin \
if (wren[i]) begin \
@ -305,9 +305,22 @@ module VX_dp_ram #(
// simulation
reg [DATAW-1:0] ram [0:SIZE-1];
`RAM_INITIALIZATION
// Next value of the word addressed by waddr, with write enables applied.
reg [DATAW-1:0] wdata_n;
// Combinational read-modify-write merge: start from the word currently stored
// at ram[waddr], then overwrite every WSELW-bit slice i whose wren[i] bit is
// set with the corresponding slice of wdata. Slices with wren[i] clear keep
// their stored value.
always @* begin
wdata_n = ram[waddr];
for (integer i = 0; i < WRENW; ++i) begin
if (wren[i]) begin
// replace only the enabled slice
wdata_n[i * WSELW +: WSELW] = wdata[i * WSELW +: WSELW];
end
end
end
always @(posedge clk) begin
`RAM_WRITE_WREN
`RAM_RESET_BLOCK
if (write) begin
ram[waddr] <= wdata_n;
end
end
if (OUT_REG) begin : g_sync

View file

@ -62,8 +62,10 @@ module VX_stream_xpoint #(
valid_out_w = '0;
data_out_w = 'x;
for (integer i = 0; i < NUM_INPUTS; ++i) begin
valid_out_w[sel_in[i]] = valid_in[i];
data_out_w[sel_in[i]] = data_in[i];
if (valid_in[i]) begin
valid_out_w[sel_in[i]] = 1;
data_out_w[sel_in[i]] = data_in[i];
end
end
end

View file

@ -46,7 +46,6 @@ module VX_local_mem import VX_gpu_pkg::*; #(
VX_mem_bus_if.slave mem_bus_if [NUM_REQS]
);
`UNUSED_SPARAM (INSTANCE_ID)
`UNUSED_PARAM (UUID_WIDTH)
localparam REQ_SEL_BITS = `CLOG2(NUM_REQS);
localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS);

View file

@ -27,9 +27,6 @@ module VX_local_mem_top import VX_gpu_pkg::*; #(
// Size of a word in bytes
parameter WORD_SIZE = `XLEN/8,
// Request debug identifier
parameter UUID_WIDTH = 0,
// Request tag size
parameter TAG_WIDTH = 16,
@ -91,7 +88,6 @@ module VX_local_mem_top import VX_gpu_pkg::*; #(
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.ADDR_WIDTH (ADDR_WIDTH),
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.OUT_BUF (3)
) local_mem (