cache uuid support

This commit is contained in:
Blaise Tine 2021-12-09 20:43:22 -05:00
parent 0e2de4f13a
commit d7737542e4
36 changed files with 159 additions and 200 deletions

View file

@ -96,7 +96,7 @@ module VX_alu_unit #(
wire alu_ready_in;
wire alu_valid_out;
wire alu_ready_out;
wire [63:0] alu_uuid;
wire [`UUID_BITS-1:0] alu_uuid;
wire [`NW_BITS-1:0] alu_wid;
wire [`NUM_THREADS-1:0] alu_tmask;
wire [31:0] alu_PC;
@ -113,7 +113,7 @@ module VX_alu_unit #(
assign alu_ready_in = alu_ready_out || ~alu_valid_out;
VX_pipe_register #(
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `INST_BR_BITS + 1 + 1 + 32),
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `INST_BR_BITS + 1 + 1 + 32),
.RESETW (1)
) pipe_reg (
.clk (clk),
@ -139,7 +139,7 @@ module VX_alu_unit #(
wire mul_ready_in;
wire mul_valid_out;
wire mul_ready_out;
wire [63:0] mul_uuid;
wire [`UUID_BITS-1:0] mul_uuid;
wire [`NW_BITS-1:0] mul_wid;
wire [`NUM_THREADS-1:0] mul_tmask;
wire [31:0] mul_PC;

View file

@ -25,13 +25,13 @@ module VX_csr_data #(
`endif
input wire read_enable,
input wire [63:0] read_uuid,
input wire [`UUID_BITS-1:0] read_uuid,
input wire[`CSR_ADDR_BITS-1:0] read_addr,
input wire[`NW_BITS-1:0] read_wid,
output wire[31:0] read_data,
input wire write_enable,
input wire [63:0] write_uuid,
input wire [`UUID_BITS-1:0] write_uuid,
input wire[`CSR_ADDR_BITS-1:0] write_addr,
input wire[`NW_BITS-1:0] write_wid,
input wire[31:0] write_data,
@ -100,6 +100,7 @@ module VX_csr_data #(
assign tex_csr_if.write_enable = write_enable;
assign tex_csr_if.write_addr = write_addr;
assign tex_csr_if.write_data = write_data;
assign tex_csr_if.write_uuid = write_uuid;
`endif
always @(posedge clk) begin

View file

@ -110,7 +110,7 @@ module VX_csr_unit #(
wire stall_out = ~csr_commit_if.ready && csr_commit_if.valid;
VX_pipe_register #(
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 32 + 32),
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 32 + 32),
.RESETW (1)
) pipe_reg (
.clk (clk),

View file

@ -34,6 +34,8 @@
`define PERF_CTR_BITS 44
`define UUID_BITS 44
///////////////////////////////////////////////////////////////////////////////
`define EX_NOP 3'h0
@ -239,10 +241,6 @@
///////////////////////////////////////////////////////////////////////////////
// cache request identifier
`define DBG_CACHE_REQ_IDW 48
`define DBG_CACHE_REQ_ID(type, ctr) {4'(type), {`DBG_CACHE_REQ_IDW-4{1'b0}}} + ctr
// non-cacheable tag bits
`define NC_TAG_BIT 1
@ -267,7 +265,7 @@
`define ICACHE_CORE_TAG_ID_BITS `NW_BITS
// Core request tag bits
`define ICACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_IDW + `ICACHE_CORE_TAG_ID_BITS)
`define ICACHE_CORE_TAG_WIDTH (`UUID_BITS + `ICACHE_CORE_TAG_ID_BITS)
// Memory request data bits
`define ICACHE_MEM_DATA_WIDTH (`ICACHE_LINE_SIZE * 8)
@ -293,13 +291,13 @@
`define LSUQ_ADDR_BITS `LOG2UP(`LSUQ_SIZE)
`ifdef EXT_TEX_ENABLE
`define LSU_TAG_ID_BITS `MAX(`LSUQ_ADDR_BITS, 2)
`define LSU_TEX_DCACHE_TAG_BITS (`DBG_CACHE_REQ_IDW + `LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS)
`define LSU_TEX_DCACHE_TAG_BITS (`UUID_BITS + `LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS)
`define DCACHE_CORE_TAG_ID_BITS (`LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS + `TEX_TAG_BIT)
`else
`define LSU_TAG_ID_BITS `LSUQ_ADDR_BITS
`define DCACHE_CORE_TAG_ID_BITS (`LSU_TAG_ID_BITS + `CACHE_ADDR_TYPE_BITS)
`endif
`define DCACHE_CORE_TAG_WIDTH (`DBG_CACHE_REQ_IDW + `DCACHE_CORE_TAG_ID_BITS)
`define DCACHE_CORE_TAG_WIDTH (`UUID_BITS + `DCACHE_CORE_TAG_ID_BITS)
// Memory request data bits
`define DCACHE_MEM_DATA_WIDTH (`DCACHE_LINE_SIZE * 8)

View file

@ -42,7 +42,7 @@ module VX_dispatch (
wire [`INST_ALU_BITS-1:0] alu_op_type = `INST_ALU_BITS'(ibuffer_if.op_type);
VX_skid_buffer #(
.DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
.DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
.OUT_REG (1)
) alu_buffer (
.clk (clk),
@ -63,7 +63,7 @@ module VX_dispatch (
wire lsu_is_prefetch = `INST_LSU_IS_PREFETCH(ibuffer_if.op_mod);
VX_skid_buffer #(
.DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32) + 1),
.DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32) + 1),
.OUT_REG (1)
) lsu_buffer (
.clk (clk),
@ -85,7 +85,7 @@ module VX_dispatch (
wire [31:0] csr_rs1_data = gpr_rsp_if.rs1_data[tid];
VX_skid_buffer #(
.DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + 32),
.DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + 32),
.OUT_REG (1)
) csr_buffer (
.clk (clk),
@ -105,7 +105,7 @@ module VX_dispatch (
wire [`INST_FPU_BITS-1:0] fpu_op_type = `INST_FPU_BITS'(ibuffer_if.op_type);
VX_skid_buffer #(
.DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
.DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
.OUT_REG (1)
) fpu_buffer (
.clk (clk),
@ -127,7 +127,7 @@ module VX_dispatch (
wire [`INST_GPU_BITS-1:0] gpu_op_type = `INST_GPU_BITS'(ibuffer_if.op_type);
VX_skid_buffer #(
.DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + `NT_BITS + (3 * `NUM_THREADS * 32)),
.DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + `NT_BITS + (3 * `NUM_THREADS * 32)),
.OUT_REG (1)
) gpu_buffer (
.clk (clk),

View file

@ -22,7 +22,7 @@ module VX_fpu_unit #(
wire valid_out;
wire ready_out;
wire [63:0] rsp_uuid;
wire [`UUID_BITS-1:0] rsp_uuid;
wire [`NW_BITS-1:0] rsp_wid;
wire [`NUM_THREADS-1:0] rsp_tmask;
wire [31:0] rsp_PC;
@ -40,7 +40,7 @@ module VX_fpu_unit #(
wire fpuq_pop = valid_out && ready_out;
VX_index_buffer #(
.DATAW (64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
.DATAW (`UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
.SIZE (`FPUQ_SIZE)
) req_metadata (
.clk (clk),
@ -181,7 +181,7 @@ module VX_fpu_unit #(
wire stall_out = ~fpu_commit_if.ready && fpu_commit_if.valid;
VX_pipe_register #(
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFLAGS_BITS),
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFLAGS_BITS),
.RESETW (1)
) pipe_reg (
.clk (clk),

View file

@ -33,7 +33,7 @@ module VX_gpu_unit #(
localparam RSP_DATAW = `MAX(`NUM_THREADS * 32, WCTL_DATAW);
wire rsp_valid;
wire [63:0] rsp_uuid;
wire [`UUID_BITS-1:0] rsp_uuid;
wire [`NW_BITS-1:0] rsp_wid;
wire [`NUM_THREADS-1:0] rsp_tmask;
wire [31:0] rsp_PC;
@ -187,7 +187,7 @@ module VX_gpu_unit #(
assign stall_out = ~gpu_commit_if.ready && gpu_commit_if.valid;
VX_pipe_register #(
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + RSP_DATAW + 1),
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + RSP_DATAW + 1),
.RESETW (1)
) pipe_reg (
.clk (clk),

View file

@ -15,7 +15,7 @@ module VX_ibuffer #(
`UNUSED_PARAM (CORE_ID)
localparam DATAW = 64 + `NUM_THREADS + 32 + `EX_BITS + `INST_OP_BITS + `INST_FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1;
localparam DATAW = `UUID_BITS + `NUM_THREADS + 32 + `EX_BITS + `INST_OP_BITS + `INST_FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1;
localparam ADDRW = $clog2(`IBUF_SIZE+1);
localparam NWARPSW = $clog2(`NUM_WARPS+1);

View file

@ -24,24 +24,19 @@ module VX_icache_stage #(
localparam OUT_REG = 0;
reg [`DBG_CACHE_REQ_IDW-1:0] req_id;
wire [`DBG_CACHE_REQ_IDW-1:0] rsp_id;
wire [`NW_BITS-1:0] req_tag, rsp_tag;
`UNUSED_VAR (rsp_id)
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
assign req_tag = ifetch_req_if.wid;
assign rsp_tag = icache_rsp_if.tag[`NW_BITS-1:0];
assign rsp_id = icache_rsp_if.tag[`NW_BITS +: `DBG_CACHE_REQ_IDW];
wire [63:0] rsp_uuid;
wire [`UUID_BITS-1:0] rsp_uuid;
wire [31:0] rsp_PC;
wire [`NUM_THREADS-1:0] rsp_tmask;
VX_dp_ram #(
.DATAW (32 + `NUM_THREADS + 64),
.DATAW (32 + `NUM_THREADS + `UUID_BITS),
.SIZE (`NUM_WARPS),
.LUTRAM (1)
) req_metadata (
@ -59,17 +54,7 @@ module VX_icache_stage #(
// Icache Request
assign icache_req_if.valid = ifetch_req_if.valid;
assign icache_req_if.addr = ifetch_req_if.PC[31:2];
assign icache_req_if.tag = {req_id, req_tag};
always @(posedge clk) begin
if (reset) begin
req_id <= `DBG_CACHE_REQ_ID(0, 0);
end else begin
if (icache_req_fire) begin
req_id <= req_id + 1;
end
end
end
assign icache_req_if.tag = {ifetch_req_if.uuid, req_tag};
// Can accept new request?
assign ifetch_req_if.ready = icache_req_if.ready;
@ -79,7 +64,7 @@ module VX_icache_stage #(
wire stall_out = ~ifetch_rsp_if.ready && (0 == OUT_REG && ifetch_rsp_if.valid);
VX_pipe_register #(
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + 32 + 64),
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + 32 + `UUID_BITS),
.RESETW (1),
.DEPTH (OUT_REG)
) pipe_reg (
@ -106,10 +91,10 @@ module VX_icache_stage #(
`ifdef DBG_TRACE_CORE_ICACHE
always @(posedge clk) begin
if (icache_req_fire) begin
dpi_trace("%d: I$%0d req: wid=%0d, PC=%0h, req_id=%0h (#%0d)\n", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.PC, req_id, ifetch_req_if.uuid);
dpi_trace("%d: I$%0d req: wid=%0d, PC=%0h (#%0d)\n", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.PC, ifetch_req_if.uuid);
end
if (ifetch_rsp_if.valid && ifetch_rsp_if.ready) begin
dpi_trace("%d: I$%0d rsp: wid=%0d, PC=%0h, req_id=%0h, data=%0h (#%0d)\n", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.PC, rsp_id, ifetch_rsp_if.data, ifetch_rsp_if.uuid);
dpi_trace("%d: I$%0d rsp: wid=%0d, PC=%0h, data=%0h (#%0d)\n", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.PC, ifetch_rsp_if.data, ifetch_rsp_if.uuid);
end
end
`endif

View file

@ -28,7 +28,7 @@ module VX_lsu_unit #(
`STATIC_ASSERT(`SMEM_SIZE == `MEM_BLOCK_SIZE * (`SMEM_SIZE / `MEM_BLOCK_SIZE), ("invalid parameter"))
wire req_valid;
wire [63:0] req_uuid;
wire [`UUID_BITS-1:0] req_uuid;
wire [`NUM_THREADS-1:0] req_tmask;
wire [`NUM_THREADS-1:0][31:0] req_addr;
wire [`INST_LSU_BITS-1:0] req_type;
@ -82,7 +82,7 @@ module VX_lsu_unit #(
wire lsu_wb = lsu_req_if.wb | lsu_req_if.is_prefetch;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 64 + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS) + `INST_LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.DATAW (1 + 1 + 1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + (`NUM_THREADS * 32) + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS) + `INST_LSU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.RESETW (1)
) req_pipe_reg (
.clk (clk),
@ -95,7 +95,7 @@ module VX_lsu_unit #(
// Can accept new request?
assign lsu_req_if.ready = ~stall_in && ~fence_wait;
wire [63:0] rsp_uuid;
wire [`UUID_BITS-1:0] rsp_uuid;
wire [`NW_BITS-1:0] rsp_wid;
wire [31:0] rsp_pc;
wire [`NR_BITS-1:0] rsp_rd;
@ -108,8 +108,6 @@ module VX_lsu_unit #(
wire [`NUM_THREADS-1:0] rsp_rem_mask_n;
wire [`NUM_THREADS-1:0] rsp_tmask;
reg [`DBG_CACHE_REQ_IDW-1:0] req_id;
wire [`DBG_CACHE_REQ_IDW-1:0] rsp_req_id;
reg [`NUM_THREADS-1:0] req_sent_mask;
reg is_req_start;
@ -118,7 +116,6 @@ module VX_lsu_unit #(
`UNUSED_VAR (rsp_type)
`UNUSED_VAR (rsp_is_prefetch)
`UNUSED_VAR (rsp_req_id)
wire [`NUM_THREADS-1:0][REQ_ASHIFT-1:0] req_offset, rsp_offset;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
@ -127,8 +124,6 @@ module VX_lsu_unit #(
wire [`NUM_THREADS-1:0] dcache_req_fire = dcache_req_if.valid & dcache_req_if.ready;
wire dcache_req_fire_any = (| dcache_req_fire);
wire dcache_rsp_fire = dcache_rsp_if.valid && dcache_rsp_if.ready;
wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
@ -141,14 +136,13 @@ module VX_lsu_unit #(
wire mbuf_pop = dcache_rsp_fire && (0 == rsp_rem_mask_n);
assign mbuf_raddr = dcache_rsp_if.tag[`CACHE_ADDR_TYPE_BITS +: `LSUQ_ADDR_BITS];
assign rsp_req_id = dcache_rsp_if.tag[(`CACHE_ADDR_TYPE_BITS + `LSU_TAG_ID_BITS) +: `DBG_CACHE_REQ_IDW];
`UNUSED_VAR (dcache_rsp_if.tag)
// do not writeback from software prefetch
wire req_wb2 = req_wb && ~req_is_prefetch;
VX_index_buffer #(
.DATAW (64 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `INST_LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1 + 1),
.DATAW (`UUID_BITS + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + 1 + `INST_LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1 + 1),
.SIZE (`LSUQ_SIZE)
) req_metadata (
.clk (clk),
@ -241,19 +235,9 @@ module VX_lsu_unit #(
assign dcache_req_if.addr[i] = req_addr[i][31:2];
assign dcache_req_if.byteen[i] = mem_req_byteen;
assign dcache_req_if.data[i] = mem_req_data;
assign dcache_req_if.tag[i] = {req_id, `LSU_TAG_ID_BITS'(req_tag), req_addr_type[i]};
assign dcache_req_if.tag[i] = {req_uuid, `LSU_TAG_ID_BITS'(req_tag), req_addr_type[i]};
end
always @(posedge clk) begin
if (reset) begin
req_id <= `DBG_CACHE_REQ_ID(1, 0);
end else begin
if (dcache_req_fire_any) begin
req_id <= req_id + 1;
end
end
end
assign ready_in = req_dep_ready && dcache_req_ready;
// send store commit
@ -298,7 +282,7 @@ module VX_lsu_unit #(
wire load_rsp_stall = ~ld_commit_if.ready && ld_commit_if.valid;
VX_pipe_register #(
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1),
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1),
.RESETW (1)
) rsp_pipe_reg (
.clk (clk),
@ -325,7 +309,7 @@ module VX_lsu_unit #(
`SCOPE_ASSIGN (dcache_rsp_tag, mbuf_raddr);
`ifndef SYNTHESIS
reg [`LSUQ_SIZE-1:0][(`NW_BITS + 32 + `NR_BITS + 64 + 64 + 1)-1:0] pending_reqs;
reg [`LSUQ_SIZE-1:0][(`NW_BITS + 32 + `NR_BITS + `UUID_BITS + 64 + 1)-1:0] pending_reqs;
wire [63:0] delay_timeout = 10000 * (1 ** (`L2_ENABLE + `L3_ENABLE));
always @(posedge clk) begin
@ -344,23 +328,24 @@ module VX_lsu_unit #(
if (pending_reqs[i][0]) begin
`ASSERT(($time - pending_reqs[i][1 +: 64]) < delay_timeout,
("%t: *** D$%0d response timeout: remaining=%b, wid=%0d, PC=%0h, rd=%0d (#%0d)",
$time, CORE_ID, rsp_rem_mask[i], pending_reqs[i][1+64+64+32+`NR_BITS +: `NW_BITS],
pending_reqs[i][1+64+64+`NR_BITS +: 32],
pending_reqs[i][1+64+64 +: `NR_BITS],
pending_reqs[i][1+64 +: 64]));
$time, CORE_ID, rsp_rem_mask[i], pending_reqs[i][1+64+`UUID_BITS+`NR_BITS+32 +: `NW_BITS],
pending_reqs[i][1+64+`UUID_BITS+`NR_BITS +: 32],
pending_reqs[i][1+64+`UUID_BITS +: `NR_BITS],
pending_reqs[i][1+64 +: `UUID_BITS]));
end
end
end
`endif
`ifdef DBG_TRACE_CORE_DCACHE
wire dcache_req_fire_any = (| dcache_req_fire);
always @(posedge clk) begin
if (lsu_req_if.valid && fence_wait) begin
dpi_trace("%d: *** D$%0d fence wait\n", $time, CORE_ID);
end
if (dcache_req_fire_any) begin
if (dcache_req_if.rw[0]) begin
dpi_trace("%d: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, req_id=%0h, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire, req_id);
dpi_trace("%d: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire);
`TRACE_ARRAY1D(req_addr, `NUM_THREADS);
dpi_trace(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen);
`TRACE_ARRAY1D(req_addr_type, `NUM_THREADS);
@ -368,7 +353,7 @@ module VX_lsu_unit #(
`TRACE_ARRAY1D(dcache_req_if.data, `NUM_THREADS);
dpi_trace(", (#%0d)\n", req_uuid);
end else begin
dpi_trace("%d: D$%0d Rd Req: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, req_id=%0h, addr=", $time, CORE_ID, req_is_prefetch, req_wid, req_pc, dcache_req_fire, req_id);
dpi_trace("%d: D$%0d Rd Req: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_is_prefetch, req_wid, req_pc, dcache_req_fire);
`TRACE_ARRAY1D(req_addr, `NUM_THREADS);
dpi_trace(", tag=%0h, byteen=%0h, type=", req_tag, dcache_req_if.byteen);
`TRACE_ARRAY1D(req_addr_type, `NUM_THREADS);
@ -376,8 +361,8 @@ module VX_lsu_unit #(
end
end
if (dcache_rsp_fire) begin
dpi_trace("%d: D$%0d Rsp: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, req_id=%0h, tag=%0h, rd=%0d, data=",
$time, CORE_ID, rsp_is_prefetch, rsp_wid, rsp_pc, dcache_rsp_if.tmask, rsp_req_id, mbuf_raddr, rsp_rd);
dpi_trace("%d: D$%0d Rsp: prefetch=%b, wid=%0d, PC=%0h, tmask=%b, tag=%0h, rd=%0d, data=",
$time, CORE_ID, rsp_is_prefetch, rsp_wid, rsp_pc, dcache_rsp_if.tmask, mbuf_raddr, rsp_rd);
`TRACE_ARRAY1D(dcache_rsp_if.data, `NUM_THREADS);
dpi_trace(", is_dup=%b (#%0d)\n", rsp_is_dup, rsp_uuid);
end

View file

@ -6,7 +6,7 @@ module VX_muldiv (
// Inputs
input wire [`INST_MUL_BITS-1:0] alu_op,
input wire [63:0] uuid_in,
input wire [`UUID_BITS-1:0] uuid_in,
input wire [`NW_BITS-1:0] wid_in,
input wire [`NUM_THREADS-1:0] tmask_in,
input wire [31:0] PC_in,
@ -16,7 +16,7 @@ module VX_muldiv (
input wire [`NUM_THREADS-1:0][31:0] alu_in2,
// Outputs
output wire [63:0] uuid_out,
output wire [`UUID_BITS-1:0] uuid_out,
output wire [`NW_BITS-1:0] wid_out,
output wire [`NUM_THREADS-1:0] tmask_out,
output wire [31:0] PC_out,
@ -34,7 +34,7 @@ module VX_muldiv (
wire is_div_op = `INST_MUL_IS_DIV(alu_op);
wire [`NUM_THREADS-1:0][31:0] mul_result;
wire [63:0] mul_uuid_out;
wire [`UUID_BITS-1:0] mul_uuid_out;
wire [`NW_BITS-1:0] mul_wid_out;
wire [`NUM_THREADS-1:0] mul_tmask_out;
wire [31:0] mul_PC_out;
@ -66,7 +66,7 @@ module VX_muldiv (
end
VX_shift_register #(
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.DEPTH (`LATENCY_IMUL),
.RESETW (1)
) mul_shift_reg (
@ -106,7 +106,7 @@ module VX_muldiv (
end
VX_shift_register #(
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1),
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1),
.DEPTH (`LATENCY_IMUL),
.RESETW (1)
) mul_shift_reg (
@ -122,7 +122,7 @@ module VX_muldiv (
///////////////////////////////////////////////////////////////////////////
wire [`NUM_THREADS-1:0][31:0] div_result;
wire [63:0] div_uuid_out;
wire [`UUID_BITS-1:0] div_uuid_out;
wire [`NW_BITS-1:0] div_wid_out;
wire [`NUM_THREADS-1:0] div_tmask_out;
wire [31:0] div_PC_out;
@ -151,7 +151,7 @@ module VX_muldiv (
end
VX_shift_register #(
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.DEPTH (`LATENCY_IMUL),
.RESETW (1)
) div_shift_reg (
@ -199,7 +199,7 @@ module VX_muldiv (
///////////////////////////////////////////////////////////////////////////
wire rsp_valid = mul_valid_out || div_valid_out;
wire [63:0] rsp_uuid = mul_valid_out ? mul_uuid_out : div_uuid_out;
wire [`UUID_BITS-1:0] rsp_uuid = mul_valid_out ? mul_uuid_out : div_uuid_out;
wire [`NW_BITS-1:0] rsp_wid = mul_valid_out ? mul_wid_out : div_wid_out;
wire [`NUM_THREADS-1:0] rsp_tmask = mul_valid_out ? mul_tmask_out : div_tmask_out;
wire [31:0] rsp_PC = mul_valid_out ? mul_PC_out : div_PC_out;
@ -210,7 +210,7 @@ module VX_muldiv (
assign stall_out = ~ready_out && valid_out;
VX_pipe_register #(
.DATAW (1 + 64 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.DATAW (1 + `UUID_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.RESETW (1)
) pipe_reg (
.clk (clk),

View file

@ -46,7 +46,7 @@ module VX_warp_sched #(
wire schedule_valid;
wire warp_scheduled;
reg [63:0] issued_instrs;
reg [`UUID_BITS-1:0] issued_instrs;
wire ifetch_req_fire = ifetch_req_if.valid && ifetch_req_if.ready;
@ -228,10 +228,10 @@ module VX_warp_sched #(
assign warp_scheduled = schedule_valid && ~stall_out;
wire [63:0] instr_uuid = (issued_instrs * `NUM_CORES * `NUM_CLUSTERS) + 64'(CORE_ID);
wire [`UUID_BITS-1:0] instr_uuid = (issued_instrs * `NUM_CORES * `NUM_CLUSTERS) + `UUID_BITS'(CORE_ID);
VX_pipe_register #(
.DATAW (1 + 64 + `NUM_THREADS + 32 + `NW_BITS),
.DATAW (1 + `UUID_BITS + `NUM_THREADS + 32 + `NW_BITS),
.RESETW (1)
) pipe_reg (
.clk (clk),

View file

@ -488,22 +488,22 @@ module VX_bank #(
dpi_trace("%d: cache%0d:%0d fill-rsp: addr=%0h, id=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_id, mem_rsp_data);
end
if (mshr_fire) begin
dpi_trace("%d: cache%0d:%0d mshr-pop: addr=%0h, tag=%0h, pmask=%b, tid=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, req_id_sel);
dpi_trace("%d: cache%0d:%0d mshr-pop: addr=%0h, tag=%0h, pmask=%b, tid=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, req_id_sel);
end
if (creq_fire) begin
if (creq_rw)
dpi_trace("%d: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, data=%0h, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, req_id_sel);
dpi_trace("%d: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, data=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, req_id_sel);
else
dpi_trace("%d: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, req_id_sel);
dpi_trace("%d: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, req_id_sel);
end
if (crsq_fire) begin
dpi_trace("%d: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, req_id_st1);
dpi_trace("%d: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, req_id_st1);
end
if (mreq_push) begin
if (is_write_st1)
dpi_trace("%d: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, req_id_st1);
dpi_trace("%d: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, req_id_st1);
else
dpi_trace("%d: cache%0d:%0d fill-req: addr=%0h, id=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_id, req_id_st1);
dpi_trace("%d: cache%0d:%0d fill-req: addr=%0h, id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_id, req_id_st1);
end
end
`endif

View file

@ -4,7 +4,7 @@
`include "VX_platform.vh"
// cache request identifier
`define DBG_CACHE_REQ_IDW 48
`define DBG_CACHE_REQ_IDW 44
`define REQS_BITS `LOG2UP(NUM_REQS)

View file

@ -122,10 +122,10 @@ module VX_data_access #(
dpi_trace("%d: cache%0d:%0d data-fill: addr=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, fill_data);
end
if (read && ~stall) begin
dpi_trace("%d: cache%0d:%0d data-read: addr=%0h, req_id=%0h, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), req_id, line_addr, read_data);
dpi_trace("%d: cache%0d:%0d data-read: addr=%0h, blk_addr=%0d, data=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, read_data, req_id);
end
if (write && ~stall) begin
dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, req_id=%0h, byteen=%b, blk_addr=%0d, data=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), req_id, byteen, line_addr, write_data);
dpi_trace("%d: cache%0d:%0d data-write: addr=%0h, byteen=%b, blk_addr=%0d, data=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), byteen, line_addr, write_data, req_id);
end
end
`endif

View file

@ -201,22 +201,22 @@ module VX_miss_resrv #(
always @(posedge clk) begin
if (allocate_fire || fill_valid || dequeue_fire || lookup_replay || lookup_valid || release_valid) begin
if (allocate_fire)
dpi_trace("%d: cache%0d:%0d mshr-allocate: addr=%0h, id=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID,
dpi_trace("%d: cache%0d:%0d mshr-allocate: addr=%0h, id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(allocate_addr, BANK_ID), allocate_id, deq_req_id);
if (fill_valid)
dpi_trace("%d: cache%0d:%0d mshr-fill: addr=%0h, id=%0d, addr=%0h\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(addr_table[fill_id], BANK_ID), fill_id, `LINE_TO_BYTE_ADDR(fill_addr, BANK_ID));
if (dequeue_fire)
dpi_trace("%d: cache%0d:%0d mshr-dequeue: addr=%0h, id=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID,
dpi_trace("%d: cache%0d:%0d mshr-dequeue: addr=%0h, id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(dequeue_addr, BANK_ID), dequeue_id_r, deq_req_id);
if (lookup_replay)
dpi_trace("%d: cache%0d:%0d mshr-replay: addr=%0h, id=%0d\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id);
dpi_trace("%d: cache%0d:%0d mshr-replay: addr=%0h, id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id, lkp_req_id);
if (lookup_valid)
dpi_trace("%d: cache%0d:%0d mshr-lookup: addr=%0h, id=%0d, match=%b, req_id=%0h\n", $time, CACHE_ID, BANK_ID,
dpi_trace("%d: cache%0d:%0d mshr-lookup: addr=%0h, id=%0d, match=%b (#%0d)\n", $time, CACHE_ID, BANK_ID,
`LINE_TO_BYTE_ADDR(lookup_addr, BANK_ID), lookup_id, lookup_match, lkp_req_id);
if (release_valid)
dpi_trace("%d: cache%0d:%0d mshr-release id=%0d, req_id=%0h\n", $time, CACHE_ID, BANK_ID, release_id, rel_req_id);
dpi_trace("%d: cache%0d:%0d mshr-release id=%0d (#%0d)\n", $time, CACHE_ID, BANK_ID, release_id, rel_req_id);
dpi_trace("%d: cache%0d:%0d mshr-table", $time, CACHE_ID, BANK_ID);
for (integer i = 0; i < MSHR_SIZE; ++i) begin
if (valid_table[i]) begin

View file

@ -306,10 +306,10 @@ module VX_shared_mem #(
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid_unqual[i]) begin
if (per_bank_core_req_rw_unqual[i]) begin
dpi_trace("%d: smem%0d:%0d core-wr-req: addr=%0h, tag=%0h, byteen=%b, data=%0h, req_id=%0h\n",
dpi_trace("%d: smem%0d:%0d core-wr-req: addr=%0h, tag=%0h, byteen=%b, data=%0h (#%0d)\n",
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr_unqual[i], i), per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], per_bank_core_req_data_unqual[i], req_id_st0[i]);
end else begin
dpi_trace("%d: smem%0d:%0d core-rd-req: addr=%0h, tag=%0h, req_id=%0h\n",
dpi_trace("%d: smem%0d:%0d core-rd-req: addr=%0h, tag=%0h (#%0d)\n",
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr_unqual[i], i), per_bank_core_req_tag_unqual[i], req_id_st0[i]);
end
end
@ -319,10 +319,10 @@ module VX_shared_mem #(
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid[i]) begin
if (per_bank_core_req_rw[i]) begin
dpi_trace("%d: smem%0d:%0d core-wr-rsp: addr=%0h, tag=%0h, data=%0h, req_id=%0h\n",
dpi_trace("%d: smem%0d:%0d core-wr-rsp: addr=%0h, tag=%0h, data=%0h (#%0d)\n",
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr[i], i), per_bank_core_req_tag[i], per_bank_core_req_data[i], req_id_st1[i]);
end else begin
dpi_trace("%d: smem%0d:%0d core-rd-rsp: addr=%0h, tag=%0h, data=%0h, req_id=%0h\n",
dpi_trace("%d: smem%0d:%0d core-rd-rsp: addr=%0h, tag=%0h, data=%0h (#%0d)\n",
$time, CACHE_ID, i, `LINE_TO_BYTE_ADDR(per_bank_core_req_addr[i], i), per_bank_core_req_tag[i], per_bank_core_rsp_data[i], req_id_st1[i]);
end
end

View file

@ -68,9 +68,9 @@ module VX_tag_access #(
end
if (lookup && ~stall) begin
if (tag_match) begin
dpi_trace("%d: cache%0d:%0d tag-hit: addr=%0h, req_id=%0h, blk_addr=%0d, tag_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), req_id, line_addr, line_tag);
dpi_trace("%d: cache%0d:%0d tag-hit: addr=%0h, blk_addr=%0d, tag_id=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, line_tag, req_id);
end else begin
dpi_trace("%d: cache%0d:%0d tag-miss: addr=%0h, req_id=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), req_id, line_addr, line_tag, read_tag);
dpi_trace("%d: cache%0d:%0d tag-miss: addr=%0h, blk_addr=%0d, tag_id=%0h, old_tag_id=%0h (#%0d)\n", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr, BANK_ID), line_addr, line_tag, read_tag, req_id);
end
end
end

View file

@ -6,7 +6,7 @@
interface VX_alu_req_if ();
wire valid;
wire [63:0] uuid;
wire [`UUID_BITS-1:0] uuid;
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;

View file

@ -6,7 +6,7 @@
interface VX_commit_if ();
wire valid;
wire [63:0] uuid;
wire [`UUID_BITS-1:0] uuid;
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;

View file

@ -6,7 +6,7 @@
interface VX_csr_req_if ();
wire valid;
wire [63:0] uuid;
wire [`UUID_BITS-1:0] uuid;
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;

View file

@ -6,7 +6,7 @@
interface VX_decode_if ();
wire valid;
wire [63:0] uuid;
wire [`UUID_BITS-1:0] uuid;
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;

View file

@ -6,7 +6,7 @@
interface VX_fpu_req_if ();
wire valid;
wire [63:0] uuid;
wire [`UUID_BITS-1:0] uuid;
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;

View file

@ -6,7 +6,7 @@
interface VX_gpu_req_if();
wire valid;
wire [63:0] uuid;
wire [`UUID_BITS-1:0] uuid;
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;

View file

@ -6,7 +6,7 @@
interface VX_ibuffer_if ();
wire valid;
wire [63:0] uuid;
wire [`UUID_BITS-1:0] uuid;
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;

View file

@ -6,7 +6,7 @@
interface VX_ifetch_req_if ();
wire valid;
wire [63:0] uuid;
wire [`UUID_BITS-1:0] uuid;
wire [`NUM_THREADS-1:0] tmask;
wire [`NW_BITS-1:0] wid;
wire [31:0] PC;

View file

@ -6,7 +6,7 @@
interface VX_ifetch_rsp_if ();
wire valid;
wire [63:0] uuid;
wire [`UUID_BITS-1:0] uuid;
wire [`NUM_THREADS-1:0] tmask;
wire [`NW_BITS-1:0] wid;
wire [31:0] PC;

View file

@ -6,7 +6,7 @@
interface VX_lsu_req_if ();
wire valid;
wire [63:0] uuid;
wire [`UUID_BITS-1:0] uuid;
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;

View file

@ -8,17 +8,20 @@ interface VX_tex_csr_if ();
wire write_enable;
wire [`CSR_ADDR_BITS-1:0] write_addr;
wire [31:0] write_data;
wire [`UUID_BITS-1:0] write_uuid;
modport master (
output write_enable,
output write_addr,
output write_data
output write_data,
output write_uuid
);
modport slave (
input write_enable,
input write_addr,
input write_data
input write_data,
input write_uuid
);
endinterface

View file

@ -6,7 +6,7 @@
interface VX_tex_req_if ();
wire valid;
wire [63:0] uuid;
wire [`UUID_BITS-1:0] uuid;
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;

View file

@ -6,7 +6,7 @@
interface VX_tex_rsp_if ();
wire valid;
wire [63:0] uuid;
wire [`UUID_BITS-1:0] uuid;
wire [`NW_BITS-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;

View file

@ -6,7 +6,7 @@
interface VX_writeback_if ();
wire valid;
wire [63:0] uuid;
wire [`UUID_BITS-1:0] uuid;
wire [`NUM_THREADS-1:0] tmask;
wire [`NW_BITS-1:0] wid;
wire [31:0] PC;

View file

@ -32,4 +32,20 @@
`define TEX_FORMAT_L8 `TEX_FORMAT_BITS'(5)
`define TEX_FORMAT_A8 `TEX_FORMAT_BITS'(6)
task trace_tex_state (
input [`CSR_ADDR_BITS-1:0] state
);
case (state)
`CSR_TEX_ADDR: dpi_trace("ADDR");
`CSR_TEX_WIDTH: dpi_trace("WIDTH");
`CSR_TEX_HEIGHT: dpi_trace("HEIGHT");
`CSR_TEX_FORMAT: dpi_trace("FORMAT");
`CSR_TEX_FILTER: dpi_trace("FILTER");
`CSR_TEX_WRAPU: dpi_trace("WRAPU");
`CSR_TEX_WRAPV: dpi_trace("WRAPV");
//`CSR_TEX_MIPOFF
default: dpi_trace("MIPOFF");
endcase
endtask
`endif

View file

@ -75,6 +75,9 @@ module VX_tex_mem #(
wire [`TEX_LGSTRIDE_BITS-1:0] q_req_lgstride;
wire [3:0][NUM_REQS-1:0][1:0] q_align_offs;
wire [3:0] q_dup_reqs;
wire [`NW_BITS-1:0] q_req_wid;
wire [31:0] q_req_PC;
wire [`UUID_BITS-1:0] q_req_uuid;
assign reqq_push = req_valid && req_ready;
@ -105,12 +108,8 @@ module VX_tex_mem #(
wire sent_all_ready, last_texel_sent;
wire req_texel_dup;
wire [NUM_REQS-1:0][29:0] req_texel_addr;
reg [`DBG_CACHE_REQ_IDW-1:0] req_id;
wire [`DBG_CACHE_REQ_IDW-1:0] rsp_req_id;
reg [1:0] req_texel_idx;
reg req_texels_done;
`UNUSED_VAR (rsp_req_id)
always @(posedge clk) begin
if (reset || last_texel_sent) begin
@ -156,22 +155,16 @@ module VX_tex_mem #(
wire [NUM_REQS-1:0] req_dup_mask = {{(NUM_REQS-1){~req_texel_dup}}, 1'b1};
assign {q_req_wid, q_req_PC, q_req_uuid} = q_req_info[`NW_BITS+32+`UUID_BITS-1:0];
`UNUSED_VAR (q_req_wid)
`UNUSED_VAR (q_req_PC)
assign dcache_req_if.valid = {NUM_REQS{req_texel_valid}} & q_req_tmask & req_dup_mask & ~texel_sent_mask;
assign dcache_req_if.rw = {NUM_REQS{1'b0}};
assign dcache_req_if.addr = req_texel_addr;
assign dcache_req_if.byteen = {NUM_REQS{4'b0}};
assign dcache_req_if.data = 'x;
assign dcache_req_if.tag = {NUM_REQS{req_id, `LSU_TAG_ID_BITS'(req_texel_idx), `CACHE_ADDR_TYPE_BITS'(0)}};
always @(posedge clk) begin
if (reset) begin
req_id <= `DBG_CACHE_REQ_ID(2, 0);
end else begin
if (dcache_req_fire_any) begin
req_id <= req_id + 1;
end
end
end
assign dcache_req_if.tag = {NUM_REQS{q_req_uuid, `LSU_TAG_ID_BITS'(req_texel_idx), `CACHE_ADDR_TYPE_BITS'(0)}};
// Dcache Response
@ -188,7 +181,6 @@ module VX_tex_mem #(
wire rsp_texel_dup;
assign rsp_texel_idx = dcache_rsp_if.tag[`CACHE_ADDR_TYPE_BITS +: 2];
assign rsp_req_id = dcache_rsp_if.tag[`CACHE_ADDR_TYPE_BITS + `LSU_TAG_ID_BITS +: `DBG_CACHE_REQ_IDW];
`UNUSED_VAR (dcache_rsp_if.tag)
assign rsp_texel_dup = q_dup_reqs[rsp_texel_idx];
@ -285,25 +277,25 @@ module VX_tex_mem #(
// Can accept new cache response?
assign dcache_rsp_if.ready = ~(is_last_rsp && stall_out);
`ifdef DBG_TRACE_TEX
wire [`NW_BITS-1:0] q_req_wid, req_wid, rsp_wid;
wire [31:0] q_req_PC, req_PC, rsp_PC;
assign {q_req_wid, q_req_PC} = q_req_info[`NW_BITS+32-1:0];
assign {req_wid, req_PC} = req_info[`NW_BITS+32-1:0];
assign {rsp_wid, rsp_PC} = rsp_info[`NW_BITS+32-1:0];
`ifdef DBG_TRACE_TEX
wire [`NW_BITS-1:0] req_wid, rsp_wid;
wire [31:0] req_PC, rsp_PC;
wire [`UUID_BITS-1:0] req_uuid, rsp_uuid;
assign {req_wid, req_PC, req_uuid} = req_info[`NW_BITS+32+`UUID_BITS-1:0];
assign {rsp_wid, rsp_PC, rsp_uuid} = rsp_info[`NW_BITS+32+`UUID_BITS-1:0];
always @(posedge clk) begin
if (dcache_req_fire_any) begin
dpi_trace("%d: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, req_id=%0h, tag=%0h, addr=",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, req_id, req_texel_idx);
dpi_trace("%d: core%0d-tex-cache-req: wid=%0d, PC=%0h, tmask=%b, tag=%0h, addr=",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_req_fire, req_texel_idx);
`TRACE_ARRAY1D(req_texel_addr, NUM_REQS);
dpi_trace(", is_dup=%b\n", req_texel_dup);
dpi_trace(", is_dup=%b (#%0d)\n", req_texel_dup, q_req_uuid);
end
if (dcache_rsp_fire) begin
dpi_trace("%d: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, req_id=%0h, tag=%0h, data=",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.tmask, rsp_req_id, rsp_texel_idx);
dpi_trace("%d: core%0d-tex-cache-rsp: wid=%0d, PC=%0h, tmask=%b, tag=%0h, data=",
$time, CORE_ID, q_req_wid, q_req_PC, dcache_rsp_if.tmask, rsp_texel_idx);
`TRACE_ARRAY1D(dcache_rsp_if.data, NUM_REQS);
dpi_trace("\n");
dpi_trace(" (#%0d)\n", q_req_uuid);
end
if (req_valid && req_ready) begin
dpi_trace("%d: core%0d-tex-mem-req: wid=%0d, PC=%0h, tmask=%b, filter=%0d, lgstride=%0d, baseaddr=",
@ -311,13 +303,13 @@ module VX_tex_mem #(
`TRACE_ARRAY1D(req_baseaddr, NUM_REQS);
dpi_trace(", addr=");
`TRACE_ARRAY2D(req_addr, 4, NUM_REQS);
dpi_trace("\n");
dpi_trace(" (#%0d)\n", req_uuid);
end
if (rsp_valid && rsp_ready) begin
dpi_trace("%d: core%0d-tex-mem-rsp: wid=%0d, PC=%0h, tmask=%b, data=",
$time, CORE_ID, rsp_wid, rsp_PC, rsp_tmask);
`TRACE_ARRAY2D(rsp_data, 4, NUM_REQS);
dpi_trace("\n");
dpi_trace(" (#%0d)\n", rsp_uuid);
end
end
`endif

View file

@ -23,11 +23,11 @@ module VX_tex_unit #(
VX_tex_rsp_if.master tex_rsp_if
);
localparam REQ_INFO_W = 64 + `NR_BITS + 1 + `NW_BITS + 32;
localparam REQ_INFO_W = `NR_BITS + 1 + `NW_BITS + 32 + `UUID_BITS;
localparam BLEND_FRAC_W = (2 * `NUM_THREADS * `TEX_BLEND_FRAC);
reg [$clog2(`NUM_TEX_UNITS)-1:0] csr_tex_unit;
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][`TEX_LOD_MAX+1-1:0];
reg [`TEX_MIPOFF_BITS-1:0] tex_mipoff [`NUM_TEX_UNITS-1:0][(`TEX_LOD_MAX+1)-1:0];
reg [1:0][`TEX_LOD_BITS-1:0] tex_logdims [`NUM_TEX_UNITS-1:0];
reg [1:0][`TEX_WRAP_BITS-1:0] tex_wraps [`NUM_TEX_UNITS-1:0];
reg [`TEX_ADDR_BITS-1:0] tex_baddr [`NUM_TEX_UNITS-1:0];
@ -36,9 +36,6 @@ module VX_tex_unit #(
// CSRs programming
reg csrs_dirty [`NUM_TEX_UNITS-1:0];
`UNUSED_VAR (csrs_dirty)
always @(posedge clk) begin
if (tex_csr_if.write_enable) begin
case (tex_csr_if.write_addr)
@ -47,50 +44,39 @@ module VX_tex_unit #(
end
`CSR_TEX_ADDR: begin
tex_baddr[csr_tex_unit] <= tex_csr_if.write_data[`TEX_ADDR_BITS-1:0];
csrs_dirty[csr_tex_unit] <= 1;
end
`CSR_TEX_FORMAT: begin
tex_format[csr_tex_unit] <= tex_csr_if.write_data[`TEX_FORMAT_BITS-1:0];
csrs_dirty[csr_tex_unit] <= 1;
end
`CSR_TEX_WRAPU: begin
tex_wraps[csr_tex_unit][0] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
csrs_dirty[csr_tex_unit] <= 1;
end
`CSR_TEX_WRAPV: begin
tex_wraps[csr_tex_unit][1] <= tex_csr_if.write_data[`TEX_WRAP_BITS-1:0];
csrs_dirty[csr_tex_unit] <= 1;
end
`CSR_TEX_FILTER: begin
tex_filter[csr_tex_unit] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
csrs_dirty[csr_tex_unit] <= 1;
tex_filter[csr_tex_unit] <= tex_csr_if.write_data[`TEX_FILTER_BITS-1:0];
end
`CSR_TEX_WIDTH: begin
tex_logdims[csr_tex_unit][0] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0];
csrs_dirty[csr_tex_unit] <= 1;
end
`CSR_TEX_HEIGHT: begin
tex_logdims[csr_tex_unit][1] <= tex_csr_if.write_data[`TEX_LOD_BITS-1:0];
csrs_dirty[csr_tex_unit] <= 1;
end
default: begin
for (integer j = 0; j <= `TEX_LOD_MAX; ++j) begin
`IGNORE_WARNINGS_BEGIN
if (tex_csr_if.write_addr == `CSR_TEX_MIPOFF(j)) begin
`IGNORE_WARNINGS_END
tex_mipoff[csr_tex_unit][j] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
csrs_dirty[csr_tex_unit] <= 1;
tex_mipoff[csr_tex_unit][j] <= tex_csr_if.write_data[`TEX_MIPOFF_BITS-1:0];
end
end
end
endcase
end
if (reset || (tex_req_if.valid && tex_req_if.ready)) begin
for (integer i = 0; i < `NUM_TEX_UNITS; ++i) begin
csrs_dirty[i] <= 0;
end
end
end
wire [`UUID_BITS-1:0] write_uuid = tex_csr_if.write_uuid;
`UNUSED_VAR (write_uuid);
// mipmap attributes
@ -136,7 +122,7 @@ module VX_tex_unit #(
.mip_level (mip_level),
.req_mipoff (sel_mipoff),
.req_logdims(sel_logdims),
.req_info ({tex_format[tex_req_if.unit], tex_req_if.uuid, tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC}),
.req_info ({tex_format[tex_req_if.unit], tex_req_if.rd, tex_req_if.wb, tex_req_if.wid, tex_req_if.PC, tex_req_if.uuid}),
.req_ready (tex_req_if.ready),
.rsp_valid (mem_req_valid),
@ -211,9 +197,9 @@ module VX_tex_unit #(
.rsp_valid (tex_rsp_if.valid),
.rsp_tmask (tex_rsp_if.tmask),
.rsp_data (tex_rsp_if.data),
.rsp_info ({tex_rsp_if.uuid, tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.wid, tex_rsp_if.PC}),
.rsp_info ({tex_rsp_if.rd, tex_rsp_if.wb, tex_rsp_if.wid, tex_rsp_if.PC, tex_rsp_if.uuid}),
.rsp_ready (tex_rsp_if.ready)
);
);
`ifdef PERF_ENABLE
wire [$clog2(`NUM_THREADS+1)-1:0] perf_mem_req_per_cycle;
@ -255,31 +241,24 @@ module VX_tex_unit #(
`ifdef DBG_TRACE_TEX
always @(posedge clk) begin
if (tex_csr_if.write_enable) begin
dpi_trace("%d: core%0d-tex-csr: unit=%0d, state=", $time, CORE_ID, csr_tex_unit);
trace_tex_state(tex_csr_if.write_addr);
dpi_trace(", data=%0h (#%0d)\n", tex_csr_if.write_data, tex_csr_if.write_uuid);
end
if (tex_req_if.valid && tex_req_if.ready) begin
for (integer i = 0; i < `NUM_TEX_UNITS; ++i) begin
if (csrs_dirty[i]) begin
dpi_trace("%d: core%0d-tex-csr: tex%0d_addr=%0h\n", $time, CORE_ID, i, tex_baddr[i]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_logwidth=%0h\n", $time, CORE_ID, i, tex_logdims[i][0]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_logheight=%0h\n", $time, CORE_ID, i, tex_logdims[i][1]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_format=%0h\n", $time, CORE_ID, i, tex_format[i]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_wrap_u=%0h\n", $time, CORE_ID, i, tex_wraps[i][0]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_wrap_v=%0h\n", $time, CORE_ID, i, tex_wraps[i][1]);
dpi_trace("%d: core%0d-tex-csr: tex%0d_filter=%0h\n", $time, CORE_ID, i, tex_filter[i]);
end
end
dpi_trace("%d: core%0d-tex-req: wid=%0d, PC=%0h, tmask=%b, unit=%0d, lod=%0h, u=",
$time, CORE_ID, tex_req_if.wid, tex_req_if.PC, tex_req_if.tmask, tex_req_if.unit, tex_req_if.lod);
$time, CORE_ID, tex_req_if.wid, tex_req_if.PC, tex_req_if.tmask, tex_req_if.unit, tex_req_if.lod);
`TRACE_ARRAY1D(tex_req_if.coords[0], `NUM_THREADS);
dpi_trace(", v=");
`TRACE_ARRAY1D(tex_req_if.coords[1], `NUM_THREADS);
dpi_trace("\n");
dpi_trace(" (#%0d)\n", tex_req_if.uuid);
end
if (tex_rsp_if.valid && tex_rsp_if.ready) begin
dpi_trace("%d: core%0d-tex-rsp: wid=%0d, PC=%0h, tmask=%b, data=",
$time, CORE_ID, tex_rsp_if.wid, tex_rsp_if.PC, tex_rsp_if.tmask);
`TRACE_ARRAY1D(tex_rsp_if.data, `NUM_THREADS);
dpi_trace("\n");
dpi_trace(" (#%0d)\n", tex_rsp_if.uuid);
end
end
`endif

View file

@ -142,7 +142,7 @@
},
"afu/vortex/cluster/core/pipeline/fetch/warp_sched": {
"?wsched_scheduled": 1,
"wsched_schedule_uuid": 64,
"wsched_schedule_uuid": "`UUID_BITS",
"wsched_active_warps": "`NUM_WARPS",
"wsched_stalled_warps": "`NUM_WARPS",
"wsched_schedule_tmask": "`NUM_THREADS",
@ -151,17 +151,17 @@
},
"afu/vortex/cluster/core/pipeline/fetch/icache_stage": {
"?icache_req_fire": 1,
"icache_req_uuid": 64,
"icache_req_uuid": "`UUID_BITS",
"icache_req_addr": 32,
"icache_req_tag":"`ICACHE_CORE_TAG_ID_BITS",
"?icache_rsp_fire": 1,
"icache_rsp_uuid": 64,
"icache_rsp_uuid": "`UUID_BITS",
"icache_rsp_data": 32,
"icache_rsp_tag":"`ICACHE_CORE_TAG_ID_BITS"
},
"afu/vortex/cluster/core/pipeline/issue": {
"?issue_fire": 1,
"issue_uuid": 64,
"issue_uuid": "`UUID_BITS",
"issue_tmask":"`NUM_THREADS",
"issue_ex_type":"`EX_BITS",
"issue_op_type":"`INST_OP_BITS",
@ -178,7 +178,7 @@
"gpr_rs2":"`NUM_THREADS * 32",
"gpr_rs3":"`NUM_THREADS * 32",
"?writeback_valid": 1,
"writeback_uuid": 64,
"writeback_uuid": "`UUID_BITS",
"writeback_tmask":"`NUM_THREADS",
"writeback_rd":"`NR_BITS",
"writeback_data":"`NUM_THREADS * 32",
@ -188,20 +188,20 @@
},
"afu/vortex/cluster/core/pipeline/execute/lsu_unit": {
"?dcache_req_fire":"`NUM_THREADS",
"dcache_req_uuid": 64,
"dcache_req_uuid": "`UUID_BITS",
"dcache_req_addr":"`NUM_THREADS * 32",
"dcache_req_rw": 1,
"dcache_req_byteen":"`NUM_THREADS * 4",
"dcache_req_data":"`NUM_THREADS * 32",
"dcache_req_tag":"`LSUQ_ADDR_BITS",
"?dcache_rsp_fire":"`NUM_THREADS",
"dcache_rsp_uuid": 64,
"dcache_rsp_uuid": "`UUID_BITS",
"dcache_rsp_data":"`NUM_THREADS * 32",
"dcache_rsp_tag":"`LSUQ_ADDR_BITS"
},
"afu/vortex/cluster/core/pipeline/execute/gpu_unit": {
"?gpu_rsp_valid": 1,
"gpu_rsp_uuid": 64,
"gpu_rsp_uuid": "`UUID_BITS",
"gpu_rsp_tmc": 1,
"gpu_rsp_wspawn": 1,
"gpu_rsp_split": 1,