Merge branch 'gfx' of https://github.gatech.edu/casl/Vortex into graphics

This commit is contained in:
Blaise Tine 2022-08-28 00:39:18 -04:00
commit b0579dc808
28 changed files with 686 additions and 548 deletions

View file

@ -1,174 +0,0 @@
`include "VX_define.vh"
// Bridges one valid/ready memory request/response interface onto AVS_BANKS
// independent "AVS bus" bank ports (signal names follow the Avalon
// memory-mapped convention - presumably Avalon-MM; confirm against the
// platform documentation).
//
// Request path : a request is steered to the bank selected by the low
//                BANK_ADDRW bits of mem_req_addr. The tag of every accepted
//                read is stored in a per-bank queue.
// Response path: returned read data is stored in a per-bank queue, paired
//                with the tag at the head of that bank's tag queue, and the
//                per-bank streams are merged by VX_stream_arb.
//
// Parameters:
//   AVS_DATA_WIDTH  - width of the data buses, in bits
//   AVS_ADDR_WIDTH  - width of the address buses, in bits
//   AVS_BURST_WIDTH - width of avs_burstcount (the value driven is always 1)
//   AVS_BANKS       - number of bank ports
//   REQ_TAG_WIDTH   - width of the request/response tag
//   RD_QUEUE_SIZE   - SIZE passed to the per-bank tag/data queues and to the
//                     pending-read counter
//   AVS_BYTEENW     - byte-enable width, derived as AVS_DATA_WIDTH / 8
module VX_avs_adapter #(
    parameter AVS_DATA_WIDTH  = 1,
    parameter AVS_ADDR_WIDTH  = 1,
    parameter AVS_BURST_WIDTH = 1,
    parameter AVS_BANKS       = 1,
    parameter REQ_TAG_WIDTH   = 1,
    parameter RD_QUEUE_SIZE   = 1,
    parameter AVS_BYTEENW     = (AVS_DATA_WIDTH / 8)
) (
    input wire clk,
    input wire reset,

    // Memory request
    input wire                          mem_req_valid,
    input wire                          mem_req_rw,       // 1 = write, 0 = read
    input wire [AVS_BYTEENW-1:0]        mem_req_byteen,
    input wire [AVS_ADDR_WIDTH-1:0]     mem_req_addr,
    input wire [AVS_DATA_WIDTH-1:0]     mem_req_data,
    input wire [REQ_TAG_WIDTH-1:0]      mem_req_tag,
    output wire                         mem_req_ready,

    // Memory response
    output wire                         mem_rsp_valid,
    output wire [AVS_DATA_WIDTH-1:0]    mem_rsp_data,
    output wire [REQ_TAG_WIDTH-1:0]     mem_rsp_tag,
    input wire                          mem_rsp_ready,

    // AVS bus
    output wire [AVS_DATA_WIDTH-1:0]    avs_writedata [AVS_BANKS],
    input wire [AVS_DATA_WIDTH-1:0]     avs_readdata [AVS_BANKS],
    output wire [AVS_ADDR_WIDTH-1:0]    avs_address [AVS_BANKS],
    input wire                          avs_waitrequest [AVS_BANKS],
    output wire                         avs_write [AVS_BANKS],
    output wire                         avs_read [AVS_BANKS],
    output wire [AVS_BYTEENW-1:0]       avs_byteenable [AVS_BANKS],
    output wire [AVS_BURST_WIDTH-1:0]   avs_burstcount [AVS_BANKS],
    // explicit net type, consistent with the other ports
    input wire                          avs_readdatavalid [AVS_BANKS]
);
    localparam RD_QUEUE_ADDR_WIDTH = $clog2(RD_QUEUE_SIZE+1);
    localparam BANK_ADDRW = `LOG2UP(AVS_BANKS);

    // Requests handling

    wire [AVS_BANKS-1:0] avs_reqq_push, avs_reqq_pop, avs_reqq_ready;
    wire [AVS_BANKS-1:0][REQ_TAG_WIDTH-1:0] avs_reqq_tag_out;
    wire [AVS_BANKS-1:0] req_queue_going_full;
    wire [AVS_BANKS-1:0][RD_QUEUE_ADDR_WIDTH-1:0] req_queue_size;
    wire [BANK_ADDRW-1:0] req_bank_sel;

    // Bank selection: low address bits when there is more than one bank.
    if (AVS_BANKS >= 2) begin
        assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
    end else begin
        assign req_bank_sel = 0;
    end

    for (genvar i = 0; i < AVS_BANKS; ++i) begin
        // A bank can take a request when its pending counter is not full and
        // the bank is not asserting waitrequest.
        assign avs_reqq_ready[i] = !req_queue_going_full[i] && !avs_waitrequest[i];
        // Only reads are tracked: a tag is pushed when a read is accepted by bank i.
        assign avs_reqq_push[i]  = mem_req_valid && !mem_req_rw && avs_reqq_ready[i] && (req_bank_sel == i);
    end

    for (genvar i = 0; i < AVS_BANKS; ++i) begin
        // Counts reads issued to bank i whose response has not been popped yet.
        VX_pending_size #(
            .SIZE (RD_QUEUE_SIZE)
        ) pending_size (
            .clk   (clk),
            .reset (reset),
            .incr  (avs_reqq_push[i]),
            .decr  (avs_reqq_pop[i]),
            .full  (req_queue_going_full[i]),
            .size  (req_queue_size[i]),
            `UNUSED_PIN (empty)
        );
        `UNUSED_VAR (req_queue_size)

        // Holds the tags of in-flight reads for bank i.
        VX_fifo_queue #(
            .DATAW (REQ_TAG_WIDTH),
            .SIZE  (RD_QUEUE_SIZE)
        ) rd_req_queue (
            .clk      (clk),
            .reset    (reset),
            .push     (avs_reqq_push[i]),
            .pop      (avs_reqq_pop[i]),
            .data_in  (mem_req_tag),
            .data_out (avs_reqq_tag_out[i]),
            `UNUSED_PIN (empty),
            `UNUSED_PIN (full),
            `UNUSED_PIN (alm_empty),
            `UNUSED_PIN (alm_full),
            `UNUSED_PIN (size)
        );
    end

    for (genvar i = 0; i < AVS_BANKS; ++i) begin
        // read/write strobes are not gated by avs_waitrequest; only
        // mem_req_ready (via avs_reqq_ready) is.
        assign avs_read[i]       = mem_req_valid && !mem_req_rw && !req_queue_going_full[i] && (req_bank_sel == i);
        assign avs_write[i]      = mem_req_valid && mem_req_rw && !req_queue_going_full[i] && (req_bank_sel == i);
        // The full request address (bank-select bits included) is forwarded to every bank.
        assign avs_address[i]    = mem_req_addr;
        assign avs_byteenable[i] = mem_req_byteen;
        assign avs_writedata[i]  = mem_req_data;
        assign avs_burstcount[i] = AVS_BURST_WIDTH'(1);
    end

    if (AVS_BANKS >= 2) begin
        assign mem_req_ready = avs_reqq_ready[req_bank_sel];
    end else begin
        assign mem_req_ready = avs_reqq_ready;
    end

    // Responses handling

    wire [AVS_BANKS-1:0] rsp_arb_valid_in;
    wire [AVS_BANKS-1:0][AVS_DATA_WIDTH+REQ_TAG_WIDTH-1:0] rsp_arb_data_in;
    wire [AVS_BANKS-1:0] rsp_arb_ready_in;
    wire [AVS_BANKS-1:0][AVS_DATA_WIDTH-1:0] avs_rspq_data_out;
    wire [AVS_BANKS-1:0] avs_rspq_empty;

    for (genvar i = 0; i < AVS_BANKS; ++i) begin
        // Buffers read data returned by bank i. It is popped together with
        // the bank's tag queue (same avs_reqq_pop strobe).
        VX_fifo_queue #(
            .DATAW (AVS_DATA_WIDTH),
            .SIZE  (RD_QUEUE_SIZE)
        ) rd_rsp_queue (
            .clk      (clk),
            .reset    (reset),
            .push     (avs_readdatavalid[i]),
            .pop      (avs_reqq_pop[i]),
            .data_in  (avs_readdata[i]),
            .data_out (avs_rspq_data_out[i]),
            .empty    (avs_rspq_empty[i]),
            `UNUSED_PIN (full),
            `UNUSED_PIN (alm_empty),
            `UNUSED_PIN (alm_full),
            `UNUSED_PIN (size)
        );
    end

    for (genvar i = 0; i < AVS_BANKS; ++i) begin
        assign rsp_arb_valid_in[i] = !avs_rspq_empty[i];
        assign rsp_arb_data_in[i]  = {avs_rspq_data_out[i], avs_reqq_tag_out[i]};
        // Pop both per-bank queues when the arbiter accepts this bank's response.
        assign avs_reqq_pop[i]     = rsp_arb_valid_in[i] && rsp_arb_ready_in[i];
    end

    // Merges the per-bank response streams onto the single response port.
    VX_stream_arb #(
        .NUM_INPUTS (AVS_BANKS),
        .DATAW      (AVS_DATA_WIDTH + REQ_TAG_WIDTH),
        .ARBITER    ("R")
    ) rsp_arb (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (rsp_arb_valid_in),
        .data_in   (rsp_arb_data_in),
        .ready_in  (rsp_arb_ready_in),
        .valid_out (mem_rsp_valid),
        .data_out  ({mem_rsp_data, mem_rsp_tag}),
        .ready_out (mem_rsp_ready)
    );

`ifdef DBG_TRACE_AFU
    // NOTE(review): "pending" prints req_queue_size, which is the packed
    // array of all per-bank counters, not a single bank's count.
    always @(posedge clk) begin
        if (mem_req_valid && mem_req_ready) begin
            if (mem_req_rw) begin
                `TRACE(2, ("%d: AVS Wr Req: addr=0x%0h, byteen=0x%0h, tag=0x%0h, data=0x%0h\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, mem_req_data));
            end else begin
                `TRACE(2, ("%d: AVS Rd Req: addr=0x%0h, byteen=0x%0h, tag=0x%0h, pending=%0d\n", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, req_queue_size));
            end
        end
        if (mem_rsp_valid && mem_rsp_ready) begin
            `TRACE(2, ("%d: AVS Rd Rsp: tag=0x%0h, data=0x%0h, pending=%0d\n", $time, mem_rsp_tag, mem_rsp_data, req_queue_size));
        end
    end
`endif

endmodule

View file

@ -1,181 +0,0 @@
`include "VX_define.vh"
// Converts a memory request/response stream between a source interface
// (SRC_* widths, "_in" request / "_out" response ports) and a destination
// interface (DST_* widths, "_out" request / "_in" response ports).
//
// Three cases, chosen at elaboration from $clog2 of the two data widths:
//   * DST wider than SRC  : purely combinational. The source word is shifted
//     into its lane of the destination word; the lane index (low D address
//     bits) is appended to the tag and used to pick the lane on response.
//   * DST narrower than SRC: each source request is issued as P = 2**D
//     destination beats; the P response beats are collected and presented as
//     one source response on the last beat.
//   * equal widths        : straight pass-through with address/tag resizing.
//
// Parameters:
//   SRC_DATA_WIDTH / DST_DATA_WIDTH - data widths in bits
//   SRC_ADDR_WIDTH / DST_ADDR_WIDTH - address widths in bits
//   SRC_TAG_WIDTH  / DST_TAG_WIDTH  - tag widths in bits
//   SRC_DATA_SIZE  / DST_DATA_SIZE  - byte-enable widths (data width / 8)
module VX_mem_adapter #(
    parameter SRC_DATA_WIDTH = 1,
    parameter SRC_ADDR_WIDTH = 1,
    parameter DST_DATA_WIDTH = 1,
    parameter DST_ADDR_WIDTH = 1,
    parameter SRC_TAG_WIDTH  = 1,
    parameter DST_TAG_WIDTH  = 1,
    parameter SRC_DATA_SIZE  = (SRC_DATA_WIDTH / 8),
    parameter DST_DATA_SIZE  = (DST_DATA_WIDTH / 8)
) (
    input wire                          clk,
    input wire                          reset,

    // Source-side request
    input wire                          mem_req_valid_in,
    input wire [SRC_ADDR_WIDTH-1:0]     mem_req_addr_in,
    input wire                          mem_req_rw_in,
    input wire [SRC_DATA_SIZE-1:0]      mem_req_byteen_in,
    input wire [SRC_DATA_WIDTH-1:0]     mem_req_data_in,
    input wire [SRC_TAG_WIDTH-1:0]      mem_req_tag_in,
    output wire                         mem_req_ready_in,

    // Destination-side request
    output wire                         mem_req_valid_out,
    output wire [DST_ADDR_WIDTH-1:0]    mem_req_addr_out,
    output wire                         mem_req_rw_out,
    output wire [DST_DATA_SIZE-1:0]     mem_req_byteen_out,
    output wire [DST_DATA_WIDTH-1:0]    mem_req_data_out,
    output wire [DST_TAG_WIDTH-1:0]     mem_req_tag_out,
    input wire                          mem_req_ready_out,

    // Destination-side response
    input wire                          mem_rsp_valid_in,
    input wire [DST_DATA_WIDTH-1:0]     mem_rsp_data_in,
    input wire [DST_TAG_WIDTH-1:0]      mem_rsp_tag_in,
    output wire                         mem_rsp_ready_in,

    // Source-side response
    output wire                         mem_rsp_valid_out,
    output wire [SRC_DATA_WIDTH-1:0]    mem_rsp_data_out,
    output wire [SRC_TAG_WIDTH-1:0]     mem_rsp_tag_out,
    input wire                          mem_rsp_ready_out
);
    `STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!"))

    localparam DST_LDATAW = $clog2(DST_DATA_WIDTH);
    localparam SRC_LDATAW = $clog2(SRC_DATA_WIDTH);
    localparam D = `ABS(DST_LDATAW - SRC_LDATAW);   // log2 of the width ratio
    localparam P = 2**D;                            // width ratio

    // When widening, the destination tag carries {source tag, D-bit lane index};
    // a narrower DST_TAG_WIDTH would truncate the source tag on request and
    // make the response tag slice below select out of range.
    `STATIC_ASSERT (((DST_LDATAW <= SRC_LDATAW) || (DST_TAG_WIDTH >= (SRC_TAG_WIDTH + D))), ("invalid parameter"))

    `UNUSED_VAR (mem_rsp_tag_in)

    if (DST_LDATAW > SRC_LDATAW) begin

        // ---- destination wider than source (combinational) ----

        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        // Lane of the destination word addressed by this request / response.
        wire [D-1:0] req_idx = mem_req_addr_in[D-1:0];
        wire [D-1:0] rsp_idx = mem_rsp_tag_in[D-1:0];

        // Destination word address: source address with the lane bits dropped.
        wire [SRC_ADDR_WIDTH-D-1:0] mem_req_addr_in_qual = mem_req_addr_in[SRC_ADDR_WIDTH-1:D];

        wire [P-1:0][SRC_DATA_WIDTH-1:0] mem_rsp_data_in_w = mem_rsp_data_in;

        if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH - D)) begin
            `UNUSED_VAR (mem_req_addr_in_qual)
            assign mem_req_addr_out = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0];
        end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH - D)) begin
            assign mem_req_addr_out = DST_ADDR_WIDTH'(mem_req_addr_in_qual);
        end else begin
            assign mem_req_addr_out = mem_req_addr_in_qual;
        end

        assign mem_req_valid_out  = mem_req_valid_in;
        assign mem_req_rw_out     = mem_req_rw_in;
        // Shift byte enables / data into lane req_idx
        // (shift amount = req_idx * source bytes, resp. req_idx * source bits).
        assign mem_req_byteen_out = DST_DATA_SIZE'(mem_req_byteen_in) << ((DST_LDATAW-3)'(req_idx) << (SRC_LDATAW-3));
        assign mem_req_data_out   = DST_DATA_WIDTH'(mem_req_data_in) << ((DST_LDATAW'(req_idx)) << SRC_LDATAW);
        assign mem_req_tag_out    = DST_TAG_WIDTH'({mem_req_tag_in, req_idx});
        assign mem_req_ready_in   = mem_req_ready_out;

        assign mem_rsp_valid_out  = mem_rsp_valid_in;
        assign mem_rsp_data_out   = mem_rsp_data_in_w[rsp_idx];
        assign mem_rsp_tag_out    = SRC_TAG_WIDTH'(mem_rsp_tag_in[SRC_TAG_WIDTH+D-1:D]);
        assign mem_rsp_ready_in   = mem_rsp_ready_out;

    end else if (DST_LDATAW < SRC_LDATAW) begin

        // ---- destination narrower than source (P beats per source access) ----

        // Beat counters: D bits wide, so they wrap to 0 after P beats.
        reg [D-1:0] req_ctr, rsp_ctr;

        // Assembly buffer for the response beats (_r registered, _n next value).
        reg [P-1:0][DST_DATA_WIDTH-1:0] mem_rsp_data_out_r, mem_rsp_data_out_n;

        wire mem_req_out_fire = mem_req_valid_out && mem_req_ready_out;
        wire mem_rsp_in_fire = mem_rsp_valid_in && mem_rsp_ready_in;

        wire [P-1:0][DST_DATA_WIDTH-1:0] mem_req_data_in_w = mem_req_data_in;
        wire [P-1:0][DST_DATA_SIZE-1:0] mem_req_byteen_in_w = mem_req_byteen_in;

        always @(*) begin
            mem_rsp_data_out_n = mem_rsp_data_out_r;
            if (mem_rsp_in_fire) begin
                mem_rsp_data_out_n[rsp_ctr] = mem_rsp_data_in;
            end
        end

        always @(posedge clk) begin
            if (reset) begin
                req_ctr <= 0;
                rsp_ctr <= 0;
            end else begin
                if (mem_req_out_fire) begin
                    req_ctr <= req_ctr + 1;
                end
                if (mem_rsp_in_fire) begin
                    rsp_ctr <= rsp_ctr + 1;
                end
            end
            mem_rsp_data_out_r <= mem_rsp_data_out_n;
        end

        // Tag of the most recently accepted response beat; used to check that
        // the following beats of the same access carry the same tag.
        reg [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_r;
        wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_w;

        always @(posedge clk) begin
            if (mem_rsp_in_fire) begin
                mem_rsp_tag_in_r <= mem_rsp_tag_in;
            end
        end

        // Expected tag: on the first beat (rsp_ctr == 0) anything is accepted.
        assign mem_rsp_tag_in_w = (rsp_ctr != 0) ? mem_rsp_tag_in_r : mem_rsp_tag_in;
        // cur = tag on the incoming beat, expected = tag latched from the previous beat.
        `RUNTIME_ASSERT(!mem_rsp_in_fire || (mem_rsp_tag_in_w == mem_rsp_tag_in),
            ("%t: *** out-of-order memory response! cur=%d, expected=%d", $time, mem_rsp_tag_in, mem_rsp_tag_in_w))

        // Destination address: source address extended with the beat index.
        wire [SRC_ADDR_WIDTH+D-1:0] mem_req_addr_in_qual = {mem_req_addr_in, req_ctr};

        if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH + D)) begin
            `UNUSED_VAR (mem_req_addr_in_qual)
            assign mem_req_addr_out = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0];
        end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH + D)) begin
            assign mem_req_addr_out = DST_ADDR_WIDTH'(mem_req_addr_in_qual);
        end else begin
            assign mem_req_addr_out = mem_req_addr_in_qual;
        end

        assign mem_req_valid_out  = mem_req_valid_in;
        assign mem_req_rw_out     = mem_req_rw_in;
        assign mem_req_byteen_out = mem_req_byteen_in_w[req_ctr];
        assign mem_req_data_out   = mem_req_data_in_w[req_ctr];
        assign mem_req_tag_out    = DST_TAG_WIDTH'(mem_req_tag_in);
        // The source request is only consumed on the last beat.
        assign mem_req_ready_in   = mem_req_ready_out && (req_ctr == (P-1));

        // The source response becomes valid on the last beat; the data output
        // uses the combinational "next" buffer so that beat is included.
        assign mem_rsp_valid_out  = mem_rsp_valid_in && (rsp_ctr == (P-1));
        assign mem_rsp_data_out   = mem_rsp_data_out_n;
        assign mem_rsp_tag_out    = SRC_TAG_WIDTH'(mem_rsp_tag_in);
        // NOTE(review): intermediate beats are only latched locally, yet they
        // are accepted only while mem_rsp_ready_out is high - confirm intended.
        assign mem_rsp_ready_in   = mem_rsp_ready_out;

    end else begin

        // ---- equal data widths (pass-through) ----

        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        if (DST_ADDR_WIDTH < SRC_ADDR_WIDTH) begin
            `UNUSED_VAR (mem_req_addr_in)
            assign mem_req_addr_out = mem_req_addr_in[DST_ADDR_WIDTH-1:0];
        end else if (DST_ADDR_WIDTH > SRC_ADDR_WIDTH) begin
            assign mem_req_addr_out = DST_ADDR_WIDTH'(mem_req_addr_in);
        end else begin
            assign mem_req_addr_out = mem_req_addr_in;
        end

        assign mem_req_valid_out  = mem_req_valid_in;
        assign mem_req_rw_out     = mem_req_rw_in;
        assign mem_req_byteen_out = mem_req_byteen_in;
        assign mem_req_data_out   = mem_req_data_in;
        assign mem_req_tag_out    = DST_TAG_WIDTH'(mem_req_tag_in);
        assign mem_req_ready_in   = mem_req_ready_out;

        assign mem_rsp_valid_out  = mem_rsp_valid_in;
        assign mem_rsp_data_out   = mem_rsp_data_in;
        assign mem_rsp_tag_out    = SRC_TAG_WIDTH'(mem_rsp_tag_in);
        assign mem_rsp_ready_in   = mem_rsp_ready_out;

    end

endmodule

View file

@ -439,7 +439,9 @@ VX_mem_adapter #(
.SRC_ADDR_WIDTH (CCI_ADDR_WIDTH),
.DST_ADDR_WIDTH (LMEM_ADDR_WIDTH),
.SRC_TAG_WIDTH (CCI_ADDR_WIDTH),
.DST_TAG_WIDTH (AVS_REQ_TAGW)
.DST_TAG_WIDTH (AVS_REQ_TAGW),
.BUFFERED_REQ (0),
.BUFFERED_RSP (0)
) cci_mem_adapter (
.clk (clk),
.reset (reset),
@ -452,6 +454,11 @@ VX_mem_adapter #(
.mem_req_tag_in (cci_mem_req_tag),
.mem_req_ready_in (cci_mem_req_ready),
.mem_rsp_valid_in (cci_mem_rsp_valid),
.mem_rsp_data_in (cci_mem_rsp_data),
.mem_rsp_tag_in (cci_mem_rsp_tag),
.mem_rsp_ready_in (cci_mem_rsp_ready),
.mem_req_valid_out (cci_vx_mem_req_if[1].valid),
.mem_req_addr_out (cci_vx_mem_req_if[1].addr),
.mem_req_rw_out (cci_vx_mem_req_if[1].rw),
@ -460,15 +467,10 @@ VX_mem_adapter #(
.mem_req_tag_out (cci_vx_mem_req_if[1].tag),
.mem_req_ready_out (cci_vx_mem_req_if[1].ready),
.mem_rsp_valid_in (cci_vx_mem_rsp_if[1].valid),
.mem_rsp_data_in (cci_vx_mem_rsp_if[1].data),
.mem_rsp_tag_in (cci_vx_mem_rsp_if[1].tag),
.mem_rsp_ready_in (cci_vx_mem_rsp_if[1].ready),
.mem_rsp_valid_out (cci_mem_rsp_valid),
.mem_rsp_data_out (cci_mem_rsp_data),
.mem_rsp_tag_out (cci_mem_rsp_tag),
.mem_rsp_ready_out (cci_mem_rsp_ready)
.mem_rsp_valid_out (cci_vx_mem_rsp_if[1].valid),
.mem_rsp_data_out (cci_vx_mem_rsp_if[1].data),
.mem_rsp_tag_out (cci_vx_mem_rsp_if[1].tag),
.mem_rsp_ready_out (cci_vx_mem_rsp_if[1].ready)
);
//--
@ -485,7 +487,9 @@ VX_mem_adapter #(
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
.DST_ADDR_WIDTH (LMEM_ADDR_WIDTH),
.SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
.DST_TAG_WIDTH (AVS_REQ_TAGW)
.DST_TAG_WIDTH (AVS_REQ_TAGW),
.BUFFERED_REQ (0),
.BUFFERED_RSP (2)
) vx_mem_adapter (
.clk (clk),
.reset (reset),
@ -498,6 +502,11 @@ VX_mem_adapter #(
.mem_req_tag_in (vx_mem_req_tag),
.mem_req_ready_in (vx_mem_req_ready_qual),
.mem_rsp_valid_in (vx_mem_rsp_valid),
.mem_rsp_data_in (vx_mem_rsp_data),
.mem_rsp_tag_in (vx_mem_rsp_tag),
.mem_rsp_ready_in (vx_mem_rsp_ready),
.mem_req_valid_out (cci_vx_mem_req_if[0].valid),
.mem_req_addr_out (cci_vx_mem_req_if[0].addr),
.mem_req_rw_out (cci_vx_mem_req_if[0].rw),
@ -506,15 +515,10 @@ VX_mem_adapter #(
.mem_req_tag_out (cci_vx_mem_req_if[0].tag),
.mem_req_ready_out (cci_vx_mem_req_if[0].ready),
.mem_rsp_valid_in (cci_vx_mem_rsp_if[0].valid),
.mem_rsp_data_in (cci_vx_mem_rsp_if[0].data),
.mem_rsp_tag_in (cci_vx_mem_rsp_if[0].tag),
.mem_rsp_ready_in (cci_vx_mem_rsp_if[0].ready),
.mem_rsp_valid_out (vx_mem_rsp_valid),
.mem_rsp_data_out (vx_mem_rsp_data),
.mem_rsp_tag_out (vx_mem_rsp_tag),
.mem_rsp_ready_out (vx_mem_rsp_ready)
.mem_rsp_valid_out (cci_vx_mem_rsp_if[0].valid),
.mem_rsp_data_out (cci_vx_mem_rsp_if[0].data),
.mem_rsp_tag_out (cci_vx_mem_rsp_if[0].tag),
.mem_rsp_ready_out (cci_vx_mem_rsp_if[0].ready)
);
//--
@ -537,8 +541,8 @@ VX_mem_arb #(
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
.TAG_WIDTH (AVS_REQ_TAGW),
.ARBITER ("P"),
.BUFFERED_REQ (1),
.BUFFERED_RSP (2)
.BUFFERED_REQ (0),
.BUFFERED_RSP (0)
) mem_arb (
.clk (clk),
.reset (mem_arb_reset),
@ -553,12 +557,14 @@ VX_mem_arb #(
`RESET_RELAY (avs_adapter_reset, reset);
VX_avs_adapter #(
.AVS_DATA_WIDTH (LMEM_DATA_WIDTH),
.AVS_ADDR_WIDTH (LMEM_ADDR_WIDTH),
.AVS_BURST_WIDTH (LMEM_BURST_CTRW),
.AVS_BANKS (NUM_LOCAL_MEM_BANKS),
.REQ_TAG_WIDTH (AVS_REQ_TAGW + 1),
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE)
.DATA_WIDTH (LMEM_DATA_WIDTH),
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
.BURST_WIDTH (LMEM_BURST_CTRW),
.NUM_BANKS (NUM_LOCAL_MEM_BANKS),
.REQ_TAG_WIDTH (AVS_REQ_TAGW + 1),
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE),
.BUFFERED_REQ (2),
.BUFFERED_RSP (0)
) avs_adapter (
.clk (clk),
.reset (avs_adapter_reset),
@ -659,7 +665,7 @@ always @(posedge clk) begin
cci_rd_req_wait <= 0;
end else begin
if ((STATE_IDLE == state)
&& (CMD_MEM_WRITE == cmd_type)) begin
&& (CMD_MEM_WRITE == cmd_type)) begin
cci_rd_req_valid <= (cmd_data_size != 0);
cci_rd_req_wait <= 0;
end
@ -678,7 +684,7 @@ always @(posedge clk) begin
end
if ((STATE_IDLE == state)
&& (CMD_MEM_WRITE == cmd_type)) begin
&& (CMD_MEM_WRITE == cmd_type)) begin
cci_rd_req_addr <= cmd_io_addr;
cci_rd_req_ctr <= 0;
cci_rd_rsp_ctr <= 0;
@ -813,8 +819,7 @@ assign cmd_mem_rd_done = cci_wr_req_done
&& cci_pending_writes_empty;
// Send write requests to CCI
always @(posedge clk)
begin
always @(posedge clk) begin
if (reset) begin
cci_wr_req_fire <= 0;
end else begin
@ -1021,4 +1026,22 @@ VX_scope #(
`UNUSED_PARAM (MMIO_SCOPE_WRITE)
`endif
///////////////////////////////////////////////////////////////////////////////
`ifdef DBG_TRACE_AFU
always @(posedge clk) begin
for (integer i = 0; i < NUM_LOCAL_MEM_BANKS; ++i) begin
if (avs_write[i] && ~avs_waitrequest[i]) begin
`TRACE(2, ("%d: AVS Wr Req: addr=0x%0h, byteen=0x%0h, burst=0x%0h, data=0x%0h\n", $time, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i], avs_writedata[i]));
end
if (avs_read[i] && ~avs_waitrequest[i]) begin
`TRACE(2, ("%d: AVS Rd Req: addr=0x%0h, byteen=0x%0h, burst=0x%0h\n", $time, `TO_FULL_ADDR(avs_address[i]), avs_byteenable[i], avs_burstcount[i]));
end
if (avs_readdatavalid[i]) begin
`TRACE(2, ("%d: AVS Rd Rsp: data=0x%0h\n", $time, avs_readdata[i]));
end
end
end
`endif
endmodule

View file

@ -16,7 +16,9 @@ module VX_alu_unit #(
`UNUSED_PARAM (CORE_ID)
localparam RSP_ARB_DATAW = `UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `NR_BITS + 1 + `NUM_THREADS * 32;
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
localparam RSP_ARB_DATAW = UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + `NUM_THREADS * 32;
localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED;
reg [`NUM_THREADS-1:0][31:0] alu_result;
@ -98,8 +100,8 @@ module VX_alu_unit #(
wire alu_ready_in;
wire alu_valid_out;
wire alu_ready_out;
wire [`UP(`UUID_BITS)-1:0] alu_uuid;
wire [`UP(`NW_BITS)-1:0] alu_wid;
wire [UUID_WIDTH-1:0] alu_uuid;
wire [NW_WIDTH-1:0] alu_wid;
wire [`NUM_THREADS-1:0] alu_tmask;
wire [31:0] alu_PC;
wire [`NR_BITS-1:0] alu_rd;
@ -115,7 +117,7 @@ module VX_alu_unit #(
assign alu_ready_in = alu_ready_out || ~alu_valid_out;
VX_pipe_register #(
.DATAW (1 + `UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `INST_BR_BITS + 1 + 1 + 32),
.DATAW (1 + UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `INST_BR_BITS + 1 + 1 + 32),
.RESETW (1)
) pipe_reg (
.clk (clk),
@ -141,8 +143,8 @@ module VX_alu_unit #(
wire mul_ready_in;
wire mul_valid_out;
wire mul_ready_out;
wire [`UP(`UUID_BITS)-1:0] mul_uuid;
wire [`UP(`NW_BITS)-1:0] mul_wid;
wire [UUID_WIDTH-1:0] mul_uuid;
wire [NW_WIDTH-1:0] mul_wid;
wire [`NUM_THREADS-1:0] mul_tmask;
wire [31:0] mul_PC;
wire [`NR_BITS-1:0] mul_rd;
@ -201,7 +203,7 @@ module VX_alu_unit #(
.NUM_INPUTS (RSP_ARB_SIZE),
.DATAW (RSP_ARB_DATAW),
.ARBITER ("R"),
.BUFFERED (1)
.BUFFERED (2)
) rsp_arb (
.clk (clk),
.reset (reset),

View file

@ -402,7 +402,7 @@
// Core Response Queue Size
`ifndef ICACHE_CRSQ_SIZE
`define ICACHE_CRSQ_SIZE 2
`define ICACHE_CRSQ_SIZE 4
`endif
// Miss Handling Register Size
@ -466,7 +466,7 @@
// Core Response Queue Size
`ifndef DCACHE_CRSQ_SIZE
`define DCACHE_CRSQ_SIZE 2
`define DCACHE_CRSQ_SIZE 4
`endif
// Miss Handling Register Size
@ -552,7 +552,7 @@
// Core Response Queue Size
`ifndef TCACHE_CRSQ_SIZE
`define TCACHE_CRSQ_SIZE 2
`define TCACHE_CRSQ_SIZE 4
`endif
// Miss Handling Register Size
@ -616,7 +616,7 @@
// Core Response Queue Size
`ifndef RCACHE_CRSQ_SIZE
`define RCACHE_CRSQ_SIZE 2
`define RCACHE_CRSQ_SIZE 4
`endif
// Miss Handling Register Size
@ -680,7 +680,7 @@
// Core Response Queue Size
`ifndef OCACHE_CRSQ_SIZE
`define OCACHE_CRSQ_SIZE 2
`define OCACHE_CRSQ_SIZE 4
`endif
// Miss Handling Register Size
@ -731,7 +731,7 @@
// Core Response Queue Size
`ifndef L2_CRSQ_SIZE
`define L2_CRSQ_SIZE 2
`define L2_CRSQ_SIZE 4
`endif
// Miss Handling Register Size
@ -782,7 +782,7 @@
// Core Response Queue Size
`ifndef L3_CRSQ_SIZE
`define L3_CRSQ_SIZE 2
`define L3_CRSQ_SIZE 4
`endif
// Miss Handling Register Size

View file

@ -54,6 +54,9 @@ module VX_csr_unit #(
output wire req_pending
);
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
reg [`NUM_THREADS-1:0][31:0] csr_read_data;
reg [31:0] csr_write_data;
@ -242,7 +245,8 @@ module VX_csr_unit #(
// send response
VX_skid_buffer #(
.DATAW (`UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `NR_BITS + 1 + `NUM_THREADS * 32)
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + `NUM_THREADS * 32),
.OUT_REG (1)
) rsp_sbuf (
.clk (clk),
.reset (reset),

View file

@ -17,6 +17,9 @@ module VX_dispatch (
`endif
VX_gpu_req_if.master gpu_req_if
);
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
wire [`UP(`NT_BITS)-1:0] tid;
wire alu_req_ready;
wire lsu_req_ready;
@ -43,7 +46,7 @@ module VX_dispatch (
wire [`INST_ALU_BITS-1:0] alu_op_type = `INST_ALU_BITS'(dispatch_if.op_type);
VX_skid_buffer #(
.DATAW (`UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `UP(`NT_BITS) + (2 * `NUM_THREADS * 32)),
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `UP(`NT_BITS) + (2 * `NUM_THREADS * 32)),
.OUT_REG (1)
) alu_buffer (
.clk (clk),
@ -63,7 +66,7 @@ module VX_dispatch (
wire lsu_is_fence = `INST_LSU_IS_FENCE(dispatch_if.op_mod);
VX_skid_buffer #(
.DATAW (`UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.OUT_REG (1)
) lsu_buffer (
.clk (clk),
@ -84,7 +87,7 @@ module VX_dispatch (
wire [`NRI_BITS-1:0] csr_imm = dispatch_if.imm[`CSR_ADDR_BITS +: `NRI_BITS];
VX_skid_buffer #(
.DATAW (`UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + `UP(`NT_BITS) + (`NUM_THREADS * 32)),
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + `UP(`NT_BITS) + (`NUM_THREADS * 32)),
.OUT_REG (1)
) csr_buffer (
.clk (clk),
@ -104,7 +107,7 @@ module VX_dispatch (
wire [`INST_FPU_BITS-1:0] fpu_op_type = `INST_FPU_BITS'(dispatch_if.op_type);
VX_skid_buffer #(
.DATAW (`UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + (3 * `NUM_THREADS * 32)),
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + (3 * `NUM_THREADS * 32)),
.OUT_REG (1)
) fpu_buffer (
.clk (clk),
@ -126,7 +129,7 @@ module VX_dispatch (
wire [`INST_GPU_BITS-1:0] gpu_op_type = `INST_GPU_BITS'(dispatch_if.op_type);
VX_skid_buffer #(
.DATAW (`UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + `UP(`NT_BITS) + (3 * `NUM_THREADS * 32)),
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + 32 + `INST_GPU_BITS + `INST_MOD_BITS + `NR_BITS + 1 + `UP(`NT_BITS) + (3 * `NUM_THREADS * 32)),
.OUT_REG (1)
) gpu_buffer (
.clk (clk),

View file

@ -45,9 +45,11 @@ module VX_gpu_unit #(
);
`UNUSED_PARAM (CORE_ID)
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
localparam WCTL_DATAW = `GPU_TMC_BITS + `GPU_WSPAWN_BITS + `GPU_SPLIT_BITS + `GPU_BARRIER_BITS;
localparam RSP_DATAW = `MAX(`NUM_THREADS * 32, WCTL_DATAW);
localparam RSP_ARB_DATAW = `UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `NR_BITS + 1 + RSP_DATAW + 1 + 1;
localparam RSP_ARB_DATAW = UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + RSP_DATAW + 1 + 1;
localparam RSP_ARB_SIZE = 1 + `EXT_TEX_ENABLED + `EXT_RASTER_ENABLED + `EXT_ROP_ENABLED + `EXT_IMADD_ENABLED;
wire [RSP_DATAW-1:0] rsp_data;
@ -115,7 +117,7 @@ module VX_gpu_unit #(
assign barrier.valid = is_bar;
assign barrier.id = rs1_data[`NB_BITS-1:0];
assign barrier.size_m1 = `UP(`NW_BITS)'(rs2_data - 1);
assign barrier.size_m1 = NW_WIDTH'(rs2_data - 1);
// Warp control response
wire wctl_req_valid = gpu_req_valid && (is_wspawn || is_tmc || is_split || is_join || is_bar || is_pred);
@ -227,8 +229,8 @@ module VX_gpu_unit #(
wire imadd_ready_in;
wire imadd_valid_out;
wire [`UP(`UUID_BITS)-1:0] imadd_uuid_out;
wire [`UP(`NW_BITS)-1:0] imadd_wid_out;
wire [UUID_WIDTH-1:0] imadd_uuid_out;
wire [NW_WIDTH-1:0] imadd_wid_out;
wire [`NUM_THREADS-1:0] imadd_tmask_out;
wire [31:0] imadd_PC_out;
wire [`NR_BITS-1:0] imadd_rd_out;
@ -244,7 +246,7 @@ module VX_gpu_unit #(
.DATA_WIDTH (32),
.MAX_SHIFT (24),
.SIGNED (1),
.TAG_WIDTH (`UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `NR_BITS)
.TAG_WIDTH (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS)
) imadd (
.clk (clk),
.reset (imadd_reset),
@ -252,9 +254,9 @@ module VX_gpu_unit #(
// Inputs
.valid_in (imadd_valid_in),
.shift_in ({gpu_req_if.op_mod[1:0], 3'b0}),
.data_in1 (gpu_req_if.rs1_data),
.data_in2 (gpu_req_if.rs2_data),
.data_in3 (gpu_req_if.rs3_data),
.data1_in (gpu_req_if.rs1_data),
.data2_in (gpu_req_if.rs2_data),
.data3_in (gpu_req_if.rs3_data),
.tag_in ({gpu_req_if.uuid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.rd}),
.ready_in (imadd_ready_in),
@ -294,7 +296,7 @@ module VX_gpu_unit #(
.NUM_INPUTS (RSP_ARB_SIZE),
.DATAW (RSP_ARB_DATAW),
.ARBITER ("R"),
.BUFFERED (1)
.BUFFERED (2)
) rsp_arb (
.clk (clk),
.reset (reset),

View file

@ -14,6 +14,7 @@ module VX_ibuffer #(
);
`UNUSED_PARAM (CORE_ID)
localparam NW_WIDTH = `UP(`NW_BITS);
localparam SIZE = (`IBUF_SIZE + 1);
localparam ALM_FULL = SIZE - 1;
localparam ALM_EMPTY = 1;
@ -22,7 +23,7 @@ module VX_ibuffer #(
localparam ADDRW = $clog2(SIZE);
localparam NWARPSW = $clog2(`NUM_WARPS+1);
`STATIC_ASSERT ((`IBUF_SIZE >= 2), ("invalid parameter"))
`STATIC_ASSERT ((`IBUF_SIZE > 1), ("invalid parameter"))
wire [`NUM_WARPS-1:0] q_full, q_empty, q_alm_full, q_alm_empty;
wire [DATAW-1:0] q_data_in;
@ -105,8 +106,8 @@ module VX_ibuffer #(
///////////////////////////////////////////////////////////////////////////
reg [`NUM_WARPS-1:0] valid_table, valid_table_n;
reg [`UP(`NW_BITS)-1:0] deq_wid, deq_wid_n;
reg [`UP(`NW_BITS)-1:0] deq_wid_rr, deq_wid_rr_n;
reg [NW_WIDTH-1:0] deq_wid, deq_wid_n;
reg [NW_WIDTH-1:0] deq_wid_rr, deq_wid_rr_n;
reg deq_valid, deq_valid_n;
reg [DATAW-1:0] deq_instr, deq_instr_n;
reg [NWARPSW-1:0] num_warps;
@ -177,7 +178,7 @@ module VX_ibuffer #(
end
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
assign decode_if.ibuf_pop[i] = deq_fire && (ibuffer_if.wid == `UP(`NW_BITS)'(i));
assign decode_if.ibuf_pop[i] = deq_fire && (ibuffer_if.wid == NW_WIDTH'(i));
end
assign decode_if.ready = ~q_full[decode_if.wid];

View file

@ -26,13 +26,16 @@ module VX_icache_stage #(
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (reset)
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
wire icache_req_valid;
wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr;
wire [ICACHE_TAG_WIDTH-1:0] icache_req_tag;
wire icache_req_ready;
wire [`UP(`UUID_BITS)-1:0] rsp_uuid;
wire [`UP(`NW_BITS)-1:0] req_tag, rsp_tag;
wire [UUID_WIDTH-1:0] rsp_uuid;
wire [NW_WIDTH-1:0] req_tag, rsp_tag;
wire icache_req_fire = icache_req_valid && icache_req_ready;
@ -66,7 +69,7 @@ module VX_icache_stage #(
) pending_reads (
.clk (clk),
.reset (reset),
.incr (icache_req_fire && (ifetch_req_if.wid == `UP(`NW_BITS)'(i))),
.incr (icache_req_fire && (ifetch_req_if.wid == NW_WIDTH'(i))),
.decr (ifetch_rsp_if.ibuf_pop[i]),
.full (pending_ibuf_full[i]),
`UNUSED_PIN (size),
@ -104,7 +107,7 @@ module VX_icache_stage #(
// Icache Response
wire [`UP(`NW_BITS)-1:0] rsp_wid = rsp_tag;
wire [NW_WIDTH-1:0] rsp_wid = rsp_tag;
assign ifetch_rsp_if.valid = icache_rsp_if.valid;
assign ifetch_rsp_if.tmask = rsp_tmask;

View file

@ -24,6 +24,9 @@ module VX_lsu_unit #(
VX_commit_if.master ld_commit_if,
VX_commit_if.master st_commit_if
);
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
localparam MEM_ADDRW = 32 - MEM_ASHIFT;
localparam REQ_ASHIFT = `CLOG2(DCACHE_WORD_SIZE);
@ -36,8 +39,8 @@ module VX_lsu_unit #(
localparam STACK_START_W = MEM_ADDRW'(`STACK_BASE_ADDR >> MEM_ASHIFT);
localparam STACK_END_W = MEM_ADDRW'((`STACK_BASE_ADDR - TOTAL_STACK_SIZE) >> MEM_ASHIFT);
// uuid, addr_type, wid, PC, tmask, rd, op_type, align, is_dup
localparam TAG_WIDTH = `UP(`UUID_BITS) + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS) + `UP(`NW_BITS) + 32 + `NUM_THREADS + `NR_BITS + `INST_LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1;
// uuid, addr_type, wid, PC, tmask, rd, op_type, align, is_dup
localparam TAG_WIDTH = UUID_WIDTH + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS) + NW_WIDTH + 32 + `NUM_THREADS + `NR_BITS + `INST_LSU_BITS + (`NUM_THREADS * REQ_ASHIFT) + 1;
`STATIC_ASSERT(0 == (`IO_BASE_ADDR % MEM_ASHIFT), ("invalid parameter"))
`STATIC_ASSERT(0 == (`STACK_BASE_ADDR % MEM_ASHIFT), ("invalid parameter"))
@ -163,13 +166,13 @@ module VX_lsu_unit #(
VX_cache_req_if #(
.NUM_REQS (DCACHE_NUM_REQS),
.WORD_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (`UP(`UUID_BITS) + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS) + LSUQ_TAG_BITS)
.TAG_WIDTH (UUID_WIDTH + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS) + LSUQ_TAG_BITS)
) cache_req_tmp_if();
VX_cache_rsp_if #(
.NUM_REQS (DCACHE_NUM_REQS),
.WORD_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (`UP(`UUID_BITS) + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS) + LSUQ_TAG_BITS)
.TAG_WIDTH (UUID_WIDTH + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS) + LSUQ_TAG_BITS)
) cache_rsp_tmp_if();
`RESET_RELAY (mem_scheduler_reset, reset);
@ -182,7 +185,7 @@ module VX_lsu_unit #(
.DATA_WIDTH (32),
.QUEUE_SIZE (`LSUQ_SIZE),
.TAG_WIDTH (TAG_WIDTH),
.UUID_WIDTH (`UP(`UUID_BITS) + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS)),
.UUID_WIDTH (UUID_WIDTH + (`NUM_THREADS * `CACHE_ADDR_TYPE_BITS)),
.RSP_PARTIAL (1),
.MEM_OUT_REG (3)
) mem_scheduler (
@ -236,7 +239,7 @@ module VX_lsu_unit #(
`ASSIGN_VX_CACHE_RSP_IF_XTAG (cache_rsp_tmp_if, cache_rsp_if);
for (genvar i = 0; i < DCACHE_NUM_REQS; ++i) begin
wire [`UP(`UUID_BITS)-1:0] cache_req_uuid, cache_rsp_uuid;
wire [UUID_WIDTH-1:0] cache_req_uuid, cache_rsp_uuid;
wire [`NUM_THREADS-1:0][`CACHE_ADDR_TYPE_BITS-1:0] cache_req_type, cache_rsp_type;
wire [`CLOG2(`LSUQ_SIZE)-1:0] cache_req_tag, cache_rsp_tag;
@ -282,9 +285,9 @@ module VX_lsu_unit #(
end
end
wire [`UP(`UUID_BITS)-1:0] rsp_uuid;
wire [UUID_WIDTH-1:0] rsp_uuid;
wire [`NUM_THREADS-1:0][`CACHE_ADDR_TYPE_BITS-1:0] rsp_addr_type;
wire [`UP(`NW_BITS)-1:0] rsp_wid;
wire [NW_WIDTH-1:0] rsp_wid;
wire [`NUM_THREADS-1:0] rsp_tmask;
wire [31:0] rsp_pc;
wire [`NR_BITS-1:0] rsp_rd;
@ -336,7 +339,8 @@ module VX_lsu_unit #(
// send load commit
VX_skid_buffer #(
.DATAW (`UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1)
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1),
.OUT_REG (1)
) rsp_sbuf (
.clk (clk),
.reset (reset),

View file

@ -188,8 +188,8 @@ module VX_mem_unit # (
.TAG_WIDTH (DCACHE_ARB_TAG_WIDTH),
.TAG_SEL_IDX (0),
.ARBITER ("P"),
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
.BUFFERED_REQ (2),
.BUFFERED_RSP (2)
) dcache_smem_switch (
.clk (clk),
.reset (dcache_smem_switch_reset),

View file

@ -6,7 +6,7 @@ module VX_muldiv (
// Inputs
input wire [`INST_MUL_BITS-1:0] alu_op,
input wire [`UP(`UUID_BITS)-1:0] uuid_in,
input wire [`UP(`UUID_BITS)-1:0] uuid_in,
input wire [`UP(`NW_BITS)-1:0] wid_in,
input wire [`NUM_THREADS-1:0] tmask_in,
input wire [31:0] PC_in,
@ -29,13 +29,15 @@ module VX_muldiv (
output wire ready_in,
output wire valid_out,
input wire ready_out
);
);
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
wire is_div_op = `INST_MUL_IS_DIV(alu_op);
wire [`NUM_THREADS-1:0][31:0] mul_result;
wire [`UP(`UUID_BITS)-1:0] mul_uuid_out;
wire [`UP(`NW_BITS)-1:0] mul_wid_out;
wire [UUID_WIDTH-1:0] mul_uuid_out;
wire [NW_WIDTH-1:0] mul_wid_out;
wire [`NUM_THREADS-1:0] mul_tmask_out;
wire [31:0] mul_PC_out;
wire [`NR_BITS-1:0] mul_rd_out;
@ -66,7 +68,7 @@ module VX_muldiv (
end
VX_shift_register #(
.DATAW (1 + `UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.DATAW (1 + UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.DEPTH (`LATENCY_IMUL),
.RESETW (1)
) mul_shift_reg (
@ -106,7 +108,7 @@ module VX_muldiv (
end
VX_shift_register #(
.DATAW (1 + `UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `NR_BITS + 1 + 1),
.DATAW (1 + UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + 1),
.DEPTH (`LATENCY_IMUL),
.RESETW (1)
) mul_shift_reg (
@ -122,8 +124,8 @@ module VX_muldiv (
///////////////////////////////////////////////////////////////////////////
wire [`NUM_THREADS-1:0][31:0] div_result;
wire [`UP(`UUID_BITS)-1:0] div_uuid_out;
wire [`UP(`NW_BITS)-1:0] div_wid_out;
wire [UUID_WIDTH-1:0] div_uuid_out;
wire [NW_WIDTH-1:0] div_wid_out;
wire [`NUM_THREADS-1:0] div_tmask_out;
wire [31:0] div_PC_out;
wire [`NR_BITS-1:0] div_rd_out;
@ -151,7 +153,7 @@ module VX_muldiv (
end
VX_shift_register #(
.DATAW (1 + `UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.DATAW (1 + UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.DEPTH (`LATENCY_IMUL),
.RESETW (1)
) div_shift_reg (
@ -175,7 +177,7 @@ module VX_muldiv (
.WIDTHQ (32),
.WIDTHR (32),
.LANES (`NUM_THREADS),
.TAGW (`UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `NR_BITS + 1 + 1)
.TAGW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + 1)
) divide (
.clk (clk),
.reset (reset),
@ -203,8 +205,8 @@ module VX_muldiv (
///////////////////////////////////////////////////////////////////////////
wire rsp_valid = mul_valid_out || div_valid_out;
wire [`UP(`UUID_BITS)-1:0] rsp_uuid = mul_valid_out ? mul_uuid_out : div_uuid_out;
wire [`UP(`NW_BITS)-1:0] rsp_wid = mul_valid_out ? mul_wid_out : div_wid_out;
wire [UUID_WIDTH-1:0] rsp_uuid = mul_valid_out ? mul_uuid_out : div_uuid_out;
wire [NW_WIDTH-1:0] rsp_wid = mul_valid_out ? mul_wid_out : div_wid_out;
wire [`NUM_THREADS-1:0] rsp_tmask = mul_valid_out ? mul_tmask_out : div_tmask_out;
wire [31:0] rsp_PC = mul_valid_out ? mul_PC_out : div_PC_out;
wire [`NR_BITS-1:0] rsp_rd = mul_valid_out ? mul_rd_out : div_rd_out;
@ -214,7 +216,7 @@ module VX_muldiv (
assign stall_out = ~ready_out && valid_out;
VX_pipe_register #(
.DATAW (1 + `UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.DATAW (1 + UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.RESETW (1)
) pipe_reg (
.clk (clk),

View file

@ -27,9 +27,11 @@ module VX_warp_sched #(
// Status
output wire busy
);
`UNUSED_PARAM (CORE_ID)
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
wire join_else;
wire [31:0] join_pc;
wire [`NUM_THREADS-1:0] join_tmask;
@ -48,7 +50,7 @@ module VX_warp_sched #(
reg [31:0] wspawn_pc;
reg [`NUM_WARPS-1:0] use_wspawn;
wire [`UP(`NW_BITS)-1:0] schedule_wid;
wire [NW_WIDTH-1:0] schedule_wid;
wire [`NUM_THREADS-1:0] schedule_tmask;
wire [31:0] schedule_pc;
wire schedule_valid;
@ -57,8 +59,8 @@ module VX_warp_sched #(
reg [`PERF_CTR_BITS-1:0] cycles;
reg [`NUM_WARPS-1:0][`UP(`UUID_BITS)-1:0] issued_instrs;
wire [`UP(`UUID_BITS)-1:0] instr_uuid;
reg [`NUM_WARPS-1:0][UUID_WIDTH-1:0] issued_instrs;
wire [UUID_WIDTH-1:0] instr_uuid;
wire ifetch_req_fire = ifetch_req_if.valid && ifetch_req_if.ready;
@ -144,7 +146,7 @@ module VX_warp_sched #(
end
use_wspawn[schedule_wid] <= 0;
issued_instrs[schedule_wid] <= issued_instrs[schedule_wid] + `UP(`UUID_BITS)'(1);
issued_instrs[schedule_wid] <= issued_instrs[schedule_wid] + UUID_WIDTH'(1);
end
if (ifetch_req_fire) begin
@ -174,7 +176,7 @@ module VX_warp_sched #(
wire [`NUM_WARPS-1:0] barrier_mask = barrier_masks[warp_ctl_if.barrier.id];
`POP_COUNT(active_barrier_count, barrier_mask);
assign reached_barrier_limit = (active_barrier_count[`UP(`NW_BITS)-1:0] == warp_ctl_if.barrier.size_m1);
assign reached_barrier_limit = (active_barrier_count[NW_WIDTH-1:0] == warp_ctl_if.barrier.size_m1);
reg [`NUM_WARPS-1:0] barrier_stalls;
always @(*) begin
@ -257,7 +259,7 @@ module VX_warp_sched #(
`endif
VX_generic_buffer #(
.DATAW (`UP(`UUID_BITS) + `NUM_THREADS + 32 + `UP(`NW_BITS)),
.DATAW (UUID_WIDTH + `NUM_THREADS + 32 + NW_WIDTH),
.OUT_REG (1)
) pipe_reg (
.clk (clk),

View file

@ -21,10 +21,10 @@ module VX_writeback #(
// simulation helper signals
output reg [`NUM_REGS-1:0][31:0] sim_wb_value
);
`UNUSED_PARAM (CORE_ID)
localparam DATAW = `UP(`NW_BITS) + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32) + 1;
localparam NW_WIDTH = `UP(`NW_BITS);
localparam DATAW = NW_WIDTH + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32) + 1;
localparam NUM_RSPS = 4 + `EXT_F_ENABLED;
`ifdef EXT_F_ENABLE

View file

@ -45,7 +45,8 @@ module VX_bank #(
parameter MEM_OUT_REG = 0,
parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE),
parameter WORD_SEL_BITS = `UP(`WORD_SEL_BITS)
parameter REQ_SEL_WIDTH = `UP(`REQ_SEL_BITS),
parameter WORD_SEL_WIDTH = `UP(`WORD_SEL_BITS)
) (
input wire clk,
input wire reset,
@ -59,10 +60,10 @@ module VX_bank #(
// Core Request
input wire core_req_valid,
input wire [NUM_PORTS-1:0] core_req_pmask,
input wire [NUM_PORTS-1:0][WORD_SEL_BITS-1:0] core_req_wsel,
input wire [NUM_PORTS-1:0][WORD_SEL_WIDTH-1:0] core_req_wsel,
input wire [NUM_PORTS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [NUM_PORTS-1:0][`UP(`REQ_SEL_BITS)-1:0] core_req_idx,
input wire [NUM_PORTS-1:0][REQ_SEL_WIDTH-1:0] core_req_idx,
input wire [NUM_PORTS-1:0][TAG_WIDTH-1:0] core_req_tag,
input wire core_req_rw,
input wire [`LINE_ADDR_WIDTH-1:0] core_req_addr,
@ -71,7 +72,7 @@ module VX_bank #(
// Core Response
output wire core_rsp_valid,
output wire [NUM_PORTS-1:0] core_rsp_pmask,
output wire [NUM_PORTS-1:0][`UP(`REQ_SEL_BITS)-1:0] core_rsp_idx,
output wire [NUM_PORTS-1:0][REQ_SEL_WIDTH-1:0] core_rsp_idx,
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
output wire [NUM_PORTS-1:0][TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_ready,
@ -81,7 +82,7 @@ module VX_bank #(
output wire mem_req_rw,
output wire [NUM_PORTS-1:0] mem_req_pmask,
output wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mem_req_byteen,
output wire [NUM_PORTS-1:0][WORD_SEL_BITS-1:0] mem_req_wsel,
output wire [NUM_PORTS-1:0][WORD_SEL_WIDTH-1:0] mem_req_wsel,
output wire [`LINE_ADDR_WIDTH-1:0] mem_req_addr,
output wire [MSHR_ADDR_WIDTH-1:0] mem_req_id,
output wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mem_req_data,
@ -104,17 +105,17 @@ module VX_bank #(
wire creq_valid;
wire [NUM_PORTS-1:0] creq_pmask;
wire [NUM_PORTS-1:0][WORD_SEL_BITS-1:0] creq_wsel;
wire [NUM_PORTS-1:0][WORD_SEL_WIDTH-1:0] creq_wsel;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] creq_byteen;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] creq_data;
wire [NUM_PORTS-1:0][`UP(`REQ_SEL_BITS)-1:0] creq_idx;
wire [NUM_PORTS-1:0][REQ_SEL_WIDTH-1:0] creq_idx;
wire [NUM_PORTS-1:0][TAG_WIDTH-1:0] creq_tag;
wire creq_rw;
wire [`LINE_ADDR_WIDTH-1:0] creq_addr;
wire creq_ready;
VX_elastic_buffer #(
.DATAW (1 + `LINE_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SEL_BITS + WORD_SIZE + `WORD_WIDTH + `UP(`REQ_SEL_BITS) + TAG_WIDTH)),
.DATAW (1 + `LINE_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SEL_WIDTH + WORD_SIZE + `WORD_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH)),
.SIZE (CREQ_SIZE)
) core_req_queue (
.clk (clk),
@ -127,10 +128,10 @@ module VX_bank #(
.ready_out (creq_ready)
);
wire mreq_alm_full;
wire crsq_alm_full;
wire mreq_alm_full;
wire [`LINE_ADDR_WIDTH-1:0] mem_rsp_addr;
wire crsq_valid, crsq_ready;
wire crsq_stall;
wire mshr_deq_valid;
wire mshr_deq_ready;
@ -139,16 +140,16 @@ module VX_bank #(
wire [MSHR_ADDR_WIDTH-1:0] mshr_deq_id;
wire [`LINE_ADDR_WIDTH-1:0] mshr_deq_addr;
wire [NUM_PORTS-1:0][TAG_WIDTH-1:0] mshr_tag;
wire [NUM_PORTS-1:0][WORD_SEL_BITS-1:0] mshr_wsel;
wire [NUM_PORTS-1:0][`UP(`REQ_SEL_BITS)-1:0] mshr_idx;
wire [NUM_PORTS-1:0][WORD_SEL_WIDTH-1:0] mshr_wsel;
wire [NUM_PORTS-1:0][REQ_SEL_WIDTH-1:0] mshr_idx;
wire [NUM_PORTS-1:0] mshr_pmask;
wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1;
wire is_read_st0, is_read_st1;
wire is_write_st0, is_write_st1;
wire [NUM_PORTS-1:0][WORD_SEL_BITS-1:0] wsel_st0, wsel_st1;
wire [NUM_PORTS-1:0][WORD_SEL_WIDTH-1:0] wsel_st0, wsel_st1;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] byteen_st0, byteen_st1;
wire [NUM_PORTS-1:0][`UP(`REQ_SEL_BITS)-1:0] req_idx_st0, req_idx_st1;
wire [NUM_PORTS-1:0][REQ_SEL_WIDTH-1:0] req_idx_st0, req_idx_st1;
wire [NUM_PORTS-1:0] pmask_st0, pmask_st1;
wire [NUM_PORTS-1:0][TAG_WIDTH-1:0] tag_st0, tag_st1;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] rdata_st1;
@ -162,10 +163,8 @@ module VX_bank #(
wire mshr_pending_st0, mshr_pending_st1;
// prevent read-during-write hazard when accessing tags/data block RAMs
wire rdw_fill_hazard_st0 = valid_st0 && is_fill_st0;
reg rdw_hazard_st1;
wire pipe_stall = crsq_stall || rdw_hazard_st1;
wire rdw_fill_hazard_st0 = valid_st0 && is_fill_st0;
reg pipe_enable;
// determine which input to select in priority order
wire mshr_grant = ~flush_enable;
@ -179,20 +178,21 @@ module VX_bank #(
assign mshr_deq_ready = mshr_grant
&& ~rdw_fill_hazard_st0 // prevent read-during-write hazard
&& ~crsq_stall; // ensure core_rsp_queue not full
&& ~crsq_alm_full; // ensure core_rsp_queue not full
assign mem_rsp_ready = mrsq_grant
&& ~pipe_stall;
&& pipe_enable;
assign creq_ready = creq_grant
&& ~mreq_alm_full // ensure mem_req_queue not full
&& ~crsq_alm_full // ensure core_rsp_queue not full
&& ~mshr_alm_full // ensure mshr not full
&& ~pipe_stall;
&& pipe_enable;
wire flush_fire = flush_enable;
wire mshr_deq_fire= mshr_deq_valid && mshr_deq_ready;
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
wire creq_fire = creq_valid && creq_ready;
wire flush_fire = flush_enable;
wire mshr_deq_fire = mshr_deq_valid && mshr_deq_ready;
wire mem_rsp_fire = mem_rsp_valid && mem_rsp_ready;
wire creq_fire = creq_valid && creq_ready;
wire [TAG_WIDTH-1:0] mshr_creq_tag = mshr_enable ? mshr_tag[0] : creq_tag[0];
`ASSIGN_REQ_UUID (req_uuid_sel, mshr_creq_tag)
@ -205,12 +205,12 @@ module VX_bank #(
end
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `LINE_WIDTH + NUM_PORTS * (WORD_SEL_BITS + WORD_SIZE + `UP(`REQ_SEL_BITS) + 1 + TAG_WIDTH) + MSHR_ADDR_WIDTH),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `LINE_WIDTH + NUM_PORTS * (WORD_SEL_WIDTH + WORD_SIZE + REQ_SEL_WIDTH + 1 + TAG_WIDTH) + MSHR_ADDR_WIDTH),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (~pipe_stall),
.enable (pipe_enable),
.data_in ({
flush_fire || mshr_deq_fire || mem_rsp_fire || creq_fire,
flush_enable,
@ -257,7 +257,7 @@ module VX_bank #(
.req_uuid (req_uuid_st0),
.stall (pipe_stall),
.stall (~pipe_enable),
// read/Fill
.lookup (do_lookup_st0),
@ -272,17 +272,17 @@ module VX_bank #(
assign is_hit_st0 = tag_match_st0;
// ensure mshr reply never get a miss
`RUNTIME_ASSERT (tag_match_st0 || ~(valid_st0 && is_mshr_st0), ("runtime error"));
`RUNTIME_ASSERT (is_hit_st0 || ~(valid_st0 && is_mshr_st0), ("runtime error"));
wire [MSHR_ADDR_WIDTH-1:0] mshr_id_a_st0 = is_read_st0 ? mshr_alloc_id : mshr_id_st0;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + NUM_WAYS + `LINE_ADDR_WIDTH + `LINE_WIDTH + NUM_PORTS * (WORD_SEL_BITS + WORD_SIZE + `UP(`REQ_SEL_BITS) + 1 + TAG_WIDTH) + MSHR_ADDR_WIDTH + 1),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + NUM_WAYS + `LINE_ADDR_WIDTH + `LINE_WIDTH + NUM_PORTS * (WORD_SEL_WIDTH + WORD_SIZE + REQ_SEL_WIDTH + 1 + TAG_WIDTH) + MSHR_ADDR_WIDTH + 1),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (~pipe_stall),
.enable (pipe_enable),
.data_in ({valid_st0, is_mshr_st0, is_fill_st0, is_read_st0, is_write_st0, is_hit_st0, way_sel_st0, addr_st0, wdata_st0, wsel_st0, byteen_st0, req_idx_st0, pmask_st0, tag_st0, mshr_id_a_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, is_read_st1, is_write_st1, is_hit_st1, way_sel_st1, addr_st1, wdata_st1, wsel_st1, byteen_st1, req_idx_st1, pmask_st1, tag_st1, mshr_id_st1, mshr_pending_st1})
);
@ -293,7 +293,7 @@ module VX_bank #(
wire do_read_st1 = valid_st1 && is_read_st1;
wire do_write_st1 = valid_st1 && is_write_st1;
wire do_fill_st1 = valid_st1 && is_fill_st1;
wire do_mshr_st1 = valid_st1 && is_mshr_st1;
wire do_mshr_st1 = valid_st1 && is_mshr_st1;
wire do_read_hit_st1 = do_read_st1 && is_hit_st1;
wire do_read_miss_st1 = do_read_st1 && ~is_hit_st1;
@ -306,10 +306,9 @@ module VX_bank #(
// detect read during write data hazard
always @(posedge clk) begin
if (reset) begin
rdw_hazard_st1 <= 0;
end else if (~crsq_stall) begin
rdw_hazard_st1 <= do_read_st0 && do_write_hit_st1 && (addr_st0 == addr_st1)
&& ~rdw_hazard_st1;
pipe_enable <= 0;
end else begin
pipe_enable <= ~(do_read_st0 && do_write_hit_st1 && (addr_st0 == addr_st1) && pipe_enable);
end
end
@ -333,7 +332,7 @@ module VX_bank #(
.req_uuid (req_uuid_st1),
.stall (pipe_stall),
.stall (~pipe_enable),
.read (do_read_hit_st1 || do_mshr_st1),
.fill (do_fill_st1),
@ -349,10 +348,10 @@ module VX_bank #(
);
wire [MSHR_SIZE-1:0] mshr_matches;
wire mshr_allocate = do_read_st0 && ~crsq_stall;
wire mshr_replay = do_fill_st0 && ~crsq_stall;
wire mshr_allocate = do_read_st0;
wire mshr_replay = do_fill_st0;
wire mshr_lookup = mshr_allocate;
wire mshr_release = do_read_hit_st1 && ~pipe_stall;
wire mshr_release = do_read_hit_st1 && pipe_enable;
VX_pending_size #(
.SIZE (MSHR_SIZE)
@ -424,44 +423,51 @@ module VX_bank #(
end
assign mshr_pending_st0 = (| lookup_matches);
// Enqueue core response
// schedule core response
wire crsq_push, crsq_pop, crsq_empty;
wire [NUM_PORTS-1:0] crsq_pmask;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] crsq_data;
wire [NUM_PORTS-1:0][`UP(`REQ_SEL_BITS)-1:0] crsq_idx;
wire [NUM_PORTS-1:0][REQ_SEL_WIDTH-1:0] crsq_idx;
wire [NUM_PORTS-1:0][TAG_WIDTH-1:0] crsq_tag;
assign crsq_valid = (do_read_hit_st1 && ~rdw_hazard_st1)
assign crsq_push = (do_read_hit_st1 && pipe_enable)
|| do_mshr_st1;
assign crsq_stall = crsq_valid && ~crsq_ready;
assign crsq_pop = core_rsp_valid && core_rsp_ready;
assign crsq_pmask = pmask_st1;
assign crsq_idx = req_idx_st1;
assign crsq_data = rdata_st1;
assign crsq_tag = tag_st1;
VX_elastic_buffer #(
.DATAW (NUM_PORTS * (TAG_WIDTH + 1 + `WORD_WIDTH + `UP(`REQ_SEL_BITS))),
.SIZE (CRSQ_SIZE),
.OUT_REG (CORE_OUT_REG)
VX_fifo_queue #(
.DATAW (NUM_PORTS * (TAG_WIDTH + 1 + `WORD_WIDTH + REQ_SEL_WIDTH)),
.SIZE (CRSQ_SIZE),
.ALM_FULL (CRSQ_SIZE-2),
.OUT_REG (CORE_OUT_REG)
) core_rsp_queue (
.clk (clk),
.reset (reset),
.valid_in (crsq_valid),
.ready_in (crsq_ready),
.data_in ({crsq_tag, crsq_pmask, crsq_data, crsq_idx}),
.valid_out (core_rsp_valid),
.data_out ({core_rsp_tag, core_rsp_pmask, core_rsp_data, core_rsp_idx}),
.ready_out (core_rsp_ready)
.clk (clk),
.reset (reset),
.push (crsq_push),
.pop (crsq_pop),
.data_in ({crsq_tag, crsq_pmask, crsq_data, crsq_idx}),
.data_out ({core_rsp_tag, core_rsp_pmask, core_rsp_data, core_rsp_idx}),
.empty (crsq_empty),
.alm_full (crsq_alm_full),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (size)
);
// Enqueue memory request
assign core_rsp_valid = ~crsq_empty;
// schedule memory request
wire mreq_push, mreq_pop, mreq_empty;
wire [NUM_PORTS-1:0][`WORD_WIDTH-1:0] mreq_data;
wire [NUM_PORTS-1:0][WORD_SIZE-1:0] mreq_byteen;
wire [NUM_PORTS-1:0][WORD_SEL_BITS-1:0] mreq_wsel;
wire [NUM_PORTS-1:0][WORD_SEL_WIDTH-1:0] mreq_wsel;
wire [NUM_PORTS-1:0] mreq_pmask;
wire [`LINE_ADDR_WIDTH-1:0] mreq_addr;
wire [MSHR_ADDR_WIDTH-1:0] mreq_id;
@ -481,7 +487,7 @@ module VX_bank #(
assign mreq_data = creq_data_st1;
VX_fifo_queue #(
.DATAW (1 + `LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SIZE + WORD_SEL_BITS + `WORD_WIDTH)),
.DATAW (1 + `LINE_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_PORTS * (1 + WORD_SIZE + WORD_SEL_WIDTH + `WORD_WIDTH)),
.SIZE (MREQ_SIZE),
.ALM_FULL (MREQ_SIZE-2),
.OUT_REG (MEM_OUT_REG)
@ -510,13 +516,12 @@ module VX_bank #(
`endif
`ifdef DBG_TRACE_CACHE_BANK
wire crsq_fire = crsq_valid && crsq_ready;
wire pipeline_stall = (mshr_deq_valid || mem_rsp_valid || creq_valid)
&& ~(mshr_deq_fire || mem_rsp_fire || creq_fire);
always @(posedge clk) begin
if (pipeline_stall) begin
`TRACE(3, ("%d: *** %s:%0d stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID, BANK_ID, crsq_stall, mreq_alm_full, mshr_alm_full));
`TRACE(3, ("%d: *** %s:%0d stall: crsq=%b, mreq=%b, mshr=%b\n", $time, INSTANCE_ID, BANK_ID, crsq_alm_full, mreq_alm_full, mshr_alm_full));
end
if (flush_enable) begin
`TRACE(2, ("%d: %s:%0d flush: addr=0x%0h\n", $time, INSTANCE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(flush_addr, BANK_ID)));
@ -533,7 +538,7 @@ module VX_bank #(
else
`TRACE(2, ("%d: %s:%0d core-rd-req: addr=0x%0h, tag=0x%0h, pmask=%b, tid=%0d (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_idx, req_uuid_sel));
end
if (crsq_fire) begin
if (crsq_push) begin
`TRACE(2, ("%d: %s:%0d core-rd-rsp: addr=0x%0h, tag=0x%0h, pmask=%b, tid=%0d, data=0x%0h (#%0d)\n", $time, INSTANCE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_idx, crsq_data, req_uuid_st1));
end
if (mreq_push) begin

View file

@ -165,8 +165,8 @@ module VX_miss_resrv #(
dequeue_id_r <= dequeue_id_n;
allocate_id_r <= allocate_id_n;
`ASSERT(!allocate_fire || !valid_table[allocate_id_r], ("runtime error"));
`ASSERT(!release_valid || valid_table[release_id], ("runtime error"));
// Bookkeeping sanity checks on the reservation table:
//  - an entry being allocated must not already be marked valid;
//  - an entry being released must currently be marked valid.
`ASSERT(!allocate_fire || !valid_table[allocate_id_r], ("runtime error: allocating used entry"));
`ASSERT(!release_valid || valid_table[release_id], ("runtime error: releasing unused entry"));
end
`RUNTIME_ASSERT((!allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s:%0d in-use allocation: addr=0x%0h, id=%0d", $time, INSTANCE_ID, BANK_ID,

View file

@ -20,11 +20,14 @@ module VX_fpu_agent #(
input wire csr_pending,
output wire req_pending
);
);
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
// Store request info
wire [`UP(`UUID_BITS)-1:0] rsp_uuid;
wire [`UP(`NW_BITS)-1:0] rsp_wid;
wire [UUID_WIDTH-1:0] rsp_uuid;
wire [NW_WIDTH-1:0] rsp_wid;
wire [`NUM_THREADS-1:0] rsp_tmask;
wire [31:0] rsp_PC;
wire [`NR_BITS-1:0] rsp_rd;
@ -38,7 +41,7 @@ module VX_fpu_agent #(
assign rsp_tag = fpu_rsp_if.tag;
VX_index_buffer #(
.DATAW (`UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `NR_BITS),
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS),
.SIZE (`FPU_REQ_QUEUE_SIZE)
) tag_store (
.clk (clk),
@ -104,7 +107,8 @@ module VX_fpu_agent #(
// commit
VX_skid_buffer #(
.DATAW (`UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `NR_BITS + (`NUM_THREADS * 32))
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + (`NUM_THREADS * 32)),
.OUT_REG (1)
) rsp_sbuf (
.clk (clk),
.reset (reset),

View file

@ -0,0 +1,192 @@
`include "VX_define.vh"

`TRACING_OFF
// VX_avs_adapter
//
// Bridges a single valid/ready memory request/response port onto NUM_BANKS
// AVS bank interfaces.
//  - The low BANK_ADDRW bits of mem_req_addr pick the target bank.
//  - Every accepted read has its tag stored in a per-bank FIFO; returned read
//    data is stored in a second per-bank FIFO and both are popped together,
//    so each response is re-associated with its tag in issue order.
//  - A per-bank pending counter blocks new requests while its `full` flag
//    is asserted.
//  - Bank responses are merged onto the single response port by an arbiter.
module VX_avs_adapter #(
    parameter DATA_WIDTH    = 1,
    parameter ADDR_WIDTH    = 1,
    parameter BURST_WIDTH   = 1,
    parameter NUM_BANKS     = 1,
    parameter REQ_TAG_WIDTH = 1,
    parameter RD_QUEUE_SIZE = 1,
    parameter BUFFERED_REQ  = 0,
    parameter BUFFERED_RSP  = 0
) (
    input  wire                     clk,
    input  wire                     reset,

    // Memory request
    input  wire                     mem_req_valid,
    input  wire                     mem_req_rw,
    input  wire [DATA_WIDTH/8-1:0]  mem_req_byteen,
    input  wire [ADDR_WIDTH-1:0]    mem_req_addr,
    input  wire [DATA_WIDTH-1:0]    mem_req_data,
    input  wire [REQ_TAG_WIDTH-1:0] mem_req_tag,
    output wire                     mem_req_ready,

    // Memory response
    output wire                     mem_rsp_valid,
    output wire [DATA_WIDTH-1:0]    mem_rsp_data,
    output wire [REQ_TAG_WIDTH-1:0] mem_rsp_tag,
    input  wire                     mem_rsp_ready,

    // AVS bus
    output wire [DATA_WIDTH-1:0]    avs_writedata [NUM_BANKS],
    input  wire [DATA_WIDTH-1:0]    avs_readdata [NUM_BANKS],
    output wire [ADDR_WIDTH-1:0]    avs_address [NUM_BANKS],
    input  wire                     avs_waitrequest [NUM_BANKS],
    output wire                     avs_write [NUM_BANKS],
    output wire                     avs_read [NUM_BANKS],
    output wire [DATA_WIDTH/8-1:0]  avs_byteenable [NUM_BANKS],
    output wire [BURST_WIDTH-1:0]   avs_burstcount [NUM_BANKS],
    input  wire                     avs_readdatavalid [NUM_BANKS]
);
    localparam DATA_SIZE           = DATA_WIDTH/8;
    localparam RD_QUEUE_ADDR_WIDTH = $clog2(RD_QUEUE_SIZE+1);
    localparam BANK_ADDRW          = `LOG2UP(NUM_BANKS);

    ///////////////////////////////////////////////////////////////////////////
    // Request path
    ///////////////////////////////////////////////////////////////////////////

    wire [NUM_BANKS-1:0]                          req_queue_push, req_queue_pop;
    wire [NUM_BANKS-1:0][REQ_TAG_WIDTH-1:0]       req_queue_tag_out;
    wire [NUM_BANKS-1:0]                          req_queue_going_full;
    wire [NUM_BANKS-1:0][RD_QUEUE_ADDR_WIDTH-1:0] req_queue_size;
    wire [BANK_ADDRW-1:0]                         req_bank_sel;
    wire [NUM_BANKS-1:0]                          bank_req_ready;

    // bank selection: low address bits, or constant 0 for a single bank
    if (NUM_BANKS > 1) begin
        assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
    end else begin
        assign req_bank_sel = 0;
    end

    // a tag is recorded only for a read that the selected bank accepts
    for (genvar i = 0; i < NUM_BANKS; ++i) begin
        assign req_queue_push[i] = (req_bank_sel == i)
                                && bank_req_ready[i]
                                && mem_req_valid
                                && ~mem_req_rw;
    end

    // per-bank pending-read counter and tag FIFO
    for (genvar i = 0; i < NUM_BANKS; ++i) begin
        VX_pending_size #(
            .SIZE (RD_QUEUE_SIZE)
        ) pending_size (
            .clk   (clk),
            .reset (reset),
            .incr  (req_queue_push[i]),
            .decr  (req_queue_pop[i]),
            .full  (req_queue_going_full[i]),
            .size  (req_queue_size[i]),
            `UNUSED_PIN (empty)
        );
        `UNUSED_VAR (req_queue_size)

        VX_fifo_queue #(
            .DATAW (REQ_TAG_WIDTH),
            .SIZE  (RD_QUEUE_SIZE)
        ) rd_req_queue (
            .clk      (clk),
            .reset    (reset),
            .push     (req_queue_push[i]),
            .pop      (req_queue_pop[i]),
            .data_in  (mem_req_tag),
            .data_out (req_queue_tag_out[i]),
            `UNUSED_PIN (empty),
            `UNUSED_PIN (full),
            `UNUSED_PIN (alm_empty),
            `UNUSED_PIN (alm_full),
            `UNUSED_PIN (size)
        );
    end

    // per-bank request buffer driving the AVS command signals
    for (genvar i = 0; i < NUM_BANKS; ++i) begin
        // buffer input side
        wire buf_valid_in;
        wire buf_ready_in;

        // buffer output side (towards the AVS bank)
        wire                  avs_req_valid;
        wire                  avs_req_ready;
        wire                  avs_req_rw;
        wire [DATA_SIZE-1:0]  avs_req_byteen;
        wire [ADDR_WIDTH-1:0] avs_req_addr;
        wire [DATA_WIDTH-1:0] avs_req_data;

        // offer the request to this bank only when it is the selected bank
        // and its pending counter does not report full
        assign buf_valid_in = (req_bank_sel == i)
                           && ~req_queue_going_full[i]
                           && mem_req_valid;

        VX_skid_buffer #(
            .DATAW    (1 + DATA_SIZE + ADDR_WIDTH + DATA_WIDTH),
            .PASSTHRU (BUFFERED_REQ == 0),
            .OUT_REG  (BUFFERED_REQ > 1)
        ) req_out_buf (
            .clk       (clk),
            .reset     (reset),
            .valid_in  (buf_valid_in),
            .ready_in  (buf_ready_in),
            .data_in   ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data}),
            .data_out  ({avs_req_rw, avs_req_byteen, avs_req_addr, avs_req_data}),
            .valid_out (avs_req_valid),
            .ready_out (avs_req_ready)
        );

        // the bank stalls the command by asserting waitrequest
        assign avs_req_ready = ~avs_waitrequest[i];

        assign avs_write[i]      = avs_req_valid && avs_req_rw;
        assign avs_read[i]       = avs_req_valid && ~avs_req_rw;
        assign avs_writedata[i]  = avs_req_data;
        assign avs_byteenable[i] = avs_req_byteen;
        assign avs_address[i]    = avs_req_addr;
        assign avs_burstcount[i] = BURST_WIDTH'(1); // always single-beat

        assign bank_req_ready[i] = ~req_queue_going_full[i] && buf_ready_in;
    end

    // the requester sees the readiness of the bank it is addressing
    if (NUM_BANKS > 1) begin
        assign mem_req_ready = bank_req_ready[req_bank_sel];
    end else begin
        assign mem_req_ready = bank_req_ready;
    end

    ///////////////////////////////////////////////////////////////////////////
    // Response path
    ///////////////////////////////////////////////////////////////////////////

    wire [NUM_BANKS-1:0]                               rsp_arb_valid_in;
    wire [NUM_BANKS-1:0][DATA_WIDTH+REQ_TAG_WIDTH-1:0] rsp_arb_data_in;
    wire [NUM_BANKS-1:0]                               rsp_arb_ready_in;

    wire [NUM_BANKS-1:0][DATA_WIDTH-1:0] rsp_queue_data_out;
    wire [NUM_BANKS-1:0]                 rsp_queue_empty;

    // per-bank read-data FIFO, popped in lock-step with the tag FIFO
    for (genvar i = 0; i < NUM_BANKS; ++i) begin
        VX_fifo_queue #(
            .DATAW (DATA_WIDTH),
            .SIZE  (RD_QUEUE_SIZE)
        ) rd_rsp_queue (
            .clk      (clk),
            .reset    (reset),
            .push     (avs_readdatavalid[i]),
            .pop      (req_queue_pop[i]),
            .data_in  (avs_readdata[i]),
            .data_out (rsp_queue_data_out[i]),
            .empty    (rsp_queue_empty[i]),
            `UNUSED_PIN (full),
            `UNUSED_PIN (alm_empty),
            `UNUSED_PIN (alm_full),
            `UNUSED_PIN (size)
        );
    end

    // pair each bank's read data with the tag at the head of its tag FIFO
    for (genvar i = 0; i < NUM_BANKS; ++i) begin
        assign rsp_arb_data_in[i] = {rsp_queue_data_out[i], req_queue_tag_out[i]};
    end

    // a bank competes for the response port while it holds read data; both
    // of its FIFOs are popped when the arbiter accepts the entry
    assign rsp_arb_valid_in = ~rsp_queue_empty;
    assign req_queue_pop    = rsp_arb_valid_in & rsp_arb_ready_in;

    VX_stream_arb #(
        .NUM_INPUTS (NUM_BANKS),
        .DATAW      (DATA_WIDTH + REQ_TAG_WIDTH),
        .ARBITER    ("R"),
        .BUFFERED   (BUFFERED_RSP)
    ) rsp_arb (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (rsp_arb_valid_in),
        .data_in   (rsp_arb_data_in),
        .ready_in  (rsp_arb_ready_in),
        .valid_out (mem_rsp_valid),
        .data_out  ({mem_rsp_data, mem_rsp_tag}),
        .ready_out (mem_rsp_ready)
    );

endmodule
`TRACING_ON

View file

@ -25,7 +25,11 @@ module VX_fifo_queue #(
localparam ADDRW = $clog2(SIZE);
`STATIC_ASSERT(`ISPOW2(SIZE), ("must be 0 or power of 2!"))
`STATIC_ASSERT(ALM_FULL > 0, ("alm_full must be greater than 0!"))
`STATIC_ASSERT(ALM_FULL < SIZE, ("alm_full must be smaller than size!"))
`STATIC_ASSERT(ALM_EMPTY > 0, ("alm_empty must be greater than 0!"))
`STATIC_ASSERT(ALM_EMPTY < SIZE, ("alm_empty must be smaller than size!"))
`STATIC_ASSERT(`ISPOW2(SIZE), ("size must be a power of 2!"))
if (SIZE == 1) begin

View file

@ -0,0 +1,227 @@
`include "VX_platform.vh"

`TRACING_OFF
// VX_mem_adapter
//
// Converts a memory request/response port between a source word size
// (SRC_DATA_WIDTH) and a destination word size (DST_DATA_WIDTH).
// With D = |log2(DST_DATA_WIDTH) - log2(SRC_DATA_WIDTH)| and P = 2**D:
//
//  - Wider destination : the low D source-address bits select one of the P
//    source-sized lanes of a destination word. Byte-enables and data are
//    shifted into that lane, and the lane index is appended to the request
//    tag so the response can pick the lane back out.
//  - Narrower destination : each source request is issued as P consecutive
//    destination requests (chunk counter appended to the address); the P
//    responses are reassembled into one source-sized response.
//  - Equal sizes : signals pass through, with address/tag resized only.
//
// Signal naming: "*_in" ports face the source side, "*_out" ports face the
// destination side. Requests leave through req_out_buf and responses through
// rsp_in_buf; each is a pass-through when the matching BUFFERED_* parameter
// is 0 and registers its output when that parameter is greater than 1.
module VX_mem_adapter #(
    parameter SRC_DATA_WIDTH = 1,
    parameter SRC_ADDR_WIDTH = 1,
    parameter DST_DATA_WIDTH = 1,
    parameter DST_ADDR_WIDTH = 1,
    parameter SRC_TAG_WIDTH  = 1,
    parameter DST_TAG_WIDTH  = 1,
    parameter BUFFERED_REQ   = 0,
    parameter BUFFERED_RSP   = 0
) (
    input  wire                         clk,
    input  wire                         reset,

    // source-side request
    input  wire                         mem_req_valid_in,
    input  wire [SRC_ADDR_WIDTH-1:0]    mem_req_addr_in,
    input  wire                         mem_req_rw_in,
    input  wire [SRC_DATA_WIDTH/8-1:0]  mem_req_byteen_in,
    input  wire [SRC_DATA_WIDTH-1:0]    mem_req_data_in,
    input  wire [SRC_TAG_WIDTH-1:0]     mem_req_tag_in,
    output wire                         mem_req_ready_in,

    // source-side response
    output wire                         mem_rsp_valid_in,
    output wire [SRC_DATA_WIDTH-1:0]    mem_rsp_data_in,
    output wire [SRC_TAG_WIDTH-1:0]     mem_rsp_tag_in,
    input  wire                         mem_rsp_ready_in,

    // destination-side request
    output wire                         mem_req_valid_out,
    output wire [DST_ADDR_WIDTH-1:0]    mem_req_addr_out,
    output wire                         mem_req_rw_out,
    output wire [DST_DATA_WIDTH/8-1:0]  mem_req_byteen_out,
    output wire [DST_DATA_WIDTH-1:0]    mem_req_data_out,
    output wire [DST_TAG_WIDTH-1:0]     mem_req_tag_out,
    input  wire                         mem_req_ready_out,

    // destination-side response
    input  wire                         mem_rsp_valid_out,
    input  wire [DST_DATA_WIDTH-1:0]    mem_rsp_data_out,
    input  wire [DST_TAG_WIDTH-1:0]     mem_rsp_tag_out,
    output wire                         mem_rsp_ready_out
);
    localparam DST_DATA_SIZE = (DST_DATA_WIDTH / 8);
    localparam DST_LDATAW = $clog2(DST_DATA_WIDTH);
    localparam SRC_LDATAW = $clog2(SRC_DATA_WIDTH);
    localparam D = `ABS(DST_LDATAW - SRC_LDATAW);
    localparam P = 2**D;

    // The destination tag must hold the source tag. When the destination word
    // is wider it must additionally hold the D-bit lane index that is appended
    // to the tag and sliced back out of the response tag.
    `STATIC_ASSERT ((DST_TAG_WIDTH >= (SRC_TAG_WIDTH + ((DST_LDATAW > SRC_LDATAW) ? D : 0))), ("destination tag is too narrow!"))

    // converted request, feeding req_out_buf
    wire                        mem_req_valid_out_w;
    wire [DST_ADDR_WIDTH-1:0]   mem_req_addr_out_w;
    wire                        mem_req_rw_out_w;
    wire [DST_DATA_WIDTH/8-1:0] mem_req_byteen_out_w;
    wire [DST_DATA_WIDTH-1:0]   mem_req_data_out_w;
    wire [DST_TAG_WIDTH-1:0]    mem_req_tag_out_w;
    wire                        mem_req_ready_out_w;

    // converted response, feeding rsp_in_buf
    wire                        mem_rsp_valid_in_w;
    wire [SRC_DATA_WIDTH-1:0]   mem_rsp_data_in_w;
    wire [SRC_TAG_WIDTH-1:0]    mem_rsp_tag_in_w;
    wire                        mem_rsp_ready_in_w;

    `UNUSED_VAR (mem_rsp_tag_out)

    if (DST_LDATAW > SRC_LDATAW) begin

        // ---- destination word wider than source word ----------------------

        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        // lane index of the source word inside the destination word
        wire [D-1:0] req_idx = mem_req_addr_in[D-1:0];
        wire [D-1:0] rsp_idx = mem_rsp_tag_out[D-1:0];

        wire [SRC_ADDR_WIDTH-D-1:0] mem_req_addr_in_qual = mem_req_addr_in[SRC_ADDR_WIDTH-1:D];

        // destination response viewed as P source-sized lanes
        wire [P-1:0][SRC_DATA_WIDTH-1:0] mem_rsp_data_out_w = mem_rsp_data_out;

        if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH - D)) begin
            `UNUSED_VAR (mem_req_addr_in_qual)
            assign mem_req_addr_out_w = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0];
        end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH - D)) begin
            assign mem_req_addr_out_w = DST_ADDR_WIDTH'(mem_req_addr_in_qual);
        end else begin
            assign mem_req_addr_out_w = mem_req_addr_in_qual;
        end

        assign mem_req_valid_out_w  = mem_req_valid_in;
        assign mem_req_rw_out_w     = mem_req_rw_in;
        // shift byte-enables / data into the selected lane
        assign mem_req_byteen_out_w = DST_DATA_SIZE'(mem_req_byteen_in) << ((DST_LDATAW-3)'(req_idx) << (SRC_LDATAW-3));
        assign mem_req_data_out_w   = DST_DATA_WIDTH'(mem_req_data_in) << ((DST_LDATAW'(req_idx)) << SRC_LDATAW);
        // carry the lane index in the low tag bits for the response path
        assign mem_req_tag_out_w    = DST_TAG_WIDTH'({mem_req_tag_in, req_idx});
        assign mem_req_ready_in     = mem_req_ready_out_w;

        assign mem_rsp_valid_in_w   = mem_rsp_valid_out;
        assign mem_rsp_data_in_w    = mem_rsp_data_out_w[rsp_idx];
        assign mem_rsp_tag_in_w     = SRC_TAG_WIDTH'(mem_rsp_tag_out[SRC_TAG_WIDTH+D-1:D]);
        assign mem_rsp_ready_out    = mem_rsp_ready_in_w;

    end else if (DST_LDATAW < SRC_LDATAW) begin

        // ---- destination word narrower than source word -------------------

        reg [D-1:0] req_ctr, rsp_ctr;

        reg [P-1:0][DST_DATA_WIDTH-1:0] mem_rsp_data_out_r, mem_rsp_data_out_n;

        // req_ctr selects the chunk presented to req_out_buf, so it has to
        // advance when the buffer accepts that chunk (input-side handshake).
        // Using the buffer's output-side handshake instead would let the same
        // chunk be accepted again whenever the buffer is not a pass-through.
        wire mem_req_out_fire = mem_req_valid_out_w && mem_req_ready_out_w;
        wire mem_rsp_in_fire  = mem_rsp_valid_out && mem_rsp_ready_out;

        // source request viewed as P destination-sized chunks
        wire [P-1:0][DST_DATA_WIDTH-1:0] mem_req_data_in_w = mem_req_data_in;
        wire [P-1:0][DST_DATA_SIZE-1:0] mem_req_byteen_in_w = mem_req_byteen_in;

        // merge the incoming chunk into the response being assembled
        always @(*) begin
            mem_rsp_data_out_n = mem_rsp_data_out_r;
            if (mem_rsp_in_fire) begin
                mem_rsp_data_out_n[rsp_ctr] = mem_rsp_data_out;
            end
        end

        always @(posedge clk) begin
            if (reset) begin
                req_ctr <= 0;
                rsp_ctr <= 0;
            end else begin
                if (mem_req_out_fire) begin
                    req_ctr <= req_ctr + 1;
                end
                if (mem_rsp_in_fire) begin
                    rsp_ctr <= rsp_ctr + 1;
                end
            end
            mem_rsp_data_out_r <= mem_rsp_data_out_n;
        end

        // all chunks of one source response must carry the same tag:
        // remember the last accepted chunk's tag and compare it with the
        // incoming one while a response is partially assembled
        reg [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_r;
        wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_x;

        always @(posedge clk) begin
            if (mem_rsp_in_fire) begin
                mem_rsp_tag_in_r <= mem_rsp_tag_out;
            end
        end

        assign mem_rsp_tag_in_x = (rsp_ctr != 0) ? mem_rsp_tag_in_r : mem_rsp_tag_out;

        // "cur" is the tag arriving now, "expected" the tag latched earlier
        `RUNTIME_ASSERT(!mem_rsp_in_fire || (mem_rsp_tag_in_x == mem_rsp_tag_out),
            ("%t: *** out-of-order memory response! cur=%d, expected=%d", $time, mem_rsp_tag_out, mem_rsp_tag_in_x))

        // destination address = source address with the chunk index appended
        wire [SRC_ADDR_WIDTH+D-1:0] mem_req_addr_in_qual = {mem_req_addr_in, req_ctr};

        if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH + D)) begin
            `UNUSED_VAR (mem_req_addr_in_qual)
            assign mem_req_addr_out_w = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0];
        end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH + D)) begin
            assign mem_req_addr_out_w = DST_ADDR_WIDTH'(mem_req_addr_in_qual);
        end else begin
            assign mem_req_addr_out_w = mem_req_addr_in_qual;
        end

        assign mem_req_valid_out_w  = mem_req_valid_in;
        assign mem_req_rw_out_w     = mem_req_rw_in;
        assign mem_req_byteen_out_w = mem_req_byteen_in_w[req_ctr];
        assign mem_req_data_out_w   = mem_req_data_in_w[req_ctr];
        assign mem_req_tag_out_w    = DST_TAG_WIDTH'(mem_req_tag_in);
        // the source request is consumed together with its last chunk
        assign mem_req_ready_in     = mem_req_ready_out_w && (req_ctr == (P-1));

        // the source response becomes valid together with its last chunk
        assign mem_rsp_valid_in_w   = mem_rsp_valid_out && (rsp_ctr == (P-1));
        assign mem_rsp_data_in_w    = mem_rsp_data_out_n;
        assign mem_rsp_tag_in_w     = SRC_TAG_WIDTH'(mem_rsp_tag_out);
        assign mem_rsp_ready_out    = mem_rsp_ready_in_w;

    end else begin

        // ---- equal word sizes: pass-through -------------------------------

        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        if (DST_ADDR_WIDTH < SRC_ADDR_WIDTH) begin
            `UNUSED_VAR (mem_req_addr_in)
            assign mem_req_addr_out_w = mem_req_addr_in[DST_ADDR_WIDTH-1:0];
        end else if (DST_ADDR_WIDTH > SRC_ADDR_WIDTH) begin
            assign mem_req_addr_out_w = DST_ADDR_WIDTH'(mem_req_addr_in);
        end else begin
            assign mem_req_addr_out_w = mem_req_addr_in;
        end

        assign mem_req_valid_out_w  = mem_req_valid_in;
        assign mem_req_rw_out_w     = mem_req_rw_in;
        assign mem_req_byteen_out_w = mem_req_byteen_in;
        assign mem_req_data_out_w   = mem_req_data_in;
        assign mem_req_tag_out_w    = DST_TAG_WIDTH'(mem_req_tag_in);
        assign mem_req_ready_in     = mem_req_ready_out_w;

        assign mem_rsp_valid_in_w   = mem_rsp_valid_out;
        assign mem_rsp_data_in_w    = mem_rsp_data_out;
        assign mem_rsp_tag_in_w     = SRC_TAG_WIDTH'(mem_rsp_tag_out);
        assign mem_rsp_ready_out    = mem_rsp_ready_in_w;

    end

    // destination-side request buffer
    VX_skid_buffer #(
        .DATAW    (1 + DST_DATA_SIZE + DST_ADDR_WIDTH + DST_DATA_WIDTH + DST_TAG_WIDTH),
        .PASSTHRU (BUFFERED_REQ == 0),
        .OUT_REG  (BUFFERED_REQ > 1)
    ) req_out_buf (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (mem_req_valid_out_w),
        .ready_in  (mem_req_ready_out_w),
        .data_in   ({mem_req_rw_out_w, mem_req_byteen_out_w, mem_req_addr_out_w, mem_req_data_out_w, mem_req_tag_out_w}),
        .data_out  ({mem_req_rw_out, mem_req_byteen_out, mem_req_addr_out, mem_req_data_out, mem_req_tag_out}),
        .valid_out (mem_req_valid_out),
        .ready_out (mem_req_ready_out)
    );

    // source-side response buffer
    VX_skid_buffer #(
        .DATAW    (SRC_DATA_WIDTH + SRC_TAG_WIDTH),
        .PASSTHRU (BUFFERED_RSP == 0),
        .OUT_REG  (BUFFERED_RSP > 1)
    ) rsp_in_buf (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (mem_rsp_valid_in_w),
        .ready_in  (mem_rsp_ready_in_w),
        .data_in   ({mem_rsp_data_in_w, mem_rsp_tag_in_w}),
        .data_out  ({mem_rsp_data_in, mem_rsp_tag_in}),
        .valid_out (mem_rsp_valid_in),
        .ready_out (mem_rsp_ready_in)
    );

endmodule
`TRACING_ON

View file

@ -14,6 +14,10 @@ module VX_raster_agent #(
VX_commit_if.master raster_commit_if,
VX_gpu_csr_if.slave raster_csr_if
);
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
wire raster_rsp_valid, raster_rsp_ready;
// CSRs access
@ -51,7 +55,7 @@ module VX_raster_agent #(
end
VX_skid_buffer #(
.DATAW (`UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `NR_BITS + (`NUM_THREADS * 32))
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + (`NUM_THREADS * 32))
) rsp_sbuf (
.clk (clk),
.reset (reset),

View file

@ -18,11 +18,13 @@ module VX_raster_csr #(
);
`UNUSED_VAR (reset)
localparam NW_WIDTH = `UP(`NW_BITS);
raster_csrs_t [`NUM_THREADS-1:0] wdata;
raster_csrs_t [`NUM_THREADS-1:0] rdata;
wire [`NUM_THREADS-1:0] wren;
wire [`UP(`NW_BITS)-1:0] waddr;
wire [`UP(`NW_BITS)-1:0] raddr;
wire [NW_WIDTH-1:0] waddr;
wire [NW_WIDTH-1:0] raddr;
// CSR registers
for (genvar i = 0; i < `NUM_THREADS; ++i) begin

View file

@ -14,6 +14,10 @@ module VX_rop_agent #(
VX_commit_if.master rop_commit_if,
VX_rop_req_if.master rop_req_if
);
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
// CSRs access
rop_csrs_t rop_csrs;
@ -40,7 +44,7 @@ module VX_rop_agent #(
// because of that we need to decouple rop_agent_if and rop_commit_if handshake with a pipe register
VX_skid_buffer #(
.DATAW (`UP(`UUID_BITS) + `NUM_THREADS * (1 + 2 * `ROP_DIM_BITS + 32 + `ROP_DEPTH_BITS + 1)),
.DATAW (UUID_WIDTH + `NUM_THREADS * (1 + 2 * `ROP_DIM_BITS + 32 + `ROP_DEPTH_BITS + 1)),
.OUT_REG (1)
) req_sbuf (
.clk (clk),
@ -58,7 +62,7 @@ module VX_rop_agent #(
assign rop_rsp_valid = rop_agent_if.valid && rop_req_ready;
VX_skid_buffer #(
.DATAW (`UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32)
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32)
) rsp_sbuf (
.clk (clk),
.reset (reset),

View file

@ -17,7 +17,8 @@ module VX_rop_arb #(
VX_rop_req_if.master req_out_if [NUM_OUTPUTS]
);
localparam REQ_DATAW = `UP(`UUID_BITS) + NUM_LANES * (1 + 2 * `ROP_DIM_BITS + $bits(rgba_t) + `ROP_DEPTH_BITS + 1);
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam REQ_DATAW = UUID_WIDTH + NUM_LANES * (1 + 2 * `ROP_DIM_BITS + $bits(rgba_t) + `ROP_DEPTH_BITS + 1);
wire [NUM_INPUTS-1:0] req_valid_in;
wire [NUM_INPUTS-1:0][REQ_DATAW-1:0] req_data_in;

View file

@ -20,7 +20,8 @@ module VX_rop_unit #(
VX_dcr_write_if.slave dcr_write_if,
VX_rop_req_if.slave rop_req_if
);
localparam MEM_TAG_WIDTH = `UP(`UUID_BITS) + NUM_LANES * (`ROP_DIM_BITS + `ROP_DIM_BITS + 32 + `ROP_DEPTH_BITS + 1);
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam MEM_TAG_WIDTH = UUID_WIDTH + NUM_LANES * (`ROP_DIM_BITS + `ROP_DIM_BITS + 32 + `ROP_DEPTH_BITS + 1);
localparam DS_TAG_WIDTH = NUM_LANES * (`ROP_DIM_BITS + `ROP_DIM_BITS + 1 + 1 + 32);
localparam BLEND_TAG_WIDTH = NUM_LANES * (`ROP_DIM_BITS + `ROP_DIM_BITS + 1);
@ -212,7 +213,7 @@ module VX_rop_unit #(
///////////////////////////////////////////////////////////////////////////
wire [NUM_LANES-1:0][`ROP_DIM_BITS-1:0] mem_rsp_pos_x, mem_rsp_pos_y;
wire [`UP(`UUID_BITS)-1:0] mem_rsp_uuid;
wire [UUID_WIDTH-1:0] mem_rsp_uuid;
`UNUSED_VAR (mem_rsp_uuid)
wire [NUM_LANES-1:0][`ROP_DIM_BITS-1:0] ds_write_pos_x, ds_write_pos_y;

View file

@ -16,6 +16,8 @@ module VX_tex_agent #(
VX_commit_if.master tex_commit_if
);
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
localparam REQ_QUEUE_BITS = `LOG2UP(`TEX_REQ_QUEUE_SIZE);
// CSRs access
@ -39,11 +41,11 @@ module VX_tex_agent #(
// Store request info
wire [`UP(`UUID_BITS)-1:0] rsp_uuid;
wire [`UP(`NW_BITS)-1:0] rsp_wid;
wire [`NUM_THREADS-1:0] rsp_tmask;
wire [31:0] rsp_PC;
wire [`NR_BITS-1:0] rsp_rd;
wire [UUID_WIDTH-1:0] rsp_uuid;
wire [NW_WIDTH-1:0] rsp_wid;
wire [`NUM_THREADS-1:0] rsp_tmask;
wire [31:0] rsp_PC;
wire [`NR_BITS-1:0] rsp_rd;
wire [REQ_QUEUE_BITS-1:0] mdata_waddr, mdata_raddr;
@ -53,7 +55,7 @@ module VX_tex_agent #(
wire mdata_pop = tex_rsp_if.valid && tex_rsp_if.ready;
VX_index_buffer #(
.DATAW (`UP(`NW_BITS) + `NUM_THREADS + 32 + `NR_BITS),
.DATAW (NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS),
.SIZE (`TEX_REQ_QUEUE_SIZE)
) tag_store (
.clk (clk),
@ -94,10 +96,10 @@ module VX_tex_agent #(
// handle texture response
assign mdata_raddr = tex_rsp_if.tag[0 +: REQ_QUEUE_BITS];
assign rsp_uuid = tex_rsp_if.tag[REQ_QUEUE_BITS +: `UP(`UUID_BITS)];
assign rsp_uuid = tex_rsp_if.tag[REQ_QUEUE_BITS +: UUID_WIDTH];
VX_skid_buffer #(
.DATAW (`UP(`UUID_BITS) + `UP(`NW_BITS) + `NUM_THREADS + 32 + `NR_BITS + (`NUM_THREADS * 32))
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + (`NUM_THREADS * 32))
) rsp_sbuf (
.clk (clk),
.reset (reset),

View file

@ -29,7 +29,8 @@ module VX_tex_mem #(
input wire rsp_ready
);
localparam TAG_WIDTH = REQ_INFOW + `TEX_LGSTRIDE_BITS + (NUM_LANES * 4 * 2) + 4;
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam TAG_WIDTH = REQ_INFOW + `TEX_LGSTRIDE_BITS + (NUM_LANES * 4 * 2) + 4;
wire mem_req_valid;
wire [3:0][NUM_LANES-1:0] mem_req_mask;
@ -101,7 +102,7 @@ module VX_tex_mem #(
.DATA_WIDTH (32),
.QUEUE_SIZE (`TEX_MEM_QUEUE_SIZE),
.TAG_WIDTH (TAG_WIDTH),
.UUID_WIDTH (`UP(`UUID_BITS)),
.UUID_WIDTH (UUID_WIDTH),
.MEM_OUT_REG (3)
) mem_scheduler (
.clk (clk),