minor update

This commit is contained in:
Blaise Tine 2024-07-11 05:31:46 -07:00
parent a854e9d25b
commit 69f7213afc
4 changed files with 66 additions and 66 deletions

View file

@ -537,14 +537,14 @@ module VX_lsu_slice import VX_gpu_pkg::*, VX_trace_pkg::*; #(
.TRIGGERW (3),
.PROBEW (`UUID_WIDTH+NUM_LANES*(`XLEN+4+`XLEN)+1+`UUID_WIDTH+NUM_LANES*`XLEN)
) scope_tap (
.clk (clk),
.reset (scope_reset),
.start (1'b0),
.stop (1'b0),
.triggers ({reset, mem_req_fire, mem_rsp_fire}),
.clk (clk),
.reset (scope_reset),
.start (1'b0),
.stop (1'b0),
.triggers({reset, mem_req_fire, mem_rsp_fire}),
.probes ({execute_if.data.uuid, full_addr, mem_req_rw, mem_req_byteen, mem_req_data, rsp_uuid, rsp_data}),
.bus_in (scope_bus_in),
.bus_out (scope_bus_out)
.bus_out(scope_bus_out)
);
`else
`SCOPE_IO_UNUSED()

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -23,7 +23,7 @@ module VX_mem_scheduler #(
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
parameter ATYPE_WIDTH = 1,
parameter TAG_WIDTH = 8,
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
parameter CORE_QUEUE_SIZE= 8,
parameter MEM_QUEUE_SIZE= CORE_QUEUE_SIZE,
parameter RSP_PARTIAL = 0,
@ -54,7 +54,7 @@ module VX_mem_scheduler #(
input wire [CORE_REQS-1:0][WORD_WIDTH-1:0] core_req_data,
input wire [TAG_WIDTH-1:0] core_req_tag,
output wire core_req_ready,
output wire core_req_empty,
output wire core_req_empty,
output wire core_req_sent,
// Core response
@ -81,7 +81,7 @@ module VX_mem_scheduler #(
input wire mem_rsp_valid,
input wire [MEM_CHANNELS-1:0] mem_rsp_mask,
input wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_rsp_data,
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready
);
localparam BATCH_SEL_WIDTH = `UP(MEM_BATCH_BITS);
@ -110,7 +110,7 @@ module VX_mem_scheduler #(
wire reqq_valid;
wire [CORE_REQS-1:0] reqq_mask;
wire reqq_rw;
wire reqq_rw;
wire [CORE_REQS-1:0][WORD_SIZE-1:0] reqq_byteen;
wire [CORE_REQS-1:0][ADDR_WIDTH-1:0] reqq_addr;
wire [CORE_REQS-1:0][ATYPE_WIDTH-1:0] reqq_atype;
@ -118,7 +118,7 @@ module VX_mem_scheduler #(
wire [REQQ_TAG_WIDTH-1:0] reqq_tag;
wire reqq_ready;
wire reqq_valid_s;
wire reqq_valid_s;
wire [MERGED_REQS-1:0] reqq_mask_s;
wire reqq_rw_s;
wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s;
@ -159,7 +159,7 @@ module VX_mem_scheduler #(
wire ibuf_ready = (core_req_rw || ~ibuf_full);
wire reqq_valid_in = core_req_valid && ibuf_ready;
wire reqq_ready_in;
wire [REQQ_TAG_WIDTH-1:0] reqq_tag_u;
if (UUID_WIDTH != 0) begin
assign reqq_tag_u = {core_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr};
@ -169,7 +169,7 @@ module VX_mem_scheduler #(
VX_elastic_buffer #(
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + ATYPE_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH),
.SIZE (CORE_QUEUE_SIZE),
.SIZE (CORE_QUEUE_SIZE),
.OUT_REG (1)
) req_queue (
.clk (clk),
@ -188,7 +188,7 @@ module VX_mem_scheduler #(
// no pending requests
assign core_req_empty = !reqq_valid && ibuf_empty;
// notify request submission
// notify request submission
assign core_req_sent = reqq_valid && reqq_ready;
// Index buffer ///////////////////////////////////////////////////////////
@ -219,15 +219,15 @@ module VX_mem_scheduler #(
`UNUSED_VAR (ibuf_empty)
// Handle memory coalescing ///////////////////////////////////////////////
// Handle memory coalescing ///////////////////////////////////////////////
if (COALESCE_ENABLE) begin
`RESET_RELAY (coalescer_reset, reset);
VX_mem_coalescer #(
.INSTANCE_ID ($sformatf("%s-coalescer", INSTANCE_ID)),
.NUM_REQS (CORE_REQS),
.NUM_REQS (CORE_REQS),
.DATA_IN_SIZE (WORD_SIZE),
.DATA_OUT_SIZE (LINE_SIZE),
.ADDR_WIDTH (ADDR_WIDTH),
@ -238,7 +238,7 @@ module VX_mem_scheduler #(
) coalescer (
.clk (clk),
.reset (coalescer_reset),
// Input request
.in_req_valid (reqq_valid),
.in_req_mask (reqq_mask),
@ -280,7 +280,7 @@ module VX_mem_scheduler #(
assign reqq_valid_s = reqq_valid;
assign reqq_mask_s = reqq_mask;
assign reqq_rw_s = reqq_rw;
assign reqq_rw_s = reqq_rw;
assign reqq_byteen_s= reqq_byteen;
assign reqq_addr_s = reqq_addr;
assign reqq_atype_s = reqq_atype;
@ -292,18 +292,18 @@ module VX_mem_scheduler #(
assign mem_rsp_mask_s = mem_rsp_mask;
assign mem_rsp_data_s = mem_rsp_data;
assign mem_rsp_tag_s = mem_rsp_tag;
assign mem_rsp_ready = mem_rsp_ready_s;
assign mem_rsp_ready = mem_rsp_ready_s;
end
// Handle memory requests /////////////////////////////////////////////////
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][ATYPE_WIDTH-1:0] mem_req_atype_b;
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b;
wire [BATCH_SEL_WIDTH-1:0] req_batch_idx;
for (genvar i = 0; i < MEM_BATCHES; ++i) begin
@ -331,7 +331,7 @@ module VX_mem_scheduler #(
assign mem_req_addr_s = mem_req_addr_b[req_batch_idx];
assign mem_req_atype_s = mem_req_atype_b[req_batch_idx];
assign mem_req_data_s = mem_req_data_b[req_batch_idx];
if (MEM_BATCHES != 1) begin
reg [MEM_BATCH_BITS-1:0] req_batch_idx_r;
always @(posedge clk) begin
@ -352,10 +352,10 @@ module VX_mem_scheduler #(
wire [MEM_BATCHES-1:0][MEM_BATCH_BITS-1:0] req_batch_idxs;
wire [MEM_BATCH_BITS-1:0] req_batch_idx_last;
for (genvar i = 0; i < MEM_BATCHES; ++i) begin
for (genvar i = 0; i < MEM_BATCHES; ++i) begin
assign req_batch_valids[i] = (| mem_req_mask_b[i]);
assign req_batch_idxs[i] = MEM_BATCH_BITS'(i);
end
end
VX_find_first #(
.N (MEM_BATCHES),
@ -368,7 +368,7 @@ module VX_mem_scheduler #(
`UNUSED_PIN (valid_out)
);
assign req_batch_idx = req_batch_idx_r;
assign req_batch_idx = req_batch_idx_r;
assign req_sent_all = mem_req_ready_s && (req_batch_idx_r == req_batch_idx_last);
assign mem_req_tag_s = {reqq_tag_s, req_batch_idx};
@ -382,7 +382,7 @@ module VX_mem_scheduler #(
assign mem_req_valid_s = reqq_valid_s;
assign reqq_ready_s = req_sent_all;
VX_elastic_buffer #(
.DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + ATYPE_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)),
@ -415,7 +415,7 @@ module VX_mem_scheduler #(
localparam j = r % CORE_CHANNELS;
assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j];
end
assign rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~curr_mask;
wire rsp_complete = ~(| rsp_rem_mask_n);
@ -457,19 +457,19 @@ module VX_mem_scheduler #(
end else begin
reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0];
reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n;
reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0];
reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0];
reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n;
reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0];
always @(*) begin
rsp_store_n = rsp_store[ibuf_raddr];
rsp_store_n = rsp_store[ibuf_raddr];
for (integer i = 0; i < CORE_CHANNELS; ++i) begin
if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin
rsp_store_n[(rsp_batch_idx * CORE_CHANNELS + i) * WORD_WIDTH +: WORD_WIDTH] = mem_rsp_data_s[i];
end
end
end
end
always @(posedge clk) begin
if (ibuf_push) begin
rsp_orig_mask[ibuf_waddr] <= core_req_mask;
@ -490,6 +490,7 @@ module VX_mem_scheduler #(
end
assign mem_rsp_ready_s = crsp_ready || ~rsp_complete;
end
if (UUID_WIDTH != 0) begin
@ -509,11 +510,11 @@ module VX_mem_scheduler #(
) rsp_buf (
.clk (clk),
.reset (reset),
.valid_in (crsp_valid),
.valid_in (crsp_valid),
.ready_in (crsp_ready),
.data_in ({crsp_mask, crsp_sop, crsp_eop, crsp_data, crsp_tag}),
.data_out ({core_rsp_mask, core_rsp_sop, core_rsp_eop, core_rsp_data, core_rsp_tag}),
.valid_out (core_rsp_valid),
.valid_out (core_rsp_valid),
.ready_out (core_rsp_ready)
);
@ -541,14 +542,14 @@ module VX_mem_scheduler #(
end
end
if (ibuf_push) begin
if (ibuf_push) begin
pending_reqs_time[ibuf_waddr] <= {req_dbg_uuid, ibuf_din, $time};
end
for (integer i = 0; i < CORE_QUEUE_SIZE; ++i) begin
if (pending_reqs_valid[i]) begin
`ASSERT(($time - pending_reqs_time[i][63:0]) < STALL_TIMEOUT,
("%t: *** %s response timeout: tag=0x%0h (#%0d)",
("%t: *** %s response timeout: tag=0x%0h (#%0d)",
$time, INSTANCE_ID, pending_reqs_time[i][64 +: TAG_ID_WIDTH], pending_reqs_time[i][64+TAG_ID_WIDTH +: `UP(UUID_WIDTH)]));
end
end
@ -578,16 +579,16 @@ module VX_mem_scheduler #(
if (core_req_fire) begin
if (core_req_rw) begin
`TRACE(1, ("%d: %s-core-req-wr: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask));
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS);
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS);
`TRACE(1, (", byteen="));
`TRACE_ARRAY1D(1, "0x%h", core_req_byteen, CORE_REQS);
`TRACE(1, (", data="));
`TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS);
`TRACE_ARRAY1D(1, "0x%0h", core_req_data, CORE_REQS);
end else begin
`TRACE(1, ("%d: %s-core-req-rd: valid=%b, addr=", $time, INSTANCE_ID, core_req_mask));
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS);
end
`TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid));
`TRACE_ARRAY1D(1, "0x%h", core_req_addr, CORE_REQS);
end
`TRACE(1, (", tag=0x%0h (#%0d)\n", core_req_tag, req_dbg_uuid));
end
if (core_rsp_valid && core_rsp_ready) begin
`TRACE(1, ("%d: %s-rsp: valid=%b, sop=%b, eop=%b, data=", $time, INSTANCE_ID, core_rsp_mask, core_rsp_sop, core_rsp_eop));
@ -601,20 +602,20 @@ module VX_mem_scheduler #(
`TRACE(1, (", byteen="));
`TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS);
`TRACE(1, (", data="));
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS);
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS);
end else begin
`TRACE(1, ("%d: %s-mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_mask_s));
`TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS);
`TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS);
end
`TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr, req_batch_idx, mem_req_dbg_uuid));
end
end
if (mem_rsp_fire_s) begin
`TRACE(1, ("%d: %s-mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s));
`TRACE(1, ("%d: %s-mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s));
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS);
`TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid));
end
end
`endif
endmodule
`TRACING_ON

View file

@ -103,7 +103,7 @@ module VX_stream_arb #(
.DATAW (DATAW),
.ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF),
.OUT_BUF (3), // registered output
.LUTRAM (LUTRAM)
) fanout_slice_arb (
.clk (clk),
@ -254,7 +254,7 @@ module VX_stream_arb #(
.DATAW (DATAW),
.ARBITER (ARBITER),
.MAX_FANOUT (MAX_FANOUT),
.OUT_BUF (OUT_BUF),
.OUT_BUF (3), // registered output
.LUTRAM (LUTRAM)
) fanout_fork_arb (
.clk (clk),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -17,20 +17,19 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
parameter NUM_REQS = 1,
parameter DATA_SIZE = 1,
parameter TAG_WIDTH = 1,
parameter MEM_ADDR_WIDTH = `MEM_ADDR_WIDTH,
parameter ADDR_WIDTH = 1,
parameter REQ_OUT_BUF = 0,
parameter RSP_OUT_BUF = 0,
parameter `STRING ARBITER = "R",
parameter `STRING ARBITER = "R",
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS)
) (
input wire clk,
input wire reset,
input wire [`UP(LOG_NUM_REQS)-1:0] bus_sel,
VX_mem_bus_if.slave bus_in_if,
VX_mem_bus_if.master bus_out_if [NUM_REQS]
);
localparam ADDR_WIDTH = (MEM_ADDR_WIDTH-`CLOG2(DATA_SIZE));
);
localparam DATA_WIDTH = (8 * DATA_SIZE);
localparam REQ_DATAW = TAG_WIDTH + ADDR_WIDTH + `ADDR_TYPE_WIDTH + 1 + DATA_SIZE + DATA_WIDTH;
localparam RSP_DATAW = TAG_WIDTH + DATA_WIDTH;
@ -40,7 +39,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
wire [NUM_REQS-1:0] req_valid_out;
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_out;
wire [NUM_REQS-1:0] req_ready_out;
VX_stream_switch #(
.NUM_OUTPUTS (NUM_REQS),
.DATAW (REQ_DATAW),
@ -49,7 +48,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
.clk (clk),
.reset (reset),
.sel_in (bus_sel),
.valid_in (bus_in_if.req_valid),
.valid_in (bus_in_if.req_valid),
.data_in (bus_in_if.req_data),
.ready_in (bus_in_if.req_ready),
.valid_out (req_valid_out),
@ -68,7 +67,7 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
wire [NUM_REQS-1:0] rsp_valid_in;
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_data_in;
wire [NUM_REQS-1:0] rsp_ready_in;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign rsp_valid_in[i] = bus_out_if[i].rsp_valid;
assign rsp_data_in[i] = bus_out_if[i].rsp_data;
@ -77,15 +76,15 @@ module VX_mem_switch import VX_gpu_pkg::*; #(
VX_stream_arb #(
.NUM_INPUTS (NUM_REQS),
.DATAW (RSP_DATAW),
.DATAW (RSP_DATAW),
.ARBITER (ARBITER),
.OUT_BUF (RSP_OUT_BUF)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (rsp_valid_in),
.data_in (rsp_data_in),
.ready_in (rsp_ready_in),
.valid_in (rsp_valid_in),
.data_in (rsp_data_in),
.ready_in (rsp_ready_in),
.valid_out (bus_in_if.rsp_valid),
.data_out (bus_in_if.rsp_data),
.ready_out (bus_in_if.rsp_ready),