mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
memory coalescing RTL implementation
This commit is contained in:
parent
274e6a4c52
commit
288147ac4f
5 changed files with 525 additions and 68 deletions
|
@ -266,7 +266,7 @@
|
|||
|
||||
// LSU line size
|
||||
`ifndef LSU_LINE_SIZE
|
||||
`define LSU_LINE_SIZE (`XLEN / 8)
|
||||
`define LSU_LINE_SIZE `MIN(`NUM_LSU_LANES * (`XLEN / 8), `L1_LINE_SIZE)
|
||||
`endif
|
||||
|
||||
// LSU Duplicate Address Check
|
||||
|
|
|
@ -114,12 +114,11 @@ package VX_gpu_pkg;
|
|||
// Input request size
|
||||
localparam DCACHE_NUM_REQS = `UP((`NUM_LSU_LANES * (`XLEN / 8)) / DCACHE_WORD_SIZE);
|
||||
|
||||
// Batch select bits
|
||||
localparam DCACHE_NUM_BATCHES = ((`NUM_LSU_LANES + DCACHE_NUM_REQS - 1) / DCACHE_NUM_REQS);
|
||||
localparam DCACHE_BATCH_SEL_BITS = `CLOG2(DCACHE_NUM_BATCHES);
|
||||
|
||||
// Core request tag Id bits
|
||||
localparam DCACHE_TAG_ID_BITS = (`CLOG2(`LSUQ_OUT_SIZE) + DCACHE_BATCH_SEL_BITS);
|
||||
|
||||
localparam DCACHE_MERGED_REQS = (`NUM_LSU_LANES * DCACHE_WORD_SIZE) / DCACHE_LINE_SIZE;
|
||||
localparam DCACHE_MEM_BATCHES = (DCACHE_MERGED_REQS + DCACHE_NUM_REQS - 1) / DCACHE_NUM_REQS;
|
||||
localparam DCACHE_TAG_ID_BITS = (`CLOG2(`LSUQ_OUT_SIZE) + `CLOG2(DCACHE_MEM_BATCHES));
|
||||
|
||||
// Core request tag bits
|
||||
localparam DCACHE_TAG_WIDTH = (`UUID_WIDTH + DCACHE_TAG_ID_BITS);
|
||||
|
|
|
@ -77,8 +77,6 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
// tag = uuid + tag_id
|
||||
localparam TAG_WIDTH = `UUID_WIDTH + TAG_ID_WIDTH;
|
||||
|
||||
`STATIC_ASSERT(0 == (`IO_BASE_ADDR % `MEM_BLOCK_SIZE), ("invalid parameter"))
|
||||
|
||||
// full address calculation
|
||||
|
||||
wire [NUM_LANES-1:0][`XLEN-1:0] full_addr;
|
||||
|
|
365
hw/rtl/libs/VX_mem_coalescer.sv
Normal file
365
hw/rtl/libs/VX_mem_coalescer.sv
Normal file
|
@ -0,0 +1,365 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
`TRACING_OFF
|
||||
module VX_mem_coalescer #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter NUM_REQS = 1,
|
||||
parameter ADDR_WIDTH = 32,
|
||||
parameter DATA_IN_SIZE = 4,
|
||||
parameter DATA_OUT_SIZE = 64,
|
||||
parameter TAG_WIDTH = 8,
|
||||
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
|
||||
parameter QUEUE_SIZE = 8,
|
||||
|
||||
parameter DATA_IN_WIDTH = DATA_IN_SIZE * 8,
|
||||
parameter DATA_OUT_WIDTH= DATA_OUT_SIZE * 8,
|
||||
parameter OUT_REQS = (NUM_REQS * DATA_IN_WIDTH) / DATA_OUT_WIDTH,
|
||||
parameter BATCH_SIZE = DATA_OUT_SIZE / DATA_IN_SIZE,
|
||||
parameter BATCH_SIZE_W = `LOG2UP(BATCH_SIZE),
|
||||
parameter OUT_ADDR_WIDTH= ADDR_WIDTH - BATCH_SIZE_W,
|
||||
parameter QUEUE_ADDRW = `CLOG2(QUEUE_SIZE),
|
||||
parameter OUT_TAG_WIDTH = UUID_WIDTH + QUEUE_ADDRW
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Input request
|
||||
input wire in_req_valid,
|
||||
input wire in_req_rw,
|
||||
input wire [NUM_REQS-1:0] in_req_mask,
|
||||
input wire [NUM_REQS-1:0][DATA_IN_SIZE-1:0] in_req_byteen,
|
||||
input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr,
|
||||
input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data,
|
||||
input wire [TAG_WIDTH-1:0] in_req_tag,
|
||||
output wire in_req_ready,
|
||||
|
||||
// Input response
|
||||
output wire in_rsp_valid,
|
||||
output wire [NUM_REQS-1:0] in_rsp_mask,
|
||||
output wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_rsp_data,
|
||||
output wire [TAG_WIDTH-1:0] in_rsp_tag,
|
||||
input wire in_rsp_ready,
|
||||
|
||||
// Output request
|
||||
output wire out_req_valid,
|
||||
output wire out_req_rw,
|
||||
output wire [OUT_REQS-1:0] out_req_mask,
|
||||
output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen,
|
||||
output wire [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr,
|
||||
output wire [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data,
|
||||
output wire [OUT_TAG_WIDTH-1:0] out_req_tag,
|
||||
input wire out_req_ready,
|
||||
|
||||
// Output response
|
||||
input wire out_rsp_valid,
|
||||
input wire [OUT_REQS-1:0] out_rsp_mask,
|
||||
input wire [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_rsp_data,
|
||||
input wire [OUT_TAG_WIDTH-1:0] out_rsp_tag,
|
||||
output wire out_rsp_ready
|
||||
);
|
||||
`UNUSED_SPARAM (INSTANCE_ID)
|
||||
`STATIC_ASSERT (`IS_DIVISBLE(NUM_REQS * DATA_IN_WIDTH, DATA_OUT_WIDTH), ("invalid parameter"))
|
||||
`STATIC_ASSERT ((NUM_REQS * DATA_IN_WIDTH >= DATA_OUT_WIDTH), ("invalid parameter"))
|
||||
`RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("invalid request mask"));
|
||||
// A valid output response must flag at least one output lane in out_rsp_mask.
`RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("invalid response mask"));
|
||||
|
||||
localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH;
|
||||
localparam NUM_REQS_W = `LOG2UP(NUM_REQS);
|
||||
// tag + mask + offset
|
||||
localparam IBUF_DATA_WIDTH = TAG_ID_WIDTH + NUM_REQS + (NUM_REQS * BATCH_SIZE_W);
|
||||
|
||||
localparam STATE_SETUP = 0;
|
||||
localparam STATE_SEND = 1;
|
||||
|
||||
logic state_r, state_n;
|
||||
|
||||
logic out_req_valid_r, out_req_valid_n;
|
||||
logic out_req_rw_r, out_req_rw_n;
|
||||
logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
|
||||
logic [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
|
||||
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
|
||||
logic [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data_r, out_req_data_n;
|
||||
logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
|
||||
|
||||
logic in_req_ready_n;
|
||||
|
||||
wire ibuf_push;
|
||||
wire ibuf_pop;
|
||||
wire [QUEUE_ADDRW-1:0] ibuf_waddr;
|
||||
wire [QUEUE_ADDRW-1:0] ibuf_raddr;
|
||||
wire ibuf_full;
|
||||
wire ibuf_empty;
|
||||
wire [IBUF_DATA_WIDTH-1:0] ibuf_din;
|
||||
wire [IBUF_DATA_WIDTH-1:0] ibuf_dout;
|
||||
|
||||
logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n;
|
||||
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n;
|
||||
logic [NUM_REQS-1:0] processed_mask_r, processed_mask_n;
|
||||
|
||||
wire [OUT_REQS-1:0][NUM_REQS_W-1:0] seed_idx;
|
||||
|
||||
wire [NUM_REQS-1:0][OUT_ADDR_WIDTH-1:0] in_addr_base;
|
||||
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] in_addr_offset;
|
||||
// Split every input lane address into its upper OUT_ADDR_WIDTH bits (address of
// the wide output word) and its lower BATCH_SIZE_W bits (position of the input
// word inside that output word).
for (genvar lane = 0; lane < NUM_REQS; ++lane) begin
    assign {in_addr_base[lane], in_addr_offset[lane]} = in_req_addr[lane];
end
|
||||
|
||||
// Seed selection, one encoder per output request slot.
// Each slot owns BATCH_SIZE consecutive input lanes. Among the lanes that are
// requested and not yet marked in processed_mask_r, the encoder picks one lane
// as the "seed"; batch_valid_n[slot] reports whether any such lane exists.
for (genvar slot = 0; slot < OUT_REQS; ++slot) begin
    wire [BATCH_SIZE-1:0] lanes_requested = in_req_mask[BATCH_SIZE * slot +: BATCH_SIZE];
    wire [BATCH_SIZE-1:0] lanes_processed = processed_mask_r[BATCH_SIZE * slot +: BATCH_SIZE];
    wire [BATCH_SIZE-1:0] batch_mask;
    wire [BATCH_SIZE_W-1:0] batch_idx;

    assign batch_mask = lanes_requested & ~lanes_processed;

    VX_priority_encoder #(
        .N (BATCH_SIZE)
    ) priority_encoder (
        .data_in   (batch_mask),
        .index     (batch_idx),
        `UNUSED_PIN (onehot),
        .valid_out (batch_valid_n[slot])
    );

    // convert the slot-relative index into an absolute input lane index
    assign seed_idx[slot] = NUM_REQS_W'(BATCH_SIZE * slot) + NUM_REQS_W'(batch_idx);
end
|
||||
|
||||
// Sequential state of the coalescer.
// The synchronous reset clears the control registers: the FSM state, the
// processed-lane mask and the outgoing-request valid bit. The remaining
// registers are only loaded in the non-reset branch; the FSM writes them
// (seed/batch info in STATE_SETUP, request payload in STATE_SEND) before
// out_req_valid_r is raised.
always @(posedge clk) begin
    if (reset) begin
        state_r          <= STATE_SETUP;
        processed_mask_r <= '0;
        // out_req_valid_r drives out_req_valid and gates the SETUP -> SEND
        // transition, so it must come out of reset at a known value.
        out_req_valid_r  <= 1'b0;
    end else begin
        state_r          <= state_n;
        out_req_valid_r  <= out_req_valid_n;
        batch_valid_r    <= batch_valid_n;
        seed_addr_r      <= seed_addr_n;
        out_req_rw_r     <= out_req_rw_n;
        out_req_mask_r   <= out_req_mask_n;
        out_req_addr_r   <= out_req_addr_n;
        out_req_byteen_r <= out_req_byteen_n;
        out_req_data_r   <= out_req_data_n;
        out_req_tag_r    <= out_req_tag_n;
        processed_mask_r <= processed_mask_n;
    end
end
|
||||
|
||||
logic [NUM_REQS-1:0] addr_matches;
|
||||
|
||||
// addr_matches[k] is raised when input lane k addresses the same wide output
// word as the seed address latched for the slot that owns it (k / BATCH_SIZE).
always @(*) begin
    addr_matches = '0;
    for (integer k = 0; k < OUT_REQS * BATCH_SIZE; ++k) begin
        if (seed_addr_r[k / BATCH_SIZE] == in_addr_base[k]) begin
            addr_matches[k] = 1'b1;
        end
    end
end
|
||||
|
||||
wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches;
|
||||
|
||||
// Outgoing request tag = {UUID bits of the incoming tag, index-buffer slot}.
// A zero-width part-select is illegal, so the UUID slice is only formed when
// UUID_WIDTH is non-zero; otherwise the tag is just the slot index.
wire [OUT_TAG_WIDTH-1:0] out_req_tag_sel;
if (UUID_WIDTH != 0) begin : g_out_req_tag
    assign out_req_tag_sel = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr};
end else begin : g_out_req_tag_no_uuid
    assign out_req_tag_sel = ibuf_waddr;
end

// Next-state logic of the two-state coalescing FSM.
//   STATE_SETUP: latch a seed address per output slot, retire the previously
//                issued output request once it is accepted, and move to
//                STATE_SEND when an input request is pending, no output
//                request is outstanding and the index buffer has room.
//   STATE_SEND : build one output request per slot from all requested lanes
//                whose address matches the slot's seed. The input request is
//                acknowledged (in_req_ready) only if every requested lane
//                either matches now or was covered by an earlier pass;
//                otherwise the matched lanes are recorded in processed_mask
//                and the same input request is revisited.
always @(*) begin
    // default: hold all registered values
    state_n          = state_r;
    out_req_valid_n  = out_req_valid_r;
    seed_addr_n      = seed_addr_r;
    out_req_rw_n     = out_req_rw_r;
    out_req_mask_n   = out_req_mask_r;
    out_req_addr_n   = out_req_addr_r;
    out_req_byteen_n = out_req_byteen_r;
    out_req_data_n   = out_req_data_r;
    out_req_tag_n    = out_req_tag_r;
    processed_mask_n = processed_mask_r;
    in_req_ready_n   = 0;

    case (state_r)
    STATE_SETUP: begin
        // find the next seed address
        for (integer i = 0; i < OUT_REQS; ++i) begin
            seed_addr_n[i] = in_addr_base[seed_idx[i]];
        end
        // wait for pending outgoing request to submit
        if (out_req_valid && out_req_ready) begin
            out_req_valid_n = 0;
        end
        if (in_req_valid && ~out_req_valid_n && ~ibuf_full) begin
            state_n = STATE_SEND;
        end
    end
    default/*STATE_SEND*/: begin
        out_req_valid_n  = 1;
        out_req_rw_n     = in_req_rw;
        out_req_tag_n    = out_req_tag_sel;
        in_req_ready_n   = 1;
        // byte enables are rebuilt from scratch; data lanes keep stale bytes
        // that are masked off by the cleared byte enables
        out_req_byteen_n = '0;
        for (integer i = 0; i < OUT_REQS; ++i) begin
            for (integer j = 0; j < BATCH_SIZE; j++) begin
                if (in_req_mask[BATCH_SIZE * i + j]) begin
                    if (addr_matches[BATCH_SIZE * i + j]) begin
                        // merge this lane into the wide word at its offset
                        out_req_byteen_n[i][in_addr_offset[BATCH_SIZE * i + j] * DATA_IN_SIZE +: DATA_IN_SIZE] = in_req_byteen[BATCH_SIZE * i + j];
                        out_req_data_n[i][in_addr_offset[BATCH_SIZE * i + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH] = in_req_data[BATCH_SIZE * i + j];
                    end else begin
                        // a requested lane that is neither merged now nor
                        // covered earlier keeps the input request pending
                        if (!processed_mask_r[BATCH_SIZE * i + j]) begin
                            in_req_ready_n = 0;
                        end
                    end
                end
            end
            out_req_mask_n[i] = batch_valid_r[i];
            out_req_addr_n[i] = seed_addr_r[i];
        end
        if (in_req_ready_n) begin
            // input request fully covered: start fresh for the next one
            processed_mask_n = '0;
        end else begin
            processed_mask_n = processed_mask_r | current_pmask;
        end
        state_n = STATE_SETUP;
    end
    endcase
end
|
||||
|
||||
wire out_rsp_fire = out_rsp_valid && out_rsp_ready;
|
||||
|
||||
wire out_rsp_eop;
|
||||
|
||||
assign ibuf_push = (state_r == STATE_SEND) && ~in_req_rw;
|
||||
assign ibuf_pop = out_rsp_fire && out_rsp_eop;
|
||||
assign ibuf_raddr = out_rsp_tag[QUEUE_ADDRW-1:0];
|
||||
|
||||
wire [TAG_ID_WIDTH-1:0] ibuf_din_tag = in_req_tag[TAG_ID_WIDTH-1:0];
|
||||
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] ibuf_din_offset = in_addr_offset;
|
||||
wire [NUM_REQS-1:0] ibuf_din_pmask = current_pmask;
|
||||
|
||||
assign ibuf_din = {ibuf_din_tag, ibuf_din_pmask, ibuf_din_offset};
|
||||
|
||||
VX_index_buffer #(
|
||||
.DATAW (IBUF_DATA_WIDTH),
|
||||
.SIZE (QUEUE_SIZE)
|
||||
) req_ibuf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.acquire_en (ibuf_push),
|
||||
.write_addr (ibuf_waddr),
|
||||
.write_data (ibuf_din),
|
||||
.read_data (ibuf_dout),
|
||||
.read_addr (ibuf_raddr),
|
||||
.release_en (ibuf_pop),
|
||||
.full (ibuf_full),
|
||||
.empty (ibuf_empty)
|
||||
);
|
||||
`UNUSED_VAR (ibuf_empty)
|
||||
|
||||
assign out_req_valid = out_req_valid_r;
|
||||
assign out_req_rw = out_req_rw_r;
|
||||
for (genvar i = 0; i < OUT_REQS; ++i) begin
|
||||
assign out_req_mask[i] = out_req_mask_r[i];
|
||||
assign out_req_byteen[i] = out_req_byteen_r[i];
|
||||
assign out_req_addr[i] = out_req_addr_r[i];
|
||||
assign out_req_data[i] = out_req_data_r[i];
|
||||
end
|
||||
assign out_req_tag = out_req_tag_r;
|
||||
|
||||
assign in_req_ready = in_req_ready_n;
|
||||
|
||||
// unmerge responses
|
||||
|
||||
reg [QUEUE_SIZE-1:0][OUT_REQS-1:0] rsp_rem_mask;
|
||||
wire [OUT_REQS-1:0] rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~out_rsp_mask;
|
||||
assign out_rsp_eop = ~(| rsp_rem_mask_n);
|
||||
|
||||
// Per-slot record of the output responses that are still outstanding.
// When an entry is pushed into the index buffer, the slot addressed by
// ibuf_waddr is loaded with batch_valid_r, i.e. the set of output lanes the
// issued request uses. Every accepted output response stores rsp_rem_mask_n
// (the slot's mask with the lanes in out_rsp_mask cleared) back into the slot
// addressed by ibuf_raddr. The array has no reset; a slot is written on push
// before responses for it are processed.
always @(posedge clk) begin
|
||||
// allocate: remember which output lanes must respond
if (ibuf_push) begin
|
||||
rsp_rem_mask[ibuf_waddr] <= batch_valid_r;
|
||||
end
|
||||
// response accepted: drop the lanes that just answered
if (out_rsp_fire) begin
|
||||
rsp_rem_mask[ibuf_raddr] <= rsp_rem_mask_n;
|
||||
end
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] ibuf_dout_offset;
|
||||
reg [NUM_REQS-1:0] ibuf_dout_pmask;
|
||||
wire [TAG_ID_WIDTH-1:0] ibuf_dout_tag;
|
||||
|
||||
assign {ibuf_dout_tag, ibuf_dout_pmask, ibuf_dout_offset} = ibuf_dout;
|
||||
|
||||
logic [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_rsp_data_n;
|
||||
logic [NUM_REQS-1:0] in_rsp_mask_n;
|
||||
|
||||
// Fan a wide output response back out to the narrow input lanes.
// Lane k belongs to output slot k / BATCH_SIZE; it is flagged valid when that
// slot responded and the lane was part of the merged request (stored pmask),
// and its data is the DATA_IN_WIDTH slice selected by the stored lane offset.
always @(*) begin
    for (integer k = 0; k < OUT_REQS * BATCH_SIZE; ++k) begin
        in_rsp_mask_n[k] = out_rsp_mask[k / BATCH_SIZE] && ibuf_dout_pmask[k];
        in_rsp_data_n[k] = out_rsp_data[k / BATCH_SIZE][ibuf_dout_offset[k] * DATA_IN_WIDTH +: DATA_IN_WIDTH];
    end
end
|
||||
|
||||
assign in_rsp_valid = out_rsp_valid;
|
||||
assign in_rsp_mask = in_rsp_mask_n;
|
||||
assign in_rsp_data = in_rsp_data_n;
|
||||
// Rebuild the input-side response tag: the UUID bits travel in the upper part
// of the output tag, the tag id is read back from the index buffer. The UUID
// slice is only formed when UUID_WIDTH is non-zero, since a zero-width
// part-select is illegal (same guard as the debug UUID extraction in this
// module).
if (UUID_WIDTH != 0) begin : g_in_rsp_tag
    assign in_rsp_tag = {out_rsp_tag[OUT_TAG_WIDTH-1 -: UUID_WIDTH], ibuf_dout_tag};
end else begin : g_in_rsp_tag_no_uuid
    assign in_rsp_tag = ibuf_dout_tag;
end
|
||||
assign out_rsp_ready = in_rsp_ready;
|
||||
|
||||
`ifndef NDEBUG
|
||||
wire [`UP(UUID_WIDTH)-1:0] out_req_uuid;
|
||||
wire [`UP(UUID_WIDTH)-1:0] out_rsp_uuid;
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign out_req_uuid = out_req_tag[OUT_TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
assign out_rsp_uuid = out_rsp_tag[OUT_TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
end else begin
|
||||
assign out_req_uuid = '0;
|
||||
assign out_rsp_uuid = '0;
|
||||
end
|
||||
|
||||
reg [NUM_REQS-1:0][BATCH_SIZE_W-1:0] out_req_offset;
|
||||
reg [NUM_REQS-1:0] out_req_pmask;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (ibuf_push) begin
|
||||
out_req_offset <= ibuf_din_offset;
|
||||
out_req_pmask <= ibuf_din_pmask;
|
||||
end
|
||||
end
|
||||
|
||||
wire out_req_fire = out_req_valid && out_req_ready;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (out_req_fire) begin
|
||||
if (out_req_rw) begin
|
||||
`TRACE(1, ("%d: %s-out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
|
||||
`TRACE(1, (", byteen="));
|
||||
`TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS);
|
||||
`TRACE(1, (", data="));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS);
|
||||
end else begin
|
||||
`TRACE(1, ("%d: %s-out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
|
||||
end
|
||||
`TRACE(1, (", offset="));
|
||||
`TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS);
|
||||
`TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid));
|
||||
if ($countones(out_req_pmask) > 1) begin
|
||||
`TRACE(1, ("%t: *** %s: coalescing=%b (#%0d)\n", $time, INSTANCE_ID, out_req_pmask, out_req_uuid));
|
||||
end
|
||||
end
|
||||
if (out_rsp_fire) begin
|
||||
`TRACE(1, ("%d: %s-out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS);
|
||||
`TRACE(1, (", offset="));
|
||||
`TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS);
|
||||
`TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
//`TRACING_OFF
|
||||
`TRACING_OFF
|
||||
module VX_mem_scheduler #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
parameter CORE_REQS = 1,
|
||||
|
@ -30,10 +30,13 @@ module VX_mem_scheduler #(
|
|||
parameter MEM_OUT_BUF = 0,
|
||||
|
||||
parameter WORD_WIDTH = WORD_SIZE * 8,
|
||||
parameter MEM_BATCHES = (CORE_REQS + MEM_CHANNELS - 1) / MEM_CHANNELS,
|
||||
parameter MEM_QUEUE_ADDRW= `CLOG2(MEM_QUEUE_SIZE),
|
||||
parameter LINE_WIDTH = LINE_SIZE * 8,
|
||||
parameter PER_LINE_REQS = LINE_SIZE / WORD_SIZE,
|
||||
parameter MERGED_REQS = CORE_REQS / PER_LINE_REQS,
|
||||
parameter MEM_BATCHES = (MERGED_REQS + MEM_CHANNELS - 1) / MEM_CHANNELS,
|
||||
parameter MEM_BATCH_BITS= `CLOG2(MEM_BATCHES),
|
||||
parameter MEM_ADDR_WIDTH= ADDR_WIDTH,
|
||||
parameter MEM_QUEUE_ADDRW= `CLOG2(MEM_QUEUE_SIZE),
|
||||
parameter MEM_ADDR_WIDTH= ADDR_WIDTH - `CLOG2(PER_LINE_REQS),
|
||||
parameter MEM_TAG_WIDTH = UUID_WIDTH + MEM_QUEUE_ADDRW + MEM_BATCH_BITS
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -65,24 +68,28 @@ module VX_mem_scheduler #(
|
|||
output wire [MEM_CHANNELS-1:0] mem_req_rw,
|
||||
output wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen,
|
||||
output wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr,
|
||||
output wire [MEM_CHANNELS-1:0][WORD_WIDTH-1:0] mem_req_data,
|
||||
output wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data,
|
||||
output wire [MEM_CHANNELS-1:0][MEM_TAG_WIDTH-1:0] mem_req_tag,
|
||||
input wire [MEM_CHANNELS-1:0] mem_req_ready,
|
||||
|
||||
// Memory response
|
||||
input wire [MEM_CHANNELS-1:0] mem_rsp_valid,
|
||||
input wire [MEM_CHANNELS-1:0][WORD_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_rsp_data,
|
||||
input wire [MEM_CHANNELS-1:0][MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire [MEM_CHANNELS-1:0] mem_rsp_ready
|
||||
);
|
||||
localparam BATCH_SEL_WIDTH = `UP(MEM_BATCH_BITS);
|
||||
localparam STALL_TIMEOUT = 10000000;
|
||||
localparam CORE_QUEUE_ADDRW = `CLOG2(CORE_QUEUE_SIZE);
|
||||
localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH;
|
||||
localparam REQQ_TAG_WIDTH = UUID_WIDTH + CORE_QUEUE_ADDRW;
|
||||
localparam BATCH_SEL_WIDTH = `UP(MEM_BATCH_BITS);
|
||||
localparam STALL_TIMEOUT = 10000000;
|
||||
localparam CORE_QUEUE_ADDRW= `CLOG2(CORE_QUEUE_SIZE);
|
||||
localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH;
|
||||
localparam REQQ_TAG_WIDTH = UUID_WIDTH + CORE_QUEUE_ADDRW;
|
||||
localparam MERGED_TAG_WIDTH= UUID_WIDTH + MEM_QUEUE_ADDRW;
|
||||
localparam COALESCE_ENABLE = (LINE_SIZE != WORD_SIZE);
|
||||
localparam CORE_CHANNELS = COALESCE_ENABLE ? CORE_REQS : MEM_CHANNELS;
|
||||
localparam CORE_BATCHES = COALESCE_ENABLE ? 1 : MEM_BATCHES;
|
||||
localparam CORE_BATCH_BITS = `CLOG2(CORE_BATCHES);
|
||||
|
||||
`STATIC_ASSERT ((WORD_SIZE == LINE_SIZE), ("invalid parameter"))
|
||||
`STATIC_ASSERT ((CORE_QUEUE_SIZE == MEM_QUEUE_SIZE), ("invalid parameter"))
|
||||
`STATIC_ASSERT (`IS_DIVISBLE(CORE_REQS * WORD_SIZE, LINE_SIZE), ("invalid parameter"))
|
||||
`STATIC_ASSERT ((TAG_WIDTH >= UUID_WIDTH), ("invalid parameter"))
|
||||
`STATIC_ASSERT ((0 == RSP_PARTIAL) || (1 == RSP_PARTIAL), ("invalid parameter"))
|
||||
`RUNTIME_ASSERT((~core_req_valid || core_req_mask != 0), ("invalid request mask"));
|
||||
|
@ -105,19 +112,34 @@ module VX_mem_scheduler #(
|
|||
wire [REQQ_TAG_WIDTH-1:0] reqq_tag;
|
||||
wire reqq_ready;
|
||||
|
||||
wire reqq_valid_s;
|
||||
wire [MERGED_REQS-1:0] reqq_mask_s;
|
||||
wire reqq_rw_s;
|
||||
wire [MERGED_REQS-1:0][LINE_SIZE-1:0] reqq_byteen_s;
|
||||
wire [MERGED_REQS-1:0][MEM_ADDR_WIDTH-1:0] reqq_addr_s;
|
||||
wire [MERGED_REQS-1:0][LINE_WIDTH-1:0] reqq_data_s;
|
||||
wire [MERGED_TAG_WIDTH-1:0] reqq_tag_s;
|
||||
wire reqq_ready_s;
|
||||
|
||||
wire [MEM_CHANNELS-1:0] mem_req_valid_s;
|
||||
wire [MEM_CHANNELS-1:0] mem_req_mask_s;
|
||||
wire mem_req_rw_s;
|
||||
wire [MEM_CHANNELS-1:0][WORD_SIZE-1:0] mem_req_byteen_s;
|
||||
wire [MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_s;
|
||||
wire [MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_s;
|
||||
wire [MEM_CHANNELS-1:0][WORD_WIDTH-1:0] mem_req_data_s;
|
||||
wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_s;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_req_tag_s;
|
||||
wire [MEM_CHANNELS-1:0] mem_req_ready_s;
|
||||
|
||||
wire mem_rsp_valid_s2;
|
||||
wire [MEM_CHANNELS-1:0] mem_rsp_mask_s2;
|
||||
wire [MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_rsp_data_s2;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s2;
|
||||
wire mem_rsp_ready_s2;
|
||||
|
||||
wire mem_rsp_valid_s;
|
||||
wire [MEM_CHANNELS-1:0] mem_rsp_mask_s;
|
||||
wire [MEM_CHANNELS-1:0][WORD_WIDTH-1:0] mem_rsp_data_s;
|
||||
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
|
||||
wire [CORE_REQS-1:0] mem_rsp_mask_s;
|
||||
wire [CORE_REQS-1:0][WORD_WIDTH-1:0] mem_rsp_data_s;
|
||||
wire [REQQ_TAG_WIDTH-1:0] mem_rsp_tag_s;
|
||||
wire mem_rsp_ready_s;
|
||||
|
||||
wire crsp_valid;
|
||||
|
@ -174,7 +196,7 @@ module VX_mem_scheduler #(
|
|||
|
||||
assign ibuf_push = core_req_fire && ~core_req_rw;
|
||||
assign ibuf_pop = crsp_fire && crsp_eop;
|
||||
assign ibuf_raddr = mem_rsp_tag_s[MEM_BATCH_BITS +: MEM_QUEUE_ADDRW];
|
||||
assign ibuf_raddr = mem_rsp_tag_s[CORE_BATCH_BITS +: CORE_QUEUE_ADDRW];
|
||||
assign ibuf_din = core_req_tag[TAG_ID_WIDTH-1:0];
|
||||
|
||||
VX_index_buffer #(
|
||||
|
@ -195,23 +217,96 @@ module VX_mem_scheduler #(
|
|||
|
||||
`UNUSED_VAR (ibuf_empty)
|
||||
|
||||
// Handle memory coalescing ///////////////////////////////////////////////
|
||||
|
||||
if (COALESCE_ENABLE) begin
|
||||
|
||||
`RESET_RELAY (coalescer_reset, reset);
|
||||
|
||||
VX_mem_coalescer #(
|
||||
.INSTANCE_ID ($sformatf("%s-coalescer", INSTANCE_ID)),
|
||||
.NUM_REQS (CORE_REQS),
|
||||
.DATA_IN_SIZE (WORD_SIZE),
|
||||
.DATA_OUT_SIZE (LINE_SIZE),
|
||||
.ADDR_WIDTH (ADDR_WIDTH),
|
||||
.TAG_WIDTH (REQQ_TAG_WIDTH),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.QUEUE_SIZE (MEM_QUEUE_SIZE)
|
||||
) coalescer (
|
||||
.clk (clk),
|
||||
.reset (coalescer_reset),
|
||||
|
||||
// Input request
|
||||
.in_req_valid (reqq_valid),
|
||||
.in_req_mask (reqq_mask),
|
||||
.in_req_rw (reqq_rw),
|
||||
.in_req_byteen (reqq_byteen),
|
||||
.in_req_addr (reqq_addr),
|
||||
.in_req_data (reqq_data),
|
||||
.in_req_tag (reqq_tag),
|
||||
.in_req_ready (reqq_ready),
|
||||
|
||||
// Input response
|
||||
.in_rsp_valid (mem_rsp_valid_s),
|
||||
.in_rsp_mask (mem_rsp_mask_s),
|
||||
.in_rsp_data (mem_rsp_data_s),
|
||||
.in_rsp_tag (mem_rsp_tag_s),
|
||||
.in_rsp_ready (mem_rsp_ready_s),
|
||||
|
||||
// Output request
|
||||
.out_req_valid (reqq_valid_s),
|
||||
.out_req_mask (reqq_mask_s),
|
||||
.out_req_rw (reqq_rw_s),
|
||||
.out_req_byteen (reqq_byteen_s),
|
||||
.out_req_addr (reqq_addr_s),
|
||||
.out_req_data (reqq_data_s),
|
||||
.out_req_tag (reqq_tag_s),
|
||||
.out_req_ready (reqq_ready_s),
|
||||
|
||||
// Output response
|
||||
.out_rsp_valid (mem_rsp_valid_s2),
|
||||
.out_rsp_mask (mem_rsp_mask_s2),
|
||||
.out_rsp_data (mem_rsp_data_s2),
|
||||
.out_rsp_tag (mem_rsp_tag_s2),
|
||||
.out_rsp_ready (mem_rsp_ready_s2)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
assign reqq_valid_s = reqq_valid;
|
||||
assign reqq_mask_s = reqq_mask;
|
||||
assign reqq_rw_s = reqq_rw;
|
||||
assign reqq_byteen_s= reqq_byteen;
|
||||
assign reqq_addr_s = reqq_addr;
|
||||
assign reqq_data_s = reqq_data;
|
||||
assign reqq_tag_s = reqq_tag;
|
||||
assign reqq_ready = reqq_ready_s;
|
||||
|
||||
assign mem_rsp_valid_s = mem_rsp_valid_s2;
|
||||
assign mem_rsp_mask_s = mem_rsp_mask_s2;
|
||||
assign mem_rsp_data_s = mem_rsp_data_s2;
|
||||
assign mem_rsp_tag_s = mem_rsp_tag_s2;
|
||||
assign mem_rsp_ready_s2 = mem_rsp_ready_s;
|
||||
|
||||
end
|
||||
|
||||
// Handle memory requests /////////////////////////////////////////////////
|
||||
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0] mem_req_mask_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][WORD_SIZE-1:0] mem_req_byteen_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_SIZE-1:0] mem_req_byteen_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][MEM_ADDR_WIDTH-1:0] mem_req_addr_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][WORD_WIDTH-1:0] mem_req_data_b;
|
||||
wire [MEM_BATCHES-1:0][MEM_CHANNELS-1:0][LINE_WIDTH-1:0] mem_req_data_b;
|
||||
|
||||
wire [BATCH_SEL_WIDTH-1:0] req_batch_idx;
|
||||
|
||||
for (genvar i = 0; i < MEM_BATCHES; ++i) begin
|
||||
for (genvar j = 0; j < MEM_CHANNELS; ++j) begin
|
||||
localparam r = i * MEM_CHANNELS + j;
|
||||
if (r < CORE_REQS) begin
|
||||
assign mem_req_mask_b[i][j] = reqq_mask[r];
|
||||
assign mem_req_byteen_b[i][j] = reqq_byteen[r];
|
||||
assign mem_req_addr_b[i][j] = reqq_addr[r];
|
||||
assign mem_req_data_b[i][j] = reqq_data[r];
|
||||
if (r < MERGED_REQS) begin
|
||||
assign mem_req_mask_b[i][j] = reqq_mask_s[r];
|
||||
assign mem_req_byteen_b[i][j] = reqq_byteen_s[r];
|
||||
assign mem_req_addr_b[i][j] = reqq_addr_s[r];
|
||||
assign mem_req_data_b[i][j] = reqq_data_s[r];
|
||||
end else begin
|
||||
assign mem_req_mask_b[i][j] = 0;
|
||||
assign mem_req_byteen_b[i][j] = '0;
|
||||
|
@ -222,7 +317,7 @@ module VX_mem_scheduler #(
|
|||
end
|
||||
|
||||
assign mem_req_mask_s = mem_req_mask_b[req_batch_idx];
|
||||
assign mem_req_rw_s = reqq_rw;
|
||||
assign mem_req_rw_s = reqq_rw_s;
|
||||
assign mem_req_byteen_s = mem_req_byteen_b[req_batch_idx];
|
||||
assign mem_req_addr_s = mem_req_addr_b[req_batch_idx];
|
||||
assign mem_req_data_s = mem_req_data_b[req_batch_idx];
|
||||
|
@ -235,7 +330,7 @@ module VX_mem_scheduler #(
|
|||
if (reset) begin
|
||||
batch_sent_mask <= '0;
|
||||
end else begin
|
||||
if (reqq_valid) begin
|
||||
if (reqq_valid_s) begin
|
||||
if (batch_sent_all) begin
|
||||
batch_sent_mask <= '0;
|
||||
end else begin
|
||||
|
@ -251,7 +346,7 @@ module VX_mem_scheduler #(
|
|||
if (reset) begin
|
||||
req_batch_idx_r <= '0;
|
||||
end else begin
|
||||
if (reqq_valid && batch_sent_all) begin
|
||||
if (reqq_valid_s && batch_sent_all) begin
|
||||
if (req_sent_all) begin
|
||||
req_batch_idx_r <= '0;
|
||||
end else begin
|
||||
|
@ -283,22 +378,22 @@ module VX_mem_scheduler #(
|
|||
|
||||
assign req_batch_idx = req_batch_idx_r;
|
||||
assign req_sent_all = batch_sent_all && (req_batch_idx_r == req_batch_idx_last);
|
||||
assign mem_req_tag_s = {reqq_tag, req_batch_idx};
|
||||
assign mem_req_tag_s = {reqq_tag_s, req_batch_idx};
|
||||
|
||||
end else begin
|
||||
|
||||
assign req_batch_idx = '0;
|
||||
assign req_sent_all = batch_sent_all;
|
||||
assign mem_req_tag_s = reqq_tag;
|
||||
assign mem_req_tag_s = reqq_tag_s;
|
||||
|
||||
end
|
||||
|
||||
assign mem_req_valid_s = {MEM_CHANNELS{reqq_valid}} & mem_req_mask_s & ~batch_sent_mask;
|
||||
assign reqq_ready = req_sent_all;
|
||||
assign mem_req_valid_s = {MEM_CHANNELS{reqq_valid_s}} & mem_req_mask_s & ~batch_sent_mask;
|
||||
assign reqq_ready_s = req_sent_all;
|
||||
|
||||
for (genvar i = 0; i < MEM_CHANNELS; ++i) begin
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (1 + WORD_SIZE + MEM_ADDR_WIDTH + WORD_WIDTH + MEM_TAG_WIDTH),
|
||||
.DATAW (1 + LINE_SIZE + MEM_ADDR_WIDTH + LINE_WIDTH + MEM_TAG_WIDTH),
|
||||
.SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)),
|
||||
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
|
||||
) mem_req_buf (
|
||||
|
@ -318,7 +413,7 @@ module VX_mem_scheduler #(
|
|||
// Select memory response
|
||||
VX_mem_rsp_sel #(
|
||||
.NUM_REQS (MEM_CHANNELS),
|
||||
.DATA_WIDTH (WORD_WIDTH),
|
||||
.DATA_WIDTH (LINE_WIDTH),
|
||||
.TAG_WIDTH (MEM_TAG_WIDTH),
|
||||
.TAG_SEL_BITS (MEM_TAG_WIDTH - UUID_WIDTH),
|
||||
.OUT_BUF (2)
|
||||
|
@ -329,26 +424,26 @@ module VX_mem_scheduler #(
|
|||
.rsp_data_in (mem_rsp_data),
|
||||
.rsp_tag_in (mem_rsp_tag),
|
||||
.rsp_ready_in (mem_rsp_ready),
|
||||
.rsp_valid_out (mem_rsp_valid_s),
|
||||
.rsp_mask_out (mem_rsp_mask_s),
|
||||
.rsp_data_out (mem_rsp_data_s),
|
||||
.rsp_tag_out (mem_rsp_tag_s),
|
||||
.rsp_ready_out (mem_rsp_ready_s)
|
||||
.rsp_valid_out (mem_rsp_valid_s2),
|
||||
.rsp_mask_out (mem_rsp_mask_s2),
|
||||
.rsp_data_out (mem_rsp_data_s2),
|
||||
.rsp_tag_out (mem_rsp_tag_s2),
|
||||
.rsp_ready_out (mem_rsp_ready_s2)
|
||||
);
|
||||
|
||||
reg [CORE_QUEUE_SIZE-1:0][CORE_REQS-1:0] rsp_rem_mask;
|
||||
wire [CORE_REQS-1:0] rsp_rem_mask_n, curr_mask;
|
||||
wire [BATCH_SEL_WIDTH-1:0] rsp_batch_idx;
|
||||
|
||||
if (MEM_BATCHES > 1) begin
|
||||
assign rsp_batch_idx = mem_rsp_tag_s[MEM_BATCH_BITS-1:0];
|
||||
if (CORE_BATCHES > 1) begin
|
||||
assign rsp_batch_idx = mem_rsp_tag_s[CORE_BATCH_BITS-1:0];
|
||||
end else begin
|
||||
assign rsp_batch_idx = '0;
|
||||
end
|
||||
|
||||
for (genvar r = 0; r < CORE_REQS; ++r) begin
|
||||
localparam i = r / MEM_CHANNELS;
|
||||
localparam j = r % MEM_CHANNELS;
|
||||
localparam i = r / CORE_CHANNELS;
|
||||
localparam j = r % CORE_CHANNELS;
|
||||
assign curr_mask[r] = (BATCH_SEL_WIDTH'(i) == rsp_batch_idx) && mem_rsp_mask_s[j];
|
||||
end
|
||||
|
||||
|
@ -385,7 +480,7 @@ module VX_mem_scheduler #(
|
|||
assign crsp_sop = rsp_sop_r[ibuf_raddr];
|
||||
|
||||
for (genvar r = 0; r < CORE_REQS; ++r) begin
|
||||
localparam j = r % MEM_CHANNELS;
|
||||
localparam j = r % CORE_CHANNELS;
|
||||
assign crsp_data[r] = mem_rsp_data_s[j];
|
||||
end
|
||||
|
||||
|
@ -393,15 +488,15 @@ module VX_mem_scheduler #(
|
|||
|
||||
end else begin
|
||||
|
||||
reg [MEM_BATCHES*MEM_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0];
|
||||
reg [MEM_BATCHES*MEM_CHANNELS*WORD_WIDTH-1:0] rsp_store_n;
|
||||
reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0];
|
||||
reg [CORE_BATCHES*CORE_CHANNELS*WORD_WIDTH-1:0] rsp_store_n;
|
||||
reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0];
|
||||
|
||||
always @(*) begin
|
||||
rsp_store_n = rsp_store[ibuf_raddr];
|
||||
for (integer i = 0; i < MEM_CHANNELS; ++i) begin
|
||||
if ((MEM_CHANNELS == 1) || mem_rsp_mask_s[i]) begin
|
||||
rsp_store_n[(rsp_batch_idx * MEM_CHANNELS + i) * WORD_WIDTH +: WORD_WIDTH] = mem_rsp_data_s[i];
|
||||
for (integer i = 0; i < CORE_CHANNELS; ++i) begin
|
||||
if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin
|
||||
rsp_store_n[(rsp_batch_idx * CORE_CHANNELS + i) * WORD_WIDTH +: WORD_WIDTH] = mem_rsp_data_s[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -420,16 +515,16 @@ module VX_mem_scheduler #(
|
|||
assign crsp_sop = 1'b1;
|
||||
|
||||
for (genvar r = 0; r < CORE_REQS; ++r) begin
|
||||
localparam i = r / MEM_CHANNELS;
|
||||
localparam j = r % MEM_CHANNELS;
|
||||
assign crsp_data[r] = rsp_store_n[(i * MEM_CHANNELS + j) * WORD_WIDTH +: WORD_WIDTH];
|
||||
localparam i = r / CORE_CHANNELS;
|
||||
localparam j = r % CORE_CHANNELS;
|
||||
assign crsp_data[r] = rsp_store_n[(i * CORE_CHANNELS + j) * WORD_WIDTH +: WORD_WIDTH];
|
||||
end
|
||||
|
||||
assign mem_rsp_ready_s = crsp_ready || ~rsp_complete;
|
||||
end
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign crsp_tag = {mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH], ibuf_dout};
|
||||
assign crsp_tag = {mem_rsp_tag_s[REQQ_TAG_WIDTH-1 -: UUID_WIDTH], ibuf_dout};
|
||||
end else begin
|
||||
assign crsp_tag = ibuf_dout;
|
||||
end
|
||||
|
@ -532,24 +627,24 @@ module VX_mem_scheduler #(
|
|||
if (| mem_req_fire_s) begin
|
||||
if (| mem_req_rw_s) begin
|
||||
`TRACE(1, ("%d: %s-mem-req-wr: valid=%b, addr=", $time, INSTANCE_ID, mem_req_fire_s));
|
||||
`TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, MEM_CHANNELS);
|
||||
`TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS);
|
||||
`TRACE(1, (", byteen="));
|
||||
`TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, MEM_CHANNELS);
|
||||
`TRACE_ARRAY1D(1, "0x%h", mem_req_byteen_s, CORE_CHANNELS);
|
||||
`TRACE(1, (", data="));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, MEM_CHANNELS);
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data_s, CORE_CHANNELS);
|
||||
end else begin
|
||||
`TRACE(1, ("%d: %s-mem-req-rd: valid=%b, addr=", $time, INSTANCE_ID, mem_req_fire_s));
|
||||
`TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, MEM_CHANNELS);
|
||||
`TRACE_ARRAY1D(1, "0x%h", mem_req_addr_s, CORE_CHANNELS);
|
||||
end
|
||||
`TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_waddr, req_batch_idx, mem_req_dbg_uuid));
|
||||
end
|
||||
if (mem_rsp_fire_s) begin
|
||||
`TRACE(1, ("%d: %s-mem-rsp: valid=%b, data=", $time, INSTANCE_ID, mem_rsp_mask_s));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, MEM_CHANNELS);
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data_s, CORE_CHANNELS);
|
||||
`TRACE(1, (", ibuf_idx=%0d, batch_idx=%0d (#%0d)\n", ibuf_raddr, rsp_batch_idx, mem_rsp_dbg_uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
//`TRACING_ON
|
||||
`TRACING_ON
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue