mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
VX_mem_coalescer fix
This commit is contained in:
parent
b297c29a10
commit
1b9f0a998b
1 changed files with 42 additions and 38 deletions
|
@ -27,10 +27,10 @@ module VX_mem_coalescer #(
|
|||
|
||||
parameter DATA_IN_WIDTH = DATA_IN_SIZE * 8,
|
||||
parameter DATA_OUT_WIDTH= DATA_OUT_SIZE * 8,
|
||||
parameter OUT_REQS = (NUM_REQS * DATA_IN_WIDTH) / DATA_OUT_WIDTH,
|
||||
parameter BATCH_SIZE = DATA_OUT_SIZE / DATA_IN_SIZE,
|
||||
parameter BATCH_SIZE_W = `LOG2UP(BATCH_SIZE),
|
||||
parameter OUT_ADDR_WIDTH= ADDR_WIDTH - BATCH_SIZE_W,
|
||||
parameter DATA_RATIO = DATA_OUT_SIZE / DATA_IN_SIZE,
|
||||
parameter DATA_RATIO_W = `LOG2UP(DATA_RATIO),
|
||||
parameter OUT_REQS = NUM_REQS / DATA_RATIO,
|
||||
parameter OUT_ADDR_WIDTH= ADDR_WIDTH - DATA_RATIO_W,
|
||||
parameter QUEUE_ADDRW = `CLOG2(QUEUE_SIZE),
|
||||
parameter OUT_TAG_WIDTH = UUID_WIDTH + QUEUE_ADDRW
|
||||
) (
|
||||
|
@ -79,15 +79,15 @@ module VX_mem_coalescer #(
|
|||
`RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("invalid request mask"));
|
||||
`RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("invalid request mask"));
|
||||
|
||||
localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH;
|
||||
localparam NUM_REQS_W = `LOG2UP(NUM_REQS);
|
||||
localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH;
|
||||
localparam NUM_REQS_W = `LOG2UP(NUM_REQS);
|
||||
// tag + mask + offset
|
||||
localparam IBUF_DATA_WIDTH = TAG_ID_WIDTH + NUM_REQS + (NUM_REQS * BATCH_SIZE_W);
|
||||
localparam IBUF_DATA_WIDTH = TAG_ID_WIDTH + NUM_REQS + (NUM_REQS * DATA_RATIO_W);
|
||||
|
||||
localparam STATE_SETUP = 0;
|
||||
localparam STATE_SEND = 1;
|
||||
|
||||
logic state_r, state_n;
|
||||
reg state_r, state_n;
|
||||
|
||||
reg out_req_valid_r, out_req_valid_n;
|
||||
reg out_req_rw_r, out_req_rw_n;
|
||||
|
@ -98,7 +98,7 @@ module VX_mem_coalescer #(
|
|||
reg [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data_r, out_req_data_n;
|
||||
reg [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
|
||||
|
||||
logic in_req_ready_n;
|
||||
reg in_req_ready_n;
|
||||
|
||||
wire ibuf_push;
|
||||
wire ibuf_pop;
|
||||
|
@ -109,33 +109,33 @@ module VX_mem_coalescer #(
|
|||
wire [IBUF_DATA_WIDTH-1:0] ibuf_din;
|
||||
wire [IBUF_DATA_WIDTH-1:0] ibuf_dout;
|
||||
|
||||
reg [OUT_REQS-1:0] batch_valid_r, batch_valid_n;
|
||||
reg [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n;
|
||||
reg [OUT_REQS-1:0][ATYPE_WIDTH-1:0] seed_atype_r, seed_atype_n;
|
||||
reg [NUM_REQS-1:0] addr_matches_r, addr_matches_n;
|
||||
reg [NUM_REQS-1:0] processed_mask_r, processed_mask_n;
|
||||
logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n;
|
||||
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n;
|
||||
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] seed_atype_r, seed_atype_n;
|
||||
logic [NUM_REQS-1:0] addr_matches_r, addr_matches_n;
|
||||
logic [NUM_REQS-1:0] processed_mask_r, processed_mask_n;
|
||||
|
||||
wire [OUT_REQS-1:0][NUM_REQS_W-1:0] seed_idx;
|
||||
|
||||
wire [NUM_REQS-1:0][OUT_ADDR_WIDTH-1:0] in_addr_base;
|
||||
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] in_addr_offset;
|
||||
wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] in_addr_offset;
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign in_addr_base[i] = in_req_addr[i][ADDR_WIDTH-1:BATCH_SIZE_W];
|
||||
assign in_addr_offset[i] = in_req_addr[i][BATCH_SIZE_W-1:0];
|
||||
assign in_addr_base[i] = in_req_addr[i][ADDR_WIDTH-1:DATA_RATIO_W];
|
||||
assign in_addr_offset[i] = in_req_addr[i][DATA_RATIO_W-1:0];
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < OUT_REQS; ++i) begin
|
||||
wire [BATCH_SIZE-1:0] batch_mask = in_req_mask[BATCH_SIZE * i +: BATCH_SIZE] & ~processed_mask_r[BATCH_SIZE * i +: BATCH_SIZE];
|
||||
wire [BATCH_SIZE_W-1:0] batch_idx;
|
||||
wire [DATA_RATIO-1:0] batch_mask = in_req_mask[i * DATA_RATIO +: DATA_RATIO] & ~processed_mask_r[i * DATA_RATIO +: DATA_RATIO];
|
||||
wire [DATA_RATIO_W-1:0] batch_idx;
|
||||
VX_priority_encoder #(
|
||||
.N (BATCH_SIZE)
|
||||
.N (DATA_RATIO)
|
||||
) priority_encoder (
|
||||
.data_in (batch_mask),
|
||||
.index (batch_idx),
|
||||
.index (batch_idx),
|
||||
`UNUSED_PIN (onehot),
|
||||
.valid_out (batch_valid_n[i])
|
||||
);
|
||||
assign seed_idx[i] = NUM_REQS_W'(BATCH_SIZE * i) + NUM_REQS_W'(batch_idx);
|
||||
assign seed_idx[i] = NUM_REQS_W'(i * DATA_RATIO) + NUM_REQS_W'(batch_idx);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < OUT_REQS; ++i) begin
|
||||
|
@ -144,8 +144,8 @@ module VX_mem_coalescer #(
|
|||
end
|
||||
|
||||
for (genvar i = 0; i < OUT_REQS; ++i) begin
|
||||
for (genvar j = 0; j < BATCH_SIZE; ++j) begin
|
||||
assign addr_matches_n[BATCH_SIZE * i + j] = (in_addr_base[BATCH_SIZE * i + j] == seed_addr_n[i]);
|
||||
for (genvar j = 0; j < DATA_RATIO; ++j) begin
|
||||
assign addr_matches_n[i * DATA_RATIO + j] = (in_addr_base[i * DATA_RATIO + j] == seed_addr_n[i]);
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -174,24 +174,28 @@ module VX_mem_coalescer #(
|
|||
|
||||
wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches_r;
|
||||
|
||||
reg [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] req_byteen_merged;
|
||||
reg [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] req_data_merged;
|
||||
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged;
|
||||
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] req_data_merged;
|
||||
|
||||
always @(*) begin
|
||||
req_byteen_merged = '0;
|
||||
req_data_merged = 'x;
|
||||
for (integer i = 0; i < OUT_REQS; ++i) begin
|
||||
for (integer j = 0; j < BATCH_SIZE; ++j) begin
|
||||
if (current_pmask[BATCH_SIZE * i + j]) begin
|
||||
req_byteen_merged[i][in_addr_offset[BATCH_SIZE * i + j] * DATA_IN_SIZE +: DATA_IN_SIZE] = in_req_byteen[BATCH_SIZE * i + j];
|
||||
req_data_merged[i][in_addr_offset[BATCH_SIZE * i + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH] = in_req_data[BATCH_SIZE * i + j];
|
||||
for (integer j = 0; j < DATA_RATIO; ++j) begin
|
||||
if (current_pmask[i * DATA_RATIO + j]) begin
|
||||
for (integer k = 0; k < DATA_IN_SIZE; ++k) begin
|
||||
if (in_req_byteen[DATA_RATIO * i + j][k]) begin
|
||||
req_byteen_merged[i][in_addr_offset[DATA_RATIO * i + j]][k] = 1'b1;
|
||||
req_data_merged[i][in_addr_offset[DATA_RATIO * i + j]][k * 8 +: 8] = in_req_data[DATA_RATIO * i + j][k * 8 +: 8];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire [OUT_REQS * BATCH_SIZE - 1:0] pending_mask;
|
||||
for (genvar i = 0; i < OUT_REQS * BATCH_SIZE; ++i) begin
|
||||
wire [OUT_REQS * DATA_RATIO - 1:0] pending_mask;
|
||||
for (genvar i = 0; i < OUT_REQS * DATA_RATIO; ++i) begin
|
||||
assign pending_mask[i] = in_req_mask[i] && ~addr_matches_r[i] && ~processed_mask_r[i];
|
||||
end
|
||||
wire batch_completed = ~(| pending_mask);
|
||||
|
@ -253,7 +257,7 @@ module VX_mem_coalescer #(
|
|||
assign ibuf_raddr = out_rsp_tag[QUEUE_ADDRW-1:0];
|
||||
|
||||
wire [TAG_ID_WIDTH-1:0] ibuf_din_tag = in_req_tag[TAG_ID_WIDTH-1:0];
|
||||
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] ibuf_din_offset = in_addr_offset;
|
||||
wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] ibuf_din_offset = in_addr_offset;
|
||||
wire [NUM_REQS-1:0] ibuf_din_pmask = current_pmask;
|
||||
|
||||
assign ibuf_din = {ibuf_din_tag, ibuf_din_pmask, ibuf_din_offset};
|
||||
|
@ -301,7 +305,7 @@ module VX_mem_coalescer #(
|
|||
end
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] ibuf_dout_offset;
|
||||
wire [NUM_REQS-1:0][DATA_RATIO_W-1:0] ibuf_dout_offset;
|
||||
wire [NUM_REQS-1:0] ibuf_dout_pmask;
|
||||
wire [TAG_ID_WIDTH-1:0] ibuf_dout_tag;
|
||||
|
||||
|
@ -311,9 +315,9 @@ module VX_mem_coalescer #(
|
|||
wire [NUM_REQS-1:0] in_rsp_mask_n;
|
||||
|
||||
for (genvar i = 0; i < OUT_REQS; ++i) begin
|
||||
for (genvar j = 0; j < BATCH_SIZE; ++j) begin
|
||||
assign in_rsp_mask_n[BATCH_SIZE * i + j] = out_rsp_mask[i] && ibuf_dout_pmask[BATCH_SIZE * i + j];
|
||||
assign in_rsp_data_n[BATCH_SIZE * i + j] = out_rsp_data[i][ibuf_dout_offset[BATCH_SIZE * i + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH];
|
||||
for (genvar j = 0; j < DATA_RATIO; ++j) begin
|
||||
assign in_rsp_mask_n[i * DATA_RATIO + j] = out_rsp_mask[i] && ibuf_dout_pmask[i * DATA_RATIO + j];
|
||||
assign in_rsp_data_n[i * DATA_RATIO + j] = out_rsp_data[i][ibuf_dout_offset[i * DATA_RATIO + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH];
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -335,7 +339,7 @@ module VX_mem_coalescer #(
|
|||
assign out_rsp_uuid = '0;
|
||||
end
|
||||
|
||||
reg [NUM_REQS-1:0][BATCH_SIZE_W-1:0] out_req_offset;
|
||||
reg [NUM_REQS-1:0][DATA_RATIO_W-1:0] out_req_offset;
|
||||
reg [NUM_REQS-1:0] out_req_pmask;
|
||||
|
||||
always @(posedge clk) begin
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue