minor updates

This commit is contained in:
Blaise Tine 2024-08-03 11:49:12 -07:00
parent 35fb50f9a6
commit 4b93c9ffb5
7 changed files with 97 additions and 75 deletions

View file

@ -83,7 +83,7 @@ module VX_alu_muldiv #(
.DEPTH (`LATENCY_IMUL),
.RESETW (1)
) mul_shift_reg (
.clk(clk),
.clk (clk),
.reset (reset),
.enable (mul_ready_in),
.data_in ({mul_valid_in, execute_if.data.uuid, execute_if.data.wid, execute_if.data.tmask, execute_if.data.PC, execute_if.data.rd, execute_if.data.wb, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop, mul_result_tmp}),

View file

@ -100,6 +100,8 @@ module VX_operands import VX_gpu_pkg::*; #(
assign req_in_valid = {NUM_SRC_REGS{scoreboard_if.valid}} & src_valid;
`RESET_RELAY (req_xbar_reset, reset);
VX_stream_xbar #(
.NUM_INPUTS (NUM_SRC_REGS),
.NUM_OUTPUTS (NUM_BANKS),
@ -109,7 +111,7 @@ module VX_operands import VX_gpu_pkg::*; #(
.OUT_BUF (0) // no output buffering
) req_xbar (
.clk (clk),
.reset (reset),
.reset (req_xbar_reset),
`UNUSED_PIN(collisions),
.valid_in (req_in_valid),
.data_in (req_in_data),
@ -162,12 +164,14 @@ module VX_operands import VX_gpu_pkg::*; #(
scoreboard_if.data.uuid
};
`RESET_RELAY (pipe1_reset, reset);
VX_pipe_register #(
.DATAW (1 + NUM_BANKS + NUM_SRC_REGS + META_DATAW + 1 + NUM_BANKS * (PER_BANK_ADDRW + REQ_SEL_WIDTH)),
.RESETW (1 + NUM_BANKS + NUM_SRC_REGS)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.reset (pipe1_reset),
.enable (pipe_in_ready),
.data_in ({scoreboard_if.valid, gpr_rd_valid, data_fetched_n, pipe_data, has_collision_n, gpr_rd_addr, gpr_rd_req_idx}),
.data_out ({pipe_valid_st1, gpr_rd_valid_st1, data_fetched_st1, pipe_data_st1, has_collision_st1, gpr_rd_addr_st1, gpr_rd_req_idx_st1})
@ -179,12 +183,14 @@ module VX_operands import VX_gpu_pkg::*; #(
wire pipe_valid2_st1 = pipe_valid_st1 && ~has_collision_st1;
`RESET_RELAY (pipe2_reset, reset);
VX_pipe_register #(
.DATAW (1 + NUM_BANKS + REGS_DATAW + (NUM_BANKS * `XLEN * `NUM_THREADS) + META_DATAW + NUM_BANKS * REQ_SEL_WIDTH),
.RESETW (1 + NUM_BANKS + REGS_DATAW)
) pipe_reg2 (
.clk (clk),
.reset (reset),
.reset (pipe2_reset),
.enable (pipe_ready_st1),
.data_in ({pipe_valid2_st1, gpr_rd_valid_st1, src_data_st1, gpr_rd_data_st1, pipe_data_st1, gpr_rd_req_idx_st1}),
.data_out ({pipe_valid_st2, gpr_rd_valid_st2, src_data_st2, gpr_rd_data_st2, pipe_data_st2, gpr_rd_req_idx_st2})
@ -199,14 +205,16 @@ module VX_operands import VX_gpu_pkg::*; #(
end
end
`RESET_RELAY (out_buf_reset, reset);
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF)),
.LUTRAM (1)
) out_buffer (
) out_buf (
.clk (clk),
.reset (reset),
.reset (out_buf_reset),
.valid_in (pipe_valid_st2),
.ready_in (pipe_ready_st2),
.data_in ({
@ -273,6 +281,8 @@ module VX_operands import VX_gpu_pkg::*; #(
assign wren[i*XLEN_SIZE+:XLEN_SIZE] = {XLEN_SIZE{writeback_if.data.tmask[i]}};
end
`RESET_RELAY (bram_reset, reset);
VX_dp_ram #(
.DATAW (`XLEN * `NUM_THREADS),
.SIZE (PER_BANK_REGS * PER_ISSUE_WARPS),
@ -283,7 +293,7 @@ module VX_operands import VX_gpu_pkg::*; #(
.NO_RWCHECK (1)
) gpr_ram (
.clk (clk),
.reset (reset),
.reset (bram_reset),
.read (pipe_fire_st1),
.wren (wren),
.write (gpr_wr_enabled),

View file

@ -82,11 +82,14 @@ module VX_avs_adapter #(
end
for (genvar i = 0; i < NUM_BANKS; ++i) begin
`RESET_RELAY (rd_req_reset, reset);
VX_pending_size #(
.SIZE (RD_QUEUE_SIZE)
) pending_size (
.clk (clk),
.reset (reset),
.reset (rd_req_reset),
.incr (req_queue_push[i]),
.decr (req_queue_pop[i]),
`UNUSED_PIN (empty),
@ -102,7 +105,7 @@ module VX_avs_adapter #(
.DEPTH (RD_QUEUE_SIZE)
) rd_req_queue (
.clk (clk),
.reset (reset),
.reset (rd_req_reset),
.push (req_queue_push[i]),
.pop (req_queue_pop[i]),
.data_in (mem_req_tag),
@ -126,13 +129,15 @@ module VX_avs_adapter #(
wire valid_out_w = mem_req_valid && ~req_queue_going_full[i] && (req_bank_sel == i);
wire ready_out_w;
`RESET_RELAY (req_out_reset, reset);
VX_elastic_buffer #(
.DATAW (1 + DATA_SIZE + BANK_OFFSETW + DATA_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(REQ_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF))
) req_out_buf (
.clk (clk),
.reset (reset),
.reset (req_out_reset),
.valid_in (valid_out_w),
.ready_in (ready_out_w),
.data_in ({mem_req_rw, mem_req_byteen, req_bank_off, mem_req_data}),
@ -168,12 +173,15 @@ module VX_avs_adapter #(
wire [NUM_BANKS-1:0] rsp_queue_empty;
for (genvar i = 0; i < NUM_BANKS; ++i) begin
`RESET_RELAY (rd_rsp_reset, reset);
VX_fifo_queue #(
.DATAW (DATA_WIDTH),
.DEPTH (RD_QUEUE_SIZE)
) rd_rsp_queue (
.clk (clk),
.reset (reset),
.reset (rd_rsp_reset),
.push (avs_readdatavalid[i]),
.pop (req_queue_pop[i]),
.data_in (avs_readdata[i]),
@ -192,6 +200,8 @@ module VX_avs_adapter #(
assign req_queue_pop[i] = rsp_arb_valid_in[i] && rsp_arb_ready_in[i];
end
`RESET_RELAY (rsp_arb_reset, reset);
VX_stream_arb #(
.NUM_INPUTS (NUM_BANKS),
.DATAW (DATA_WIDTH + TAG_WIDTH),

View file

@ -204,6 +204,8 @@ module VX_axi_adapter #(
`RUNTIME_ASSERT(~m_axi_rvalid[i] || m_axi_rresp[i] == 0, ("%t: *** AXI response error", $time));
end
`RESET_RELAY (rsp_arb_reset, reset);
VX_stream_arb #(
.NUM_INPUTS (NUM_BANKS),
.DATAW (DATA_WIDTH + TAG_WIDTH),
@ -211,7 +213,7 @@ module VX_axi_adapter #(
.OUT_BUF (RSP_OUT_BUF)
) rsp_arb (
.clk (clk),
.reset (reset),
.reset (rsp_arb_reset),
.valid_in (rsp_arb_valid_in),
.data_in (rsp_arb_data_in),
.ready_in (rsp_arb_ready_in),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,10 +15,10 @@
`TRACING_OFF
module VX_mem_adapter #(
parameter SRC_DATA_WIDTH = 1,
parameter SRC_ADDR_WIDTH = 1,
parameter DST_DATA_WIDTH = 1,
parameter DST_ADDR_WIDTH = 1,
parameter SRC_DATA_WIDTH = 1,
parameter SRC_ADDR_WIDTH = 1,
parameter DST_DATA_WIDTH = 1,
parameter DST_ADDR_WIDTH = 1,
parameter SRC_TAG_WIDTH = 1,
parameter DST_TAG_WIDTH = 1,
parameter REQ_OUT_BUF = 0,
@ -35,9 +35,9 @@ module VX_mem_adapter #(
input wire [SRC_TAG_WIDTH-1:0] mem_req_tag_in,
output wire mem_req_ready_in,
output wire mem_rsp_valid_in,
output wire [SRC_DATA_WIDTH-1:0] mem_rsp_data_in,
output wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in,
output wire mem_rsp_valid_in,
output wire [SRC_DATA_WIDTH-1:0] mem_rsp_data_in,
output wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in,
input wire mem_rsp_ready_in,
output wire mem_req_valid_out,
@ -48,12 +48,12 @@ module VX_mem_adapter #(
output wire [DST_TAG_WIDTH-1:0] mem_req_tag_out,
input wire mem_req_ready_out,
input wire mem_rsp_valid_out,
input wire [DST_DATA_WIDTH-1:0] mem_rsp_data_out,
input wire mem_rsp_valid_out,
input wire [DST_DATA_WIDTH-1:0] mem_rsp_data_out,
input wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_out,
output wire mem_rsp_ready_out
);
`STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!"))
);
`STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!"))
localparam DST_DATA_SIZE = (DST_DATA_WIDTH / 8);
localparam DST_LDATAW = `CLOG2(DST_DATA_WIDTH);
@ -69,7 +69,7 @@ module VX_mem_adapter #(
wire [DST_TAG_WIDTH-1:0] mem_req_tag_out_w;
wire mem_req_ready_out_w;
wire mem_rsp_valid_in_w;
wire mem_rsp_valid_in_w;
wire [SRC_DATA_WIDTH-1:0] mem_rsp_data_in_w;
wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_in_w;
wire mem_rsp_ready_in_w;
@ -80,7 +80,7 @@ module VX_mem_adapter #(
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
wire [D-1:0] req_idx = mem_req_addr_in[D-1:0];
wire [D-1:0] rsp_idx = mem_rsp_tag_out[D-1:0];
@ -99,31 +99,31 @@ module VX_mem_adapter #(
assign mem_req_valid_out_w = mem_req_valid_in;
assign mem_req_rw_out_w = mem_req_rw_in;
assign mem_req_byteen_out_w = DST_DATA_SIZE'(mem_req_byteen_in) << ((DST_LDATAW-3)'(req_idx) << (SRC_LDATAW-3));
assign mem_req_byteen_out_w = DST_DATA_SIZE'(mem_req_byteen_in) << ((DST_LDATAW-3)'(req_idx) << (SRC_LDATAW-3));
assign mem_req_data_out_w = DST_DATA_WIDTH'(mem_req_data_in) << ((DST_LDATAW'(req_idx)) << SRC_LDATAW);
assign mem_req_tag_out_w = DST_TAG_WIDTH'({mem_req_tag_in, req_idx});
assign mem_req_ready_in = mem_req_ready_out_w;
assign mem_rsp_valid_in_w = mem_rsp_valid_out;
assign mem_rsp_data_in_w = mem_rsp_data_out_w[rsp_idx];
assign mem_rsp_data_in_w = mem_rsp_data_out_w[rsp_idx];
assign mem_rsp_tag_in_w = SRC_TAG_WIDTH'(mem_rsp_tag_out[SRC_TAG_WIDTH+D-1:D]);
assign mem_rsp_ready_out = mem_rsp_ready_in_w;
end else if (DST_LDATAW < SRC_LDATAW) begin
reg [D-1:0] req_ctr, rsp_ctr;
reg [P-1:0][DST_DATA_WIDTH-1:0] mem_rsp_data_out_r, mem_rsp_data_out_n;
wire mem_req_out_fire = mem_req_valid_out && mem_req_ready_out;
wire mem_rsp_in_fire = mem_rsp_valid_out && mem_rsp_ready_out;
wire mem_rsp_in_fire = mem_rsp_valid_out && mem_rsp_ready_out;
wire [P-1:0][DST_DATA_WIDTH-1:0] mem_req_data_in_w = mem_req_data_in;
wire [P-1:0][DST_DATA_SIZE-1:0] mem_req_byteen_in_w = mem_req_byteen_in;
always @(*) begin
mem_rsp_data_out_n = mem_rsp_data_out_r;
if (mem_rsp_in_fire) begin
if (mem_rsp_in_fire) begin
mem_rsp_data_out_n[rsp_ctr] = mem_rsp_data_out;
end
end
@ -139,24 +139,24 @@ module VX_mem_adapter #(
if (mem_rsp_in_fire) begin
rsp_ctr <= rsp_ctr + 1;
end
end
end
mem_rsp_data_out_r <= mem_rsp_data_out_n;
end
reg [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_r;
wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_x;
always @(posedge clk) begin
if (mem_rsp_in_fire) begin
mem_rsp_tag_in_r <= mem_rsp_tag_out;
end
end
end
assign mem_rsp_tag_in_x = (rsp_ctr != 0) ? mem_rsp_tag_in_r : mem_rsp_tag_out;
`RUNTIME_ASSERT(!mem_rsp_in_fire || (mem_rsp_tag_in_x == mem_rsp_tag_out),
`RUNTIME_ASSERT(!mem_rsp_in_fire || (mem_rsp_tag_in_x == mem_rsp_tag_out),
("%t: *** out-of-order memory reponse! cur=%d, expected=%d", $time, mem_rsp_tag_in_x, mem_rsp_tag_out))
wire [SRC_ADDR_WIDTH+D-1:0] mem_req_addr_in_qual = {mem_req_addr_in, req_ctr};
if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH + D)) begin
`UNUSED_VAR (mem_req_addr_in_qual)
assign mem_req_addr_out_w = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0];
@ -181,8 +181,8 @@ module VX_mem_adapter #(
end else begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
`UNUSED_VAR (reset)
if (DST_ADDR_WIDTH < SRC_ADDR_WIDTH) begin
`UNUSED_VAR (mem_req_addr_in)
assign mem_req_addr_out_w = mem_req_addr_in[DST_ADDR_WIDTH-1:0];
@ -206,13 +206,15 @@ module VX_mem_adapter #(
end
`RESET_RELAY (req_out_reset, reset);
VX_elastic_buffer #(
.DATAW (1 + DST_DATA_SIZE + DST_ADDR_WIDTH + DST_DATA_WIDTH + DST_TAG_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(REQ_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(REQ_OUT_BUF))
) req_out_buf (
.clk (clk),
.reset (reset),
.reset (req_out_reset),
.valid_in (mem_req_valid_out_w),
.ready_in (mem_req_ready_out_w),
.data_in ({mem_req_rw_out_w, mem_req_byteen_out_w, mem_req_addr_out_w, mem_req_data_out_w, mem_req_tag_out_w}),
@ -221,13 +223,15 @@ module VX_mem_adapter #(
.ready_out (mem_req_ready_out)
);
`RESET_RELAY (rsp_in_reset, reset);
VX_elastic_buffer #(
.DATAW (SRC_DATA_WIDTH + SRC_TAG_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(RSP_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(RSP_OUT_BUF))
) rsp_in_buf (
.clk (clk),
.reset (reset),
.reset (rsp_in_reset),
.valid_in (mem_rsp_valid_in_w),
.ready_in (mem_rsp_ready_in_w),
.data_in ({mem_rsp_data_in_w, mem_rsp_tag_in_w}),

View file

@ -89,14 +89,14 @@ module VX_mem_coalescer #(
reg state_r, state_n;
reg out_req_valid_r, out_req_valid_n;
reg out_req_rw_r, out_req_rw_n;
reg [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
reg [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
reg [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
reg [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
logic out_req_valid_r, out_req_valid_n;
logic out_req_rw_r, out_req_rw_n;
logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
logic [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_WIDTH-1:0] out_req_data_r, out_req_data_n;
logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
reg in_req_ready_n;
@ -149,29 +149,6 @@ module VX_mem_coalescer #(
end
end
always @(posedge clk) begin
if (reset) begin
state_r <= STATE_SETUP;
processed_mask_r <= '0;
out_req_valid_r <= 0;
end else begin
state_r <= state_n;
batch_valid_r <= batch_valid_n;
seed_addr_r <= seed_addr_n;
seed_atype_r <= seed_atype_n;
addr_matches_r <= addr_matches_n;
out_req_valid_r <= out_req_valid_n;
out_req_mask_r <= out_req_mask_n;
out_req_rw_r <= out_req_rw_n;
out_req_addr_r <= out_req_addr_n;
out_req_atype_r <= out_req_atype_n;
out_req_byteen_r <= out_req_byteen_n;
out_req_data_r <= out_req_data_n;
out_req_tag_r <= out_req_tag_n;
processed_mask_r <= processed_mask_n;
end
end
wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches_r;
reg [OUT_REQS-1:0][DATA_RATIO-1:0][DATA_IN_SIZE-1:0] req_byteen_merged;
@ -248,6 +225,19 @@ module VX_mem_coalescer #(
endcase
end
`RESET_RELAY (pipe_reset, reset);
VX_pipe_register #(
.DATAW (1 + NUM_REQS + 1 + 1 + NUM_REQS + OUT_REQS * (1 + 1 + OUT_ADDR_WIDTH + ATYPE_WIDTH + OUT_ADDR_WIDTH + ATYPE_WIDTH + DATA_OUT_SIZE + DATA_OUT_WIDTH + OUT_TAG_WIDTH)),
.RESETW (1 + NUM_REQS + 1)
) pipe_reg (
.clk (clk),
.reset (pipe_reset),
.enable (1'b1),
.data_in ({state_n, processed_mask_n, out_req_valid_n, out_req_rw_n, addr_matches_n, batch_valid_n, out_req_mask_n, seed_addr_n, seed_atype_n, out_req_addr_n, out_req_atype_n, out_req_byteen_n, out_req_data_n, out_req_tag_n}),
.data_out ({state_r, processed_mask_r, out_req_valid_r, out_req_rw_r, addr_matches_r, batch_valid_r, out_req_mask_r, seed_addr_r, seed_atype_r, out_req_addr_r, out_req_atype_r, out_req_byteen_r, out_req_data_r, out_req_tag_r})
);
wire out_rsp_fire = out_rsp_valid && out_rsp_ready;
wire out_rsp_eop;

View file

@ -167,13 +167,15 @@ module VX_mem_scheduler #(
assign reqq_tag_u = ibuf_waddr;
end
`RESET_RELAY (reqq_reset, reset);
VX_elastic_buffer #(
.DATAW (1 + CORE_REQS * (1 + WORD_SIZE + ADDR_WIDTH + ATYPE_WIDTH + WORD_WIDTH) + REQQ_TAG_WIDTH),
.SIZE (CORE_QUEUE_SIZE),
.OUT_REG (1)
) req_queue (
.clk (clk),
.reset (reset),
.reset (reqq_reset),
.valid_in (reqq_valid_in),
.ready_in (reqq_ready_in),
.data_in ({core_req_rw, core_req_mask, core_req_byteen, core_req_addr, core_req_atype, core_req_data, reqq_tag_u}),
@ -389,13 +391,15 @@ module VX_mem_scheduler #(
assign reqq_ready_s = req_sent_all;
`RESET_RELAY (mem_req_reset, reset);
VX_elastic_buffer #(
.DATAW (MEM_CHANNELS + 1 + MEM_CHANNELS * (LINE_SIZE + MEM_ADDR_WIDTH + ATYPE_WIDTH + LINE_WIDTH) + MEM_TAG_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(MEM_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
) mem_req_buf (
.clk (clk),
.reset (reset),
.reset (mem_req_reset),
.valid_in (mem_req_valid_s),
.ready_in (mem_req_ready_s),
.data_in ({mem_req_mask_s, mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_atype_s, mem_req_data_s, mem_req_tag_s}),
@ -509,13 +513,15 @@ module VX_mem_scheduler #(
// Send response to caller
`RESET_RELAY (crsp_reset, reset);
VX_elastic_buffer #(
.DATAW (CORE_REQS + 1 + 1 + (CORE_REQS * WORD_WIDTH) + TAG_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(CORE_OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
) rsp_buf (
.clk (clk),
.reset (reset),
.reset (crsp_reset),
.valid_in (crsp_valid),
.ready_in (crsp_ready),
.data_in ({crsp_mask, crsp_sop, crsp_eop, crsp_data, crsp_tag}),