Fixed VX_fifo_queue support for BRAM

This commit is contained in:
Blaise Tine 2024-10-14 15:48:49 -07:00
parent fe5442dbb3
commit 37757fab8f
4 changed files with 80 additions and 90 deletions

View file

@ -480,7 +480,7 @@
// Number of Associative Ways
`ifndef ICACHE_NUM_WAYS
`define ICACHE_NUM_WAYS 1
`define ICACHE_NUM_WAYS 4
`endif
// Dcache Configurable Knobs //////////////////////////////////////////////////
@ -529,12 +529,12 @@
// Memory Response Queue Size
`ifndef DCACHE_MRSQ_SIZE
`define DCACHE_MRSQ_SIZE 0
`define DCACHE_MRSQ_SIZE 4
`endif
// Number of Associative Ways
`ifndef DCACHE_NUM_WAYS
`define DCACHE_NUM_WAYS 1
`define DCACHE_NUM_WAYS 4
`endif
// Enable Cache Writeback
@ -594,12 +594,12 @@
// Memory Response Queue Size
`ifndef L2_MRSQ_SIZE
`define L2_MRSQ_SIZE 0
`define L2_MRSQ_SIZE 4
`endif
// Number of Associative Ways
`ifndef L2_NUM_WAYS
`define L2_NUM_WAYS 2
`define L2_NUM_WAYS 4
`endif
// Enable Cache Writeback
@ -641,7 +641,7 @@
// Memory Response Queue Size
`ifndef L3_MRSQ_SIZE
`define L3_MRSQ_SIZE 0
`define L3_MRSQ_SIZE 4
`endif
// Number of Associative Ways

View file

@ -402,8 +402,8 @@ module VX_cache import VX_gpu_pkg::*; #(
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.FLAGS_WIDTH (FLAGS_WIDTH),
.CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(CORE_OUT_BUF)),
.MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : `TO_OUT_BUF_REG(MEM_OUT_BUF))
.CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : 1),
.MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : 1)
) bank (
.clk (clk),
.reset (reset),

View file

@ -39,7 +39,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
VX_elastic_buffer #(
.DATAW (DATAW),
.SIZE (`IBUF_SIZE),
.OUT_REG (2) // 2-cycle EB for area reduction
.OUT_REG (1)
) instr_buf (
.clk (clk),
.reset (reset),

View file

@ -42,6 +42,9 @@ module VX_fifo_queue #(
`STATIC_ASSERT(ALM_EMPTY < DEPTH, ("alm_empty must be smaller than size!"))
`STATIC_ASSERT(`IS_POW2(DEPTH), ("depth must be a power of 2!"))
`UNUSED_PARAM (OUT_REG)
`UNUSED_PARAM (LUTRAM)
VX_pending_size #(
.SIZE (DEPTH),
.ALM_EMPTY (ALM_EMPTY),
@ -74,102 +77,89 @@ module VX_fifo_queue #(
localparam ADDRW = `CLOG2(DEPTH);
wire [DATAW-1:0] data_out_w;
reg [ADDRW-1:0] rd_ptr_r, rd_ptr_n;
reg [ADDRW-1:0] wr_ptr_r;
always @(*) begin
rd_ptr_n = rd_ptr_r + ADDRW'(pop);
end
always @(posedge clk) begin
if (reset) begin
wr_ptr_r <= '0;
rd_ptr_r <= (OUT_REG != 0) ? 1 : 0;
end else begin
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
rd_ptr_r <= rd_ptr_n;
end
end
wire [ADDRW-1:0] rd_ptr_w = LUTRAM ? rd_ptr_r : rd_ptr_n;
wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1));
wire bypass = push && (empty || (going_empty && pop));
wire read = ((OUT_REG != 0) || !LUTRAM) ? ~bypass : pop;
VX_dp_ram #(
.DATAW (DATAW),
.SIZE (DEPTH),
.LUTRAM (LUTRAM),
.OUT_REG(!LUTRAM)
) dp_ram (
.clk (clk),
.reset (reset),
.read (read),
.write (push),
.wren (1'b1),
.waddr (wr_ptr_r),
.wdata (data_in),
.raddr (rd_ptr_w),
.rdata (data_out_w)
);
if (OUT_REG != 0) begin : g_out_reg
reg [DATAW-1:0] data_out_r, data_out_n;
wire [DATAW-1:0] dout;
reg [DATAW-1:0] dout_r;
reg [ADDRW-1:0] wr_ptr_r;
reg [ADDRW-1:0] rd_ptr_r;
reg [ADDRW-1:0] rd_ptr_n_r, rd_ptr_n_n;
always @(*) begin
rd_ptr_n_n = rd_ptr_n_r;
if (pop) begin
if (DEPTH > 2) begin
rd_ptr_n_n = rd_ptr_r + ADDRW'(2);
end else begin // (DEPTH == 2);
rd_ptr_n_n = ~rd_ptr_n_r;
if (LUTRAM) begin : g_lutram
assign data_out_n = data_out_w;
end else begin : g_no_lutram
reg [DATAW-1:0] data_out_p;
reg rdw_hazard_r;
wire rdw_hazard = push && (wr_ptr_r == rd_ptr_w);
always @(posedge clk) begin
if (rdw_hazard) begin
data_out_p <= data_in;
end
rdw_hazard_r <= rdw_hazard;
end
assign data_out_n = rdw_hazard_r ? data_out_p : data_out_w;
end
always @(posedge clk) begin
if (reset) begin
wr_ptr_r <= '0;
rd_ptr_r <= '0;
rd_ptr_n_r <= 1;
end else begin
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
if (pop) begin
rd_ptr_r <= rd_ptr_n_r;
end
rd_ptr_n_r <= rd_ptr_n_n;
end
end
VX_dp_ram #(
.DATAW (DATAW),
.SIZE (DEPTH),
.LUTRAM (LUTRAM)
) dp_ram (
.clk (clk),
.reset (reset),
.read (1'b1),
.write (push),
.wren (1'b1),
.waddr (wr_ptr_r),
.wdata (data_in),
.raddr (rd_ptr_n_r),
.rdata (dout)
);
wire going_empty = (ALM_EMPTY == 1) ? alm_empty : (size[ADDRW-1:0] == ADDRW'(1));
always @(posedge clk) begin
if (push && (empty || (going_empty && pop))) begin
dout_r <= data_in;
if (bypass) begin
data_out_r <= data_in;
end else if (pop) begin
dout_r <= dout;
data_out_r <= data_out_n;
end
end
assign data_out = dout_r;
assign data_out = data_out_r;
end else begin : g_no_out_reg
reg [ADDRW-1:0] rd_ptr_r, rd_ptr_n;
reg [ADDRW-1:0] wr_ptr_r;
always @(*) begin
rd_ptr_n = rd_ptr_r + ADDRW'(pop);
end
always @(posedge clk) begin
if (reset) begin
wr_ptr_r <= '0;
rd_ptr_r <= '0;
end else begin
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
rd_ptr_r <= rd_ptr_n;
if (LUTRAM) begin : g_lutram
assign data_out = data_out_w;
end else begin : g_no_lutram
reg [DATAW-1:0] data_in_r;
reg bypass_r;
always @(posedge clk) begin
if (bypass) begin
data_in_r <= data_in;
end
bypass_r <= bypass;
end
assign data_out = bypass_r ? data_in_r : data_out_w;
end
VX_dp_ram #(
.DATAW (DATAW),
.SIZE (DEPTH),
.LUTRAM (LUTRAM)
) dp_ram (
.clk (clk),
.reset (reset),
.read (1'b1),
.write (push),
.wren (1'b1),
.waddr (wr_ptr_r),
.wdata (data_in),
.raddr (rd_ptr_r),
.rdata (data_out)
);
end
end