using shiftreg-based skid buffers

This commit is contained in:
Blaise Tine 2021-02-28 02:20:09 -08:00
parent e64996946d
commit 3f5fd6d394
10 changed files with 26 additions and 50 deletions

View file

@ -8,7 +8,7 @@
`endif
`ifndef NUM_CORES
`define NUM_CORES 4
`define NUM_CORES 1
`endif
`ifndef NUM_WARPS
@ -235,11 +235,6 @@
// Pipeline Queues ////////////////////////////////////////////////////////////
// Size of instruction queue
`ifndef IBUF_SIZE
`define IBUF_SIZE 4
`endif
// Size of LSU Request Queue
`ifndef LSUQ_SIZE
`define LSUQ_SIZE 8

View file

@ -45,8 +45,7 @@ module VX_csr_io_arb (
// responses
wire csr_io_rsp_ready;
VX_skid_buffer #(
.DATAW (32),
.BUFFERED (1)
.DATAW (32)
) csr_io_out_buffer (
.clk (clk),
.reset (reset),

View file

@ -39,8 +39,7 @@ module VX_databus_arb (
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));
VX_skid_buffer #(
.DATAW (REQ_DATAW),
.BUFFERED (1)
.DATAW (REQ_DATAW)
) cache_out_buffer (
.clk (clk),
.reset (reset),
@ -53,8 +52,7 @@ module VX_databus_arb (
);
VX_skid_buffer #(
.DATAW (REQ_DATAW),
.BUFFERED (1)
.DATAW (REQ_DATAW)
) smem_out_buffer (
.clk (clk),
.reset (reset),

View file

@ -14,8 +14,8 @@ module VX_ibuffer #(
VX_decode_if ibuf_deq_if
);
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + `NUM_REGS;
localparam SIZE = `IBUF_SIZE;
localparam ADDRW = $clog2(SIZE+1);
localparam SIZE = 3;
localparam ADDRW = $clog2(SIZE);
localparam NWARPSW = $clog2(`NUM_WARPS+1);
reg [`NUM_WARPS-1:0][ADDRW-1:0] used_r;
@ -39,22 +39,17 @@ module VX_ibuffer #(
wire push = writing && !is_slot0;
wire pop = reading && !alm_empty_r[i];
VX_fifo_queue #(
.DATAW (DATAW),
.SIZE (SIZE),
.BUFFERED (1)
VX_skid_buffer #(
.DATAW (DATAW)
) queue (
.clk (clk),
.reset (reset),
.push (push),
.pop (pop),
.data_in (q_data_in),
.data_out (q_data_prev[i]),
`UNUSED_PIN (empty),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
.valid_in (push),
.data_in (q_data_in),
.ready_out(pop),
.data_out (q_data_prev[i]),
`UNUSED_PIN (ready_in),
`UNUSED_PIN (valid_out)
);
always @(posedge clk) begin
@ -69,7 +64,7 @@ module VX_ibuffer #(
empty_r[i] <= 0;
if (used_r[i] == 1)
alm_empty_r[i] <= 0;
if (used_r[i] == ADDRW'(SIZE))
if (used_r[i] == ADDRW'(SIZE-1))
full_r[i] <= 1;
end
end else if (reading) begin

View file

@ -38,8 +38,7 @@ module VX_instr_demux (
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
.BUFFERED (1)
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32))
) alu_buffer (
.clk (clk),
.reset (reset),
@ -56,8 +55,7 @@ module VX_instr_demux (
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.BUFFERED (1)
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32))
) lsu_buffer (
.clk (clk),
.reset (reset),
@ -74,8 +72,7 @@ module VX_instr_demux (
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
.BUFFERED (1)
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32)
) csr_buffer (
.clk (clk),
.reset (reset),
@ -93,8 +90,7 @@ module VX_instr_demux (
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
.BUFFERED (1)
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32))
) fpu_buffer (
.clk (clk),
.reset (reset),
@ -115,8 +111,7 @@ module VX_instr_demux (
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
.BUFFERED (1)
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32))
) gpu_buffer (
.clk (clk),
.reset (reset),

View file

@ -488,8 +488,7 @@ module VX_bank #(
end
VX_skid_buffer #(
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
.BUFFERED (1)
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS)
) core_rsp_req (
.clk (clk),
.reset (reset),

View file

@ -98,8 +98,7 @@ module VX_cache_core_rsp_merge #(
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
VX_skid_buffer #(
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)),
.BUFFERED (1)
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH))
) pipe_reg (
.clk (clk),
.reset (reset),
@ -147,8 +146,7 @@ module VX_cache_core_rsp_merge #(
for (genvar i = 0; i < NUM_REQS; i++) begin
VX_skid_buffer #(
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH),
.BUFFERED (1)
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH)
) pipe_reg (
.clk (clk),
.reset (reset),

View file

@ -205,8 +205,7 @@ module VX_shared_mem #(
wire crsq_in_valid = ~creq_empty && ~core_rsp_rw;
VX_skid_buffer #(
.DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH),
.BUFFERED (1)
.DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH)
) core_rsp_req (
.clk (clk),
.reset (reset),

View file

@ -92,8 +92,7 @@ module VX_stream_arbiter #(
VX_skid_buffer #(
.DATAW (DATAW),
.PASSTHRU (!BUFFERED),
.BUFFERED (1)
.PASSTHRU (!BUFFERED)
) out_buffer (
.clk (clk),
.reset (reset),

View file

@ -40,8 +40,7 @@ module VX_stream_demux #(
for (genvar i = 0; i < NUM_REQS; i++) begin
VX_skid_buffer #(
.DATAW (DATAW),
.PASSTHRU (!BUFFERED),
.BUFFERED (1)
.PASSTHRU (!BUFFERED)
) out_buffer (
.clk (clk),
.reset (reset),