mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
synthesis optimizations
This commit is contained in:
parent
1e677c8e5e
commit
57143f5889
16 changed files with 173 additions and 229 deletions
|
@ -36,19 +36,16 @@ module VX_ibuffer #(
|
|||
wire writing = enq_fire && (i == ibuf_enq_if.wid);
|
||||
wire reading = deq_fire && (i == ibuf_deq_if.wid);
|
||||
|
||||
wire is_slot0 = empty_r[i] || (alm_empty_r[i] && reading);
|
||||
|
||||
wire push = writing && !is_slot0;
|
||||
wire pop = reading && !alm_empty_r[i];
|
||||
wire is_head_ptr = empty_r[i] || (alm_empty_r[i] && reading);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (push),
|
||||
.valid_in (writing && !is_head_ptr),
|
||||
.data_in (q_data_in),
|
||||
.ready_out(pop),
|
||||
.ready_out(reading),
|
||||
.data_out (q_data_prev[i]),
|
||||
`UNUSED_PIN (ready_in),
|
||||
`UNUSED_PIN (valid_out)
|
||||
|
@ -79,9 +76,9 @@ module VX_ibuffer #(
|
|||
used_r[i] <= used_r[i] + ADDRW'($signed(2'(writing) - 2'(reading)));
|
||||
end
|
||||
|
||||
if (writing && is_slot0) begin
|
||||
if (writing && is_head_ptr) begin
|
||||
q_data_out[i] <= q_data_in;
|
||||
end else if (pop) begin
|
||||
end else if (reading) begin
|
||||
q_data_out[i] <= q_data_prev[i];
|
||||
end
|
||||
end
|
||||
|
@ -111,26 +108,17 @@ module VX_ibuffer #(
|
|||
end
|
||||
|
||||
// schedule the next instruction to issue
|
||||
// do round-robin when multiple warps are active
|
||||
always @(*) begin
|
||||
deq_valid_n = 0;
|
||||
deq_wid_n = 'x;
|
||||
deq_instr_n = 'x;
|
||||
schedule_table_n = 'x;
|
||||
|
||||
always @(*) begin
|
||||
deq_valid_n = 1;
|
||||
if (num_warps > 1) begin
|
||||
deq_valid_n = (| schedule_table);
|
||||
schedule_table_n = schedule_table;
|
||||
for (integer i = 0; i < `NUM_WARPS; i++) begin
|
||||
if (schedule_table[i]) begin
|
||||
deq_wid_n = `NW_BITS'(i);
|
||||
deq_instr_n = q_data_out[i];
|
||||
schedule_table_n[i] = 0;
|
||||
break;
|
||||
end
|
||||
end
|
||||
end else if (1 == num_warps && !(deq_fire && q_alm_empty[deq_wid])) begin
|
||||
deq_valid_n = 1;
|
||||
deq_wid_n = deq_wid;
|
||||
deq_instr_n = deq_fire ? q_data_prev[deq_wid] : q_data_out[deq_wid];
|
||||
end else begin
|
||||
|
@ -139,6 +127,17 @@ module VX_ibuffer #(
|
|||
deq_instr_n = q_data_in;
|
||||
end
|
||||
end
|
||||
|
||||
// do round-robin with multiple active warps
|
||||
always @(*) begin
|
||||
schedule_table_n = schedule_table;
|
||||
for (integer i = 0; i < `NUM_WARPS; i++) begin
|
||||
if (schedule_table[i]) begin
|
||||
schedule_table_n[i] = 0;
|
||||
break;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
wire warp_added = enq_fire && q_empty[ibuf_enq_if.wid];
|
||||
wire warp_removed = deq_fire && ~(enq_fire && ibuf_enq_if.wid == deq_wid) && q_alm_empty[deq_wid];
|
||||
|
|
|
@ -38,8 +38,7 @@ module VX_instr_demux (
|
|||
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.USE_FASTREG (1)
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32))
|
||||
) alu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -56,8 +55,7 @@ module VX_instr_demux (
|
|||
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
||||
.USE_FASTREG (1)
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32))
|
||||
) lsu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -74,8 +72,7 @@ module VX_instr_demux (
|
|||
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
|
||||
.USE_FASTREG (1)
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32)
|
||||
) csr_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -93,8 +90,7 @@ module VX_instr_demux (
|
|||
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
.USE_FASTREG (1)
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32))
|
||||
) fpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -115,8 +111,7 @@ module VX_instr_demux (
|
|||
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
|
||||
.USE_FASTREG (1)
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32))
|
||||
) gpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -11,11 +11,17 @@ module VX_scoreboard #(
|
|||
output wire delay
|
||||
);
|
||||
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_regs;
|
||||
wire [`NUM_REGS-1:0] deq_inuse_regs;
|
||||
|
||||
assign deq_inuse_regs = inuse_regs[ibuf_deq_if.wid] & ibuf_deq_if.used_regs;
|
||||
|
||||
assign delay = (| deq_inuse_regs);
|
||||
reg is_reg_busy;
|
||||
always @(*) begin
|
||||
is_reg_busy = 0;
|
||||
for (integer i = 0; i < `NUM_WARPS; ++i) begin
|
||||
if (ibuf_deq_if.wid == `NW_BITS'(i)) begin
|
||||
is_reg_busy = | (inuse_regs[i] & ibuf_deq_if.used_regs);
|
||||
end
|
||||
end
|
||||
end
|
||||
assign delay = is_reg_busy;
|
||||
|
||||
wire reserve_reg = ibuf_deq_if.valid && ibuf_deq_if.ready && (ibuf_deq_if.wb != 0);
|
||||
|
||||
|
@ -37,6 +43,8 @@ module VX_scoreboard #(
|
|||
end
|
||||
end
|
||||
|
||||
wire [`NUM_REGS-1:0] deq_inuse_regs = inuse_regs[ibuf_deq_if.wid];
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
|
||||
|
|
|
@ -32,8 +32,8 @@ module VX_smem_arb (
|
|||
VX_stream_demux #(
|
||||
.NUM_REQS (2),
|
||||
.DATAW (REQ_DATAW),
|
||||
.BUFFERED (0)
|
||||
) rsp_demux (
|
||||
.BUFFERED (1)
|
||||
) req_demux (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.sel (core_req_if.tag[i][0]),
|
||||
|
|
|
@ -53,7 +53,6 @@ localparam AVS_REQ_TAGW_CCI = `MAX(CCI_ADDR_WIDTH, CCI_ADDR_WIDTH + $clog2(LME
|
|||
localparam AVS_REQ_TAGW = `MAX(AVS_REQ_TAGW_VX, AVS_REQ_TAGW_CCI);
|
||||
|
||||
localparam CCI_RD_WINDOW_SIZE = 8;
|
||||
localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE;
|
||||
localparam CCI_RW_PENDING_SIZE= 256;
|
||||
|
||||
localparam AFU_ID_L = 16'h0002; // AFU ID Lower
|
||||
|
@ -78,15 +77,15 @@ localparam MMIO_SCOPE_WRITE = `AFU_IMAGE_MMIO_SCOPE_WRITE;
|
|||
|
||||
localparam MMIO_DEV_CAPS = `AFU_IMAGE_MMIO_DEV_CAPS;
|
||||
|
||||
localparam CCI_RD_RQ_TAGW = $clog2(CCI_RD_WINDOW_SIZE);
|
||||
localparam CCI_RD_RQ_DATAW = CCI_LINE_WIDTH + CCI_RD_RQ_TAGW;
|
||||
localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE;
|
||||
localparam CCI_RD_QUEUE_TAGW = $clog2(CCI_RD_WINDOW_SIZE);
|
||||
localparam CCI_RD_QUEUE_DATAW = CCI_LINE_WIDTH + CCI_ADDR_WIDTH;
|
||||
|
||||
localparam STATE_IDLE = 0;
|
||||
localparam STATE_READ = 1;
|
||||
localparam STATE_WRITE = 2;
|
||||
localparam STATE_WRITE = 1;
|
||||
localparam STATE_READ = 2;
|
||||
localparam STATE_START = 3;
|
||||
localparam STATE_RUN = 4;
|
||||
localparam STATE_MAX_VALUE = 5;
|
||||
localparam STATE_MAX_VALUE = 4;
|
||||
localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE);
|
||||
|
||||
`ifdef SCOPE
|
||||
|
@ -114,11 +113,9 @@ wire [`VX_MEM_LINE_WIDTH-1:0] vx_mem_rsp_data;
|
|||
wire [`VX_MEM_TAG_WIDTH-1:0] vx_mem_rsp_tag;
|
||||
wire vx_mem_rsp_ready;
|
||||
|
||||
reg vx_reset;
|
||||
wire vx_busy;
|
||||
|
||||
reg vx_reset;
|
||||
reg vx_mem_en;
|
||||
|
||||
// CMD variables //////////////////////////////////////////////////////////////
|
||||
|
||||
t_ccip_clAddr cmd_io_addr;
|
||||
|
@ -292,8 +289,9 @@ end
|
|||
// COMMAND FSM ////////////////////////////////////////////////////////////////
|
||||
|
||||
wire cmd_read_done;
|
||||
wire cmd_write_done;
|
||||
reg cmd_write_done;
|
||||
wire cmd_run_done;
|
||||
reg vx_started;
|
||||
|
||||
reg [$clog2(RESET_DELAY+1)-1:0] vx_reset_ctr;
|
||||
always @(posedge clk) begin
|
||||
|
@ -306,9 +304,9 @@ end
|
|||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
state <= STATE_IDLE;
|
||||
vx_started <= 0;
|
||||
vx_reset <= 0;
|
||||
vx_mem_en <= 0;
|
||||
end else begin
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
|
@ -358,21 +356,20 @@ always @(posedge clk) begin
|
|||
|
||||
STATE_START: begin
|
||||
// vortex reset cycles
|
||||
if (vx_reset_ctr == $bits(vx_reset_ctr)'(RESET_DELAY)) begin
|
||||
vx_reset <= 0;
|
||||
vx_mem_en <= 1;
|
||||
state <= STATE_RUN;
|
||||
end
|
||||
end
|
||||
|
||||
STATE_RUN: begin
|
||||
if (cmd_run_done) begin
|
||||
vx_mem_en <= 0;
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE IDLE", $time);
|
||||
`endif
|
||||
end
|
||||
if (vx_started) begin
|
||||
if (cmd_run_done) begin
|
||||
vx_started <= 0;
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE IDLE", $time);
|
||||
`endif
|
||||
end
|
||||
end else begin
|
||||
if (vx_reset_ctr == $bits(vx_reset_ctr)'(RESET_DELAY)) begin
|
||||
vx_started <= 1;
|
||||
vx_reset <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
default: begin
|
||||
|
@ -387,11 +384,12 @@ end
|
|||
|
||||
wire cci_mem_rd_req_valid;
|
||||
wire cci_mem_wr_req_valid;
|
||||
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout;
|
||||
wire [CCI_RD_QUEUE_DATAW-1:0] cci_rdq_dout;
|
||||
|
||||
wire cci_mem_req_valid;
|
||||
wire cci_mem_req_rw;
|
||||
wire [CCI_ADDR_WIDTH-1:0] cci_mem_req_addr;
|
||||
wire [CCI_LINE_WIDTH-1:0] cci_mem_req_data;
|
||||
wire [CCI_ADDR_WIDTH-1:0] cci_mem_req_tag;
|
||||
wire cci_mem_req_ready;
|
||||
|
||||
|
@ -430,7 +428,7 @@ VX_to_mem #(
|
|||
.mem_req_addr_in (cci_mem_req_addr),
|
||||
.mem_req_rw_in (cci_mem_req_rw),
|
||||
.mem_req_byteen_in ({CCI_LINE_SIZE{1'b1}}),
|
||||
.mem_req_data_in (cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW]),
|
||||
.mem_req_data_in (cci_mem_req_data),
|
||||
.mem_req_tag_in (cci_mem_req_tag),
|
||||
.mem_req_ready_in (cci_mem_req_ready),
|
||||
|
||||
|
@ -473,7 +471,7 @@ wire vx_mem_req_valid_qual;
|
|||
wire vx_mem_req_ready_qual;
|
||||
|
||||
assign vx_mem_req_valid_qual = vx_mem_req_valid
|
||||
&& vx_mem_en
|
||||
&& vx_started
|
||||
&& ~vx_mem_is_cout;
|
||||
|
||||
assign vx_mem_req_ready = vx_mem_is_cout ? ~cout_q_full : vx_mem_req_ready_qual;
|
||||
|
@ -617,19 +615,20 @@ VX_avs_wrapper #(
|
|||
|
||||
reg [CCI_ADDR_WIDTH-1:0] cci_mem_wr_req_ctr;
|
||||
wire [CCI_ADDR_WIDTH-1:0] cci_mem_wr_req_addr;
|
||||
reg [CCI_ADDR_WIDTH-1:0] cci_mem_wr_req_addr_unqual;
|
||||
reg [CCI_ADDR_WIDTH-1:0] cci_rd_req_ctr;
|
||||
wire [CCI_ADDR_WIDTH-1:0] cci_rd_req_ctr_next;
|
||||
wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag;
|
||||
wire [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_tag;
|
||||
reg [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_ctr;
|
||||
reg [CCI_ADDR_WIDTH-1:0] cci_mem_wr_req_addr_base;
|
||||
|
||||
wire cci_rd_req_fire;
|
||||
t_ccip_clAddr cci_rd_req_addr;
|
||||
reg cci_rd_req_valid, cci_rd_req_wait;
|
||||
reg [CCI_ADDR_WIDTH-1:0] cci_rd_req_ctr;
|
||||
wire [CCI_ADDR_WIDTH-1:0] cci_rd_req_ctr_next;
|
||||
wire [CCI_RD_QUEUE_TAGW-1:0] cci_rd_req_tag;
|
||||
|
||||
wire [CCI_RD_QUEUE_TAGW-1:0] cci_rd_rsp_tag;
|
||||
reg [CCI_RD_QUEUE_TAGW-1:0] cci_rd_rsp_ctr;
|
||||
|
||||
wire cci_rdq_push, cci_rdq_pop;
|
||||
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_din;
|
||||
wire [CCI_RD_QUEUE_DATAW-1:0] cci_rdq_din;
|
||||
wire cci_rdq_empty;
|
||||
|
||||
always @(*) begin
|
||||
|
@ -641,16 +640,15 @@ end
|
|||
|
||||
wire cci_mem_wr_req_fire = cci_mem_wr_req_valid && cci_mem_req_ready;
|
||||
|
||||
wire cci_rd_rsp_fire = (STATE_WRITE == state)
|
||||
&& cp2af_sRxPort.c0.rspValid
|
||||
wire cci_rd_rsp_fire = cp2af_sRxPort.c0.rspValid
|
||||
&& (cp2af_sRxPort.c0.hdr.resp_type == eRSP_RDLINE);
|
||||
|
||||
assign cci_rd_req_tag = CCI_RD_RQ_TAGW'(cci_rd_req_ctr);
|
||||
assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata);
|
||||
assign cci_rd_req_tag = CCI_RD_QUEUE_TAGW'(cci_rd_req_ctr);
|
||||
assign cci_rd_rsp_tag = CCI_RD_QUEUE_TAGW'(cp2af_sRxPort.c0.hdr.mdata);
|
||||
|
||||
assign cci_rdq_push = cci_rd_rsp_fire;
|
||||
assign cci_rdq_pop = cci_mem_wr_req_fire;
|
||||
assign cci_rdq_din = {cp2af_sRxPort.c0.data, cci_rd_rsp_tag};
|
||||
assign cci_rdq_din = {cp2af_sRxPort.c0.data, cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(cci_rd_rsp_tag)};
|
||||
|
||||
wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads;
|
||||
wire cci_pending_reads_full;
|
||||
|
@ -673,9 +671,7 @@ assign cci_rd_req_fire = cci_rd_req_valid && !(cci_rd_req_wait || cci_pending_re
|
|||
|
||||
assign cci_mem_wr_req_valid = !cci_rdq_empty;
|
||||
|
||||
assign cci_mem_wr_req_addr = cci_mem_wr_req_addr_unqual + (CCI_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout)));
|
||||
|
||||
assign cmd_write_done = (cci_mem_wr_req_ctr == cmd_data_size);
|
||||
assign cci_mem_wr_req_addr = cci_rdq_dout[CCI_ADDR_WIDTH-1:0];
|
||||
|
||||
// Send read requests to CCI
|
||||
always @(posedge clk) begin
|
||||
|
@ -693,11 +689,11 @@ always @(posedge clk) begin
|
|||
&& (cci_rd_req_ctr_next != cmd_data_size)
|
||||
&& !cp2af_sRxPort.c0TxAlmFull;
|
||||
|
||||
if (cci_rd_req_fire && (cci_rd_req_tag == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1))) begin
|
||||
if (cci_rd_req_fire && (cci_rd_req_tag == CCI_RD_QUEUE_TAGW'(CCI_RD_WINDOW_SIZE-1))) begin
|
||||
cci_rd_req_wait <= 1; // end current request batch
|
||||
end
|
||||
|
||||
if (cci_rd_rsp_fire && (cci_rd_rsp_ctr == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1))) begin
|
||||
if (cci_rd_rsp_fire && (cci_rd_rsp_ctr == CCI_RD_QUEUE_TAGW'(CCI_RD_WINDOW_SIZE-1))) begin
|
||||
cci_rd_req_wait <= 0; // begin new request batch
|
||||
end
|
||||
end
|
||||
|
@ -708,7 +704,8 @@ always @(posedge clk) begin
|
|||
cci_rd_req_ctr <= 0;
|
||||
cci_rd_rsp_ctr <= 0;
|
||||
cci_mem_wr_req_ctr <= 0;
|
||||
cci_mem_wr_req_addr_unqual <= cmd_mem_addr;
|
||||
cci_mem_wr_req_addr_base <= cmd_mem_addr;
|
||||
cmd_write_done <= 0;
|
||||
end
|
||||
|
||||
if (cci_rd_req_fire) begin
|
||||
|
@ -720,7 +717,7 @@ always @(posedge clk) begin
|
|||
end
|
||||
|
||||
if (cci_rd_rsp_fire) begin
|
||||
cci_rd_rsp_ctr <= cci_rd_rsp_ctr + CCI_RD_RQ_TAGW'(1);
|
||||
cci_rd_rsp_ctr <= cci_rd_rsp_ctr + CCI_RD_QUEUE_TAGW'(1);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d, data=%0h", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data);
|
||||
`endif
|
||||
|
@ -733,13 +730,18 @@ always @(posedge clk) begin
|
|||
end
|
||||
|
||||
if (cci_mem_wr_req_fire) begin
|
||||
cci_mem_wr_req_addr_unqual <= cci_mem_wr_req_addr_unqual + ((CCI_RD_RQ_TAGW'(cci_mem_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : CCI_ADDR_WIDTH'(0));
|
||||
cci_mem_wr_req_ctr <= cci_mem_wr_req_ctr + CCI_ADDR_WIDTH'(1);
|
||||
if (CCI_RD_QUEUE_TAGW'(cci_mem_wr_req_ctr) == CCI_RD_QUEUE_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin
|
||||
cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE);
|
||||
end
|
||||
cci_mem_wr_req_ctr <= cci_mem_wr_req_ctr + CCI_ADDR_WIDTH'(1);
|
||||
if (cci_mem_wr_req_ctr == (cmd_data_size-1)) begin
|
||||
cmd_write_done <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (CCI_RD_RQ_DATAW),
|
||||
.DATAW (CCI_RD_QUEUE_DATAW),
|
||||
.SIZE (CCI_RD_QUEUE_SIZE)
|
||||
) cci_rd_req_queue (
|
||||
.clk (clk),
|
||||
|
@ -779,11 +781,13 @@ VX_fifo_queue #(
|
|||
|
||||
reg [CCI_ADDR_WIDTH-1:0] cci_mem_rd_req_ctr;
|
||||
reg [CCI_ADDR_WIDTH-1:0] cci_mem_rd_req_addr;
|
||||
reg [CCI_ADDR_WIDTH-1:0] cci_wr_req_ctr;
|
||||
reg cci_mem_rd_req_done;
|
||||
|
||||
reg [CCI_ADDR_WIDTH-1:0] cci_wr_req_ctr;
|
||||
reg cci_wr_req_fire;
|
||||
t_ccip_clAddr cci_wr_req_addr;
|
||||
t_ccip_clData cci_wr_req_data;
|
||||
reg cci_wr_req_done;
|
||||
|
||||
always @(*) begin
|
||||
af2cp_sTxPort.c1.valid = cci_wr_req_fire;
|
||||
|
@ -818,12 +822,12 @@ VX_pending_size #(
|
|||
`UNUSED_VAR (cci_pending_writes)
|
||||
|
||||
assign cci_mem_rd_req_valid = (STATE_READ == state)
|
||||
&& (cci_mem_rd_req_ctr != cmd_data_size);
|
||||
&& !cci_mem_rd_req_done;
|
||||
|
||||
assign cci_mem_rsp_ready = !cp2af_sRxPort.c1TxAlmFull
|
||||
&& !cci_pending_writes_full;
|
||||
|
||||
assign cmd_read_done = (0 == cci_wr_req_ctr)
|
||||
assign cmd_read_done = cci_wr_req_done
|
||||
&& cci_pending_writes_empty;
|
||||
|
||||
// Send write requests to CCI
|
||||
|
@ -839,12 +843,17 @@ begin
|
|||
&& (CMD_MEM_READ == cmd_type)) begin
|
||||
cci_mem_rd_req_ctr <= 0;
|
||||
cci_mem_rd_req_addr <= cmd_mem_addr;
|
||||
cci_mem_rd_req_done <= 0;
|
||||
cci_wr_req_ctr <= cmd_data_size;
|
||||
cci_wr_req_done <= 0;
|
||||
end
|
||||
|
||||
if (cci_mem_rd_req_fire) begin
|
||||
cci_mem_rd_req_addr <= cci_mem_rd_req_addr + CCI_ADDR_WIDTH'(1);
|
||||
cci_mem_rd_req_ctr <= cci_mem_rd_req_ctr + CCI_ADDR_WIDTH'(1);
|
||||
if (cci_mem_rd_req_ctr == (cmd_data_size-1)) begin
|
||||
cci_mem_rd_req_done <= 1;
|
||||
end
|
||||
end
|
||||
|
||||
cci_wr_req_addr <= cmd_io_addr + t_ccip_clAddr'(cci_mem_rsp_tag);
|
||||
|
@ -853,6 +862,9 @@ begin
|
|||
if (cci_wr_req_fire) begin
|
||||
assert(cci_wr_req_ctr != 0);
|
||||
cci_wr_req_ctr <= cci_wr_req_ctr - CCI_ADDR_WIDTH'(1);
|
||||
if (cci_wr_req_ctr == CCI_ADDR_WIDTH'(1)) begin
|
||||
cci_wr_req_done <= 1;
|
||||
end
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data);
|
||||
`endif
|
||||
|
@ -867,9 +879,10 @@ end
|
|||
|
||||
//--
|
||||
|
||||
assign cci_mem_req_rw = (CMD_MEM_WRITE == state);
|
||||
assign cci_mem_req_rw = state[0]; // STATE_READ=10, STATE_WRITE=01
|
||||
assign cci_mem_req_valid = cci_mem_req_rw ? cci_mem_wr_req_valid : cci_mem_rd_req_valid;
|
||||
assign cci_mem_req_addr = cci_mem_req_rw ? cci_mem_wr_req_addr : cci_mem_rd_req_addr;
|
||||
assign cci_mem_req_data = cci_rdq_dout[CCI_RD_QUEUE_DATAW-1:CCI_ADDR_WIDTH];
|
||||
assign cci_mem_req_tag = cci_mem_req_rw ? cci_mem_wr_req_ctr : cci_mem_rd_req_ctr;
|
||||
|
||||
// Vortex /////////////////////////////////////////////////////////////////////
|
||||
|
@ -920,7 +933,7 @@ assign cout_char = vx_mem_req_data_ar[cout_tid];
|
|||
assign vx_mem_is_cout = (vx_mem_req_addr == `VX_MEM_ADDR_WIDTH'(`IO_COUT_ADDR >> (32 - `VX_MEM_ADDR_WIDTH)));
|
||||
|
||||
wire cout_q_push = vx_mem_req_valid
|
||||
&& vx_mem_en
|
||||
&& vx_started
|
||||
&& vx_mem_is_cout
|
||||
&& ~cout_q_full;
|
||||
|
||||
|
|
3
hw/rtl/cache/VX_bank.v
vendored
3
hw/rtl/cache/VX_bank.v
vendored
|
@ -475,8 +475,7 @@ module VX_bank #(
|
|||
end
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
|
||||
.USE_FASTREG (NUM_BANKS == 1)
|
||||
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS)
|
||||
) core_rsp_req (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
6
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
6
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
|
@ -106,8 +106,7 @@ module VX_cache_core_rsp_merge #(
|
|||
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)),
|
||||
.USE_FASTREG (1)
|
||||
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH))
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -155,8 +154,7 @@ module VX_cache_core_rsp_merge #(
|
|||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH),
|
||||
.USE_FASTREG (1)
|
||||
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -48,7 +48,7 @@ module VX_priority_encoder #(
|
|||
VX_onehot_encoder #(
|
||||
.N (N),
|
||||
.REVERSE (REVERSE)
|
||||
) b (
|
||||
) onehot_encoder (
|
||||
.data_in (onehot),
|
||||
.data_out (index),
|
||||
`UNUSED_PIN (valid)
|
||||
|
|
|
@ -3,7 +3,8 @@
|
|||
module VX_rr_arbiter #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter LOCK_ENABLE = 0,
|
||||
parameter LOG_NUM_REQS = $clog2(NUM_REQS)
|
||||
parameter LOG_NUM_REQS = $clog2(NUM_REQS),
|
||||
parameter FAST = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -23,6 +24,58 @@ module VX_rr_arbiter #(
|
|||
assign grant_onehot = requests;
|
||||
assign grant_valid = requests[0];
|
||||
|
||||
end else if (FAST == 1) begin
|
||||
|
||||
wire [NUM_REQS-1:0] req_masked;
|
||||
wire [NUM_REQS-1:0] grant, grant_masked, grant_unmasked;
|
||||
/* verilator lint_off UNOPTFLAT */
|
||||
wire [NUM_REQS-1:0] mask_higher_pri_reqs;
|
||||
/* verilator lint_off UNOPTFLAT */
|
||||
wire [NUM_REQS-1:0] unmask_higher_pri_reqs;
|
||||
wire no_req_masked;
|
||||
reg [NUM_REQS-1:0] pointer_reg;
|
||||
|
||||
// Simple priority arbitration for masked portion
|
||||
assign req_masked = requests & pointer_reg;
|
||||
assign mask_higher_pri_reqs[NUM_REQS-1:1] = mask_higher_pri_reqs[NUM_REQS-2:0] | req_masked[NUM_REQS-2:0];
|
||||
assign mask_higher_pri_reqs[0] = 1'b0;
|
||||
assign grant_masked[NUM_REQS-1:0] = req_masked[NUM_REQS-1:0] & ~mask_higher_pri_reqs[NUM_REQS-1:0];
|
||||
|
||||
// Simple priority arbitration for unmasked portion
|
||||
assign unmask_higher_pri_reqs[NUM_REQS-1:1] = unmask_higher_pri_reqs[NUM_REQS-2:0] | requests[NUM_REQS-2:0];
|
||||
assign unmask_higher_pri_reqs[0] = 1'b0;
|
||||
assign grant_unmasked[NUM_REQS-1:0] = requests[NUM_REQS-1:0] & ~unmask_higher_pri_reqs[NUM_REQS-1:0];
|
||||
|
||||
// Use grant_masked when any masked request is pending; otherwise fall back to grant_unmasked.
|
||||
assign no_req_masked = ~(| req_masked);
|
||||
assign grant = ({NUM_REQS{no_req_masked}} & grant_unmasked) | grant_masked;
|
||||
|
||||
// Generate arbiter pointer update
|
||||
wire mask_ptr_sel = (| req_masked) & (!LOCK_ENABLE || enable);
|
||||
wire unmask_ptr_sel = (| requests) & (!LOCK_ENABLE || enable);
|
||||
|
||||
// Pointer update
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
pointer_reg <= {NUM_REQS{1'b1}};
|
||||
end else if (mask_ptr_sel) begin // select if masked arbiter used
|
||||
pointer_reg <= mask_higher_pri_reqs;
|
||||
end else if (unmask_ptr_sel) begin // select if unmasked arbiter used
|
||||
pointer_reg <= unmask_higher_pri_reqs;
|
||||
end
|
||||
end
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.N (NUM_REQS)
|
||||
) onehot_encoder (
|
||||
.data_in (grant),
|
||||
.data_out (grant_index),
|
||||
`UNUSED_PIN (valid)
|
||||
);
|
||||
|
||||
assign grant_onehot = grant;
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
end else begin
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_table [NUM_REQS-1:0];
|
||||
|
|
|
@ -83,7 +83,7 @@ module VX_skid_buffer #(
|
|||
end
|
||||
if (pop && !use_buffer) begin
|
||||
data_out_r <= data_in;
|
||||
end else if (pop) begin
|
||||
end else if (ready_out) begin
|
||||
data_out_r <= buffer;
|
||||
end
|
||||
end
|
||||
|
|
|
@ -76,16 +76,16 @@ $(FPGA_BUILD_DIR)_4c/build/dcp.qpf:
|
|||
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_4c
|
||||
|
||||
$(FPGA_BUILD_DIR)_8c/build/dcp.qpf:
|
||||
afu_synth_setup -s setup8.cfg $(FPGA_BUILD_DIR)_8c
|
||||
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_8c
|
||||
|
||||
$(FPGA_BUILD_DIR)_16c/build/dcp.qpf:
|
||||
afu_synth_setup -s setup16.cfg $(FPGA_BUILD_DIR)_16c
|
||||
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_16c
|
||||
|
||||
$(FPGA_BUILD_DIR)_32c/build/dcp.qpf:
|
||||
afu_synth_setup -s setup16.cfg $(FPGA_BUILD_DIR)_32c
|
||||
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_32c
|
||||
|
||||
$(FPGA_BUILD_DIR)_64c/build/dcp.qpf:
|
||||
afu_synth_setup -s setup16.cfg $(FPGA_BUILD_DIR)_64c
|
||||
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_64c
|
||||
|
||||
gen-sources-1c:
|
||||
./gen_sources.sh $(CFLAGS) $(CONFIG1) > sources.txt
|
||||
|
|
|
@ -1,7 +0,0 @@
|
|||
+define+SYNTHESIS
|
||||
+define+QUARTUS
|
||||
|
||||
vortex_afu16.json
|
||||
QI:vortex_afu.qsf
|
||||
|
||||
C:sources.txt
|
|
@ -1,7 +0,0 @@
|
|||
+define+SYNTHESIS
|
||||
+define+QUARTUS
|
||||
|
||||
vortex_afu8.json
|
||||
QI:vortex_afu.qsf
|
||||
|
||||
C:sources.txt
|
|
@ -2,8 +2,8 @@
|
|||
"version": 1,
|
||||
"afu-image": {
|
||||
"power": 0,
|
||||
"clock-frequency-high": "auto-220",
|
||||
"clock-frequency-low": "auto-220",
|
||||
"clock-frequency-high": "auto-210",
|
||||
"clock-frequency-low": "auto-210",
|
||||
|
||||
"cmd-mem-read": 1,
|
||||
"cmd-mem-write": 2,
|
||||
|
|
|
@ -1,53 +0,0 @@
|
|||
{
|
||||
"version": 1,
|
||||
"afu-image": {
|
||||
"power": 0,
|
||||
"clock-frequency-high": "auto-200",
|
||||
"clock-frequency-low": "auto-200",
|
||||
|
||||
"cmd-mem-read": 1,
|
||||
"cmd-mem-write": 2,
|
||||
"cmd-run": 3,
|
||||
"cmd-csr-read": 4,
|
||||
"cmd-csr-write": 5,
|
||||
|
||||
"mmio-cmd-type": 10,
|
||||
"mmio-io-addr": 12,
|
||||
"mmio-mem-addr": 14,
|
||||
"mmio-data-size": 16,
|
||||
"mmio-status": 18,
|
||||
"mmio-scope-read": 20,
|
||||
"mmio-scope-write": 22,
|
||||
"mmio-dev-caps": 24,
|
||||
|
||||
"afu-top-interface":
|
||||
{
|
||||
"class": "ccip_std_afu_avalon_mm",
|
||||
"module-ports" :
|
||||
[
|
||||
{
|
||||
"class": "cci-p",
|
||||
"params":
|
||||
{
|
||||
"clock": "uClk_usr"
|
||||
}
|
||||
},
|
||||
{
|
||||
"class": "local-memory",
|
||||
"params":
|
||||
{
|
||||
"clock": "uClk_usr"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"accelerator-clusters":
|
||||
[
|
||||
{
|
||||
"name": "vortex_afu",
|
||||
"total-contexts": 1,
|
||||
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
|
@ -1,54 +0,0 @@
|
|||
{
|
||||
"version": 1,
|
||||
"afu-image": {
|
||||
"power": 0,
|
||||
"clock-frequency-high": "auto-210",
|
||||
"clock-frequency-low": "auto-210",
|
||||
|
||||
"cmd-mem-read": 1,
|
||||
"cmd-mem-write": 2,
|
||||
"cmd-run": 3,
|
||||
"cmd-csr-read": 4,
|
||||
"cmd-csr-write": 5,
|
||||
|
||||
"mmio-cmd-type": 10,
|
||||
"mmio-io-addr": 12,
|
||||
"mmio-mem-addr": 14,
|
||||
"mmio-data-size": 16,
|
||||
"mmio-status": 18,
|
||||
"mmio-scope-read": 20,
|
||||
"mmio-scope-write": 22,
|
||||
"mmio-dev-caps": 24,
|
||||
|
||||
"afu-top-interface":
|
||||
{
|
||||
"class": "ccip_std_afu_avalon_mm",
|
||||
"module-ports" :
|
||||
[
|
||||
{
|
||||
"class": "cci-p",
|
||||
"params":
|
||||
{
|
||||
"clock": "uClk_usr"
|
||||
}
|
||||
},
|
||||
{
|
||||
"class": "local-memory",
|
||||
"params":
|
||||
{
|
||||
"clock": "uClk_usr"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"accelerator-clusters":
|
||||
[
|
||||
{
|
||||
"name": "vortex_afu",
|
||||
"total-contexts": 1,
|
||||
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue