OPAE rtl fixes

This commit is contained in:
Blaise Tine 2020-05-20 12:08:10 -07:00
parent e3bead147a
commit b5569dd525
10 changed files with 112 additions and 78 deletions

View file

@ -5,10 +5,11 @@ CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
# control RTL debug print states
DBG_PRINT = -DDBG_PRINT_CORE_ICACHE \
-DDBG_PRINT_CORE_DCACHE \
-DDBG_PRINT_BANK \
-DDBG_PRINT_DRAM \
-DDBG_PRINT_SNP_FWD
-DDBG_PRINT_CORE_DCACHE \
-DDBG_PRINT_CACHE_BANK \
-DDBG_PRINT_CACHE_SNP \
-DDBG_PRINT_CACHE_MSRQ \
-DDBG_PRINT_DRAM
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=2
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2

View file

@ -2,14 +2,15 @@ vortex_afu.json
+define+GLOBAL_BLOCK_SIZE=64
#+define+NUM_CORES=2
#+define+L2_ENABLE=0
+define+NUM_CORES=2
+define+L2_ENABLE=0
#+define+DBG_PRINT_CORE_ICACHE
#+define+DBG_PRINT_CORE_DCACHE
#+define+DBG_PRINT_BANK
#+define+DBG_PRINT_CACHE_BANK
#+define+DBG_PRINT_CACHE_SNP
#+define+DBG_PRINT_CACHE_MSRQ
#+define+DBG_PRINT_DRAM
#+define+DBG_PRINT_SNP_FWD
+incdir+.
+incdir+../rtl

View file

@ -3,6 +3,8 @@ import local_mem_cfg_pkg::*;
`include "afu_json_info.vh"
`include "VX_define.vh"
`define DRAM_TO_BYTE_ADDR(x) {x, 6'b0}
module vortex_afu #(
parameter NUM_LOCAL_MEM_BANKS = 2
) (
@ -110,6 +112,7 @@ logic avs_rdq_pop;
t_local_mem_data avs_rdq_dout;
logic avs_rdq_empty;
logic avs_rdq_full;
logic [`LOG2UP(AVS_RD_QUEUE_SIZE+1)-1:0] avs_rdq_size;
// CSR variables //////////////////////////////////////////////////////////////
@ -149,11 +152,11 @@ begin
case (mmioHdr.address)
MMIO_CSR_IO_ADDR: begin
csr_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data);
$display("%t: CSR_IO_ADDR: 0x%h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data));
$display("%t: CSR_IO_ADDR: 0x%0h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data));
end
MMIO_CSR_MEM_ADDR: begin
csr_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data);
$display("%t: CSR_MEM_ADDR: 0x%h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data));
$display("%t: CSR_MEM_ADDR: 0x%0h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data));
end
MMIO_CSR_DATA_SIZE: begin
csr_data_size <= $bits(csr_data_size)'(cp2af_sRxPort.c0.data);
@ -235,11 +238,11 @@ begin
STATE_IDLE: begin
case (csr_cmd)
CMD_TYPE_READ: begin
$display("%t: STATE READ: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
$display("%t: STATE READ: ia=%0h da=%0h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
state <= STATE_READ;
end
CMD_TYPE_WRITE: begin
$display("%t: STATE WRITE: ia=%h da=%h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
$display("%t: STATE WRITE: ia=%0h da=%0h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
state <= STATE_WRITE;
end
CMD_TYPE_RUN: begin
@ -248,7 +251,7 @@ begin
state <= STATE_START;
end
CMD_TYPE_CLFLUSH: begin
$display("%t: STATE CFLUSH: da=%h sz=%0d", $time, csr_mem_addr, csr_data_size);
$display("%t: STATE CFLUSH: da=%0h sz=%0d", $time, csr_mem_addr, csr_data_size);
state <= STATE_CLFLUSH;
end
endcase
@ -296,7 +299,8 @@ logic cci_dram_req_read_fire;
logic cci_dram_req_write_fire;
logic vx_dram_req_read_fire;
logic vx_dram_req_write_fire;
logic [`LOG2UP(AVS_RD_QUEUE_SIZE):0] avs_pending_reads, avs_pending_rds_next;
logic vx_dram_rsp_fire;
logic [`LOG2UP(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads, avs_pending_rds_next;
t_ccip_clAddr next_avs_address;
always_comb
@ -311,7 +315,7 @@ begin
&& avs_write_ctr < csr_data_size);
cci_dram_req_read_fire = (state == STATE_READ)
&& (avs_pending_reads < AVS_RD_QUEUE_SIZE)
&& ((avs_pending_reads + avs_rdq_size) < AVS_RD_QUEUE_SIZE)
&& !avs_waitrequest
&& avs_read_ctr < csr_data_size;
@ -322,6 +326,8 @@ begin
vx_dram_req_write_fire = vx_dram_req_write && vx_dram_req_ready;
vx_dram_rsp_fire = vx_dram_rsp_valid && vx_dram_rsp_ready;
if ((cci_dram_req_read_fire || vx_dram_req_read_fire)
&& ~avs_readdatavalid) begin
avs_pending_rds_next = avs_pending_reads + 1;
@ -363,7 +369,7 @@ begin
avs_address <= csr_mem_addr + avs_read_ctr;
avs_read_ctr <= avs_read_ctr + 1;
avs_read <= 1;
$display("%t: AVS Rd Req: addr=%h, pending=%0d", $time, (csr_mem_addr + avs_read_ctr), avs_pending_reads);
$display("%t: AVS Rd Req: addr=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(csr_mem_addr + avs_read_ctr), avs_pending_reads);
end
if (cci_dram_req_write_fire) begin
@ -371,20 +377,20 @@ begin
avs_address <= next_avs_address;
avs_write_ctr <= avs_write_ctr + 1;
avs_write <= 1;
$display("%t: AVS Wr Req: addr=%h (%0d/%0d)", $time, next_avs_address, avs_write_ctr + 1, csr_data_size);
$display("%t: AVS Wr Req: addr=%0h (%0d/%0d)", $time, `DRAM_TO_BYTE_ADDR(next_avs_address), avs_write_ctr + 1, csr_data_size);
end
if (vx_dram_req_read_fire) begin
avs_address <= vx_dram_req_addr;
avs_read <= 1;
$display("%t: AVS Rd Req: addr=%h, pending=%0d", $time, vx_dram_req_addr, avs_pending_reads);
$display("%t: AVS Rd Req: addr=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_dram_req_addr), avs_pending_reads);
end
if (vx_dram_req_write_fire) begin
avs_address <= vx_dram_req_addr;
avs_writedata <= vx_dram_req_data;
avs_write <= 1;
$display("%t: AVS Wr Req: addr=%h", $time, vx_dram_req_addr);
$display("%t: AVS Wr Req: addr=%0h", $time, `DRAM_TO_BYTE_ADDR(vx_dram_req_addr));
end
if (avs_readdatavalid) begin
@ -399,7 +405,9 @@ end
always_comb
begin
vx_dram_req_ready = vortex_enabled && !avs_waitrequest && (avs_pending_reads < AVS_RD_QUEUE_SIZE);
vx_dram_req_ready = vortex_enabled
&& !avs_waitrequest
&& ((avs_pending_reads + avs_rdq_size) < AVS_RD_QUEUE_SIZE);
end
// Vortex DRAM fill response
@ -419,7 +427,7 @@ always_comb
begin
avs_rtq_push = vx_dram_req_read_fire;
avs_rtq_din = vx_dram_req_tag;
avs_rtq_pop = vx_dram_rsp_valid;
avs_rtq_pop = vx_dram_rsp_fire;
end
VX_generic_queue #(
@ -442,7 +450,7 @@ always_comb
begin
avs_rdq_push = avs_readdatavalid;
avs_rdq_din = avs_readdata;
avs_rdq_pop = vx_dram_rsp_valid || cci_wr_req;
avs_rdq_pop = vx_dram_rsp_fire || cci_wr_req;
end
VX_generic_queue #(
@ -456,7 +464,8 @@ VX_generic_queue #(
.pop (avs_rdq_pop),
.data_out (avs_rdq_dout),
.empty (avs_rdq_empty),
.full (avs_rdq_full)
.full (avs_rdq_full),
.size (avs_rdq_size)
);
// CCI Read Request ///////////////////////////////////////////////////////////
@ -513,7 +522,7 @@ begin
if (t_cci_rdq_tag'(cci_read_ctr) == (CCI_RD_WINDOW_SIZE-1)) begin
cci_read_wait <= 1; // end current request batch
end
$display("%t: CCI Rd Req: addr=%h, ctr=%0d", $time, cci_read_hdr.address, cci_read_ctr);
$display("%t: CCI Rd Req: addr=%0h, ctr=%0d", $time, `DRAM_TO_BYTE_ADDR(cci_read_hdr.address), cci_read_ctr);
end
if (cci_rdq_push) begin
@ -591,7 +600,7 @@ begin
af2cp_sTxPort.c1.data <= t_ccip_clData'(avs_rdq_dout);
af2cp_sTxPort.c1.valid <= 1;
cci_write_ctr <= cci_write_ctr + 1;
$display("%t: CCI Wr Req: addr=%h (%0d/%0d)", $time, cci_write_hdr.address, cci_write_ctr + 1, csr_data_size);
$display("%t: CCI Wr Req: addr=%0h (%0d/%0d)", $time, `DRAM_TO_BYTE_ADDR(cci_write_hdr.address), cci_write_ctr + 1, csr_data_size);
end
if (cp2af_sRxPort.c1.rspValid) begin
@ -607,9 +616,12 @@ end
logic [DRAM_ADDR_WIDTH-1:0] snp_req_ctr;
logic [DRAM_ADDR_WIDTH-1:0] snp_rsp_ctr;
logic vx_snp_rsp_fire;
// Combinational status and handshake signals derived from the snoop counters/ports.
always_comb
begin
// Asserted once snp_rsp_ctr has reached (or passed) csr_data_size.
cmd_clflush_done = (snp_rsp_ctr >= csr_data_size);
// High only when the snoop response is both valid and ready, i.e. the handshake completes.
vx_snp_rsp_fire = vx_snp_rsp_valid && vx_snp_rsp_ready;
end
always_ff @(posedge clk)
@ -642,8 +654,7 @@ begin
if ((STATE_CLFLUSH == state)
&& (snp_rsp_ctr < csr_data_size)
&& vx_snp_rsp_valid
&& vx_snp_rsp_ready) begin
&& vx_snp_rsp_fire) begin
snp_rsp_ctr <= snp_rsp_ctr + 1;
end
end

View file

@ -42,11 +42,11 @@ module VX_scheduler (
wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual;
assign schedule_delay = (|bckE_req_if.valid) &&
((rename_valid ) ||
(memory_delay && is_mem) ||
(gpr_stage_delay && (is_mem || is_exec)) ||
(exec_delay && is_exec));
assign schedule_delay = (| bckE_req_if.valid)
&& ((rename_valid )
|| (memory_delay && is_mem)
|| (gpr_stage_delay && (is_mem || is_exec))
|| (exec_delay && is_exec));
integer i, w;

View file

@ -105,35 +105,33 @@ module VX_bank #(
);
`DEBUG_BEGIN
wire[31:0] debug_use_pc_st0;
wire[1:0] debug_wb_st0;
wire[4:0] debug_rd_st0;
wire[`NW_BITS-1:0] debug_warp_num_st0;
wire[2:0] debug_mem_read_st0;
wire[2:0] debug_mem_write_st0;
wire[`REQS_BITS-1:0] debug_tid_st0;
wire[31:0] debug_use_pc_st0;
wire[1:0] debug_wb_st0;
wire[4:0] debug_rd_st0;
wire[`NW_BITS-1:0] debug_warp_num_st0;
wire[2:0] debug_mem_read_st0;
wire[2:0] debug_mem_write_st0;
wire[`REQS_BITS-1:0] debug_tid_st0;
wire[31:0] debug_use_pc_st1e;
wire[1:0] debug_wb_st1e;
wire[4:0] debug_rd_st1e;
wire[`NW_BITS-1:0] debug_warp_num_st1e;
wire[2:0] debug_mem_read_st1e;
wire[2:0] debug_mem_write_st1e;
wire[`REQS_BITS-1:0] debug_tid_st1e;
wire[31:0] debug_use_pc_st1e;
wire[1:0] debug_wb_st1e;
wire[4:0] debug_rd_st1e;
wire[`NW_BITS-1:0] debug_warp_num_st1e;
wire[2:0] debug_mem_read_st1e;
wire[2:0] debug_mem_write_st1e;
wire[`REQS_BITS-1:0] debug_tid_st1e;
wire[31:0] debug_use_pc_st2;
wire[1:0] debug_wb_st2;
wire[4:0] debug_rd_st2;
wire[`NW_BITS-1:0] debug_warp_num_st2;
wire[2:0] debug_mem_read_st2;
wire[2:0] debug_mem_write_st2;
wire[`REQS_BITS-1:0] debug_tid_st2;
wire[31:0] debug_use_pc_st2;
wire[1:0] debug_wb_st2;
wire[4:0] debug_rd_st2;
wire[`NW_BITS-1:0] debug_warp_num_st2;
wire[2:0] debug_mem_read_st2;
wire[2:0] debug_mem_write_st2;
wire[`REQS_BITS-1:0] debug_tid_st2;
`DEBUG_END
wire snrq_pop;
wire snrq_empty;
wire snrq_full;
@ -505,6 +503,8 @@ module VX_bank #(
assign mrvq_init_ready_state_st2 = mrvq_init_ready_state_unqual_st2 || mrvq_init_ready_state_hazard_st0_st1 || mrvq_init_ready_state_hazard_st1e_st1;
VX_cache_miss_resrv #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
@ -681,7 +681,7 @@ module VX_bank #(
|| msrq_push_stall
|| dram_fill_req_stall;
`ifdef DBG_PRINT_BANK
`ifdef DBG_PRINT_CACHE_BANK
always_ff @(posedge clk) begin
if (dram_fill_req_valid && dram_fill_req_ready) begin
$display("%t: bank%02d:%01d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID));

View file

@ -228,7 +228,7 @@ module VX_cache #(
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS)
) cache_core_req_bank_sell (
) cache_core_req_bank_sel (
.core_req_valid (core_req_valid),
.core_req_addr (core_req_addr),
.per_bank_valids (per_bank_valids)

View file

@ -11,25 +11,22 @@ module VX_cache_core_req_bank_sel #(
// Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 0
) (
input wire [NUM_REQUESTS-1:0] core_req_valid,
input wire [NUM_REQUESTS-1:0][31:0] core_req_addr,
input wire [NUM_REQUESTS-1:0] core_req_valid,
input wire [NUM_REQUESTS-1:0][31:0] core_req_addr,
output reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valids
);
integer i;
output reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valids
);
generate
integer i;
always @(*) begin
per_bank_valids = 0;
for (i = 0; i < NUM_REQUESTS; i++) begin
if (NUM_BANKS == 1) begin
// If there is only one bank, then only map requests to that bank
per_bank_valids[0][i] = core_req_valid[i];
end else begin
per_bank_valids[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
end
always @(*) begin
per_bank_valids = 0;
for (i = 0; i < NUM_REQUESTS; i++) begin
if (NUM_BANKS == 1) begin
// If there is only one bank, then only map requests to that bank
per_bank_valids[0][i] = core_req_valid[i];
end else begin
per_bank_valids[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
end
end
endgenerate
end
endmodule

View file

@ -1,6 +1,8 @@
`include "VX_cache_config.vh"
module VX_cache_miss_resrv #(
parameter CACHE_ID = 0,
parameter BANK_ID = 0,
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 0,
// Number of banks {1, 2, 4, 8,...}
@ -141,4 +143,21 @@ module VX_cache_miss_resrv #(
end
end
// Debug trace of the miss reserve queue contents; compiled in only when
// DBG_PRINT_CACHE_MSRQ is defined.
`ifdef DBG_PRINT_CACHE_MSRQ
always_ff @(posedge clk) begin
// Emit one trace line on any clock edge where a push or a pop is asserted.
if (mrvq_push || mrvq_pop) begin
$write("%t: bank%02d:%01d msrq: push=%b pop=%b", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop);
// Append one field for every slot whose valid_table bit is set.
for (int i = 0; i < MRVQ_SIZE; i++) begin
if (valid_table[i]) begin
$write(" ");
// "*" prefixes the slot whose index equals head_ptr.
if (i == head_ptr) $write("*");
// "!" prefixes a slot whose ready_table bit is clear.
if (~ready_table[i]) $write("!");
// Slot index, then its addr_table entry passed through LINE_TO_BYTE_ADDR with this BANK_ID.
$write("addr%0d=%0h", i, `LINE_TO_BYTE_ADDR(addr_table[i], BANK_ID));
end
end
// $write does not append a newline, so terminate the trace line explicitly.
$write("\n");
end
end
`endif
endmodule

View file

@ -112,7 +112,7 @@ module VX_snp_forwarder #(
assign snp_fwdin_ready[i] = fwdin_ready && (fwdin_sel == `REQS_BITS'(i));
end
`ifdef DBG_PRINT_SNP_FWD
`ifdef DBG_PRINT_CACHE_SNP
always_ff @(posedge clk) begin
if (snp_req_valid && snp_req_ready) begin
$display("%t: snp req: addr=%0h, tag=%0h", $time, snp_req_addr, snp_req_tag);

View file

@ -13,13 +13,15 @@ module VX_generic_queue #(
output wire full,
`IGNORE_WARNINGS_END
input wire [DATAW-1:0] data_in,
output wire [DATAW-1:0] data_out
output wire [DATAW-1:0] data_out,
output wire [`LOG2UP(SIZE+1)-1:0] size
);
if (SIZE == 0) begin
assign empty = 1;
assign data_out = data_in;
assign full = 0;
assign size = 0;
end else begin // (SIZE > 0)
@ -59,6 +61,8 @@ module VX_generic_queue #(
assign data_out = head_r;
assign empty = (size_r == 0);
assign full = (size_r != 0);
assign size = size_r;
end else begin // (SIZE > 1)
reg [DATAW-1:0] curr_r;
@ -131,8 +135,9 @@ module VX_generic_queue #(
end
assign data_out = bypass_r ? curr_r : head_r;
assign empty = empty_r;
assign full = full_r;
assign empty = empty_r;
assign full = full_r;
assign size = size_r;
end
end