core interfaces refactoring

This commit is contained in:
Blaise Tine 2023-06-22 14:49:22 -04:00
parent b4a2c0204f
commit e4fec33f5d
26 changed files with 353 additions and 432 deletions

View file

@ -18,8 +18,8 @@ module VX_commit #(
// outputs
VX_writeback_if.master writeback_if,
VX_cmt_to_csr_if.master cmt_to_csr_if,
VX_cmt_to_fetch_if.master cmt_to_fetch_if,
VX_commit_csr_if.master commit_csr_if,
VX_commit_sched_if.master commit_sched_if,
// simulation helper signals
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value
@ -87,7 +87,7 @@ module VX_commit #(
end
end
assign cmt_to_csr_if.instret = instret;
assign commit_csr_if.instret = instret;
// Committed instructions
@ -127,8 +127,8 @@ module VX_commit #(
.data_out ({final_commit_fire_r, final_commit_size_r})
);
assign cmt_to_fetch_if.valid = final_commit_fire_r;
assign cmt_to_fetch_if.committed = final_commit_size_r;
assign commit_sched_if.valid = final_commit_fire_r;
assign commit_sched_if.committed = final_commit_size_r;
// Writeback

View file

@ -80,13 +80,12 @@ module VX_core #(
// Status
output wire busy
);
VX_fetch_to_csr_if fetch_to_csr_if();
VX_cmt_to_fetch_if cmt_to_fetch_if();
VX_cmt_to_csr_if cmt_to_csr_if();
VX_decode_if decode_if();
VX_sched_csr_if sched_csr_if();
VX_decode_sched_if decode_sched_if();
VX_commit_sched_if commit_sched_if();
VX_commit_csr_if commit_csr_if();
VX_branch_ctl_if branch_ctl_if();
VX_warp_ctl_if warp_ctl_if();
VX_ifetch_rsp_if ifetch_rsp_if();
VX_warp_ctl_if warp_ctl_if();
VX_alu_req_if alu_req_if();
VX_lsu_req_if lsu_req_if();
VX_csr_req_if csr_req_if();
@ -94,9 +93,9 @@ module VX_core #(
VX_fpu_agent_if fpu_agent_if();
`endif
VX_gpu_req_if gpu_req_if();
VX_writeback_if writeback_if();
VX_wrelease_if wrelease_if();
VX_join_if join_if();
VX_schedule_if schedule_if();
VX_fetch_if fetch_if();
VX_decode_if decode_if();
VX_commit_if alu_commit_if();
VX_commit_if ld_commit_if();
VX_commit_if st_commit_if();
@ -104,13 +103,15 @@ module VX_core #(
`ifdef EXT_F_ENABLE
VX_commit_if fpu_commit_if();
`endif
VX_commit_if gpu_commit_if();
VX_commit_if gpu_commit_if();
VX_writeback_if writeback_if();
`ifdef PERF_ENABLE
VX_perf_pipeline_if perf_pipeline_if();
`endif
`RESET_RELAY (dcr_data_reset, reset);
`RESET_RELAY (schedule_reset, reset);
`RESET_RELAY (fetch_reset, reset);
`RESET_RELAY (decode_reset, reset);
`RESET_RELAY (issue_reset, reset);
@ -128,38 +129,49 @@ module VX_core #(
`SCOPE_IO_SWITCH (3)
VX_schedule #(
.CORE_ID (CORE_ID)
) schedule (
.clk (clk),
.reset (schedule_reset),
.base_dcrs (base_dcrs),
.warp_ctl_if (warp_ctl_if),
.branch_ctl_if (branch_ctl_if),
.decode_sched_if(decode_sched_if),
.commit_sched_if(commit_sched_if),
.schedule_if (schedule_if),
.gbar_bus_if (gbar_bus_if),
.sched_csr_if (sched_csr_if),
.busy (busy)
);
VX_fetch #(
.CORE_ID(CORE_ID)
.CORE_ID (CORE_ID)
) fetch (
`SCOPE_IO_BIND (0)
.clk (clk),
.reset (fetch_reset),
.base_dcrs (base_dcrs),
.icache_bus_if (icache_bus_if),
.wrelease_if (wrelease_if),
.join_if (join_if),
.warp_ctl_if (warp_ctl_if),
.gbar_bus_if (gbar_bus_if),
.branch_ctl_if (branch_ctl_if),
.ifetch_rsp_if (ifetch_rsp_if),
.fetch_to_csr_if(fetch_to_csr_if),
.cmt_to_fetch_if(cmt_to_fetch_if),
.busy (busy)
.icache_bus_if (icache_bus_if),
.schedule_if (schedule_if),
.fetch_if (fetch_if)
);
VX_decode #(
.CORE_ID(CORE_ID)
.CORE_ID (CORE_ID)
) decode (
.clk (clk),
.reset (decode_reset),
.ifetch_rsp_if (ifetch_rsp_if),
.fetch_if (fetch_if),
.decode_if (decode_if),
.wrelease_if (wrelease_if),
.join_if (join_if)
.decode_sched_if(decode_sched_if)
);
VX_issue #(
.CORE_ID(CORE_ID)
.CORE_ID (CORE_ID)
) issue (
`SCOPE_IO_BIND (1)
@ -183,7 +195,7 @@ module VX_core #(
);
VX_execute #(
.CORE_ID(CORE_ID)
.CORE_ID (CORE_ID)
) execute (
`SCOPE_IO_BIND (2)
@ -229,8 +241,8 @@ module VX_core #(
`endif
`endif
.cmt_to_csr_if (cmt_to_csr_if),
.fetch_to_csr_if(fetch_to_csr_if),
.commit_csr_if (commit_csr_if),
.sched_csr_if (sched_csr_if),
.alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if),
@ -249,7 +261,7 @@ module VX_core #(
);
VX_commit #(
.CORE_ID(CORE_ID)
.CORE_ID (CORE_ID)
) commit (
.clk (clk),
.reset (commit_reset),
@ -264,9 +276,9 @@ module VX_core #(
.gpu_commit_if (gpu_commit_if),
.writeback_if (writeback_if),
.cmt_to_csr_if (cmt_to_csr_if),
.cmt_to_fetch_if(cmt_to_fetch_if),
.commit_csr_if (commit_csr_if),
.commit_sched_if(commit_sched_if),
.sim_wb_value (sim_wb_value)
);

View file

@ -33,8 +33,8 @@ module VX_csr_data #(
`endif
`endif
VX_cmt_to_csr_if.slave cmt_to_csr_if,
VX_fetch_to_csr_if.slave fetch_to_csr_if,
VX_commit_csr_if.slave commit_csr_if,
VX_sched_csr_if.slave sched_csr_if,
`ifdef EXT_F_ENABLE
VX_fpu_to_csr_if.slave fpu_to_csr_if,
@ -136,12 +136,12 @@ module VX_csr_data #(
`CSR_NW : read_data_ro_r = 32'(`NUM_WARPS);
`CSR_NC : read_data_ro_r = 32'(`NUM_CORES * `NUM_CLUSTERS);
`CSR_MCYCLE : read_data_ro_r = 32'(fetch_to_csr_if.cycles[31:0]);
`CSR_MCYCLE_H : read_data_ro_r = 32'(fetch_to_csr_if.cycles[`PERF_CTR_BITS-1:32]);
`CSR_MCYCLE : read_data_ro_r = 32'(sched_csr_if.cycles[31:0]);
`CSR_MCYCLE_H : read_data_ro_r = 32'(sched_csr_if.cycles[`PERF_CTR_BITS-1:32]);
`CSR_MPM_RESERVED : read_data_ro_r = 'x;
`CSR_MPM_RESERVED_H : read_data_ro_r = 'x;
`CSR_MINSTRET : read_data_ro_r = 32'(cmt_to_csr_if.instret[31:0]);
`CSR_MINSTRET_H : read_data_ro_r = 32'(cmt_to_csr_if.instret[`PERF_CTR_BITS-1:32]);
`CSR_MINSTRET : read_data_ro_r = 32'(commit_csr_if.instret[31:0]);
`CSR_MINSTRET_H : read_data_ro_r = 32'(commit_csr_if.instret[`PERF_CTR_BITS-1:32]);
`CSR_SATP : read_data_ro_r = 32'(csr_satp);

View file

@ -41,8 +41,8 @@ module VX_csr_unit #(
`endif
`endif
VX_cmt_to_csr_if.slave cmt_to_csr_if,
VX_fetch_to_csr_if.slave fetch_to_csr_if,
VX_commit_csr_if.slave commit_csr_if,
VX_sched_csr_if.slave sched_csr_if,
VX_csr_req_if.slave csr_req_if,
VX_commit_if.master csr_commit_if,
@ -140,7 +140,7 @@ module VX_csr_unit #(
`endif
VX_csr_data #(
.CORE_ID(CORE_ID)
.CORE_ID (CORE_ID)
) csr_data (
.clk (clk),
.reset (reset),
@ -165,8 +165,8 @@ module VX_csr_unit #(
`endif
`endif
.cmt_to_csr_if (cmt_to_csr_if),
.fetch_to_csr_if(fetch_to_csr_if),
.commit_csr_if (commit_csr_if),
.sched_csr_if (sched_csr_if),
`ifdef EXT_F_ENABLE
.fpu_to_csr_if (fpu_to_csr_if),

View file

@ -21,12 +21,11 @@ module VX_decode #(
input wire reset,
// inputs
VX_ifetch_rsp_if.slave ifetch_rsp_if,
VX_fetch_if.slave fetch_if,
// outputs
VX_decode_if.master decode_if,
VX_wrelease_if.master wrelease_if,
VX_join_if.master join_if
VX_decode_sched_if.master decode_sched_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (clk)
@ -40,7 +39,7 @@ module VX_decode #(
reg use_rd, use_PC, use_imm;
reg is_join, is_wstall;
wire [31:0] instr = ifetch_rsp_if.data;
wire [31:0] instr = fetch_if.data;
wire [6:0] opcode = instr[6:0];
wire [1:0] func2 = instr[26:25];
wire [2:0] func3 = instr[14:12];
@ -528,11 +527,11 @@ module VX_decode #(
// disable write to integer register r0
wire wb = use_rd && (rd_r != 0);
assign decode_if.valid = ifetch_rsp_if.valid;
assign decode_if.uuid = ifetch_rsp_if.uuid;
assign decode_if.wid = ifetch_rsp_if.wid;
assign decode_if.tmask = ifetch_rsp_if.tmask;
assign decode_if.PC = ifetch_rsp_if.PC;
assign decode_if.valid = fetch_if.valid;
assign decode_if.uuid = fetch_if.uuid;
assign decode_if.wid = fetch_if.wid;
assign decode_if.tmask = fetch_if.tmask;
assign decode_if.PC = fetch_if.PC;
assign decode_if.ex_type = ex_type;
assign decode_if.op_type = op_type;
assign decode_if.op_mod = op_mod;
@ -547,16 +546,15 @@ module VX_decode #(
///////////////////////////////////////////////////////////////////////////
wire ifetch_rsp_fire = ifetch_rsp_if.valid && ifetch_rsp_if.ready;
wire fetch_fire = fetch_if.valid && fetch_if.ready;
assign join_if.valid = ifetch_rsp_fire && is_join;
assign join_if.wid = ifetch_rsp_if.wid;
assign wrelease_if.valid = ifetch_rsp_fire && ~is_wstall;
assign wrelease_if.wid = ifetch_rsp_if.wid;
assign ifetch_rsp_if.ibuf_pop = decode_if.ibuf_pop;
assign ifetch_rsp_if.ready = decode_if.ready;
assign decode_sched_if.valid = fetch_fire;
assign decode_sched_if.wid = fetch_if.wid;
assign decode_sched_if.is_wstall = is_wstall;
assign decode_sched_if.is_join = is_join;
assign fetch_if.ibuf_pop = decode_if.ibuf_pop;
assign fetch_if.ready = decode_if.ready;
`ifdef DBG_TRACE_CORE_PIPELINE
always @(posedge clk) begin

View file

@ -6,7 +6,6 @@ module VX_dispatch (
// inputs
VX_dispatch_if.slave dispatch_if,
VX_gpr_rsp_if.slave gpr_rsp_if,
// outputs
VX_alu_req_if.master alu_req_if,
@ -53,7 +52,7 @@ module VX_dispatch (
.reset (reset),
.valid_in (alu_req_valid),
.ready_in (alu_req_ready),
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, next_PC, alu_op_type, dispatch_if.op_mod, dispatch_if.imm, dispatch_if.use_PC, dispatch_if.use_imm, dispatch_if.rd, dispatch_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, next_PC, alu_op_type, dispatch_if.op_mod, dispatch_if.imm, dispatch_if.use_PC, dispatch_if.use_imm, dispatch_if.rd, dispatch_if.wb, tid, dispatch_if.rs1_data, dispatch_if.rs2_data}),
.data_out ({alu_req_if.uuid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.op_mod, alu_req_if.imm, alu_req_if.use_PC, alu_req_if.use_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
.valid_out (alu_req_if.valid),
.ready_out (alu_req_if.ready)
@ -72,7 +71,7 @@ module VX_dispatch (
.reset (reset),
.valid_in (lsu_req_valid),
.ready_in (lsu_req_ready),
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, lsu_op_type, dispatch_if.imm, dispatch_if.rd, dispatch_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, lsu_op_type, dispatch_if.imm, dispatch_if.rd, dispatch_if.wb, dispatch_if.rs1_data, dispatch_if.rs2_data}),
.data_out ({lsu_req_if.uuid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
.valid_out (lsu_req_if.valid),
.ready_out (lsu_req_if.ready)
@ -87,7 +86,7 @@ module VX_dispatch (
wire [`NUM_THREADS-1:0][31:0] csr_data;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign csr_data[i] = gpr_rsp_if.rs1_data[i][31:0];
assign csr_data[i] = dispatch_if.rs1_data[i][31:0];
end
VX_skid_buffer #(
@ -120,13 +119,13 @@ module VX_dispatch (
.reset (reset),
.valid_in (fpu_req_valid),
.ready_in (fpu_req_ready),
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, fpu_op_type, fpu_fmt, fpu_frm, dispatch_if.rd, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
.data_out ({fpu_agent_if.uuid, fpu_agent_if.wid, fpu_agent_if.tmask, fpu_agent_if.PC, fpu_agent_if.op_type, fpu_agent_if.fmt, fpu_agent_if.frm, fpu_agent_if.rd, fpu_agent_if.rs1_data, fpu_agent_if.rs2_data, fpu_agent_if.rs3_data}),
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, fpu_op_type, fpu_fmt, fpu_frm, dispatch_if.rd, dispatch_if.rs1_data, dispatch_if.rs2_data, dispatch_if.rs3_data}),
.data_out ({fpu_agent_if.uuid, fpu_agent_if.wid, fpu_agent_if.tmask, fpu_agent_if.PC, fpu_agent_if.op_type, fpu_agent_if.fmt, fpu_agent_if.frm, fpu_agent_if.rd, fpu_agent_if.rs1_data, fpu_agent_if.rs2_data, fpu_agent_if.rs3_data}),
.valid_out (fpu_agent_if.valid),
.ready_out (fpu_agent_if.ready)
);
`else
`UNUSED_VAR (gpr_rsp_if.rs3_data)
`UNUSED_VAR (dispatch_if.rs3_data)
`endif
// gpu unit
@ -142,8 +141,8 @@ module VX_dispatch (
.reset (reset),
.valid_in (gpu_req_valid),
.ready_in (gpu_req_ready),
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, next_PC, gpu_op_type, dispatch_if.op_mod, dispatch_if.rd, dispatch_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
.data_out ({gpu_req_if.uuid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.op_mod, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data}),
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, next_PC, gpu_op_type, dispatch_if.op_mod, dispatch_if.rd, dispatch_if.wb, tid, dispatch_if.rs1_data, dispatch_if.rs2_data, dispatch_if.rs3_data}),
.data_out ({gpu_req_if.uuid, gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.op_mod, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.tid, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.rs3_data}),
.valid_out (gpu_req_if.valid),
.ready_out (gpu_req_if.ready)
);

View file

@ -19,10 +19,10 @@ module VX_execute #(
VX_cache_bus_if.master dcache_bus_if,
// commit interface
VX_cmt_to_csr_if.slave cmt_to_csr_if,
VX_commit_csr_if.slave commit_csr_if,
// fetch interface
VX_fetch_to_csr_if.slave fetch_to_csr_if,
VX_sched_csr_if.slave sched_csr_if,
`ifdef PERF_ENABLE
VX_perf_memsys_if.slave perf_memsys_if,
@ -108,7 +108,7 @@ module VX_execute #(
`RESET_RELAY (gpu_reset, reset);
VX_alu_unit #(
.CORE_ID(CORE_ID)
.CORE_ID (CORE_ID)
) alu_unit (
.clk (clk),
.reset (alu_reset),
@ -120,7 +120,7 @@ module VX_execute #(
`SCOPE_IO_SWITCH (1)
VX_lsu_unit #(
.CORE_ID(CORE_ID)
.CORE_ID (CORE_ID)
) lsu_unit (
`SCOPE_IO_BIND (0)
.clk (clk),
@ -132,7 +132,7 @@ module VX_execute #(
);
VX_csr_unit #(
.CORE_ID(CORE_ID)
.CORE_ID (CORE_ID)
) csr_unit (
.clk (clk),
.reset (csr_reset),
@ -178,8 +178,8 @@ module VX_execute #(
`endif
`endif
.cmt_to_csr_if (cmt_to_csr_if),
.fetch_to_csr_if(fetch_to_csr_if),
.commit_csr_if (commit_csr_if),
.sched_csr_if (sched_csr_if),
.csr_req_if (csr_req_if),
.csr_commit_if (csr_commit_if)
);
@ -188,7 +188,7 @@ module VX_execute #(
`RESET_RELAY (fpu_reset, reset);
VX_fpu_agent #(
.CORE_ID(CORE_ID)
.CORE_ID (CORE_ID)
) fpu_agent (
.clk (clk),
.reset (fpu_reset),
@ -202,7 +202,7 @@ module VX_execute #(
`endif
VX_gpu_unit #(
.CORE_ID(CORE_ID)
.CORE_ID (CORE_ID)
) gpu_unit (
.clk (clk),
.reset (gpu_reset),

View file

@ -10,82 +10,124 @@ module VX_fetch #(
) (
`SCOPE_IO_DECL
input wire clk,
input wire reset,
input base_dcrs_t base_dcrs,
input wire clk,
input wire reset,
// Icache interface
VX_cache_bus_if.master icache_bus_if,
// inputs
VX_wrelease_if.slave wrelease_if,
VX_join_if.slave join_if,
VX_branch_ctl_if.slave branch_ctl_if,
VX_warp_ctl_if.slave warp_ctl_if,
// inputs
VX_schedule_if.slave schedule_if,
// outputs
VX_ifetch_rsp_if.master ifetch_rsp_if,
VX_gbar_bus_if.master gbar_bus_if,
// csr interface
VX_fetch_to_csr_if.master fetch_to_csr_if,
// commit interface
VX_cmt_to_fetch_if.slave cmt_to_fetch_if,
// Status
output wire busy
VX_fetch_if.master fetch_if
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (reset)
VX_ifetch_req_if ifetch_req_if();
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
VX_warp_sched #(
.CORE_ID(CORE_ID)
) warp_sched (
.clk (clk),
.reset (reset),
wire icache_req_valid;
wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr;
wire [ICACHE_TAG_WIDTH-1:0] icache_req_tag;
wire icache_req_ready;
.base_dcrs (base_dcrs),
wire [UUID_WIDTH-1:0] rsp_uuid;
wire [NW_WIDTH-1:0] req_tag, rsp_tag;
.warp_ctl_if (warp_ctl_if),
.wrelease_if (wrelease_if),
.join_if (join_if),
.branch_ctl_if (branch_ctl_if),
wire icache_req_fire = icache_req_valid && icache_req_ready;
assign req_tag = schedule_if.wid;
assign {rsp_uuid, rsp_tag} = icache_bus_if.rsp_tag;
.ifetch_req_if (ifetch_req_if),
.gbar_bus_if (gbar_bus_if),
wire [`XLEN-1:0] rsp_PC;
wire [`NUM_THREADS-1:0] rsp_tmask;
.fetch_to_csr_if(fetch_to_csr_if),
.cmt_to_fetch_if(cmt_to_fetch_if),
.busy (busy)
);
VX_icache_stage #(
.CORE_ID(CORE_ID)
) icache_stage (
.clk (clk),
.reset (reset),
.icache_bus_if (icache_bus_if),
.ifetch_req_if (ifetch_req_if),
.ifetch_rsp_if (ifetch_rsp_if)
VX_dp_ram #(
.DATAW (`XLEN + `NUM_THREADS),
.SIZE (`NUM_WARPS),
.LUTRAM (1)
) tag_store (
.clk (clk),
.write (icache_req_fire),
`UNUSED_PIN (wren),
.waddr (req_tag),
.wdata ({schedule_if.PC, schedule_if.tmask}),
.raddr (rsp_tag),
.rdata ({rsp_PC, rsp_tmask})
);
// Ensure that the ibuffer doesn't fill up.
// This will resolve potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache request.
// This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus.
wire [`NUM_WARPS-1:0] pending_ibuf_full;
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
VX_pending_size #(
.SIZE (`IBUF_SIZE + 1)
) pending_reads (
.clk (clk),
.reset (reset),
.incr (icache_req_fire && (schedule_if.wid == NW_WIDTH'(i))),
.decr (fetch_if.ibuf_pop[i]),
.full (pending_ibuf_full[i]),
`UNUSED_PIN (size),
`UNUSED_PIN (empty)
);
end
`RUNTIME_ASSERT((!schedule_if.valid || schedule_if.PC != 0),
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, schedule_if.PC, schedule_if.wid, schedule_if.tmask, schedule_if.uuid))
// Icache Request
assign icache_req_valid = schedule_if.valid && ~pending_ibuf_full[schedule_if.wid];
assign icache_req_addr = schedule_if.PC[`XLEN-1:2];
assign icache_req_tag = {schedule_if.uuid, req_tag};
assign schedule_if.ready = icache_req_ready && ~pending_ibuf_full[schedule_if.wid];
VX_skid_buffer #(
.DATAW (ICACHE_ADDR_WIDTH + ICACHE_TAG_WIDTH),
.OUT_REG (1)
) req_sbuf (
.clk (clk),
.reset (reset),
.valid_in (icache_req_valid),
.ready_in (icache_req_ready),
.data_in ({icache_req_addr, icache_req_tag}),
.data_out ({icache_bus_if.req_addr, icache_bus_if.req_tag}),
.valid_out (icache_bus_if.req_valid),
.ready_out (icache_bus_if.req_ready)
);
assign icache_bus_if.req_rw = 0;
assign icache_bus_if.req_byteen = 4'b1111;
assign icache_bus_if.req_data = '0;
// Icache Response
wire [NW_WIDTH-1:0] rsp_wid = rsp_tag;
assign fetch_if.valid = icache_bus_if.rsp_valid;
assign fetch_if.tmask = rsp_tmask;
assign fetch_if.wid = rsp_wid;
assign fetch_if.PC = rsp_PC;
assign fetch_if.data = icache_bus_if.rsp_data;
assign fetch_if.uuid = rsp_uuid;
// Can accept new response?
assign icache_bus_if.rsp_ready = fetch_if.ready;
`ifdef DBG_SCOPE_FETCH
if (CORE_ID == 0) begin
`ifdef SCOPE
localparam UUID_WIDTH = `UP(`UUID_BITS);
wire ifetch_req_fire = ifetch_req_if.valid && ifetch_req_if.ready;
wire icache_req_fire = icache_bus_if.req_valid && icache_bus_if.req_ready;
wire schedule_fire = schedule_if.valid && schedule_if.ready;
wire icache_rsp_fire = icache_bus_if.rsp_valid && icache_bus_if.rsp_ready;
VX_scope_tap #(
.SCOPE_ID (1),
.TRIGGERW (7),
.PROBEW (3*UUID_WIDTH + 237)
.TRIGGERW (4),
.PROBEW (3*UUID_WIDTH + 108)
) scope_tap (
.clk(clk),
.reset(scope_reset),
@ -93,19 +135,14 @@ module VX_fetch #(
.stop(1'b0),
.triggers({
reset,
ifetch_req_fire,
schedule_fire,
icache_req_fire,
icache_rsp_fire,
warp_ctl_if.valid,
branch_ctl_if.valid,
join_if.valid
icache_rsp_fire
}),
.probes({
ifetch_req_if.uuid, ifetch_req_if.wid, ifetch_req_if.tmask, ifetch_req_if.PC,
schedule_if.uuid, schedule_if.wid, schedule_if.tmask, schedule_if.PC,
icache_bus_if.req_tag, icache_bus_if.req_byteen, icache_bus_if.req_addr,
icache_bus_if.rsp_data, icache_bus_if.rsp_tag,
join_if.wid, warp_ctl_if.barrier, warp_ctl_if.split, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.wid,
branch_ctl_if.dest, branch_ctl_if.taken, branch_ctl_if.wid
icache_bus_if.rsp_data, icache_bus_if.rsp_tag
}),
.bus_in(scope_bus_in),
.bus_out(scope_bus_out)
@ -114,11 +151,9 @@ module VX_fetch #(
`ifdef CHIPSCOPE
ila_fetch ila_fetch_inst (
.clk (clk),
.probe0 ({reset, ifetch_req_if.uuid, ifetch_req_if.wid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.ready, ifetch_req_if.valid}),
.probe0 ({reset, schedule_if.uuid, schedule_if.wid, schedule_if.tmask, schedule_if.PC, schedule_if.ready, schedule_if.valid}),
.probe1 ({icache_bus_if.req_tag, icache_bus_if.req_byteen, icache_bus_if.req_addr, icache_bus_if.req_ready, icache_bus_if.req_valid}),
.probe2 ({icache_bus_if.rsp_data, icache_bus_if.rsp_tag, icache_bus_if.rsp_ready, icache_bus_if.rsp_valid}),
.probe3 ({join_if.wid, join_if.valid, warp_ctl_if.barrier, warp_ctl_if.split, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.wid, warp_ctl_if.valid}),
.probe4 ({branch_ctl_if.dest, branch_ctl_if.taken, branch_ctl_if.wid, branch_ctl_if.valid})
.probe2 ({icache_bus_if.rsp_data, icache_bus_if.rsp_tag, icache_bus_if.rsp_ready, icache_bus_if.rsp_valid})
);
`endif
end
@ -126,4 +161,17 @@ module VX_fetch #(
`SCOPE_IO_UNUSED()
`endif
`ifdef DBG_TRACE_CORE_ICACHE
wire schedule_fire = schedule_if.valid && schedule_if.ready;
wire fetch_fire = fetch_if.valid && fetch_if.ready;
always @(posedge clk) begin
if (schedule_fire) begin
`TRACE(1, ("%d: I$%0d req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, CORE_ID, schedule_if.wid, schedule_if.PC, schedule_if.tmask, schedule_if.uuid));
end
if (fetch_fire) begin
`TRACE(1, ("%d: I$%0d rsp: wid=%0d, PC=0x%0h, tmask=%b, data=0x%0h (#%0d)\n", $time, CORE_ID, fetch_if.wid, fetch_if.PC, fetch_if.tmask, fetch_if.data, fetch_if.uuid));
end
end
`endif
endmodule

View file

@ -6,12 +6,8 @@ module VX_gpr_stage #(
input wire clk,
input wire reset,
// inputs
VX_writeback_if.slave writeback_if,
VX_gpr_req_if.slave gpr_req_if,
// outputs
VX_gpr_rsp_if.master gpr_rsp_if
VX_gpr_stage_if.slave gpr_stage_if
);
`UNUSED_PARAM (CORE_ID)
@ -31,14 +27,14 @@ module VX_gpr_stage #(
wire [RAM_ADDRW-1:0] waddr, raddr1, raddr2;
if (`NUM_WARPS > 1) begin
assign waddr = {writeback_if.wid, writeback_if.rd};
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
assign raddr1 = {gpr_stage_if.wid, gpr_stage_if.rs1};
assign raddr2 = {gpr_stage_if.wid, gpr_stage_if.rs2};
end else begin
`UNUSED_VAR (writeback_if.wid)
`UNUSED_VAR (gpr_req_if.wid)
`UNUSED_VAR (gpr_stage_if.wid)
assign waddr = writeback_if.rd;
assign raddr1 = gpr_req_if.rs1;
assign raddr2 = gpr_req_if.rs2;
assign raddr1 = gpr_stage_if.rs1;
assign raddr2 = gpr_stage_if.rs2;
end
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
@ -54,7 +50,7 @@ module VX_gpr_stage #(
.waddr (waddr),
.wdata (writeback_if.data[i]),
.raddr (raddr1),
.rdata (gpr_rsp_if.rs1_data[i])
.rdata (gpr_stage_if.rs1_data[i])
);
VX_dp_ram #(
@ -69,16 +65,16 @@ module VX_gpr_stage #(
.waddr (waddr),
.wdata (writeback_if.data[i]),
.raddr (raddr2),
.rdata (gpr_rsp_if.rs2_data[i])
.rdata (gpr_stage_if.rs2_data[i])
);
end
`ifdef EXT_F_ENABLE
wire [RAM_ADDRW-1:0] raddr3;
if (`NUM_WARPS > 1) begin
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
assign raddr3 = {gpr_stage_if.wid, gpr_stage_if.rs3};
end else begin
assign raddr3 = gpr_req_if.rs3;
assign raddr3 = gpr_stage_if.rs3;
end
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
@ -94,12 +90,12 @@ module VX_gpr_stage #(
.waddr (waddr),
.wdata (writeback_if.data[i][`FLEN-1:0]),
.raddr (raddr3),
.rdata (gpr_rsp_if.rs3_data[i][`FLEN-1:0])
.rdata (gpr_stage_if.rs3_data[i][`FLEN-1:0])
);
end
`else
`UNUSED_VAR (gpr_req_if.rs3)
assign gpr_rsp_if.rs3_data = '0;
`UNUSED_VAR (gpr_stage_if.rs3)
assign gpr_stage_if.rs3_data = '0;
`endif
assign writeback_if.ready = 1'b1;

View file

@ -1,133 +0,0 @@
`include "VX_define.vh"
`include "VX_gpu_types.vh"
`IGNORE_WARNINGS_BEGIN
import VX_gpu_types::*;
`IGNORE_WARNINGS_END
// VX_icache_stage: bridges the fetch request stream to the instruction cache bus.
//
// Request path : ifetch_req_if -> (per-warp throttle) -> req_sbuf -> icache_bus_if.req_*
// Response path: icache_bus_if.rsp_* -> ifetch_rsp_if, with the PC and thread mask
//                re-attached from a small per-warp RAM (tag_store) that was written
//                when the request was accepted.
//
// The bus tag is {uuid, wid}; the response tag is unpacked with the same layout.
// NOTE(review): this assumes the cache returns the request tag unchanged - confirm.
module VX_icache_stage #(
// core identifier; inside this module it is only referenced by the debug trace messages
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
// Icache interface
VX_cache_bus_if.master icache_bus_if,
// request
VX_ifetch_req_if.slave ifetch_req_if,
// response
VX_ifetch_rsp_if.master ifetch_rsp_if
);
`UNUSED_PARAM (CORE_ID)
// NOTE(review): reset is actually consumed below (pending_reads, req_sbuf),
// so this unused-marker looks redundant - confirm before removing.
`UNUSED_VAR (reset)
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
// request side handshake/payload presented to the skid buffer
wire icache_req_valid;
wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr;
wire [ICACHE_TAG_WIDTH-1:0] icache_req_tag;
wire icache_req_ready;
// fields recovered from the response tag
wire [UUID_WIDTH-1:0] rsp_uuid;
wire [NW_WIDTH-1:0] req_tag, rsp_tag;
// a request is accepted by the skid buffer in this cycle
wire icache_req_fire = icache_req_valid && icache_req_ready;
// the warp id doubles as the low part of the bus tag and as the tag_store address
assign req_tag = ifetch_req_if.wid;
assign {rsp_uuid, rsp_tag} = icache_bus_if.rsp_tag;
wire [`XLEN-1:0] rsp_PC;
wire [`NUM_THREADS-1:0] rsp_tmask;
// Per-warp side storage: {PC, tmask} is written at index wid when a request fires
// and read back at index rsp_tag when the response arrives, so these fields do not
// have to travel through the cache.
VX_dp_ram #(
.DATAW (`XLEN + `NUM_THREADS),
.SIZE (`NUM_WARPS),
.LUTRAM (1)
) tag_store (
.clk (clk),
.write (icache_req_fire),
`UNUSED_PIN (wren),
.waddr (req_tag),
.wdata ({ifetch_req_if.PC, ifetch_req_if.tmask}),
.raddr (rsp_tag),
.rdata ({rsp_PC, rsp_tmask})
);
// Ensure that the ibuffer doesn't fill up.
// This will resolve a potential deadlock if the ibuffer fills and the LSU stalls the execute stage due to a pending dcache request.
// This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus.
wire [`NUM_WARPS-1:0] pending_ibuf_full;
// One tracker per warp: incremented when a request for warp i is accepted,
// decremented on ifetch_rsp_if.ibuf_pop[i]; its 'full' output gates new requests below.
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
VX_pending_size #(
.SIZE (`IBUF_SIZE + 1)
) pending_reads (
.clk (clk),
.reset (reset),
.incr (icache_req_fire && (ifetch_req_if.wid == NW_WIDTH'(i))),
.decr (ifetch_rsp_if.ibuf_pop[i]),
.full (pending_ibuf_full[i]),
`UNUSED_PIN (size),
`UNUSED_PIN (empty)
);
end
// Sanity check: a valid fetch request must never carry PC == 0.
`RUNTIME_ASSERT((!ifetch_req_if.valid || ifetch_req_if.PC != 0),
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, ifetch_req_if.PC, ifetch_req_if.wid, ifetch_req_if.tmask, ifetch_req_if.uuid))
// Icache Request
// Both valid (towards the bus) and ready (towards the requester) are masked by the
// requesting warp's 'full' flag, so a throttled warp neither issues nor is consumed.
assign icache_req_valid = ifetch_req_if.valid && ~pending_ibuf_full[ifetch_req_if.wid];
// the two low PC bits are dropped when forming the bus address
assign icache_req_addr = ifetch_req_if.PC[`XLEN-1:2];
assign icache_req_tag = {ifetch_req_if.uuid, req_tag};
assign ifetch_req_if.ready = icache_req_ready && ~pending_ibuf_full[ifetch_req_if.wid];
// Buffer between the request logic above and the icache bus request channel.
VX_skid_buffer #(
.DATAW (ICACHE_ADDR_WIDTH + ICACHE_TAG_WIDTH),
.OUT_REG (1)
) req_sbuf (
.clk (clk),
.reset (reset),
.valid_in (icache_req_valid),
.ready_in (icache_req_ready),
.data_in ({icache_req_addr, icache_req_tag}),
.data_out ({icache_bus_if.req_addr, icache_bus_if.req_tag}),
.valid_out (icache_bus_if.req_valid),
.ready_out (icache_bus_if.req_ready)
);
// Constant request fields: rw tied to 0 (presumably "read" - confirm against
// VX_cache_bus_if), all four byte enables set, write data zeroed.
assign icache_bus_if.req_rw = 0;
assign icache_bus_if.req_byteen = 4'b1111;
assign icache_bus_if.req_data = '0;
// Icache Response
// The response is forwarded combinationally; PC/tmask come from tag_store,
// wid/uuid from the response tag, and the instruction word from rsp_data.
wire [NW_WIDTH-1:0] rsp_wid = rsp_tag;
assign ifetch_rsp_if.valid = icache_bus_if.rsp_valid;
assign ifetch_rsp_if.tmask = rsp_tmask;
assign ifetch_rsp_if.wid = rsp_wid;
assign ifetch_rsp_if.PC = rsp_PC;
assign ifetch_rsp_if.data = icache_bus_if.rsp_data;
assign ifetch_rsp_if.uuid = rsp_uuid;
// Can accept new response?
assign icache_bus_if.rsp_ready = ifetch_rsp_if.ready;
// Optional simulation trace of accepted requests and delivered responses.
`ifdef DBG_TRACE_CORE_ICACHE
wire ifetch_req_fire = ifetch_req_if.valid && ifetch_req_if.ready;
wire ifetch_rsp_fire = ifetch_rsp_if.valid && ifetch_rsp_if.ready;
always @(posedge clk) begin
if (ifetch_req_fire) begin
`TRACE(1, ("%d: I$%0d req: wid=%0d, PC=0x%0h, tmask=%b (#%0d)\n", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.PC, ifetch_req_if.tmask, ifetch_req_if.uuid));
end
if (ifetch_rsp_fire) begin
`TRACE(1, ("%d: I$%0d rsp: wid=%0d, PC=0x%0h, tmask=%b, data=0x%0h (#%0d)\n", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.PC, ifetch_rsp_if.tmask, ifetch_rsp_if.data, ifetch_rsp_if.uuid));
end
end
`endif
endmodule

View file

@ -1,6 +1,6 @@
`include "VX_platform.vh"
module VX_ipdom_stack #(
module VX_ipdom #(
parameter WIDTH = 1,
parameter DEPTH = 1
) (

View file

@ -27,8 +27,7 @@ module VX_issue #(
VX_gpu_req_if.master gpu_req_if
);
VX_ibuffer_if ibuffer_if();
VX_gpr_req_if gpr_req_if();
VX_gpr_rsp_if gpr_rsp_if();
VX_gpr_stage_if gpr_stage_if();
VX_writeback_if sboard_wb_if();
VX_scoreboard_if scoreboard_if();
VX_dispatch_if dispatch_if();
@ -36,10 +35,10 @@ module VX_issue #(
wire [3:0] in_use_regs;
// GPR request interface
assign gpr_req_if.wid = ibuffer_if.wid;
assign gpr_req_if.rs1 = ibuffer_if.rs1;
assign gpr_req_if.rs2 = ibuffer_if.rs2;
assign gpr_req_if.rs3 = ibuffer_if.rs3;
assign gpr_stage_if.wid = ibuffer_if.wid;
assign gpr_stage_if.rs1 = ibuffer_if.rs1;
assign gpr_stage_if.rs2 = ibuffer_if.rs2;
assign gpr_stage_if.rs3 = ibuffer_if.rs3;
// scoreboard writeback interface
assign sboard_wb_if.valid = writeback_if.valid;
@ -80,6 +79,9 @@ module VX_issue #(
assign dispatch_if.use_imm = ibuffer_if.use_imm;
assign dispatch_if.imm = ibuffer_if.imm;
assign dispatch_if.rd = ibuffer_if.rd;
assign dispatch_if.rs1_data = gpr_stage_if.rs1_data;
assign dispatch_if.rs2_data = gpr_stage_if.rs2_data;
assign dispatch_if.rs3_data = gpr_stage_if.rs3_data;
// issue the instruction
assign ibuffer_if.ready = scoreboard_if.ready && dispatch_if.ready;
@ -90,7 +92,7 @@ module VX_issue #(
`RESET_RELAY (dispatch_reset, reset);
VX_ibuffer #(
.CORE_ID(CORE_ID)
.CORE_ID (CORE_ID)
) ibuffer (
.clk (clk),
.reset (ibuf_reset),
@ -99,7 +101,7 @@ module VX_issue #(
);
VX_scoreboard #(
.CORE_ID(CORE_ID)
.CORE_ID (CORE_ID)
) scoreboard (
.clk (clk),
.reset (scoreboard_reset),
@ -109,20 +111,18 @@ module VX_issue #(
);
VX_gpr_stage #(
.CORE_ID(CORE_ID)
.CORE_ID (CORE_ID)
) gpr_stage (
.clk (clk),
.reset (gpr_reset),
.writeback_if (writeback_if),
.gpr_req_if (gpr_req_if),
.gpr_rsp_if (gpr_rsp_if)
.gpr_stage_if (gpr_stage_if)
);
VX_dispatch dispatch (
.clk (clk),
.reset (dispatch_reset),
.dispatch_if(dispatch_if),
.gpr_rsp_if (gpr_rsp_if),
.alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if),
.csr_req_if (csr_req_if),
@ -195,9 +195,9 @@ module VX_issue #(
ibuffer_if.imm,
ibuffer_if.use_PC,
ibuffer_if.use_imm,
gpr_rsp_if.rs1_data,
gpr_rsp_if.rs2_data,
gpr_rsp_if.rs3_data,
dispatch_if.rs1_data,
dispatch_if.rs2_data,
dispatch_if.rs3_data,
writeback_if.uuid,
writeback_if.tmask,
writeback_if.rd,
@ -283,11 +283,11 @@ module VX_issue #(
`TRACE(1, (", op="));
trace_ex_op(1, dispatch_if.ex_type, dispatch_if.op_type, dispatch_if.op_mod, dispatch_if.imm);
`TRACE(1, (", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1_data=", dispatch_if.op_mod, dispatch_if.tmask, dispatch_if.wb, dispatch_if.rd));
`TRACE_ARRAY1D(1, gpr_rsp_if.rs1_data, `NUM_THREADS);
`TRACE_ARRAY1D(1, dispatch_if.rs1_data, `NUM_THREADS);
`TRACE(1, (", rs2_data="));
`TRACE_ARRAY1D(1, gpr_rsp_if.rs2_data, `NUM_THREADS);
`TRACE_ARRAY1D(1, dispatch_if.rs2_data, `NUM_THREADS);
`TRACE(1, (", rs3_data="));
`TRACE_ARRAY1D(1, gpr_rsp_if.rs3_data, `NUM_THREADS);
`TRACE_ARRAY1D(1, dispatch_if.rs3_data, `NUM_THREADS);
`TRACE(1, (" (#%0d)\n", dispatch_if.uuid));
end
end

View file

@ -5,27 +5,27 @@
import VX_gpu_types::*;
`IGNORE_WARNINGS_END
module VX_warp_sched #(
module VX_schedule #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
// configuration
input base_dcrs_t base_dcrs,
VX_warp_ctl_if.slave warp_ctl_if,
VX_wrelease_if.slave wrelease_if,
VX_join_if.slave join_if,
// inputs
VX_warp_ctl_if.slave warp_ctl_if,
VX_branch_ctl_if.slave branch_ctl_if,
VX_decode_sched_if.slave decode_sched_if,
VX_commit_sched_if.slave commit_sched_if,
VX_ifetch_req_if.master ifetch_req_if,
// outputs
VX_schedule_if.master schedule_if,
VX_gbar_bus_if.master gbar_bus_if,
VX_sched_csr_if.master sched_csr_if,
VX_fetch_to_csr_if.master fetch_to_csr_if,
VX_cmt_to_fetch_if.slave cmt_to_fetch_if,
// Status
// status
output wire busy
);
`UNUSED_PARAM (CORE_ID)
@ -69,8 +69,7 @@ module VX_warp_sched #(
wire [UUID_WIDTH-1:0] instr_uuid;
wire schedule_fire = schedule_valid && schedule_ready;
wire ifetch_req_fire = ifetch_req_if.valid && ifetch_req_if.ready;
wire schedule_if_fire = schedule_if.valid && schedule_if.ready;
wire tmc_active = (warp_ctl_if.tmc.tmask != 0);
@ -104,11 +103,11 @@ module VX_warp_sched #(
thread_masks[0] <= 1;
end else begin
// join handling
if (join_if.valid) begin
if (decode_sched_if.valid && decode_sched_if.is_join) begin
if (join_else) begin
warp_pcs[join_if.wid] <= `XLEN'(join_pc);
warp_pcs[decode_sched_if.wid] <= `XLEN'(join_pc);
end
thread_masks[join_if.wid] <= join_tmask;
thread_masks[decode_sched_if.wid] <= join_tmask;
end
if (warp_ctl_if.valid && warp_ctl_if.wspawn.valid) begin
@ -172,12 +171,12 @@ module VX_warp_sched #(
issued_instrs[schedule_wid] <= issued_instrs[schedule_wid] + UUID_WIDTH'(1);
end
if (ifetch_req_fire) begin
warp_pcs[ifetch_req_if.wid] <= `XLEN'(`XLEN'(ifetch_req_if.PC) + 4);
if (schedule_if_fire) begin
warp_pcs[schedule_if.wid] <= `XLEN'(`XLEN'(schedule_if.PC) + 4);
end
if (wrelease_if.valid) begin
stalled_warps[wrelease_if.wid] <= 0;
if (decode_sched_if.valid && ~decode_sched_if.is_wstall) begin
stalled_warps[decode_sched_if.wid] <= 0;
end
if (busy) begin
@ -193,7 +192,7 @@ module VX_warp_sched #(
end
// export cycles counter
assign fetch_to_csr_if.cycles = cycles;
assign sched_csr_if.cycles = cycles;
// barrier handling
@ -222,14 +221,14 @@ module VX_warp_sched #(
wire [(`XLEN+`NUM_THREADS)-1:0] ipdom_data [`NUM_WARPS-1:0];
wire ipdom_index [`NUM_WARPS-1:0];
`RESET_RELAY (ipdom_stack_reset, reset);
`RESET_RELAY (ipdom_reset, reset);
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
wire push = warp_ctl_if.valid
&& warp_ctl_if.split.valid
&& (i == warp_ctl_if.wid);
wire pop = join_if.valid && (i == join_if.wid);
wire pop = decode_sched_if.valid && decode_sched_if.is_join && (i == decode_sched_if.wid);
wire [`NUM_THREADS-1:0] else_tmask = warp_ctl_if.split.else_tmask;
wire [`NUM_THREADS-1:0] orig_tmask = thread_masks[warp_ctl_if.wid];
@ -237,12 +236,12 @@ module VX_warp_sched #(
wire [(`XLEN+`NUM_THREADS)-1:0] q_else = {warp_ctl_if.split.pc, else_tmask};
wire [(`XLEN+`NUM_THREADS)-1:0] q_end = {`XLEN'(0), orig_tmask};
VX_ipdom_stack #(
VX_ipdom #(
.WIDTH (`XLEN+`NUM_THREADS),
.DEPTH (`IPDOM_STACK_SIZE)
) ipdom_stack (
) ipdom (
.clk (clk),
.reset (ipdom_stack_reset),
.reset (ipdom_reset),
.push (push),
.pop (pop),
.pair (warp_ctl_if.split.diverged),
@ -255,8 +254,8 @@ module VX_warp_sched #(
);
end
assign {join_pc, join_tmask} = ipdom_data[join_if.wid];
assign join_else = ~ipdom_index[join_if.wid];
assign {join_pc, join_tmask} = ipdom_data[decode_sched_if.wid];
assign join_else = ~ipdom_index[decode_sched_if.wid];
// schedule the next ready warp
@ -298,10 +297,10 @@ module VX_warp_sched #(
.reset (reset),
.valid_in (schedule_valid),
.ready_in (schedule_ready),
.data_in ({instr_uuid, schedule_tmask, schedule_pc, schedule_wid}),
.data_out ({ifetch_req_if.uuid, ifetch_req_if.tmask, ifetch_req_if.PC, ifetch_req_if.wid}),
.valid_out (ifetch_req_if.valid),
.ready_out (ifetch_req_if.ready)
.data_in ({instr_uuid, schedule_tmask, schedule_pc, schedule_wid}),
.data_out ({schedule_if.uuid, schedule_if.tmask, schedule_if.PC, schedule_if.wid}),
.valid_out (schedule_if.valid),
.ready_out (schedule_if.ready)
);
reg [7:0] pending_instrs;
@ -312,7 +311,7 @@ module VX_warp_sched #(
end else begin
pending_instrs <= pending_instrs
+ 8'(schedule_fire)
- ({8{cmt_to_fetch_if.valid}} & 8'(cmt_to_fetch_if.committed));
- ({8{commit_sched_if.valid}} & 8'(commit_sched_if.committed));
end
end
@ -325,7 +324,7 @@ module VX_warp_sched #(
timeout_ctr <= '0;
timeout_enable <= 0;
end else begin
if (wrelease_if.valid) begin
if (decode_sched_if.valid && ~decode_sched_if.is_wstall) begin
timeout_enable <= 1;
end
if (timeout_enable && active_warps !=0 && active_warps == stalled_warps) begin

View file

@ -1,6 +1,6 @@
`include "VX_define.vh"
interface VX_cmt_to_csr_if ();
interface VX_commit_csr_if ();
wire [`PERF_CTR_BITS-1:0] instret;

View file

@ -1,6 +1,6 @@
`include "VX_define.vh"
interface VX_cmt_to_fetch_if ();
interface VX_commit_sched_if ();
wire valid;
wire [`EX_UNITS_BITS-1:0] committed;

View file

@ -0,0 +1,24 @@
`include "VX_define.vh"
// Notification link consumed by VX_schedule through the slave modport.
// In VX_schedule, valid qualifies the other fields:
//   - valid && is_join    : join handling runs for warp wid (IPDOM entry is
//                           popped and the warp's thread mask is restored)
//   - valid && ~is_wstall : the stalled flag of warp wid is cleared
interface VX_decode_sched_if ();

    wire                     valid;      // qualifies is_wstall, is_join and wid
    wire                     is_wstall;  // low together with valid => warp wid is un-stalled
    wire                     is_join;    // high together with valid => join handling for warp wid
    wire [`UP(`NW_BITS)-1:0] wid;        // index of the warp the notification refers to

    // producing side: drives every signal
    modport master (
        output valid, is_wstall, is_join, wid
    );

    // consuming side: observes every signal
    modport slave (
        input valid, is_wstall, is_join, wid
    );

endinterface

View file

@ -16,6 +16,10 @@ interface VX_dispatch_if ();
wire [`XLEN-1:0] imm;
wire [`NR_BITS-1:0] rd;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data;
wire ready;
modport master (
@ -32,6 +36,9 @@ interface VX_dispatch_if ();
output use_imm,
output imm,
output rd,
output rs1_data,
output rs2_data,
output rs3_data,
input ready
);
@ -49,6 +56,9 @@ interface VX_dispatch_if ();
input use_imm,
input imm,
input rd,
input rs1_data,
input rs2_data,
input rs3_data,
output ready
);

View file

@ -1,6 +1,6 @@
`include "VX_define.vh"
interface VX_fetch_to_csr_if ();
interface VX_sched_csr_if ();
wire [`PERF_CTR_BITS-1:0] cycles;

View file

@ -1,6 +1,6 @@
`include "VX_define.vh"
interface VX_ifetch_rsp_if ();
interface VX_fetch_if ();
wire valid;
wire [`UP(`UUID_BITS)-1:0] uuid;

View file

@ -1,24 +0,0 @@
`include "VX_define.vh"
// Bundles a warp index with three NR_BITS-wide fields rs1, rs2 and rs3.
// NOTE(review): presumably the source-register indices of a register-file
// read request - confirm against the modules that connect to it.
interface VX_gpr_req_if ();

    wire [`UP(`NW_BITS)-1:0] wid;   // warp index
    wire [`NR_BITS-1:0]      rs1;   // first NR_BITS-wide field
    wire [`NR_BITS-1:0]      rs2;   // second NR_BITS-wide field
    wire [`NR_BITS-1:0]      rs3;   // third NR_BITS-wide field

    // producing side: drives every signal
    modport master (
        output wid, rs1, rs2, rs3
    );

    // consuming side: observes every signal
    modport slave (
        input wid, rs1, rs2, rs3
    );

endinterface

View file

@ -1,21 +1,36 @@
`include "VX_define.vh"
// Register-file stage bundle: a warp index, three NR_BITS-wide fields
// (rs1..rs3) and three per-thread data vectors (rs1_data..rs3_data).
// NOTE(review): this span shows two interface headers and opens
// "modport slave" twice before endinterface; it looks like the pre- and
// post-rename lines of the hunk (-1,21 +1,36) shown together. Confirm the
// resolved file keeps a single header and one master/slave modport pair.
interface VX_gpr_rsp_if ();
interface VX_gpr_stage_if ();
wire [`UP(`NW_BITS)-1:0] wid; // warp index
wire [`NR_BITS-1:0] rs1; // NR_BITS-wide field; presumably a register index - TODO confirm
wire [`NR_BITS-1:0] rs2;
wire [`NR_BITS-1:0] rs3;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data; // one XLEN-bit word per thread
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data;
// master view
modport master (
output rs1_data,
output rs2_data,
output rs3_data
);
output wid,
output rs1,
output rs2,
output rs3,
modport slave (
input rs1_data,
input rs2_data,
input rs3_data
);
// slave view: takes wid/rs1..rs3 as inputs and drives the rsN_data vectors
modport slave (
input wid,
input rs1,
input rs2,
input rs3,
output rs1_data,
output rs2_data,
output rs3_data
);
endinterface

View file

@ -1,18 +0,0 @@
`include "VX_define.vh"
// Join notification: valid flags a join for warp wid. The join_if-based
// lines of the scheduler use valid to trigger join handling and to pop the
// IPDOM entry selected by wid.
interface VX_join_if ();

    wire                     valid;  // a join is being signalled
    wire [`UP(`NW_BITS)-1:0] wid;    // warp the join applies to

    // producing side
    modport master (
        output valid, wid
    );

    // consuming side
    modport slave (
        input valid, wid
    );

endinterface

View file

@ -0,0 +1,15 @@
`include "VX_define.vh"
// Exports the scheduler's cycle counter: VX_schedule assigns cycles through
// the master modport.
// NOTE(review): the renamed VX_fetch_to_csr_if file also declares an
// interface called VX_sched_csr_if - confirm only one definition is compiled.
interface VX_sched_csr_if ();

    wire [`PERF_CTR_BITS-1:0] cycles;   // cycle count, PERF_CTR_BITS wide

    modport master (output cycles);     // producing side
    modport slave  (input  cycles);     // consuming side

endinterface

View file

@ -1,6 +1,6 @@
`include "VX_define.vh"
interface VX_ifetch_req_if ();
interface VX_schedule_if ();
wire valid;
wire [`UP(`UUID_BITS)-1:0] uuid;

View file

@ -1,18 +0,0 @@
`include "VX_define.vh"
// Warp-release notification: valid flags a release for warp wid. The
// wrelease_if-based lines of the scheduler clear stalled_warps[wid] and arm
// the timeout counter when valid is high.
interface VX_wrelease_if ();

    wire                     valid;  // a release is being signalled
    wire [`UP(`NW_BITS)-1:0] wid;    // warp being released

    // producing side
    modport master (
        output valid, wid
    );

    // consuming side
    modport slave (
        input valid, wid
    );

endinterface

View file

@ -68,12 +68,10 @@ if { $chipscope == 1 } {
set_property -dict [list CONFIG.C_ADV_TRIGGER {true} \
CONFIG.C_EN_STRG_QUAL {1} \
CONFIG.C_DATA_DEPTH {4096} \
CONFIG.C_NUM_OF_PROBES {5} \
CONFIG.C_NUM_OF_PROBES {3} \
CONFIG.C_PROBE0_WIDTH {128} \
CONFIG.C_PROBE1_WIDTH {128} \
CONFIG.C_PROBE2_WIDTH {128} \
CONFIG.C_PROBE3_WIDTH {128} \
CONFIG.C_PROBE4_WIDTH {128} \
] [get_ips ila_fetch]
generate_target {instantiation_template} [get_files ila_fetch.xci]
set_property generate_synth_checkpoint false [get_files ila_fetch.xci]