mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
RTL code refactoring
This commit is contained in:
parent
5671b08a5e
commit
07135263f5
22 changed files with 334 additions and 474 deletions
|
@ -34,7 +34,7 @@ gen-singlecore-t: build_config
|
|||
verilator $(VF) -cc $(SINGLE_CORE) -CFLAGS '$(CF) -DNDEBUG -O2' --threads $(THREADS)
|
||||
|
||||
gen-singlecore-d: build_config
|
||||
verilator $(VF) -cc $(SINGLE_CORE) -CFLAGS '$(CF) -DVCD_OUTPUT' $(DBG)
|
||||
verilator $(VF) -cc $(SINGLE_CORE) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT' $(DBG)
|
||||
|
||||
gen-multicore: build_config
|
||||
verilator $(VF) -DNDEBUG -cc $(MULTI_CORE) -CFLAGS '$(CF) -DNDEBUG -DUSE_MULTICORE'
|
||||
|
@ -43,7 +43,7 @@ gen-multicore-t: build_config
|
|||
verilator $(VF) -DNDEBUG -cc $(MULTI_CORE) -CFLAGS '$(CF) -DNDEBUG -O2 -DUSE_MULTICORE' --threads $(THREADS)
|
||||
|
||||
gen-multicore-d: build_config
|
||||
verilator $(VF) -cc $(MULTI_CORE) -CFLAGS '$(CF) -DVCD_OUTPUT -DUSE_MULTICORE' $(DBG)
|
||||
verilator $(VF) -cc $(MULTI_CORE) -CFLAGS '$(CF) -g -O0 -DVCD_OUTPUT -DUSE_MULTICORE' $(DBG)
|
||||
|
||||
singlecore: gen-singlecore
|
||||
(cd obj_dir && make -j -f VVortex.mk)
|
||||
|
|
|
@ -347,15 +347,15 @@ logic vortex_enabled;
|
|||
|
||||
always_comb
|
||||
begin
|
||||
vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
|
||||
vx_dram_req_full = !vortex_enabled || avs_waitrequest || avs_raq_full || avs_rdq_full;
|
||||
vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
|
||||
vx_dram_req_ready = vortex_enabled && !avs_waitrequest && !avs_raq_full && !avs_rdq_full;
|
||||
end
|
||||
|
||||
// Vortex DRAM fill response
|
||||
|
||||
always_comb
|
||||
begin
|
||||
vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty && vx_dram_rsp_ready;
|
||||
vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty && vx_dram_rsp_ready;
|
||||
vx_dram_rsp_addr = (avs_raq_dout << 6);
|
||||
{>>{vx_dram_rsp_data}} = avs_rdq_dout;
|
||||
end
|
||||
|
@ -531,7 +531,7 @@ begin
|
|||
|
||||
if ((STATE_CLFLUSH == state)
|
||||
&& vx_snoop_ctr < csr_data_size
|
||||
&& !vx_snp_req_full)
|
||||
&& vx_snp_req_ready)
|
||||
begin
|
||||
vx_snp_req_addr <= (csr_mem_addr + vx_snoop_ctr) << 6;
|
||||
vx_snp_req <= 1;
|
||||
|
@ -556,7 +556,7 @@ Vortex_Socket #() vx_socket (
|
|||
.dram_req_read (vx_dram_req_read),
|
||||
.dram_req_addr (vx_dram_req_addr),
|
||||
.dram_req_data (vx_dram_req_data),
|
||||
.dram_req_full (vx_dram_req_full),
|
||||
.dram_req_ready (vx_dram_req_ready),
|
||||
|
||||
// DRAM Rsp
|
||||
.out_dram_rsp_ready (vx_dram_rsp_ready),
|
||||
|
@ -567,7 +567,7 @@ Vortex_Socket #() vx_socket (
|
|||
// Cache Snooping Req
|
||||
.llc_snp_req_valid (vx_snp_req),
|
||||
.llc_snp_req_addr (vx_snp_req_addr),
|
||||
.llc_snp_req_full (vx_snp_req_full),
|
||||
.llc_snp_req_ready (vx_snp_req_ready),
|
||||
|
||||
// program exit signal
|
||||
.out_ebreak (vx_ebreak)
|
||||
|
|
|
@ -32,53 +32,53 @@ assign writeback_if.wb_pc = writeback_temp_if.wb_pc;
|
|||
|
||||
// assign VX_writeback_if(writeback_temp_if);
|
||||
|
||||
wire no_slot_mem;
|
||||
wire no_slot_exec;
|
||||
wire no_slot_mem;
|
||||
wire no_slot_exec;
|
||||
|
||||
// LSU input + output
|
||||
VX_lsu_req_if lsu_req_if();
|
||||
VX_inst_mem_wb_if mem_wb_if();
|
||||
VX_lsu_req_if lsu_req_if();
|
||||
VX_inst_mem_wb_if mem_wb_if();
|
||||
|
||||
// Exec unit input + output
|
||||
VX_exec_unit_req_if exec_unit_req_if();
|
||||
VX_inst_exec_wb_if inst_exec_wb_if();
|
||||
VX_exec_unit_req_if exec_unit_req_if();
|
||||
VX_inst_exec_wb_if inst_exec_wb_if();
|
||||
|
||||
// GPU unit input
|
||||
VX_gpu_inst_req_if gpu_inst_req_if();
|
||||
VX_gpu_inst_req_if gpu_inst_req_if();
|
||||
|
||||
// CSR unit inputs
|
||||
VX_csr_req_if csr_req_if();
|
||||
VX_csr_wb_if csr_wb_if();
|
||||
wire no_slot_csr;
|
||||
wire stall_gpr_csr;
|
||||
VX_csr_req_if csr_req_if();
|
||||
VX_csr_wb_if csr_wb_if();
|
||||
wire no_slot_csr;
|
||||
wire stall_gpr_csr;
|
||||
|
||||
VX_gpr_stage gpr_stage (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.schedule_delay (schedule_delay),
|
||||
.writeback_if (writeback_temp_if),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.schedule_delay (schedule_delay),
|
||||
.writeback_if (writeback_temp_if),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
// New
|
||||
.exec_unit_req_if(exec_unit_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.gpu_inst_req_if (gpu_inst_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.stall_gpr_csr (stall_gpr_csr),
|
||||
.exec_unit_req_if (exec_unit_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.gpu_inst_req_if (gpu_inst_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.stall_gpr_csr (stall_gpr_csr),
|
||||
// End new
|
||||
.memory_delay (out_mem_delay),
|
||||
.exec_delay (out_exec_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay)
|
||||
.memory_delay (out_mem_delay),
|
||||
.exec_delay (out_exec_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay)
|
||||
);
|
||||
|
||||
VX_lsu load_store_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.mem_wb_if (mem_wb_if),
|
||||
.dcache_rsp_if(dcache_rsp_if),
|
||||
.dcache_req_if(dcache_req_if),
|
||||
.out_delay (out_mem_delay),
|
||||
.no_slot_mem (no_slot_mem)
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.mem_wb_if (mem_wb_if),
|
||||
.dcache_rsp_if (dcache_rsp_if),
|
||||
.dcache_req_if (dcache_req_if),
|
||||
.out_delay (out_mem_delay),
|
||||
.no_slot_mem (no_slot_mem)
|
||||
);
|
||||
|
||||
VX_execute_unit execUnit (
|
||||
|
@ -97,11 +97,6 @@ VX_gpgpu_inst gpgpu_inst (
|
|||
.warp_ctl_if (warp_ctl_if)
|
||||
);
|
||||
|
||||
// VX_csr_wrapper csr_wrapper(
|
||||
// .csr_req_if(csr_req_if),
|
||||
// .csr_wb_if (csr_wb_if)
|
||||
// );
|
||||
|
||||
VX_csr_pipe #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) csr_pipe (
|
||||
|
|
|
@ -23,8 +23,6 @@
|
|||
`define NUM_BARRIERS 4
|
||||
`endif
|
||||
|
||||
// `define SINGLE_CORE_BENCH
|
||||
|
||||
`ifndef GLOBAL_BLOCK_SIZE_BYTES
|
||||
`define GLOBAL_BLOCK_SIZE_BYTES 16
|
||||
`endif
|
||||
|
|
|
@ -11,16 +11,15 @@ module VX_decode(
|
|||
VX_join_if join_if,
|
||||
|
||||
output wire terminate_sim
|
||||
|
||||
);
|
||||
|
||||
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
|
||||
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
|
||||
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
|
||||
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
|
||||
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
|
||||
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
|
||||
|
||||
assign frE_to_bckE_req_if.curr_PC = in_curr_PC;
|
||||
assign frE_to_bckE_req_if.curr_PC = in_curr_PC;
|
||||
|
||||
wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid;
|
||||
wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid;
|
||||
|
||||
wire[6:0] curr_opcode;
|
||||
|
||||
|
@ -122,28 +121,22 @@ module VX_decode(
|
|||
assign is_split = is_gpgpu && (func3 == 2); // Goes to BE
|
||||
assign is_join = is_gpgpu && (func3 == 3); // Doesn't go to BE
|
||||
|
||||
|
||||
assign join_if.is_join = is_join;
|
||||
assign join_if.join_warp_num = in_warp_num;
|
||||
|
||||
|
||||
assign frE_to_bckE_req_if.is_wspawn = is_wspawn;
|
||||
assign frE_to_bckE_req_if.is_tmc = is_tmc;
|
||||
assign frE_to_bckE_req_if.is_split = is_split;
|
||||
assign frE_to_bckE_req_if.is_barrier = is_barrier;
|
||||
|
||||
|
||||
|
||||
assign frE_to_bckE_req_if.csr_immed = is_csr_immed;
|
||||
assign frE_to_bckE_req_if.is_csr = is_csr;
|
||||
|
||||
|
||||
assign frE_to_bckE_req_if.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL :
|
||||
is_linst ? `WB_MEM :
|
||||
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
|
||||
`NO_WB;
|
||||
|
||||
|
||||
assign frE_to_bckE_req_if.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;
|
||||
|
||||
// MEM signals
|
||||
|
@ -161,7 +154,6 @@ module VX_decode(
|
|||
|
||||
assign frE_to_bckE_req_if.upper_immed = temp_upper_immed;
|
||||
|
||||
|
||||
assign jal_b_19_to_12 = in_instruction[19:12];
|
||||
assign jal_b_11 = in_instruction[20];
|
||||
assign jal_b_10_to_1 = in_instruction[30:21];
|
||||
|
@ -170,11 +162,9 @@ module VX_decode(
|
|||
assign jal_unsigned_offset = {jal_b_20, jal_b_19_to_12, jal_b_11, jal_b_10_to_1, jal_b_0};
|
||||
assign jal_1_offset = {{11{jal_b_20}}, jal_unsigned_offset};
|
||||
|
||||
|
||||
assign jalr_immed = {func7, frE_to_bckE_req_if.rs2};
|
||||
assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed};
|
||||
|
||||
|
||||
assign jal_sys_cond1 = func3 == 3'h0;
|
||||
assign jal_sys_cond2 = u_12 < 12'h2;
|
||||
|
||||
|
@ -214,13 +204,11 @@ module VX_decode(
|
|||
|
||||
// wire is_ebreak;
|
||||
|
||||
|
||||
// assign is_ebreak = is_e_inst;
|
||||
wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid));
|
||||
assign frE_to_bckE_req_if.ebreak = ebreak;
|
||||
assign terminate_sim = is_e_inst;
|
||||
|
||||
|
||||
// CSR
|
||||
|
||||
assign csr_cond1 = func3 != 3'h0;
|
||||
|
@ -228,13 +216,11 @@ module VX_decode(
|
|||
|
||||
assign frE_to_bckE_req_if.csr_address = (csr_cond1 && csr_cond2) ? u_12 : 12'h55;
|
||||
|
||||
|
||||
// ITYPE IMMED
|
||||
assign alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
|
||||
assign alu_shift_i_immed = {{7{1'b0}}, frE_to_bckE_req_if.rs2};
|
||||
assign alu_tempp = alu_shift_i ? alu_shift_i_immed : u_12;
|
||||
|
||||
|
||||
always @(*) begin
|
||||
case(curr_opcode)
|
||||
`ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp};
|
||||
|
@ -331,11 +317,11 @@ module VX_decode(
|
|||
wire[4:0] temp_final_alu;
|
||||
|
||||
assign temp_final_alu = is_btype ? ((frE_to_bckE_req_if.branch_type < `BLTU) ? `SUB : `SUBU) :
|
||||
is_lui ? `LUI_ALU :
|
||||
is_auipc ? `AUIPC_ALU :
|
||||
is_csr ? csr_alu :
|
||||
(is_stype || is_linst) ? `ADD :
|
||||
alu_op;
|
||||
is_lui ? `LUI_ALU :
|
||||
is_auipc ? `AUIPC_ALU :
|
||||
is_csr ? csr_alu :
|
||||
(is_stype || is_linst) ? `ADD :
|
||||
alu_op;
|
||||
|
||||
assign frE_to_bckE_req_if.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;
|
||||
|
||||
|
|
|
@ -135,6 +135,9 @@
|
|||
|
||||
`define ZERO_REG 5'h0
|
||||
|
||||
// IO BUS
|
||||
`define IO_BUS_ADDR 32'h00010000
|
||||
|
||||
// ======================= Dcache Configurable Knobs ==========================
|
||||
|
||||
// Function ID
|
||||
|
|
|
@ -23,28 +23,29 @@ module VX_dmem_controller (
|
|||
VX_gpu_dcache_req_if icache_req_if
|
||||
);
|
||||
|
||||
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_smem_if();
|
||||
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_smem_if();
|
||||
|
||||
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_dcache_if();
|
||||
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_smem_if();
|
||||
|
||||
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_dcache_if();
|
||||
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_dcache_if();
|
||||
|
||||
wire to_shm = dcache_req_if.core_req_addr[0][31:24] == 8'hFF;
|
||||
wire dcache_wants_wb = (|dcache_rsp_dcache_if.core_wb_valid);
|
||||
|
||||
// Dcache Request
|
||||
assign dcache_req_dcache_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{~to_shm}};
|
||||
assign dcache_req_dcache_if.core_req_addr = dcache_req_if.core_req_addr;
|
||||
assign dcache_req_dcache_if.core_req_writedata = dcache_req_if.core_req_writedata;
|
||||
assign dcache_req_dcache_if.core_req_mem_read = dcache_req_if.core_req_mem_read;
|
||||
assign dcache_req_dcache_if.core_req_mem_write = dcache_req_if.core_req_mem_write;
|
||||
assign dcache_req_dcache_if.core_req_rd = dcache_req_if.core_req_rd;
|
||||
assign dcache_req_dcache_if.core_req_addr = dcache_req_if.core_req_addr;
|
||||
assign dcache_req_dcache_if.core_req_writedata = dcache_req_if.core_req_writedata;
|
||||
assign dcache_req_dcache_if.core_req_rd = dcache_req_if.core_req_rd;
|
||||
assign dcache_req_dcache_if.core_req_wb = dcache_req_if.core_req_wb;
|
||||
assign dcache_req_dcache_if.core_req_warp_num = dcache_req_if.core_req_warp_num;
|
||||
assign dcache_req_dcache_if.core_req_pc = dcache_req_if.core_req_pc;
|
||||
assign dcache_req_dcache_if.core_no_wb_slot = dcache_req_if.core_no_wb_slot;
|
||||
|
||||
// Shred Memory Request
|
||||
assign dcache_req_dcache_if.core_no_wb_slot = dcache_req_if.core_no_wb_slot;
|
||||
|
||||
// Shared Memory Request
|
||||
assign dcache_req_smem_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{to_shm}};
|
||||
assign dcache_req_smem_if.core_req_addr = dcache_req_if.core_req_addr;
|
||||
assign dcache_req_smem_if.core_req_writedata = dcache_req_if.core_req_writedata;
|
||||
|
@ -54,17 +55,18 @@ module VX_dmem_controller (
|
|||
assign dcache_req_smem_if.core_req_wb = dcache_req_if.core_req_wb;
|
||||
assign dcache_req_smem_if.core_req_warp_num = dcache_req_if.core_req_warp_num;
|
||||
assign dcache_req_smem_if.core_req_pc = dcache_req_if.core_req_pc;
|
||||
assign dcache_req_smem_if.core_no_wb_slot = dcache_req_if.core_no_wb_slot || dcache_wants_wb;
|
||||
|
||||
// Dcache Response
|
||||
assign dcache_req_smem_if.core_no_wb_slot = dcache_req_if.core_no_wb_slot || dcache_wants_wb;
|
||||
|
||||
// Dcache Response
|
||||
assign dcache_rsp_if.core_wb_valid = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_valid : dcache_rsp_smem_if.core_wb_valid;
|
||||
assign dcache_rsp_if.core_wb_req_rd = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_req_rd : dcache_rsp_smem_if.core_wb_req_rd;
|
||||
assign dcache_rsp_if.core_wb_req_wb = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_req_wb : dcache_rsp_smem_if.core_wb_req_wb;
|
||||
assign dcache_rsp_if.core_wb_warp_num = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_warp_num : dcache_rsp_smem_if.core_wb_warp_num;
|
||||
assign dcache_rsp_if.core_wb_pc = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_pc : dcache_rsp_smem_if.core_wb_pc;
|
||||
assign dcache_rsp_if.core_wb_readdata = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_readdata : dcache_rsp_smem_if.core_wb_readdata;
|
||||
assign dcache_rsp_if.core_wb_pc = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_pc : dcache_rsp_smem_if.core_wb_pc;
|
||||
assign dcache_rsp_if.core_wb_warp_num = dcache_wants_wb ? dcache_rsp_dcache_if.core_wb_warp_num : dcache_rsp_smem_if.core_wb_warp_num;
|
||||
|
||||
assign dcache_rsp_if.delay_req = to_shm ? dcache_rsp_smem_if.delay_req : dcache_rsp_dcache_if.delay_req;
|
||||
assign dcache_rsp_if.core_req_ready = to_shm ? dcache_rsp_smem_if.core_req_ready : dcache_rsp_dcache_if.core_req_ready;
|
||||
|
||||
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_smem_dram_req_if();
|
||||
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_smem_dram_res_if();
|
||||
|
@ -105,8 +107,8 @@ module VX_dmem_controller (
|
|||
.core_req_warp_num (dcache_req_smem_if.core_req_warp_num),
|
||||
.core_req_pc (dcache_req_smem_if.core_req_pc),
|
||||
|
||||
// Delay Core Req
|
||||
.delay_req (dcache_rsp_smem_if.delay_req),
|
||||
// Can submit core Req
|
||||
.core_req_ready (dcache_rsp_smem_if.core_req_ready),
|
||||
|
||||
// Core Cache Can't WB
|
||||
.core_no_wb_slot (dcache_req_smem_if.core_no_wb_slot),
|
||||
|
@ -135,7 +137,7 @@ module VX_dmem_controller (
|
|||
.dram_req_write (gpu_smem_dram_req_if.dram_req_write),
|
||||
.dram_req_addr (gpu_smem_dram_req_if.dram_req_addr),
|
||||
.dram_req_data (gpu_smem_dram_req_if.dram_req_data),
|
||||
.dram_req_full (1),
|
||||
.dram_req_ready (0),
|
||||
|
||||
// Snoop Request
|
||||
.snp_req_valid (0),
|
||||
|
@ -188,8 +190,8 @@ module VX_dmem_controller (
|
|||
.core_req_warp_num (dcache_req_dcache_if.core_req_warp_num),
|
||||
.core_req_pc (dcache_req_dcache_if.core_req_pc),
|
||||
|
||||
// Delay Core Req
|
||||
.delay_req (dcache_rsp_dcache_if.delay_req),
|
||||
// Can submit core Req
|
||||
.core_req_ready (dcache_rsp_dcache_if.core_req_ready),
|
||||
|
||||
// Core Cache Can't WB
|
||||
.core_no_wb_slot (dcache_req_dcache_if.core_no_wb_slot),
|
||||
|
@ -218,7 +220,7 @@ module VX_dmem_controller (
|
|||
.dram_req_write (gpu_dcache_dram_req_if.dram_req_write),
|
||||
.dram_req_addr (gpu_dcache_dram_req_if.dram_req_addr),
|
||||
.dram_req_data (gpu_dcache_dram_req_if.dram_req_data),
|
||||
.dram_req_full (gpu_dcache_dram_req_if.dram_req_full),
|
||||
.dram_req_ready (gpu_dcache_dram_req_if.dram_req_ready),
|
||||
|
||||
// Snoop Request
|
||||
.snp_req_valid (gpu_dcache_snp_req_if.snp_req_valid),
|
||||
|
@ -269,8 +271,8 @@ module VX_dmem_controller (
|
|||
.core_req_warp_num (icache_req_if.core_req_warp_num),
|
||||
.core_req_pc (icache_req_if.core_req_pc),
|
||||
|
||||
// Delay Core Req
|
||||
.delay_req (icache_rsp_if.delay_req),
|
||||
// Can submit core Req
|
||||
.core_req_ready (icache_rsp_if.core_req_ready),
|
||||
|
||||
// Core Cache Can't WB
|
||||
.core_no_wb_slot (icache_req_if.core_no_wb_slot),
|
||||
|
@ -299,7 +301,7 @@ module VX_dmem_controller (
|
|||
.dram_req_write (gpu_icache_dram_req_if.dram_req_write),
|
||||
.dram_req_addr (gpu_icache_dram_req_if.dram_req_addr),
|
||||
.dram_req_data (gpu_icache_dram_req_if.dram_req_data),
|
||||
.dram_req_full (gpu_icache_dram_req_if.dram_req_full),
|
||||
.dram_req_ready (gpu_icache_dram_req_if.dram_req_ready),
|
||||
|
||||
// Snoop Request
|
||||
.snp_req_valid (gpu_icache_snp_req_if.snp_req_valid),
|
||||
|
|
|
@ -1,22 +1,22 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_front_end (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire schedule_delay,
|
||||
input wire schedule_delay,
|
||||
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
|
||||
VX_gpu_dcache_rsp_if icache_rsp_if,
|
||||
VX_gpu_dcache_req_if icache_req_if,
|
||||
VX_gpu_dcache_rsp_if icache_rsp_if,
|
||||
VX_gpu_dcache_req_if icache_req_if,
|
||||
|
||||
VX_jal_response_if jal_rsp_if,
|
||||
VX_branch_response_if branch_rsp_if,
|
||||
VX_jal_response_if jal_rsp_if,
|
||||
VX_branch_response_if branch_rsp_if,
|
||||
|
||||
VX_frE_to_bckE_req_if bckE_req_if,
|
||||
VX_frE_to_bckE_req_if bckE_req_if,
|
||||
|
||||
output wire fetch_ebreak
|
||||
output wire fetch_ebreak
|
||||
);
|
||||
|
||||
VX_inst_meta_if fe_inst_meta_fi();
|
||||
|
@ -35,16 +35,7 @@ module VX_front_end (
|
|||
wire[`NW_BITS-1:0] icache_stage_wid;
|
||||
wire[`NUM_THREADS-1:0] icache_stage_valids;
|
||||
|
||||
reg old_ebreak; // This should be eventually removed
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
old_ebreak <= 0;
|
||||
end else begin
|
||||
old_ebreak <= old_ebreak || fetch_ebreak;
|
||||
end
|
||||
end
|
||||
|
||||
assign fetch_ebreak = vortex_ebreak || terminate_sim || old_ebreak;
|
||||
assign fetch_ebreak = vortex_ebreak || terminate_sim;
|
||||
|
||||
VX_wstall_if wstall_if();
|
||||
VX_join_if join_if();
|
||||
|
|
|
@ -39,7 +39,7 @@ module VX_icache_stage (
|
|||
assign icache_stage_valids = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}};
|
||||
|
||||
// Cache can't accept request
|
||||
assign icache_stage_delay = icache_rsp_if.delay_req;
|
||||
assign icache_stage_delay = ~icache_rsp_if.core_req_ready;
|
||||
|
||||
// Core can't accept response
|
||||
assign icache_req_if.core_no_wb_slot = total_freeze;
|
||||
|
|
|
@ -59,7 +59,7 @@ module VX_lsu (
|
|||
assign dcache_req_if.core_no_wb_slot = no_slot_mem;
|
||||
|
||||
// Cache can't accept request
|
||||
assign out_delay = dcache_rsp_if.delay_req;
|
||||
assign out_delay = ~dcache_rsp_if.core_req_ready;
|
||||
|
||||
// Core Response
|
||||
assign mem_wb_if.rd = dcache_rsp_if.core_wb_req_rd;
|
||||
|
|
154
hw/rtl/Vortex.v
154
hw/rtl/Vortex.v
|
@ -1,26 +1,23 @@
|
|||
`include "VX_define.vh"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module Vortex
|
||||
#(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
`ifdef SINGLE_CORE_BENCH
|
||||
|
||||
module Vortex #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// IO
|
||||
output wire io_valid,
|
||||
output wire [31:0] io_data,
|
||||
output wire io_valid,
|
||||
output wire [31:0] io_data,
|
||||
|
||||
// DRAM Dcache Req
|
||||
output wire dram_req_read,
|
||||
output wire dram_req_write,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
|
||||
input wire dram_req_full,
|
||||
input wire dram_req_ready,
|
||||
|
||||
// DRAM Dcache Rsp
|
||||
input wire dram_rsp_valid,
|
||||
|
@ -33,7 +30,7 @@ module Vortex
|
|||
output wire I_dram_req_write,
|
||||
output wire [31:0] I_dram_req_addr,
|
||||
output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data,
|
||||
input wire I_dram_req_full,
|
||||
input wire I_dram_req_ready,
|
||||
|
||||
// DRAM Icache Rsp
|
||||
input wire I_dram_rsp_valid,
|
||||
|
@ -42,52 +39,11 @@ module Vortex
|
|||
output wire I_dram_rsp_ready,
|
||||
|
||||
// LLC Snooping
|
||||
input wire snp_req_valid,
|
||||
input wire [31:0] snp_req_addr,
|
||||
output wire snp_req_full,
|
||||
input wire llc_snp_req_valid,
|
||||
input wire [31:0] llc_snp_req_addr,
|
||||
output wire llc_snp_req_full,
|
||||
|
||||
output wire out_ebreak
|
||||
|
||||
`else
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
// IO
|
||||
output wire io_valid,
|
||||
output wire[31:0] io_data,
|
||||
|
||||
// DRAM Dcache Req
|
||||
output wire dram_req_read,
|
||||
output wire dram_req_write,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
|
||||
input wire dram_req_full,
|
||||
|
||||
// DRAM Dcache Rsp
|
||||
input wire dram_rsp_valid,
|
||||
input wire [31:0] dram_rsp_addr,
|
||||
input wire [`DBANK_LINE_SIZE-1:0] dram_rsp_data,
|
||||
output wire dram_rsp_ready,
|
||||
|
||||
// DRAM Icache Req
|
||||
output wire I_dram_req_read,
|
||||
output wire I_dram_req_write,
|
||||
output wire [31:0] I_dram_req_addr,
|
||||
output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data,
|
||||
input wire I_dram_req_full,
|
||||
|
||||
// DRAM Icache Rsp
|
||||
output wire I_dram_rsp_ready,
|
||||
input wire I_dram_rsp_valid,
|
||||
input wire [31:0] I_dram_rsp_addr,
|
||||
input wire [`IBANK_LINE_SIZE-1:0] I_dram_rsp_data,
|
||||
|
||||
input wire snp_req_valid,
|
||||
input wire [31:0] snp_req_addr,
|
||||
output wire snp_req_full,
|
||||
|
||||
output wire out_ebreak
|
||||
`endif
|
||||
);
|
||||
`DEBUG_BEGIN
|
||||
wire scheduler_empty;
|
||||
|
@ -114,36 +70,37 @@ module Vortex
|
|||
assign dram_req_addr = gpu_dcache_dram_req_if.dram_req_addr;
|
||||
assign dram_rsp_ready = gpu_dcache_dram_req_if.dram_rsp_ready;
|
||||
|
||||
assign gpu_dcache_dram_req_if.dram_req_full = dram_req_full;
|
||||
assign gpu_dcache_dram_req_if.dram_req_ready = dram_req_ready;
|
||||
|
||||
genvar i;
|
||||
generate
|
||||
for (i = 0; i < `DBANK_LINE_WORDS; i=i+1) begin
|
||||
assign gpu_dcache_dram_res_if.dram_rsp_data[i] = dram_rsp_data[i * 32 +: 32];
|
||||
assign dram_req_data[i * 32 +: 32] = gpu_dcache_dram_req_if.dram_req_data[i];
|
||||
assign dram_req_data[i * 32 +: 32] = gpu_dcache_dram_req_if.dram_req_data[i];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
wire temp_io_valid = (!memory_delay)
|
||||
&& (|dcache_req_if.core_req_valid)
|
||||
&& (dcache_req_if.core_req_mem_write[0] != `NO_MEM_WRITE)
|
||||
&& (dcache_req_if.core_req_addr[0] == 32'h00010000);
|
||||
&& (dcache_req_if.core_req_addr[0] == `IO_BUS_ADDR);
|
||||
|
||||
wire[31:0] temp_io_data = dcache_req_if.core_req_writedata[0];
|
||||
assign io_valid = temp_io_valid;
|
||||
assign io_data = temp_io_data;
|
||||
wire [31:0] temp_io_data = dcache_req_if.core_req_writedata[0];
|
||||
assign io_valid = temp_io_valid;
|
||||
assign io_data = temp_io_data;
|
||||
|
||||
assign dcache_req_qual_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{~io_valid}};
|
||||
assign dcache_req_qual_if.core_req_addr = dcache_req_if.core_req_addr;
|
||||
assign dcache_req_qual_if.core_req_writedata = dcache_req_if.core_req_writedata;
|
||||
assign dcache_req_qual_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{~io_valid}};
|
||||
assign dcache_req_qual_if.core_req_mem_read = dcache_req_if.core_req_mem_read;
|
||||
assign dcache_req_qual_if.core_req_mem_write = dcache_req_if.core_req_mem_write;
|
||||
assign dcache_req_qual_if.core_req_addr = dcache_req_if.core_req_addr;
|
||||
assign dcache_req_qual_if.core_req_writedata = dcache_req_if.core_req_writedata;
|
||||
assign dcache_req_qual_if.core_req_rd = dcache_req_if.core_req_rd;
|
||||
assign dcache_req_qual_if.core_req_wb = dcache_req_if.core_req_wb;
|
||||
assign dcache_req_qual_if.core_req_warp_num = dcache_req_if.core_req_warp_num;
|
||||
assign dcache_req_qual_if.core_req_pc = dcache_req_if.core_req_pc;
|
||||
assign dcache_req_qual_if.core_no_wb_slot = dcache_req_if.core_no_wb_slot;
|
||||
|
||||
assign dcache_req_qual_if.core_no_wb_slot = dcache_req_if.core_no_wb_slot;
|
||||
|
||||
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`INUM_REQUESTS)) icache_rsp_if();
|
||||
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`INUM_REQUESTS)) icache_req_if();
|
||||
|
||||
|
@ -158,7 +115,7 @@ module Vortex
|
|||
assign I_dram_req_addr = gpu_icache_dram_req_if.dram_req_addr;
|
||||
assign I_dram_rsp_ready = gpu_icache_dram_req_if.dram_rsp_ready;
|
||||
|
||||
assign gpu_icache_dram_req_if.dram_req_full = I_dram_req_full;
|
||||
assign gpu_icache_dram_req_if.dram_req_ready = I_dram_req_ready;
|
||||
|
||||
genvar j;
|
||||
generate
|
||||
|
@ -168,42 +125,41 @@ module Vortex
|
|||
end
|
||||
endgenerate
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Front-end to Back-end
|
||||
VX_frE_to_bckE_req_if bckE_req_if(); // New instruction request to EXE/MEM
|
||||
VX_frE_to_bckE_req_if bckE_req_if(); // New instruction request to EXE/MEM
|
||||
|
||||
// Back-end to Front-end
|
||||
VX_wb_if writeback_if(); // Writeback to GPRs
|
||||
VX_branch_response_if branch_rsp_if(); // Branch Resolution to Fetch
|
||||
VX_jal_response_if jal_rsp_if(); // Jump resolution to Fetch
|
||||
|
||||
// CSR Buses
|
||||
// VX_csr_write_request_if csr_w_req_if();
|
||||
VX_wb_if writeback_if(); // Writeback to GPRs
|
||||
VX_branch_response_if branch_rsp_if(); // Branch Resolution to Fetch
|
||||
VX_jal_response_if jal_rsp_if(); // Jump resolution to Fetch
|
||||
|
||||
// Warp controls
|
||||
VX_warp_ctl_if warp_ctl_if();
|
||||
|
||||
// Cache snooping
|
||||
VX_gpu_snp_req_rsp_if gpu_icache_snp_req_if();
|
||||
VX_gpu_snp_req_rsp_if gpu_dcache_snp_req_if();
|
||||
assign gpu_dcache_snp_req_if.snp_req_valid = llc_snp_req_valid;
|
||||
assign gpu_dcache_snp_req_if.snp_req_addr = llc_snp_req_addr;
|
||||
assign llc_snp_req_full = gpu_dcache_snp_req_if.snp_req_full;
|
||||
|
||||
assign gpu_dcache_snp_req_if.snp_req_valid = snp_req_valid;
|
||||
assign gpu_dcache_snp_req_if.snp_req_addr = snp_req_addr;
|
||||
assign snp_req_full = gpu_dcache_snp_req_if.snp_req_full;
|
||||
|
||||
VX_front_end front_end(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.schedule_delay (schedule_delay),
|
||||
.icache_rsp_if (icache_rsp_if),
|
||||
.icache_req_if (icache_req_if),
|
||||
.jal_rsp_if (jal_rsp_if),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.fetch_ebreak (out_ebreak)
|
||||
VX_front_end front_end (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.warp_ctl_if (warp_ctl_if),
|
||||
.bckE_req_if (bckE_req_if),
|
||||
.schedule_delay (schedule_delay),
|
||||
.icache_rsp_if (icache_rsp_if),
|
||||
.icache_req_if (icache_req_if),
|
||||
.jal_rsp_if (jal_rsp_if),
|
||||
.branch_rsp_if (branch_rsp_if),
|
||||
.fetch_ebreak (out_ebreak)
|
||||
);
|
||||
|
||||
VX_scheduler schedule(
|
||||
.clk (clk),
|
||||
VX_scheduler schedule (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.memory_delay (memory_delay),
|
||||
.exec_delay (exec_delay),
|
||||
|
@ -214,7 +170,9 @@ VX_scheduler schedule(
|
|||
.is_empty (scheduler_empty)
|
||||
);
|
||||
|
||||
VX_back_end #(.CORE_ID(CORE_ID)) back_end(
|
||||
VX_back_end #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) back_end (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.schedule_delay (schedule_delay),
|
||||
|
@ -230,7 +188,7 @@ VX_back_end #(.CORE_ID(CORE_ID)) back_end(
|
|||
.gpr_stage_delay (gpr_stage_delay)
|
||||
);
|
||||
|
||||
VX_dmem_controller dmem_controller(
|
||||
VX_dmem_controller dmem_controller (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
|
@ -253,14 +211,6 @@ VX_dmem_controller dmem_controller(
|
|||
.dcache_rsp_if (dcache_rsp_if)
|
||||
);
|
||||
|
||||
// VX_csr_handler csr_handler(
|
||||
// .clk (clk),
|
||||
// .in_decode_csr_address(decode_csr_address),
|
||||
// .csr_w_req_if (csr_w_req_if),
|
||||
// .in_wb_valid (writeback_if.wb_valid[0]),
|
||||
// .out_decode_csr_data (csr_decode_csr_data)
|
||||
// );
|
||||
|
||||
endmodule // Vortex
|
||||
|
||||
|
||||
|
|
|
@ -1,14 +1,12 @@
|
|||
`include "VX_define.vh"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module Vortex_Cluster
|
||||
#(
|
||||
parameter CLUSTER_ID = 0
|
||||
) (
|
||||
|
||||
module Vortex_Cluster #(
|
||||
parameter CLUSTER_ID = 0
|
||||
) (
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// IO
|
||||
output wire[`NUM_CORES_PER_CLUSTER-1:0] io_valid,
|
||||
|
@ -19,7 +17,7 @@ module Vortex_Cluster
|
|||
output wire dram_req_write,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
|
||||
input wire dram_req_full,
|
||||
input wire dram_req_ready,
|
||||
|
||||
// DRAM Rsp
|
||||
input wire dram_rsp_valid,
|
||||
|
@ -28,11 +26,11 @@ module Vortex_Cluster
|
|||
output wire dram_rsp_ready,
|
||||
|
||||
// LLC Snooping
|
||||
input wire llc_snp_req_valid,
|
||||
input wire[31:0] llc_snp_req_addr,
|
||||
output wire llc_snp_req_full,
|
||||
input wire llc_snp_req_valid,
|
||||
input wire[31:0] llc_snp_req_addr,
|
||||
output wire llc_snp_req_full,
|
||||
|
||||
output wire out_ebreak
|
||||
output wire out_ebreak
|
||||
);
|
||||
// DRAM Dcache Req
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req_read;
|
||||
|
@ -64,7 +62,7 @@ module Vortex_Cluster
|
|||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_io_valid;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0][31:0] per_core_io_data;
|
||||
|
||||
wire l2c_core_accept;
|
||||
wire l2c_core_req_ready;
|
||||
|
||||
wire snp_fwd_valid;
|
||||
wire[31:0] snp_fwd_addr;
|
||||
|
@ -94,7 +92,7 @@ module Vortex_Cluster
|
|||
.dram_req_write (per_core_dram_req_write [curr_core]),
|
||||
.dram_req_addr (per_core_dram_req_addr [curr_core]),
|
||||
.dram_req_data (curr_core_dram_req_data ),
|
||||
.dram_req_full (l2c_core_accept ),
|
||||
.dram_req_ready (l2c_core_req_ready ),
|
||||
.dram_rsp_valid (per_core_dram_rsp_valid [curr_core]),
|
||||
.dram_rsp_addr (per_core_dram_rsp_addr [curr_core]),
|
||||
.dram_rsp_data (per_core_dram_rsp_data [curr_core]),
|
||||
|
@ -103,14 +101,14 @@ module Vortex_Cluster
|
|||
.I_dram_req_write (per_core_I_dram_req_write [curr_core]),
|
||||
.I_dram_req_addr (per_core_I_dram_req_addr [curr_core]),
|
||||
.I_dram_req_data (curr_core_I_dram_req_data ),
|
||||
.I_dram_req_full (l2c_core_accept ),
|
||||
.I_dram_req_ready (l2c_core_req_ready ),
|
||||
.I_dram_rsp_valid (per_core_I_dram_rsp_valid [curr_core]),
|
||||
.I_dram_rsp_addr (per_core_I_dram_rsp_addr [curr_core]),
|
||||
.I_dram_rsp_data (per_core_I_dram_rsp_data [curr_core]),
|
||||
.I_dram_rsp_ready (per_core_I_dram_rsp_ready [curr_core]),
|
||||
.snp_req_valid (snp_fwd_valid),
|
||||
.snp_req_addr (snp_fwd_addr),
|
||||
.snp_req_full (snp_fwd_full [curr_core]),
|
||||
.llc_snp_req_valid (snp_fwd_valid),
|
||||
.llc_snp_req_addr (snp_fwd_addr),
|
||||
.llc_snp_req_full (snp_fwd_full [curr_core]),
|
||||
.out_ebreak (per_core_out_ebreak [curr_core])
|
||||
);
|
||||
|
||||
|
@ -220,7 +218,7 @@ module Vortex_Cluster
|
|||
.core_req_pc (0),
|
||||
|
||||
// L2 can't accept Core Request
|
||||
.delay_req (l2c_core_accept),
|
||||
.core_req_ready (l2c_core_req_ready),
|
||||
|
||||
// Core can't accept L2 Request
|
||||
.core_no_wb_slot (|l2c_core_no_wb_slot),
|
||||
|
@ -249,7 +247,7 @@ module Vortex_Cluster
|
|||
.dram_req_write (dram_req_write),
|
||||
.dram_req_addr (dram_req_addr),
|
||||
.dram_req_data ({dram_req_data_port}),
|
||||
.dram_req_full (dram_req_full),
|
||||
.dram_req_ready (dram_req_ready),
|
||||
|
||||
// Snoop Request
|
||||
.snp_req_valid (llc_snp_req_valid),
|
||||
|
|
|
@ -2,21 +2,20 @@
|
|||
`include "VX_cache_config.vh"
|
||||
|
||||
module Vortex_Socket (
|
||||
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// IO
|
||||
output wire io_valid[`NUM_CORES-1:0],
|
||||
output wire[31:0] io_data [`NUM_CORES-1:0],
|
||||
output wire io_valid[`NUM_CORES-1:0],
|
||||
output wire[31:0] io_data [`NUM_CORES-1:0],
|
||||
|
||||
// DRAM Req
|
||||
output wire dram_req_read,
|
||||
output wire dram_req_write,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
|
||||
input wire dram_req_full,
|
||||
input wire dram_req_ready,
|
||||
|
||||
// DRAM Rsp
|
||||
input wire dram_rsp_valid,
|
||||
|
@ -25,11 +24,11 @@ module Vortex_Socket (
|
|||
output wire dram_rsp_ready,
|
||||
|
||||
// LLC Snooping
|
||||
input wire llc_snp_req_valid,
|
||||
input wire[31:0] llc_snp_req_addr,
|
||||
output wire llc_snp_req_full,
|
||||
input wire llc_snp_req_valid,
|
||||
input wire[31:0] llc_snp_req_addr,
|
||||
output wire llc_snp_req_full,
|
||||
|
||||
output wire out_ebreak
|
||||
output wire out_ebreak
|
||||
);
|
||||
if (`NUM_CLUSTERS == 1) begin
|
||||
|
||||
|
@ -53,7 +52,7 @@ module Vortex_Socket (
|
|||
.dram_req_write (dram_req_write),
|
||||
.dram_req_addr (dram_req_addr),
|
||||
.dram_req_data (dram_req_data),
|
||||
.dram_req_full (dram_req_full),
|
||||
.dram_req_ready (dram_req_ready),
|
||||
|
||||
.dram_rsp_valid (dram_rsp_valid),
|
||||
.dram_rsp_addr (dram_rsp_addr),
|
||||
|
@ -85,7 +84,7 @@ module Vortex_Socket (
|
|||
wire[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_cluster_dram_req_data;
|
||||
wire[31:0] per_cluster_dram_req_data_up[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0];
|
||||
|
||||
wire l3c_core_req_full;
|
||||
wire l3c_core_req_ready;
|
||||
|
||||
// // DRAM Dcache Rsp
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
|
||||
|
@ -113,7 +112,9 @@ module Vortex_Socket (
|
|||
genvar curr_cluster;
|
||||
for (curr_cluster = 0; curr_cluster < `NUM_CLUSTERS; curr_cluster=curr_cluster+1) begin
|
||||
|
||||
Vortex_Cluster #(.CLUSTER_ID(curr_cluster)) Vortex_Cluster(
|
||||
Vortex_Cluster #(
|
||||
.CLUSTER_ID(curr_cluster)
|
||||
) Vortex_Cluster(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.io_valid (per_cluster_io_valid [curr_cluster]),
|
||||
|
@ -123,7 +124,7 @@ module Vortex_Socket (
|
|||
.dram_req_read (per_cluster_dram_req_read [curr_cluster]),
|
||||
.dram_req_addr (per_cluster_dram_req_addr [curr_cluster]),
|
||||
.dram_req_data (per_cluster_dram_req_data_up [curr_cluster]),
|
||||
.dram_req_full (l3c_core_req_full),
|
||||
.dram_req_ready (l3c_core_req_ready),
|
||||
|
||||
.dram_rsp_valid (per_cluster_dram_rsp_valid [curr_cluster]),
|
||||
.dram_rsp_addr (per_cluster_dram_rsp_addr [curr_cluster]),
|
||||
|
@ -139,6 +140,7 @@ module Vortex_Socket (
|
|||
end
|
||||
|
||||
//////////////////// L3 Cache ////////////////////
|
||||
|
||||
wire[`L3NUM_REQUESTS-1:0] l3c_core_req_valid;
|
||||
wire[`L3NUM_REQUESTS-1:0][2:0] l3c_core_req_mem_write;
|
||||
wire[`L3NUM_REQUESTS-1:0][2:0] l3c_core_req_mem_read;
|
||||
|
@ -161,25 +163,24 @@ module Vortex_Socket (
|
|||
assign dram_rsp_data_port[llb_index] = dram_rsp_data[llb_index];
|
||||
end
|
||||
|
||||
//
|
||||
genvar l3c_curr_cluster;
|
||||
for (l3c_curr_cluster = 0; l3c_curr_cluster < `L3NUM_REQUESTS; l3c_curr_cluster=l3c_curr_cluster+1) begin
|
||||
// Core Request
|
||||
assign l3c_core_req_valid [l3c_curr_cluster] = per_cluster_dram_req_valid[l3c_curr_cluster];
|
||||
assign l3c_core_req_mem_read [l3c_curr_cluster] = per_cluster_dram_req_read [l3c_curr_cluster] ? `LW_MEM_READ : `NO_MEM_READ;
|
||||
assign l3c_core_req_mem_write [l3c_curr_cluster] = per_cluster_dram_req_write[l3c_curr_cluster] ? `SW_MEM_WRITE : `NO_MEM_WRITE;
|
||||
assign l3c_core_req_wb [l3c_curr_cluster] = per_cluster_dram_req_read [l3c_curr_cluster] ? 1 : 0;
|
||||
assign l3c_core_req_addr [l3c_curr_cluster] = per_cluster_dram_req_addr [l3c_curr_cluster];
|
||||
assign l3c_core_req_data [l3c_curr_cluster] = per_cluster_dram_req_data [l3c_curr_cluster];
|
||||
for (l3c_curr_cluster = 0; l3c_curr_cluster < `L3NUM_REQUESTS; l3c_curr_cluster=l3c_curr_cluster+1) begin
|
||||
// Core Request
|
||||
assign l3c_core_req_valid [l3c_curr_cluster] = per_cluster_dram_req_valid[l3c_curr_cluster];
|
||||
assign l3c_core_req_mem_read [l3c_curr_cluster] = per_cluster_dram_req_read [l3c_curr_cluster] ? `LW_MEM_READ : `NO_MEM_READ;
|
||||
assign l3c_core_req_mem_write [l3c_curr_cluster] = per_cluster_dram_req_write[l3c_curr_cluster] ? `SW_MEM_WRITE : `NO_MEM_WRITE;
|
||||
assign l3c_core_req_wb [l3c_curr_cluster] = per_cluster_dram_req_read [l3c_curr_cluster] ? 1 : 0;
|
||||
assign l3c_core_req_addr [l3c_curr_cluster] = per_cluster_dram_req_addr [l3c_curr_cluster];
|
||||
assign l3c_core_req_data [l3c_curr_cluster] = per_cluster_dram_req_data [l3c_curr_cluster];
|
||||
|
||||
// Core can't accept Response
|
||||
assign l3c_core_no_wb_slot [l3c_curr_cluster] = ~per_cluster_dram_rsp_ready[l3c_curr_cluster];
|
||||
// Core can't accept Response
|
||||
assign l3c_core_no_wb_slot [l3c_curr_cluster] = ~per_cluster_dram_rsp_ready[l3c_curr_cluster];
|
||||
|
||||
// Cache Fill Response
|
||||
assign per_cluster_dram_rsp_valid [l3c_curr_cluster] = l3c_wb [l3c_curr_cluster];
|
||||
assign per_cluster_dram_rsp_data [l3c_curr_cluster] = l3c_wb_data [l3c_curr_cluster];
|
||||
assign per_cluster_dram_rsp_addr [l3c_curr_cluster] = l3c_wb_addr [l3c_curr_cluster];
|
||||
end
|
||||
// Cache Fill Response
|
||||
assign per_cluster_dram_rsp_valid [l3c_curr_cluster] = l3c_wb [l3c_curr_cluster];
|
||||
assign per_cluster_dram_rsp_data [l3c_curr_cluster] = l3c_wb_data [l3c_curr_cluster];
|
||||
assign per_cluster_dram_rsp_addr [l3c_curr_cluster] = l3c_wb_addr [l3c_curr_cluster];
|
||||
end
|
||||
|
||||
VX_cache #(
|
||||
.CACHE_SIZE_BYTES (`L3CACHE_SIZE_BYTES),
|
||||
|
@ -203,8 +204,8 @@ module Vortex_Socket (
|
|||
.FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`L3SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
) gpu_l3cache (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
// Core Req (DRAM Fills/WB) To L2 Request
|
||||
.core_req_valid (l3c_core_req_valid),
|
||||
|
@ -218,7 +219,7 @@ module Vortex_Socket (
|
|||
.core_req_pc (0),
|
||||
|
||||
// L2 can't accept Core Request
|
||||
.delay_req (l3c_core_req_full),
|
||||
.core_req_ready (l3c_core_req_ready),
|
||||
|
||||
// Core can't accept L2 Request
|
||||
.core_no_wb_slot (|l3c_core_no_wb_slot),
|
||||
|
@ -247,7 +248,7 @@ module Vortex_Socket (
|
|||
.dram_req_read (dram_req_read),
|
||||
.dram_req_addr (dram_req_addr),
|
||||
.dram_req_data ({dram_req_data_port}),
|
||||
.dram_req_full (dram_req_full),
|
||||
.dram_req_ready (dram_req_ready),
|
||||
|
||||
// Snoop Request
|
||||
.snp_req_valid (llc_snp_req_valid),
|
||||
|
|
|
@ -48,7 +48,7 @@ module VX_bank #(
|
|||
input wire reset,
|
||||
|
||||
// Input Core Request
|
||||
input wire delay_req,
|
||||
input wire req_ready,
|
||||
input wire [NUM_REQUESTS-1:0] bank_valids,
|
||||
input wire [NUM_REQUESTS-1:0][31:0] bank_addr,
|
||||
input wire [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] bank_writedata,
|
||||
|
@ -168,7 +168,7 @@ module VX_bank #(
|
|||
wire [2:0] reqq_req_mem_write_st0;
|
||||
wire [31:0] reqq_req_pc_st0;
|
||||
|
||||
assign reqq_push = !delay_req && (|bank_valids);
|
||||
assign reqq_push = req_ready && (|bank_valids);
|
||||
|
||||
VX_cache_req_queue #(
|
||||
.CACHE_SIZE_BYTES (CACHE_SIZE_BYTES),
|
||||
|
|
|
@ -52,44 +52,46 @@ module VX_cache #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Req Info
|
||||
// Core request
|
||||
input wire [NUM_REQUESTS-1:0] core_req_valid,
|
||||
input wire [NUM_REQUESTS-1:0][31:0] core_req_addr,
|
||||
input wire [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] core_req_writedata,
|
||||
input wire [NUM_REQUESTS-1:0][2:0] core_req_mem_read,
|
||||
input wire [NUM_REQUESTS-1:0][2:0] core_req_mem_write,
|
||||
input wire [NUM_REQUESTS-1:0][31:0] core_req_addr,
|
||||
input wire [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] core_req_writedata,
|
||||
output wire core_req_ready,
|
||||
|
||||
// Req meta
|
||||
// Core request meta data
|
||||
input wire [4:0] core_req_rd,
|
||||
input wire [NUM_REQUESTS-1:0][1:0] core_req_wb,
|
||||
input wire [`NW_BITS-1:0] core_req_warp_num,
|
||||
input wire [31:0] core_req_pc,
|
||||
output wire delay_req,
|
||||
|
||||
|
||||
// Core Writeback
|
||||
input wire core_no_wb_slot,
|
||||
// Core response
|
||||
output wire [NUM_REQUESTS-1:0] core_wb_valid,
|
||||
output wire [4:0] core_wb_req_rd,
|
||||
output wire [1:0] core_wb_req_wb,
|
||||
output wire [`NW_BITS-1:0] core_wb_warp_num,
|
||||
output wire [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] core_wb_readdata,
|
||||
output wire [NUM_REQUESTS-1:0][31:0] core_wb_pc,
|
||||
output wire [NUM_REQUESTS-1:0][31:0] core_wb_address,
|
||||
output wire [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] core_wb_readdata,
|
||||
input wire core_no_wb_slot,
|
||||
|
||||
// Dram Fill Response
|
||||
// Core response meta data
|
||||
output wire [`NW_BITS-1:0] core_wb_warp_num,
|
||||
output wire [NUM_REQUESTS-1:0][31:0] core_wb_pc,
|
||||
|
||||
// DRAM request
|
||||
output wire dram_req_read,
|
||||
output wire dram_req_write,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [`IBANK_LINE_WORDS-1:0][31:0] dram_req_data,
|
||||
input wire dram_req_ready,
|
||||
|
||||
// DRAM response
|
||||
input wire dram_rsp_valid,
|
||||
input wire [31:0] dram_rsp_addr,
|
||||
input wire [`IBANK_LINE_WORDS-1:0][31:0] dram_rsp_data,
|
||||
output wire dram_rsp_ready,
|
||||
|
||||
// Dram request
|
||||
output wire dram_req_read,
|
||||
output wire dram_req_write,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [`IBANK_LINE_WORDS-1:0][31:0] dram_req_data,
|
||||
input wire dram_req_full,
|
||||
|
||||
|
||||
// Snoop Req
|
||||
input wire snp_req_valid,
|
||||
input wire [31:0] snp_req_addr,
|
||||
|
@ -132,7 +134,7 @@ module VX_cache #(
|
|||
wire [NUM_BANKS-1:0][31:0] per_bank_snp_fwd_addr;
|
||||
wire [NUM_BANKS-1:0] per_bank_snp_fwd_pop;
|
||||
|
||||
assign delay_req = (|per_bank_reqq_full);
|
||||
assign core_req_ready = ~(|per_bank_reqq_full);
|
||||
assign snp_req_full = (|per_bank_snrq_full);
|
||||
|
||||
// assign dram_rsp_ready = (NUM_BANKS == 1) ? per_bank_dram_rsp_ready[0] : per_bank_dram_rsp_ready[dram_rsp_addr[`BANK_SELECT_ADDR_RNG]];
|
||||
|
@ -171,7 +173,7 @@ module VX_cache #(
|
|||
.dram_req_write (dram_req_write),
|
||||
.dram_req_addr (dram_req_addr),
|
||||
.dram_req_data (dram_req_data),
|
||||
.dram_req_full (dram_req_full)
|
||||
.dram_req_ready (dram_req_ready)
|
||||
);
|
||||
|
||||
VX_cache_core_req_bank_sel #(
|
||||
|
@ -372,7 +374,7 @@ module VX_cache #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
// Core req
|
||||
.delay_req (delay_req),
|
||||
.req_ready (core_req_ready),
|
||||
.bank_valids (curr_bank_valids),
|
||||
.bank_addr (curr_bank_addr),
|
||||
.bank_writedata (curr_bank_writedata),
|
||||
|
|
|
@ -50,14 +50,14 @@ module VX_cache_dram_req_arb #(
|
|||
|
||||
// Fill Request
|
||||
output wire dfqq_full,
|
||||
input wire[NUM_BANKS-1:0] per_bank_dram_fill_req_valid,
|
||||
input wire[NUM_BANKS-1:0][31:0] per_bank_dram_fill_req_addr,
|
||||
input wire [NUM_BANKS-1:0] per_bank_dram_fill_req_valid,
|
||||
input wire [NUM_BANKS-1:0][31:0] per_bank_dram_fill_req_addr,
|
||||
|
||||
// DFQ Request
|
||||
output wire[NUM_BANKS-1:0] per_bank_dram_wb_queue_pop,
|
||||
input wire[NUM_BANKS-1:0] per_bank_dram_wb_req_valid,
|
||||
input wire[NUM_BANKS-1:0][31:0] per_bank_dram_wb_req_addr,
|
||||
input wire[NUM_BANKS-1:0][`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] per_bank_dram_wb_req_data,
|
||||
output wire [NUM_BANKS-1:0] per_bank_dram_wb_queue_pop,
|
||||
input wire [NUM_BANKS-1:0] per_bank_dram_wb_req_valid,
|
||||
input wire [NUM_BANKS-1:0][31:0] per_bank_dram_wb_req_addr,
|
||||
input wire [NUM_BANKS-1:0][`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] per_bank_dram_wb_req_data,
|
||||
|
||||
// real Dram request
|
||||
output wire dram_req_read,
|
||||
|
@ -65,7 +65,7 @@ module VX_cache_dram_req_arb #(
|
|||
output wire [31:0] dram_req_addr,
|
||||
output wire [`IBANK_LINE_WORDS-1:0][31:0] dram_req_data,
|
||||
|
||||
input wire dram_req_full
|
||||
input wire dram_req_ready
|
||||
);
|
||||
|
||||
wire pref_pop;
|
||||
|
@ -75,7 +75,8 @@ module VX_cache_dram_req_arb #(
|
|||
wire dwb_valid;
|
||||
wire dfqq_req;
|
||||
|
||||
assign pref_pop = !dwb_valid && !dfqq_req && !dram_req_full && pref_valid;
|
||||
assign pref_pop = !dwb_valid && !dfqq_req && dram_req_ready && pref_valid;
|
||||
|
||||
VX_prefetcher #(
|
||||
.PRFQ_SIZE (PRFQ_SIZE),
|
||||
.PRFQ_STRIDE (PRFQ_STRIDE),
|
||||
|
@ -99,7 +100,7 @@ module VX_cache_dram_req_arb #(
|
|||
wire dfqq_empty;
|
||||
`DEBUG_END
|
||||
|
||||
wire dfqq_pop = !dwb_valid && dfqq_req && !dram_req_full; // If no dwb, and dfqq has valids, then pop
|
||||
wire dfqq_pop = !dwb_valid && dfqq_req && dram_req_ready; // If no dwb, and dfqq has valids, then pop
|
||||
wire dfqq_push = (|per_bank_dram_fill_req_valid);
|
||||
|
||||
VX_cache_dfq_queue cache_dfq_queue(
|
||||
|
@ -115,9 +116,9 @@ module VX_cache_dram_req_arb #(
|
|||
.dfqq_full (dfqq_full)
|
||||
);
|
||||
|
||||
wire[`LOG2UP(NUM_BANKS)-1:0] dwb_bank;
|
||||
wire [`LOG2UP(NUM_BANKS)-1:0] dwb_bank;
|
||||
|
||||
wire[NUM_BANKS-1:0] use_wb_valid = per_bank_dram_wb_req_valid;
|
||||
wire [NUM_BANKS-1:0] use_wb_valid = per_bank_dram_wb_req_valid;
|
||||
|
||||
VX_generic_priority_encoder #(
|
||||
.N(NUM_BANKS)
|
||||
|
@ -127,7 +128,7 @@ module VX_cache_dram_req_arb #(
|
|||
.found (dwb_valid)
|
||||
);
|
||||
|
||||
assign per_bank_dram_wb_queue_pop = dram_req_full ? 0 : use_wb_valid & ((1 << dwb_bank));
|
||||
assign per_bank_dram_wb_queue_pop = dram_req_ready ? (use_wb_valid & ((1 << dwb_bank))) : 0;
|
||||
|
||||
wire dram_req = dwb_valid || dfqq_req || pref_pop;
|
||||
assign dram_req_read = ((dfqq_req && !dwb_valid) || pref_pop) && dram_req;
|
||||
|
|
|
@ -12,7 +12,7 @@ interface VX_gpu_dcache_dram_req_if #(
|
|||
wire dram_req_read;
|
||||
wire [31:0] dram_req_addr;
|
||||
wire [BANK_LINE_WORDS-1:0][31:0] dram_req_data;
|
||||
wire dram_req_full;
|
||||
wire dram_req_ready;
|
||||
|
||||
wire dram_rsp_ready;
|
||||
|
||||
|
|
|
@ -7,21 +7,21 @@ interface VX_gpu_dcache_req_if #(
|
|||
parameter NUM_REQUESTS = 32
|
||||
) ();
|
||||
|
||||
// Core Request
|
||||
// Core request
|
||||
wire [NUM_REQUESTS-1:0] core_req_valid;
|
||||
wire [NUM_REQUESTS-1:0][31:0] core_req_addr;
|
||||
wire [NUM_REQUESTS-1:0][31:0] core_req_writedata;
|
||||
wire [NUM_REQUESTS-1:0][2:0] core_req_mem_read;
|
||||
wire [NUM_REQUESTS-1:0][2:0] core_req_mem_write;
|
||||
wire [NUM_REQUESTS-1:0][31:0] core_req_addr;
|
||||
wire [NUM_REQUESTS-1:0][31:0] core_req_writedata;
|
||||
|
||||
// Core request Meta data
|
||||
wire [4:0] core_req_rd;
|
||||
wire [NUM_REQUESTS-1:0][1:0] core_req_wb;
|
||||
wire [`NW_BITS-1:0] core_req_warp_num;
|
||||
wire [31:0] core_req_pc;
|
||||
|
||||
// Can't WB
|
||||
wire core_no_wb_slot;
|
||||
wire core_no_wb_slot;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
|
@ -7,18 +7,19 @@ interface VX_gpu_dcache_rsp_if #(
|
|||
parameter NUM_REQUESTS = 32
|
||||
) ();
|
||||
|
||||
// Cache WB
|
||||
// Core response
|
||||
wire [NUM_REQUESTS-1:0] core_wb_valid;
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [4:0] core_wb_req_rd;
|
||||
wire [1:0] core_wb_req_wb;
|
||||
`IGNORE_WARNINGS_END
|
||||
wire [`NW_BITS-1:0] core_wb_warp_num;
|
||||
`IGNORE_WARNINGS_END
|
||||
wire [NUM_REQUESTS-1:0][31:0] core_wb_pc;
|
||||
wire [NUM_REQUESTS-1:0][31:0] core_wb_readdata;
|
||||
wire [NUM_REQUESTS-1:0][31:0] core_wb_pc;
|
||||
|
||||
// Core response meta data
|
||||
wire [`NW_BITS-1:0] core_wb_warp_num;
|
||||
|
||||
// Cache Full
|
||||
wire delay_req;
|
||||
wire core_req_ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -34,6 +34,77 @@ void Simulator::print_stats(std::ostream& out) {
|
|||
out << std::setw(24) << "# of total cycles:" << std::dec << total_cycles_ << std::endl;
|
||||
}
|
||||
|
||||
void Simulator::dbus_driver() {
|
||||
// Iterate through each element, and get pop index
|
||||
int dequeue_index = -1;
|
||||
bool dequeue_valid = false;
|
||||
for (int i = 0; i < dram_req_vec_.size(); i++) {
|
||||
if (dram_req_vec_[i].cycles_left > 0) {
|
||||
dram_req_vec_[i].cycles_left -= 1;
|
||||
}
|
||||
|
||||
if ((dram_req_vec_[i].cycles_left == 0) && (!dequeue_valid)) {
|
||||
dequeue_index = i;
|
||||
dequeue_valid = true;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef ENABLE_DRAM_STALLS
|
||||
dram_stalled_ = false;
|
||||
if (0 == (total_cycles_ % DRAM_STALLS_MODULO)) {
|
||||
dram_stalled_ = true;
|
||||
} else
|
||||
if (dram_req_vec_.size() >= DRAM_RQ_SIZE) {
|
||||
dram_stalled_ = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!dram_stalled_) {
|
||||
if (vortex_->dram_req_read) {
|
||||
// Need to add an element
|
||||
dram_req_t dram_req;
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
dram_req.base_addr = vortex_->dram_req_addr;
|
||||
dram_req.data = (unsigned *)malloc(GLOBAL_BLOCK_SIZE_BYTES);
|
||||
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
unsigned curr_addr = dram_req.base_addr + (i * 4);
|
||||
unsigned data_rd;
|
||||
ram_->getWord(curr_addr, &data_rd);
|
||||
dram_req.data[i] = data_rd;
|
||||
}
|
||||
dram_req_vec_.push_back(dram_req);
|
||||
}
|
||||
|
||||
if (vortex_->dram_req_write) {
|
||||
unsigned base_addr = vortex_->dram_req_addr;
|
||||
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
unsigned curr_addr = base_addr + (i * 4);
|
||||
unsigned data_wr = vortex_->dram_req_data[i];
|
||||
ram_->writeWord(curr_addr, &data_wr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (vortex_->dram_rsp_ready && dequeue_valid) {
|
||||
vortex_->dram_rsp_valid = 1;
|
||||
vortex_->dram_rsp_addr = dram_req_vec_[dequeue_index].base_addr;
|
||||
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
vortex_->dram_rsp_data[i] = dram_req_vec_[dequeue_index].data[i];
|
||||
}
|
||||
free(dram_req_vec_[dequeue_index].data);
|
||||
|
||||
dram_req_vec_.erase(dram_req_vec_.begin() + dequeue_index);
|
||||
} else {
|
||||
vortex_->dram_rsp_valid = 0;
|
||||
vortex_->dram_rsp_addr = 0;
|
||||
}
|
||||
|
||||
vortex_->dram_req_ready = ~dram_stalled_;
|
||||
}
|
||||
|
||||
#ifndef USE_MULTICORE
|
||||
|
||||
void Simulator::ibus_driver() {
|
||||
|
@ -51,6 +122,16 @@ void Simulator::ibus_driver() {
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef ENABLE_DRAM_STALLS
|
||||
I_dram_stalled_ = false;
|
||||
if (0 == (total_cycles_ % DRAM_STALLS_MODULO)) {
|
||||
I_dram_stalled_ = true;
|
||||
} else
|
||||
if (I_dram_req_vec_.size() >= DRAM_RQ_SIZE) {
|
||||
I_dram_stalled_ = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!I_dram_stalled_) {
|
||||
// std::cout << "Icache Dram Request received!\n";
|
||||
if (vortex_->I_dram_req_read) {
|
||||
|
@ -100,135 +181,11 @@ void Simulator::ibus_driver() {
|
|||
vortex_->I_dram_rsp_addr = 0;
|
||||
}
|
||||
|
||||
// #ifdef ENABLE_DRAM_STALLS
|
||||
// I_dram_stalled_ = false;
|
||||
// if (0 == (total_cycles_ % DRAM_STALLS_MODULO)) {
|
||||
// I_dram_stalled_ = true;
|
||||
// } else
|
||||
// if (I_dram_req_vec_.size() >= DRAM_RQ_SIZE) {
|
||||
// I_dram_stalled_ = true;
|
||||
// }
|
||||
// #endif
|
||||
|
||||
// vortex_->dram_req_delay = I_dram_stalled_;
|
||||
vortex_->I_dram_req_ready = ~I_dram_stalled_;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void Simulator::dbus_driver() {
|
||||
// Iterate through each element, and get pop index
|
||||
int dequeue_index = -1;
|
||||
bool dequeue_valid = false;
|
||||
for (int i = 0; i < dram_req_vec_.size(); i++) {
|
||||
if (dram_req_vec_[i].cycles_left > 0) {
|
||||
dram_req_vec_[i].cycles_left -= 1;
|
||||
}
|
||||
|
||||
if ((dram_req_vec_[i].cycles_left == 0) && (!dequeue_valid)) {
|
||||
dequeue_index = i;
|
||||
dequeue_valid = true;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef USE_MULTICORE
|
||||
|
||||
if (!dram_stalled_) {
|
||||
if (vortex_->dram_req_read) {
|
||||
// Need to add an element
|
||||
dram_req_t dram_req;
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
dram_req.base_addr = vortex_->dram_req_addr;
|
||||
dram_req.data = (unsigned *)malloc(GLOBAL_BLOCK_SIZE_BYTES);
|
||||
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
unsigned curr_addr = dram_req.base_addr + (i * 4);
|
||||
unsigned data_rd;
|
||||
ram_->getWord(curr_addr, &data_rd);
|
||||
dram_req.data[i] = data_rd;
|
||||
}
|
||||
dram_req_vec_.push_back(dram_req);
|
||||
}
|
||||
|
||||
if (vortex_->dram_req_write) {
|
||||
unsigned base_addr = vortex_->dram_req_addr;
|
||||
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
unsigned curr_addr = base_addr + (i * 4);
|
||||
unsigned data_wr = vortex_->dram_req_data[i];
|
||||
ram_->writeWord(curr_addr, &data_wr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (vortex_->dram_rsp_ready && dequeue_valid) {
|
||||
vortex_->dram_rsp_valid = 1;
|
||||
vortex_->dram_rsp_addr = dram_req_vec_[dequeue_index].base_addr;
|
||||
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
vortex_->dram_rsp_data[i] = dram_req_vec_[dequeue_index].data[i];
|
||||
}
|
||||
free(dram_req_vec_[dequeue_index].data);
|
||||
|
||||
dram_req_vec_.erase(dram_req_vec_.begin() + dequeue_index);
|
||||
} else {
|
||||
vortex_->dram_rsp_valid = 0;
|
||||
vortex_->dram_rsp_addr = 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
if (!dram_stalled_) {
|
||||
if (vortex_->dram_req_read) {
|
||||
// Need to add an element
|
||||
dram_req_t dram_req;
|
||||
dram_req.cycles_left = DRAM_LATENCY;
|
||||
dram_req.base_addr = vortex_->dram_req_addr;
|
||||
dram_req.data = (unsigned *)malloc(GLOBAL_BLOCK_SIZE_BYTES);
|
||||
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
unsigned curr_addr = dram_req.base_addr + (i * 4);
|
||||
unsigned data_rd;
|
||||
ram_->getWord(curr_addr, &data_rd);
|
||||
dram_req.data[i] = data_rd;
|
||||
}
|
||||
dram_req_vec_.push_back(dram_req);
|
||||
}
|
||||
|
||||
if (vortex_->dram_req_write) {
|
||||
unsigned base_addr = vortex_->dram_req_addr;
|
||||
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
unsigned curr_addr = base_addr + (i * 4);
|
||||
unsigned data_wr = vortex_->dram_req_data[i];
|
||||
ram_->writeWord(curr_addr, &data_wr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (vortex_->dram_rsp_ready && dequeue_valid) {
|
||||
vortex_->dram_rsp_valid = 1;
|
||||
vortex_->dram_rsp_addr = dram_req_vec_[dequeue_index].base_addr;
|
||||
|
||||
for (int i = 0; i < (GLOBAL_BLOCK_SIZE_BYTES / 4); i++) {
|
||||
vortex_->dram_rsp_data[i] = dram_req_vec_[dequeue_index].data[i];
|
||||
}
|
||||
free(dram_req_vec_[dequeue_index].data);
|
||||
|
||||
dram_req_vec_.erase(dram_req_vec_.begin() + dequeue_index);
|
||||
} else {
|
||||
vortex_->dram_rsp_valid = 0;
|
||||
vortex_->dram_rsp_addr = 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef USE_MULTICORE
|
||||
vortex_->dram_req_full = dram_stalled_;
|
||||
#else
|
||||
vortex_->dram_req_full = dram_stalled_;
|
||||
#endif
|
||||
}
|
||||
|
||||
void Simulator::io_handler() {
|
||||
#ifdef USE_MULTICORE
|
||||
bool io_valid = false;
|
||||
|
@ -309,7 +266,6 @@ void Simulator::send_snoops(uint32_t mem_addr, uint32_t size) {
|
|||
auto aligned_addr_start = GLOBAL_BLOCK_SIZE_BYTES * (mem_addr / GLOBAL_BLOCK_SIZE_BYTES);
|
||||
auto aligned_addr_end = GLOBAL_BLOCK_SIZE_BYTES * ((mem_addr + size + GLOBAL_BLOCK_SIZE_BYTES - 1) / GLOBAL_BLOCK_SIZE_BYTES);
|
||||
|
||||
#ifdef USE_MULTICORE
|
||||
// submit snoop requests for the needed blocks
|
||||
vortex_->llc_snp_req_addr = aligned_addr_start;
|
||||
vortex_->llc_snp_req_valid = false;
|
||||
|
@ -325,37 +281,13 @@ void Simulator::send_snoops(uint32_t mem_addr, uint32_t size) {
|
|||
vortex_->llc_snp_req_valid = true;
|
||||
}
|
||||
}
|
||||
#else
|
||||
// submit snoop requests for the needed blocks
|
||||
vortex_->snp_req_addr = aligned_addr_start;
|
||||
vortex_->snp_req_valid = false;
|
||||
for (;;) {
|
||||
this->step();
|
||||
if (vortex_->snp_req_valid) {
|
||||
vortex_->snp_req_valid = false;
|
||||
if (vortex_->snp_req_addr >= aligned_addr_end)
|
||||
break;
|
||||
vortex_->snp_req_addr += GLOBAL_BLOCK_SIZE_BYTES;
|
||||
}
|
||||
if (!vortex_->snp_req_full) {
|
||||
vortex_->snp_req_valid = true;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
 * Prints the running cycle count, then issues snoop requests covering
 * [mem_addr, mem_addr + size) and waits PIPELINE_FLUSH_LATENCY cycles.
 *
 * @param mem_addr start address of the region to snoop
 * @param size     size of the region (presumably in bytes - confirm
 *                 against send_snoops)
 */
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
    // total_cycles_ is a 64-bit counter; "%ld" only matches it on targets
    // where long is 64 bits wide, so print through an explicit cast instead.
    printf("[sim] total cycles: %llu\n",
           static_cast<unsigned long long>(this->total_cycles_));

    // send snoop requests to the caches
    this->send_snoops(mem_addr, size);
    this->wait(PIPELINE_FLUSH_LATENCY);
}
|
||||
|
||||
bool Simulator::run() {
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include <ostream>
|
||||
#include <vector>
|
||||
|
||||
#define ENABLE_DRAM_STALLS
|
||||
//#define ENABLE_DRAM_STALLS
|
||||
#define DRAM_LATENCY 200
|
||||
#define DRAM_RQ_SIZE 16
|
||||
#define DRAM_STALLS_MODULO 16
|
||||
|
@ -55,7 +55,7 @@ private:
|
|||
void send_snoops(uint32_t mem_addr, uint32_t size);
|
||||
void wait(uint32_t cycles);
|
||||
|
||||
int64_t total_cycles_;
|
||||
uint64_t total_cycles_;
|
||||
bool dram_stalled_;
|
||||
bool I_dram_stalled_;
|
||||
std::vector<dram_req_t> dram_req_vec_;
|
||||
|
|
|
@ -12,7 +12,7 @@ int main(int argc, char **argv)
|
|||
|
||||
Verilated::commandArgs(argc, argv);
|
||||
|
||||
#define ALL_TESTS
|
||||
//#define ALL_TESTS
|
||||
#ifdef ALL_TESTS
|
||||
bool passed = true;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue