added config.vh

This commit is contained in:
Blaise Tine 2020-04-16 07:49:19 -04:00
parent c913e542e9
commit 81745f08c9
109 changed files with 1426 additions and 1544 deletions

View file

@ -9,11 +9,11 @@ extern int vx_dev_caps(int caps_id) {
case VX_CAPS_VERSION:
return 0;
case VX_CAPS_MAX_CORES:
return NUMBER_CORES;
return NUM_CORES;
case VX_CAPS_MAX_WARPS:
return NW;
return NUM_WARPS;
case VX_CAPS_MAX_THREADS:
return NT;
return NUM_THREADS;
case VX_CAPS_CACHE_LINESIZE:
return GLOBAL_BLOCK_SIZE_BYTES;
case VX_CAPS_LOCAL_MEM_SIZE:

View file

@ -142,7 +142,7 @@ public:
private:
void run() {
Harp::ArchDef arch("rv32i", NW, NT);
Harp::ArchDef arch("rv32i", NUM_WARPS, NUM_THREADS);
Harp::WordDecoder dec(arch);
Harp::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true);
Harp::Core core(arch, dec, mu);

View file

@ -33,7 +33,7 @@ THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu
.PHONY: build_config
build_config:
./gen_config.py --rtl_locations
./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./simulate/VX_config.h
# -LDFLAGS '-lsystemc'
VERILATOR: build_config

View file

@ -6,8 +6,9 @@ ALL:sim
SRC = \
vortex_dpi.cpp \
vortex_tb.v \
../rtl/VX_define.v \
../rtl/VX_define_synth.v \
../rtl/VX_user_config.vh \
../rtl/VX_config.vh \
../rtl/VX_define.vh \
../rtl/interfaces/VX_branch_response_inter.v \
../rtl/interfaces/VX_csr_req_inter.v \
../rtl/interfaces/VX_csr_wb_inter.v \

View file

@ -2182,7 +2182,7 @@ Project_File_33 = ../rtl/shared_memory/VX_set_bit.v
Project_File_P_33 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 vlog_noload 0 cover_branch 0 folder {Top Level} last_compile 0 cover_fsm 0 cover_excludedefault 0 vlog_enable0In 0 vlog_disableopt 0 cover_covercells 0 voptflow 1 cover_optlevel 3 vlog_showsource 0 vlog_hazard 0 toggle - vlog_0InOptions {} ood 1 cover_noshort 0 vlog_upper 0 compile_to work vlog_options {} compile_order 53 cover_expr 0 dont_compile 0 cover_stmt 0
Project_File_34 = ../rtl/interfaces/VX_dcache_response_inter.v
Project_File_P_34 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 vlog_noload 0 last_compile 1571845660 folder {Top Level} cover_branch 0 cover_fsm 0 vlog_enable0In 0 cover_excludedefault 0 vlog_disableopt 0 cover_covercells 0 vlog_hazard 0 vlog_showsource 0 cover_optlevel 3 voptflow 1 ood 0 vlog_0InOptions {} toggle - vlog_options {} compile_to work vlog_upper 0 cover_noshort 0 compile_order 27 dont_compile 0 cover_expr 0 cover_stmt 0
Project_File_35 = ../rtl/VX_define.v
Project_File_35 = ../rtl/VX_define.vh
Project_File_P_35 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 folder {Top Level} cover_branch 0 cover_fsm 0 last_compile 1572058635 vlog_noload 0 cover_excludedefault 0 vlog_enable0In 0 vlog_disableopt 0 cover_covercells 0 voptflow 1 cover_optlevel 3 vlog_showsource 0 vlog_hazard 0 toggle - vlog_0InOptions {} ood 0 cover_noshort 0 vlog_upper 0 compile_to work vlog_options {} compile_order 7 cover_expr 0 dont_compile 0 cover_stmt 0
Project_File_36 = ../rtl/interfaces/VX_csr_req_inter.v
Project_File_P_36 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 vlog_noload 0 last_compile 1571845660 folder {Top Level} cover_branch 0 cover_fsm 0 vlog_enable0In 0 cover_excludedefault 0 vlog_disableopt 0 cover_covercells 0 vlog_hazard 0 vlog_showsource 0 cover_optlevel 3 voptflow 1 ood 0 vlog_0InOptions {} toggle - vlog_options {} compile_to work vlog_upper 0 cover_noshort 0 compile_order 24 dont_compile 0 cover_expr 0 cover_stmt 0

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
//`define NUMBER_BANKS 8
//`define NUM_WORDS_PER_BLOCK 4

View file

@ -11,9 +11,10 @@ vortex_afu.json
+incdir+../rtl/pipe_regs
+incdir+../rtl/compat
../rtl/VX_define_synth.v
../rtl/VX_define.v
../rtl/generic_cache/VX_cache_config.v
../rtl/VX_user_config.vh
../rtl/VX_config.vh
../rtl/VX_define.vh
../rtl/generic_cache/VX_cache_config.vh
../rtl/Vortex_Socket.v
../rtl/Vortex_Cluster.v
../rtl/Vortex.v

5
hw/rtl/.gitignore vendored
View file

@ -1,4 +1 @@
/simulate/VX_define.h
/simulate/VX_define_synth.h
/VX_define_synth.v
/results.txt
/VX_user_config.vh

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_alu(
input wire clk,

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_back_end
#(

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,4 @@
`include "../VX_define.v"
`include "../VX_define.vh"
module VX_csr_data (
input wire clk, // Clock
@ -19,17 +19,17 @@ module VX_csr_data (
/* verilator lint_off WIDTH */
// wire[`NT_M1:0][31:0] thread_ids;
// wire[`NT_M1:0][31:0] warp_ids;
// wire[`NUM_THREADS-1:0][31:0] thread_ids;
// wire[`NUM_THREADS-1:0][31:0] warp_ids;
// genvar cur_t;
// for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin
// for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin
// assign thread_ids[cur_t] = cur_t;
// end
// genvar cur_tw;
// for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin
// assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, in_read_warp_num};
// for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin
// assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, in_read_warp_num};
// end
reg[11:0] csr[1023:0];

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_csr_pipe
#(
@ -14,8 +14,8 @@ module VX_csr_pipe
output wire stall_gpr_csr
);
wire[`NT_M1:0] valid_s2;
wire[`NW_M1:0] warp_num_s2;
wire[`NUM_THREADS-1:0] valid_s2;
wire[`NW_BITS-1:0] warp_num_s2;
wire[4:0] rd_s2;
wire[1:0] wb_s2;
wire[4:0] alu_op_s2;
@ -60,7 +60,7 @@ module VX_csr_pipe
wire zero = 0;
VX_generic_register #(.N(32 + 32 + 12 + 1 + 2 + 5 + (`NW_M1+1) + `NT)) csr_reg_s2 (
VX_generic_register #(.N(32 + 32 + 12 + 1 + 2 + 5 + (`NW_BITS-1+1) + `NUM_THREADS)) csr_reg_s2 (
.clk (clk),
.reset(reset),
.stall(no_slot_csr),
@ -70,28 +70,26 @@ module VX_csr_pipe
);
wire[`NT_M1:0][31:0] final_csr_data;
wire[`NUM_THREADS-1:0][31:0] final_csr_data;
wire[`NT_M1:0][31:0] thread_ids;
wire[`NT_M1:0][31:0] warp_ids;
wire[`NT_M1:0][31:0] warp_idz;
wire[`NT_M1:0][31:0] csr_vec_read_data_s2;
wire[`NUM_THREADS-1:0][31:0] thread_ids;
wire[`NUM_THREADS-1:0][31:0] warp_ids;
wire[`NUM_THREADS-1:0][31:0] warp_idz;
wire[`NUM_THREADS-1:0][31:0] csr_vec_read_data_s2;
genvar cur_t;
for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin
for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin
assign thread_ids[cur_t] = cur_t;
end
genvar cur_tw;
for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin
assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, warp_num_s2};
assign warp_idz[cur_tw] = (warp_num_s2 + (CORE_ID*`NW));
for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin
assign warp_ids[cur_tw] = warp_num_s2;
assign warp_idz[cur_tw] = 32'(warp_num_s2 + (CORE_ID * `NUM_WARPS));
end
genvar cur_v;
for (cur_v = 0; cur_v < `NT; cur_v = cur_v + 1) begin
for (cur_v = 0; cur_v < `NUM_THREADS; cur_v = cur_v + 1) begin
assign csr_vec_read_data_s2[cur_v] = csr_read_data_s2;
end
@ -104,7 +102,6 @@ module VX_csr_pipe
warp_id_select ? warp_idz :
csr_vec_read_data_s2;
assign VX_csr_wb.valid = valid_s2;
assign VX_csr_wb.warp_num = warp_num_s2;
assign VX_csr_wb.rd = rd_s2;

View file

@ -1,5 +1,5 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_csr_wrapper (
VX_csr_req_inter VX_csr_req,
@ -8,17 +8,17 @@ module VX_csr_wrapper (
);
wire[`NT_M1:0][31:0] thread_ids;
wire[`NT_M1:0][31:0] warp_ids;
wire[`NUM_THREADS-1:0][31:0] thread_ids;
wire[`NUM_THREADS-1:0][31:0] warp_ids;
genvar cur_t, cur_tw;
generate
for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin : thread_ids_init
for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin : thread_ids_init
assign thread_ids[cur_t] = cur_t;
end
for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin : warp_ids_init
assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, VX_csr_req.warp_num};
for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin : warp_ids_init
assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, VX_csr_req.warp_num};
end
endgenerate

View file

@ -1,5 +1,5 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_decode(
// Fetch Inputs
@ -16,11 +16,11 @@ module VX_decode(
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
wire[`NW_M1:0] in_warp_num = fd_inst_meta_de.warp_num;
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
assign VX_frE_to_bckE_req.curr_PC = in_curr_PC;
wire[`NT_M1:0] in_valid = fd_inst_meta_de.valid;
wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid;
wire[6:0] curr_opcode;

163
hw/rtl/VX_define.vh Normal file
View file

@ -0,0 +1,163 @@
// Shared compile-time definitions for the RTL: derived sizing macros,
// instruction/opcode encodings, and per-cache derived parameters.
// The configuration macros referenced below (NUM_CORES, NUM_WARPS,
// NUM_THREADS, *_LINE_SIZE_BYTES, *WORD_SIZE_BYTES, ...) are not defined in
// this file; they are expected to come from the included VX_config.vh.
`ifndef VX_DEFINE
`define VX_DEFINE

`include "./VX_config.vh"

// Optional build switches (disabled by default).
// `define QUEUE_FORCE_MLAB 1
// `define SYN 1
// `define ASIC 1
// `define SYN_FUNC 1

// Number of bits needed to index x items, never less than 1 so that
// declarations of the form [`LOG2UP(x)-1:0] stay valid when x is 0 or 1.
// The argument is parenthesised so that expression arguments (e.g. A + B)
// are compared as a whole rather than by operator precedence.
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)

`define NUM_CORES_PER_CLUSTER (`NUM_CORES / `NUM_CLUSTERS)

// Index widths for warps, threads and cores.
`define NW_BITS `LOG2UP(`NUM_WARPS)
`define NT_BITS `LOG2UP(`NUM_THREADS)
`define NC_BITS `LOG2UP(`NUM_CORES)

// 7-bit instruction opcode values.
`define R_INST 7'd51
`define L_INST 7'd3
`define ALU_INST 7'd19
`define S_INST 7'd35
`define B_INST 7'd99
`define LUI_INST 7'd55
`define AUIPC_INST 7'd23
`define JAL_INST 7'd111
`define JALR_INST 7'd103
`define SYS_INST 7'd115
`define GPGPU_INST 7'h6b

// WRITEBACK source selector (2 bits).
// This group used to be defined twice in this header with identical values;
// it is now defined once to avoid macro-redefinition warnings.
`define WB_ALU 2'h1
`define WB_MEM 2'h2
`define WB_JAL 2'h3
`define NO_WB 2'h0

// Second-operand source selector.
`define RS2_IMMED 1
`define RS2_REG 0

// Memory read type (3 bits); 3'h7 means "no read".
`define NO_MEM_READ 3'h7
`define LB_MEM_READ 3'h0
`define LH_MEM_READ 3'h1
`define LW_MEM_READ 3'h2
`define LBU_MEM_READ 3'h4
`define LHU_MEM_READ 3'h5

// Memory write type (3 bits); 3'h7 means "no write".
`define NO_MEM_WRITE 3'h7
`define SB_MEM_WRITE 3'h0
`define SH_MEM_WRITE 3'h1
`define SW_MEM_WRITE 3'h2

// Branch type (3 bits).
`define NO_BRANCH 3'h0
`define BEQ 3'h1
`define BNE 3'h2
`define BLT 3'h3
`define BGT 3'h4
`define BLTU 3'h5
`define BGTU 3'h6

// ALU operation selector (5 bits).
// NOTE(review): NO_ALU and CSR_ALU_RC share the encoding 5'd15, so the two
// cannot be told apart by value alone -- confirm this is intentional.
`define NO_ALU 5'd15
`define ADD 5'd0
`define SUB 5'd1
`define SLLA 5'd2
`define SLT 5'd3
`define SLTU 5'd4
`define XOR 5'd5
`define SRL 5'd6
`define SRA 5'd7
`define OR 5'd8
`define AND 5'd9
`define SUBU 5'd10
`define LUI_ALU 5'd11
`define AUIPC_ALU 5'd12
`define CSR_ALU_RW 5'd13
`define CSR_ALU_RS 5'd14
`define CSR_ALU_RC 5'd15
`define MUL 5'd16
`define MULH 5'd17
`define MULHSU 5'd18
`define MULHU 5'd19
`define DIV 5'd20
`define DIVU 5'd21
`define REM 5'd22
`define REMU 5'd23

// JAL
`define JUMP 1'h1
`define NO_JUMP 1'h0

// STALLS
`define STALL 1'h1
`define NO_STALL 1'h0

// Branch outcome.
`define TAKEN 1'h1
`define NOT_TAKEN 1'h0

// Register index 0.
`define ZERO_REG 5'h0

// ======================= Dcache Configurable Knobs ==========================
// Function ID
`define DFUNC_ID 0
// Size of line inside a bank in bits
`define DBANK_LINE_SIZE (`DBANK_LINE_SIZE_BYTES * 8)
// Bank Number of words in a line
`define DBANK_LINE_WORDS (`DBANK_LINE_SIZE_BYTES / `DWORD_SIZE_BYTES)

// ======================= Icache Configurable Knobs ==========================
// Function ID
`define IFUNC_ID 1
// Size of line inside a bank in bits
`define IBANK_LINE_SIZE (`IBANK_LINE_SIZE_BYTES * 8)
// Bank Number of words in a line
`define IBANK_LINE_WORDS (`IBANK_LINE_SIZE_BYTES / `IWORD_SIZE_BYTES)

// ======================= SM Configurable Knobs ==============================
// Function ID
`define SFUNC_ID 2
// Size of line inside a bank in bits
`define SBANK_LINE_SIZE (`SBANK_LINE_SIZE_BYTES * 8)
// Bank Number of words in a line
`define SBANK_LINE_WORDS (`SBANK_LINE_SIZE_BYTES / `SWORD_SIZE_BYTES)

// ======================= L2cache Configurable Knobs =========================
// Function ID
`define L2FUNC_ID 3
// Size of line inside a bank in bits
`define L2BANK_LINE_SIZE (`L2BANK_LINE_SIZE_BYTES * 8)
// Bank Number of words in a line
`define L2BANK_LINE_WORDS (`L2BANK_LINE_SIZE_BYTES / `L2WORD_SIZE_BYTES)

// ======================= L3cache Configurable Knobs =========================
// Function ID
// NOTE(review): same value as L2FUNC_ID (3); the other function IDs are
// distinct (0, 1, 2, 3) -- confirm whether L3 was meant to get its own ID.
`define L3FUNC_ID 3
// Size of line inside a bank in bits
`define L3BANK_LINE_SIZE (`L3BANK_LINE_SIZE_BYTES * 8)
// Bank Number of words in a line
`define L3BANK_LINE_WORDS (`L3BANK_LINE_SIZE_BYTES / `L3WORD_SIZE_BYTES)

// VX_DEFINE
`endif

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_dmem_controller (
input wire clk,
@ -36,7 +36,7 @@ module VX_dmem_controller (
wire dcache_wants_wb = (|VX_dcache_rsp_dcache.core_wb_valid);
// Dcache Request
assign VX_dcache_req_dcache.core_req_valid = VX_dcache_req.core_req_valid & {`NT{~to_shm}};
assign VX_dcache_req_dcache.core_req_valid = VX_dcache_req.core_req_valid & {`NUM_THREADS{~to_shm}};
assign VX_dcache_req_dcache.core_req_addr = VX_dcache_req.core_req_addr;
assign VX_dcache_req_dcache.core_req_writedata = VX_dcache_req.core_req_writedata;
assign VX_dcache_req_dcache.core_req_mem_read = VX_dcache_req.core_req_mem_read;
@ -49,7 +49,7 @@ module VX_dmem_controller (
// Shred Memory Request
assign VX_dcache_req_smem.core_req_valid = VX_dcache_req.core_req_valid & {`NT{to_shm}};
assign VX_dcache_req_smem.core_req_valid = VX_dcache_req.core_req_valid & {`NUM_THREADS{to_shm}};
assign VX_dcache_req_smem.core_req_addr = VX_dcache_req.core_req_addr;
assign VX_dcache_req_smem.core_req_writedata = VX_dcache_req.core_req_writedata;
assign VX_dcache_req_smem.core_req_mem_read = VX_dcache_req.core_req_mem_read;
@ -73,8 +73,8 @@ module VX_dmem_controller (
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_smem_dram_req();
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_smem_dram_res();
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_smem_dram_req();
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_smem_dram_res();

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_execute_unit (
input wire clk,
@ -18,8 +18,8 @@ module VX_execute_unit (
output wire out_delay
);
wire[`NT_M1:0][31:0] in_a_reg_data;
wire[`NT_M1:0][31:0] in_b_reg_data;
wire[`NUM_THREADS-1:0][31:0] in_a_reg_data;
wire[`NUM_THREADS-1:0][31:0] in_b_reg_data;
wire[4:0] in_alu_op;
wire in_rs2_src;
wire[31:0] in_itype_immed;
@ -41,11 +41,11 @@ module VX_execute_unit (
assign in_curr_PC = VX_exec_unit_req.curr_PC;
wire[`NT_M1:0][31:0] alu_result;
wire[`NT_M1:0] alu_stall;
wire[`NUM_THREADS-1:0][31:0] alu_result;
wire[`NUM_THREADS-1:0] alu_stall;
genvar index_out_reg;
generate
for (index_out_reg = 0; index_out_reg < `NT; index_out_reg = index_out_reg + 1) begin : alu_defs
for (index_out_reg = 0; index_out_reg < `NUM_THREADS; index_out_reg = index_out_reg + 1) begin : alu_defs
VX_alu vx_alu(
.clk(clk),
.reset(reset),
@ -69,9 +69,9 @@ module VX_execute_unit (
assign out_delay = no_slot_exec || internal_stall;
wire [$clog2(`NT)-1:0] jal_branch_use_index;
wire [$clog2(`NUM_THREADS)-1:0] jal_branch_use_index;
wire jal_branch_found_valid;
VX_generic_priority_encoder #(.N(`NT)) choose_alu_result(
VX_generic_priority_encoder #(.N(`NUM_THREADS)) choose_alu_result(
.valids(VX_exec_unit_req.valid),
.index (jal_branch_use_index),
.found (jal_branch_found_valid)
@ -95,10 +95,10 @@ module VX_execute_unit (
end
wire[`NT_M1:0][31:0] duplicate_PC_data;
wire[`NUM_THREADS-1:0][31:0] duplicate_PC_data;
genvar i;
generate
for (i = 0; i < `NT; i=i+1) begin : pc_data_setup
for (i = 0; i < `NUM_THREADS; i=i+1) begin : pc_data_setup
assign duplicate_PC_data[i] = VX_exec_unit_req.PC_next;
end
endgenerate
@ -113,7 +113,7 @@ module VX_execute_unit (
// Actual Writeback
assign VX_inst_exec_wb.rd = VX_exec_unit_req.rd;
assign VX_inst_exec_wb.wb = VX_exec_unit_req.wb;
assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid & {`NT{!internal_stall}};
assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid & {`NUM_THREADS{!internal_stall}};
assign VX_inst_exec_wb.wb_warp_num = VX_exec_unit_req.warp_num;
assign VX_inst_exec_wb.alu_result = VX_exec_unit_req.jal ? duplicate_PC_data : alu_result;
@ -141,7 +141,7 @@ module VX_execute_unit (
// .out ({VX_inst_exec_wb.rd , VX_inst_exec_wb.wb , VX_inst_exec_wb.wb_valid , VX_inst_exec_wb.wb_warp_num , VX_inst_exec_wb.alu_result , VX_inst_exec_wb.exec_wb_pc })
// );
VX_generic_register #(.N(33 + `NW_M1 + 1)) jal_reg(
VX_generic_register #(.N(33 + `NW_BITS-1 + 1)) jal_reg(
.clk (clk),
.reset(reset),
.stall(zero),
@ -150,7 +150,7 @@ module VX_execute_unit (
.out ({VX_jal_rsp.jal , VX_jal_rsp.jal_dest , VX_jal_rsp.jal_warp_num})
);
VX_generic_register #(.N(34 + `NW_M1 + 1)) branch_reg(
VX_generic_register #(.N(34 + `NW_BITS-1 + 1)) branch_reg(
.clk (clk),
.reset(reset),
.stall(zero),

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_fetch (
input wire clk,
@ -7,8 +7,8 @@ module VX_fetch (
VX_join_inter VX_join,
input wire schedule_delay,
input wire icache_stage_delay,
input wire[`NW_M1:0] icache_stage_wid,
input wire[`NT-1:0] icache_stage_valids,
input wire[`NW_BITS-1:0] icache_stage_wid,
input wire[`NUM_THREADS-1:0] icache_stage_valids,
output wire out_ebreak,
VX_jal_response_inter VX_jal_rsp,
@ -17,8 +17,8 @@ module VX_fetch (
VX_warp_ctl_inter VX_warp_ctl
);
wire[`NT_M1:0] thread_mask;
wire[`NW_M1:0] warp_num;
wire[`NUM_THREADS-1:0] thread_mask;
wire[`NW_BITS-1:0] warp_num;
wire[31:0] warp_pc;
wire scheduled_warp;

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_front_end (
input wire clk,
@ -37,8 +37,8 @@ wire icache_stage_delay;
wire vortex_ebreak;
wire terminate_sim;
wire[`NW_M1:0] icache_stage_wid;
wire[`NT-1:0] icache_stage_valids;
wire[`NW_BITS-1:0] icache_stage_wid;
wire[`NUM_THREADS-1:0] icache_stage_valids;
reg old_ebreak; // This should be eventually removed
always @(posedge clk) begin

View file

@ -1,7 +1,7 @@
`ifndef VX_GENERIC_PRIORITY_ENCODER
`define VX_GENERIC_PRIORITY_ENCODER
`include "VX_define.v"
`include "VX_define.vh"
module VX_generic_priority_encoder
#(
@ -10,8 +10,8 @@ module VX_generic_priority_encoder
(
input wire[N-1:0] valids,
//output reg[$clog2(N)-1:0] index,
output reg[(`CLOG2(N))-1:0] index,
//output reg[`CLOG2(N):0] index, // eh
output reg[(`LOG2UP(N))-1:0] index,
//output reg[`LOG2UP(N):0] index, // eh
output reg found
);
@ -22,7 +22,7 @@ module VX_generic_priority_encoder
for (i = N-1; i >= 0; i = i - 1) begin
if (valids[i]) begin
//index = i[$clog2(N)-1:0];
index = i[(`CLOG2(N))-1:0];
index = i[(`LOG2UP(N))-1:0];
found = 1;
end
end

View file

@ -1,5 +1,3 @@
`include "VX_define_synth.v"
module VX_generic_queue_ll
#(
parameter DATAW = 4,

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_gpgpu_inst (
// Input
@ -9,14 +9,14 @@ module VX_gpgpu_inst (
);
wire[`NT_M1:0] curr_valids = VX_gpu_inst_req.valid;
wire[`NUM_THREADS-1:0] curr_valids = VX_gpu_inst_req.valid;
wire is_split = (VX_gpu_inst_req.is_split);
wire[`NT_M1:0] tmc_new_mask;
wire all_threads = `NT < VX_gpu_inst_req.a_reg_data[0];
wire[`NUM_THREADS-1:0] tmc_new_mask;
wire all_threads = `NUM_THREADS < VX_gpu_inst_req.a_reg_data[0];
genvar curr_t;
generate
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin : tmc_new_mask_init
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin : tmc_new_mask_init
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < VX_gpu_inst_req.a_reg_data[0];
end
endgenerate
@ -33,11 +33,11 @@ module VX_gpgpu_inst (
wire wspawn = VX_gpu_inst_req.is_wspawn;
wire[31:0] wspawn_pc = VX_gpu_inst_req.rd2;
wire all_active = `NW < VX_gpu_inst_req.a_reg_data[0];
wire[`NW-1:0] wspawn_new_active;
wire all_active = `NUM_WARPS < VX_gpu_inst_req.a_reg_data[0];
wire[`NUM_WARPS-1:0] wspawn_new_active;
genvar curr_w;
generate
for (curr_w = 0; curr_w < `NW; curr_w=curr_w+1) begin : wspawn_new_active_init
for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin : wspawn_new_active_init
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < VX_gpu_inst_req.a_reg_data[0];
end
endgenerate
@ -47,19 +47,19 @@ module VX_gpgpu_inst (
assign VX_warp_ctl.barrier_id = VX_gpu_inst_req.a_reg_data[0];
wire[31:0] num_warps_m1 = VX_gpu_inst_req.rd2 - 1;
assign VX_warp_ctl.num_warps = num_warps_m1[$clog2(`NW):0];
assign VX_warp_ctl.num_warps = num_warps_m1[$clog2(`NUM_WARPS):0];
assign VX_warp_ctl.wspawn = wspawn;
assign VX_warp_ctl.wspawn_pc = wspawn_pc;
assign VX_warp_ctl.wspawn_new_active = wspawn_new_active;
wire[`NT_M1:0] split_new_use_mask;
wire[`NT_M1:0] split_new_later_mask;
wire[`NUM_THREADS-1:0] split_new_use_mask;
wire[`NUM_THREADS-1:0] split_new_later_mask;
// VX_gpu_inst_req.pc
genvar curr_s_t;
generate
for (curr_s_t = 0; curr_s_t < `NT; curr_s_t=curr_s_t+1) begin : masks_init
for (curr_s_t = 0; curr_s_t < `NUM_THREADS; curr_s_t=curr_s_t+1) begin : masks_init
wire curr_bool = (VX_gpu_inst_req.a_reg_data[curr_s_t] == 32'b1);
assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool);
@ -67,18 +67,18 @@ module VX_gpgpu_inst (
end
endgenerate
wire[$clog2(`NT):0] num_valids;
wire[$clog2(`NUM_THREADS):0] num_valids;
VX_countones #(.N(`NT)) valids_counter (
VX_countones #(.N(`NUM_THREADS)) valids_counter (
.valids(curr_valids),
.count (num_valids)
);
// wire[`NW_M1:0] num_valids = $countones(curr_valids);
// wire[`NW_BITS-1:0] num_valids = $countones(curr_valids);
assign VX_warp_ctl.is_split = is_split && (num_valids > 1);
assign VX_warp_ctl.dont_split = VX_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NT{1'b1}}));
assign VX_warp_ctl.dont_split = VX_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}}));
assign VX_warp_ctl.split_new_mask = split_new_use_mask;
assign VX_warp_ctl.split_later_mask = split_new_later_mask;
assign VX_warp_ctl.split_save_pc = VX_gpu_inst_req.pc_next;

View file

@ -1,5 +1,5 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_gpr (
input wire clk,
@ -8,8 +8,8 @@ module VX_gpr (
VX_gpr_read_inter VX_gpr_read,
VX_wb_inter VX_writeback_inter,
output reg[`NT_M1:0][31:0] out_a_reg_data,
output reg[`NT_M1:0][31:0] out_b_reg_data
output reg[`NUM_THREADS-1:0][31:0] out_a_reg_data,
output reg[`NUM_THREADS-1:0][31:0] out_b_reg_data
);
@ -41,10 +41,10 @@ module VX_gpr (
wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid);
wire[`NT_M1:0][31:0] write_bit_mask;
wire[`NUM_THREADS-1:0][31:0] write_bit_mask;
genvar curr_t;
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin
wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t];
assign write_bit_mask[curr_t] = {32{~local_write}};
end
@ -59,14 +59,14 @@ module VX_gpr (
wire cena_1 = 0;
wire cena_2 = 0;
wire[`NT_M1:0][31:0] temp_a;
wire[`NT_M1:0][31:0] temp_b;
wire[`NUM_THREADS-1:0][31:0] temp_a;
wire[`NUM_THREADS-1:0][31:0] temp_b;
`ifndef SYN
genvar thread;
genvar curr_bit;
for (thread = 0; thread < `NT; thread = thread + 1)
for (thread = 0; thread < `NUM_THREADS; thread = thread + 1)
begin
for (curr_bit = 0; curr_bit < 32; curr_bit=curr_bit+1)
begin
@ -83,7 +83,7 @@ module VX_gpr (
`endif
wire[`NT_M1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0;
wire[`NUM_THREADS-1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0;
genvar curr_base_thread;
for (curr_base_thread = 0; curr_base_thread < 'NT; curr_base_thread=curr_base_thread+4)

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_gpr_stage (
input wire clk,
@ -114,15 +114,15 @@ module VX_gpr_stage (
);
wire[`NT_M1:0][31:0] temp_store_data;
wire[`NT_M1:0][31:0] temp_base_address; // A reg data
wire[`NUM_THREADS-1:0][31:0] temp_store_data;
wire[`NUM_THREADS-1:0][31:0] temp_base_address; // A reg data
wire[`NT_M1:0][31:0] real_store_data;
wire[`NT_M1:0][31:0] real_base_address; // A reg data
wire[`NUM_THREADS-1:0][31:0] real_store_data;
wire[`NUM_THREADS-1:0][31:0] real_base_address; // A reg data
wire store_curr_real = !delayed_lsu_last_cycle && stall_lsu;
VX_generic_register #(.N(`NT*32*2)) lsu_data(
VX_generic_register #(.N(`NUM_THREADS*32*2)) lsu_data(
.clk (clk),
.reset(reset),
.stall(!store_curr_real),
@ -139,7 +139,7 @@ module VX_gpr_stage (
assign VX_lsu_req.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address;
VX_generic_register #(.N(77 + `NW_M1 + 1 + (`NT))) lsu_reg(
VX_generic_register #(.N(77 + `NW_BITS-1 + 1 + (`NUM_THREADS))) lsu_reg(
.clk (clk),
.reset(reset),
.stall(stall_lsu),
@ -148,7 +148,7 @@ module VX_gpr_stage (
.out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc ,VX_lsu_req.warp_num , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb })
);
VX_generic_register #(.N(224 + `NW_M1 + 1 + (`NT))) exec_unit_reg(
VX_generic_register #(.N(224 + `NW_BITS-1 + 1 + (`NUM_THREADS))) exec_unit_reg(
.clk (clk),
.reset(reset),
.stall(stall_exec),
@ -160,7 +160,7 @@ module VX_gpr_stage (
assign VX_exec_unit_req.a_reg_data = real_base_address;
assign VX_exec_unit_req.b_reg_data = real_store_data;
VX_generic_register #(.N(36 + `NW_M1 + 1 + (`NT))) gpu_inst_reg(
VX_generic_register #(.N(36 + `NW_BITS-1 + 1 + (`NUM_THREADS))) gpu_inst_reg(
.clk (clk),
.reset(reset),
.stall(stall_rest),
@ -172,7 +172,7 @@ module VX_gpr_stage (
assign VX_gpu_inst_req.a_reg_data = real_base_address;
assign VX_gpu_inst_req.rd2 = real_store_data;
VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg(
VX_generic_register #(.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)) csr_reg(
.clk (clk),
.reset(reset),
.stall(stall_gpr_csr),
@ -187,7 +187,7 @@ module VX_gpr_stage (
`else
// 341
VX_generic_register #(.N(77 + `NW_M1 + 1 + 65*(`NT))) lsu_reg(
VX_generic_register #(.N(77 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))) lsu_reg(
.clk (clk),
.reset(reset),
.stall(stall_lsu),
@ -196,7 +196,7 @@ module VX_gpr_stage (
.out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc , VX_lsu_req.warp_num , VX_lsu_req.store_data , VX_lsu_req.base_address , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb })
);
VX_generic_register #(.N(224 + `NW_M1 + 1 + 65*(`NT))) exec_unit_reg(
VX_generic_register #(.N(224 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))) exec_unit_reg(
.clk (clk),
.reset(reset),
.stall(stall_exec),
@ -205,7 +205,7 @@ module VX_gpr_stage (
.out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.a_reg_data , VX_exec_unit_req.b_reg_data , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask })
);
VX_generic_register #(.N(68 + `NW_M1 + 1 + 33*(`NT))) gpu_inst_reg(
VX_generic_register #(.N(68 + `NW_BITS-1 + 1 + 33*(`NUM_THREADS))) gpu_inst_reg(
.clk (clk),
.reset(reset),
.stall(stall_rest),
@ -214,7 +214,7 @@ module VX_gpr_stage (
.out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next , VX_gpu_inst_req.a_reg_data , VX_gpu_inst_req.rd2 })
);
VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg(
VX_generic_register #(.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)) csr_reg(
.clk (clk),
.reset(reset),
.stall(stall_gpr_csr),

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_gpr_wrapper (
input wire clk,
@ -7,23 +7,22 @@ module VX_gpr_wrapper (
VX_wb_inter VX_writeback_inter,
VX_gpr_jal_inter VX_gpr_jal,
output wire[`NT_M1:0][31:0] out_a_reg_data,
output wire[`NT_M1:0][31:0] out_b_reg_data
output wire[`NUM_THREADS-1:0][31:0] out_a_reg_data,
output wire[`NUM_THREADS-1:0][31:0] out_b_reg_data
);
wire[`NW-1:0][`NT_M1:0][31:0] temp_a_reg_data;
wire[`NW-1:0][`NT_M1:0][31:0] temp_b_reg_data;
wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_a_reg_data;
wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_b_reg_data;
wire[`NT_M1:0][31:0] jal_data;
wire[`NUM_THREADS-1:0][31:0] jal_data;
genvar index;
generate
for (index = 0; index <= `NT_M1; index = index + 1) begin : jal_data_assign
for (index = 0; index < `NUM_THREADS; index = index + 1) begin : jal_data_assign
assign jal_data[index] = VX_gpr_jal.curr_PC;
end
endgenerate
`ifndef ASIC
assign out_a_reg_data = (VX_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[VX_gpr_read.warp_num]));
assign out_b_reg_data = (temp_b_reg_data[VX_gpr_read.warp_num]);
@ -31,8 +30,8 @@ module VX_gpr_wrapper (
wire zer = 0;
wire[`NW_M1:0] old_warp_num;
VX_generic_register #(`NW_M1+1) store_wn(
wire[`NW_BITS-1:0] old_warp_num;
VX_generic_register #(`NW_BITS-1+1) store_wn(
.clk (clk),
.reset(reset),
.stall(zer),
@ -49,7 +48,7 @@ module VX_gpr_wrapper (
genvar warp_index;
generate
for (warp_index = 0; warp_index < `NW; warp_index = warp_index + 1) begin : warp_gprs
for (warp_index = 0; warp_index < `NUM_WARPS; warp_index = warp_index + 1) begin : warp_gprs
wire valid_write_request = warp_index == VX_writeback_inter.wb_warp_num;
VX_gpr vx_gpr(

View file

@ -1,12 +1,12 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_icache_stage (
input wire clk,
input wire reset,
input wire total_freeze,
output wire icache_stage_delay,
output wire[`NW_M1:0] icache_stage_wid,
output wire[`NT-1:0] icache_stage_valids,
output wire[`NW_BITS-1:0] icache_stage_wid,
output wire[`NUM_THREADS-1:0] icache_stage_valids,
VX_inst_meta_inter fe_inst_meta_fi,
VX_inst_meta_inter fe_inst_meta_id,
@ -14,7 +14,7 @@ module VX_icache_stage (
VX_gpu_dcache_req_inter VX_icache_req
);
reg[`NT-1:0] threads_active[`NW-1:0];
reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0];
wire valid_inst = (|fe_inst_meta_fi.valid);
@ -39,7 +39,7 @@ module VX_icache_stage (
/* verilator lint_off WIDTH */
assign icache_stage_wid = fe_inst_meta_id.warp_num;
assign icache_stage_valids = fe_inst_meta_id.valid & {`NT{!icache_stage_delay}};
assign icache_stage_valids = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}};
// Cache can't accept request
assign icache_stage_delay = VX_icache_rsp.delay_req;
@ -50,7 +50,7 @@ module VX_icache_stage (
integer curr_w;
always @(posedge clk) begin
if (reset) begin
for (curr_w = 0; curr_w < `NW; curr_w=curr_w+1) threads_active[curr_w] <= 0;
for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) threads_active[curr_w] <= 0;
end else begin
if (valid_inst && !icache_stage_delay) begin
/* verilator lint_off WIDTH */

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_inst_multiplex (
// Inputs
@ -12,9 +12,9 @@ module VX_inst_multiplex (
VX_csr_req_inter VX_csr_req
);
wire[`NT_M1:0] is_mem_mask;
wire[`NT_M1:0] is_gpu_mask;
wire[`NT_M1:0] is_csr_mask;
wire[`NUM_THREADS-1:0] is_mem_mask;
wire[`NUM_THREADS-1:0] is_gpu_mask;
wire[`NUM_THREADS-1:0] is_csr_mask;
wire is_mem = (VX_bckE_req.mem_write != `NO_MEM_WRITE) || (VX_bckE_req.mem_read != `NO_MEM_READ);
wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
@ -23,7 +23,7 @@ module VX_inst_multiplex (
genvar currT;
generate
for (currT = 0; currT < `NT; currT = currT + 1) begin : mask_init
for (currT = 0; currT < `NUM_THREADS; currT = currT + 1) begin : mask_init
assign is_mem_mask[currT] = is_mem;
assign is_gpu_mask[currT] = is_gpu;
assign is_csr_mask[currT] = is_csr;

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_lsu (
input wire clk,
@ -15,7 +15,7 @@ module VX_lsu (
);
// Generate Addresses
wire[`NT_M1:0][31:0] address;
wire[`NUM_THREADS-1:0][31:0] address;
VX_lsu_addr_gen VX_lsu_addr_gen
(
.base_address(VX_lsu_req.base_address),
@ -23,19 +23,19 @@ module VX_lsu (
.address (address)
);
// Outputs of the LSU pipeline register (`lsu_buffer`): the "use_*" signals are
// what the rest of this module forwards to the D-cache request interface.
// NOTE(review): diff view - changed declarations appear twice (old spelling
// first, new spelling second).
// Per-thread fields: 32-bit address, 32-bit store data, 1 valid bit.
wire[`NT_M1:0][31:0] use_address;
wire[`NT_M1:0][31:0] use_store_data;
wire[`NT_M1:0] use_valid;
wire[`NUM_THREADS-1:0][31:0] use_address;
wire[`NUM_THREADS-1:0][31:0] use_store_data;
wire[`NUM_THREADS-1:0] use_valid;
// Fields shared by the whole warp.
wire[2:0] use_mem_read;
wire[2:0] use_mem_write;
wire[4:0] use_rd;
wire[`NW_M1:0] use_warp_num;
wire[`NW_BITS-1:0] use_warp_num;
wire[1:0] use_wb;
wire[31:0] use_pc;
wire zero = 0;
// Register width N:
//   45            = 3 (mem_read) + 3 (mem_write) + 5 (rd) + 2 (wb) + 32 (pc)
//   warp-num term = width of use_warp_num (`NW_M1 + 1, i.e. `NW_BITS in the new form)
//   65 per thread = 32 (address) + 32 (store data) + 1 (valid)
// NOTE(review): the actual field packing order is set by the in/out port
// connections, which are outside this view - confirm against them.
VX_generic_register #(.N(45 + `NW_M1 + 1 + `NT*65)) lsu_buffer(
VX_generic_register #(.N(45 + `NW_BITS-1 + 1 + `NUM_THREADS*65)) lsu_buffer(
.clk (clk),
.reset(reset),
// The register is held while out_delay is asserted.
.stall(out_delay),
@ -49,10 +49,10 @@ module VX_lsu (
// Drive the D-cache core request from the buffered LSU signals.
// Per-thread signals (valid, addr, writedata) are passed through unchanged.
assign VX_dcache_req.core_req_valid = use_valid;
assign VX_dcache_req.core_req_addr = use_address;
assign VX_dcache_req.core_req_writedata = use_store_data;
// The 3-bit read/write op codes are warp-wide, so they are replicated once per
// thread to match the per-thread request fields.
// (Diff view: old `NT lines first, new `NUM_THREADS lines second.)
assign VX_dcache_req.core_req_mem_read = {`NT{use_mem_read}};
assign VX_dcache_req.core_req_mem_write = {`NT{use_mem_write}};
assign VX_dcache_req.core_req_mem_read = {`NUM_THREADS{use_mem_read}};
assign VX_dcache_req.core_req_mem_write = {`NUM_THREADS{use_mem_write}};
assign VX_dcache_req.core_req_rd = use_rd;
// 2-bit writeback selector, likewise replicated per thread.
assign VX_dcache_req.core_req_wb = {`NT{use_wb}};
assign VX_dcache_req.core_req_wb = {`NUM_THREADS{use_wb}};
assign VX_dcache_req.core_req_warp_num = use_warp_num;
assign VX_dcache_req.core_req_pc = use_pc;
@ -70,9 +70,9 @@ module VX_lsu (
assign VX_mem_wb.wb_warp_num = VX_dcache_rsp.core_wb_warp_num;
assign VX_mem_wb.loaded_data = VX_dcache_rsp.core_wb_readdata;
wire[(`CLOG2(`NT))-1:0] use_pc_index;
wire[(`LOG2UP(`NUM_THREADS))-1:0] use_pc_index;
wire found;
VX_generic_priority_encoder #(.N(`NT)) pick_first_pc(
VX_generic_priority_encoder #(.N(`NUM_THREADS)) pick_first_pc(
.valids(VX_dcache_rsp.core_wb_valid),
.index (use_pc_index),
.found (found)

View file

@ -1,17 +1,15 @@
`include "VX_define.v"
`include "VX_define.vh"
// VX_lsu_addr_gen: forms one 32-bit address per thread by adding the single
// shared `offset` to that thread's entry of `base_address`.
// NOTE(review): diff view - every changed line appears twice (pre-change
// spelling first, post-change spelling second); only one exists in the file.
module VX_lsu_addr_gen (
// Per-thread base addresses, one 32-bit word per thread.
input wire[`NT_M1:0][31:0] base_address,
input wire[`NUM_THREADS-1:0][31:0] base_address,
// Offset applied identically to every thread.
input wire[31:0] offset,
// Per-thread result: base_address[t] + offset.
output wire[`NT_M1:0][31:0] address
output wire[`NUM_THREADS-1:0][31:0] address
);
genvar index;
genvar i;
generate
// One continuous assignment per thread. Operands and target are all 32 bits
// wide, so the sum wraps modulo 2^32 (no carry-out is kept).
for (index = 0; index < `NT; index = index + 1) begin : addresses
assign address[index] = base_address[index] + offset;
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : addresses
assign address[i] = base_address[i] + offset;
end
endgenerate

View file

@ -1,8 +1,8 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_priority_encoder (
input wire[`NW-1:0] valids,
output reg[`NW_M1:0] index,
input wire[`NUM_WARPS-1:0] valids,
output reg[`NW_BITS-1:0] index,
output reg found
);
@ -10,9 +10,9 @@ module VX_priority_encoder (
// Combinational priority encode of `valids`.
// When no bit of `valids` is set the defaults below remain: index = 0, found = 0.
always @(*) begin
index = 0;
found = 0;
// Scan from the highest bit down to bit 0. Every set bit overwrites `index`,
// so the value left at the end is the LOWEST set bit position.
// NOTE(review): the `i >= 0` exit test only terminates if `i` is a signed
// variable (e.g. `integer`); its declaration is outside this view - confirm.
// (Diff view: old `NW / `NW_M1 lines first, new `NUM_WARPS / `NW_BITS second.)
for (i = `NW-1; i >= 0; i = i - 1) begin
for (i = `NUM_WARPS-1; i >= 0; i = i - 1) begin
if (valids[i]) begin
// Truncate the loop variable to the width of the `index` port.
index = i[`NW_M1:0];
index = i[`NW_BITS-1:0];
found = 1;
end
end

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_priority_encoder_w_mask
#(
parameter N = 10
@ -7,8 +7,8 @@ module VX_priority_encoder_w_mask
input wire[N-1:0] valids,
output reg [N-1:0] mask,
//output reg[$clog2(N)-1:0] index,
output reg[(`CLOG2(N))-1:0] index,
//output reg[`CLOG2(N):0] index, // eh
output reg[(`LOG2UP(N))-1:0] index,
//output reg[`LOG2UP(N):0] index, // eh
output reg found
);
@ -20,7 +20,7 @@ module VX_priority_encoder_w_mask
for (i = 0; i < N; i=i+1) begin
if (valids[i]) begin
//index = i[$clog2(N)-1:0];
index = i[(`CLOG2(N))-1:0];
index = i[(`LOG2UP(N))-1:0];
found = 1;
// mask[index] = (1 << i);
// $display("%h",(1 << i));

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_scheduler (
input wire clk,
@ -10,8 +10,7 @@ module VX_scheduler (
VX_wb_inter VX_writeback_inter,
output wire schedule_delay,
output wire is_empty
output wire is_empty
);
/* verilator lint_off WIDTH */
@ -19,7 +18,7 @@ module VX_scheduler (
assign is_empty = count_valid == 0;
reg[31:0][`NT-1:0] rename_table[`NW-1:0];
reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
wire valid_wb = (VX_writeback_inter.wb != 0) && (|VX_writeback_inter.wb_valid) && (VX_writeback_inter.rd != 0);
wire wb_inc = (VX_bckE_req.wb != 0) && (VX_bckE_req.rd != 0);
@ -32,13 +31,11 @@ module VX_scheduler (
wire is_load = (VX_bckE_req.mem_read != `NO_MEM_READ);
// classify our next instruction.
wire is_mem = is_store || is_load;
wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
wire is_csr = VX_bckE_req.is_csr;
wire is_mem = is_store || is_load;
wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
wire is_csr = VX_bckE_req.is_csr;
wire is_exec = !is_mem && !is_gpu && !is_csr;
// wire rs1_pass = 0;
// wire rs2_pass = 0;
@ -48,7 +45,6 @@ module VX_scheduler (
wire rs2_rename_qual = ((rs2_rename) && (VX_bckE_req.rs2 != 0 && using_rs2));
wire rd_rename_qual = ((rd_rename ) && (VX_bckE_req.rd != 0));
wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual;
assign schedule_delay = ((rename_valid) && (|VX_bckE_req.valid))
@ -61,7 +57,7 @@ module VX_scheduler (
always @(posedge clk or posedge reset) begin
if (reset) begin
for (w = 0; w < `NW; w=w+1)
for (w = 0; w < `NUM_WARPS; w=w+1)
begin
for (i = 0; i < 32; i = i + 1)
begin
@ -74,7 +70,6 @@ module VX_scheduler (
if (valid_wb && ((rename_table[VX_writeback_inter.wb_warp_num][VX_writeback_inter.rd] & (~VX_writeback_inter.wb_valid)) == 0)) count_valid = count_valid - 1;
if (!schedule_delay && wb_inc) count_valid = count_valid + 1;
end
end

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_warp (
@ -6,7 +6,7 @@ module VX_warp (
input wire reset,
input wire stall,
input wire remove,
input wire[`NT_M1:0] in_thread_mask,
input wire[`NUM_THREADS-1:0] in_thread_mask,
input wire in_change_mask,
input wire in_jal,
input wire[31:0] in_jal_dest,
@ -16,20 +16,20 @@ module VX_warp (
input wire[31:0] in_wspawn_pc,
output wire[31:0] out_PC,
output wire[`NT_M1:0] out_valid
output wire[`NUM_THREADS-1:0] out_valid
);
reg[31:0] real_PC;
logic [31:0] temp_PC;
logic [31:0] use_PC;
reg[`NT_M1:0] valid;
reg[`NUM_THREADS-1:0] valid;
reg[`NT_M1:0] valid_zero;
reg[`NUM_THREADS-1:0] valid_zero;
// Power-up initialisation (an `initial` block, not the synchronous reset path).
integer ini_cur_th = 0;
initial begin
real_PC = 0;
// The loop starts at index 1, so only threads 1..N-1 are cleared here; bit 0
// of `valid` / `valid_zero` is not written by this loop.
// NOTE(review): the existing "Thread 1 active" remark presumably refers to
// that first thread (index 0) being the one left active - confirm where
// valid[0] is actually set, as it is not visible in this span.
// (Diff view: old `NT loop header first, new `NUM_THREADS header second.)
for (ini_cur_th = 1; ini_cur_th < `NT; ini_cur_th=ini_cur_th+1) begin
for (ini_cur_th = 1; ini_cur_th < `NUM_THREADS; ini_cur_th=ini_cur_th+1) begin
valid[ini_cur_th] = 0; // Thread 1 active
valid_zero[ini_cur_th] = 0;
end
@ -49,7 +49,7 @@ module VX_warp (
// Per-thread `out_valid`. Priority of the nested conditional, highest first:
//   1. in_change_mask asserted -> forward the incoming in_thread_mask bit
//      (this takes precedence even when `stall` is asserted)
//   2. else stall asserted     -> force 0
//   3. else                    -> the stored `valid` bit
// (Diff view: old `NT loop header first, new `NUM_THREADS header second.)
genvar out_cur_th;
generate
for (out_cur_th = 0; out_cur_th < `NT; out_cur_th = out_cur_th+1) begin : out_valid_assign
for (out_cur_th = 0; out_cur_th < `NUM_THREADS; out_cur_th = out_cur_th+1) begin : out_valid_assign
assign out_valid[out_cur_th] = in_change_mask ? in_thread_mask[out_cur_th] : stall ? 1'b0 : valid[out_cur_th];
end
endgenerate

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_warp_scheduler (
input wire clk, // Clock
@ -7,57 +7,57 @@ module VX_warp_scheduler (
// Wspawn
input wire wspawn,
input wire[31:0] wsapwn_pc,
input wire[`NW-1:0] wspawn_new_active,
input wire[`NUM_WARPS-1:0] wspawn_new_active,
// CTM
input wire ctm,
input wire[`NT_M1:0] ctm_mask,
input wire[`NW_M1:0] ctm_warp_num,
input wire[`NUM_THREADS-1:0] ctm_mask,
input wire[`NW_BITS-1:0] ctm_warp_num,
// WHALT
input wire whalt,
input wire[`NW_M1:0] whalt_warp_num,
input wire[`NW_BITS-1:0] whalt_warp_num,
input wire is_barrier,
input wire[31:0] barrier_id,
input wire[$clog2(`NW):0] num_warps,
input wire[`NW_M1:0] barrier_warp_num,
input wire[$clog2(`NUM_WARPS):0] num_warps,
input wire[`NW_BITS-1:0] barrier_warp_num,
// WSTALL
input wire wstall,
input wire[`NW_M1:0] wstall_warp_num,
input wire[`NW_BITS-1:0] wstall_warp_num,
// Split
input wire is_split,
input wire dont_split,
input wire[`NT_M1:0] split_new_mask,
input wire[`NT_M1:0] split_later_mask,
input wire[`NUM_THREADS-1:0] split_new_mask,
input wire[`NUM_THREADS-1:0] split_later_mask,
input wire[31:0] split_save_pc,
input wire[`NW_M1:0] split_warp_num,
input wire[`NW_BITS-1:0] split_warp_num,
// Join
input wire is_join,
input wire[`NW_M1:0] join_warp_num,
input wire[`NW_BITS-1:0] join_warp_num,
// JAL
input wire jal,
input wire[31:0] jal_dest,
input wire[`NW_M1:0] jal_warp_num,
input wire[`NW_BITS-1:0] jal_warp_num,
// Branch
input wire branch_valid,
input wire branch_dir,
input wire[31:0] branch_dest,
input wire[`NW_M1:0] branch_warp_num,
input wire[`NW_BITS-1:0] branch_warp_num,
output wire[`NT_M1:0] thread_mask,
output wire[`NW_M1:0] warp_num,
output wire[`NUM_THREADS-1:0] thread_mask,
output wire[`NW_BITS-1:0] warp_num,
output wire[31:0] warp_pc,
output wire out_ebreak,
output wire scheduled_warp,
input wire[`NW_M1:0] icache_stage_wid,
input wire[`NT-1:0] icache_stage_valids
input wire[`NW_BITS-1:0] icache_stage_wid,
input wire[`NUM_THREADS-1:0] icache_stage_valids
);
@ -66,41 +66,41 @@ module VX_warp_scheduler (
wire update_visible_active;
wire[(1+32+`NT_M1):0] d[`NW-1:0];
wire[(1+32+`NUM_THREADS-1):0] d[`NUM_WARPS-1:0];
wire join_fall;
wire[31:0] join_pc;
wire[`NT_M1:0] join_tm;
wire[`NUM_THREADS-1:0] join_tm;
wire in_wspawn = wspawn;
wire in_ctm = ctm;
wire in_whalt = whalt;
wire in_wstall = wstall;
reg[`NW-1:0] warp_active;
reg[`NW-1:0] warp_stalled;
reg[`NUM_WARPS-1:0] warp_active;
reg[`NUM_WARPS-1:0] warp_stalled;
reg [`NW-1:0] visible_active;
wire[`NW-1:0] use_active;
reg [`NUM_WARPS-1:0] visible_active;
wire[`NUM_WARPS-1:0] use_active;
reg [`NW-1:0] warp_lock;
reg [`NUM_WARPS-1:0] warp_lock;
wire wstall_this_cycle;
reg[`NT_M1:0] thread_masks[`NW-1:0];
reg[31:0] warp_pcs[`NW-1:0];
reg[`NUM_THREADS-1:0] thread_masks[`NUM_WARPS-1:0];
reg[31:0] warp_pcs[`NUM_WARPS-1:0];
// barriers
reg[`NW-1:0] barrier_stall_mask[(`NUM_BARRIERS-1):0];
reg[`NUM_WARPS-1:0] barrier_stall_mask[(`NUM_BARRIERS-1):0];
wire reached_barrier_limit;
wire[`NW-1:0] curr_barrier_mask;
wire[$clog2(`NW):0] curr_barrier_count;
wire[`NUM_WARPS-1:0] curr_barrier_mask;
wire[$clog2(`NUM_WARPS):0] curr_barrier_count;
// wsapwn
reg[31:0] use_wsapwn_pc;
reg[`NW-1:0] use_wsapwn;
reg[`NUM_WARPS-1:0] use_wsapwn;
wire[`NW_M1:0] warp_to_schedule;
wire[`NW_BITS-1:0] warp_to_schedule;
wire schedule;
wire hazard;
@ -110,12 +110,12 @@ module VX_warp_scheduler (
wire[31:0] new_pc;
reg[`NW-1:0] total_barrier_stall;
reg[`NUM_WARPS-1:0] total_barrier_stall;
reg didnt_split;
/* verilator lint_off UNUSED */
// wire[$clog2(`NW):0] num_active;
// wire[$clog2(`NUM_WARPS):0] num_active;
/* verilator lint_on UNUSED */
integer curr_w_help;
@ -135,7 +135,7 @@ module VX_warp_scheduler (
didnt_split <= 0;
warp_lock <= 0;
// total_barrier_stall = 0;
for (curr_w_help = 1; curr_w_help < `NW; curr_w_help=curr_w_help+1) begin
for (curr_w_help = 1; curr_w_help < `NUM_WARPS; curr_w_help=curr_w_help+1) begin
warp_pcs[curr_w_help] <= 0;
warp_active[curr_w_help] <= 0; // Activating first warp
visible_active[curr_w_help] <= 0; // Activating first warp
@ -147,7 +147,7 @@ module VX_warp_scheduler (
if (wspawn) begin
warp_active <= wspawn_new_active;
use_wsapwn_pc <= wsapwn_pc;
use_wsapwn <= wspawn_new_active & (~`NW'b1);
use_wsapwn <= wspawn_new_active & (~`NUM_WARPS'b1);
end
if (is_barrier) begin
@ -219,30 +219,30 @@ module VX_warp_scheduler (
// Lock/Release
if (scheduled_warp && !stall) begin
warp_lock[warp_num] <= 1'b1;
// warp_lock <= {`NW{1'b1}};
// warp_lock <= {`NUM_WARPS{1'b1}};
end
if (|icache_stage_valids && !stall) begin
warp_lock[icache_stage_wid] <= 1'b0;
// warp_lock <= {`NW{1'b0}};
// warp_lock <= {`NUM_WARPS{1'b0}};
end
end
end
// Count the bits set in the currently addressed barrier's stall mask.
// NOTE(review): VX_countones is presumably a population count (the
// commented-out $countones line below suggests so); its body is not visible.
// (Diff view: each changed line appears as old `NW form, then new `NUM_WARPS form.)
VX_countones #(.N(`NW)) barrier_count(
VX_countones #(.N(`NUM_WARPS)) barrier_count(
.valids(curr_barrier_mask),
.count (curr_barrier_count)
);
// Count of bits set in `visible_active`. The counter is $clog2(N)+1 bits wide
// so that it can also represent the value N itself.
wire[$clog2(`NW):0] count_visible_active;
VX_countones #(.N(`NW)) num_visible(
wire[$clog2(`NUM_WARPS):0] count_visible_active;
VX_countones #(.N(`NUM_WARPS)) num_visible(
.valids(visible_active),
.count (count_visible_active)
);
// assign curr_barrier_count = $countones(curr_barrier_mask);
// Select the stall mask of the barrier named by the 32-bit `barrier_id` input.
// NOTE(review): barrier_stall_mask only has `NUM_BARRIERS entries; nothing
// here range-checks barrier_id - confirm the producer guarantees it.
assign curr_barrier_mask = barrier_stall_mask[barrier_id][`NW-1:0];
assign curr_barrier_mask = barrier_stall_mask[barrier_id][`NUM_WARPS-1:0];
// The barrier limit is reached when the counted bits equal the `num_warps` input.
assign reached_barrier_limit = curr_barrier_count == (num_warps);
// A warp-stall request only takes effect this cycle when it targets the warp
// currently selected for scheduling.
assign wstall_this_cycle = wstall && (wstall_warp_num == warp_to_schedule); // Maybe bug
@ -253,15 +253,15 @@ module VX_warp_scheduler (
// total_barrier_stall = 0;
// for (curr_b = 0; curr_b < `NUM_BARRIERS; curr_b=curr_b+1)
// begin
// total_barrier_stall[`NW-1:0] = total_barrier_stall[`NW-1:0] | barrier_stall_mask[curr_b];
// total_barrier_stall[`NUM_WARPS-1:0] = total_barrier_stall[`NUM_WARPS-1:0] | barrier_stall_mask[curr_b];
// end
// end
// Request a refresh of `visible_active` once no visible warp remains
// (count is 0) and none of stall / wstall_this_cycle / hazard / is_join is set.
assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join);
// Two candidate stack entries built for a split, each laid out as
// {1-bit flag, 32-bit PC, thread mask}:
//   q1 = {1, PC = 0,         current thread mask of the splitting warp}
//   q2 = {0, split_save_pc,  split_later_mask}
// NOTE(review): how q1/q2 are fed to the per-warp stack is outside this view.
// (Diff view: old `NT_M1 declarations first, new `NUM_THREADS-1 second.)
wire[(1+32+`NT_M1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]};
wire[(1+32+`NT_M1):0] q2 = {1'b0, split_save_pc , split_later_mask};
wire[(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]};
wire[(1+32+`NUM_THREADS-1):0] q2 = {1'b0, split_save_pc , split_later_mask};
// Unpack the entry at d[join_warp_num] using the same {flag, PC, mask} layout.
assign {join_fall, join_pc, join_tm} = d[join_warp_num];
@ -270,13 +270,13 @@ module VX_warp_scheduler (
// One generate iteration per warp; each iteration derives that warp's own
// push/pop strobes from the shared split/join inputs.
// (Diff view: old `NW loop header first, new `NUM_WARPS header second.)
genvar curr_warp;
generate
for (curr_warp = 0; curr_warp < `NW; curr_warp = curr_warp + 1) begin : stacks
for (curr_warp = 0; curr_warp < `NUM_WARPS; curr_warp = curr_warp + 1) begin : stacks
// True when the split / join request targets this iteration's warp.
wire correct_warp_s = (curr_warp == split_warp_num);
wire correct_warp_j = (curr_warp == join_warp_num);
// Push only on a split that actually diverges (dont_split clear).
wire push = (is_split && !dont_split) && correct_warp_s;
// Pop on a join addressed to this warp.
wire pop = is_join && correct_warp_j;
VX_generic_stack #(.WIDTH(1+32+`NT), .DEPTH($clog2(`NT)+1)) ipdom_stack(
VX_generic_stack #(.WIDTH(1+32+`NUM_THREADS), .DEPTH($clog2(`NUM_THREADS)+1)) ipdom_stack(
.clk (clk),
.reset(reset),
.push (push),
@ -304,7 +304,7 @@ module VX_warp_scheduler (
// Set when the warp chosen for scheduling still has a pending wspawn flag.
wire real_use_wspawn = use_wsapwn[warp_to_schedule];
// A warp with a pending wspawn uses the captured wspawn PC instead of its
// stored per-warp PC.
assign warp_pc = real_use_wspawn ? use_wsapwn_pc : warp_pcs[warp_to_schedule];
// Thread mask selection:
//   global_stall   -> all zero (nothing issued)
//   pending wspawn -> the constant value 1, i.e. only thread 0 enabled
//   otherwise      -> the warp's stored thread mask
// (Diff view: old `NT'b1 line first, new `NUM_THREADS'b1 line second.)
assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NT'b1 : thread_masks[warp_to_schedule]);
assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NUM_THREADS'b1 : thread_masks[warp_to_schedule]);
assign warp_num = warp_to_schedule;
// Asserted when the pending-wspawn warp is selected and not globally stalled.
assign update_use_wspawn = use_wsapwn[warp_to_schedule] && !global_stall;

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_writeback (
input wire clk,
@ -64,9 +64,9 @@ module VX_writeback (
wire zero = 0;
wire[`NT-1:0][31:0] use_wb_data;
wire[`NUM_THREADS-1:0][31:0] use_wb_data;
VX_generic_register #(.N(39 + `NW_M1 + 1 + `NT*33)) wb_register(
VX_generic_register #(.N(39 + `NW_BITS-1 + 1 + `NUM_THREADS*33)) wb_register(
.clk (clk),
.reset(reset),
.stall(zero),

View file

@ -1,5 +1,5 @@
`include "VX_define.v"
`include "VX_cache_config.v"
`include "VX_define.vh"
`include "VX_cache_config.vh"
module Vortex
#(
@ -13,24 +13,24 @@ module Vortex
// IO
output wire io_valid,
output wire[31:0] io_data,
output wire [31:0] io_data,
// DRAM Dcache Req
output wire dram_req,
output wire dram_req_write,
output wire dram_req_read,
output wire [31:0] dram_req_addr,
output wire [31:0] dram_req_size,
output wire [31:0] dram_req_data[`DBANK_LINE_SIZE_RNG],
output wire [31:0] dram_expected_lat,
output wire dram_req,
output wire dram_req_write,
output wire dram_req_read,
output wire [31:0] dram_req_addr,
output wire [31:0] dram_req_size,
output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
output wire [31:0] dram_expected_lat,
input wire dram_req_delay,
input wire dram_req_delay,
// DRAM Dcache Res
output wire dram_fill_accept,
input wire dram_fill_rsp,
input wire [31:0] dram_fill_rsp_addr,
input wire [31:0] dram_fill_rsp_data[`DBANK_LINE_SIZE_RNG],
output wire dram_fill_accept,
input wire dram_fill_rsp,
input wire [31:0] dram_fill_rsp_addr,
input wire [`DBANK_LINE_SIZE-1:0] dram_fill_rsp_data,
// DRAM Icache Req
output wire I_dram_req,
@ -38,25 +38,25 @@ module Vortex
output wire I_dram_req_read,
output wire [31:0] I_dram_req_addr,
output wire [31:0] I_dram_req_size,
output wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_req_data,
output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data,
output wire [31:0] I_dram_expected_lat,
// DRAM Icache Res
output wire I_dram_fill_accept,
input wire I_dram_fill_rsp,
input wire [31:0] I_dram_fill_rsp_addr,
input wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_fill_rsp_data,
input wire [`IBANK_LINE_SIZE-1:0] I_dram_fill_rsp_data,
// LLC Snooping
input wire snp_req,
input wire [31:0] snp_req_addr,
output wire snp_req_delay,
input wire snp_req,
input wire [31:0] snp_req_addr,
output wire snp_req_delay,
input wire I_snp_req,
input wire [31:0] I_snp_req_addr,
output wire I_snp_req_delay,
output wire out_ebreak
output wire out_ebreak
`else
@ -72,14 +72,14 @@ module Vortex
output wire dram_req_read,
output wire [31:0] dram_req_addr,
output wire [31:0] dram_req_size,
output wire [`DBANK_LINE_SIZE_RNG][31:0] dram_req_data,
output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
output wire [31:0] dram_expected_lat,
// DRAM Dcache Res
output wire dram_fill_accept,
input wire dram_fill_rsp,
input wire [31:0] dram_fill_rsp_addr,
input wire [`DBANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data,
input wire [`DBANK_LINE_SIZE-1:0] dram_fill_rsp_data,
// DRAM Icache Req
@ -88,16 +88,16 @@ module Vortex
output wire I_dram_req_read,
output wire [31:0] I_dram_req_addr,
output wire [31:0] I_dram_req_size,
output wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_req_data,
output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data,
output wire [31:0] I_dram_expected_lat,
// DRAM Icache Res
output wire I_dram_fill_accept,
input wire I_dram_fill_rsp,
input wire [31:0] I_dram_fill_rsp_addr,
input wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_fill_rsp_data,
input wire [`IBANK_LINE_SIZE-1:0] I_dram_fill_rsp_data,
input wire dram_req_delay,
input wire dram_req_delay,
input wire snp_req,
input wire [31:0] snp_req_addr,
@ -110,27 +110,24 @@ module Vortex
output wire out_ebreak
`endif
);
wire scheduler_empty;
wire out_ebreak_unqual;
// assign out_ebreak = out_ebreak_unqual && (scheduler_empty && 1);
assign out_ebreak = out_ebreak_unqual;
wire memory_delay;
wire exec_delay;
wire gpr_stage_delay;
wire schedule_delay;
// Dcache Interface
VX_gpu_dcache_res_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_rsp();
VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_req();
VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_req_qual();
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_dcache_dram_req();
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_dcache_dram_res();
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_dcache_dram_req();
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_dcache_dram_res();
assign VX_gpu_dcache_dram_res.dram_fill_rsp = dram_fill_rsp;
@ -146,36 +143,40 @@ module Vortex
assign VX_gpu_dcache_dram_req.dram_req_delay = dram_req_delay;
// Copy the D-cache DRAM line data, word by word, between the module ports and
// the internal DRAM request/response interfaces.
// NOTE(review): diff view - the old loop body (genvar `wordy`, ports indexed as
// arrays of 32-bit words) and the new loop body (genvar `i`, ports treated as
// flat bit vectors) are interleaved below; only one exists in the real file.
genvar wordy;
genvar i;
generate
for (wordy = 0; wordy < `DBANK_LINE_SIZE_WORDS; wordy=wordy+1) begin
assign VX_gpu_dcache_dram_res.dram_fill_rsp_data[wordy] = dram_fill_rsp_data[wordy];
assign dram_req_data[wordy] = VX_gpu_dcache_dram_req.dram_req_data[wordy];
// New form: word i occupies bits [32*i+31 : 32*i] of the flat port
// (indexed part-select `i * 32 +: 32`).
// NOTE(review): assumes the flat ports, declared [`DBANK_LINE_SIZE-1:0], are
// 32 * `DBANK_LINE_WORDS bits wide (i.e. `DBANK_LINE_SIZE counts bits, not
// bytes) - confirm against the macro definition.
for (i = 0; i < `DBANK_LINE_WORDS; i=i+1) begin
assign VX_gpu_dcache_dram_res.dram_fill_rsp_data[i] = dram_fill_rsp_data[i * 32 +: 32];
assign dram_req_data[i * 32 +: 32] = VX_gpu_dcache_dram_req.dram_req_data[i];
end
endgenerate
// IO write detection: a D-cache request is treated as an IO write when
//   - memory is not stalling (memory_delay low),
//   - at least one thread's request is valid,
//   - thread 0's op is a write (not `NO_MEM_WRITE), and
//   - thread 0's address is exactly 32'h00010000.
// Only thread 0's write op, address and data are examined.
// NOTE(review): diff view - the single-line and multi-line forms of
// temp_io_valid below are the old and new spellings of the same expression.
wire temp_io_valid = (!memory_delay) && (|VX_dcache_req.core_req_valid) && (VX_dcache_req.core_req_mem_write[0] != `NO_MEM_WRITE) && (VX_dcache_req.core_req_addr[0] == 32'h00010000);
wire temp_io_valid = (!memory_delay)
&& (|VX_dcache_req.core_req_valid)
&& (VX_dcache_req.core_req_mem_write[0] != `NO_MEM_WRITE)
&& (VX_dcache_req.core_req_addr[0] == 32'h00010000);
// The IO payload is the word thread 0 is writing.
wire[31:0] temp_io_data = VX_dcache_req.core_req_writedata[0];
assign io_valid = temp_io_valid;
assign io_data = temp_io_data;
// Qualified copy of the D-cache request: identical to VX_dcache_req except that
// every thread's valid bit is cleared while io_valid is asserted, so an IO
// write is not also presented to the cache as a valid request.
// (Old `NT block first, new `NUM_THREADS block second.)
assign VX_dcache_req_qual.core_req_valid = VX_dcache_req.core_req_valid & {`NT{~io_valid}};
assign VX_dcache_req_qual.core_req_addr = VX_dcache_req.core_req_addr;
assign VX_dcache_req_qual.core_req_writedata = VX_dcache_req.core_req_writedata;
assign VX_dcache_req_qual.core_req_mem_read = VX_dcache_req.core_req_mem_read;
assign VX_dcache_req_qual.core_req_mem_write = VX_dcache_req.core_req_mem_write;
assign VX_dcache_req_qual.core_req_rd = VX_dcache_req.core_req_rd;
assign VX_dcache_req_qual.core_req_wb = VX_dcache_req.core_req_wb;
assign VX_dcache_req_qual.core_req_warp_num = VX_dcache_req.core_req_warp_num;
assign VX_dcache_req_qual.core_req_pc = VX_dcache_req.core_req_pc;
assign VX_dcache_req_qual.core_no_wb_slot = VX_dcache_req.core_no_wb_slot;
assign VX_dcache_req_qual.core_req_valid = VX_dcache_req.core_req_valid & {`NUM_THREADS{~io_valid}};
assign VX_dcache_req_qual.core_req_addr = VX_dcache_req.core_req_addr;
assign VX_dcache_req_qual.core_req_writedata = VX_dcache_req.core_req_writedata;
assign VX_dcache_req_qual.core_req_mem_read = VX_dcache_req.core_req_mem_read;
assign VX_dcache_req_qual.core_req_mem_write = VX_dcache_req.core_req_mem_write;
assign VX_dcache_req_qual.core_req_rd = VX_dcache_req.core_req_rd;
assign VX_dcache_req_qual.core_req_wb = VX_dcache_req.core_req_wb;
assign VX_dcache_req_qual.core_req_warp_num = VX_dcache_req.core_req_warp_num;
assign VX_dcache_req_qual.core_req_pc = VX_dcache_req.core_req_pc;
assign VX_dcache_req_qual.core_no_wb_slot = VX_dcache_req.core_no_wb_slot;
VX_gpu_dcache_res_inter #(.NUMBER_REQUESTS(`INUMBER_REQUESTS)) VX_icache_rsp();
VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`INUMBER_REQUESTS)) VX_icache_req();
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_SIZE_WORDS(`IBANK_LINE_SIZE_WORDS)) VX_gpu_icache_dram_req();
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_SIZE_WORDS(`IBANK_LINE_SIZE_WORDS)) VX_gpu_icache_dram_res();
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) VX_gpu_icache_dram_req();
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) VX_gpu_icache_dram_res();
assign VX_gpu_icache_dram_res.dram_fill_rsp = I_dram_fill_rsp;
@ -191,11 +192,11 @@ module Vortex
assign VX_gpu_icache_dram_req.dram_req_delay = dram_req_delay;
// Copy the I-cache DRAM line data, word by word, between the I_dram_* module
// ports and the internal I-cache DRAM request/response interfaces.
// NOTE(review): diff view - the old loop body (genvar `iwordy`, ports indexed
// per 32-bit word) and the new loop body (genvar `j`, flat bit-vector ports)
// are interleaved below; only one exists in the real file.
genvar iwordy;
genvar j;
generate
for (iwordy = 0; iwordy < `IBANK_LINE_SIZE_WORDS; iwordy=iwordy+1) begin
assign VX_gpu_icache_dram_res.dram_fill_rsp_data[iwordy] = I_dram_fill_rsp_data[iwordy];
assign I_dram_req_data[iwordy] = VX_gpu_icache_dram_req.dram_req_data[iwordy];
// New form: word j occupies bits [32*j+31 : 32*j] of the flat port.
// NOTE(review): assumes the [`IBANK_LINE_SIZE-1:0] ports are
// 32 * `IBANK_LINE_WORDS bits wide - confirm against the macro definition.
for (j = 0; j < `IBANK_LINE_WORDS; j = j + 1) begin
assign VX_gpu_icache_dram_res.dram_fill_rsp_data[j] = I_dram_fill_rsp_data[j * 32 +: 32];
assign I_dram_req_data[j * 32 +: 32] = VX_gpu_icache_dram_req.dram_req_data[j];
end
endgenerate
@ -239,7 +240,7 @@ VX_front_end vx_front_end(
.VX_jal_rsp (VX_jal_rsp),
.VX_branch_rsp (VX_branch_rsp),
.fetch_ebreak (out_ebreak_unqual)
);
);
VX_scheduler schedule(
.clk (clk),
@ -251,7 +252,7 @@ VX_scheduler schedule(
.VX_writeback_inter(VX_writeback_inter),
.schedule_delay (schedule_delay),
.is_empty (scheduler_empty)
);
);
VX_back_end #(.CORE_ID(CORE_ID)) vx_back_end(
.clk (clk),
@ -267,7 +268,7 @@ VX_back_end #(.CORE_ID(CORE_ID)) vx_back_end(
.out_mem_delay (memory_delay),
.out_exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay)
);
);
VX_dmem_controller VX_dmem_controller(
@ -291,7 +292,7 @@ VX_dmem_controller VX_dmem_controller(
// Core <-> Dcache
.VX_dcache_req (VX_dcache_req_qual),
.VX_dcache_rsp (VX_dcache_rsp)
);
);
// VX_csr_handler vx_csr_handler(
// .clk (clk),
@ -300,7 +301,7 @@ VX_dmem_controller VX_dmem_controller(
// .in_wb_valid (VX_writeback_inter.wb_valid[0]),
// .out_decode_csr_data (csr_decode_csr_data)
// );
// );
endmodule // Vortex

View file

@ -1,20 +1,18 @@
`include "VX_define.v"
`include "VX_cache_config.v"
`include "VX_define.vh"
`include "VX_cache_config.vh"
module Vortex_Cluster
#(
parameter CLUSTER_ID = 0
)
(
#(
parameter CLUSTER_ID = 0
) (
// Clock
input wire clk,
input wire reset,
// IO
output wire[`NUMBER_CORES_PER_CLUSTER-1:0] io_valid,
output wire[`NUMBER_CORES_PER_CLUSTER-1:0][31:0] io_data,
output wire[`NUM_CORES_PER_CLUSTER-1:0] io_valid,
output wire[`NUM_CORES_PER_CLUSTER-1:0][31:0] io_data,
// DRAM Req
output wire out_dram_req,
@ -22,7 +20,7 @@ module Vortex_Cluster
output wire out_dram_req_read,
output wire [31:0] out_dram_req_addr,
output wire [31:0] out_dram_req_size,
output wire [31:0] out_dram_req_data[`DBANK_LINE_SIZE_RNG],
output wire [31:0] out_dram_req_data[`DBANK_LINE_WORDS-1:0],
output wire [31:0] out_dram_expected_lat,
input wire out_dram_req_delay,
@ -30,8 +28,7 @@ module Vortex_Cluster
output wire out_dram_fill_accept,
input wire out_dram_fill_rsp,
input wire [31:0] out_dram_fill_rsp_addr,
input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_SIZE_RNG],
input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_WORDS-1:0],
// LLC Snooping
input wire llc_snp_req,
@ -40,142 +37,133 @@ module Vortex_Cluster
output wire out_ebreak
);
// DRAM Dcache Req
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_req;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_req_write;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_req_read;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_addr;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_size;
wire[`NUMBER_CORES_PER_CLUSTER-1:0][`DBANK_LINE_SIZE_RNG][31:0] per_core_dram_req_data;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_expected_lat;
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req;
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req_write;
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req_read;
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_addr;
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_size;
wire[`NUM_CORES_PER_CLUSTER-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_core_dram_req_data;
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_expected_lat;
// DRAM Dcache Res
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_fill_accept;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_fill_rsp;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_fill_rsp_addr;
wire[`NUMBER_CORES_PER_CLUSTER-1:0][`DBANK_LINE_SIZE_RNG][31:0] per_core_dram_fill_rsp_data;
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_fill_accept;
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_fill_rsp;
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_fill_rsp_addr;
wire[`NUM_CORES_PER_CLUSTER-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_core_dram_fill_rsp_data;
// DRAM Icache Req
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_req;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_write;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_read;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_addr;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_size;
wire[`NUMBER_CORES_PER_CLUSTER-1:0][`IBANK_LINE_SIZE_RNG][31:0] per_core_I_dram_req_data;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_expected_lat;
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_req;
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_write;
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_read;
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_addr;
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_size;
wire[`NUM_CORES_PER_CLUSTER-1:0][`IBANK_LINE_WORDS-1:0][31:0] per_core_I_dram_req_data;
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_expected_lat;
// DRAM Icache Res
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_accept;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_rsp;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_fill_rsp_addr;
wire[`NUMBER_CORES_PER_CLUSTER-1:0][`IBANK_LINE_SIZE_RNG][31:0] per_core_I_dram_fill_rsp_data;
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_accept;
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_rsp;
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_fill_rsp_addr;
wire[`NUM_CORES_PER_CLUSTER-1:0][`IBANK_LINE_WORDS-1:0][31:0] per_core_I_dram_fill_rsp_data;
// Out ebreak
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_out_ebreak;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_io_valid;
wire[`NUMBER_CORES_PER_CLUSTER-1:0][31:0] per_core_io_data;
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_out_ebreak;
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_io_valid;
wire[`NUM_CORES_PER_CLUSTER-1:0][31:0] per_core_io_data;
wire l2c_core_accept;
wire snp_fwd;
wire[31:0] snp_fwd_addr;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] snp_fwd_delay;
wire snp_fwd;
wire[31:0] snp_fwd_addr;
wire[`NUM_CORES_PER_CLUSTER-1:0] snp_fwd_delay;
assign out_ebreak = (&per_core_out_ebreak);
// Instantiate one Vortex core per cluster slot and wire it to the per-core
// signal bundles declared above.
// NOTE(review): diff view - the loop header, the two local wire declarations
// and the whole Vortex instantiation each appear twice (old
// `NUMBER_CORES_PER_CLUSTER / *_SIZE_RNG spelling first, new
// `NUM_CORES_PER_CLUSTER / *_LINE_WORDS-1:0 spelling second).
genvar curr_core;
generate
for (curr_core = 0; curr_core < `NUMBER_CORES_PER_CLUSTER; curr_core=curr_core+1) begin
for (curr_core = 0; curr_core < `NUM_CORES_PER_CLUSTER; curr_core=curr_core+1) begin
// Per-iteration temporaries for the core's outgoing DRAM line data; they are
// copied into the per_core_* arrays at the bottom of the loop body.
wire [`IBANK_LINE_SIZE_RNG][31:0] curr_core_I_dram_req_data;
wire [`DBANK_LINE_SIZE_RNG][31:0] curr_core_dram_req_data ;
wire [`IBANK_LINE_WORDS-1:0][31:0] curr_core_I_dram_req_data;
wire [`DBANK_LINE_WORDS-1:0][31:0] curr_core_dram_req_data ;
// Expose each core's IO strobe/data on the cluster-level IO outputs.
assign io_valid[curr_core] = per_core_io_valid[curr_core];
assign io_data [curr_core] = per_core_io_data [curr_core];
// CORE_ID = slot index within the cluster + CLUSTER_ID * cores-per-cluster.
Vortex #(.CORE_ID(curr_core + (CLUSTER_ID * `NUMBER_CORES_PER_CLUSTER))) vortex_core(
.clk (clk),
.reset (reset),
.io_valid (per_core_io_valid [curr_core]),
.io_data (per_core_io_data [curr_core]),
.dram_req (per_core_dram_req [curr_core]),
.dram_req_write (per_core_dram_req_write [curr_core]),
.dram_req_read (per_core_dram_req_read [curr_core]),
.dram_req_addr (per_core_dram_req_addr [curr_core]),
.dram_req_size (per_core_dram_req_size [curr_core]),
.dram_req_data (curr_core_dram_req_data ),
.dram_expected_lat (per_core_dram_expected_lat [curr_core]),
.dram_fill_accept (per_core_dram_fill_accept [curr_core]),
.dram_fill_rsp (per_core_dram_fill_rsp [curr_core]),
.dram_fill_rsp_addr (per_core_dram_fill_rsp_addr [curr_core]),
.dram_fill_rsp_data (per_core_dram_fill_rsp_data [curr_core]),
.I_dram_req (per_core_I_dram_req [curr_core]),
.I_dram_req_write (per_core_I_dram_req_write [curr_core]),
.I_dram_req_read (per_core_I_dram_req_read [curr_core]),
.I_dram_req_addr (per_core_I_dram_req_addr [curr_core]),
.I_dram_req_size (per_core_I_dram_req_size [curr_core]),
.I_dram_req_data (curr_core_I_dram_req_data ),
.I_dram_expected_lat (per_core_I_dram_expected_lat [curr_core]),
.I_dram_fill_accept (per_core_I_dram_fill_accept [curr_core]),
.I_dram_fill_rsp (per_core_I_dram_fill_rsp [curr_core]),
.I_dram_fill_rsp_addr (per_core_I_dram_fill_rsp_addr[curr_core]),
.I_dram_fill_rsp_data (per_core_I_dram_fill_rsp_data[curr_core]),
.dram_req_delay (l2c_core_accept ),
.out_ebreak (per_core_out_ebreak [curr_core]),
.snp_req (snp_fwd),
.snp_req_addr (snp_fwd_addr),
.snp_req_delay (snp_fwd_delay[curr_core]),
.I_snp_req (0),
.I_snp_req_addr (),
.I_snp_req_delay ()
);
// New-form instantiation: same connections, reformatted.
//  - every core shares the single l2c_core_accept signal as its dram_req_delay
//  - the data-side snoop request (snp_fwd / snp_fwd_addr) is common to all
//    cores, while each core reports its own snp_fwd_delay bit
//  - the instruction-side snoop port is unused: I_snp_req is tied to 0 and
//    I_snp_req_addr / I_snp_req_delay are left unconnected
// NOTE(review): the dram data ports are connected to packed
// [words-1:0][31:0] wires; after this commit the Vortex ports are declared as
// flat [`*BANK_LINE_SIZE-1:0] vectors - confirm the total widths match.
Vortex #(
.CORE_ID(curr_core + (CLUSTER_ID * `NUM_CORES_PER_CLUSTER))
) vortex_core(
.clk (clk),
.reset (reset),
.io_valid (per_core_io_valid [curr_core]),
.io_data (per_core_io_data [curr_core]),
.dram_req (per_core_dram_req [curr_core]),
.dram_req_write (per_core_dram_req_write [curr_core]),
.dram_req_read (per_core_dram_req_read [curr_core]),
.dram_req_addr (per_core_dram_req_addr [curr_core]),
.dram_req_size (per_core_dram_req_size [curr_core]),
.dram_req_data (curr_core_dram_req_data ),
.dram_expected_lat (per_core_dram_expected_lat [curr_core]),
.dram_fill_accept (per_core_dram_fill_accept [curr_core]),
.dram_fill_rsp (per_core_dram_fill_rsp [curr_core]),
.dram_fill_rsp_addr (per_core_dram_fill_rsp_addr [curr_core]),
.dram_fill_rsp_data (per_core_dram_fill_rsp_data [curr_core]),
.I_dram_req (per_core_I_dram_req [curr_core]),
.I_dram_req_write (per_core_I_dram_req_write [curr_core]),
.I_dram_req_read (per_core_I_dram_req_read [curr_core]),
.I_dram_req_addr (per_core_I_dram_req_addr [curr_core]),
.I_dram_req_size (per_core_I_dram_req_size [curr_core]),
.I_dram_req_data (curr_core_I_dram_req_data ),
.I_dram_expected_lat (per_core_I_dram_expected_lat [curr_core]),
.I_dram_fill_accept (per_core_I_dram_fill_accept [curr_core]),
.I_dram_fill_rsp (per_core_I_dram_fill_rsp [curr_core]),
.I_dram_fill_rsp_addr (per_core_I_dram_fill_rsp_addr[curr_core]),
.I_dram_fill_rsp_data (per_core_I_dram_fill_rsp_data[curr_core]),
.dram_req_delay (l2c_core_accept ),
.out_ebreak (per_core_out_ebreak [curr_core]),
.snp_req (snp_fwd),
.snp_req_addr (snp_fwd_addr),
.snp_req_delay (snp_fwd_delay[curr_core]),
.I_snp_req (0),
.I_snp_req_addr (),
.I_snp_req_delay ()
);
// Publish this core's outgoing DRAM line data into the per-core arrays.
assign per_core_dram_req_data [curr_core] = curr_core_dram_req_data;
assign per_core_I_dram_req_data[curr_core] = curr_core_I_dram_req_data;
end
endgenerate
//////////////////// L2 Cache ////////////////////
wire[`LLNUMBER_REQUESTS-1:0] l2c_core_req;
wire[`LLNUMBER_REQUESTS-1:0][2:0] l2c_core_req_mem_write;
wire[`LLNUMBER_REQUESTS-1:0][2:0] l2c_core_req_mem_read;
wire[`LLNUMBER_REQUESTS-1:0][31:0] l2c_core_req_addr;
wire[`LLNUMBER_REQUESTS-1:0][`IBANK_LINE_SIZE_RNG][31:0] l2c_core_req_data;
wire[`LLNUMBER_REQUESTS-1:0][1:0] l2c_core_req_wb;
wire[`L2NUMBER_REQUESTS-1:0] l2c_core_req;
wire[`L2NUMBER_REQUESTS-1:0][2:0] l2c_core_req_mem_write;
wire[`L2NUMBER_REQUESTS-1:0][2:0] l2c_core_req_mem_read;
wire[`L2NUMBER_REQUESTS-1:0][31:0] l2c_core_req_addr;
wire[`L2NUMBER_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l2c_core_req_data;
wire[`L2NUMBER_REQUESTS-1:0][1:0] l2c_core_req_wb;
wire[`LLNUMBER_REQUESTS-1:0] l2c_core_no_wb_slot;
wire[`L2NUMBER_REQUESTS-1:0] l2c_core_no_wb_slot;
wire[`L2NUMBER_REQUESTS-1:0] l2c_wb;
wire[`L2NUMBER_REQUESTS-1:0] [31:0] l2c_wb_addr;
wire[`L2NUMBER_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l2c_wb_data;
wire[`LLNUMBER_REQUESTS-1:0] l2c_wb;
wire[`LLNUMBER_REQUESTS-1:0] [31:0] l2c_wb_addr;
wire[`LLNUMBER_REQUESTS-1:0][`IBANK_LINE_SIZE_RNG][31:0] l2c_wb_data;
wire[`DBANK_LINE_SIZE_RNG][31:0] dram_req_data_port;
wire[`DBANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data_port;
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_req_data_port;
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data_port;
genvar llb_index;
generate
for (llb_index = 0; llb_index < `DBANK_LINE_SIZE_WORDS; llb_index=llb_index+1) begin
for (llb_index = 0; llb_index < `DBANK_LINE_WORDS; llb_index=llb_index+1) begin
assign out_dram_req_data [llb_index] = dram_req_data_port[llb_index];
assign dram_fill_rsp_data_port[llb_index] = out_dram_fill_rsp_data[llb_index];
end
endgenerate
//
genvar l2c_curr_core;
generate
for (l2c_curr_core = 0; l2c_curr_core < `LLNUMBER_REQUESTS; l2c_curr_core=l2c_curr_core+2) begin
for (l2c_curr_core = 0; l2c_curr_core < `L2NUMBER_REQUESTS; l2c_curr_core=l2c_curr_core+2) begin
// Core Request
assign l2c_core_req [l2c_curr_core] = per_core_dram_req [(l2c_curr_core/2)];
assign l2c_core_req [l2c_curr_core+1] = per_core_I_dram_req[(l2c_curr_core/2)];
@ -214,29 +202,27 @@ module Vortex_Cluster
wire dram_snp_full;
wire dram_req_because_of_wb;
VX_cache #(
.CACHE_SIZE_BYTES (`LLCACHE_SIZE_BYTES),
.BANK_LINE_SIZE_BYTES (`LLBANK_LINE_SIZE_BYTES),
.NUMBER_BANKS (`LLNUMBER_BANKS),
.WORD_SIZE_BYTES (`LLWORD_SIZE_BYTES),
.NUMBER_REQUESTS (`LLNUMBER_REQUESTS),
.STAGE_1_CYCLES (`LLSTAGE_1_CYCLES),
.FUNC_ID (`LLFUNC_ID),
.REQQ_SIZE (`LLREQQ_SIZE),
.MRVQ_SIZE (`LLMRVQ_SIZE),
.DFPQ_SIZE (`LLDFPQ_SIZE),
.SNRQ_SIZE (`LLSNRQ_SIZE),
.CWBQ_SIZE (`LLCWBQ_SIZE),
.DWBQ_SIZE (`LLDWBQ_SIZE),
.DFQQ_SIZE (`LLDFQQ_SIZE),
.LLVQ_SIZE (`LLLLVQ_SIZE),
.FFSQ_SIZE (`LLFFSQ_SIZE),
.PRFQ_SIZE (`LLPRFQ_SIZE),
.PRFQ_STRIDE (`LLPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`LLFILL_INVALIDAOR_SIZE),
.SIMULATED_DRAM_LATENCY_CYCLES(`LLSIMULATED_DRAM_LATENCY_CYCLES)
)
gpu_l2cache
(
.CACHE_SIZE_BYTES (`L2CACHE_SIZE_BYTES),
.BANK_LINE_SIZE_BYTES (`L2BANK_LINE_SIZE_BYTES),
.NUMBER_BANKS (`L2NUMBER_BANKS),
.WORD_SIZE_BYTES (`L2WORD_SIZE_BYTES),
.NUMBER_REQUESTS (`L2NUMBER_REQUESTS),
.STAGE_1_CYCLES (`L2STAGE_1_CYCLES),
.FUNC_ID (`L2FUNC_ID),
.REQQ_SIZE (`L2REQQ_SIZE),
.MRVQ_SIZE (`L2MRVQ_SIZE),
.DFPQ_SIZE (`L2DFPQ_SIZE),
.SNRQ_SIZE (`L2SNRQ_SIZE),
.CWBQ_SIZE (`L2CWBQ_SIZE),
.DWBQ_SIZE (`L2DWBQ_SIZE),
.DFQQ_SIZE (`L2DFQQ_SIZE),
.LLVQ_SIZE (`L2LLVQ_SIZE),
.FFSQ_SIZE (`L2FFSQ_SIZE),
.PRFQ_SIZE (`L2PRFQ_SIZE),
.PRFQ_STRIDE (`L2PRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE),
.SIMULATED_DRAM_LATENCY_CYCLES(`L2SIMULATED_DRAM_LATENCY_CYCLES)
) gpu_l2cache (
.clk (clk),
.reset (reset),
@ -295,8 +281,6 @@ module Vortex_Cluster
.snp_fwd (snp_fwd),
.snp_fwd_addr (snp_fwd_addr),
.snp_fwd_delay (|snp_fwd_delay)
);
);
endmodule

View file

@ -1,5 +1,5 @@
`include "VX_define.v"
`include "VX_cache_config.v"
`include "VX_define.vh"
`include "VX_cache_config.vh"
module Vortex_Socket (
@ -8,8 +8,8 @@ module Vortex_Socket (
input wire reset,
// IO
output wire io_valid[`NUMBER_CORES-1:0],
output wire[31:0] io_data [`NUMBER_CORES-1:0],
output wire io_valid[`NUM_CORES-1:0],
output wire[31:0] io_data [`NUM_CORES-1:0],
output wire[31:0] number_cores,
@ -19,7 +19,7 @@ module Vortex_Socket (
output wire out_dram_req_read,
output wire [31:0] out_dram_req_addr,
output wire [31:0] out_dram_req_size,
output wire [31:0] out_dram_req_data[`DBANK_LINE_SIZE_RNG],
output wire [31:0] out_dram_req_data[`DBANK_LINE_WORDS-1:0],
output wire [31:0] out_dram_expected_lat,
input wire out_dram_req_delay,
@ -27,7 +27,7 @@ module Vortex_Socket (
output wire out_dram_fill_accept,
input wire out_dram_fill_rsp,
input wire [31:0] out_dram_fill_rsp_addr,
input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_SIZE_RNG],
input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_WORDS-1:0],
// LLC Snooping
input wire llc_snp_req,
@ -36,18 +36,16 @@ module Vortex_Socket (
output wire out_ebreak
);
assign number_cores = `NUM_CORES;
assign number_cores = `NUMBER_CORES;
if (`NUM_CLUSTERS == 1) begin
if (`NUMBER_CLUSTERS == 1) begin
wire[`NUMBER_CORES-1:0] cluster_io_valid;
wire[`NUMBER_CORES-1:0][31:0] cluster_io_data;
wire[`NUM_CORES-1:0] cluster_io_valid;
wire[`NUM_CORES-1:0][31:0] cluster_io_data;
genvar curr_c;
for (curr_c = 0; curr_c < `NUMBER_CORES; curr_c=curr_c+1) begin
for (curr_c = 0; curr_c < `NUM_CORES; curr_c=curr_c+1) begin
assign io_valid[curr_c] = cluster_io_valid[curr_c];
assign io_data [curr_c] = cluster_io_data [curr_c];
end
@ -76,62 +74,57 @@ module Vortex_Socket (
.llc_snp_req_addr (llc_snp_req_addr),
.llc_snp_req_delay (llc_snp_req_delay),
.out_ebreak (out_ebreak)
);
);
end else begin
wire snp_fwd;
wire[31:0] snp_fwd_addr;
wire[`NUMBER_CLUSTERS-1:0] snp_fwd_delay;
wire[`NUM_CLUSTERS-1:0] snp_fwd_delay;
wire[`NUMBER_CLUSTERS-1:0] per_cluster_out_ebreak;
wire[`NUM_CLUSTERS-1:0] per_cluster_out_ebreak;
assign out_ebreak = (&per_cluster_out_ebreak);
// // DRAM Dcache Req
wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_req;
wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_req_write;
wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_req_read;
wire[`NUMBER_CLUSTERS-1:0] [31:0] per_cluster_dram_req_addr;
wire[`NUMBER_CLUSTERS-1:0] [31:0] per_cluster_dram_req_size;
wire[`NUMBER_CLUSTERS-1:0] [31:0] per_cluster_dram_expected_lat;
wire[`NUMBER_CLUSTERS-1:0][`DBANK_LINE_SIZE_RNG][31:0] per_cluster_dram_req_data;
wire[31:0] per_cluster_dram_req_data_up[`NUMBER_CLUSTERS-1:0][`DBANK_LINE_SIZE_RNG];
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req;
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_write;
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_read;
wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_req_addr;
wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_req_size;
wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_expected_lat;
wire[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_cluster_dram_req_data;
wire[31:0] per_cluster_dram_req_data_up[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0];
wire l3c_core_accept;
// // DRAM Dcache Res
wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_fill_accept;
wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_fill_rsp;
wire[`NUMBER_CLUSTERS-1:0] [31:0] per_cluster_dram_fill_rsp_addr;
wire[`NUMBER_CLUSTERS-1:0][`DBANK_LINE_SIZE_RNG][31:0] per_cluster_dram_fill_rsp_data;
wire[31:0] per_cluster_dram_fill_rsp_data_up[`NUMBER_CLUSTERS-1:0][`DBANK_LINE_SIZE_RNG];
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_fill_accept;
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_fill_rsp;
wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_fill_rsp_addr;
wire[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_cluster_dram_fill_rsp_data;
wire[31:0] per_cluster_dram_fill_rsp_data_up[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0];
wire[`NUMBER_CLUSTERS-1:0][`NUMBER_CORES_PER_CLUSTER-1:0] per_cluster_io_valid;
wire[`NUMBER_CLUSTERS-1:0][`NUMBER_CORES_PER_CLUSTER-1:0][31:0] per_cluster_io_data;
wire[`NUM_CLUSTERS-1:0][`NUM_CORES_PER_CLUSTER-1:0] per_cluster_io_valid;
wire[`NUM_CLUSTERS-1:0][`NUM_CORES_PER_CLUSTER-1:0][31:0] per_cluster_io_data;
genvar curr_c;
genvar curr_cc;
genvar curr_word;
for (curr_c = 0; curr_c < `NUMBER_CLUSTERS; curr_c =curr_c+1) begin
for (curr_cc = 0; curr_cc < `NUMBER_CORES_PER_CLUSTER; curr_cc=curr_cc+1) begin
assign io_valid[curr_cc+(curr_c*`NUMBER_CORES_PER_CLUSTER)] = per_cluster_io_valid[curr_c][curr_cc];
assign io_data [curr_cc+(curr_c*`NUMBER_CORES_PER_CLUSTER)] = per_cluster_io_data [curr_c][curr_cc];
genvar curr_c, curr_cc, curr_word;
for (curr_c = 0; curr_c < `NUM_CLUSTERS; curr_c =curr_c+1) begin
for (curr_cc = 0; curr_cc < `NUM_CORES_PER_CLUSTER; curr_cc=curr_cc+1) begin
assign io_valid[curr_cc+(curr_c*`NUM_CORES_PER_CLUSTER)] = per_cluster_io_valid[curr_c][curr_cc];
assign io_data [curr_cc+(curr_c*`NUM_CORES_PER_CLUSTER)] = per_cluster_io_data [curr_c][curr_cc];
end
for (curr_word = 0; curr_word < `DBANK_LINE_SIZE_WORDS; curr_word = curr_word+1) begin
for (curr_word = 0; curr_word < `DBANK_LINE_WORDS; curr_word = curr_word+1) begin
assign per_cluster_dram_req_data [curr_c][curr_word] = per_cluster_dram_req_data_up [curr_c][curr_word];
assign per_cluster_dram_fill_rsp_data_up[curr_c][curr_word] = per_cluster_dram_fill_rsp_data[curr_c][curr_word];
end
end
genvar curr_cluster;
for (curr_cluster = 0; curr_cluster < `NUMBER_CLUSTERS; curr_cluster=curr_cluster+1) begin
for (curr_cluster = 0; curr_cluster < `NUM_CLUSTERS; curr_cluster=curr_cluster+1) begin
Vortex_Cluster #(.CLUSTER_ID(curr_cluster)) Vortex_Cluster(
.clk (clk),
@ -158,37 +151,33 @@ module Vortex_Socket (
.llc_snp_req_delay (snp_fwd_delay[curr_cluster]),
.out_ebreak (per_cluster_out_ebreak [curr_cluster])
);
);
end
//////////////////// L3 Cache ////////////////////
wire[`L3NUMBER_REQUESTS-1:0] l3c_core_req;
wire[`L3NUMBER_REQUESTS-1:0][2:0] l3c_core_req_mem_write;
wire[`L3NUMBER_REQUESTS-1:0][2:0] l3c_core_req_mem_read;
wire[`L3NUMBER_REQUESTS-1:0][31:0] l3c_core_req_addr;
wire[`L3NUMBER_REQUESTS-1:0][`IBANK_LINE_SIZE_RNG][31:0] l3c_core_req_data;
wire[`L3NUMBER_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l3c_core_req_data;
wire[`L3NUMBER_REQUESTS-1:0][1:0] l3c_core_req_wb;
wire[`L3NUMBER_REQUESTS-1:0] l3c_core_no_wb_slot;
wire[`L3NUMBER_REQUESTS-1:0] l3c_wb;
wire[`L3NUMBER_REQUESTS-1:0] [31:0] l3c_wb_addr;
wire[`L3NUMBER_REQUESTS-1:0][`IBANK_LINE_SIZE_RNG][31:0] l3c_wb_data;
wire[`L3NUMBER_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l3c_wb_data;
wire[`DBANK_LINE_SIZE_RNG][31:0] dram_req_data_port;
wire[`DBANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data_port;
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_req_data_port;
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data_port;
genvar llb_index;
for (llb_index = 0; llb_index < `DBANK_LINE_SIZE_WORDS; llb_index=llb_index+1) begin
for (llb_index = 0; llb_index < `DBANK_LINE_WORDS; llb_index=llb_index+1) begin
assign out_dram_req_data [llb_index] = dram_req_data_port[llb_index];
assign dram_fill_rsp_data_port[llb_index] = out_dram_fill_rsp_data[llb_index];
end
//
genvar l3c_curr_cluster;
for (l3c_curr_cluster = 0; l3c_curr_cluster < `L3NUMBER_REQUESTS; l3c_curr_cluster=l3c_curr_cluster+1) begin
@ -212,7 +201,6 @@ module Vortex_Socket (
assign per_cluster_dram_fill_rsp [l3c_curr_cluster] = l3c_wb [l3c_curr_cluster];
assign per_cluster_dram_fill_rsp_data[l3c_curr_cluster] = l3c_wb_data[l3c_curr_cluster];
assign per_cluster_dram_fill_rsp_addr[l3c_curr_cluster] = l3c_wb_addr[l3c_curr_cluster];
end
wire dram_snp_full;
@ -224,7 +212,7 @@ module Vortex_Socket (
.WORD_SIZE_BYTES (`L3WORD_SIZE_BYTES),
.NUMBER_REQUESTS (`L3NUMBER_REQUESTS),
.STAGE_1_CYCLES (`L3STAGE_1_CYCLES),
.FUNC_ID (`LLFUNC_ID),
.FUNC_ID (`L2FUNC_ID),
.REQQ_SIZE (`L3REQQ_SIZE),
.MRVQ_SIZE (`L3MRVQ_SIZE),
.DFPQ_SIZE (`L3DFPQ_SIZE),
@ -238,9 +226,7 @@ module Vortex_Socket (
.PRFQ_STRIDE (`L3PRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE),
.SIMULATED_DRAM_LATENCY_CYCLES(`L3SIMULATED_DRAM_LATENCY_CYCLES)
)
gpu_l3cache
(
) gpu_l3cache (
.clk (clk),
.reset (reset),
@ -300,10 +286,8 @@ module Vortex_Socket (
.snp_fwd (snp_fwd),
.snp_fwd_addr (snp_fwd_addr),
.snp_fwd_delay (|snp_fwd_delay)
);
);
end
endmodule

View file

@ -1,50 +1,45 @@
`include "VX_define.v"
`include "VX_define.vh"
module byte_enabled_simple_dual_port_ram
(
input we, clk,
input wire reset,
input wire[4:0] waddr, raddr1, raddr2,
input wire[`NT_M1:0] be,
input wire[`NT_M1:0][31:0] wdata,
output reg[`NT_M1:0][31:0] q1, q2
input wire[`NUM_THREADS-1:0] be,
input wire[`NUM_THREADS-1:0][31:0] wdata,
output reg[`NUM_THREADS-1:0][31:0] q1, q2
);
// integer regi;
// integer threadi;
// integer regi;
// integer threadi;
// Thread Byte Bit
logic [`NT_M1:0][3:0][7:0] GPR[31:0];
// Thread Byte Bit
logic [`NUM_THREADS-1:0][3:0][7:0] GPR[31:0];
// initial begin
// for (ini = 0; ini < 32; ini = ini + 1) GPR[ini] = 0;
// end
integer ini;
always @(posedge clk, posedge reset) begin
// TODO Clearing ram not currently supported on FPGA.
if (reset) begin
// `ifdef ASIC
for (ini = 0; ini < 32; ini = ini + 1) GPR[ini] <= 0;
// `endif
end
else if(we) begin
always @(posedge clk) begin
if (we) begin
integer thread_ind;
for (thread_ind = 0; thread_ind <= `NT_M1; thread_ind = thread_ind + 1) begin
if(be[thread_ind]) GPR[waddr][thread_ind][0] <= wdata[thread_ind][7:0];
if(be[thread_ind]) GPR[waddr][thread_ind][1] <= wdata[thread_ind][15:8];
if(be[thread_ind]) GPR[waddr][thread_ind][2] <= wdata[thread_ind][23:16];
if(be[thread_ind]) GPR[waddr][thread_ind][3] <= wdata[thread_ind][31:24];
for (thread_ind = 0; thread_ind < `NUM_THREADS; thread_ind = thread_ind + 1) begin
if (be[thread_ind]) begin
GPR[waddr][thread_ind][0] <= wdata[thread_ind][7:0];
GPR[waddr][thread_ind][1] <= wdata[thread_ind][15:8];
GPR[waddr][thread_ind][2] <= wdata[thread_ind][23:16];
GPR[waddr][thread_ind][3] <= wdata[thread_ind][31:24];
end
end
end
// $display("^^^^^^^^^^^^^^^^^^^^^^^");
// for (regi = 0; regi <= 31; regi = regi + 1) begin
// for (threadi = 0; threadi <= `NT_M1; threadi = threadi + 1) begin
// for (threadi = 0; threadi < `NUM_THREADS; threadi = threadi + 1) begin
// if (GPR[regi][threadi] != 0) $display("$%d: %h",regi, GPR[regi][threadi]);
// end
// end
end
assign q1 = GPR[raddr1];

View file

@ -2,7 +2,7 @@
// Also add a bit about whether the "Way ID" is valid / being held or if it is just default
// Also make sure all possible output states are transmitted back to the bank correctly
`include "VX_define.v"
`include "VX_define.vh"
module VX_Cache_Bank
#(
@ -67,7 +67,7 @@ module VX_Cache_Bank
localparam RECIV_MEM_RSP = 2;
localparam BLOCK_NUM_BITS = `CLOG2(CACHE_BLOCK);
localparam BLOCK_NUM_BITS = `LOG2UP(CACHE_BLOCK);
// Inputs
input wire rst;
input wire clk;

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_cache_bank_valid
#(

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_cache_data
#(

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
module VX_cache_data_per_index
#(

View file

@ -8,7 +8,7 @@
// TO DO:
// - Send in a response from memory of what the data is from the test bench
`include "VX_define.v"
`include "VX_define.vh"
//`include "VX_Cache_Bank.v"
//`include "VX_cache_bank_valid.v"
//`include "VX_priority_encoder.v"

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
`define NUM_WORDS_PER_BLOCK 4
@ -33,17 +33,17 @@ module VX_d_cache_encapsulate (
//parameter cache_entry = 9;
input wire clk, rst;
input wire i_p_valid[`NT_M1:0];
input wire [31:0] i_p_addr[`NT_M1:0];
input wire i_p_valid[`NUM_THREADS-1:0];
input wire [31:0] i_p_addr[`NUM_THREADS-1:0];
input wire i_p_initial_request;
input wire [31:0] i_p_writedata[`NT_M1:0];
input wire [31:0] i_p_writedata[`NUM_THREADS-1:0];
input wire i_p_read_or_write;
input wire [31:0] i_m_readdata[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0];
input wire i_m_ready;
output reg [31:0] o_p_readdata[`NT_M1:0];
output reg o_p_readdata_valid[`NT_M1:0] ;
output reg [31:0] o_p_readdata[`NUM_THREADS-1:0];
output reg o_p_readdata_valid[`NUM_THREADS-1:0] ;
output reg o_p_waitrequest;
output reg [31:0] o_m_addr;
@ -53,12 +53,12 @@ module VX_d_cache_encapsulate (
// Inter
wire [`NT_M1:0] i_p_valid_inter;
wire [`NT_M1:0][31:0] i_p_addr_inter;
wire [`NT_M1:0][31:0] i_p_writedata_inter;
wire [`NUM_THREADS-1:0] i_p_valid_inter;
wire [`NUM_THREADS-1:0][31:0] i_p_addr_inter;
wire [`NUM_THREADS-1:0][31:0] i_p_writedata_inter;
reg [`NT_M1:0][31:0] o_p_readdata_inter;
reg [`NT_M1:0] o_p_readdata_valid_inter;
reg [`NUM_THREADS-1:0][31:0] o_p_readdata_inter;
reg [`NUM_THREADS-1:0] o_p_readdata_valid_inter;
reg[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata_inter;
wire[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata_inter;
@ -66,7 +66,7 @@ module VX_d_cache_encapsulate (
genvar curr_thraed, curr_bank, curr_word;
generate
for (curr_thraed = 0; curr_thraed < `NT; curr_thraed = curr_thraed + 1) begin : threads
for (curr_thraed = 0; curr_thraed < `NUM_THREADS; curr_thraed = curr_thraed + 1) begin : threads
assign i_p_valid_inter[curr_thraed] = i_p_valid[curr_thraed];
assign i_p_addr_inter[curr_thraed] = i_p_addr[curr_thraed];
assign i_p_writedata_inter[curr_thraed] = i_p_writedata[curr_thraed];

View file

@ -1,4 +1,4 @@
`include "VX_define.v"
`include "VX_define.vh"
`include "VX_d_cache.v"
module VX_d_cache_tb;
@ -6,13 +6,13 @@ module VX_d_cache_tb;
parameter NUMBER_BANKS = 8;
reg clk, reset, im_ready;
reg [`NT_M1:0] i_p_valid;
reg [`NT_M1:0][13:0] i_p_addr; // FIXME
reg [`NUM_THREADS-1:0] i_p_valid;
reg [`NUM_THREADS-1:0][13:0] i_p_addr; // FIXME
reg i_p_initial_request;
reg [`NT_M1:0][31:0] i_p_writedata;
reg [`NUM_THREADS-1:0][31:0] i_p_writedata;
reg i_p_read_or_write; //, i_p_write;
reg [`NT_M1:0][31:0] o_p_readdata;
reg [`NT_M1:0] o_p_readdata_valid;
reg [`NUM_THREADS-1:0][31:0] o_p_readdata;
reg [`NUM_THREADS-1:0] o_p_readdata_valid;
reg o_p_waitrequest;
reg [13:0] o_m_addr; // Only one address is sent out at a time to memory
reg o_m_valid;

View file

@ -2,7 +2,7 @@
// Also add a bit about whether the "Way ID" is valid / being held or if it is just default
// Also make sure all possible output states are transmitted back to the bank correctly
// `include "VX_define.v"
// `include "VX_define.vh"
module cache_set(clk,
rst,
// These next 4 are possible modes that the Set could be in, I am making them 4 different variables for indexing purposes
@ -94,7 +94,7 @@ module cache_set(clk,
readdata <= data[3];
end
end else if (access) begin
//tag[`NT_M1:0] <= i_p_addr[`NT_M1:0][13:12];
//tag[`NUM_THREADS-1:0] <= i_p_addr[`NUM_THREADS-1:0][13:12];
counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! NEED TO FIX LOGIC
// Hit in First Column
if (tag[0] == o_tag && valid[0]) begin

View file

@ -1,5 +1,5 @@
`include "VX_cache_config.v"
`include "VX_define.v"
`include "VX_cache_config.vh"
`include "VX_define.vh"
module VX_bank
#(
// Size of cache in bytes
@ -60,7 +60,7 @@ module VX_bank
input wire [4:0] bank_rd,
input wire [NUMBER_REQUESTS-1:0][1:0] bank_wb,
input wire [31:0] bank_pc,
input wire [`NW_M1:0] bank_warp_num,
input wire [`NW_BITS-1:0] bank_warp_num,
input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_read,
input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_write,
output wire reqq_full,
@ -71,7 +71,7 @@ module VX_bank
output wire [`vx_clog2(NUMBER_REQUESTS)-1:0] bank_wb_tid,
output wire [4:0] bank_wb_rd,
output wire [1:0] bank_wb_wb,
output wire [`NW_M1:0] bank_wb_warp_num,
output wire [`NW_BITS-1:0] bank_wb_warp_num,
output wire [`WORD_SIZE_RNG] bank_wb_data,
output wire [31:0] bank_wb_pc,
output wire [31:0] bank_wb_address,
@ -86,14 +86,14 @@ module VX_bank
// Dram Fill Response
input wire dram_fill_rsp,
input wire [31:0] dram_fill_addr,
input wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dram_fill_rsp_data,
input wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dram_fill_rsp_data,
output wire dram_fill_accept,
// Dram WB Requests
input wire dram_wb_queue_pop,
output wire dram_wb_req,
output wire[31:0] dram_wb_req_addr,
output wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dram_wb_req_data,
output wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dram_wb_req_data,
// Snp Request
input wire snp_req,
@ -112,7 +112,7 @@ module VX_bank
if (reset) begin
snoop_state <= 0;
end else begin
snoop_state <= (snoop_state | snp_req) && ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID));
snoop_state <= (snoop_state | snp_req) && ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID));
end
end
@ -139,11 +139,11 @@ module VX_bank
wire dfpq_empty;
wire dfpq_full;
wire[31:0] dfpq_addr_st0;
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dfpq_filldata_st0;
wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dfpq_filldata_st0;
assign dram_fill_accept = !dfpq_full;
VX_generic_queue_ll #(.DATAW(32+(`BANK_LINE_SIZE_WORDS*`WORD_SIZE)), .SIZE(DFPQ_SIZE)) dfp_queue(
VX_generic_queue_ll #(.DATAW(32+(`BANK_LINE_WORDS*`WORD_SIZE)), .SIZE(DFPQ_SIZE)) dfp_queue(
.clk (clk),
.reset (reset),
.push (dram_fill_rsp),
@ -164,7 +164,7 @@ module VX_bank
wire [`WORD_SIZE_RNG] reqq_req_writeword_st0;
wire [4:0] reqq_req_rd_st0;
wire [1:0] reqq_req_wb_st0;
wire [`NW_M1:0] reqq_req_warp_num_st0;
wire [`NW_BITS-1:0] reqq_req_warp_num_st0;
wire [2:0] reqq_req_mem_read_st0;
wire [2:0] reqq_req_mem_write_st0;
wire [31:0] reqq_req_pc_st0;
@ -231,7 +231,7 @@ module VX_bank
wire [4:0] mrvq_rd_st0;
wire [1:0] mrvq_wb_st0;
wire [31:0] miss_resrv_pc_st0;
wire [`NW_M1:0] mrvq_warp_num_st0;
wire [`NW_BITS-1:0] mrvq_warp_num_st0;
wire [2:0] mrvq_mem_read_st0;
wire [2:0] mrvq_mem_write_st0;
@ -241,7 +241,7 @@ module VX_bank
wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_add_tid;
wire[4:0] miss_add_rd;
wire[1:0] miss_add_wb;
wire[`NW_M1:0] miss_add_warp_num;
wire[`NW_BITS-1:0] miss_add_warp_num;
wire[2:0] miss_add_mem_read;
wire[2:0] miss_add_mem_write;
@ -336,7 +336,7 @@ module VX_bank
wire qual_valid_st0;
wire [31:0] qual_addr_st0;
wire [`WORD_SIZE_RNG] qual_writeword_st0;
wire [`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] qual_writedata_st0;
wire [`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] qual_writedata_st0;
wire [`REQ_INST_META_SIZE-1:0] qual_inst_meta_st0;
wire qual_going_to_write_st0;
wire qual_is_snp;
@ -344,7 +344,7 @@ module VX_bank
wire [`WORD_SIZE_RNG] writeword_st1 [STAGE_1_CYCLES-1:0];
wire [`REQ_INST_META_SIZE-1:0] inst_meta_st1 [STAGE_1_CYCLES-1:0];
wire [`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] writedata_st1[STAGE_1_CYCLES-1:0];
wire [`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] writedata_st1[STAGE_1_CYCLES-1:0];
wire is_snp_st1 [STAGE_1_CYCLES-1:0];
wire [31:0] pc_st1 [STAGE_1_CYCLES-1:0];
@ -387,7 +387,7 @@ module VX_bank
reqq_pop ? reqq_req_writeword_st0 :
0;
VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_c0 (
VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_c0 (
.clk (clk),
.reset(reset),
.stall(stall_bank_pipe),
@ -399,7 +399,7 @@ module VX_bank
genvar curr_stage;
generate
for (curr_stage = 1; curr_stage < STAGE_1_CYCLES; curr_stage = curr_stage + 1) begin
VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_cc (
VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_cc (
.clk (clk),
.reset(reset),
.stall(stall_bank_pipe),
@ -412,7 +412,7 @@ module VX_bank
wire[`WORD_SIZE_RNG] readword_st1e;
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] readdata_st1e;
wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] readdata_st1e;
wire[`TAG_SELECT_SIZE_RNG] readtag_st1e;
wire miss_st1e;
wire dirty_st1e;
@ -421,7 +421,7 @@ module VX_bank
wire [4:0] rd_st1e;
wire [1:0] wb_st1e;
wire [`NW_M1:0] warp_num_st1e;
wire [`NW_BITS-1:0] warp_num_st1e;
wire [2:0] mem_read_st1e;
wire [2:0] mem_write_st1e;
wire [`vx_clog2(NUMBER_REQUESTS)-1:0] tid_st1e;
@ -488,7 +488,7 @@ module VX_bank
wire valid_st2;
wire[`WORD_SIZE_RNG] writeword_st2;
wire[`WORD_SIZE_RNG] readword_st2;
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] readdata_st2;
wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] readdata_st2;
wire miss_st2;
wire dirty_st2;
wire[`REQ_INST_META_SIZE-1:0] inst_meta_st2;
@ -498,7 +498,7 @@ module VX_bank
wire [31:0] pc_st2;
VX_generic_register #(.N( 1+1+1+1+32+`WORD_SIZE+`WORD_SIZE+(`BANK_LINE_SIZE_WORDS * `WORD_SIZE) + `REQ_INST_META_SIZE + `TAG_SELECT_NUM_BITS + 32 + 2)) st_1e_2 (
VX_generic_register #(.N( 1+1+1+1+32+`WORD_SIZE+`WORD_SIZE+(`BANK_LINE_WORDS * `WORD_SIZE) + `REQ_INST_META_SIZE + `TAG_SELECT_NUM_BITS + 32 + 2)) st_1e_2 (
.clk (clk),
.reset(reset),
.stall(stall_bank_pipe),
@ -525,17 +525,17 @@ module VX_bank
// Enqueue to CWB Queue
wire cwbq_push = (valid_st2 && !miss_st2) && !cwbq_full && !((FUNC_ID == `LLFUNC_ID) && (miss_add_wb == 0)) && !((is_snp_st2 && valid_st2 && ffsq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
wire cwbq_push = (valid_st2 && !miss_st2) && !cwbq_full && !((FUNC_ID == `L2FUNC_ID) && (miss_add_wb == 0)) && !((is_snp_st2 && valid_st2 && ffsq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
wire [`WORD_SIZE_RNG] cwbq_data = readword_st2;
wire [`vx_clog2(NUMBER_REQUESTS)-1:0] cwbq_tid = miss_add_tid;
wire [4:0] cwbq_rd = miss_add_rd;
wire [1:0] cwbq_wb = miss_add_wb;
wire [`NW_M1:0] cwbq_warp_num = miss_add_warp_num;
wire [`NW_BITS-1:0] cwbq_warp_num = miss_add_warp_num;
wire [31:0] cwbq_pc = pc_st2;
wire cwbq_empty;
assign bank_wb_valid = !cwbq_empty;
VX_generic_queue_ll #(.DATAW( `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_M1+1) + `WORD_SIZE + 32 + 32), .SIZE(CWBQ_SIZE)) cwb_queue(
VX_generic_queue_ll #(.DATAW( `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_BITS-1+1) + `WORD_SIZE + 32 + 32), .SIZE(CWBQ_SIZE)) cwb_queue(
.clk (clk),
.reset (reset),
@ -554,8 +554,8 @@ module VX_bank
wire[31:0] dwbq_req_addr;
wire dwbq_empty;
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dwbq_req_data;
if ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dwbq_req_data;
if ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2;
assign dwbq_req_addr = (should_flush && dwbq_push) ? (addr_st2) : ({readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK);
end else begin
@ -603,7 +603,7 @@ module VX_bank
assign dram_fill_req_addr = addr_st2 & `BASE_ADDR_MASK;
assign dram_wb_req = !dwbq_empty;
VX_generic_queue_ll #(.DATAW( 32 + (`BANK_LINE_SIZE_WORDS * `WORD_SIZE)), .SIZE(DWBQ_SIZE)) dwb_queue(
VX_generic_queue_ll #(.DATAW( 32 + (`BANK_LINE_WORDS * `WORD_SIZE)), .SIZE(DWBQ_SIZE)) dwb_queue(
.clk (clk),
.reset (reset),

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v"
`include "VX_cache_config.vh"
module VX_cache
#(
@ -66,7 +66,7 @@ module VX_cache
// Req meta
input wire [4:0] core_req_rd,
input wire [NUMBER_REQUESTS-1:0][1:0] core_req_wb,
input wire [`NW_M1:0] core_req_warp_num,
input wire [`NW_BITS-1:0] core_req_warp_num,
input wire [31:0] core_req_pc,
output wire delay_req,
@ -75,7 +75,7 @@ module VX_cache
output wire [NUMBER_REQUESTS-1:0] core_wb_valid,
output wire [4:0] core_wb_req_rd,
output wire [1:0] core_wb_req_wb,
output wire [`NW_M1:0] core_wb_warp_num,
output wire [`NW_BITS-1:0] core_wb_warp_num,
output wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] core_wb_readdata,
output wire [NUMBER_REQUESTS-1:0][31:0] core_wb_pc,
output wire [NUMBER_REQUESTS-1:0][31:0] core_wb_address,
@ -84,7 +84,7 @@ module VX_cache
// Dram Fill Response
input wire dram_fill_rsp,
input wire [31:0] dram_fill_rsp_addr,
input wire [`IBANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data,
input wire [`IBANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data,
output wire dram_fill_accept,
// Dram request
@ -93,7 +93,7 @@ module VX_cache
output wire dram_req_read,
output wire [31:0] dram_req_addr,
output wire [31:0] dram_req_size,
output wire [`IBANK_LINE_SIZE_RNG][31:0] dram_req_data,
output wire [`IBANK_LINE_WORDS-1:0][31:0] dram_req_data,
output wire dram_req_because_of_wb,
input wire dram_req_delay,
@ -119,7 +119,7 @@ module VX_cache
wire [NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_wb_tid;
wire [NUMBER_BANKS-1:0][4:0] per_bank_wb_rd;
wire [NUMBER_BANKS-1:0][1:0] per_bank_wb_wb;
wire [NUMBER_BANKS-1:0][`NW_M1:0] per_bank_wb_warp_num;
wire [NUMBER_BANKS-1:0][`NW_BITS-1:0] per_bank_wb_warp_num;
wire [NUMBER_BANKS-1:0][`WORD_SIZE_RNG] per_bank_wb_data;
wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_pc;
wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_address;
@ -134,7 +134,7 @@ module VX_cache
wire[NUMBER_BANKS-1:0] per_bank_dram_wb_req;
wire[NUMBER_BANKS-1:0] per_bank_dram_because_of_snp;
wire[NUMBER_BANKS-1:0][31:0] per_bank_dram_wb_req_addr;
wire[NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] per_bank_dram_wb_req_data;
wire[NUMBER_BANKS-1:0][`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] per_bank_dram_wb_req_data;
wire[NUMBER_BANKS-1:0] per_bank_reqq_full;
@ -287,7 +287,7 @@ module VX_cache
wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] curr_bank_writedata;
wire [4:0] curr_bank_rd;
wire [NUMBER_REQUESTS-1:0][1:0] curr_bank_wb;
wire [`NW_M1:0] curr_bank_warp_num;
wire [`NW_BITS-1:0] curr_bank_warp_num;
wire [NUMBER_REQUESTS-1:0][2:0] curr_bank_mem_read;
wire [NUMBER_REQUESTS-1:0][2:0] curr_bank_mem_write;
wire [31:0] curr_bank_pc;
@ -298,13 +298,13 @@ module VX_cache
wire [31:0] curr_bank_wb_pc;
wire [4:0] curr_bank_wb_rd;
wire [1:0] curr_bank_wb_wb;
wire [`NW_M1:0] curr_bank_wb_warp_num;
wire [`NW_BITS-1:0] curr_bank_wb_warp_num;
wire [`WORD_SIZE_RNG] curr_bank_wb_data;
wire [31:0] curr_bank_wb_address;
wire curr_bank_dram_fill_rsp;
wire [31:0] curr_bank_dram_fill_rsp_addr;
wire [`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] curr_bank_dram_fill_rsp_data;
wire [`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] curr_bank_dram_fill_rsp_data;
wire curr_bank_dram_fill_accept;
wire curr_bank_dfqq_full;
@ -316,7 +316,7 @@ module VX_cache
wire curr_bank_dram_wb_queue_pop;
wire curr_bank_dram_wb_req;
wire[31:0] curr_bank_dram_wb_req_addr;
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] curr_bank_dram_wb_req_data;
wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] curr_bank_dram_wb_req_data;
wire curr_bank_snp_req;
wire[31:0] curr_bank_snp_req_addr;

View file

@ -1,7 +1,7 @@
`ifndef VX_CACHE_CONFIG
`define VX_CACHE_CONFIG
`include "../VX_define.v"
`include "../VX_define.vh"
// data tid rd wb warp_num read write
@ -10,10 +10,10 @@
`define vx_clog2(value) ((value == 1) ? 1 : $clog2(value))
`define MRVQ_METADATA_SIZE (`WORD_SIZE + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_M1 + 1) + 3 + 3)
`define MRVQ_METADATA_SIZE (`WORD_SIZE + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_BITS-1 + 1) + 3 + 3)
// 5 + 2 + 4 + 3 + 3 + 1
`define REQ_INST_META_SIZE (5 + 2 + (`NW_M1+1) + 3 + 3 + `vx_clog2(NUMBER_REQUESTS))
`define REQ_INST_META_SIZE (5 + 2 + (`NW_BITS-1+1) + 3 + 3 + `vx_clog2(NUMBER_REQUESTS))
// `define vx_clog2_h(value, x) (value == (1 << x)) ? (x)
@ -60,9 +60,7 @@
// 8
`define BANK_LINE_COUNT (`BANK_SIZE_BYTES/BANK_LINE_SIZE_BYTES)
// 4
`define BANK_LINE_SIZE_WORDS (BANK_LINE_SIZE_BYTES / WORD_SIZE_BYTES)
// 3:0
`define BANK_LINE_SIZE_RNG `BANK_LINE_SIZE_WORDS-1:0
`define BANK_LINE_WORDS (BANK_LINE_SIZE_BYTES / WORD_SIZE_BYTES)
// Offset is fixed
`define OFFSET_ADDR_NUM_BITS 2
@ -73,7 +71,7 @@
`define OFFSET_SIZE_RNG `OFFSET_SIZE_END:0
// 2
`define WORD_SELECT_NUM_BITS (`vx_clog2(`BANK_LINE_SIZE_WORDS))
`define WORD_SELECT_NUM_BITS (`vx_clog2(`BANK_LINE_WORDS))
// 2
`define WORD_SELECT_SIZE_END (`WORD_SELECT_NUM_BITS)
// 2

View file

@ -1,5 +1,5 @@
`include "VX_cache_config.v"
`include "VX_cache_config.vh"
module VX_cache_core_req_bank_sel
#(

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v"
`include "VX_cache_config.vh"
module VX_cache_dfq_queue
#(

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v"
`include "VX_cache_config.vh"
module VX_cache_dram_req_arb
#(
@ -62,7 +62,7 @@ module VX_cache_dram_req_arb
output wire[NUMBER_BANKS-1:0] per_bank_dram_wb_queue_pop,
input wire[NUMBER_BANKS-1:0] per_bank_dram_wb_req,
input wire[NUMBER_BANKS-1:0][31:0] per_bank_dram_wb_req_addr,
input wire[NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] per_bank_dram_wb_req_data,
input wire[NUMBER_BANKS-1:0][`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] per_bank_dram_wb_req_data,
input wire[NUMBER_BANKS-1:0] per_bank_dram_because_of_snp,
// real Dram request
@ -71,7 +71,7 @@ module VX_cache_dram_req_arb
output wire dram_req_read,
output wire [31:0] dram_req_addr,
output wire [31:0] dram_req_size,
output wire [`IBANK_LINE_SIZE_RNG][31:0] dram_req_data,
output wire [`IBANK_LINE_WORDS-1:0][31:0] dram_req_data,
output wire dram_req_because_of_wb,
input wire dram_req_delay

View file

@ -1,5 +1,5 @@
`include "VX_cache_config.v"
`include "VX_cache_config.vh"
module VX_cache_miss_resrv
#(
@ -56,7 +56,7 @@ module VX_cache_miss_resrv
input wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_add_tid,
input wire[4:0] miss_add_rd,
input wire[1:0] miss_add_wb,
input wire[`NW_M1:0] miss_add_warp_num,
input wire[`NW_BITS-1:0] miss_add_warp_num,
input wire[2:0] miss_add_mem_read,
input wire[2:0] miss_add_mem_write,
input wire[31:0] miss_add_pc,
@ -75,14 +75,14 @@ module VX_cache_miss_resrv
output wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_resrv_tid_st0,
output wire[4:0] miss_resrv_rd_st0,
output wire[1:0] miss_resrv_wb_st0,
output wire[`NW_M1:0] miss_resrv_warp_num_st0,
output wire[`NW_BITS-1:0] miss_resrv_warp_num_st0,
output wire[2:0] miss_resrv_mem_read_st0,
output wire[31:0] miss_resrv_pc_st0,
output wire[2:0] miss_resrv_mem_write_st0
);
// Size of metadata = 32 + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_M1 + 1)
// Size of metadata = 32 + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_BITS-1 + 1)
reg[`MRVQ_METADATA_SIZE-1:0] metadata_table[MRVQ_SIZE-1:0];
reg[MRVQ_SIZE-1:0][31:0] addr_table;
reg[MRVQ_SIZE-1:0][31:0] pc_table;

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v"
`include "VX_cache_config.vh"
module VX_cache_req_queue
#(
@ -55,7 +55,7 @@ module VX_cache_req_queue
input wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] bank_writedata,
input wire [4:0] bank_rd,
input wire [NUMBER_REQUESTS-1:0][1:0] bank_wb,
input wire [`NW_M1:0] bank_warp_num,
input wire [`NW_BITS-1:0] bank_warp_num,
input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_read,
input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_write,
input wire [31:0] bank_pc,
@ -68,7 +68,7 @@ module VX_cache_req_queue
output wire [`WORD_SIZE_RNG] reqq_req_writedata_st0,
output wire [4:0] reqq_req_rd_st0,
output wire [1:0] reqq_req_wb_st0,
output wire [`NW_M1:0] reqq_req_warp_num_st0,
output wire [`NW_BITS-1:0] reqq_req_warp_num_st0,
output wire [2:0] reqq_req_mem_read_st0,
output wire [2:0] reqq_req_mem_write_st0,
output wire [31:0] reqq_req_pc_st0,
@ -83,7 +83,7 @@ module VX_cache_req_queue
wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] out_per_writedata;
wire [4:0] out_per_rd;
wire [NUMBER_REQUESTS-1:0][1:0] out_per_wb;
wire [`NW_M1:0] out_per_warp_num;
wire [`NW_BITS-1:0] out_per_warp_num;
wire [NUMBER_REQUESTS-1:0][2:0] out_per_mem_read;
wire [NUMBER_REQUESTS-1:0][2:0] out_per_mem_write;
wire [31:0] out_per_pc;
@ -95,7 +95,7 @@ module VX_cache_req_queue
reg [4:0] use_per_rd;
reg [NUMBER_REQUESTS-1:0][1:0] use_per_wb;
reg [31:0] use_per_pc;
reg [`NW_M1:0] use_per_warp_num;
reg [`NW_BITS-1:0] use_per_warp_num;
reg [NUMBER_REQUESTS-1:0][2:0] use_per_mem_read;
reg [NUMBER_REQUESTS-1:0][2:0] use_per_mem_write;
@ -105,7 +105,7 @@ module VX_cache_req_queue
wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] qual_writedata;
wire [4:0] qual_rd;
wire [NUMBER_REQUESTS-1:0][1:0] qual_wb;
wire [`NW_M1:0] qual_warp_num;
wire [`NW_BITS-1:0] qual_warp_num;
wire [NUMBER_REQUESTS-1:0][2:0] qual_mem_read;
wire [NUMBER_REQUESTS-1:0][2:0] qual_mem_write;
wire [31:0] qual_pc;
@ -120,7 +120,7 @@ module VX_cache_req_queue
wire push_qual = reqq_push && !reqq_full;
wire pop_qual = !out_empty && use_empty;
VX_generic_queue_ll #(.DATAW( (NUMBER_REQUESTS * (1+32+`WORD_SIZE)) + 5 + (NUMBER_REQUESTS*2) + (`NW_M1+1) + (NUMBER_REQUESTS * (3 + 3)) + 32 ), .SIZE(REQQ_SIZE)) reqq_queue(
VX_generic_queue_ll #(.DATAW( (NUMBER_REQUESTS * (1+32+`WORD_SIZE)) + 5 + (NUMBER_REQUESTS*2) + (`NW_BITS-1+1) + (NUMBER_REQUESTS * (3 + 3)) + 32 ), .SIZE(REQQ_SIZE)) reqq_queue(
.clk (clk),
.reset (reset),
.push (push_qual),

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v"
`include "VX_cache_config.vh"
module VX_cache_wb_sel_merge
#(
@ -53,7 +53,7 @@ module VX_cache_wb_sel_merge
input wire [NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_wb_tid,
input wire [NUMBER_BANKS-1:0][4:0] per_bank_wb_rd,
input wire [NUMBER_BANKS-1:0][1:0] per_bank_wb_wb,
input wire [NUMBER_BANKS-1:0][`NW_M1:0] per_bank_wb_warp_num,
input wire [NUMBER_BANKS-1:0][`NW_BITS-1:0] per_bank_wb_warp_num,
input wire [NUMBER_BANKS-1:0][`WORD_SIZE_RNG] per_bank_wb_data,
input wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_pc,
input wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_address,
@ -67,7 +67,7 @@ module VX_cache_wb_sel_merge
output reg [NUMBER_REQUESTS-1:0][31:0] core_wb_pc,
output wire [4:0] core_wb_req_rd,
output wire [1:0] core_wb_req_wb,
output wire [`NW_M1:0] core_wb_warp_num,
output wire [`NW_BITS-1:0] core_wb_warp_num,
output reg [NUMBER_REQUESTS-1:0][31:0] core_wb_address
);
@ -105,7 +105,7 @@ module VX_cache_wb_sel_merge
core_wb_pc = 0;
core_wb_address = 0;
for (this_bank = 0; this_bank < NUMBER_BANKS; this_bank = this_bank + 1) begin
if ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
if ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
if (found_bank && !core_wb_valid[per_bank_wb_tid[this_bank]] && per_bank_wb_valid[this_bank] && ((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index]))) begin
core_wb_valid[per_bank_wb_tid[this_bank]] = 1;

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v"
`include "VX_cache_config.vh"
module VX_dcache_llv_resp_bank_sel
#(
@ -48,13 +48,13 @@ module VX_dcache_llv_resp_bank_sel
output reg [NUMBER_BANKS-1:0] per_bank_llvq_pop,
input wire[NUMBER_BANKS-1:0] per_bank_llvq_valid,
input wire[NUMBER_BANKS-1:0][31:0] per_bank_llvq_res_addr,
input wire[NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][31:0] per_bank_llvq_res_data,
input wire[NUMBER_BANKS-1:0][`BANK_LINE_WORDS-1:0][31:0] per_bank_llvq_res_data,
input wire[NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_llvq_res_tid,
input wire llvq_pop,
output reg[NUMBER_REQUESTS-1:0] llvq_valid,
output reg[NUMBER_REQUESTS-1:0][31:0] llvq_res_addr,
output reg[NUMBER_REQUESTS-1:0][`BANK_LINE_SIZE_RNG][31:0] llvq_res_data
output reg[NUMBER_REQUESTS-1:0][`BANK_LINE_WORDS-1:0][31:0] llvq_res_data
);

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v"
`include "VX_cache_config.vh"
module VX_fill_invalidator
#(

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v"
`include "VX_cache_config.vh"
module VX_prefetcher
#(

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v"
`include "VX_cache_config.vh"
module VX_snp_fwd_arb
#(

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v"
`include "VX_cache_config.vh"
module VX_tag_data_access
#(
@ -60,12 +60,12 @@ module VX_tag_data_access
input wire writefill_st1e,
input wire[31:0] writeaddr_st1e,
input wire[`WORD_SIZE_RNG] writeword_st1e,
input wire[`DBANK_LINE_SIZE_RNG][31:0] writedata_st1e,
input wire[`DBANK_LINE_WORDS-1:0][31:0] writedata_st1e,
input wire[2:0] mem_write_st1e,
input wire[2:0] mem_read_st1e,
output wire[`WORD_SIZE_RNG] readword_st1e,
output wire[`DBANK_LINE_SIZE_RNG][31:0] readdata_st1e,
output wire[`DBANK_LINE_WORDS-1:0][31:0] readdata_st1e,
output wire[`TAG_SELECT_SIZE_RNG] readtag_st1e,
output wire miss_st1e,
output wire dirty_st1e,
@ -74,25 +74,25 @@ module VX_tag_data_access
);
reg[`DBANK_LINE_SIZE_RNG][31:0] readdata_st[STAGE_1_CYCLES-1:0];
reg[`DBANK_LINE_WORDS-1:0][31:0] readdata_st[STAGE_1_CYCLES-1:0];
reg read_valid_st1c[STAGE_1_CYCLES-1:0];
reg read_dirty_st1c[STAGE_1_CYCLES-1:0];
reg[`TAG_SELECT_SIZE_RNG] read_tag_st1c [STAGE_1_CYCLES-1:0];
reg[`DBANK_LINE_SIZE_RNG][31:0] read_data_st1c [STAGE_1_CYCLES-1:0];
reg[`DBANK_LINE_WORDS-1:0][31:0] read_data_st1c [STAGE_1_CYCLES-1:0];
wire qual_read_valid_st1;
wire qual_read_dirty_st1;
wire[`TAG_SELECT_SIZE_RNG] qual_read_tag_st1;
wire[`DBANK_LINE_SIZE_RNG][31:0] qual_read_data_st1;
wire[`DBANK_LINE_WORDS-1:0][31:0] qual_read_data_st1;
wire use_read_valid_st1e;
wire use_read_dirty_st1e;
wire[`TAG_SELECT_SIZE_RNG] use_read_tag_st1e;
wire[`DBANK_LINE_SIZE_RNG][31:0] use_read_data_st1e;
wire[`DBANK_LINE_SIZE_RNG][3:0] use_write_enable;
wire[`DBANK_LINE_SIZE_RNG][31:0] use_write_data;
wire[`DBANK_LINE_WORDS-1:0][31:0] use_read_data_st1e;
wire[`DBANK_LINE_WORDS-1:0][3:0] use_write_enable;
wire[`DBANK_LINE_WORDS-1:0][31:0] use_write_data;
wire sw, sb, sh;
@ -140,8 +140,8 @@ module VX_tag_data_access
.fill_sent (fill_sent)
);
// VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) )) s0_1_c0 (
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) ), .Valid(0)) s0_1_c0 (
// VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32) )) s0_1_c0 (
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32) ), .Valid(0)) s0_1_c0 (
.clk (clk),
.reset(reset),
.stall(stall),
@ -153,7 +153,7 @@ module VX_tag_data_access
genvar curr_stage;
generate
for (curr_stage = 1; curr_stage < STAGE_1_CYCLES-1; curr_stage = curr_stage + 1) begin
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) )) s0_1_cc (
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32) )) s0_1_cc (
.clk (clk),
.reset(reset),
.stall(stall),
@ -170,7 +170,7 @@ module VX_tag_data_access
assign use_read_tag_st1e = (FUNC_ID == `SFUNC_ID) ? writeaddr_st1e[`TAG_SELECT_ADDR_RNG] : read_tag_st1c [STAGE_1_CYCLES-1]; // Tag is always the same in SM
genvar curr_w;
for (curr_w = 0; curr_w < `DBANK_LINE_SIZE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[STAGE_1_CYCLES-1][curr_w][31:0];
for (curr_w = 0; curr_w < `DBANK_LINE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[STAGE_1_CYCLES-1][curr_w][31:0];
// assign use_read_data_st1e = read_data_st1c [STAGE_1_CYCLES-1];
/////////////////////// LOAD LOGIC ///////////////////
@ -243,23 +243,23 @@ module VX_tag_data_access
wire should_write = (sw || sb || sh) && valid_req_st1e && use_read_valid_st1e && !miss_st1e && !is_snp_st1e;
wire force_write = real_writefill;
wire[`DBANK_LINE_SIZE_RNG][3:0] we;
wire[`DBANK_LINE_SIZE_RNG][31:0] data_write;
wire[`DBANK_LINE_WORDS-1:0][3:0] we;
wire[`DBANK_LINE_WORDS-1:0][31:0] data_write;
genvar g;
generate
for (g = 0; g < `DBANK_LINE_SIZE_WORDS; g = g + 1) begin : write_enables
for (g = 0; g < `DBANK_LINE_WORDS; g = g + 1) begin : write_enables
wire normal_write = (block_offset == g[`WORD_SELECT_SIZE_RNG]) && should_write && !real_writefill;
assign we[g] = (force_write) ? 4'b1111 :
(should_write && !real_writefill && (FUNC_ID == `LLFUNC_ID)) ? 4'b1111 :
(should_write && !real_writefill && (FUNC_ID == `L2FUNC_ID)) ? 4'b1111 :
(normal_write && sw) ? 4'b1111 :
(normal_write && sb) ? sb_mask :
(normal_write && sh) ? sh_mask :
4'b0000;
if (!(FUNC_ID == `LLFUNC_ID)) assign data_write[g] = force_write ? writedata_st1e[g] : use_write_dat;
if (!(FUNC_ID == `L2FUNC_ID)) assign data_write[g] = force_write ? writedata_st1e[g] : use_write_dat;
end
if ((FUNC_ID == `LLFUNC_ID)) begin
if ((FUNC_ID == `L2FUNC_ID)) begin
assign data_write = force_write ? writedata_st1e : writeword_st1e;
end
endgenerate
@ -268,7 +268,7 @@ module VX_tag_data_access
assign use_write_data = data_write;
///////////////////////
if (FUNC_ID == `LLFUNC_ID) begin
if (FUNC_ID == `L2FUNC_ID) begin
assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-1];
end else begin
assign readword_st1e = data_Qual;

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v"
`include "VX_cache_config.vh"
module VX_tag_data_structure
#(
@ -55,18 +55,18 @@ module VX_tag_data_structure
output wire read_valid,
output wire read_dirty,
output wire[`TAG_SELECT_SIZE_RNG] read_tag,
output wire[`DBANK_LINE_SIZE_RNG][31:0] read_data,
output wire[`DBANK_LINE_WORDS-1:0][31:0] read_data,
input wire invalidate,
input wire[`DBANK_LINE_SIZE_RNG][3:0] write_enable,
input wire[`DBANK_LINE_WORDS-1:0][3:0] write_enable,
input wire write_fill,
input wire[31:0] write_addr,
input wire[`DBANK_LINE_SIZE_RNG][31:0] write_data,
input wire[`DBANK_LINE_WORDS-1:0][31:0] write_data,
input wire fill_sent
);
reg[`DBANK_LINE_SIZE_RNG][3:0][7:0] data [`BANK_LINE_COUNT-1:0];
reg[`DBANK_LINE_WORDS-1:0][3:0][7:0] data [`BANK_LINE_COUNT-1:0];
reg[`TAG_SELECT_SIZE_RNG] tag [`BANK_LINE_COUNT-1:0];
reg valid[`BANK_LINE_COUNT-1:0];
reg dirty[`BANK_LINE_COUNT-1:0];
@ -110,7 +110,7 @@ module VX_tag_data_structure
valid[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
end
for (f = 0; f < `DBANK_LINE_SIZE_WORDS; f = f + 1) begin
for (f = 0; f < `DBANK_LINE_WORDS; f = f + 1) begin
if (write_enable[f][0]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][0] <= write_data[f][7 :0 ];
if (write_enable[f][1]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][1] <= write_data[f][15:8 ];
if (write_enable[f][2]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][2] <= write_data[f][23:16];

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_BRANCH_RSP
@ -9,7 +9,7 @@ interface VX_branch_response_inter ();
wire valid_branch;
wire branch_dir;
wire[31:0] branch_dest;
wire[`NW_M1:0] branch_warp_num;
wire[`NW_BITS-1:0] branch_warp_num;
endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_CSR_REQ
@ -7,8 +7,8 @@
interface VX_csr_req_inter ();
wire[`NT_M1:0] valid;
wire[`NW_M1:0] warp_num;
wire[`NUM_THREADS-1:0] valid;
wire[`NW_BITS-1:0] warp_num;
wire[4:0] rd;
wire[1:0] wb;
wire[4:0] alu_op;

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_CSR_WB_REQ
@ -7,15 +7,13 @@
interface VX_csr_wb_inter ();
wire[`NT_M1:0] valid;
wire[`NW_M1:0] warp_num;
wire[4:0] rd;
wire[1:0] wb;
wire[`NUM_THREADS-1:0] valid;
wire[`NW_BITS-1:0] warp_num;
wire[4:0] rd;
wire[1:0] wb;
wire[`NT_M1:0][31:0] csr_result;
wire[`NUM_THREADS-1:0][31:0] csr_result;
endinterface
`endif

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_DCACHE_REQ
@ -7,11 +7,11 @@
interface VX_dcache_request_inter ();
wire[`NT_M1:0][31:0] out_cache_driver_in_address;
wire[`NUM_THREADS-1:0][31:0] out_cache_driver_in_address;
wire[2:0] out_cache_driver_in_mem_read;
wire[2:0] out_cache_driver_in_mem_write;
wire[`NT_M1:0] out_cache_driver_in_valid;
wire[`NT_M1:0][31:0] out_cache_driver_in_data;
wire[`NUM_THREADS-1:0] out_cache_driver_in_valid;
wire[`NUM_THREADS-1:0][31:0] out_cache_driver_in_data;
endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_DCACHE_RSP
@ -7,7 +7,7 @@
interface VX_dcache_response_inter ();
wire[`NT_M1:0][31:0] in_cache_driver_out_data;
wire[`NUM_THREADS-1:0][31:0] in_cache_driver_out_data;
wire delay;
endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_DRAM_REQ_RSP_INTER

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_EXE_UNIT_REQ_INTER
@ -8,8 +8,8 @@
interface VX_exec_unit_req_inter ();
// Meta
wire[`NT_M1:0] valid;
wire[`NW_M1:0] warp_num;
wire[`NUM_THREADS-1:0] valid;
wire[`NW_BITS-1:0] warp_num;
wire[31:0] curr_PC;
wire[31:0] PC_next;
@ -18,8 +18,8 @@ interface VX_exec_unit_req_inter ();
wire[1:0] wb;
// Data and alu op
wire[`NT_M1:0][31:0] a_reg_data;
wire[`NT_M1:0][31:0] b_reg_data;
wire[`NUM_THREADS-1:0][31:0] a_reg_data;
wire[`NUM_THREADS-1:0][31:0] b_reg_data;
wire[4:0] alu_op;
wire[4:0] rs1;
wire[4:0] rs2;

View file

@ -1,5 +1,5 @@
`include "VX_define.v"
`include "VX_define.vh"
`ifndef VX_FrE_to_BE_INTER
@ -30,8 +30,8 @@ interface VX_frE_to_bckE_req_inter ();
wire jal;
wire[31:0] jal_offset;
wire[31:0] PC_next;
wire[`NT_M1:0] valid;
wire[`NW_M1:0] warp_num;
wire[`NUM_THREADS-1:0] valid;
wire[`NW_BITS-1:0] warp_num;
// GPGPU stuff
wire is_wspawn;

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_GPR_CLONE_INTER
@ -9,7 +9,7 @@
interface VX_gpr_clone_inter ();
/* verilator lint_off UNUSED */
wire is_clone;
wire[`NW_M1:0] warp_num;
wire[`NW_BITS-1:0] warp_num;
/* verilator lint_on UNUSED */
endinterface

View file

@ -1,13 +1,13 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_gpr_data_INTER
`define VX_gpr_data_INTER
interface VX_gpr_data_inter ();
wire[`NT_M1:0][31:0] a_reg_data;
wire[`NT_M1:0][31:0] b_reg_data;
wire[`NUM_THREADS-1:0][31:0] a_reg_data;
wire[`NUM_THREADS-1:0][31:0] b_reg_data;
endinterface

View file

@ -1,4 +1,4 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_GPR_JAL_INTER
`define VX_GPR_JAL_INTER

View file

@ -1,4 +1,4 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_GPR_READ
`define VX_GPR_READ
@ -8,7 +8,7 @@ interface VX_gpr_read_inter ();
wire[4:0] rs1;
wire[4:0] rs2;
wire[`NW_M1:0] warp_num;
wire[`NW_BITS-1:0] warp_num;
endinterface

View file

@ -1,4 +1,4 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_GPR_WSPAWN_INTER
`define VX_GPR_WSPAWN_INTER
@ -7,8 +7,8 @@
interface VX_gpr_wspawn_inter ();
/* verilator lint_off UNUSED */
wire is_wspawn;
wire[`NW_M1:0] which_wspawn;
// wire[`NW_M1:0] warp_num;
wire[`NW_BITS-1:0] which_wspawn;
// wire[`NW_BITS-1:0] warp_num;
/* verilator lint_on UNUSED */
endinterface

View file

@ -1,6 +1,6 @@
`include "../generic_cache/VX_cache_config.v"
`include "../generic_cache/VX_cache_config.vh"
`ifndef VX_GPU_DRAM_DCACHE_REQ
@ -8,7 +8,7 @@
interface VX_gpu_dcache_dram_req_inter
#(
parameter BANK_LINE_SIZE_WORDS = 2
parameter BANK_LINE_WORDS = 2
)
();
@ -18,7 +18,7 @@ interface VX_gpu_dcache_dram_req_inter
wire dram_req_read;
wire [31:0] dram_req_addr;
wire [31:0] dram_req_size;
wire [BANK_LINE_SIZE_WORDS-1:0][31:0] dram_req_data;
wire [BANK_LINE_WORDS-1:0][31:0] dram_req_data;
// Snoop
wire dram_because_of_snp;

View file

@ -1,7 +1,7 @@
`include "../generic_cache/VX_cache_config.v"
`include "../generic_cache/VX_cache_config.vh"
`ifndef VX_GPU_DRAM_DCACHE_RES
@ -9,13 +9,13 @@
interface VX_gpu_dcache_dram_res_inter
#(
parameter BANK_LINE_SIZE_WORDS = 2
parameter BANK_LINE_WORDS = 2
)
();
// DRAM Rsponse
wire dram_fill_rsp;
wire [31:0] dram_fill_rsp_addr;
wire [BANK_LINE_SIZE_WORDS-1:0][31:0] dram_fill_rsp_data;
wire [BANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data;
endinterface

View file

@ -1,6 +1,6 @@
`include "../generic_cache/VX_cache_config.v"
`include "../generic_cache/VX_cache_config.vh"
`ifndef VX_GPU_DCACHE_REQ
@ -20,7 +20,7 @@ interface VX_gpu_dcache_req_inter
wire [NUMBER_REQUESTS-1:0][2:0] core_req_mem_write;
wire [4:0] core_req_rd;
wire [NUMBER_REQUESTS-1:0][1:0] core_req_wb;
wire [`NW_M1:0] core_req_warp_num;
wire [`NW_BITS-1:0] core_req_warp_num;
wire [31:0] core_req_pc;
// Can't WB

View file

@ -1,6 +1,6 @@
`include "../generic_cache/VX_cache_config.v"
`include "../generic_cache/VX_cache_config.vh"
`ifndef VX_GPU_DCACHE_RES
@ -16,7 +16,7 @@ interface VX_gpu_dcache_res_inter
wire [NUMBER_REQUESTS-1:0] core_wb_valid;
wire [4:0] core_wb_req_rd;
wire [1:0] core_wb_req_wb;
wire [`NW_M1:0] core_wb_warp_num;
wire [`NW_BITS-1:0] core_wb_warp_num;
wire [NUMBER_REQUESTS-1:0][31:0] core_wb_readdata;
wire [NUMBER_REQUESTS-1:0][31:0] core_wb_pc;

View file

@ -1,7 +1,7 @@
`include "../generic_cache/VX_cache_config.v"
`include "../generic_cache/VX_cache_config.vh"
`ifndef VX_GPU_SNP_REQ

View file

@ -1,4 +1,4 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_GPU_INST_REQ_IN
@ -6,8 +6,8 @@
interface VX_gpu_inst_req_inter();
wire[`NT_M1:0] valid;
wire[`NW_M1:0] warp_num;
wire[`NUM_THREADS-1:0] valid;
wire[`NW_BITS-1:0] warp_num;
wire is_wspawn;
wire is_tmc;
wire is_split;
@ -16,7 +16,7 @@ interface VX_gpu_inst_req_inter();
wire[31:0] pc_next;
wire[`NT_M1:0][31:0] a_reg_data;
wire[`NUM_THREADS-1:0][31:0] a_reg_data;
wire[31:0] rd2;

View file

@ -1,4 +1,4 @@
`include "../generic_cache/VX_cache_config.v"
`include "../generic_cache/VX_cache_config.vh"
`ifndef VX_GPU_SNP_REQ_RSP

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_ICACHE_REQ

View file

@ -1,4 +1,4 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_ICACHE_RSP

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_EXEC_UNIT_WB_INST_INTER
@ -7,12 +7,12 @@
interface VX_inst_exec_wb_inter ();
wire[`NT_M1:0][31:0] alu_result;
wire[`NUM_THREADS-1:0][31:0] alu_result;
wire[31:0] exec_wb_pc;
wire[4:0] rd;
wire[1:0] wb;
wire[`NT_M1:0] wb_valid;
wire[`NW_M1:0] wb_warp_num;
wire[`NUM_THREADS-1:0] wb_valid;
wire[`NW_BITS-1:0] wb_warp_num;
endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_MEM_WB_INST_INTER
@ -7,12 +7,12 @@
interface VX_inst_mem_wb_inter ();
wire[`NT_M1:0][31:0] loaded_data;
wire[`NUM_THREADS-1:0][31:0] loaded_data;
wire[31:0] mem_wb_pc;
wire[4:0] rd;
wire[1:0] wb;
wire[`NT_M1:0] wb_valid;
wire[`NW_M1:0] wb_warp_num;
wire[`NUM_THREADS-1:0] wb_valid;
wire[`NW_BITS-1:0] wb_warp_num;
endinterface

View file

@ -1,4 +1,4 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_F_D_INTER
@ -7,8 +7,8 @@
interface VX_inst_meta_inter ();
wire[31:0] instruction;
wire[31:0] inst_pc;
wire[`NW_M1:0] warp_num;
wire[`NT_M1:0] valid;
wire[`NW_BITS-1:0] warp_num;
wire[`NUM_THREADS-1:0] valid;
endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_JAL_RSP
@ -9,7 +9,7 @@ interface VX_jal_response_inter ();
wire jal;
wire[31:0] jal_dest;
wire[`NW_M1:0] jal_warp_num;
wire[`NW_BITS-1:0] jal_warp_num;
endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_JOIN_INTER
@ -8,7 +8,7 @@
interface VX_join_inter ();
wire is_join;
wire[`NW_M1:0] join_warp_num;
wire[`NW_BITS-1:0] join_warp_num;
endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_LSU_REQ_INTER
@ -7,11 +7,11 @@
interface VX_lsu_req_inter ();
wire[`NT_M1:0] valid;
wire[`NUM_THREADS-1:0] valid;
wire[31:0] lsu_pc;
wire[`NW_M1:0] warp_num;
wire[`NT_M1:0][31:0] store_data;
wire[`NT_M1:0][31:0] base_address; // A reg data
wire[`NW_BITS-1:0] warp_num;
wire[`NUM_THREADS-1:0][31:0] store_data;
wire[`NUM_THREADS-1:0][31:0] base_address; // A reg data
wire[31:0] offset; // itype_immed
wire[2:0] mem_read;
wire[2:0] mem_write;

View file

@ -1,4 +1,4 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_MEM_REQ_IN
@ -6,20 +6,20 @@
interface VX_mem_req_inter ();
wire[`NT_M1:0][31:0] alu_result;
wire[`NUM_THREADS-1:0][31:0] alu_result;
wire[2:0] mem_read;
wire[2:0] mem_write;
wire[4:0] rd;
wire[1:0] wb;
wire[4:0] rs1;
wire[4:0] rs2;
wire[`NT_M1:0][31:0] rd2;
wire[`NUM_THREADS-1:0][31:0] rd2;
wire[31:0] PC_next;
wire[31:0] curr_PC;
wire[31:0] branch_offset;
wire[2:0] branch_type;
wire[`NT_M1:0] valid;
wire[`NW_M1:0] warp_num;
wire[`NUM_THREADS-1:0] valid;
wire[`NW_BITS-1:0] warp_num;
endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_MW_WB_INTER
@ -7,13 +7,13 @@
interface VX_mw_wb_inter ();
wire[`NT_M1:0][31:0] alu_result;
wire[`NT_M1:0][31:0] mem_result;
wire[`NUM_THREADS-1:0][31:0] alu_result;
wire[`NUM_THREADS-1:0][31:0] mem_result;
wire[4:0] rd;
wire[1:0] wb;
wire[31:0] PC_next;
wire[`NT_M1:0] valid;
wire [`NW_M1:0] warp_num;
wire[`NUM_THREADS-1:0] valid;
wire [`NW_BITS-1:0] warp_num;
endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_WARP_CTL_INTER
@ -7,26 +7,26 @@
interface VX_warp_ctl_inter ();
wire[`NW_M1:0] warp_num;
wire[`NW_BITS-1:0] warp_num;
wire change_mask;
wire[`NT_M1:0] thread_mask;
wire[`NUM_THREADS-1:0] thread_mask;
wire wspawn;
wire[31:0] wspawn_pc;
wire[`NW-1:0] wspawn_new_active;
wire[`NUM_WARPS-1:0] wspawn_new_active;
wire ebreak;
// barrier
wire is_barrier;
wire[31:0] barrier_id;
wire[$clog2(`NW):0] num_warps;
wire[$clog2(`NUM_WARPS):0] num_warps;
wire is_split;
wire dont_split;
wire[`NW_M1:0] split_warp_num;
wire[`NT_M1:0] split_new_mask;
wire[`NT_M1:0] split_later_mask;
wire[`NW_BITS-1:0] split_warp_num;
wire[`NUM_THREADS-1:0] split_new_mask;
wire[`NUM_THREADS-1:0] split_later_mask;
wire[31:0] split_save_pc;

View file

@ -1,4 +1,4 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_WB_INTER
@ -7,12 +7,12 @@
interface VX_wb_inter ();
wire[`NT_M1:0][31:0] write_data;
wire[`NUM_THREADS-1:0][31:0] write_data;
wire[31:0] wb_pc;
wire[4:0] rd;
wire[1:0] wb;
wire[`NT_M1:0] wb_valid;
wire[`NW_M1:0] wb_warp_num;
wire[`NUM_THREADS-1:0] wb_valid;
wire[`NW_BITS-1:0] wb_warp_num;
endinterface

View file

@ -1,4 +1,4 @@
`include "../VX_define.v"
`include "../VX_define.vh"
`ifndef VX_WSTALL_INTER
@ -7,7 +7,7 @@
interface VX_wstall_inter();
wire wstall;
wire[`NW_M1:0] warp_num;
wire[`NW_BITS-1:0] warp_num;
endinterface

View file

@ -1,4 +1,4 @@
`include "../VX_define.v"
`include "../VX_define.vh"
module VX_d_e_reg (
input wire clk,
@ -16,7 +16,7 @@ module VX_d_e_reg (
wire flush = (in_branch_stall == `STALL);
VX_generic_register #(.N(233 + `NW_M1 + 1 + `NT)) d_e_reg
VX_generic_register #(.N(233 + `NW_BITS-1 + 1 + `NUM_THREADS)) d_e_reg
(
.clk (clk),
.reset(reset),

View file

@ -1,4 +1,4 @@
`include "../VX_define.v"
`include "../VX_define.vh"
module VX_f_d_reg (
input wire clk,
@ -13,7 +13,7 @@ module VX_f_d_reg (
wire flush = 1'b0;
wire stall = in_freeze == 1'b1;
VX_generic_register #( .N(64+`NW_M1+1+`NT) ) f_d_reg (
VX_generic_register #( .N(64+`NW_BITS-1+1+`NUM_THREADS) ) f_d_reg (
.clk (clk),
.reset(reset),
.stall(stall),

View file

@ -1,4 +1,4 @@
`include "../VX_define.v"
`include "../VX_define.vh"
module VX_i_d_reg (
input wire clk,
@ -14,7 +14,7 @@ module VX_i_d_reg (
wire stall = in_freeze == 1'b1;
VX_generic_register #( .N( 64 + `NW_M1 + 1 + `NT ) ) i_d_reg (
VX_generic_register #( .N( 64 + `NW_BITS-1 + 1 + `NUM_THREADS ) ) i_d_reg (
.clk (clk),
.reset(reset),
.stall(stall),

View file

@ -1,4 +1,4 @@
`include "../VX_define.v"
`include "../VX_define.vh"
// Converts in_valids to bank_valids
module VX_bank_valids
@ -7,16 +7,16 @@ module VX_bank_valids
parameter BITS_PER_BANK = 3
)
(
input wire[`NT_M1:0] in_valids,
input wire[`NT_M1:0][31:0] in_addr,
output reg[NB:0][`NT_M1:0] bank_valids
input wire[`NUM_THREADS-1:0] in_valids,
input wire[`NUM_THREADS-1:0][31:0] in_addr,
output reg[NB:0][`NUM_THREADS-1:0] bank_valids
);
integer i, j;
always@(*) begin
for(j = 0; j <= NB; j = j+1 ) begin
for(i = 0; i <= `NT_M1; i = i+1) begin
for(i = 0; i < `NUM_THREADS; i = i+1) begin
if(in_valids[i]) begin
if(in_addr[i][(2+BITS_PER_BANK-1):2] == j[BITS_PER_BANK-1:0]) begin
bank_valids[j][i] = 1'b1;

Some files were not shown because too many files have changed in this diff Show more