added config.vh

This commit is contained in:
Blaise Tine 2020-04-16 07:49:19 -04:00
parent c913e542e9
commit 81745f08c9
109 changed files with 1426 additions and 1544 deletions

View file

@ -9,11 +9,11 @@ extern int vx_dev_caps(int caps_id) {
case VX_CAPS_VERSION: case VX_CAPS_VERSION:
return 0; return 0;
case VX_CAPS_MAX_CORES: case VX_CAPS_MAX_CORES:
return NUMBER_CORES; return NUM_CORES;
case VX_CAPS_MAX_WARPS: case VX_CAPS_MAX_WARPS:
return NW; return NUM_WARPS;
case VX_CAPS_MAX_THREADS: case VX_CAPS_MAX_THREADS:
return NT; return NUM_THREADS;
case VX_CAPS_CACHE_LINESIZE: case VX_CAPS_CACHE_LINESIZE:
return GLOBAL_BLOCK_SIZE_BYTES; return GLOBAL_BLOCK_SIZE_BYTES;
case VX_CAPS_LOCAL_MEM_SIZE: case VX_CAPS_LOCAL_MEM_SIZE:

View file

@ -142,7 +142,7 @@ public:
private: private:
void run() { void run() {
Harp::ArchDef arch("rv32i", NW, NT); Harp::ArchDef arch("rv32i", NUM_WARPS, NUM_THREADS);
Harp::WordDecoder dec(arch); Harp::WordDecoder dec(arch);
Harp::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true); Harp::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true);
Harp::Core core(arch, dec, mu); Harp::Core core(arch, dec, mu);

View file

@ -33,7 +33,7 @@ THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu
.PHONY: build_config .PHONY: build_config
build_config: build_config:
./gen_config.py --rtl_locations ./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./simulate/VX_config.h
# -LDFLAGS '-lsystemc' # -LDFLAGS '-lsystemc'
VERILATOR: build_config VERILATOR: build_config

View file

@ -6,8 +6,9 @@ ALL:sim
SRC = \ SRC = \
vortex_dpi.cpp \ vortex_dpi.cpp \
vortex_tb.v \ vortex_tb.v \
../rtl/VX_define.v \ ../rtl/VX_user_config.vh \
../rtl/VX_define_synth.v \ ../rtl/VX_config.vh \
../rtl/VX_define.vh \
../rtl/interfaces/VX_branch_response_inter.v \ ../rtl/interfaces/VX_branch_response_inter.v \
../rtl/interfaces/VX_csr_req_inter.v \ ../rtl/interfaces/VX_csr_req_inter.v \
../rtl/interfaces/VX_csr_wb_inter.v \ ../rtl/interfaces/VX_csr_wb_inter.v \

View file

@ -2182,7 +2182,7 @@ Project_File_33 = ../rtl/shared_memory/VX_set_bit.v
Project_File_P_33 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 vlog_noload 0 cover_branch 0 folder {Top Level} last_compile 0 cover_fsm 0 cover_excludedefault 0 vlog_enable0In 0 vlog_disableopt 0 cover_covercells 0 voptflow 1 cover_optlevel 3 vlog_showsource 0 vlog_hazard 0 toggle - vlog_0InOptions {} ood 1 cover_noshort 0 vlog_upper 0 compile_to work vlog_options {} compile_order 53 cover_expr 0 dont_compile 0 cover_stmt 0 Project_File_P_33 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 vlog_noload 0 cover_branch 0 folder {Top Level} last_compile 0 cover_fsm 0 cover_excludedefault 0 vlog_enable0In 0 vlog_disableopt 0 cover_covercells 0 voptflow 1 cover_optlevel 3 vlog_showsource 0 vlog_hazard 0 toggle - vlog_0InOptions {} ood 1 cover_noshort 0 vlog_upper 0 compile_to work vlog_options {} compile_order 53 cover_expr 0 dont_compile 0 cover_stmt 0
Project_File_34 = ../rtl/interfaces/VX_dcache_response_inter.v Project_File_34 = ../rtl/interfaces/VX_dcache_response_inter.v
Project_File_P_34 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 vlog_noload 0 last_compile 1571845660 folder {Top Level} cover_branch 0 cover_fsm 0 vlog_enable0In 0 cover_excludedefault 0 vlog_disableopt 0 cover_covercells 0 vlog_hazard 0 vlog_showsource 0 cover_optlevel 3 voptflow 1 ood 0 vlog_0InOptions {} toggle - vlog_options {} compile_to work vlog_upper 0 cover_noshort 0 compile_order 27 dont_compile 0 cover_expr 0 cover_stmt 0 Project_File_P_34 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 vlog_noload 0 last_compile 1571845660 folder {Top Level} cover_branch 0 cover_fsm 0 vlog_enable0In 0 cover_excludedefault 0 vlog_disableopt 0 cover_covercells 0 vlog_hazard 0 vlog_showsource 0 cover_optlevel 3 voptflow 1 ood 0 vlog_0InOptions {} toggle - vlog_options {} compile_to work vlog_upper 0 cover_noshort 0 compile_order 27 dont_compile 0 cover_expr 0 cover_stmt 0
Project_File_35 = ../rtl/VX_define.v Project_File_35 = ../rtl/VX_define.vh
Project_File_P_35 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 folder {Top Level} cover_branch 0 cover_fsm 0 last_compile 1572058635 vlog_noload 0 cover_excludedefault 0 vlog_enable0In 0 vlog_disableopt 0 cover_covercells 0 voptflow 1 cover_optlevel 3 vlog_showsource 0 vlog_hazard 0 toggle - vlog_0InOptions {} ood 0 cover_noshort 0 vlog_upper 0 compile_to work vlog_options {} compile_order 7 cover_expr 0 dont_compile 0 cover_stmt 0 Project_File_P_35 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 folder {Top Level} cover_branch 0 cover_fsm 0 last_compile 1572058635 vlog_noload 0 cover_excludedefault 0 vlog_enable0In 0 vlog_disableopt 0 cover_covercells 0 voptflow 1 cover_optlevel 3 vlog_showsource 0 vlog_hazard 0 toggle - vlog_0InOptions {} ood 0 cover_noshort 0 vlog_upper 0 compile_to work vlog_options {} compile_order 7 cover_expr 0 dont_compile 0 cover_stmt 0
Project_File_36 = ../rtl/interfaces/VX_csr_req_inter.v Project_File_36 = ../rtl/interfaces/VX_csr_req_inter.v
Project_File_P_36 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 vlog_noload 0 last_compile 1571845660 folder {Top Level} cover_branch 0 cover_fsm 0 vlog_enable0In 0 cover_excludedefault 0 vlog_disableopt 0 cover_covercells 0 vlog_hazard 0 vlog_showsource 0 cover_optlevel 3 voptflow 1 ood 0 vlog_0InOptions {} toggle - vlog_options {} compile_to work vlog_upper 0 cover_noshort 0 compile_order 24 dont_compile 0 cover_expr 0 cover_stmt 0 Project_File_P_36 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 vlog_noload 0 last_compile 1571845660 folder {Top Level} cover_branch 0 cover_fsm 0 vlog_enable0In 0 cover_excludedefault 0 vlog_disableopt 0 cover_covercells 0 vlog_hazard 0 vlog_showsource 0 cover_optlevel 3 voptflow 1 ood 0 vlog_0InOptions {} toggle - vlog_options {} compile_to work vlog_upper 0 cover_noshort 0 compile_order 24 dont_compile 0 cover_expr 0 cover_stmt 0

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
//`define NUMBER_BANKS 8 //`define NUMBER_BANKS 8
//`define NUM_WORDS_PER_BLOCK 4 //`define NUM_WORDS_PER_BLOCK 4

View file

@ -11,9 +11,10 @@ vortex_afu.json
+incdir+../rtl/pipe_regs +incdir+../rtl/pipe_regs
+incdir+../rtl/compat +incdir+../rtl/compat
../rtl/VX_define_synth.v ../rtl/VX_user_config.vh
../rtl/VX_define.v ../rtl/VX_config.vh
../rtl/generic_cache/VX_cache_config.v ../rtl/VX_define.vh
../rtl/generic_cache/VX_cache_config.vh
../rtl/Vortex_Socket.v ../rtl/Vortex_Socket.v
../rtl/Vortex_Cluster.v ../rtl/Vortex_Cluster.v
../rtl/Vortex.v ../rtl/Vortex.v

5
hw/rtl/.gitignore vendored
View file

@ -1,4 +1 @@
/simulate/VX_define.h /VX_user_config.vh
/simulate/VX_define_synth.h
/VX_define_synth.v
/results.txt

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_alu( module VX_alu(
input wire clk, input wire clk,

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_back_end module VX_back_end
#( #(

File diff suppressed because it is too large Load diff

View file

@ -1,4 +1,4 @@
`include "../VX_define.v" `include "../VX_define.vh"
module VX_csr_data ( module VX_csr_data (
input wire clk, // Clock input wire clk, // Clock
@ -19,17 +19,17 @@ module VX_csr_data (
/* verilator lint_off WIDTH */ /* verilator lint_off WIDTH */
// wire[`NT_M1:0][31:0] thread_ids; // wire[`NUM_THREADS-1:0][31:0] thread_ids;
// wire[`NT_M1:0][31:0] warp_ids; // wire[`NUM_THREADS-1:0][31:0] warp_ids;
// genvar cur_t; // genvar cur_t;
// for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin // for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin
// assign thread_ids[cur_t] = cur_t; // assign thread_ids[cur_t] = cur_t;
// end // end
// genvar cur_tw; // genvar cur_tw;
// for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin // for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin
// assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, in_read_warp_num}; // assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, in_read_warp_num};
// end // end
reg[11:0] csr[1023:0]; reg[11:0] csr[1023:0];

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_csr_pipe module VX_csr_pipe
#( #(
@ -14,8 +14,8 @@ module VX_csr_pipe
output wire stall_gpr_csr output wire stall_gpr_csr
); );
wire[`NT_M1:0] valid_s2; wire[`NUM_THREADS-1:0] valid_s2;
wire[`NW_M1:0] warp_num_s2; wire[`NW_BITS-1:0] warp_num_s2;
wire[4:0] rd_s2; wire[4:0] rd_s2;
wire[1:0] wb_s2; wire[1:0] wb_s2;
wire[4:0] alu_op_s2; wire[4:0] alu_op_s2;
@ -60,7 +60,7 @@ module VX_csr_pipe
wire zero = 0; wire zero = 0;
VX_generic_register #(.N(32 + 32 + 12 + 1 + 2 + 5 + (`NW_M1+1) + `NT)) csr_reg_s2 ( VX_generic_register #(.N(32 + 32 + 12 + 1 + 2 + 5 + (`NW_BITS-1+1) + `NUM_THREADS)) csr_reg_s2 (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(no_slot_csr), .stall(no_slot_csr),
@ -70,28 +70,26 @@ module VX_csr_pipe
); );
wire[`NT_M1:0][31:0] final_csr_data; wire[`NUM_THREADS-1:0][31:0] final_csr_data;
wire[`NT_M1:0][31:0] thread_ids; wire[`NUM_THREADS-1:0][31:0] thread_ids;
wire[`NT_M1:0][31:0] warp_ids; wire[`NUM_THREADS-1:0][31:0] warp_ids;
wire[`NT_M1:0][31:0] warp_idz; wire[`NUM_THREADS-1:0][31:0] warp_idz;
wire[`NT_M1:0][31:0] csr_vec_read_data_s2; wire[`NUM_THREADS-1:0][31:0] csr_vec_read_data_s2;
genvar cur_t; genvar cur_t;
for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin
assign thread_ids[cur_t] = cur_t; assign thread_ids[cur_t] = cur_t;
end end
genvar cur_tw; genvar cur_tw;
for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin
assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, warp_num_s2}; assign warp_ids[cur_tw] = warp_num_s2;
assign warp_idz[cur_tw] = (warp_num_s2 + (CORE_ID*`NW)); assign warp_idz[cur_tw] = 32'(warp_num_s2 + (CORE_ID * `NUM_WARPS));
end end
genvar cur_v; genvar cur_v;
for (cur_v = 0; cur_v < `NT; cur_v = cur_v + 1) begin for (cur_v = 0; cur_v < `NUM_THREADS; cur_v = cur_v + 1) begin
assign csr_vec_read_data_s2[cur_v] = csr_read_data_s2; assign csr_vec_read_data_s2[cur_v] = csr_read_data_s2;
end end
@ -104,7 +102,6 @@ module VX_csr_pipe
warp_id_select ? warp_idz : warp_id_select ? warp_idz :
csr_vec_read_data_s2; csr_vec_read_data_s2;
assign VX_csr_wb.valid = valid_s2; assign VX_csr_wb.valid = valid_s2;
assign VX_csr_wb.warp_num = warp_num_s2; assign VX_csr_wb.warp_num = warp_num_s2;
assign VX_csr_wb.rd = rd_s2; assign VX_csr_wb.rd = rd_s2;

View file

@ -1,5 +1,5 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_csr_wrapper ( module VX_csr_wrapper (
VX_csr_req_inter VX_csr_req, VX_csr_req_inter VX_csr_req,
@ -8,17 +8,17 @@ module VX_csr_wrapper (
); );
wire[`NT_M1:0][31:0] thread_ids; wire[`NUM_THREADS-1:0][31:0] thread_ids;
wire[`NT_M1:0][31:0] warp_ids; wire[`NUM_THREADS-1:0][31:0] warp_ids;
genvar cur_t, cur_tw; genvar cur_t, cur_tw;
generate generate
for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin : thread_ids_init for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin : thread_ids_init
assign thread_ids[cur_t] = cur_t; assign thread_ids[cur_t] = cur_t;
end end
for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin : warp_ids_init for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin : warp_ids_init
assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, VX_csr_req.warp_num}; assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, VX_csr_req.warp_num};
end end
endgenerate endgenerate

View file

@ -1,5 +1,5 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_decode( module VX_decode(
// Fetch Inputs // Fetch Inputs
@ -16,11 +16,11 @@ module VX_decode(
wire[31:0] in_instruction = fd_inst_meta_de.instruction; wire[31:0] in_instruction = fd_inst_meta_de.instruction;
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc; wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
wire[`NW_M1:0] in_warp_num = fd_inst_meta_de.warp_num; wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
assign VX_frE_to_bckE_req.curr_PC = in_curr_PC; assign VX_frE_to_bckE_req.curr_PC = in_curr_PC;
wire[`NT_M1:0] in_valid = fd_inst_meta_de.valid; wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid;
wire[6:0] curr_opcode; wire[6:0] curr_opcode;

163
hw/rtl/VX_define.vh Normal file
View file

@ -0,0 +1,163 @@
`ifndef VX_DEFINE
`define VX_DEFINE

// Common macro definitions shared by the RTL sources that include this header:
// derived configuration widths, instruction/control encodings, and derived
// cache line geometry.
//
// The base configuration values referenced below (NUM_CORES, NUM_CLUSTERS,
// NUM_WARPS, NUM_THREADS, the *BANK_LINE_SIZE_BYTES and *WORD_SIZE_BYTES
// macros) are not defined in this file; presumably they are provided by
// VX_config.vh, included next -- confirm.
`include "./VX_config.vh"

// Optional build switches, all disabled by default.
// `define QUEUE_FORCE_MLAB 1
// `define SYN 1
// `define ASIC 1
// `define SYN_FUNC 1

// Ceiling log2 of x, but never less than 1, so that a vector declared as
// [`LOG2UP(x)-1:0] always has at least one bit even when x is 0 or 1.
// The argument is parenthesized in the comparison so that expression
// arguments containing low-precedence operators are evaluated as a unit.
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)

// Cores contained in each cluster (integer division).
`define NUM_CORES_PER_CLUSTER (`NUM_CORES / `NUM_CLUSTERS)

// Index widths, in bits, for warp, thread and core identifiers.
`define NW_BITS `LOG2UP(`NUM_WARPS)
`define NT_BITS `LOG2UP(`NUM_THREADS)
`define NC_BITS `LOG2UP(`NUM_CORES)

// 7-bit instruction opcode values.
`define R_INST 7'd51
`define L_INST 7'd3
`define ALU_INST 7'd19
`define S_INST 7'd35
`define B_INST 7'd99
`define LUI_INST 7'd55
`define AUIPC_INST 7'd23
`define JAL_INST 7'd111
`define JALR_INST 7'd103
`define SYS_INST 7'd115
`define GPGPU_INST 7'h6b

// WRITEBACK
// 2-bit write-back selector. This group was previously defined twice in this
// header with identical values; it is now defined once.
`define WB_ALU 2'h1
`define WB_MEM 2'h2
`define WB_JAL 2'h3
`define NO_WB 2'h0

// Second-operand source selector.
`define RS2_IMMED 1
`define RS2_REG 0

// 3-bit memory read selector; 3'h7 means "no read".
`define NO_MEM_READ 3'h7
`define LB_MEM_READ 3'h0
`define LH_MEM_READ 3'h1
`define LW_MEM_READ 3'h2
`define LBU_MEM_READ 3'h4
`define LHU_MEM_READ 3'h5

// 3-bit memory write selector; 3'h7 means "no write".
`define NO_MEM_WRITE 3'h7
`define SB_MEM_WRITE 3'h0
`define SH_MEM_WRITE 3'h1
`define SW_MEM_WRITE 3'h2

// 3-bit branch type selector.
`define NO_BRANCH 3'h0
`define BEQ 3'h1
`define BNE 3'h2
`define BLT 3'h3
`define BGT 3'h4
`define BLTU 3'h5
`define BGTU 3'h6

// 5-bit ALU operation selector.
// NOTE(review): NO_ALU and CSR_ALU_RC share the encoding 5'd15. Left as-is
// because consumers of these values are outside this file -- confirm whether
// the overlap is intentional.
`define NO_ALU 5'd15
`define ADD 5'd0
`define SUB 5'd1
`define SLLA 5'd2
`define SLT 5'd3
`define SLTU 5'd4
`define XOR 5'd5
`define SRL 5'd6
`define SRA 5'd7
`define OR 5'd8
`define AND 5'd9
`define SUBU 5'd10
`define LUI_ALU 5'd11
`define AUIPC_ALU 5'd12
`define CSR_ALU_RW 5'd13
`define CSR_ALU_RS 5'd14
`define CSR_ALU_RC 5'd15
`define MUL 5'd16
`define MULH 5'd17
`define MULHSU 5'd18
`define MULHU 5'd19
`define DIV 5'd20
`define DIVU 5'd21
`define REM 5'd22
`define REMU 5'd23

// JAL
`define JUMP 1'h1
`define NO_JUMP 1'h0

// STALLS
`define STALL 1'h1
`define NO_STALL 1'h0

// Branch outcome.
`define TAKEN 1'h1
`define NOT_TAKEN 1'h0

// Register index 0.
`define ZERO_REG 5'h0

// ======================= Dcache Configurable Knobs ==========================
// Function ID
`define DFUNC_ID 0
// Size of line inside a bank in bits
`define DBANK_LINE_SIZE (`DBANK_LINE_SIZE_BYTES * 8)
// Bank Number of words in a line
`define DBANK_LINE_WORDS (`DBANK_LINE_SIZE_BYTES / `DWORD_SIZE_BYTES)

// ======================= Icache Configurable Knobs ==========================
// Function ID
`define IFUNC_ID 1
// Size of line inside a bank in bits
`define IBANK_LINE_SIZE (`IBANK_LINE_SIZE_BYTES * 8)
// Bank Number of words in a line
`define IBANK_LINE_WORDS (`IBANK_LINE_SIZE_BYTES / `IWORD_SIZE_BYTES)

// ======================= SM Configurable Knobs ==============================
// Function ID
`define SFUNC_ID 2
// Size of line inside a bank in bits
`define SBANK_LINE_SIZE (`SBANK_LINE_SIZE_BYTES * 8)
// Bank Number of words in a line
`define SBANK_LINE_WORDS (`SBANK_LINE_SIZE_BYTES / `SWORD_SIZE_BYTES)

// ======================= L2cache Configurable Knobs =========================
// Function ID
`define L2FUNC_ID 3
// Size of line inside a bank in bits
`define L2BANK_LINE_SIZE (`L2BANK_LINE_SIZE_BYTES * 8)
// Bank Number of words in a line
`define L2BANK_LINE_WORDS (`L2BANK_LINE_SIZE_BYTES / `L2WORD_SIZE_BYTES)

// ======================= L3cache Configurable Knobs =========================
// Function ID
// NOTE(review): same value as L2FUNC_ID (3), unlike the other function IDs
// which are distinct. Left unchanged because the users of these IDs are not
// visible here -- confirm whether L3 is meant to share the L2 ID.
`define L3FUNC_ID 3
// Size of line inside a bank in bits
`define L3BANK_LINE_SIZE (`L3BANK_LINE_SIZE_BYTES * 8)
// Bank Number of words in a line
`define L3BANK_LINE_WORDS (`L3BANK_LINE_SIZE_BYTES / `L3WORD_SIZE_BYTES)

// VX_DEFINE
`endif

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_dmem_controller ( module VX_dmem_controller (
input wire clk, input wire clk,
@ -36,7 +36,7 @@ module VX_dmem_controller (
wire dcache_wants_wb = (|VX_dcache_rsp_dcache.core_wb_valid); wire dcache_wants_wb = (|VX_dcache_rsp_dcache.core_wb_valid);
// Dcache Request // Dcache Request
assign VX_dcache_req_dcache.core_req_valid = VX_dcache_req.core_req_valid & {`NT{~to_shm}}; assign VX_dcache_req_dcache.core_req_valid = VX_dcache_req.core_req_valid & {`NUM_THREADS{~to_shm}};
assign VX_dcache_req_dcache.core_req_addr = VX_dcache_req.core_req_addr; assign VX_dcache_req_dcache.core_req_addr = VX_dcache_req.core_req_addr;
assign VX_dcache_req_dcache.core_req_writedata = VX_dcache_req.core_req_writedata; assign VX_dcache_req_dcache.core_req_writedata = VX_dcache_req.core_req_writedata;
assign VX_dcache_req_dcache.core_req_mem_read = VX_dcache_req.core_req_mem_read; assign VX_dcache_req_dcache.core_req_mem_read = VX_dcache_req.core_req_mem_read;
@ -49,7 +49,7 @@ module VX_dmem_controller (
// Shared Memory Request // Shared Memory Request
assign VX_dcache_req_smem.core_req_valid = VX_dcache_req.core_req_valid & {`NT{to_shm}}; assign VX_dcache_req_smem.core_req_valid = VX_dcache_req.core_req_valid & {`NUM_THREADS{to_shm}};
assign VX_dcache_req_smem.core_req_addr = VX_dcache_req.core_req_addr; assign VX_dcache_req_smem.core_req_addr = VX_dcache_req.core_req_addr;
assign VX_dcache_req_smem.core_req_writedata = VX_dcache_req.core_req_writedata; assign VX_dcache_req_smem.core_req_writedata = VX_dcache_req.core_req_writedata;
assign VX_dcache_req_smem.core_req_mem_read = VX_dcache_req.core_req_mem_read; assign VX_dcache_req_smem.core_req_mem_read = VX_dcache_req.core_req_mem_read;
@ -73,8 +73,8 @@ module VX_dmem_controller (
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_smem_dram_req(); VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_smem_dram_req();
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_smem_dram_res(); VX_gpu_dcache_dram_res_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_smem_dram_res();

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_execute_unit ( module VX_execute_unit (
input wire clk, input wire clk,
@ -18,8 +18,8 @@ module VX_execute_unit (
output wire out_delay output wire out_delay
); );
wire[`NT_M1:0][31:0] in_a_reg_data; wire[`NUM_THREADS-1:0][31:0] in_a_reg_data;
wire[`NT_M1:0][31:0] in_b_reg_data; wire[`NUM_THREADS-1:0][31:0] in_b_reg_data;
wire[4:0] in_alu_op; wire[4:0] in_alu_op;
wire in_rs2_src; wire in_rs2_src;
wire[31:0] in_itype_immed; wire[31:0] in_itype_immed;
@ -41,11 +41,11 @@ module VX_execute_unit (
assign in_curr_PC = VX_exec_unit_req.curr_PC; assign in_curr_PC = VX_exec_unit_req.curr_PC;
wire[`NT_M1:0][31:0] alu_result; wire[`NUM_THREADS-1:0][31:0] alu_result;
wire[`NT_M1:0] alu_stall; wire[`NUM_THREADS-1:0] alu_stall;
genvar index_out_reg; genvar index_out_reg;
generate generate
for (index_out_reg = 0; index_out_reg < `NT; index_out_reg = index_out_reg + 1) begin : alu_defs for (index_out_reg = 0; index_out_reg < `NUM_THREADS; index_out_reg = index_out_reg + 1) begin : alu_defs
VX_alu vx_alu( VX_alu vx_alu(
.clk(clk), .clk(clk),
.reset(reset), .reset(reset),
@ -69,9 +69,9 @@ module VX_execute_unit (
assign out_delay = no_slot_exec || internal_stall; assign out_delay = no_slot_exec || internal_stall;
wire [$clog2(`NT)-1:0] jal_branch_use_index; wire [$clog2(`NUM_THREADS)-1:0] jal_branch_use_index;
wire jal_branch_found_valid; wire jal_branch_found_valid;
VX_generic_priority_encoder #(.N(`NT)) choose_alu_result( VX_generic_priority_encoder #(.N(`NUM_THREADS)) choose_alu_result(
.valids(VX_exec_unit_req.valid), .valids(VX_exec_unit_req.valid),
.index (jal_branch_use_index), .index (jal_branch_use_index),
.found (jal_branch_found_valid) .found (jal_branch_found_valid)
@ -95,10 +95,10 @@ module VX_execute_unit (
end end
wire[`NT_M1:0][31:0] duplicate_PC_data; wire[`NUM_THREADS-1:0][31:0] duplicate_PC_data;
genvar i; genvar i;
generate generate
for (i = 0; i < `NT; i=i+1) begin : pc_data_setup for (i = 0; i < `NUM_THREADS; i=i+1) begin : pc_data_setup
assign duplicate_PC_data[i] = VX_exec_unit_req.PC_next; assign duplicate_PC_data[i] = VX_exec_unit_req.PC_next;
end end
endgenerate endgenerate
@ -113,7 +113,7 @@ module VX_execute_unit (
// Actual Writeback // Actual Writeback
assign VX_inst_exec_wb.rd = VX_exec_unit_req.rd; assign VX_inst_exec_wb.rd = VX_exec_unit_req.rd;
assign VX_inst_exec_wb.wb = VX_exec_unit_req.wb; assign VX_inst_exec_wb.wb = VX_exec_unit_req.wb;
assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid & {`NT{!internal_stall}}; assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid & {`NUM_THREADS{!internal_stall}};
assign VX_inst_exec_wb.wb_warp_num = VX_exec_unit_req.warp_num; assign VX_inst_exec_wb.wb_warp_num = VX_exec_unit_req.warp_num;
assign VX_inst_exec_wb.alu_result = VX_exec_unit_req.jal ? duplicate_PC_data : alu_result; assign VX_inst_exec_wb.alu_result = VX_exec_unit_req.jal ? duplicate_PC_data : alu_result;
@ -141,7 +141,7 @@ module VX_execute_unit (
// .out ({VX_inst_exec_wb.rd , VX_inst_exec_wb.wb , VX_inst_exec_wb.wb_valid , VX_inst_exec_wb.wb_warp_num , VX_inst_exec_wb.alu_result , VX_inst_exec_wb.exec_wb_pc }) // .out ({VX_inst_exec_wb.rd , VX_inst_exec_wb.wb , VX_inst_exec_wb.wb_valid , VX_inst_exec_wb.wb_warp_num , VX_inst_exec_wb.alu_result , VX_inst_exec_wb.exec_wb_pc })
// ); // );
VX_generic_register #(.N(33 + `NW_M1 + 1)) jal_reg( VX_generic_register #(.N(33 + `NW_BITS-1 + 1)) jal_reg(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(zero), .stall(zero),
@ -150,7 +150,7 @@ module VX_execute_unit (
.out ({VX_jal_rsp.jal , VX_jal_rsp.jal_dest , VX_jal_rsp.jal_warp_num}) .out ({VX_jal_rsp.jal , VX_jal_rsp.jal_dest , VX_jal_rsp.jal_warp_num})
); );
VX_generic_register #(.N(34 + `NW_M1 + 1)) branch_reg( VX_generic_register #(.N(34 + `NW_BITS-1 + 1)) branch_reg(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(zero), .stall(zero),

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_fetch ( module VX_fetch (
input wire clk, input wire clk,
@ -7,8 +7,8 @@ module VX_fetch (
VX_join_inter VX_join, VX_join_inter VX_join,
input wire schedule_delay, input wire schedule_delay,
input wire icache_stage_delay, input wire icache_stage_delay,
input wire[`NW_M1:0] icache_stage_wid, input wire[`NW_BITS-1:0] icache_stage_wid,
input wire[`NT-1:0] icache_stage_valids, input wire[`NUM_THREADS-1:0] icache_stage_valids,
output wire out_ebreak, output wire out_ebreak,
VX_jal_response_inter VX_jal_rsp, VX_jal_response_inter VX_jal_rsp,
@ -17,8 +17,8 @@ module VX_fetch (
VX_warp_ctl_inter VX_warp_ctl VX_warp_ctl_inter VX_warp_ctl
); );
wire[`NT_M1:0] thread_mask; wire[`NUM_THREADS-1:0] thread_mask;
wire[`NW_M1:0] warp_num; wire[`NW_BITS-1:0] warp_num;
wire[31:0] warp_pc; wire[31:0] warp_pc;
wire scheduled_warp; wire scheduled_warp;

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_front_end ( module VX_front_end (
input wire clk, input wire clk,
@ -37,8 +37,8 @@ wire icache_stage_delay;
wire vortex_ebreak; wire vortex_ebreak;
wire terminate_sim; wire terminate_sim;
wire[`NW_M1:0] icache_stage_wid; wire[`NW_BITS-1:0] icache_stage_wid;
wire[`NT-1:0] icache_stage_valids; wire[`NUM_THREADS-1:0] icache_stage_valids;
reg old_ebreak; // This should be eventually removed reg old_ebreak; // This should be eventually removed
always @(posedge clk) begin always @(posedge clk) begin

View file

@ -1,7 +1,7 @@
`ifndef VX_GENERIC_PRIORITY_ENCODER `ifndef VX_GENERIC_PRIORITY_ENCODER
`define VX_GENERIC_PRIORITY_ENCODER `define VX_GENERIC_PRIORITY_ENCODER
`include "VX_define.v" `include "VX_define.vh"
module VX_generic_priority_encoder module VX_generic_priority_encoder
#( #(
@ -10,8 +10,8 @@ module VX_generic_priority_encoder
( (
input wire[N-1:0] valids, input wire[N-1:0] valids,
//output reg[$clog2(N)-1:0] index, //output reg[$clog2(N)-1:0] index,
output reg[(`CLOG2(N))-1:0] index, output reg[(`LOG2UP(N))-1:0] index,
//output reg[`CLOG2(N):0] index, // eh //output reg[`LOG2UP(N):0] index, // eh
output reg found output reg found
); );
@ -22,7 +22,7 @@ module VX_generic_priority_encoder
for (i = N-1; i >= 0; i = i - 1) begin for (i = N-1; i >= 0; i = i - 1) begin
if (valids[i]) begin if (valids[i]) begin
//index = i[$clog2(N)-1:0]; //index = i[$clog2(N)-1:0];
index = i[(`CLOG2(N))-1:0]; index = i[(`LOG2UP(N))-1:0];
found = 1; found = 1;
end end
end end

View file

@ -1,5 +1,3 @@
`include "VX_define_synth.v"
module VX_generic_queue_ll module VX_generic_queue_ll
#( #(
parameter DATAW = 4, parameter DATAW = 4,

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_gpgpu_inst ( module VX_gpgpu_inst (
// Input // Input
@ -9,14 +9,14 @@ module VX_gpgpu_inst (
); );
wire[`NT_M1:0] curr_valids = VX_gpu_inst_req.valid; wire[`NUM_THREADS-1:0] curr_valids = VX_gpu_inst_req.valid;
wire is_split = (VX_gpu_inst_req.is_split); wire is_split = (VX_gpu_inst_req.is_split);
wire[`NT_M1:0] tmc_new_mask; wire[`NUM_THREADS-1:0] tmc_new_mask;
wire all_threads = `NT < VX_gpu_inst_req.a_reg_data[0]; wire all_threads = `NUM_THREADS < VX_gpu_inst_req.a_reg_data[0];
genvar curr_t; genvar curr_t;
generate generate
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin : tmc_new_mask_init for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin : tmc_new_mask_init
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < VX_gpu_inst_req.a_reg_data[0]; assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < VX_gpu_inst_req.a_reg_data[0];
end end
endgenerate endgenerate
@ -33,11 +33,11 @@ module VX_gpgpu_inst (
wire wspawn = VX_gpu_inst_req.is_wspawn; wire wspawn = VX_gpu_inst_req.is_wspawn;
wire[31:0] wspawn_pc = VX_gpu_inst_req.rd2; wire[31:0] wspawn_pc = VX_gpu_inst_req.rd2;
wire all_active = `NW < VX_gpu_inst_req.a_reg_data[0]; wire all_active = `NUM_WARPS < VX_gpu_inst_req.a_reg_data[0];
wire[`NW-1:0] wspawn_new_active; wire[`NUM_WARPS-1:0] wspawn_new_active;
genvar curr_w; genvar curr_w;
generate generate
for (curr_w = 0; curr_w < `NW; curr_w=curr_w+1) begin : wspawn_new_active_init for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin : wspawn_new_active_init
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < VX_gpu_inst_req.a_reg_data[0]; assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < VX_gpu_inst_req.a_reg_data[0];
end end
endgenerate endgenerate
@ -47,19 +47,19 @@ module VX_gpgpu_inst (
assign VX_warp_ctl.barrier_id = VX_gpu_inst_req.a_reg_data[0]; assign VX_warp_ctl.barrier_id = VX_gpu_inst_req.a_reg_data[0];
wire[31:0] num_warps_m1 = VX_gpu_inst_req.rd2 - 1; wire[31:0] num_warps_m1 = VX_gpu_inst_req.rd2 - 1;
assign VX_warp_ctl.num_warps = num_warps_m1[$clog2(`NW):0]; assign VX_warp_ctl.num_warps = num_warps_m1[$clog2(`NUM_WARPS):0];
assign VX_warp_ctl.wspawn = wspawn; assign VX_warp_ctl.wspawn = wspawn;
assign VX_warp_ctl.wspawn_pc = wspawn_pc; assign VX_warp_ctl.wspawn_pc = wspawn_pc;
assign VX_warp_ctl.wspawn_new_active = wspawn_new_active; assign VX_warp_ctl.wspawn_new_active = wspawn_new_active;
wire[`NT_M1:0] split_new_use_mask; wire[`NUM_THREADS-1:0] split_new_use_mask;
wire[`NT_M1:0] split_new_later_mask; wire[`NUM_THREADS-1:0] split_new_later_mask;
// VX_gpu_inst_req.pc // VX_gpu_inst_req.pc
genvar curr_s_t; genvar curr_s_t;
generate generate
for (curr_s_t = 0; curr_s_t < `NT; curr_s_t=curr_s_t+1) begin : masks_init for (curr_s_t = 0; curr_s_t < `NUM_THREADS; curr_s_t=curr_s_t+1) begin : masks_init
wire curr_bool = (VX_gpu_inst_req.a_reg_data[curr_s_t] == 32'b1); wire curr_bool = (VX_gpu_inst_req.a_reg_data[curr_s_t] == 32'b1);
assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool); assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool);
@ -67,18 +67,18 @@ module VX_gpgpu_inst (
end end
endgenerate endgenerate
wire[$clog2(`NT):0] num_valids; wire[$clog2(`NUM_THREADS):0] num_valids;
VX_countones #(.N(`NT)) valids_counter ( VX_countones #(.N(`NUM_THREADS)) valids_counter (
.valids(curr_valids), .valids(curr_valids),
.count (num_valids) .count (num_valids)
); );
// wire[`NW_M1:0] num_valids = $countones(curr_valids); // wire[`NW_BITS-1:0] num_valids = $countones(curr_valids);
assign VX_warp_ctl.is_split = is_split && (num_valids > 1); assign VX_warp_ctl.is_split = is_split && (num_valids > 1);
assign VX_warp_ctl.dont_split = VX_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NT{1'b1}})); assign VX_warp_ctl.dont_split = VX_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}}));
assign VX_warp_ctl.split_new_mask = split_new_use_mask; assign VX_warp_ctl.split_new_mask = split_new_use_mask;
assign VX_warp_ctl.split_later_mask = split_new_later_mask; assign VX_warp_ctl.split_later_mask = split_new_later_mask;
assign VX_warp_ctl.split_save_pc = VX_gpu_inst_req.pc_next; assign VX_warp_ctl.split_save_pc = VX_gpu_inst_req.pc_next;

View file

@ -1,5 +1,5 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_gpr ( module VX_gpr (
input wire clk, input wire clk,
@ -8,8 +8,8 @@ module VX_gpr (
VX_gpr_read_inter VX_gpr_read, VX_gpr_read_inter VX_gpr_read,
VX_wb_inter VX_writeback_inter, VX_wb_inter VX_writeback_inter,
output reg[`NT_M1:0][31:0] out_a_reg_data, output reg[`NUM_THREADS-1:0][31:0] out_a_reg_data,
output reg[`NT_M1:0][31:0] out_b_reg_data output reg[`NUM_THREADS-1:0][31:0] out_b_reg_data
); );
@ -41,10 +41,10 @@ module VX_gpr (
wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid); wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid);
wire[`NT_M1:0][31:0] write_bit_mask; wire[`NUM_THREADS-1:0][31:0] write_bit_mask;
genvar curr_t; genvar curr_t;
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin
wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t]; wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t];
assign write_bit_mask[curr_t] = {32{~local_write}}; assign write_bit_mask[curr_t] = {32{~local_write}};
end end
@ -59,14 +59,14 @@ module VX_gpr (
wire cena_1 = 0; wire cena_1 = 0;
wire cena_2 = 0; wire cena_2 = 0;
wire[`NT_M1:0][31:0] temp_a; wire[`NUM_THREADS-1:0][31:0] temp_a;
wire[`NT_M1:0][31:0] temp_b; wire[`NUM_THREADS-1:0][31:0] temp_b;
`ifndef SYN `ifndef SYN
genvar thread; genvar thread;
genvar curr_bit; genvar curr_bit;
for (thread = 0; thread < `NT; thread = thread + 1) for (thread = 0; thread < `NUM_THREADS; thread = thread + 1)
begin begin
for (curr_bit = 0; curr_bit < 32; curr_bit=curr_bit+1) for (curr_bit = 0; curr_bit < 32; curr_bit=curr_bit+1)
begin begin
@ -83,7 +83,7 @@ module VX_gpr (
`endif `endif
wire[`NT_M1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0; wire[`NUM_THREADS-1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0;
genvar curr_base_thread; genvar curr_base_thread;
for (curr_base_thread = 0; curr_base_thread < 'NT; curr_base_thread=curr_base_thread+4) for (curr_base_thread = 0; curr_base_thread < 'NT; curr_base_thread=curr_base_thread+4)

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_gpr_stage ( module VX_gpr_stage (
input wire clk, input wire clk,
@ -114,15 +114,15 @@ module VX_gpr_stage (
); );
wire[`NT_M1:0][31:0] temp_store_data; wire[`NUM_THREADS-1:0][31:0] temp_store_data;
wire[`NT_M1:0][31:0] temp_base_address; // A reg data wire[`NUM_THREADS-1:0][31:0] temp_base_address; // A reg data
wire[`NT_M1:0][31:0] real_store_data; wire[`NUM_THREADS-1:0][31:0] real_store_data;
wire[`NT_M1:0][31:0] real_base_address; // A reg data wire[`NUM_THREADS-1:0][31:0] real_base_address; // A reg data
wire store_curr_real = !delayed_lsu_last_cycle && stall_lsu; wire store_curr_real = !delayed_lsu_last_cycle && stall_lsu;
VX_generic_register #(.N(`NT*32*2)) lsu_data( VX_generic_register #(.N(`NUM_THREADS*32*2)) lsu_data(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(!store_curr_real), .stall(!store_curr_real),
@ -139,7 +139,7 @@ module VX_gpr_stage (
assign VX_lsu_req.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address; assign VX_lsu_req.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address;
VX_generic_register #(.N(77 + `NW_M1 + 1 + (`NT))) lsu_reg( VX_generic_register #(.N(77 + `NW_BITS-1 + 1 + (`NUM_THREADS))) lsu_reg(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall_lsu), .stall(stall_lsu),
@ -148,7 +148,7 @@ module VX_gpr_stage (
.out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc ,VX_lsu_req.warp_num , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb }) .out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc ,VX_lsu_req.warp_num , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb })
); );
VX_generic_register #(.N(224 + `NW_M1 + 1 + (`NT))) exec_unit_reg( VX_generic_register #(.N(224 + `NW_BITS-1 + 1 + (`NUM_THREADS))) exec_unit_reg(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall_exec), .stall(stall_exec),
@ -160,7 +160,7 @@ module VX_gpr_stage (
assign VX_exec_unit_req.a_reg_data = real_base_address; assign VX_exec_unit_req.a_reg_data = real_base_address;
assign VX_exec_unit_req.b_reg_data = real_store_data; assign VX_exec_unit_req.b_reg_data = real_store_data;
VX_generic_register #(.N(36 + `NW_M1 + 1 + (`NT))) gpu_inst_reg( VX_generic_register #(.N(36 + `NW_BITS-1 + 1 + (`NUM_THREADS))) gpu_inst_reg(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall_rest), .stall(stall_rest),
@ -172,7 +172,7 @@ module VX_gpr_stage (
assign VX_gpu_inst_req.a_reg_data = real_base_address; assign VX_gpu_inst_req.a_reg_data = real_base_address;
assign VX_gpu_inst_req.rd2 = real_store_data; assign VX_gpu_inst_req.rd2 = real_store_data;
VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg( VX_generic_register #(.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)) csr_reg(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall_gpr_csr), .stall(stall_gpr_csr),
@ -187,7 +187,7 @@ module VX_gpr_stage (
`else `else
// 341 // 341
VX_generic_register #(.N(77 + `NW_M1 + 1 + 65*(`NT))) lsu_reg( VX_generic_register #(.N(77 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))) lsu_reg(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall_lsu), .stall(stall_lsu),
@ -196,7 +196,7 @@ module VX_gpr_stage (
.out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc , VX_lsu_req.warp_num , VX_lsu_req.store_data , VX_lsu_req.base_address , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb }) .out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc , VX_lsu_req.warp_num , VX_lsu_req.store_data , VX_lsu_req.base_address , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb })
); );
VX_generic_register #(.N(224 + `NW_M1 + 1 + 65*(`NT))) exec_unit_reg( VX_generic_register #(.N(224 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))) exec_unit_reg(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall_exec), .stall(stall_exec),
@ -205,7 +205,7 @@ module VX_gpr_stage (
.out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.a_reg_data , VX_exec_unit_req.b_reg_data , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask }) .out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.a_reg_data , VX_exec_unit_req.b_reg_data , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask })
); );
VX_generic_register #(.N(68 + `NW_M1 + 1 + 33*(`NT))) gpu_inst_reg( VX_generic_register #(.N(68 + `NW_BITS-1 + 1 + 33*(`NUM_THREADS))) gpu_inst_reg(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall_rest), .stall(stall_rest),
@ -214,7 +214,7 @@ module VX_gpr_stage (
.out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next , VX_gpu_inst_req.a_reg_data , VX_gpu_inst_req.rd2 }) .out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next , VX_gpu_inst_req.a_reg_data , VX_gpu_inst_req.rd2 })
); );
VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg( VX_generic_register #(.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)) csr_reg(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall_gpr_csr), .stall(stall_gpr_csr),

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_gpr_wrapper ( module VX_gpr_wrapper (
input wire clk, input wire clk,
@ -7,23 +7,22 @@ module VX_gpr_wrapper (
VX_wb_inter VX_writeback_inter, VX_wb_inter VX_writeback_inter,
VX_gpr_jal_inter VX_gpr_jal, VX_gpr_jal_inter VX_gpr_jal,
output wire[`NT_M1:0][31:0] out_a_reg_data, output wire[`NUM_THREADS-1:0][31:0] out_a_reg_data,
output wire[`NT_M1:0][31:0] out_b_reg_data output wire[`NUM_THREADS-1:0][31:0] out_b_reg_data
); );
wire[`NW-1:0][`NT_M1:0][31:0] temp_a_reg_data; wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_a_reg_data;
wire[`NW-1:0][`NT_M1:0][31:0] temp_b_reg_data; wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_b_reg_data;
wire[`NT_M1:0][31:0] jal_data; wire[`NUM_THREADS-1:0][31:0] jal_data;
genvar index; genvar index;
generate generate
for (index = 0; index <= `NT_M1; index = index + 1) begin : jal_data_assign for (index = 0; index < `NUM_THREADS; index = index + 1) begin : jal_data_assign
assign jal_data[index] = VX_gpr_jal.curr_PC; assign jal_data[index] = VX_gpr_jal.curr_PC;
end end
endgenerate endgenerate
`ifndef ASIC `ifndef ASIC
assign out_a_reg_data = (VX_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[VX_gpr_read.warp_num])); assign out_a_reg_data = (VX_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[VX_gpr_read.warp_num]));
assign out_b_reg_data = (temp_b_reg_data[VX_gpr_read.warp_num]); assign out_b_reg_data = (temp_b_reg_data[VX_gpr_read.warp_num]);
@ -31,8 +30,8 @@ module VX_gpr_wrapper (
wire zer = 0; wire zer = 0;
wire[`NW_M1:0] old_warp_num; wire[`NW_BITS-1:0] old_warp_num;
VX_generic_register #(`NW_M1+1) store_wn( VX_generic_register #(`NW_BITS-1+1) store_wn(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(zer), .stall(zer),
@ -49,7 +48,7 @@ module VX_gpr_wrapper (
genvar warp_index; genvar warp_index;
generate generate
for (warp_index = 0; warp_index < `NW; warp_index = warp_index + 1) begin : warp_gprs for (warp_index = 0; warp_index < `NUM_WARPS; warp_index = warp_index + 1) begin : warp_gprs
wire valid_write_request = warp_index == VX_writeback_inter.wb_warp_num; wire valid_write_request = warp_index == VX_writeback_inter.wb_warp_num;
VX_gpr vx_gpr( VX_gpr vx_gpr(

View file

@ -1,12 +1,12 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_icache_stage ( module VX_icache_stage (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire total_freeze, input wire total_freeze,
output wire icache_stage_delay, output wire icache_stage_delay,
output wire[`NW_M1:0] icache_stage_wid, output wire[`NW_BITS-1:0] icache_stage_wid,
output wire[`NT-1:0] icache_stage_valids, output wire[`NUM_THREADS-1:0] icache_stage_valids,
VX_inst_meta_inter fe_inst_meta_fi, VX_inst_meta_inter fe_inst_meta_fi,
VX_inst_meta_inter fe_inst_meta_id, VX_inst_meta_inter fe_inst_meta_id,
@ -14,7 +14,7 @@ module VX_icache_stage (
VX_gpu_dcache_req_inter VX_icache_req VX_gpu_dcache_req_inter VX_icache_req
); );
reg[`NT-1:0] threads_active[`NW-1:0]; reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0];
wire valid_inst = (|fe_inst_meta_fi.valid); wire valid_inst = (|fe_inst_meta_fi.valid);
@ -39,7 +39,7 @@ module VX_icache_stage (
/* verilator lint_off WIDTH */ /* verilator lint_off WIDTH */
assign icache_stage_wid = fe_inst_meta_id.warp_num; assign icache_stage_wid = fe_inst_meta_id.warp_num;
assign icache_stage_valids = fe_inst_meta_id.valid & {`NT{!icache_stage_delay}}; assign icache_stage_valids = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}};
// Cache can't accept request // Cache can't accept request
assign icache_stage_delay = VX_icache_rsp.delay_req; assign icache_stage_delay = VX_icache_rsp.delay_req;
@ -50,7 +50,7 @@ module VX_icache_stage (
integer curr_w; integer curr_w;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
for (curr_w = 0; curr_w < `NW; curr_w=curr_w+1) threads_active[curr_w] <= 0; for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) threads_active[curr_w] <= 0;
end else begin end else begin
if (valid_inst && !icache_stage_delay) begin if (valid_inst && !icache_stage_delay) begin
/* verilator lint_off WIDTH */ /* verilator lint_off WIDTH */

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_inst_multiplex ( module VX_inst_multiplex (
// Inputs // Inputs
@ -12,9 +12,9 @@ module VX_inst_multiplex (
VX_csr_req_inter VX_csr_req VX_csr_req_inter VX_csr_req
); );
wire[`NT_M1:0] is_mem_mask; wire[`NUM_THREADS-1:0] is_mem_mask;
wire[`NT_M1:0] is_gpu_mask; wire[`NUM_THREADS-1:0] is_gpu_mask;
wire[`NT_M1:0] is_csr_mask; wire[`NUM_THREADS-1:0] is_csr_mask;
wire is_mem = (VX_bckE_req.mem_write != `NO_MEM_WRITE) || (VX_bckE_req.mem_read != `NO_MEM_READ); wire is_mem = (VX_bckE_req.mem_write != `NO_MEM_WRITE) || (VX_bckE_req.mem_read != `NO_MEM_READ);
wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split); wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
@ -23,7 +23,7 @@ module VX_inst_multiplex (
genvar currT; genvar currT;
generate generate
for (currT = 0; currT < `NT; currT = currT + 1) begin : mask_init for (currT = 0; currT < `NUM_THREADS; currT = currT + 1) begin : mask_init
assign is_mem_mask[currT] = is_mem; assign is_mem_mask[currT] = is_mem;
assign is_gpu_mask[currT] = is_gpu; assign is_gpu_mask[currT] = is_gpu;
assign is_csr_mask[currT] = is_csr; assign is_csr_mask[currT] = is_csr;

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_lsu ( module VX_lsu (
input wire clk, input wire clk,
@ -15,7 +15,7 @@ module VX_lsu (
); );
// Generate Addresses // Generate Addresses
wire[`NT_M1:0][31:0] address; wire[`NUM_THREADS-1:0][31:0] address;
VX_lsu_addr_gen VX_lsu_addr_gen VX_lsu_addr_gen VX_lsu_addr_gen
( (
.base_address(VX_lsu_req.base_address), .base_address(VX_lsu_req.base_address),
@ -23,19 +23,19 @@ module VX_lsu (
.address (address) .address (address)
); );
wire[`NT_M1:0][31:0] use_address; wire[`NUM_THREADS-1:0][31:0] use_address;
wire[`NT_M1:0][31:0] use_store_data; wire[`NUM_THREADS-1:0][31:0] use_store_data;
wire[`NT_M1:0] use_valid; wire[`NUM_THREADS-1:0] use_valid;
wire[2:0] use_mem_read; wire[2:0] use_mem_read;
wire[2:0] use_mem_write; wire[2:0] use_mem_write;
wire[4:0] use_rd; wire[4:0] use_rd;
wire[`NW_M1:0] use_warp_num; wire[`NW_BITS-1:0] use_warp_num;
wire[1:0] use_wb; wire[1:0] use_wb;
wire[31:0] use_pc; wire[31:0] use_pc;
wire zero = 0; wire zero = 0;
VX_generic_register #(.N(45 + `NW_M1 + 1 + `NT*65)) lsu_buffer( VX_generic_register #(.N(45 + `NW_BITS-1 + 1 + `NUM_THREADS*65)) lsu_buffer(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(out_delay), .stall(out_delay),
@ -49,10 +49,10 @@ module VX_lsu (
assign VX_dcache_req.core_req_valid = use_valid; assign VX_dcache_req.core_req_valid = use_valid;
assign VX_dcache_req.core_req_addr = use_address; assign VX_dcache_req.core_req_addr = use_address;
assign VX_dcache_req.core_req_writedata = use_store_data; assign VX_dcache_req.core_req_writedata = use_store_data;
assign VX_dcache_req.core_req_mem_read = {`NT{use_mem_read}}; assign VX_dcache_req.core_req_mem_read = {`NUM_THREADS{use_mem_read}};
assign VX_dcache_req.core_req_mem_write = {`NT{use_mem_write}}; assign VX_dcache_req.core_req_mem_write = {`NUM_THREADS{use_mem_write}};
assign VX_dcache_req.core_req_rd = use_rd; assign VX_dcache_req.core_req_rd = use_rd;
assign VX_dcache_req.core_req_wb = {`NT{use_wb}}; assign VX_dcache_req.core_req_wb = {`NUM_THREADS{use_wb}};
assign VX_dcache_req.core_req_warp_num = use_warp_num; assign VX_dcache_req.core_req_warp_num = use_warp_num;
assign VX_dcache_req.core_req_pc = use_pc; assign VX_dcache_req.core_req_pc = use_pc;
@ -70,9 +70,9 @@ module VX_lsu (
assign VX_mem_wb.wb_warp_num = VX_dcache_rsp.core_wb_warp_num; assign VX_mem_wb.wb_warp_num = VX_dcache_rsp.core_wb_warp_num;
assign VX_mem_wb.loaded_data = VX_dcache_rsp.core_wb_readdata; assign VX_mem_wb.loaded_data = VX_dcache_rsp.core_wb_readdata;
wire[(`CLOG2(`NT))-1:0] use_pc_index; wire[(`LOG2UP(`NUM_THREADS))-1:0] use_pc_index;
wire found; wire found;
VX_generic_priority_encoder #(.N(`NT)) pick_first_pc( VX_generic_priority_encoder #(.N(`NUM_THREADS)) pick_first_pc(
.valids(VX_dcache_rsp.core_wb_valid), .valids(VX_dcache_rsp.core_wb_valid),
.index (use_pc_index), .index (use_pc_index),
.found (found) .found (found)

View file

@ -1,17 +1,15 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_lsu_addr_gen ( module VX_lsu_addr_gen (
input wire[`NT_M1:0][31:0] base_address, input wire[`NUM_THREADS-1:0][31:0] base_address,
input wire[31:0] offset, input wire[31:0] offset,
output wire[`NT_M1:0][31:0] address output wire[`NUM_THREADS-1:0][31:0] address
); );
genvar i;
genvar index;
generate generate
for (index = 0; index < `NT; index = index + 1) begin : addresses for (i = 0; i < `NUM_THREADS; i = i + 1) begin : addresses
assign address[index] = base_address[index] + offset; assign address[i] = base_address[i] + offset;
end end
endgenerate endgenerate

View file

@ -1,8 +1,8 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_priority_encoder ( module VX_priority_encoder (
input wire[`NW-1:0] valids, input wire[`NUM_WARPS-1:0] valids,
output reg[`NW_M1:0] index, output reg[`NW_BITS-1:0] index,
output reg found output reg found
); );
@ -10,9 +10,9 @@ module VX_priority_encoder (
always @(*) begin always @(*) begin
index = 0; index = 0;
found = 0; found = 0;
for (i = `NW-1; i >= 0; i = i - 1) begin for (i = `NUM_WARPS-1; i >= 0; i = i - 1) begin
if (valids[i]) begin if (valids[i]) begin
index = i[`NW_M1:0]; index = i[`NW_BITS-1:0];
found = 1; found = 1;
end end
end end

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_priority_encoder_w_mask module VX_priority_encoder_w_mask
#( #(
parameter N = 10 parameter N = 10
@ -7,8 +7,8 @@ module VX_priority_encoder_w_mask
input wire[N-1:0] valids, input wire[N-1:0] valids,
output reg [N-1:0] mask, output reg [N-1:0] mask,
//output reg[$clog2(N)-1:0] index, //output reg[$clog2(N)-1:0] index,
output reg[(`CLOG2(N))-1:0] index, output reg[(`LOG2UP(N))-1:0] index,
//output reg[`CLOG2(N):0] index, // eh //output reg[`LOG2UP(N):0] index, // eh
output reg found output reg found
); );
@ -20,7 +20,7 @@ module VX_priority_encoder_w_mask
for (i = 0; i < N; i=i+1) begin for (i = 0; i < N; i=i+1) begin
if (valids[i]) begin if (valids[i]) begin
//index = i[$clog2(N)-1:0]; //index = i[$clog2(N)-1:0];
index = i[(`CLOG2(N))-1:0]; index = i[(`LOG2UP(N))-1:0];
found = 1; found = 1;
// mask[index] = (1 << i); // mask[index] = (1 << i);
// $display("%h",(1 << i)); // $display("%h",(1 << i));

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_scheduler ( module VX_scheduler (
input wire clk, input wire clk,
@ -10,8 +10,7 @@ module VX_scheduler (
VX_wb_inter VX_writeback_inter, VX_wb_inter VX_writeback_inter,
output wire schedule_delay, output wire schedule_delay,
output wire is_empty output wire is_empty
); );
/* verilator lint_off WIDTH */ /* verilator lint_off WIDTH */
@ -19,7 +18,7 @@ module VX_scheduler (
assign is_empty = count_valid == 0; assign is_empty = count_valid == 0;
reg[31:0][`NT-1:0] rename_table[`NW-1:0]; reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
wire valid_wb = (VX_writeback_inter.wb != 0) && (|VX_writeback_inter.wb_valid) && (VX_writeback_inter.rd != 0); wire valid_wb = (VX_writeback_inter.wb != 0) && (|VX_writeback_inter.wb_valid) && (VX_writeback_inter.rd != 0);
wire wb_inc = (VX_bckE_req.wb != 0) && (VX_bckE_req.rd != 0); wire wb_inc = (VX_bckE_req.wb != 0) && (VX_bckE_req.rd != 0);
@ -32,13 +31,11 @@ module VX_scheduler (
wire is_load = (VX_bckE_req.mem_read != `NO_MEM_READ); wire is_load = (VX_bckE_req.mem_read != `NO_MEM_READ);
// classify our next instruction. // classify our next instruction.
wire is_mem = is_store || is_load; wire is_mem = is_store || is_load;
wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split); wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
wire is_csr = VX_bckE_req.is_csr; wire is_csr = VX_bckE_req.is_csr;
wire is_exec = !is_mem && !is_gpu && !is_csr; wire is_exec = !is_mem && !is_gpu && !is_csr;
// wire rs1_pass = 0; // wire rs1_pass = 0;
// wire rs2_pass = 0; // wire rs2_pass = 0;
@ -48,7 +45,6 @@ module VX_scheduler (
wire rs2_rename_qual = ((rs2_rename) && (VX_bckE_req.rs2 != 0 && using_rs2)); wire rs2_rename_qual = ((rs2_rename) && (VX_bckE_req.rs2 != 0 && using_rs2));
wire rd_rename_qual = ((rd_rename ) && (VX_bckE_req.rd != 0)); wire rd_rename_qual = ((rd_rename ) && (VX_bckE_req.rd != 0));
wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual; wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual;
assign schedule_delay = ((rename_valid) && (|VX_bckE_req.valid)) assign schedule_delay = ((rename_valid) && (|VX_bckE_req.valid))
@ -61,7 +57,7 @@ module VX_scheduler (
always @(posedge clk or posedge reset) begin always @(posedge clk or posedge reset) begin
if (reset) begin if (reset) begin
for (w = 0; w < `NW; w=w+1) for (w = 0; w < `NUM_WARPS; w=w+1)
begin begin
for (i = 0; i < 32; i = i + 1) for (i = 0; i < 32; i = i + 1)
begin begin
@ -74,7 +70,6 @@ module VX_scheduler (
if (valid_wb && ((rename_table[VX_writeback_inter.wb_warp_num][VX_writeback_inter.rd] & (~VX_writeback_inter.wb_valid)) == 0)) count_valid = count_valid - 1; if (valid_wb && ((rename_table[VX_writeback_inter.wb_warp_num][VX_writeback_inter.rd] & (~VX_writeback_inter.wb_valid)) == 0)) count_valid = count_valid - 1;
if (!schedule_delay && wb_inc) count_valid = count_valid + 1; if (!schedule_delay && wb_inc) count_valid = count_valid + 1;
end end
end end

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_warp ( module VX_warp (
@ -6,7 +6,7 @@ module VX_warp (
input wire reset, input wire reset,
input wire stall, input wire stall,
input wire remove, input wire remove,
input wire[`NT_M1:0] in_thread_mask, input wire[`NUM_THREADS-1:0] in_thread_mask,
input wire in_change_mask, input wire in_change_mask,
input wire in_jal, input wire in_jal,
input wire[31:0] in_jal_dest, input wire[31:0] in_jal_dest,
@ -16,20 +16,20 @@ module VX_warp (
input wire[31:0] in_wspawn_pc, input wire[31:0] in_wspawn_pc,
output wire[31:0] out_PC, output wire[31:0] out_PC,
output wire[`NT_M1:0] out_valid output wire[`NUM_THREADS-1:0] out_valid
); );
reg[31:0] real_PC; reg[31:0] real_PC;
logic [31:0] temp_PC; logic [31:0] temp_PC;
logic [31:0] use_PC; logic [31:0] use_PC;
reg[`NT_M1:0] valid; reg[`NUM_THREADS-1:0] valid;
reg[`NT_M1:0] valid_zero; reg[`NUM_THREADS-1:0] valid_zero;
integer ini_cur_th = 0; integer ini_cur_th = 0;
initial begin initial begin
real_PC = 0; real_PC = 0;
for (ini_cur_th = 1; ini_cur_th < `NT; ini_cur_th=ini_cur_th+1) begin for (ini_cur_th = 1; ini_cur_th < `NUM_THREADS; ini_cur_th=ini_cur_th+1) begin
valid[ini_cur_th] = 0; // Thread 1 active valid[ini_cur_th] = 0; // Thread 1 active
valid_zero[ini_cur_th] = 0; valid_zero[ini_cur_th] = 0;
end end
@ -49,7 +49,7 @@ module VX_warp (
genvar out_cur_th; genvar out_cur_th;
generate generate
for (out_cur_th = 0; out_cur_th < `NT; out_cur_th = out_cur_th+1) begin : out_valid_assign for (out_cur_th = 0; out_cur_th < `NUM_THREADS; out_cur_th = out_cur_th+1) begin : out_valid_assign
assign out_valid[out_cur_th] = in_change_mask ? in_thread_mask[out_cur_th] : stall ? 1'b0 : valid[out_cur_th]; assign out_valid[out_cur_th] = in_change_mask ? in_thread_mask[out_cur_th] : stall ? 1'b0 : valid[out_cur_th];
end end
endgenerate endgenerate

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_warp_scheduler ( module VX_warp_scheduler (
input wire clk, // Clock input wire clk, // Clock
@ -7,57 +7,57 @@ module VX_warp_scheduler (
// Wspawn // Wspawn
input wire wspawn, input wire wspawn,
input wire[31:0] wsapwn_pc, input wire[31:0] wsapwn_pc,
input wire[`NW-1:0] wspawn_new_active, input wire[`NUM_WARPS-1:0] wspawn_new_active,
// CTM // CTM
input wire ctm, input wire ctm,
input wire[`NT_M1:0] ctm_mask, input wire[`NUM_THREADS-1:0] ctm_mask,
input wire[`NW_M1:0] ctm_warp_num, input wire[`NW_BITS-1:0] ctm_warp_num,
// WHALT // WHALT
input wire whalt, input wire whalt,
input wire[`NW_M1:0] whalt_warp_num, input wire[`NW_BITS-1:0] whalt_warp_num,
input wire is_barrier, input wire is_barrier,
input wire[31:0] barrier_id, input wire[31:0] barrier_id,
input wire[$clog2(`NW):0] num_warps, input wire[$clog2(`NUM_WARPS):0] num_warps,
input wire[`NW_M1:0] barrier_warp_num, input wire[`NW_BITS-1:0] barrier_warp_num,
// WSTALL // WSTALL
input wire wstall, input wire wstall,
input wire[`NW_M1:0] wstall_warp_num, input wire[`NW_BITS-1:0] wstall_warp_num,
// Split // Split
input wire is_split, input wire is_split,
input wire dont_split, input wire dont_split,
input wire[`NT_M1:0] split_new_mask, input wire[`NUM_THREADS-1:0] split_new_mask,
input wire[`NT_M1:0] split_later_mask, input wire[`NUM_THREADS-1:0] split_later_mask,
input wire[31:0] split_save_pc, input wire[31:0] split_save_pc,
input wire[`NW_M1:0] split_warp_num, input wire[`NW_BITS-1:0] split_warp_num,
// Join // Join
input wire is_join, input wire is_join,
input wire[`NW_M1:0] join_warp_num, input wire[`NW_BITS-1:0] join_warp_num,
// JAL // JAL
input wire jal, input wire jal,
input wire[31:0] jal_dest, input wire[31:0] jal_dest,
input wire[`NW_M1:0] jal_warp_num, input wire[`NW_BITS-1:0] jal_warp_num,
// Branch // Branch
input wire branch_valid, input wire branch_valid,
input wire branch_dir, input wire branch_dir,
input wire[31:0] branch_dest, input wire[31:0] branch_dest,
input wire[`NW_M1:0] branch_warp_num, input wire[`NW_BITS-1:0] branch_warp_num,
output wire[`NT_M1:0] thread_mask, output wire[`NUM_THREADS-1:0] thread_mask,
output wire[`NW_M1:0] warp_num, output wire[`NW_BITS-1:0] warp_num,
output wire[31:0] warp_pc, output wire[31:0] warp_pc,
output wire out_ebreak, output wire out_ebreak,
output wire scheduled_warp, output wire scheduled_warp,
input wire[`NW_M1:0] icache_stage_wid, input wire[`NW_BITS-1:0] icache_stage_wid,
input wire[`NT-1:0] icache_stage_valids input wire[`NUM_THREADS-1:0] icache_stage_valids
); );
@ -66,41 +66,41 @@ module VX_warp_scheduler (
wire update_visible_active; wire update_visible_active;
wire[(1+32+`NT_M1):0] d[`NW-1:0]; wire[(1+32+`NUM_THREADS-1):0] d[`NUM_WARPS-1:0];
wire join_fall; wire join_fall;
wire[31:0] join_pc; wire[31:0] join_pc;
wire[`NT_M1:0] join_tm; wire[`NUM_THREADS-1:0] join_tm;
wire in_wspawn = wspawn; wire in_wspawn = wspawn;
wire in_ctm = ctm; wire in_ctm = ctm;
wire in_whalt = whalt; wire in_whalt = whalt;
wire in_wstall = wstall; wire in_wstall = wstall;
reg[`NW-1:0] warp_active; reg[`NUM_WARPS-1:0] warp_active;
reg[`NW-1:0] warp_stalled; reg[`NUM_WARPS-1:0] warp_stalled;
reg [`NW-1:0] visible_active; reg [`NUM_WARPS-1:0] visible_active;
wire[`NW-1:0] use_active; wire[`NUM_WARPS-1:0] use_active;
reg [`NW-1:0] warp_lock; reg [`NUM_WARPS-1:0] warp_lock;
wire wstall_this_cycle; wire wstall_this_cycle;
reg[`NT_M1:0] thread_masks[`NW-1:0]; reg[`NUM_THREADS-1:0] thread_masks[`NUM_WARPS-1:0];
reg[31:0] warp_pcs[`NW-1:0]; reg[31:0] warp_pcs[`NUM_WARPS-1:0];
// barriers // barriers
reg[`NW-1:0] barrier_stall_mask[(`NUM_BARRIERS-1):0]; reg[`NUM_WARPS-1:0] barrier_stall_mask[(`NUM_BARRIERS-1):0];
wire reached_barrier_limit; wire reached_barrier_limit;
wire[`NW-1:0] curr_barrier_mask; wire[`NUM_WARPS-1:0] curr_barrier_mask;
wire[$clog2(`NW):0] curr_barrier_count; wire[$clog2(`NUM_WARPS):0] curr_barrier_count;
// wsapwn // wsapwn
reg[31:0] use_wsapwn_pc; reg[31:0] use_wsapwn_pc;
reg[`NW-1:0] use_wsapwn; reg[`NUM_WARPS-1:0] use_wsapwn;
wire[`NW_M1:0] warp_to_schedule; wire[`NW_BITS-1:0] warp_to_schedule;
wire schedule; wire schedule;
wire hazard; wire hazard;
@ -110,12 +110,12 @@ module VX_warp_scheduler (
wire[31:0] new_pc; wire[31:0] new_pc;
reg[`NW-1:0] total_barrier_stall; reg[`NUM_WARPS-1:0] total_barrier_stall;
reg didnt_split; reg didnt_split;
/* verilator lint_off UNUSED */ /* verilator lint_off UNUSED */
// wire[$clog2(`NW):0] num_active; // wire[$clog2(`NUM_WARPS):0] num_active;
/* verilator lint_on UNUSED */ /* verilator lint_on UNUSED */
integer curr_w_help; integer curr_w_help;
@ -135,7 +135,7 @@ module VX_warp_scheduler (
didnt_split <= 0; didnt_split <= 0;
warp_lock <= 0; warp_lock <= 0;
// total_barrier_stall = 0; // total_barrier_stall = 0;
for (curr_w_help = 1; curr_w_help < `NW; curr_w_help=curr_w_help+1) begin for (curr_w_help = 1; curr_w_help < `NUM_WARPS; curr_w_help=curr_w_help+1) begin
warp_pcs[curr_w_help] <= 0; warp_pcs[curr_w_help] <= 0;
warp_active[curr_w_help] <= 0; // Activating first warp warp_active[curr_w_help] <= 0; // Activating first warp
visible_active[curr_w_help] <= 0; // Activating first warp visible_active[curr_w_help] <= 0; // Activating first warp
@ -147,7 +147,7 @@ module VX_warp_scheduler (
if (wspawn) begin if (wspawn) begin
warp_active <= wspawn_new_active; warp_active <= wspawn_new_active;
use_wsapwn_pc <= wsapwn_pc; use_wsapwn_pc <= wsapwn_pc;
use_wsapwn <= wspawn_new_active & (~`NW'b1); use_wsapwn <= wspawn_new_active & (~`NUM_WARPS'b1);
end end
if (is_barrier) begin if (is_barrier) begin
@ -219,30 +219,30 @@ module VX_warp_scheduler (
// Lock/Release // Lock/Release
if (scheduled_warp && !stall) begin if (scheduled_warp && !stall) begin
warp_lock[warp_num] <= 1'b1; warp_lock[warp_num] <= 1'b1;
// warp_lock <= {`NW{1'b1}}; // warp_lock <= {`NUM_WARPS{1'b1}};
end end
if (|icache_stage_valids && !stall) begin if (|icache_stage_valids && !stall) begin
warp_lock[icache_stage_wid] <= 1'b0; warp_lock[icache_stage_wid] <= 1'b0;
// warp_lock <= {`NW{1'b0}}; // warp_lock <= {`NUM_WARPS{1'b0}};
end end
end end
end end
VX_countones #(.N(`NW)) barrier_count( VX_countones #(.N(`NUM_WARPS)) barrier_count(
.valids(curr_barrier_mask), .valids(curr_barrier_mask),
.count (curr_barrier_count) .count (curr_barrier_count)
); );
wire[$clog2(`NW):0] count_visible_active; wire[$clog2(`NUM_WARPS):0] count_visible_active;
VX_countones #(.N(`NW)) num_visible( VX_countones #(.N(`NUM_WARPS)) num_visible(
.valids(visible_active), .valids(visible_active),
.count (count_visible_active) .count (count_visible_active)
); );
// assign curr_barrier_count = $countones(curr_barrier_mask); // assign curr_barrier_count = $countones(curr_barrier_mask);
assign curr_barrier_mask = barrier_stall_mask[barrier_id][`NW-1:0]; assign curr_barrier_mask = barrier_stall_mask[barrier_id][`NUM_WARPS-1:0];
assign reached_barrier_limit = curr_barrier_count == (num_warps); assign reached_barrier_limit = curr_barrier_count == (num_warps);
assign wstall_this_cycle = wstall && (wstall_warp_num == warp_to_schedule); // Maybe bug assign wstall_this_cycle = wstall && (wstall_warp_num == warp_to_schedule); // Maybe bug
@ -253,15 +253,15 @@ module VX_warp_scheduler (
// total_barrier_stall = 0; // total_barrier_stall = 0;
// for (curr_b = 0; curr_b < `NUM_BARRIERS; curr_b=curr_b+1) // for (curr_b = 0; curr_b < `NUM_BARRIERS; curr_b=curr_b+1)
// begin // begin
// total_barrier_stall[`NW-1:0] = total_barrier_stall[`NW-1:0] | barrier_stall_mask[curr_b]; // total_barrier_stall[`NUM_WARPS-1:0] = total_barrier_stall[`NUM_WARPS-1:0] | barrier_stall_mask[curr_b];
// end // end
// end // end
assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join); assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join);
wire[(1+32+`NT_M1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]}; wire[(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]};
wire[(1+32+`NT_M1):0] q2 = {1'b0, split_save_pc , split_later_mask}; wire[(1+32+`NUM_THREADS-1):0] q2 = {1'b0, split_save_pc , split_later_mask};
assign {join_fall, join_pc, join_tm} = d[join_warp_num]; assign {join_fall, join_pc, join_tm} = d[join_warp_num];
@ -270,13 +270,13 @@ module VX_warp_scheduler (
genvar curr_warp; genvar curr_warp;
generate generate
for (curr_warp = 0; curr_warp < `NW; curr_warp = curr_warp + 1) begin : stacks for (curr_warp = 0; curr_warp < `NUM_WARPS; curr_warp = curr_warp + 1) begin : stacks
wire correct_warp_s = (curr_warp == split_warp_num); wire correct_warp_s = (curr_warp == split_warp_num);
wire correct_warp_j = (curr_warp == join_warp_num); wire correct_warp_j = (curr_warp == join_warp_num);
wire push = (is_split && !dont_split) && correct_warp_s; wire push = (is_split && !dont_split) && correct_warp_s;
wire pop = is_join && correct_warp_j; wire pop = is_join && correct_warp_j;
VX_generic_stack #(.WIDTH(1+32+`NT), .DEPTH($clog2(`NT)+1)) ipdom_stack( VX_generic_stack #(.WIDTH(1+32+`NUM_THREADS), .DEPTH($clog2(`NUM_THREADS)+1)) ipdom_stack(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.push (push), .push (push),
@ -304,7 +304,7 @@ module VX_warp_scheduler (
wire real_use_wspawn = use_wsapwn[warp_to_schedule]; wire real_use_wspawn = use_wsapwn[warp_to_schedule];
assign warp_pc = real_use_wspawn ? use_wsapwn_pc : warp_pcs[warp_to_schedule]; assign warp_pc = real_use_wspawn ? use_wsapwn_pc : warp_pcs[warp_to_schedule];
assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NT'b1 : thread_masks[warp_to_schedule]); assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NUM_THREADS'b1 : thread_masks[warp_to_schedule]);
assign warp_num = warp_to_schedule; assign warp_num = warp_to_schedule;
assign update_use_wspawn = use_wsapwn[warp_to_schedule] && !global_stall; assign update_use_wspawn = use_wsapwn[warp_to_schedule] && !global_stall;

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_writeback ( module VX_writeback (
input wire clk, input wire clk,
@ -64,9 +64,9 @@ module VX_writeback (
wire zero = 0; wire zero = 0;
wire[`NT-1:0][31:0] use_wb_data; wire[`NUM_THREADS-1:0][31:0] use_wb_data;
VX_generic_register #(.N(39 + `NW_M1 + 1 + `NT*33)) wb_register( VX_generic_register #(.N(39 + `NW_BITS-1 + 1 + `NUM_THREADS*33)) wb_register(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(zero), .stall(zero),

View file

@ -1,5 +1,5 @@
`include "VX_define.v" `include "VX_define.vh"
`include "VX_cache_config.v" `include "VX_cache_config.vh"
module Vortex module Vortex
#( #(
@ -13,24 +13,24 @@ module Vortex
// IO // IO
output wire io_valid, output wire io_valid,
output wire[31:0] io_data, output wire [31:0] io_data,
// DRAM Dcache Req // DRAM Dcache Req
output wire dram_req, output wire dram_req,
output wire dram_req_write, output wire dram_req_write,
output wire dram_req_read, output wire dram_req_read,
output wire [31:0] dram_req_addr, output wire [31:0] dram_req_addr,
output wire [31:0] dram_req_size, output wire [31:0] dram_req_size,
output wire [31:0] dram_req_data[`DBANK_LINE_SIZE_RNG], output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
output wire [31:0] dram_expected_lat, output wire [31:0] dram_expected_lat,
input wire dram_req_delay, input wire dram_req_delay,
// DRAM Dcache Res // DRAM Dcache Res
output wire dram_fill_accept, output wire dram_fill_accept,
input wire dram_fill_rsp, input wire dram_fill_rsp,
input wire [31:0] dram_fill_rsp_addr, input wire [31:0] dram_fill_rsp_addr,
input wire [31:0] dram_fill_rsp_data[`DBANK_LINE_SIZE_RNG], input wire [`DBANK_LINE_SIZE-1:0] dram_fill_rsp_data,
// DRAM Icache Req // DRAM Icache Req
output wire I_dram_req, output wire I_dram_req,
@ -38,25 +38,25 @@ module Vortex
output wire I_dram_req_read, output wire I_dram_req_read,
output wire [31:0] I_dram_req_addr, output wire [31:0] I_dram_req_addr,
output wire [31:0] I_dram_req_size, output wire [31:0] I_dram_req_size,
output wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_req_data, output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data,
output wire [31:0] I_dram_expected_lat, output wire [31:0] I_dram_expected_lat,
// DRAM Icache Res // DRAM Icache Res
output wire I_dram_fill_accept, output wire I_dram_fill_accept,
input wire I_dram_fill_rsp, input wire I_dram_fill_rsp,
input wire [31:0] I_dram_fill_rsp_addr, input wire [31:0] I_dram_fill_rsp_addr,
input wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_fill_rsp_data, input wire [`IBANK_LINE_SIZE-1:0] I_dram_fill_rsp_data,
// LLC Snooping // LLC Snooping
input wire snp_req, input wire snp_req,
input wire [31:0] snp_req_addr, input wire [31:0] snp_req_addr,
output wire snp_req_delay, output wire snp_req_delay,
input wire I_snp_req, input wire I_snp_req,
input wire [31:0] I_snp_req_addr, input wire [31:0] I_snp_req_addr,
output wire I_snp_req_delay, output wire I_snp_req_delay,
output wire out_ebreak output wire out_ebreak
`else `else
@ -72,14 +72,14 @@ module Vortex
output wire dram_req_read, output wire dram_req_read,
output wire [31:0] dram_req_addr, output wire [31:0] dram_req_addr,
output wire [31:0] dram_req_size, output wire [31:0] dram_req_size,
output wire [`DBANK_LINE_SIZE_RNG][31:0] dram_req_data, output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
output wire [31:0] dram_expected_lat, output wire [31:0] dram_expected_lat,
// DRAM Dcache Res // DRAM Dcache Res
output wire dram_fill_accept, output wire dram_fill_accept,
input wire dram_fill_rsp, input wire dram_fill_rsp,
input wire [31:0] dram_fill_rsp_addr, input wire [31:0] dram_fill_rsp_addr,
input wire [`DBANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data, input wire [`DBANK_LINE_SIZE-1:0] dram_fill_rsp_data,
// DRAM Icache Req // DRAM Icache Req
@ -88,16 +88,16 @@ module Vortex
output wire I_dram_req_read, output wire I_dram_req_read,
output wire [31:0] I_dram_req_addr, output wire [31:0] I_dram_req_addr,
output wire [31:0] I_dram_req_size, output wire [31:0] I_dram_req_size,
output wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_req_data, output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data,
output wire [31:0] I_dram_expected_lat, output wire [31:0] I_dram_expected_lat,
// DRAM Icache Res // DRAM Icache Res
output wire I_dram_fill_accept, output wire I_dram_fill_accept,
input wire I_dram_fill_rsp, input wire I_dram_fill_rsp,
input wire [31:0] I_dram_fill_rsp_addr, input wire [31:0] I_dram_fill_rsp_addr,
input wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_fill_rsp_data, input wire [`IBANK_LINE_SIZE-1:0] I_dram_fill_rsp_data,
input wire dram_req_delay, input wire dram_req_delay,
input wire snp_req, input wire snp_req,
input wire [31:0] snp_req_addr, input wire [31:0] snp_req_addr,
@ -110,27 +110,24 @@ module Vortex
output wire out_ebreak output wire out_ebreak
`endif `endif
); );
wire scheduler_empty; wire scheduler_empty;
wire out_ebreak_unqual; wire out_ebreak_unqual;
// assign out_ebreak = out_ebreak_unqual && (scheduler_empty && 1); // assign out_ebreak = out_ebreak_unqual && (scheduler_empty && 1);
assign out_ebreak = out_ebreak_unqual; assign out_ebreak = out_ebreak_unqual;
wire memory_delay; wire memory_delay;
wire exec_delay; wire exec_delay;
wire gpr_stage_delay; wire gpr_stage_delay;
wire schedule_delay; wire schedule_delay;
// Dcache Interface // Dcache Interface
VX_gpu_dcache_res_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_rsp(); VX_gpu_dcache_res_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_rsp();
VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_req(); VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_req();
VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_req_qual(); VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_req_qual();
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_dcache_dram_req(); VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_dcache_dram_req();
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_dcache_dram_res(); VX_gpu_dcache_dram_res_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_dcache_dram_res();
assign VX_gpu_dcache_dram_res.dram_fill_rsp = dram_fill_rsp; assign VX_gpu_dcache_dram_res.dram_fill_rsp = dram_fill_rsp;
@ -146,36 +143,40 @@ module Vortex
assign VX_gpu_dcache_dram_req.dram_req_delay = dram_req_delay; assign VX_gpu_dcache_dram_req.dram_req_delay = dram_req_delay;
genvar wordy; genvar i;
generate generate
for (wordy = 0; wordy < `DBANK_LINE_SIZE_WORDS; wordy=wordy+1) begin for (i = 0; i < `DBANK_LINE_WORDS; i=i+1) begin
assign VX_gpu_dcache_dram_res.dram_fill_rsp_data[wordy] = dram_fill_rsp_data[wordy]; assign VX_gpu_dcache_dram_res.dram_fill_rsp_data[i] = dram_fill_rsp_data[i * 32 +: 32];
assign dram_req_data[wordy] = VX_gpu_dcache_dram_req.dram_req_data[wordy]; assign dram_req_data[i * 32 +: 32] = VX_gpu_dcache_dram_req.dram_req_data[i];
end end
endgenerate endgenerate
wire temp_io_valid = (!memory_delay) && (|VX_dcache_req.core_req_valid) && (VX_dcache_req.core_req_mem_write[0] != `NO_MEM_WRITE) && (VX_dcache_req.core_req_addr[0] == 32'h00010000); wire temp_io_valid = (!memory_delay)
&& (|VX_dcache_req.core_req_valid)
&& (VX_dcache_req.core_req_mem_write[0] != `NO_MEM_WRITE)
&& (VX_dcache_req.core_req_addr[0] == 32'h00010000);
wire[31:0] temp_io_data = VX_dcache_req.core_req_writedata[0]; wire[31:0] temp_io_data = VX_dcache_req.core_req_writedata[0];
assign io_valid = temp_io_valid; assign io_valid = temp_io_valid;
assign io_data = temp_io_data; assign io_data = temp_io_data;
assign VX_dcache_req_qual.core_req_valid = VX_dcache_req.core_req_valid & {`NT{~io_valid}}; assign VX_dcache_req_qual.core_req_valid = VX_dcache_req.core_req_valid & {`NUM_THREADS{~io_valid}};
assign VX_dcache_req_qual.core_req_addr = VX_dcache_req.core_req_addr; assign VX_dcache_req_qual.core_req_addr = VX_dcache_req.core_req_addr;
assign VX_dcache_req_qual.core_req_writedata = VX_dcache_req.core_req_writedata; assign VX_dcache_req_qual.core_req_writedata = VX_dcache_req.core_req_writedata;
assign VX_dcache_req_qual.core_req_mem_read = VX_dcache_req.core_req_mem_read; assign VX_dcache_req_qual.core_req_mem_read = VX_dcache_req.core_req_mem_read;
assign VX_dcache_req_qual.core_req_mem_write = VX_dcache_req.core_req_mem_write; assign VX_dcache_req_qual.core_req_mem_write = VX_dcache_req.core_req_mem_write;
assign VX_dcache_req_qual.core_req_rd = VX_dcache_req.core_req_rd; assign VX_dcache_req_qual.core_req_rd = VX_dcache_req.core_req_rd;
assign VX_dcache_req_qual.core_req_wb = VX_dcache_req.core_req_wb; assign VX_dcache_req_qual.core_req_wb = VX_dcache_req.core_req_wb;
assign VX_dcache_req_qual.core_req_warp_num = VX_dcache_req.core_req_warp_num; assign VX_dcache_req_qual.core_req_warp_num = VX_dcache_req.core_req_warp_num;
assign VX_dcache_req_qual.core_req_pc = VX_dcache_req.core_req_pc; assign VX_dcache_req_qual.core_req_pc = VX_dcache_req.core_req_pc;
assign VX_dcache_req_qual.core_no_wb_slot = VX_dcache_req.core_no_wb_slot; assign VX_dcache_req_qual.core_no_wb_slot = VX_dcache_req.core_no_wb_slot;
VX_gpu_dcache_res_inter #(.NUMBER_REQUESTS(`INUMBER_REQUESTS)) VX_icache_rsp(); VX_gpu_dcache_res_inter #(.NUMBER_REQUESTS(`INUMBER_REQUESTS)) VX_icache_rsp();
VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`INUMBER_REQUESTS)) VX_icache_req(); VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`INUMBER_REQUESTS)) VX_icache_req();
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_SIZE_WORDS(`IBANK_LINE_SIZE_WORDS)) VX_gpu_icache_dram_req(); VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) VX_gpu_icache_dram_req();
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_SIZE_WORDS(`IBANK_LINE_SIZE_WORDS)) VX_gpu_icache_dram_res(); VX_gpu_dcache_dram_res_inter #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) VX_gpu_icache_dram_res();
assign VX_gpu_icache_dram_res.dram_fill_rsp = I_dram_fill_rsp; assign VX_gpu_icache_dram_res.dram_fill_rsp = I_dram_fill_rsp;
@ -191,11 +192,11 @@ module Vortex
assign VX_gpu_icache_dram_req.dram_req_delay = dram_req_delay; assign VX_gpu_icache_dram_req.dram_req_delay = dram_req_delay;
genvar iwordy; genvar j;
generate generate
for (iwordy = 0; iwordy < `IBANK_LINE_SIZE_WORDS; iwordy=iwordy+1) begin for (j = 0; j < `IBANK_LINE_WORDS; j = j + 1) begin
assign VX_gpu_icache_dram_res.dram_fill_rsp_data[iwordy] = I_dram_fill_rsp_data[iwordy]; assign VX_gpu_icache_dram_res.dram_fill_rsp_data[j] = I_dram_fill_rsp_data[j * 32 +: 32];
assign I_dram_req_data[iwordy] = VX_gpu_icache_dram_req.dram_req_data[iwordy]; assign I_dram_req_data[j * 32 +: 32] = VX_gpu_icache_dram_req.dram_req_data[j];
end end
endgenerate endgenerate
@ -239,7 +240,7 @@ VX_front_end vx_front_end(
.VX_jal_rsp (VX_jal_rsp), .VX_jal_rsp (VX_jal_rsp),
.VX_branch_rsp (VX_branch_rsp), .VX_branch_rsp (VX_branch_rsp),
.fetch_ebreak (out_ebreak_unqual) .fetch_ebreak (out_ebreak_unqual)
); );
VX_scheduler schedule( VX_scheduler schedule(
.clk (clk), .clk (clk),
@ -251,7 +252,7 @@ VX_scheduler schedule(
.VX_writeback_inter(VX_writeback_inter), .VX_writeback_inter(VX_writeback_inter),
.schedule_delay (schedule_delay), .schedule_delay (schedule_delay),
.is_empty (scheduler_empty) .is_empty (scheduler_empty)
); );
VX_back_end #(.CORE_ID(CORE_ID)) vx_back_end( VX_back_end #(.CORE_ID(CORE_ID)) vx_back_end(
.clk (clk), .clk (clk),
@ -267,7 +268,7 @@ VX_back_end #(.CORE_ID(CORE_ID)) vx_back_end(
.out_mem_delay (memory_delay), .out_mem_delay (memory_delay),
.out_exec_delay (exec_delay), .out_exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay) .gpr_stage_delay (gpr_stage_delay)
); );
VX_dmem_controller VX_dmem_controller( VX_dmem_controller VX_dmem_controller(
@ -291,7 +292,7 @@ VX_dmem_controller VX_dmem_controller(
// Core <-> Dcache // Core <-> Dcache
.VX_dcache_req (VX_dcache_req_qual), .VX_dcache_req (VX_dcache_req_qual),
.VX_dcache_rsp (VX_dcache_rsp) .VX_dcache_rsp (VX_dcache_rsp)
); );
// VX_csr_handler vx_csr_handler( // VX_csr_handler vx_csr_handler(
// .clk (clk), // .clk (clk),
@ -300,7 +301,7 @@ VX_dmem_controller VX_dmem_controller(
// .in_wb_valid (VX_writeback_inter.wb_valid[0]), // .in_wb_valid (VX_writeback_inter.wb_valid[0]),
// .out_decode_csr_data (csr_decode_csr_data) // .out_decode_csr_data (csr_decode_csr_data)
// ); // );
endmodule // Vortex endmodule // Vortex

View file

@ -1,20 +1,18 @@
`include "VX_define.v" `include "VX_define.vh"
`include "VX_cache_config.v" `include "VX_cache_config.vh"
module Vortex_Cluster module Vortex_Cluster
#( #(
parameter CLUSTER_ID = 0 parameter CLUSTER_ID = 0
) ) (
(
// Clock // Clock
input wire clk, input wire clk,
input wire reset, input wire reset,
// IO // IO
output wire[`NUMBER_CORES_PER_CLUSTER-1:0] io_valid, output wire[`NUM_CORES_PER_CLUSTER-1:0] io_valid,
output wire[`NUMBER_CORES_PER_CLUSTER-1:0][31:0] io_data, output wire[`NUM_CORES_PER_CLUSTER-1:0][31:0] io_data,
// DRAM Req // DRAM Req
output wire out_dram_req, output wire out_dram_req,
@ -22,7 +20,7 @@ module Vortex_Cluster
output wire out_dram_req_read, output wire out_dram_req_read,
output wire [31:0] out_dram_req_addr, output wire [31:0] out_dram_req_addr,
output wire [31:0] out_dram_req_size, output wire [31:0] out_dram_req_size,
output wire [31:0] out_dram_req_data[`DBANK_LINE_SIZE_RNG], output wire [31:0] out_dram_req_data[`DBANK_LINE_WORDS-1:0],
output wire [31:0] out_dram_expected_lat, output wire [31:0] out_dram_expected_lat,
input wire out_dram_req_delay, input wire out_dram_req_delay,
@ -30,8 +28,7 @@ module Vortex_Cluster
output wire out_dram_fill_accept, output wire out_dram_fill_accept,
input wire out_dram_fill_rsp, input wire out_dram_fill_rsp,
input wire [31:0] out_dram_fill_rsp_addr, input wire [31:0] out_dram_fill_rsp_addr,
input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_SIZE_RNG], input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_WORDS-1:0],
// LLC Snooping // LLC Snooping
input wire llc_snp_req, input wire llc_snp_req,
@ -40,142 +37,133 @@ module Vortex_Cluster
output wire out_ebreak output wire out_ebreak
); );
// DRAM Dcache Req // DRAM Dcache Req
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_req; wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_req_write; wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req_write;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_req_read; wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req_read;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_addr; wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_addr;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_size; wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_size;
wire[`NUMBER_CORES_PER_CLUSTER-1:0][`DBANK_LINE_SIZE_RNG][31:0] per_core_dram_req_data; wire[`NUM_CORES_PER_CLUSTER-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_core_dram_req_data;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_expected_lat; wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_expected_lat;
// DRAM Dcache Res // DRAM Dcache Res
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_fill_accept; wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_fill_accept;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_fill_rsp; wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_fill_rsp;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_fill_rsp_addr; wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_fill_rsp_addr;
wire[`NUMBER_CORES_PER_CLUSTER-1:0][`DBANK_LINE_SIZE_RNG][31:0] per_core_dram_fill_rsp_data; wire[`NUM_CORES_PER_CLUSTER-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_core_dram_fill_rsp_data;
// DRAM Icache Req // DRAM Icache Req
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_req; wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_req;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_write; wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_write;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_read; wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_read;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_addr; wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_addr;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_size; wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_size;
wire[`NUMBER_CORES_PER_CLUSTER-1:0][`IBANK_LINE_SIZE_RNG][31:0] per_core_I_dram_req_data; wire[`NUM_CORES_PER_CLUSTER-1:0][`IBANK_LINE_WORDS-1:0][31:0] per_core_I_dram_req_data;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_expected_lat; wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_expected_lat;
// DRAM Icache Res // DRAM Icache Res
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_accept; wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_accept;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_rsp; wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_rsp;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_fill_rsp_addr; wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_fill_rsp_addr;
wire[`NUMBER_CORES_PER_CLUSTER-1:0][`IBANK_LINE_SIZE_RNG][31:0] per_core_I_dram_fill_rsp_data; wire[`NUM_CORES_PER_CLUSTER-1:0][`IBANK_LINE_WORDS-1:0][31:0] per_core_I_dram_fill_rsp_data;
// Out ebreak // Out ebreak
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_out_ebreak; wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_out_ebreak;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_io_valid;
wire[`NUMBER_CORES_PER_CLUSTER-1:0][31:0] per_core_io_data;
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_io_valid;
wire[`NUM_CORES_PER_CLUSTER-1:0][31:0] per_core_io_data;
wire l2c_core_accept; wire l2c_core_accept;
wire snp_fwd;
wire snp_fwd; wire[31:0] snp_fwd_addr;
wire[31:0] snp_fwd_addr; wire[`NUM_CORES_PER_CLUSTER-1:0] snp_fwd_delay;
wire[`NUMBER_CORES_PER_CLUSTER-1:0] snp_fwd_delay;
assign out_ebreak = (&per_core_out_ebreak); assign out_ebreak = (&per_core_out_ebreak);
genvar curr_core; genvar curr_core;
generate generate
for (curr_core = 0; curr_core < `NUMBER_CORES_PER_CLUSTER; curr_core=curr_core+1) begin for (curr_core = 0; curr_core < `NUM_CORES_PER_CLUSTER; curr_core=curr_core+1) begin
wire [`IBANK_LINE_SIZE_RNG][31:0] curr_core_I_dram_req_data; wire [`IBANK_LINE_WORDS-1:0][31:0] curr_core_I_dram_req_data;
wire [`DBANK_LINE_SIZE_RNG][31:0] curr_core_dram_req_data ; wire [`DBANK_LINE_WORDS-1:0][31:0] curr_core_dram_req_data ;
assign io_valid[curr_core] = per_core_io_valid[curr_core]; assign io_valid[curr_core] = per_core_io_valid[curr_core];
assign io_data [curr_core] = per_core_io_data [curr_core]; assign io_data [curr_core] = per_core_io_data [curr_core];
Vortex #(.CORE_ID(curr_core + (CLUSTER_ID * `NUMBER_CORES_PER_CLUSTER))) vortex_core( Vortex #(
.clk (clk), .CORE_ID(curr_core + (CLUSTER_ID * `NUM_CORES_PER_CLUSTER))
.reset (reset), ) vortex_core(
.io_valid (per_core_io_valid [curr_core]), .clk (clk),
.io_data (per_core_io_data [curr_core]), .reset (reset),
.dram_req (per_core_dram_req [curr_core]), .io_valid (per_core_io_valid [curr_core]),
.dram_req_write (per_core_dram_req_write [curr_core]), .io_data (per_core_io_data [curr_core]),
.dram_req_read (per_core_dram_req_read [curr_core]), .dram_req (per_core_dram_req [curr_core]),
.dram_req_addr (per_core_dram_req_addr [curr_core]), .dram_req_write (per_core_dram_req_write [curr_core]),
.dram_req_size (per_core_dram_req_size [curr_core]), .dram_req_read (per_core_dram_req_read [curr_core]),
.dram_req_data (curr_core_dram_req_data ), .dram_req_addr (per_core_dram_req_addr [curr_core]),
.dram_expected_lat (per_core_dram_expected_lat [curr_core]), .dram_req_size (per_core_dram_req_size [curr_core]),
.dram_fill_accept (per_core_dram_fill_accept [curr_core]), .dram_req_data (curr_core_dram_req_data ),
.dram_fill_rsp (per_core_dram_fill_rsp [curr_core]), .dram_expected_lat (per_core_dram_expected_lat [curr_core]),
.dram_fill_rsp_addr (per_core_dram_fill_rsp_addr [curr_core]), .dram_fill_accept (per_core_dram_fill_accept [curr_core]),
.dram_fill_rsp_data (per_core_dram_fill_rsp_data [curr_core]), .dram_fill_rsp (per_core_dram_fill_rsp [curr_core]),
.I_dram_req (per_core_I_dram_req [curr_core]), .dram_fill_rsp_addr (per_core_dram_fill_rsp_addr [curr_core]),
.I_dram_req_write (per_core_I_dram_req_write [curr_core]), .dram_fill_rsp_data (per_core_dram_fill_rsp_data [curr_core]),
.I_dram_req_read (per_core_I_dram_req_read [curr_core]), .I_dram_req (per_core_I_dram_req [curr_core]),
.I_dram_req_addr (per_core_I_dram_req_addr [curr_core]), .I_dram_req_write (per_core_I_dram_req_write [curr_core]),
.I_dram_req_size (per_core_I_dram_req_size [curr_core]), .I_dram_req_read (per_core_I_dram_req_read [curr_core]),
.I_dram_req_data (curr_core_I_dram_req_data ), .I_dram_req_addr (per_core_I_dram_req_addr [curr_core]),
.I_dram_expected_lat (per_core_I_dram_expected_lat [curr_core]), .I_dram_req_size (per_core_I_dram_req_size [curr_core]),
.I_dram_fill_accept (per_core_I_dram_fill_accept [curr_core]), .I_dram_req_data (curr_core_I_dram_req_data ),
.I_dram_fill_rsp (per_core_I_dram_fill_rsp [curr_core]), .I_dram_expected_lat (per_core_I_dram_expected_lat [curr_core]),
.I_dram_fill_rsp_addr (per_core_I_dram_fill_rsp_addr[curr_core]), .I_dram_fill_accept (per_core_I_dram_fill_accept [curr_core]),
.I_dram_fill_rsp_data (per_core_I_dram_fill_rsp_data[curr_core]), .I_dram_fill_rsp (per_core_I_dram_fill_rsp [curr_core]),
.dram_req_delay (l2c_core_accept ), .I_dram_fill_rsp_addr (per_core_I_dram_fill_rsp_addr[curr_core]),
.out_ebreak (per_core_out_ebreak [curr_core]), .I_dram_fill_rsp_data (per_core_I_dram_fill_rsp_data[curr_core]),
.snp_req (snp_fwd), .dram_req_delay (l2c_core_accept ),
.snp_req_addr (snp_fwd_addr), .out_ebreak (per_core_out_ebreak [curr_core]),
.snp_req_delay (snp_fwd_delay[curr_core]), .snp_req (snp_fwd),
.I_snp_req (0), .snp_req_addr (snp_fwd_addr),
.I_snp_req_addr (), .snp_req_delay (snp_fwd_delay[curr_core]),
.I_snp_req_delay () .I_snp_req (0),
); .I_snp_req_addr (),
.I_snp_req_delay ()
);
assign per_core_dram_req_data [curr_core] = curr_core_dram_req_data; assign per_core_dram_req_data [curr_core] = curr_core_dram_req_data;
assign per_core_I_dram_req_data[curr_core] = curr_core_I_dram_req_data; assign per_core_I_dram_req_data[curr_core] = curr_core_I_dram_req_data;
end end
endgenerate endgenerate
//////////////////// L2 Cache //////////////////// //////////////////// L2 Cache ////////////////////
wire[`LLNUMBER_REQUESTS-1:0] l2c_core_req; wire[`L2NUMBER_REQUESTS-1:0] l2c_core_req;
wire[`LLNUMBER_REQUESTS-1:0][2:0] l2c_core_req_mem_write; wire[`L2NUMBER_REQUESTS-1:0][2:0] l2c_core_req_mem_write;
wire[`LLNUMBER_REQUESTS-1:0][2:0] l2c_core_req_mem_read; wire[`L2NUMBER_REQUESTS-1:0][2:0] l2c_core_req_mem_read;
wire[`LLNUMBER_REQUESTS-1:0][31:0] l2c_core_req_addr; wire[`L2NUMBER_REQUESTS-1:0][31:0] l2c_core_req_addr;
wire[`LLNUMBER_REQUESTS-1:0][`IBANK_LINE_SIZE_RNG][31:0] l2c_core_req_data; wire[`L2NUMBER_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l2c_core_req_data;
wire[`LLNUMBER_REQUESTS-1:0][1:0] l2c_core_req_wb; wire[`L2NUMBER_REQUESTS-1:0][1:0] l2c_core_req_wb;
wire[`LLNUMBER_REQUESTS-1:0] l2c_core_no_wb_slot; wire[`L2NUMBER_REQUESTS-1:0] l2c_core_no_wb_slot;
wire[`L2NUMBER_REQUESTS-1:0] l2c_wb;
wire[`L2NUMBER_REQUESTS-1:0] [31:0] l2c_wb_addr;
wire[`L2NUMBER_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l2c_wb_data;
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_req_data_port;
wire[`LLNUMBER_REQUESTS-1:0] l2c_wb; wire[`DBANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data_port;
wire[`LLNUMBER_REQUESTS-1:0] [31:0] l2c_wb_addr;
wire[`LLNUMBER_REQUESTS-1:0][`IBANK_LINE_SIZE_RNG][31:0] l2c_wb_data;
wire[`DBANK_LINE_SIZE_RNG][31:0] dram_req_data_port;
wire[`DBANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data_port;
genvar llb_index; genvar llb_index;
generate generate
for (llb_index = 0; llb_index < `DBANK_LINE_SIZE_WORDS; llb_index=llb_index+1) begin for (llb_index = 0; llb_index < `DBANK_LINE_WORDS; llb_index=llb_index+1) begin
assign out_dram_req_data [llb_index] = dram_req_data_port[llb_index]; assign out_dram_req_data [llb_index] = dram_req_data_port[llb_index];
assign dram_fill_rsp_data_port[llb_index] = out_dram_fill_rsp_data[llb_index]; assign dram_fill_rsp_data_port[llb_index] = out_dram_fill_rsp_data[llb_index];
end end
endgenerate endgenerate
//
genvar l2c_curr_core; genvar l2c_curr_core;
generate generate
for (l2c_curr_core = 0; l2c_curr_core < `LLNUMBER_REQUESTS; l2c_curr_core=l2c_curr_core+2) begin for (l2c_curr_core = 0; l2c_curr_core < `L2NUMBER_REQUESTS; l2c_curr_core=l2c_curr_core+2) begin
// Core Request // Core Request
assign l2c_core_req [l2c_curr_core] = per_core_dram_req [(l2c_curr_core/2)]; assign l2c_core_req [l2c_curr_core] = per_core_dram_req [(l2c_curr_core/2)];
assign l2c_core_req [l2c_curr_core+1] = per_core_I_dram_req[(l2c_curr_core/2)]; assign l2c_core_req [l2c_curr_core+1] = per_core_I_dram_req[(l2c_curr_core/2)];
@ -214,29 +202,27 @@ module Vortex_Cluster
wire dram_snp_full; wire dram_snp_full;
wire dram_req_because_of_wb; wire dram_req_because_of_wb;
VX_cache #( VX_cache #(
.CACHE_SIZE_BYTES (`LLCACHE_SIZE_BYTES), .CACHE_SIZE_BYTES (`L2CACHE_SIZE_BYTES),
.BANK_LINE_SIZE_BYTES (`LLBANK_LINE_SIZE_BYTES), .BANK_LINE_SIZE_BYTES (`L2BANK_LINE_SIZE_BYTES),
.NUMBER_BANKS (`LLNUMBER_BANKS), .NUMBER_BANKS (`L2NUMBER_BANKS),
.WORD_SIZE_BYTES (`LLWORD_SIZE_BYTES), .WORD_SIZE_BYTES (`L2WORD_SIZE_BYTES),
.NUMBER_REQUESTS (`LLNUMBER_REQUESTS), .NUMBER_REQUESTS (`L2NUMBER_REQUESTS),
.STAGE_1_CYCLES (`LLSTAGE_1_CYCLES), .STAGE_1_CYCLES (`L2STAGE_1_CYCLES),
.FUNC_ID (`LLFUNC_ID), .FUNC_ID (`L2FUNC_ID),
.REQQ_SIZE (`LLREQQ_SIZE), .REQQ_SIZE (`L2REQQ_SIZE),
.MRVQ_SIZE (`LLMRVQ_SIZE), .MRVQ_SIZE (`L2MRVQ_SIZE),
.DFPQ_SIZE (`LLDFPQ_SIZE), .DFPQ_SIZE (`L2DFPQ_SIZE),
.SNRQ_SIZE (`LLSNRQ_SIZE), .SNRQ_SIZE (`L2SNRQ_SIZE),
.CWBQ_SIZE (`LLCWBQ_SIZE), .CWBQ_SIZE (`L2CWBQ_SIZE),
.DWBQ_SIZE (`LLDWBQ_SIZE), .DWBQ_SIZE (`L2DWBQ_SIZE),
.DFQQ_SIZE (`LLDFQQ_SIZE), .DFQQ_SIZE (`L2DFQQ_SIZE),
.LLVQ_SIZE (`LLLLVQ_SIZE), .LLVQ_SIZE (`L2LLVQ_SIZE),
.FFSQ_SIZE (`LLFFSQ_SIZE), .FFSQ_SIZE (`L2FFSQ_SIZE),
.PRFQ_SIZE (`LLPRFQ_SIZE), .PRFQ_SIZE (`L2PRFQ_SIZE),
.PRFQ_STRIDE (`LLPRFQ_STRIDE), .PRFQ_STRIDE (`L2PRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`LLFILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE),
.SIMULATED_DRAM_LATENCY_CYCLES(`LLSIMULATED_DRAM_LATENCY_CYCLES) .SIMULATED_DRAM_LATENCY_CYCLES(`L2SIMULATED_DRAM_LATENCY_CYCLES)
) ) gpu_l2cache (
gpu_l2cache
(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -295,8 +281,6 @@ module Vortex_Cluster
.snp_fwd (snp_fwd), .snp_fwd (snp_fwd),
.snp_fwd_addr (snp_fwd_addr), .snp_fwd_addr (snp_fwd_addr),
.snp_fwd_delay (|snp_fwd_delay) .snp_fwd_delay (|snp_fwd_delay)
); );
endmodule endmodule

View file

@ -1,5 +1,5 @@
`include "VX_define.v" `include "VX_define.vh"
`include "VX_cache_config.v" `include "VX_cache_config.vh"
module Vortex_Socket ( module Vortex_Socket (
@ -8,8 +8,8 @@ module Vortex_Socket (
input wire reset, input wire reset,
// IO // IO
output wire io_valid[`NUMBER_CORES-1:0], output wire io_valid[`NUM_CORES-1:0],
output wire[31:0] io_data [`NUMBER_CORES-1:0], output wire[31:0] io_data [`NUM_CORES-1:0],
output wire[31:0] number_cores, output wire[31:0] number_cores,
@ -19,7 +19,7 @@ module Vortex_Socket (
output wire out_dram_req_read, output wire out_dram_req_read,
output wire [31:0] out_dram_req_addr, output wire [31:0] out_dram_req_addr,
output wire [31:0] out_dram_req_size, output wire [31:0] out_dram_req_size,
output wire [31:0] out_dram_req_data[`DBANK_LINE_SIZE_RNG], output wire [31:0] out_dram_req_data[`DBANK_LINE_WORDS-1:0],
output wire [31:0] out_dram_expected_lat, output wire [31:0] out_dram_expected_lat,
input wire out_dram_req_delay, input wire out_dram_req_delay,
@ -27,7 +27,7 @@ module Vortex_Socket (
output wire out_dram_fill_accept, output wire out_dram_fill_accept,
input wire out_dram_fill_rsp, input wire out_dram_fill_rsp,
input wire [31:0] out_dram_fill_rsp_addr, input wire [31:0] out_dram_fill_rsp_addr,
input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_SIZE_RNG], input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_WORDS-1:0],
// LLC Snooping // LLC Snooping
input wire llc_snp_req, input wire llc_snp_req,
@ -36,18 +36,16 @@ module Vortex_Socket (
output wire out_ebreak output wire out_ebreak
); );
assign number_cores = `NUM_CORES;
assign number_cores = `NUMBER_CORES; if (`NUM_CLUSTERS == 1) begin
wire[`NUM_CORES-1:0] cluster_io_valid;
if (`NUMBER_CLUSTERS == 1) begin wire[`NUM_CORES-1:0][31:0] cluster_io_data;
wire[`NUMBER_CORES-1:0] cluster_io_valid;
wire[`NUMBER_CORES-1:0][31:0] cluster_io_data;
genvar curr_c; genvar curr_c;
for (curr_c = 0; curr_c < `NUMBER_CORES; curr_c=curr_c+1) begin for (curr_c = 0; curr_c < `NUM_CORES; curr_c=curr_c+1) begin
assign io_valid[curr_c] = cluster_io_valid[curr_c]; assign io_valid[curr_c] = cluster_io_valid[curr_c];
assign io_data [curr_c] = cluster_io_data [curr_c]; assign io_data [curr_c] = cluster_io_data [curr_c];
end end
@ -76,62 +74,57 @@ module Vortex_Socket (
.llc_snp_req_addr (llc_snp_req_addr), .llc_snp_req_addr (llc_snp_req_addr),
.llc_snp_req_delay (llc_snp_req_delay), .llc_snp_req_delay (llc_snp_req_delay),
.out_ebreak (out_ebreak) .out_ebreak (out_ebreak)
); );
end else begin end else begin
wire snp_fwd; wire snp_fwd;
wire[31:0] snp_fwd_addr; wire[31:0] snp_fwd_addr;
wire[`NUMBER_CLUSTERS-1:0] snp_fwd_delay; wire[`NUM_CLUSTERS-1:0] snp_fwd_delay;
wire[`NUMBER_CLUSTERS-1:0] per_cluster_out_ebreak; wire[`NUM_CLUSTERS-1:0] per_cluster_out_ebreak;
assign out_ebreak = (&per_cluster_out_ebreak); assign out_ebreak = (&per_cluster_out_ebreak);
// // DRAM Dcache Req // // DRAM Dcache Req
wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_req; wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req;
wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_req_write; wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_write;
wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_req_read; wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_read;
wire[`NUMBER_CLUSTERS-1:0] [31:0] per_cluster_dram_req_addr; wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_req_addr;
wire[`NUMBER_CLUSTERS-1:0] [31:0] per_cluster_dram_req_size; wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_req_size;
wire[`NUMBER_CLUSTERS-1:0] [31:0] per_cluster_dram_expected_lat; wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_expected_lat;
wire[`NUMBER_CLUSTERS-1:0][`DBANK_LINE_SIZE_RNG][31:0] per_cluster_dram_req_data; wire[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_cluster_dram_req_data;
wire[31:0] per_cluster_dram_req_data_up[`NUMBER_CLUSTERS-1:0][`DBANK_LINE_SIZE_RNG]; wire[31:0] per_cluster_dram_req_data_up[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0];
wire l3c_core_accept; wire l3c_core_accept;
// // DRAM Dcache Res // // DRAM Dcache Res
wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_fill_accept; wire[`NUM_CLUSTERS-1:0] per_cluster_dram_fill_accept;
wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_fill_rsp; wire[`NUM_CLUSTERS-1:0] per_cluster_dram_fill_rsp;
wire[`NUMBER_CLUSTERS-1:0] [31:0] per_cluster_dram_fill_rsp_addr; wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_fill_rsp_addr;
wire[`NUMBER_CLUSTERS-1:0][`DBANK_LINE_SIZE_RNG][31:0] per_cluster_dram_fill_rsp_data; wire[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_cluster_dram_fill_rsp_data;
wire[31:0] per_cluster_dram_fill_rsp_data_up[`NUMBER_CLUSTERS-1:0][`DBANK_LINE_SIZE_RNG]; wire[31:0] per_cluster_dram_fill_rsp_data_up[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0];
wire[`NUMBER_CLUSTERS-1:0][`NUMBER_CORES_PER_CLUSTER-1:0] per_cluster_io_valid; wire[`NUM_CLUSTERS-1:0][`NUM_CORES_PER_CLUSTER-1:0] per_cluster_io_valid;
wire[`NUMBER_CLUSTERS-1:0][`NUMBER_CORES_PER_CLUSTER-1:0][31:0] per_cluster_io_data; wire[`NUM_CLUSTERS-1:0][`NUM_CORES_PER_CLUSTER-1:0][31:0] per_cluster_io_data;
genvar curr_c; genvar curr_c, curr_cc, curr_word;
genvar curr_cc; for (curr_c = 0; curr_c < `NUM_CLUSTERS; curr_c =curr_c+1) begin
genvar curr_word; for (curr_cc = 0; curr_cc < `NUM_CORES_PER_CLUSTER; curr_cc=curr_cc+1) begin
for (curr_c = 0; curr_c < `NUMBER_CLUSTERS; curr_c =curr_c+1) begin assign io_valid[curr_cc+(curr_c*`NUM_CORES_PER_CLUSTER)] = per_cluster_io_valid[curr_c][curr_cc];
for (curr_cc = 0; curr_cc < `NUMBER_CORES_PER_CLUSTER; curr_cc=curr_cc+1) begin assign io_data [curr_cc+(curr_c*`NUM_CORES_PER_CLUSTER)] = per_cluster_io_data [curr_c][curr_cc];
assign io_valid[curr_cc+(curr_c*`NUMBER_CORES_PER_CLUSTER)] = per_cluster_io_valid[curr_c][curr_cc];
assign io_data [curr_cc+(curr_c*`NUMBER_CORES_PER_CLUSTER)] = per_cluster_io_data [curr_c][curr_cc];
end end
for (curr_word = 0; curr_word < `DBANK_LINE_SIZE_WORDS; curr_word = curr_word+1) begin for (curr_word = 0; curr_word < `DBANK_LINE_WORDS; curr_word = curr_word+1) begin
assign per_cluster_dram_req_data [curr_c][curr_word] = per_cluster_dram_req_data_up [curr_c][curr_word]; assign per_cluster_dram_req_data [curr_c][curr_word] = per_cluster_dram_req_data_up [curr_c][curr_word];
assign per_cluster_dram_fill_rsp_data_up[curr_c][curr_word] = per_cluster_dram_fill_rsp_data[curr_c][curr_word]; assign per_cluster_dram_fill_rsp_data_up[curr_c][curr_word] = per_cluster_dram_fill_rsp_data[curr_c][curr_word];
end end
end end
genvar curr_cluster; genvar curr_cluster;
for (curr_cluster = 0; curr_cluster < `NUMBER_CLUSTERS; curr_cluster=curr_cluster+1) begin for (curr_cluster = 0; curr_cluster < `NUM_CLUSTERS; curr_cluster=curr_cluster+1) begin
Vortex_Cluster #(.CLUSTER_ID(curr_cluster)) Vortex_Cluster( Vortex_Cluster #(.CLUSTER_ID(curr_cluster)) Vortex_Cluster(
.clk (clk), .clk (clk),
@ -158,37 +151,33 @@ module Vortex_Socket (
.llc_snp_req_delay (snp_fwd_delay[curr_cluster]), .llc_snp_req_delay (snp_fwd_delay[curr_cluster]),
.out_ebreak (per_cluster_out_ebreak [curr_cluster]) .out_ebreak (per_cluster_out_ebreak [curr_cluster])
); );
end end
//////////////////// L3 Cache //////////////////// //////////////////// L3 Cache ////////////////////
wire[`L3NUMBER_REQUESTS-1:0] l3c_core_req; wire[`L3NUMBER_REQUESTS-1:0] l3c_core_req;
wire[`L3NUMBER_REQUESTS-1:0][2:0] l3c_core_req_mem_write; wire[`L3NUMBER_REQUESTS-1:0][2:0] l3c_core_req_mem_write;
wire[`L3NUMBER_REQUESTS-1:0][2:0] l3c_core_req_mem_read; wire[`L3NUMBER_REQUESTS-1:0][2:0] l3c_core_req_mem_read;
wire[`L3NUMBER_REQUESTS-1:0][31:0] l3c_core_req_addr; wire[`L3NUMBER_REQUESTS-1:0][31:0] l3c_core_req_addr;
wire[`L3NUMBER_REQUESTS-1:0][`IBANK_LINE_SIZE_RNG][31:0] l3c_core_req_data; wire[`L3NUMBER_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l3c_core_req_data;
wire[`L3NUMBER_REQUESTS-1:0][1:0] l3c_core_req_wb; wire[`L3NUMBER_REQUESTS-1:0][1:0] l3c_core_req_wb;
wire[`L3NUMBER_REQUESTS-1:0] l3c_core_no_wb_slot; wire[`L3NUMBER_REQUESTS-1:0] l3c_core_no_wb_slot;
wire[`L3NUMBER_REQUESTS-1:0] l3c_wb; wire[`L3NUMBER_REQUESTS-1:0] l3c_wb;
wire[`L3NUMBER_REQUESTS-1:0] [31:0] l3c_wb_addr; wire[`L3NUMBER_REQUESTS-1:0] [31:0] l3c_wb_addr;
wire[`L3NUMBER_REQUESTS-1:0][`IBANK_LINE_SIZE_RNG][31:0] l3c_wb_data; wire[`L3NUMBER_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l3c_wb_data;
wire[`DBANK_LINE_SIZE_RNG][31:0] dram_req_data_port; wire[`DBANK_LINE_WORDS-1:0][31:0] dram_req_data_port;
wire[`DBANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data_port; wire[`DBANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data_port;
genvar llb_index; genvar llb_index;
for (llb_index = 0; llb_index < `DBANK_LINE_SIZE_WORDS; llb_index=llb_index+1) begin for (llb_index = 0; llb_index < `DBANK_LINE_WORDS; llb_index=llb_index+1) begin
assign out_dram_req_data [llb_index] = dram_req_data_port[llb_index]; assign out_dram_req_data [llb_index] = dram_req_data_port[llb_index];
assign dram_fill_rsp_data_port[llb_index] = out_dram_fill_rsp_data[llb_index]; assign dram_fill_rsp_data_port[llb_index] = out_dram_fill_rsp_data[llb_index];
end end
// //
genvar l3c_curr_cluster; genvar l3c_curr_cluster;
for (l3c_curr_cluster = 0; l3c_curr_cluster < `L3NUMBER_REQUESTS; l3c_curr_cluster=l3c_curr_cluster+1) begin for (l3c_curr_cluster = 0; l3c_curr_cluster < `L3NUMBER_REQUESTS; l3c_curr_cluster=l3c_curr_cluster+1) begin
@ -212,7 +201,6 @@ module Vortex_Socket (
assign per_cluster_dram_fill_rsp [l3c_curr_cluster] = l3c_wb [l3c_curr_cluster]; assign per_cluster_dram_fill_rsp [l3c_curr_cluster] = l3c_wb [l3c_curr_cluster];
assign per_cluster_dram_fill_rsp_data[l3c_curr_cluster] = l3c_wb_data[l3c_curr_cluster]; assign per_cluster_dram_fill_rsp_data[l3c_curr_cluster] = l3c_wb_data[l3c_curr_cluster];
assign per_cluster_dram_fill_rsp_addr[l3c_curr_cluster] = l3c_wb_addr[l3c_curr_cluster]; assign per_cluster_dram_fill_rsp_addr[l3c_curr_cluster] = l3c_wb_addr[l3c_curr_cluster];
end end
wire dram_snp_full; wire dram_snp_full;
@ -224,7 +212,7 @@ module Vortex_Socket (
.WORD_SIZE_BYTES (`L3WORD_SIZE_BYTES), .WORD_SIZE_BYTES (`L3WORD_SIZE_BYTES),
.NUMBER_REQUESTS (`L3NUMBER_REQUESTS), .NUMBER_REQUESTS (`L3NUMBER_REQUESTS),
.STAGE_1_CYCLES (`L3STAGE_1_CYCLES), .STAGE_1_CYCLES (`L3STAGE_1_CYCLES),
.FUNC_ID (`LLFUNC_ID), .FUNC_ID (`L2FUNC_ID),
.REQQ_SIZE (`L3REQQ_SIZE), .REQQ_SIZE (`L3REQQ_SIZE),
.MRVQ_SIZE (`L3MRVQ_SIZE), .MRVQ_SIZE (`L3MRVQ_SIZE),
.DFPQ_SIZE (`L3DFPQ_SIZE), .DFPQ_SIZE (`L3DFPQ_SIZE),
@ -238,9 +226,7 @@ module Vortex_Socket (
.PRFQ_STRIDE (`L3PRFQ_STRIDE), .PRFQ_STRIDE (`L3PRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE),
.SIMULATED_DRAM_LATENCY_CYCLES(`L3SIMULATED_DRAM_LATENCY_CYCLES) .SIMULATED_DRAM_LATENCY_CYCLES(`L3SIMULATED_DRAM_LATENCY_CYCLES)
) ) gpu_l3cache (
gpu_l3cache
(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -300,10 +286,8 @@ module Vortex_Socket (
.snp_fwd (snp_fwd), .snp_fwd (snp_fwd),
.snp_fwd_addr (snp_fwd_addr), .snp_fwd_addr (snp_fwd_addr),
.snp_fwd_delay (|snp_fwd_delay) .snp_fwd_delay (|snp_fwd_delay)
); );
end end
endmodule endmodule

View file

@ -1,50 +1,45 @@
`include "VX_define.v" `include "VX_define.vh"
module byte_enabled_simple_dual_port_ram module byte_enabled_simple_dual_port_ram
( (
input we, clk, input we, clk,
input wire reset, input wire reset,
input wire[4:0] waddr, raddr1, raddr2, input wire[4:0] waddr, raddr1, raddr2,
input wire[`NT_M1:0] be, input wire[`NUM_THREADS-1:0] be,
input wire[`NT_M1:0][31:0] wdata, input wire[`NUM_THREADS-1:0][31:0] wdata,
output reg[`NT_M1:0][31:0] q1, q2 output reg[`NUM_THREADS-1:0][31:0] q1, q2
); );
// integer regi; // integer regi;
// integer threadi; // integer threadi;
// Thread Byte Bit // Thread Byte Bit
logic [`NT_M1:0][3:0][7:0] GPR[31:0]; logic [`NUM_THREADS-1:0][3:0][7:0] GPR[31:0];
// initial begin // initial begin
// for (ini = 0; ini < 32; ini = ini + 1) GPR[ini] = 0; // for (ini = 0; ini < 32; ini = ini + 1) GPR[ini] = 0;
// end // end
integer ini; integer ini;
always @(posedge clk, posedge reset) begin always @(posedge clk) begin
// TODO Clearing ram not currently supported on FPGA. if (we) begin
if (reset) begin
// `ifdef ASIC
for (ini = 0; ini < 32; ini = ini + 1) GPR[ini] <= 0;
// `endif
end
else if(we) begin
integer thread_ind; integer thread_ind;
for (thread_ind = 0; thread_ind <= `NT_M1; thread_ind = thread_ind + 1) begin for (thread_ind = 0; thread_ind < `NUM_THREADS; thread_ind = thread_ind + 1) begin
if(be[thread_ind]) GPR[waddr][thread_ind][0] <= wdata[thread_ind][7:0]; if (be[thread_ind]) begin
if(be[thread_ind]) GPR[waddr][thread_ind][1] <= wdata[thread_ind][15:8]; GPR[waddr][thread_ind][0] <= wdata[thread_ind][7:0];
if(be[thread_ind]) GPR[waddr][thread_ind][2] <= wdata[thread_ind][23:16]; GPR[waddr][thread_ind][1] <= wdata[thread_ind][15:8];
if(be[thread_ind]) GPR[waddr][thread_ind][3] <= wdata[thread_ind][31:24]; GPR[waddr][thread_ind][2] <= wdata[thread_ind][23:16];
GPR[waddr][thread_ind][3] <= wdata[thread_ind][31:24];
end
end end
end end
// $display("^^^^^^^^^^^^^^^^^^^^^^^"); // $display("^^^^^^^^^^^^^^^^^^^^^^^");
// for (regi = 0; regi <= 31; regi = regi + 1) begin // for (regi = 0; regi <= 31; regi = regi + 1) begin
// for (threadi = 0; threadi <= `NT_M1; threadi = threadi + 1) begin // for (threadi = 0; threadi < `NUM_THREADS; threadi = threadi + 1) begin
// if (GPR[regi][threadi] != 0) $display("$%d: %h",regi, GPR[regi][threadi]); // if (GPR[regi][threadi] != 0) $display("$%d: %h",regi, GPR[regi][threadi]);
// end // end
// end // end
end end
assign q1 = GPR[raddr1]; assign q1 = GPR[raddr1];

View file

@ -2,7 +2,7 @@
// Also add a bit about whether the "Way ID" is valid / being held or if it is just default // Also add a bit about whether the "Way ID" is valid / being held or if it is just default
// Also make sure all possible output states are transmitted back to the bank correctly // Also make sure all possible output states are transmitted back to the bank correctly
`include "VX_define.v" `include "VX_define.vh"
module VX_Cache_Bank module VX_Cache_Bank
#( #(
@ -67,7 +67,7 @@ module VX_Cache_Bank
localparam RECIV_MEM_RSP = 2; localparam RECIV_MEM_RSP = 2;
localparam BLOCK_NUM_BITS = `CLOG2(CACHE_BLOCK); localparam BLOCK_NUM_BITS = `LOG2UP(CACHE_BLOCK);
// Inputs // Inputs
input wire rst; input wire rst;
input wire clk; input wire clk;

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_cache_bank_valid module VX_cache_bank_valid
#( #(

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_cache_data module VX_cache_data
#( #(

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
module VX_cache_data_per_index module VX_cache_data_per_index
#( #(

View file

@ -8,7 +8,7 @@
// TO DO: // TO DO:
// - Send in a response from memory of what the data is from the test bench // - Send in a response from memory of what the data is from the test bench
`include "VX_define.v" `include "VX_define.vh"
//`include "VX_Cache_Bank.v" //`include "VX_Cache_Bank.v"
//`include "VX_cache_bank_valid.v" //`include "VX_cache_bank_valid.v"
//`include "VX_priority_encoder.v" //`include "VX_priority_encoder.v"

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
`define NUM_WORDS_PER_BLOCK 4 `define NUM_WORDS_PER_BLOCK 4
@ -33,17 +33,17 @@ module VX_d_cache_encapsulate (
//parameter cache_entry = 9; //parameter cache_entry = 9;
input wire clk, rst; input wire clk, rst;
input wire i_p_valid[`NT_M1:0]; input wire i_p_valid[`NUM_THREADS-1:0];
input wire [31:0] i_p_addr[`NT_M1:0]; input wire [31:0] i_p_addr[`NUM_THREADS-1:0];
input wire i_p_initial_request; input wire i_p_initial_request;
input wire [31:0] i_p_writedata[`NT_M1:0]; input wire [31:0] i_p_writedata[`NUM_THREADS-1:0];
input wire i_p_read_or_write; input wire i_p_read_or_write;
input wire [31:0] i_m_readdata[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0]; input wire [31:0] i_m_readdata[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0];
input wire i_m_ready; input wire i_m_ready;
output reg [31:0] o_p_readdata[`NT_M1:0]; output reg [31:0] o_p_readdata[`NUM_THREADS-1:0];
output reg o_p_readdata_valid[`NT_M1:0] ; output reg o_p_readdata_valid[`NUM_THREADS-1:0] ;
output reg o_p_waitrequest; output reg o_p_waitrequest;
output reg [31:0] o_m_addr; output reg [31:0] o_m_addr;
@ -53,12 +53,12 @@ module VX_d_cache_encapsulate (
// Inter // Inter
wire [`NT_M1:0] i_p_valid_inter; wire [`NUM_THREADS-1:0] i_p_valid_inter;
wire [`NT_M1:0][31:0] i_p_addr_inter; wire [`NUM_THREADS-1:0][31:0] i_p_addr_inter;
wire [`NT_M1:0][31:0] i_p_writedata_inter; wire [`NUM_THREADS-1:0][31:0] i_p_writedata_inter;
reg [`NT_M1:0][31:0] o_p_readdata_inter; reg [`NUM_THREADS-1:0][31:0] o_p_readdata_inter;
reg [`NT_M1:0] o_p_readdata_valid_inter; reg [`NUM_THREADS-1:0] o_p_readdata_valid_inter;
reg[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata_inter; reg[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata_inter;
wire[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata_inter; wire[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata_inter;
@ -66,7 +66,7 @@ module VX_d_cache_encapsulate (
genvar curr_thraed, curr_bank, curr_word; genvar curr_thraed, curr_bank, curr_word;
generate generate
for (curr_thraed = 0; curr_thraed < `NT; curr_thraed = curr_thraed + 1) begin : threads for (curr_thraed = 0; curr_thraed < `NUM_THREADS; curr_thraed = curr_thraed + 1) begin : threads
assign i_p_valid_inter[curr_thraed] = i_p_valid[curr_thraed]; assign i_p_valid_inter[curr_thraed] = i_p_valid[curr_thraed];
assign i_p_addr_inter[curr_thraed] = i_p_addr[curr_thraed]; assign i_p_addr_inter[curr_thraed] = i_p_addr[curr_thraed];
assign i_p_writedata_inter[curr_thraed] = i_p_writedata[curr_thraed]; assign i_p_writedata_inter[curr_thraed] = i_p_writedata[curr_thraed];

View file

@ -1,4 +1,4 @@
`include "VX_define.v" `include "VX_define.vh"
`include "VX_d_cache.v" `include "VX_d_cache.v"
module VX_d_cache_tb; module VX_d_cache_tb;
@ -6,13 +6,13 @@ module VX_d_cache_tb;
parameter NUMBER_BANKS = 8; parameter NUMBER_BANKS = 8;
reg clk, reset, im_ready; reg clk, reset, im_ready;
reg [`NT_M1:0] i_p_valid; reg [`NUM_THREADS-1:0] i_p_valid;
reg [`NT_M1:0][13:0] i_p_addr; // FIXME reg [`NUM_THREADS-1:0][13:0] i_p_addr; // FIXME
reg i_p_initial_request; reg i_p_initial_request;
reg [`NT_M1:0][31:0] i_p_writedata; reg [`NUM_THREADS-1:0][31:0] i_p_writedata;
reg i_p_read_or_write; //, i_p_write; reg i_p_read_or_write; //, i_p_write;
reg [`NT_M1:0][31:0] o_p_readdata; reg [`NUM_THREADS-1:0][31:0] o_p_readdata;
reg [`NT_M1:0] o_p_readdata_valid; reg [`NUM_THREADS-1:0] o_p_readdata_valid;
reg o_p_waitrequest; reg o_p_waitrequest;
reg [13:0] o_m_addr; // Only one address is sent out at a time to memory reg [13:0] o_m_addr; // Only one address is sent out at a time to memory
reg o_m_valid; reg o_m_valid;

View file

@ -2,7 +2,7 @@
// Also add a bit about whether the "Way ID" is valid / being held or if it is just default // Also add a bit about whether the "Way ID" is valid / being held or if it is just default
// Also make sure all possible output states are transmitted back to the bank correctly // Also make sure all possible output states are transmitted back to the bank correctly
// `include "VX_define.v" // `include "VX_define.vh"
module cache_set(clk, module cache_set(clk,
rst, rst,
// These next 4 are possible modes that the Set could be in, I am making them 4 different variables for indexing purposes // These next 4 are possible modes that the Set could be in, I am making them 4 different variables for indexing purposes
@ -94,7 +94,7 @@ module cache_set(clk,
readdata <= data[3]; readdata <= data[3];
end end
end else if (access) begin end else if (access) begin
//tag[`NT_M1:0] <= i_p_addr[`NT_M1:0][13:12]; //tag[`NUM_THREADS-1:0] <= i_p_addr[`NUM_THREADS-1:0][13:12];
counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! NEED TO FIX LOGIC counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! NEED TO FIX LOGIC
// Hit in First Column // Hit in First Column
if (tag[0] == o_tag && valid[0]) begin if (tag[0] == o_tag && valid[0]) begin

View file

@ -1,5 +1,5 @@
`include "VX_cache_config.v" `include "VX_cache_config.vh"
`include "VX_define.v" `include "VX_define.vh"
module VX_bank module VX_bank
#( #(
// Size of cache in bytes // Size of cache in bytes
@ -60,7 +60,7 @@ module VX_bank
input wire [4:0] bank_rd, input wire [4:0] bank_rd,
input wire [NUMBER_REQUESTS-1:0][1:0] bank_wb, input wire [NUMBER_REQUESTS-1:0][1:0] bank_wb,
input wire [31:0] bank_pc, input wire [31:0] bank_pc,
input wire [`NW_M1:0] bank_warp_num, input wire [`NW_BITS-1:0] bank_warp_num,
input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_read, input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_read,
input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_write, input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_write,
output wire reqq_full, output wire reqq_full,
@ -71,7 +71,7 @@ module VX_bank
output wire [`vx_clog2(NUMBER_REQUESTS)-1:0] bank_wb_tid, output wire [`vx_clog2(NUMBER_REQUESTS)-1:0] bank_wb_tid,
output wire [4:0] bank_wb_rd, output wire [4:0] bank_wb_rd,
output wire [1:0] bank_wb_wb, output wire [1:0] bank_wb_wb,
output wire [`NW_M1:0] bank_wb_warp_num, output wire [`NW_BITS-1:0] bank_wb_warp_num,
output wire [`WORD_SIZE_RNG] bank_wb_data, output wire [`WORD_SIZE_RNG] bank_wb_data,
output wire [31:0] bank_wb_pc, output wire [31:0] bank_wb_pc,
output wire [31:0] bank_wb_address, output wire [31:0] bank_wb_address,
@ -86,14 +86,14 @@ module VX_bank
// Dram Fill Response // Dram Fill Response
input wire dram_fill_rsp, input wire dram_fill_rsp,
input wire [31:0] dram_fill_addr, input wire [31:0] dram_fill_addr,
input wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dram_fill_rsp_data, input wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dram_fill_rsp_data,
output wire dram_fill_accept, output wire dram_fill_accept,
// Dram WB Requests // Dram WB Requests
input wire dram_wb_queue_pop, input wire dram_wb_queue_pop,
output wire dram_wb_req, output wire dram_wb_req,
output wire[31:0] dram_wb_req_addr, output wire[31:0] dram_wb_req_addr,
output wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dram_wb_req_data, output wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dram_wb_req_data,
// Snp Request // Snp Request
input wire snp_req, input wire snp_req,
@ -112,7 +112,7 @@ module VX_bank
if (reset) begin if (reset) begin
snoop_state <= 0; snoop_state <= 0;
end else begin end else begin
snoop_state <= (snoop_state | snp_req) && ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID)); snoop_state <= (snoop_state | snp_req) && ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID));
end end
end end
@ -139,11 +139,11 @@ module VX_bank
wire dfpq_empty; wire dfpq_empty;
wire dfpq_full; wire dfpq_full;
wire[31:0] dfpq_addr_st0; wire[31:0] dfpq_addr_st0;
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dfpq_filldata_st0; wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dfpq_filldata_st0;
assign dram_fill_accept = !dfpq_full; assign dram_fill_accept = !dfpq_full;
VX_generic_queue_ll #(.DATAW(32+(`BANK_LINE_SIZE_WORDS*`WORD_SIZE)), .SIZE(DFPQ_SIZE)) dfp_queue( VX_generic_queue_ll #(.DATAW(32+(`BANK_LINE_WORDS*`WORD_SIZE)), .SIZE(DFPQ_SIZE)) dfp_queue(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.push (dram_fill_rsp), .push (dram_fill_rsp),
@ -164,7 +164,7 @@ module VX_bank
wire [`WORD_SIZE_RNG] reqq_req_writeword_st0; wire [`WORD_SIZE_RNG] reqq_req_writeword_st0;
wire [4:0] reqq_req_rd_st0; wire [4:0] reqq_req_rd_st0;
wire [1:0] reqq_req_wb_st0; wire [1:0] reqq_req_wb_st0;
wire [`NW_M1:0] reqq_req_warp_num_st0; wire [`NW_BITS-1:0] reqq_req_warp_num_st0;
wire [2:0] reqq_req_mem_read_st0; wire [2:0] reqq_req_mem_read_st0;
wire [2:0] reqq_req_mem_write_st0; wire [2:0] reqq_req_mem_write_st0;
wire [31:0] reqq_req_pc_st0; wire [31:0] reqq_req_pc_st0;
@ -231,7 +231,7 @@ module VX_bank
wire [4:0] mrvq_rd_st0; wire [4:0] mrvq_rd_st0;
wire [1:0] mrvq_wb_st0; wire [1:0] mrvq_wb_st0;
wire [31:0] miss_resrv_pc_st0; wire [31:0] miss_resrv_pc_st0;
wire [`NW_M1:0] mrvq_warp_num_st0; wire [`NW_BITS-1:0] mrvq_warp_num_st0;
wire [2:0] mrvq_mem_read_st0; wire [2:0] mrvq_mem_read_st0;
wire [2:0] mrvq_mem_write_st0; wire [2:0] mrvq_mem_write_st0;
@ -241,7 +241,7 @@ module VX_bank
wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_add_tid; wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_add_tid;
wire[4:0] miss_add_rd; wire[4:0] miss_add_rd;
wire[1:0] miss_add_wb; wire[1:0] miss_add_wb;
wire[`NW_M1:0] miss_add_warp_num; wire[`NW_BITS-1:0] miss_add_warp_num;
wire[2:0] miss_add_mem_read; wire[2:0] miss_add_mem_read;
wire[2:0] miss_add_mem_write; wire[2:0] miss_add_mem_write;
@ -336,7 +336,7 @@ module VX_bank
wire qual_valid_st0; wire qual_valid_st0;
wire [31:0] qual_addr_st0; wire [31:0] qual_addr_st0;
wire [`WORD_SIZE_RNG] qual_writeword_st0; wire [`WORD_SIZE_RNG] qual_writeword_st0;
wire [`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] qual_writedata_st0; wire [`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] qual_writedata_st0;
wire [`REQ_INST_META_SIZE-1:0] qual_inst_meta_st0; wire [`REQ_INST_META_SIZE-1:0] qual_inst_meta_st0;
wire qual_going_to_write_st0; wire qual_going_to_write_st0;
wire qual_is_snp; wire qual_is_snp;
@ -344,7 +344,7 @@ module VX_bank
wire [`WORD_SIZE_RNG] writeword_st1 [STAGE_1_CYCLES-1:0]; wire [`WORD_SIZE_RNG] writeword_st1 [STAGE_1_CYCLES-1:0];
wire [`REQ_INST_META_SIZE-1:0] inst_meta_st1 [STAGE_1_CYCLES-1:0]; wire [`REQ_INST_META_SIZE-1:0] inst_meta_st1 [STAGE_1_CYCLES-1:0];
wire [`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] writedata_st1[STAGE_1_CYCLES-1:0]; wire [`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] writedata_st1[STAGE_1_CYCLES-1:0];
wire is_snp_st1 [STAGE_1_CYCLES-1:0]; wire is_snp_st1 [STAGE_1_CYCLES-1:0];
wire [31:0] pc_st1 [STAGE_1_CYCLES-1:0]; wire [31:0] pc_st1 [STAGE_1_CYCLES-1:0];
@ -387,7 +387,7 @@ module VX_bank
reqq_pop ? reqq_req_writeword_st0 : reqq_pop ? reqq_req_writeword_st0 :
0; 0;
VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_c0 ( VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_c0 (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall_bank_pipe), .stall(stall_bank_pipe),
@ -399,7 +399,7 @@ module VX_bank
genvar curr_stage; genvar curr_stage;
generate generate
for (curr_stage = 1; curr_stage < STAGE_1_CYCLES; curr_stage = curr_stage + 1) begin for (curr_stage = 1; curr_stage < STAGE_1_CYCLES; curr_stage = curr_stage + 1) begin
VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_cc ( VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_cc (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall_bank_pipe), .stall(stall_bank_pipe),
@ -412,7 +412,7 @@ module VX_bank
wire[`WORD_SIZE_RNG] readword_st1e; wire[`WORD_SIZE_RNG] readword_st1e;
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] readdata_st1e; wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] readdata_st1e;
wire[`TAG_SELECT_SIZE_RNG] readtag_st1e; wire[`TAG_SELECT_SIZE_RNG] readtag_st1e;
wire miss_st1e; wire miss_st1e;
wire dirty_st1e; wire dirty_st1e;
@ -421,7 +421,7 @@ module VX_bank
wire [4:0] rd_st1e; wire [4:0] rd_st1e;
wire [1:0] wb_st1e; wire [1:0] wb_st1e;
wire [`NW_M1:0] warp_num_st1e; wire [`NW_BITS-1:0] warp_num_st1e;
wire [2:0] mem_read_st1e; wire [2:0] mem_read_st1e;
wire [2:0] mem_write_st1e; wire [2:0] mem_write_st1e;
wire [`vx_clog2(NUMBER_REQUESTS)-1:0] tid_st1e; wire [`vx_clog2(NUMBER_REQUESTS)-1:0] tid_st1e;
@ -488,7 +488,7 @@ module VX_bank
wire valid_st2; wire valid_st2;
wire[`WORD_SIZE_RNG] writeword_st2; wire[`WORD_SIZE_RNG] writeword_st2;
wire[`WORD_SIZE_RNG] readword_st2; wire[`WORD_SIZE_RNG] readword_st2;
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] readdata_st2; wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] readdata_st2;
wire miss_st2; wire miss_st2;
wire dirty_st2; wire dirty_st2;
wire[`REQ_INST_META_SIZE-1:0] inst_meta_st2; wire[`REQ_INST_META_SIZE-1:0] inst_meta_st2;
@ -498,7 +498,7 @@ module VX_bank
wire [31:0] pc_st2; wire [31:0] pc_st2;
VX_generic_register #(.N( 1+1+1+1+32+`WORD_SIZE+`WORD_SIZE+(`BANK_LINE_SIZE_WORDS * `WORD_SIZE) + `REQ_INST_META_SIZE + `TAG_SELECT_NUM_BITS + 32 + 2)) st_1e_2 ( VX_generic_register #(.N( 1+1+1+1+32+`WORD_SIZE+`WORD_SIZE+(`BANK_LINE_WORDS * `WORD_SIZE) + `REQ_INST_META_SIZE + `TAG_SELECT_NUM_BITS + 32 + 2)) st_1e_2 (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall_bank_pipe), .stall(stall_bank_pipe),
@ -525,17 +525,17 @@ module VX_bank
// Enqueue to CWB Queue // Enqueue to CWB Queue
wire cwbq_push = (valid_st2 && !miss_st2) && !cwbq_full && !((FUNC_ID == `LLFUNC_ID) && (miss_add_wb == 0)) && !((is_snp_st2 && valid_st2 && ffsq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full)); wire cwbq_push = (valid_st2 && !miss_st2) && !cwbq_full && !((FUNC_ID == `L2FUNC_ID) && (miss_add_wb == 0)) && !((is_snp_st2 && valid_st2 && ffsq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
wire [`WORD_SIZE_RNG] cwbq_data = readword_st2; wire [`WORD_SIZE_RNG] cwbq_data = readword_st2;
wire [`vx_clog2(NUMBER_REQUESTS)-1:0] cwbq_tid = miss_add_tid; wire [`vx_clog2(NUMBER_REQUESTS)-1:0] cwbq_tid = miss_add_tid;
wire [4:0] cwbq_rd = miss_add_rd; wire [4:0] cwbq_rd = miss_add_rd;
wire [1:0] cwbq_wb = miss_add_wb; wire [1:0] cwbq_wb = miss_add_wb;
wire [`NW_M1:0] cwbq_warp_num = miss_add_warp_num; wire [`NW_BITS-1:0] cwbq_warp_num = miss_add_warp_num;
wire [31:0] cwbq_pc = pc_st2; wire [31:0] cwbq_pc = pc_st2;
wire cwbq_empty; wire cwbq_empty;
assign bank_wb_valid = !cwbq_empty; assign bank_wb_valid = !cwbq_empty;
VX_generic_queue_ll #(.DATAW( `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_M1+1) + `WORD_SIZE + 32 + 32), .SIZE(CWBQ_SIZE)) cwb_queue( VX_generic_queue_ll #(.DATAW( `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_BITS-1+1) + `WORD_SIZE + 32 + 32), .SIZE(CWBQ_SIZE)) cwb_queue(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
@ -554,8 +554,8 @@ module VX_bank
wire[31:0] dwbq_req_addr; wire[31:0] dwbq_req_addr;
wire dwbq_empty; wire dwbq_empty;
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dwbq_req_data; wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dwbq_req_data;
if ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin if ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2; assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2;
assign dwbq_req_addr = (should_flush && dwbq_push) ? (addr_st2) : ({readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK); assign dwbq_req_addr = (should_flush && dwbq_push) ? (addr_st2) : ({readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK);
end else begin end else begin
@ -603,7 +603,7 @@ module VX_bank
assign dram_fill_req_addr = addr_st2 & `BASE_ADDR_MASK; assign dram_fill_req_addr = addr_st2 & `BASE_ADDR_MASK;
assign dram_wb_req = !dwbq_empty; assign dram_wb_req = !dwbq_empty;
VX_generic_queue_ll #(.DATAW( 32 + (`BANK_LINE_SIZE_WORDS * `WORD_SIZE)), .SIZE(DWBQ_SIZE)) dwb_queue( VX_generic_queue_ll #(.DATAW( 32 + (`BANK_LINE_WORDS * `WORD_SIZE)), .SIZE(DWBQ_SIZE)) dwb_queue(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v" `include "VX_cache_config.vh"
module VX_cache module VX_cache
#( #(
@ -66,7 +66,7 @@ module VX_cache
// Req meta // Req meta
input wire [4:0] core_req_rd, input wire [4:0] core_req_rd,
input wire [NUMBER_REQUESTS-1:0][1:0] core_req_wb, input wire [NUMBER_REQUESTS-1:0][1:0] core_req_wb,
input wire [`NW_M1:0] core_req_warp_num, input wire [`NW_BITS-1:0] core_req_warp_num,
input wire [31:0] core_req_pc, input wire [31:0] core_req_pc,
output wire delay_req, output wire delay_req,
@ -75,7 +75,7 @@ module VX_cache
output wire [NUMBER_REQUESTS-1:0] core_wb_valid, output wire [NUMBER_REQUESTS-1:0] core_wb_valid,
output wire [4:0] core_wb_req_rd, output wire [4:0] core_wb_req_rd,
output wire [1:0] core_wb_req_wb, output wire [1:0] core_wb_req_wb,
output wire [`NW_M1:0] core_wb_warp_num, output wire [`NW_BITS-1:0] core_wb_warp_num,
output wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] core_wb_readdata, output wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] core_wb_readdata,
output wire [NUMBER_REQUESTS-1:0][31:0] core_wb_pc, output wire [NUMBER_REQUESTS-1:0][31:0] core_wb_pc,
output wire [NUMBER_REQUESTS-1:0][31:0] core_wb_address, output wire [NUMBER_REQUESTS-1:0][31:0] core_wb_address,
@ -84,7 +84,7 @@ module VX_cache
// Dram Fill Response // Dram Fill Response
input wire dram_fill_rsp, input wire dram_fill_rsp,
input wire [31:0] dram_fill_rsp_addr, input wire [31:0] dram_fill_rsp_addr,
input wire [`IBANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data, input wire [`IBANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data,
output wire dram_fill_accept, output wire dram_fill_accept,
// Dram request // Dram request
@ -93,7 +93,7 @@ module VX_cache
output wire dram_req_read, output wire dram_req_read,
output wire [31:0] dram_req_addr, output wire [31:0] dram_req_addr,
output wire [31:0] dram_req_size, output wire [31:0] dram_req_size,
output wire [`IBANK_LINE_SIZE_RNG][31:0] dram_req_data, output wire [`IBANK_LINE_WORDS-1:0][31:0] dram_req_data,
output wire dram_req_because_of_wb, output wire dram_req_because_of_wb,
input wire dram_req_delay, input wire dram_req_delay,
@ -119,7 +119,7 @@ module VX_cache
wire [NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_wb_tid; wire [NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_wb_tid;
wire [NUMBER_BANKS-1:0][4:0] per_bank_wb_rd; wire [NUMBER_BANKS-1:0][4:0] per_bank_wb_rd;
wire [NUMBER_BANKS-1:0][1:0] per_bank_wb_wb; wire [NUMBER_BANKS-1:0][1:0] per_bank_wb_wb;
wire [NUMBER_BANKS-1:0][`NW_M1:0] per_bank_wb_warp_num; wire [NUMBER_BANKS-1:0][`NW_BITS-1:0] per_bank_wb_warp_num;
wire [NUMBER_BANKS-1:0][`WORD_SIZE_RNG] per_bank_wb_data; wire [NUMBER_BANKS-1:0][`WORD_SIZE_RNG] per_bank_wb_data;
wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_pc; wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_pc;
wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_address; wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_address;
@ -134,7 +134,7 @@ module VX_cache
wire[NUMBER_BANKS-1:0] per_bank_dram_wb_req; wire[NUMBER_BANKS-1:0] per_bank_dram_wb_req;
wire[NUMBER_BANKS-1:0] per_bank_dram_because_of_snp; wire[NUMBER_BANKS-1:0] per_bank_dram_because_of_snp;
wire[NUMBER_BANKS-1:0][31:0] per_bank_dram_wb_req_addr; wire[NUMBER_BANKS-1:0][31:0] per_bank_dram_wb_req_addr;
wire[NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] per_bank_dram_wb_req_data; wire[NUMBER_BANKS-1:0][`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] per_bank_dram_wb_req_data;
wire[NUMBER_BANKS-1:0] per_bank_reqq_full; wire[NUMBER_BANKS-1:0] per_bank_reqq_full;
@ -287,7 +287,7 @@ module VX_cache
wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] curr_bank_writedata; wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] curr_bank_writedata;
wire [4:0] curr_bank_rd; wire [4:0] curr_bank_rd;
wire [NUMBER_REQUESTS-1:0][1:0] curr_bank_wb; wire [NUMBER_REQUESTS-1:0][1:0] curr_bank_wb;
wire [`NW_M1:0] curr_bank_warp_num; wire [`NW_BITS-1:0] curr_bank_warp_num;
wire [NUMBER_REQUESTS-1:0][2:0] curr_bank_mem_read; wire [NUMBER_REQUESTS-1:0][2:0] curr_bank_mem_read;
wire [NUMBER_REQUESTS-1:0][2:0] curr_bank_mem_write; wire [NUMBER_REQUESTS-1:0][2:0] curr_bank_mem_write;
wire [31:0] curr_bank_pc; wire [31:0] curr_bank_pc;
@ -298,13 +298,13 @@ module VX_cache
wire [31:0] curr_bank_wb_pc; wire [31:0] curr_bank_wb_pc;
wire [4:0] curr_bank_wb_rd; wire [4:0] curr_bank_wb_rd;
wire [1:0] curr_bank_wb_wb; wire [1:0] curr_bank_wb_wb;
wire [`NW_M1:0] curr_bank_wb_warp_num; wire [`NW_BITS-1:0] curr_bank_wb_warp_num;
wire [`WORD_SIZE_RNG] curr_bank_wb_data; wire [`WORD_SIZE_RNG] curr_bank_wb_data;
wire [31:0] curr_bank_wb_address; wire [31:0] curr_bank_wb_address;
wire curr_bank_dram_fill_rsp; wire curr_bank_dram_fill_rsp;
wire [31:0] curr_bank_dram_fill_rsp_addr; wire [31:0] curr_bank_dram_fill_rsp_addr;
wire [`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] curr_bank_dram_fill_rsp_data; wire [`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] curr_bank_dram_fill_rsp_data;
wire curr_bank_dram_fill_accept; wire curr_bank_dram_fill_accept;
wire curr_bank_dfqq_full; wire curr_bank_dfqq_full;
@ -316,7 +316,7 @@ module VX_cache
wire curr_bank_dram_wb_queue_pop; wire curr_bank_dram_wb_queue_pop;
wire curr_bank_dram_wb_req; wire curr_bank_dram_wb_req;
wire[31:0] curr_bank_dram_wb_req_addr; wire[31:0] curr_bank_dram_wb_req_addr;
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] curr_bank_dram_wb_req_data; wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] curr_bank_dram_wb_req_data;
wire curr_bank_snp_req; wire curr_bank_snp_req;
wire[31:0] curr_bank_snp_req_addr; wire[31:0] curr_bank_snp_req_addr;

View file

@ -1,7 +1,7 @@
`ifndef VX_CACHE_CONFIG `ifndef VX_CACHE_CONFIG
`define VX_CACHE_CONFIG `define VX_CACHE_CONFIG
`include "../VX_define.v" `include "../VX_define.vh"
// data tid rd wb warp_num read write // data tid rd wb warp_num read write
@ -10,10 +10,10 @@
// Cache sizing helpers. Each line below shows the pre-change text followed by
// the post-change text of this commit (`NW_M1 replaced by `NW_BITS-1).
//
// vx_clog2(value): $clog2(value), except that an input of 1 yields 1 rather
// than 0 (presumably so that a `[vx_clog2(N)-1:0]` range never becomes
// `[-1:0]` -- TODO confirm intent).
`define vx_clog2(value) ((value == 1) ? 1 : $clog2(value)) `define vx_clog2(value) ((value == 1) ? 1 : $clog2(value))
// MRVQ_METADATA_SIZE: sum of `WORD_SIZE, vx_clog2(NUMBER_REQUESTS), 5, 2,
// the warp-number width and 3 + 3.
// NOTE(review): the new form `(`NW_BITS-1 + 1)` reduces to `NW_BITS.
`define MRVQ_METADATA_SIZE (`WORD_SIZE + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_M1 + 1) + 3 + 3) `define MRVQ_METADATA_SIZE (`WORD_SIZE + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_BITS-1 + 1) + 3 + 3)
// REQ_INST_META_SIZE: same terms as above without `WORD_SIZE.
// Example tally from the original author:
// 5 + 2 + 4 + 3 + 3 + 1 // 5 + 2 + 4 + 3 + 3 + 1
`define REQ_INST_META_SIZE (5 + 2 + (`NW_M1+1) + 3 + 3 + `vx_clog2(NUMBER_REQUESTS)) `define REQ_INST_META_SIZE (5 + 2 + (`NW_BITS-1+1) + 3 + 3 + `vx_clog2(NUMBER_REQUESTS))
// `define vx_clog2_h(value, x) (value == (1 << x)) ? (x) // `define vx_clog2_h(value, x) (value == (1 << x)) ? (x)
@ -60,9 +60,7 @@
// 8 // 8
`define BANK_LINE_COUNT (`BANK_SIZE_BYTES/BANK_LINE_SIZE_BYTES) `define BANK_LINE_COUNT (`BANK_SIZE_BYTES/BANK_LINE_SIZE_BYTES)
// 4 // 4
`define BANK_LINE_SIZE_WORDS (BANK_LINE_SIZE_BYTES / WORD_SIZE_BYTES) `define BANK_LINE_WORDS (BANK_LINE_SIZE_BYTES / WORD_SIZE_BYTES)
// 3:0
`define BANK_LINE_SIZE_RNG `BANK_LINE_SIZE_WORDS-1:0
// Offset is fixed // Offset is fixed
`define OFFSET_ADDR_NUM_BITS 2 `define OFFSET_ADDR_NUM_BITS 2
@ -73,7 +71,7 @@
`define OFFSET_SIZE_RNG `OFFSET_SIZE_END:0 `define OFFSET_SIZE_RNG `OFFSET_SIZE_END:0
// 2 // 2
`define WORD_SELECT_NUM_BITS (`vx_clog2(`BANK_LINE_SIZE_WORDS)) `define WORD_SELECT_NUM_BITS (`vx_clog2(`BANK_LINE_WORDS))
// 2 // 2
`define WORD_SELECT_SIZE_END (`WORD_SELECT_NUM_BITS) `define WORD_SELECT_SIZE_END (`WORD_SELECT_NUM_BITS)
// 2 // 2

View file

@ -1,5 +1,5 @@
`include "VX_cache_config.v" `include "VX_cache_config.vh"
module VX_cache_core_req_bank_sel module VX_cache_core_req_bank_sel
#( #(

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v" `include "VX_cache_config.vh"
module VX_cache_dfq_queue module VX_cache_dfq_queue
#( #(

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v" `include "VX_cache_config.vh"
module VX_cache_dram_req_arb module VX_cache_dram_req_arb
#( #(
@ -62,7 +62,7 @@ module VX_cache_dram_req_arb
output wire[NUMBER_BANKS-1:0] per_bank_dram_wb_queue_pop, output wire[NUMBER_BANKS-1:0] per_bank_dram_wb_queue_pop,
input wire[NUMBER_BANKS-1:0] per_bank_dram_wb_req, input wire[NUMBER_BANKS-1:0] per_bank_dram_wb_req,
input wire[NUMBER_BANKS-1:0][31:0] per_bank_dram_wb_req_addr, input wire[NUMBER_BANKS-1:0][31:0] per_bank_dram_wb_req_addr,
input wire[NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] per_bank_dram_wb_req_data, input wire[NUMBER_BANKS-1:0][`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] per_bank_dram_wb_req_data,
input wire[NUMBER_BANKS-1:0] per_bank_dram_because_of_snp, input wire[NUMBER_BANKS-1:0] per_bank_dram_because_of_snp,
// real Dram request // real Dram request
@ -71,7 +71,7 @@ module VX_cache_dram_req_arb
output wire dram_req_read, output wire dram_req_read,
output wire [31:0] dram_req_addr, output wire [31:0] dram_req_addr,
output wire [31:0] dram_req_size, output wire [31:0] dram_req_size,
output wire [`IBANK_LINE_SIZE_RNG][31:0] dram_req_data, output wire [`IBANK_LINE_WORDS-1:0][31:0] dram_req_data,
output wire dram_req_because_of_wb, output wire dram_req_because_of_wb,
input wire dram_req_delay input wire dram_req_delay

View file

@ -1,5 +1,5 @@
`include "VX_cache_config.v" `include "VX_cache_config.vh"
module VX_cache_miss_resrv module VX_cache_miss_resrv
#( #(
@ -56,7 +56,7 @@ module VX_cache_miss_resrv
input wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_add_tid, input wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_add_tid,
input wire[4:0] miss_add_rd, input wire[4:0] miss_add_rd,
input wire[1:0] miss_add_wb, input wire[1:0] miss_add_wb,
input wire[`NW_M1:0] miss_add_warp_num, input wire[`NW_BITS-1:0] miss_add_warp_num,
input wire[2:0] miss_add_mem_read, input wire[2:0] miss_add_mem_read,
input wire[2:0] miss_add_mem_write, input wire[2:0] miss_add_mem_write,
input wire[31:0] miss_add_pc, input wire[31:0] miss_add_pc,
@ -75,14 +75,14 @@ module VX_cache_miss_resrv
output wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_resrv_tid_st0, output wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_resrv_tid_st0,
output wire[4:0] miss_resrv_rd_st0, output wire[4:0] miss_resrv_rd_st0,
output wire[1:0] miss_resrv_wb_st0, output wire[1:0] miss_resrv_wb_st0,
output wire[`NW_M1:0] miss_resrv_warp_num_st0, output wire[`NW_BITS-1:0] miss_resrv_warp_num_st0,
output wire[2:0] miss_resrv_mem_read_st0, output wire[2:0] miss_resrv_mem_read_st0,
output wire[31:0] miss_resrv_pc_st0, output wire[31:0] miss_resrv_pc_st0,
output wire[2:0] miss_resrv_mem_write_st0 output wire[2:0] miss_resrv_mem_write_st0
); );
// Size of metadata = 32 + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_M1 + 1) // Size of metadata = 32 + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_BITS-1 + 1)
reg[`MRVQ_METADATA_SIZE-1:0] metadata_table[MRVQ_SIZE-1:0]; reg[`MRVQ_METADATA_SIZE-1:0] metadata_table[MRVQ_SIZE-1:0];
reg[MRVQ_SIZE-1:0][31:0] addr_table; reg[MRVQ_SIZE-1:0][31:0] addr_table;
reg[MRVQ_SIZE-1:0][31:0] pc_table; reg[MRVQ_SIZE-1:0][31:0] pc_table;

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v" `include "VX_cache_config.vh"
module VX_cache_req_queue module VX_cache_req_queue
#( #(
@ -55,7 +55,7 @@ module VX_cache_req_queue
input wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] bank_writedata, input wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] bank_writedata,
input wire [4:0] bank_rd, input wire [4:0] bank_rd,
input wire [NUMBER_REQUESTS-1:0][1:0] bank_wb, input wire [NUMBER_REQUESTS-1:0][1:0] bank_wb,
input wire [`NW_M1:0] bank_warp_num, input wire [`NW_BITS-1:0] bank_warp_num,
input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_read, input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_read,
input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_write, input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_write,
input wire [31:0] bank_pc, input wire [31:0] bank_pc,
@ -68,7 +68,7 @@ module VX_cache_req_queue
output wire [`WORD_SIZE_RNG] reqq_req_writedata_st0, output wire [`WORD_SIZE_RNG] reqq_req_writedata_st0,
output wire [4:0] reqq_req_rd_st0, output wire [4:0] reqq_req_rd_st0,
output wire [1:0] reqq_req_wb_st0, output wire [1:0] reqq_req_wb_st0,
output wire [`NW_M1:0] reqq_req_warp_num_st0, output wire [`NW_BITS-1:0] reqq_req_warp_num_st0,
output wire [2:0] reqq_req_mem_read_st0, output wire [2:0] reqq_req_mem_read_st0,
output wire [2:0] reqq_req_mem_write_st0, output wire [2:0] reqq_req_mem_write_st0,
output wire [31:0] reqq_req_pc_st0, output wire [31:0] reqq_req_pc_st0,
@ -83,7 +83,7 @@ module VX_cache_req_queue
wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] out_per_writedata; wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] out_per_writedata;
wire [4:0] out_per_rd; wire [4:0] out_per_rd;
wire [NUMBER_REQUESTS-1:0][1:0] out_per_wb; wire [NUMBER_REQUESTS-1:0][1:0] out_per_wb;
wire [`NW_M1:0] out_per_warp_num; wire [`NW_BITS-1:0] out_per_warp_num;
wire [NUMBER_REQUESTS-1:0][2:0] out_per_mem_read; wire [NUMBER_REQUESTS-1:0][2:0] out_per_mem_read;
wire [NUMBER_REQUESTS-1:0][2:0] out_per_mem_write; wire [NUMBER_REQUESTS-1:0][2:0] out_per_mem_write;
wire [31:0] out_per_pc; wire [31:0] out_per_pc;
@ -95,7 +95,7 @@ module VX_cache_req_queue
reg [4:0] use_per_rd; reg [4:0] use_per_rd;
reg [NUMBER_REQUESTS-1:0][1:0] use_per_wb; reg [NUMBER_REQUESTS-1:0][1:0] use_per_wb;
reg [31:0] use_per_pc; reg [31:0] use_per_pc;
reg [`NW_M1:0] use_per_warp_num; reg [`NW_BITS-1:0] use_per_warp_num;
reg [NUMBER_REQUESTS-1:0][2:0] use_per_mem_read; reg [NUMBER_REQUESTS-1:0][2:0] use_per_mem_read;
reg [NUMBER_REQUESTS-1:0][2:0] use_per_mem_write; reg [NUMBER_REQUESTS-1:0][2:0] use_per_mem_write;
@ -105,7 +105,7 @@ module VX_cache_req_queue
wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] qual_writedata; wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] qual_writedata;
wire [4:0] qual_rd; wire [4:0] qual_rd;
wire [NUMBER_REQUESTS-1:0][1:0] qual_wb; wire [NUMBER_REQUESTS-1:0][1:0] qual_wb;
wire [`NW_M1:0] qual_warp_num; wire [`NW_BITS-1:0] qual_warp_num;
wire [NUMBER_REQUESTS-1:0][2:0] qual_mem_read; wire [NUMBER_REQUESTS-1:0][2:0] qual_mem_read;
wire [NUMBER_REQUESTS-1:0][2:0] qual_mem_write; wire [NUMBER_REQUESTS-1:0][2:0] qual_mem_write;
wire [31:0] qual_pc; wire [31:0] qual_pc;
@ -120,7 +120,7 @@ module VX_cache_req_queue
wire push_qual = reqq_push && !reqq_full; wire push_qual = reqq_push && !reqq_full;
wire pop_qual = !out_empty && use_empty; wire pop_qual = !out_empty && use_empty;
VX_generic_queue_ll #(.DATAW( (NUMBER_REQUESTS * (1+32+`WORD_SIZE)) + 5 + (NUMBER_REQUESTS*2) + (`NW_M1+1) + (NUMBER_REQUESTS * (3 + 3)) + 32 ), .SIZE(REQQ_SIZE)) reqq_queue( VX_generic_queue_ll #(.DATAW( (NUMBER_REQUESTS * (1+32+`WORD_SIZE)) + 5 + (NUMBER_REQUESTS*2) + (`NW_BITS-1+1) + (NUMBER_REQUESTS * (3 + 3)) + 32 ), .SIZE(REQQ_SIZE)) reqq_queue(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.push (push_qual), .push (push_qual),

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v" `include "VX_cache_config.vh"
module VX_cache_wb_sel_merge module VX_cache_wb_sel_merge
#( #(
@ -53,7 +53,7 @@ module VX_cache_wb_sel_merge
input wire [NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_wb_tid, input wire [NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_wb_tid,
input wire [NUMBER_BANKS-1:0][4:0] per_bank_wb_rd, input wire [NUMBER_BANKS-1:0][4:0] per_bank_wb_rd,
input wire [NUMBER_BANKS-1:0][1:0] per_bank_wb_wb, input wire [NUMBER_BANKS-1:0][1:0] per_bank_wb_wb,
input wire [NUMBER_BANKS-1:0][`NW_M1:0] per_bank_wb_warp_num, input wire [NUMBER_BANKS-1:0][`NW_BITS-1:0] per_bank_wb_warp_num,
input wire [NUMBER_BANKS-1:0][`WORD_SIZE_RNG] per_bank_wb_data, input wire [NUMBER_BANKS-1:0][`WORD_SIZE_RNG] per_bank_wb_data,
input wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_pc, input wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_pc,
input wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_address, input wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_address,
@ -67,7 +67,7 @@ module VX_cache_wb_sel_merge
output reg [NUMBER_REQUESTS-1:0][31:0] core_wb_pc, output reg [NUMBER_REQUESTS-1:0][31:0] core_wb_pc,
output wire [4:0] core_wb_req_rd, output wire [4:0] core_wb_req_rd,
output wire [1:0] core_wb_req_wb, output wire [1:0] core_wb_req_wb,
output wire [`NW_M1:0] core_wb_warp_num, output wire [`NW_BITS-1:0] core_wb_warp_num,
output reg [NUMBER_REQUESTS-1:0][31:0] core_wb_address output reg [NUMBER_REQUESTS-1:0][31:0] core_wb_address
); );
@ -105,7 +105,7 @@ module VX_cache_wb_sel_merge
core_wb_pc = 0; core_wb_pc = 0;
core_wb_address = 0; core_wb_address = 0;
for (this_bank = 0; this_bank < NUMBER_BANKS; this_bank = this_bank + 1) begin for (this_bank = 0; this_bank < NUMBER_BANKS; this_bank = this_bank + 1) begin
if ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin if ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
if (found_bank && !core_wb_valid[per_bank_wb_tid[this_bank]] && per_bank_wb_valid[this_bank] && ((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index]))) begin if (found_bank && !core_wb_valid[per_bank_wb_tid[this_bank]] && per_bank_wb_valid[this_bank] && ((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index]))) begin
core_wb_valid[per_bank_wb_tid[this_bank]] = 1; core_wb_valid[per_bank_wb_tid[this_bank]] = 1;

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v" `include "VX_cache_config.vh"
module VX_dcache_llv_resp_bank_sel module VX_dcache_llv_resp_bank_sel
#( #(
@ -48,13 +48,13 @@ module VX_dcache_llv_resp_bank_sel
output reg [NUMBER_BANKS-1:0] per_bank_llvq_pop, output reg [NUMBER_BANKS-1:0] per_bank_llvq_pop,
input wire[NUMBER_BANKS-1:0] per_bank_llvq_valid, input wire[NUMBER_BANKS-1:0] per_bank_llvq_valid,
input wire[NUMBER_BANKS-1:0][31:0] per_bank_llvq_res_addr, input wire[NUMBER_BANKS-1:0][31:0] per_bank_llvq_res_addr,
input wire[NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][31:0] per_bank_llvq_res_data, input wire[NUMBER_BANKS-1:0][`BANK_LINE_WORDS-1:0][31:0] per_bank_llvq_res_data,
input wire[NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_llvq_res_tid, input wire[NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_llvq_res_tid,
input wire llvq_pop, input wire llvq_pop,
output reg[NUMBER_REQUESTS-1:0] llvq_valid, output reg[NUMBER_REQUESTS-1:0] llvq_valid,
output reg[NUMBER_REQUESTS-1:0][31:0] llvq_res_addr, output reg[NUMBER_REQUESTS-1:0][31:0] llvq_res_addr,
output reg[NUMBER_REQUESTS-1:0][`BANK_LINE_SIZE_RNG][31:0] llvq_res_data output reg[NUMBER_REQUESTS-1:0][`BANK_LINE_WORDS-1:0][31:0] llvq_res_data
); );

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v" `include "VX_cache_config.vh"
module VX_fill_invalidator module VX_fill_invalidator
#( #(

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v" `include "VX_cache_config.vh"
module VX_prefetcher module VX_prefetcher
#( #(

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v" `include "VX_cache_config.vh"
module VX_snp_fwd_arb module VX_snp_fwd_arb
#( #(

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v" `include "VX_cache_config.vh"
module VX_tag_data_access module VX_tag_data_access
#( #(
@ -60,12 +60,12 @@ module VX_tag_data_access
input wire writefill_st1e, input wire writefill_st1e,
input wire[31:0] writeaddr_st1e, input wire[31:0] writeaddr_st1e,
input wire[`WORD_SIZE_RNG] writeword_st1e, input wire[`WORD_SIZE_RNG] writeword_st1e,
input wire[`DBANK_LINE_SIZE_RNG][31:0] writedata_st1e, input wire[`DBANK_LINE_WORDS-1:0][31:0] writedata_st1e,
input wire[2:0] mem_write_st1e, input wire[2:0] mem_write_st1e,
input wire[2:0] mem_read_st1e, input wire[2:0] mem_read_st1e,
output wire[`WORD_SIZE_RNG] readword_st1e, output wire[`WORD_SIZE_RNG] readword_st1e,
output wire[`DBANK_LINE_SIZE_RNG][31:0] readdata_st1e, output wire[`DBANK_LINE_WORDS-1:0][31:0] readdata_st1e,
output wire[`TAG_SELECT_SIZE_RNG] readtag_st1e, output wire[`TAG_SELECT_SIZE_RNG] readtag_st1e,
output wire miss_st1e, output wire miss_st1e,
output wire dirty_st1e, output wire dirty_st1e,
@ -74,25 +74,25 @@ module VX_tag_data_access
); );
reg[`DBANK_LINE_SIZE_RNG][31:0] readdata_st[STAGE_1_CYCLES-1:0]; reg[`DBANK_LINE_WORDS-1:0][31:0] readdata_st[STAGE_1_CYCLES-1:0];
reg read_valid_st1c[STAGE_1_CYCLES-1:0]; reg read_valid_st1c[STAGE_1_CYCLES-1:0];
reg read_dirty_st1c[STAGE_1_CYCLES-1:0]; reg read_dirty_st1c[STAGE_1_CYCLES-1:0];
reg[`TAG_SELECT_SIZE_RNG] read_tag_st1c [STAGE_1_CYCLES-1:0]; reg[`TAG_SELECT_SIZE_RNG] read_tag_st1c [STAGE_1_CYCLES-1:0];
reg[`DBANK_LINE_SIZE_RNG][31:0] read_data_st1c [STAGE_1_CYCLES-1:0]; reg[`DBANK_LINE_WORDS-1:0][31:0] read_data_st1c [STAGE_1_CYCLES-1:0];
wire qual_read_valid_st1; wire qual_read_valid_st1;
wire qual_read_dirty_st1; wire qual_read_dirty_st1;
wire[`TAG_SELECT_SIZE_RNG] qual_read_tag_st1; wire[`TAG_SELECT_SIZE_RNG] qual_read_tag_st1;
wire[`DBANK_LINE_SIZE_RNG][31:0] qual_read_data_st1; wire[`DBANK_LINE_WORDS-1:0][31:0] qual_read_data_st1;
wire use_read_valid_st1e; wire use_read_valid_st1e;
wire use_read_dirty_st1e; wire use_read_dirty_st1e;
wire[`TAG_SELECT_SIZE_RNG] use_read_tag_st1e; wire[`TAG_SELECT_SIZE_RNG] use_read_tag_st1e;
wire[`DBANK_LINE_SIZE_RNG][31:0] use_read_data_st1e; wire[`DBANK_LINE_WORDS-1:0][31:0] use_read_data_st1e;
wire[`DBANK_LINE_SIZE_RNG][3:0] use_write_enable; wire[`DBANK_LINE_WORDS-1:0][3:0] use_write_enable;
wire[`DBANK_LINE_SIZE_RNG][31:0] use_write_data; wire[`DBANK_LINE_WORDS-1:0][31:0] use_write_data;
wire sw, sb, sh; wire sw, sb, sh;
@ -140,8 +140,8 @@ module VX_tag_data_access
.fill_sent (fill_sent) .fill_sent (fill_sent)
); );
// VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) )) s0_1_c0 ( // VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32) )) s0_1_c0 (
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) ), .Valid(0)) s0_1_c0 ( VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32) ), .Valid(0)) s0_1_c0 (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall), .stall(stall),
@ -153,7 +153,7 @@ module VX_tag_data_access
genvar curr_stage; genvar curr_stage;
generate generate
for (curr_stage = 1; curr_stage < STAGE_1_CYCLES-1; curr_stage = curr_stage + 1) begin for (curr_stage = 1; curr_stage < STAGE_1_CYCLES-1; curr_stage = curr_stage + 1) begin
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) )) s0_1_cc ( VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32) )) s0_1_cc (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall), .stall(stall),
@ -170,7 +170,7 @@ module VX_tag_data_access
assign use_read_tag_st1e = (FUNC_ID == `SFUNC_ID) ? writeaddr_st1e[`TAG_SELECT_ADDR_RNG] : read_tag_st1c [STAGE_1_CYCLES-1]; // Tag is always the same in SM assign use_read_tag_st1e = (FUNC_ID == `SFUNC_ID) ? writeaddr_st1e[`TAG_SELECT_ADDR_RNG] : read_tag_st1c [STAGE_1_CYCLES-1]; // Tag is always the same in SM
genvar curr_w; genvar curr_w;
for (curr_w = 0; curr_w < `DBANK_LINE_SIZE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[STAGE_1_CYCLES-1][curr_w][31:0]; for (curr_w = 0; curr_w < `DBANK_LINE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[STAGE_1_CYCLES-1][curr_w][31:0];
// assign use_read_data_st1e = read_data_st1c [STAGE_1_CYCLES-1]; // assign use_read_data_st1e = read_data_st1c [STAGE_1_CYCLES-1];
/////////////////////// LOAD LOGIC /////////////////// /////////////////////// LOAD LOGIC ///////////////////
@ -243,23 +243,23 @@ module VX_tag_data_access
wire should_write = (sw || sb || sh) && valid_req_st1e && use_read_valid_st1e && !miss_st1e && !is_snp_st1e; wire should_write = (sw || sb || sh) && valid_req_st1e && use_read_valid_st1e && !miss_st1e && !is_snp_st1e;
wire force_write = real_writefill; wire force_write = real_writefill;
wire[`DBANK_LINE_SIZE_RNG][3:0] we; wire[`DBANK_LINE_WORDS-1:0][3:0] we;
wire[`DBANK_LINE_SIZE_RNG][31:0] data_write; wire[`DBANK_LINE_WORDS-1:0][31:0] data_write;
genvar g; genvar g;
generate generate
for (g = 0; g < `DBANK_LINE_SIZE_WORDS; g = g + 1) begin : write_enables for (g = 0; g < `DBANK_LINE_WORDS; g = g + 1) begin : write_enables
wire normal_write = (block_offset == g[`WORD_SELECT_SIZE_RNG]) && should_write && !real_writefill; wire normal_write = (block_offset == g[`WORD_SELECT_SIZE_RNG]) && should_write && !real_writefill;
assign we[g] = (force_write) ? 4'b1111 : assign we[g] = (force_write) ? 4'b1111 :
(should_write && !real_writefill && (FUNC_ID == `LLFUNC_ID)) ? 4'b1111 : (should_write && !real_writefill && (FUNC_ID == `L2FUNC_ID)) ? 4'b1111 :
(normal_write && sw) ? 4'b1111 : (normal_write && sw) ? 4'b1111 :
(normal_write && sb) ? sb_mask : (normal_write && sb) ? sb_mask :
(normal_write && sh) ? sh_mask : (normal_write && sh) ? sh_mask :
4'b0000; 4'b0000;
if (!(FUNC_ID == `LLFUNC_ID)) assign data_write[g] = force_write ? writedata_st1e[g] : use_write_dat; if (!(FUNC_ID == `L2FUNC_ID)) assign data_write[g] = force_write ? writedata_st1e[g] : use_write_dat;
end end
if ((FUNC_ID == `LLFUNC_ID)) begin if ((FUNC_ID == `L2FUNC_ID)) begin
assign data_write = force_write ? writedata_st1e : writeword_st1e; assign data_write = force_write ? writedata_st1e : writeword_st1e;
end end
endgenerate endgenerate
@ -268,7 +268,7 @@ module VX_tag_data_access
assign use_write_data = data_write; assign use_write_data = data_write;
/////////////////////// ///////////////////////
if (FUNC_ID == `LLFUNC_ID) begin if (FUNC_ID == `L2FUNC_ID) begin
assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-1]; assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-1];
end else begin end else begin
assign readword_st1e = data_Qual; assign readword_st1e = data_Qual;

View file

@ -1,4 +1,4 @@
`include "VX_cache_config.v" `include "VX_cache_config.vh"
module VX_tag_data_structure module VX_tag_data_structure
#( #(
@ -55,18 +55,18 @@ module VX_tag_data_structure
output wire read_valid, output wire read_valid,
output wire read_dirty, output wire read_dirty,
output wire[`TAG_SELECT_SIZE_RNG] read_tag, output wire[`TAG_SELECT_SIZE_RNG] read_tag,
output wire[`DBANK_LINE_SIZE_RNG][31:0] read_data, output wire[`DBANK_LINE_WORDS-1:0][31:0] read_data,
input wire invalidate, input wire invalidate,
input wire[`DBANK_LINE_SIZE_RNG][3:0] write_enable, input wire[`DBANK_LINE_WORDS-1:0][3:0] write_enable,
input wire write_fill, input wire write_fill,
input wire[31:0] write_addr, input wire[31:0] write_addr,
input wire[`DBANK_LINE_SIZE_RNG][31:0] write_data, input wire[`DBANK_LINE_WORDS-1:0][31:0] write_data,
input wire fill_sent input wire fill_sent
); );
reg[`DBANK_LINE_SIZE_RNG][3:0][7:0] data [`BANK_LINE_COUNT-1:0]; reg[`DBANK_LINE_WORDS-1:0][3:0][7:0] data [`BANK_LINE_COUNT-1:0];
reg[`TAG_SELECT_SIZE_RNG] tag [`BANK_LINE_COUNT-1:0]; reg[`TAG_SELECT_SIZE_RNG] tag [`BANK_LINE_COUNT-1:0];
reg valid[`BANK_LINE_COUNT-1:0]; reg valid[`BANK_LINE_COUNT-1:0];
reg dirty[`BANK_LINE_COUNT-1:0]; reg dirty[`BANK_LINE_COUNT-1:0];
@ -110,7 +110,7 @@ module VX_tag_data_structure
valid[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0; valid[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
end end
for (f = 0; f < `DBANK_LINE_SIZE_WORDS; f = f + 1) begin for (f = 0; f < `DBANK_LINE_WORDS; f = f + 1) begin
if (write_enable[f][0]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][0] <= write_data[f][7 :0 ]; if (write_enable[f][0]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][0] <= write_data[f][7 :0 ];
if (write_enable[f][1]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][1] <= write_data[f][15:8 ]; if (write_enable[f][1]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][1] <= write_data[f][15:8 ];
if (write_enable[f][2]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][2] <= write_data[f][23:16]; if (write_enable[f][2]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][2] <= write_data[f][23:16];

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_BRANCH_RSP `ifndef VX_BRANCH_RSP
@ -9,7 +9,7 @@ interface VX_branch_response_inter ();
wire valid_branch; wire valid_branch;
wire branch_dir; wire branch_dir;
wire[31:0] branch_dest; wire[31:0] branch_dest;
wire[`NW_M1:0] branch_warp_num; wire[`NW_BITS-1:0] branch_warp_num;
endinterface endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_CSR_REQ `ifndef VX_CSR_REQ
@ -7,8 +7,8 @@
interface VX_csr_req_inter (); interface VX_csr_req_inter ();
wire[`NT_M1:0] valid; wire[`NUM_THREADS-1:0] valid;
wire[`NW_M1:0] warp_num; wire[`NW_BITS-1:0] warp_num;
wire[4:0] rd; wire[4:0] rd;
wire[1:0] wb; wire[1:0] wb;
wire[4:0] alu_op; wire[4:0] alu_op;

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_CSR_WB_REQ `ifndef VX_CSR_WB_REQ
@ -7,15 +7,13 @@
// VX_csr_wb_inter: port-less interface bundling the CSR write-back signals.
// Each line shows the pre-change text followed by the post-change text
// (`NT_M1 -> `NUM_THREADS-1, `NW_M1 -> `NW_BITS-1).
//   valid      : `NUM_THREADS-bit vector (presumably one valid bit per thread)
//   warp_num   : `NW_BITS-bit warp number
//   rd         : 5-bit field (presumably the destination register index)
//   wb         : 2-bit field (presumably the write-back selector -- confirm)
//   csr_result : `NUM_THREADS entries of 32 bits each
interface VX_csr_wb_inter (); interface VX_csr_wb_inter ();
wire[`NT_M1:0] valid; wire[`NUM_THREADS-1:0] valid;
wire[`NW_M1:0] warp_num; wire[`NW_BITS-1:0] warp_num;
wire[4:0] rd; wire[4:0] rd;
wire[1:0] wb; wire[1:0] wb;
wire[`NT_M1:0][31:0] csr_result; wire[`NUM_THREADS-1:0][31:0] csr_result;
endinterface endinterface
`endif `endif

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_DCACHE_REQ `ifndef VX_DCACHE_REQ
@ -7,11 +7,11 @@
// VX_dcache_request_inter: port-less interface bundling a data-cache request.
// Each line shows the pre-change text followed by the post-change text
// (`NT_M1 -> `NUM_THREADS-1).
//   out_cache_driver_in_address   : `NUM_THREADS entries of 32 bits
//   out_cache_driver_in_mem_read  : 3-bit field, shared by all entries
//   out_cache_driver_in_mem_write : 3-bit field, shared by all entries
//   out_cache_driver_in_valid     : `NUM_THREADS-bit vector
//   out_cache_driver_in_data      : `NUM_THREADS entries of 32 bits
interface VX_dcache_request_inter (); interface VX_dcache_request_inter ();
wire[`NT_M1:0][31:0] out_cache_driver_in_address; wire[`NUM_THREADS-1:0][31:0] out_cache_driver_in_address;
wire[2:0] out_cache_driver_in_mem_read; wire[2:0] out_cache_driver_in_mem_read;
wire[2:0] out_cache_driver_in_mem_write; wire[2:0] out_cache_driver_in_mem_write;
wire[`NT_M1:0] out_cache_driver_in_valid; wire[`NUM_THREADS-1:0] out_cache_driver_in_valid;
wire[`NT_M1:0][31:0] out_cache_driver_in_data; wire[`NUM_THREADS-1:0][31:0] out_cache_driver_in_data;
endinterface endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_DCACHE_RSP `ifndef VX_DCACHE_RSP
@ -7,7 +7,7 @@
// VX_dcache_response_inter: port-less interface bundling a data-cache response.
// Each line shows the pre-change text followed by the post-change text
// (`NT_M1 -> `NUM_THREADS-1).
//   in_cache_driver_out_data : `NUM_THREADS entries of 32 bits
//   delay                    : single-bit flag (presumably a stall/back-pressure
//                              indication -- confirm against the driver)
interface VX_dcache_response_inter (); interface VX_dcache_response_inter ();
wire[`NT_M1:0][31:0] in_cache_driver_out_data; wire[`NUM_THREADS-1:0][31:0] in_cache_driver_out_data;
wire delay; wire delay;
endinterface endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_DRAM_REQ_RSP_INTER `ifndef VX_DRAM_REQ_RSP_INTER

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_EXE_UNIT_REQ_INTER `ifndef VX_EXE_UNIT_REQ_INTER
@ -8,8 +8,8 @@
interface VX_exec_unit_req_inter (); interface VX_exec_unit_req_inter ();
// Meta // Meta
wire[`NT_M1:0] valid; wire[`NUM_THREADS-1:0] valid;
wire[`NW_M1:0] warp_num; wire[`NW_BITS-1:0] warp_num;
wire[31:0] curr_PC; wire[31:0] curr_PC;
wire[31:0] PC_next; wire[31:0] PC_next;
@ -18,8 +18,8 @@ interface VX_exec_unit_req_inter ();
wire[1:0] wb; wire[1:0] wb;
// Data and alu op // Data and alu op
wire[`NT_M1:0][31:0] a_reg_data; wire[`NUM_THREADS-1:0][31:0] a_reg_data;
wire[`NT_M1:0][31:0] b_reg_data; wire[`NUM_THREADS-1:0][31:0] b_reg_data;
wire[4:0] alu_op; wire[4:0] alu_op;
wire[4:0] rs1; wire[4:0] rs1;
wire[4:0] rs2; wire[4:0] rs2;

View file

@ -1,5 +1,5 @@
`include "VX_define.v" `include "VX_define.vh"
`ifndef VX_FrE_to_BE_INTER `ifndef VX_FrE_to_BE_INTER
@ -30,8 +30,8 @@ interface VX_frE_to_bckE_req_inter ();
wire jal; wire jal;
wire[31:0] jal_offset; wire[31:0] jal_offset;
wire[31:0] PC_next; wire[31:0] PC_next;
wire[`NT_M1:0] valid; wire[`NUM_THREADS-1:0] valid;
wire[`NW_M1:0] warp_num; wire[`NW_BITS-1:0] warp_num;
// GPGPU stuff // GPGPU stuff
wire is_wspawn; wire is_wspawn;

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_GPR_CLONE_INTER `ifndef VX_GPR_CLONE_INTER
@ -9,7 +9,7 @@
// VX_gpr_clone_inter: port-less interface carrying a single-bit `is_clone`
// flag and a `NW_BITS-bit warp number. Each line shows the pre-change text
// followed by the post-change text (`NW_M1 -> `NW_BITS-1).
// The Verilator UNUSED lint is switched off around both wires and back on
// afterwards, so they may be left unconnected without a lint warning.
interface VX_gpr_clone_inter (); interface VX_gpr_clone_inter ();
/* verilator lint_off UNUSED */ /* verilator lint_off UNUSED */
wire is_clone; wire is_clone;
wire[`NW_M1:0] warp_num; wire[`NW_BITS-1:0] warp_num;
/* verilator lint_on UNUSED */ /* verilator lint_on UNUSED */
endinterface endinterface

View file

@ -1,13 +1,13 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_gpr_data_INTER `ifndef VX_gpr_data_INTER
`define VX_gpr_data_INTER `define VX_gpr_data_INTER
// VX_gpr_data_inter: port-less interface carrying two register-data vectors,
// each `NUM_THREADS entries of 32 bits. Each line shows the pre-change text
// followed by the post-change text (`NT_M1 -> `NUM_THREADS-1).
interface VX_gpr_data_inter (); interface VX_gpr_data_inter ();
wire[`NT_M1:0][31:0] a_reg_data; wire[`NUM_THREADS-1:0][31:0] a_reg_data;
wire[`NT_M1:0][31:0] b_reg_data; wire[`NUM_THREADS-1:0][31:0] b_reg_data;
endinterface endinterface

View file

@ -1,4 +1,4 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_GPR_JAL_INTER `ifndef VX_GPR_JAL_INTER
`define VX_GPR_JAL_INTER `define VX_GPR_JAL_INTER

View file

@ -1,4 +1,4 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_GPR_READ `ifndef VX_GPR_READ
`define VX_GPR_READ `define VX_GPR_READ
@ -8,7 +8,7 @@ interface VX_gpr_read_inter ();
wire[4:0] rs1; wire[4:0] rs1;
wire[4:0] rs2; wire[4:0] rs2;
wire[`NW_M1:0] warp_num; wire[`NW_BITS-1:0] warp_num;
endinterface endinterface

View file

@ -1,4 +1,4 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_GPR_WSPAWN_INTER `ifndef VX_GPR_WSPAWN_INTER
`define VX_GPR_WSPAWN_INTER `define VX_GPR_WSPAWN_INTER
@ -7,8 +7,8 @@
// VX_gpr_wspawn_inter: port-less interface carrying a single-bit `is_wspawn`
// flag and a `NW_BITS-bit `which_wspawn` value. Each line shows the pre-change
// text followed by the post-change text (`NW_M1 -> `NW_BITS-1).
// The Verilator UNUSED lint is switched off around the wires and back on
// afterwards. The `warp_num` wire is commented out in both versions.
interface VX_gpr_wspawn_inter (); interface VX_gpr_wspawn_inter ();
/* verilator lint_off UNUSED */ /* verilator lint_off UNUSED */
wire is_wspawn; wire is_wspawn;
wire[`NW_M1:0] which_wspawn; wire[`NW_BITS-1:0] which_wspawn;
// wire[`NW_M1:0] warp_num; // wire[`NW_BITS-1:0] warp_num;
/* verilator lint_on UNUSED */ /* verilator lint_on UNUSED */
endinterface endinterface

View file

@ -1,6 +1,6 @@
`include "../generic_cache/VX_cache_config.v" `include "../generic_cache/VX_cache_config.vh"
`ifndef VX_GPU_DRAM_DCACHE_REQ `ifndef VX_GPU_DRAM_DCACHE_REQ
@ -8,7 +8,7 @@
interface VX_gpu_dcache_dram_req_inter interface VX_gpu_dcache_dram_req_inter
#( #(
parameter BANK_LINE_SIZE_WORDS = 2 parameter BANK_LINE_WORDS = 2
) )
(); ();
@ -18,7 +18,7 @@ interface VX_gpu_dcache_dram_req_inter
wire dram_req_read; wire dram_req_read;
wire [31:0] dram_req_addr; wire [31:0] dram_req_addr;
wire [31:0] dram_req_size; wire [31:0] dram_req_size;
wire [BANK_LINE_SIZE_WORDS-1:0][31:0] dram_req_data; wire [BANK_LINE_WORDS-1:0][31:0] dram_req_data;
// Snoop // Snoop
wire dram_because_of_snp; wire dram_because_of_snp;

View file

@ -1,7 +1,7 @@
`include "../generic_cache/VX_cache_config.v" `include "../generic_cache/VX_cache_config.vh"
`ifndef VX_GPU_DRAM_DCACHE_RES `ifndef VX_GPU_DRAM_DCACHE_RES
@ -9,13 +9,13 @@
// VX_gpu_dcache_dram_res_inter: parameterised, port-less interface bundling
// the DRAM fill-response signals. Each line shows the pre-change text
// followed by the post-change text.
// NOTE(review): the parameter is renamed BANK_LINE_SIZE_WORDS -> BANK_LINE_WORDS
// (default 2 in both versions); any instantiation that overrides it by name
// must use the new name -- verify all instantiation sites.
interface VX_gpu_dcache_dram_res_inter interface VX_gpu_dcache_dram_res_inter
#( #(
parameter BANK_LINE_SIZE_WORDS = 2 parameter BANK_LINE_WORDS = 2
) )
(); ();
// DRAM response // DRAM response
//   dram_fill_rsp      : single-bit flag
//   dram_fill_rsp_addr : 32-bit address
//   dram_fill_rsp_data : BANK_LINE_WORDS entries of 32 bits
wire dram_fill_rsp; wire dram_fill_rsp;
wire [31:0] dram_fill_rsp_addr; wire [31:0] dram_fill_rsp_addr;
wire [BANK_LINE_SIZE_WORDS-1:0][31:0] dram_fill_rsp_data; wire [BANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data;
endinterface endinterface

View file

@ -1,6 +1,6 @@
`include "../generic_cache/VX_cache_config.v" `include "../generic_cache/VX_cache_config.vh"
`ifndef VX_GPU_DCACHE_REQ `ifndef VX_GPU_DCACHE_REQ
@ -20,7 +20,7 @@ interface VX_gpu_dcache_req_inter
wire [NUMBER_REQUESTS-1:0][2:0] core_req_mem_write; wire [NUMBER_REQUESTS-1:0][2:0] core_req_mem_write;
wire [4:0] core_req_rd; wire [4:0] core_req_rd;
wire [NUMBER_REQUESTS-1:0][1:0] core_req_wb; wire [NUMBER_REQUESTS-1:0][1:0] core_req_wb;
wire [`NW_M1:0] core_req_warp_num; wire [`NW_BITS-1:0] core_req_warp_num;
wire [31:0] core_req_pc; wire [31:0] core_req_pc;
// Can't WB // Can't WB

View file

@ -1,6 +1,6 @@
`include "../generic_cache/VX_cache_config.v" `include "../generic_cache/VX_cache_config.vh"
`ifndef VX_GPU_DCACHE_RES `ifndef VX_GPU_DCACHE_RES
@ -16,7 +16,7 @@ interface VX_gpu_dcache_res_inter
wire [NUMBER_REQUESTS-1:0] core_wb_valid; wire [NUMBER_REQUESTS-1:0] core_wb_valid;
wire [4:0] core_wb_req_rd; wire [4:0] core_wb_req_rd;
wire [1:0] core_wb_req_wb; wire [1:0] core_wb_req_wb;
wire [`NW_M1:0] core_wb_warp_num; wire [`NW_BITS-1:0] core_wb_warp_num;
wire [NUMBER_REQUESTS-1:0][31:0] core_wb_readdata; wire [NUMBER_REQUESTS-1:0][31:0] core_wb_readdata;
wire [NUMBER_REQUESTS-1:0][31:0] core_wb_pc; wire [NUMBER_REQUESTS-1:0][31:0] core_wb_pc;

View file

@ -1,7 +1,7 @@
`include "../generic_cache/VX_cache_config.v" `include "../generic_cache/VX_cache_config.vh"
`ifndef VX_GPU_SNP_REQ `ifndef VX_GPU_SNP_REQ

View file

@ -1,4 +1,4 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_GPU_INST_REQ_IN `ifndef VX_GPU_INST_REQ_IN
@ -6,8 +6,8 @@
interface VX_gpu_inst_req_inter(); interface VX_gpu_inst_req_inter();
wire[`NT_M1:0] valid; wire[`NUM_THREADS-1:0] valid;
wire[`NW_M1:0] warp_num; wire[`NW_BITS-1:0] warp_num;
wire is_wspawn; wire is_wspawn;
wire is_tmc; wire is_tmc;
wire is_split; wire is_split;
@ -16,7 +16,7 @@ interface VX_gpu_inst_req_inter();
wire[31:0] pc_next; wire[31:0] pc_next;
wire[`NT_M1:0][31:0] a_reg_data; wire[`NUM_THREADS-1:0][31:0] a_reg_data;
wire[31:0] rd2; wire[31:0] rd2;

View file

@ -1,4 +1,4 @@
`include "../generic_cache/VX_cache_config.v" `include "../generic_cache/VX_cache_config.vh"
`ifndef VX_GPU_SNP_REQ_RSP `ifndef VX_GPU_SNP_REQ_RSP

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_ICACHE_REQ `ifndef VX_ICACHE_REQ

View file

@ -1,4 +1,4 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_ICACHE_RSP `ifndef VX_ICACHE_RSP

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_EXEC_UNIT_WB_INST_INTER `ifndef VX_EXEC_UNIT_WB_INST_INTER
@ -7,12 +7,12 @@
interface VX_inst_exec_wb_inter (); interface VX_inst_exec_wb_inter ();
wire[`NT_M1:0][31:0] alu_result; wire[`NUM_THREADS-1:0][31:0] alu_result;
wire[31:0] exec_wb_pc; wire[31:0] exec_wb_pc;
wire[4:0] rd; wire[4:0] rd;
wire[1:0] wb; wire[1:0] wb;
wire[`NT_M1:0] wb_valid; wire[`NUM_THREADS-1:0] wb_valid;
wire[`NW_M1:0] wb_warp_num; wire[`NW_BITS-1:0] wb_warp_num;
endinterface endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_MEM_WB_INST_INTER `ifndef VX_MEM_WB_INST_INTER
@ -7,12 +7,12 @@
interface VX_inst_mem_wb_inter (); interface VX_inst_mem_wb_inter ();
wire[`NT_M1:0][31:0] loaded_data; wire[`NUM_THREADS-1:0][31:0] loaded_data;
wire[31:0] mem_wb_pc; wire[31:0] mem_wb_pc;
wire[4:0] rd; wire[4:0] rd;
wire[1:0] wb; wire[1:0] wb;
wire[`NT_M1:0] wb_valid; wire[`NUM_THREADS-1:0] wb_valid;
wire[`NW_M1:0] wb_warp_num; wire[`NW_BITS-1:0] wb_warp_num;
endinterface endinterface

View file

@ -1,4 +1,4 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_F_D_INTER `ifndef VX_F_D_INTER
@ -7,8 +7,8 @@
interface VX_inst_meta_inter (); interface VX_inst_meta_inter ();
wire[31:0] instruction; wire[31:0] instruction;
wire[31:0] inst_pc; wire[31:0] inst_pc;
wire[`NW_M1:0] warp_num; wire[`NW_BITS-1:0] warp_num;
wire[`NT_M1:0] valid; wire[`NUM_THREADS-1:0] valid;
endinterface endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_JAL_RSP `ifndef VX_JAL_RSP
@ -9,7 +9,7 @@ interface VX_jal_response_inter ();
wire jal; wire jal;
wire[31:0] jal_dest; wire[31:0] jal_dest;
wire[`NW_M1:0] jal_warp_num; wire[`NW_BITS-1:0] jal_warp_num;
endinterface endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_JOIN_INTER `ifndef VX_JOIN_INTER
@ -8,7 +8,7 @@
interface VX_join_inter (); interface VX_join_inter ();
wire is_join; wire is_join;
wire[`NW_M1:0] join_warp_num; wire[`NW_BITS-1:0] join_warp_num;
endinterface endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_LSU_REQ_INTER `ifndef VX_LSU_REQ_INTER
@ -7,11 +7,11 @@
interface VX_lsu_req_inter (); interface VX_lsu_req_inter ();
wire[`NT_M1:0] valid; wire[`NUM_THREADS-1:0] valid;
wire[31:0] lsu_pc; wire[31:0] lsu_pc;
wire[`NW_M1:0] warp_num; wire[`NW_BITS-1:0] warp_num;
wire[`NT_M1:0][31:0] store_data; wire[`NUM_THREADS-1:0][31:0] store_data;
wire[`NT_M1:0][31:0] base_address; // A reg data wire[`NUM_THREADS-1:0][31:0] base_address; // A reg data
wire[31:0] offset; // itype_immed wire[31:0] offset; // itype_immed
wire[2:0] mem_read; wire[2:0] mem_read;
wire[2:0] mem_write; wire[2:0] mem_write;

View file

@ -1,4 +1,4 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_MEM_REQ_IN `ifndef VX_MEM_REQ_IN
@ -6,20 +6,20 @@
interface VX_mem_req_inter (); interface VX_mem_req_inter ();
wire[`NT_M1:0][31:0] alu_result; wire[`NUM_THREADS-1:0][31:0] alu_result;
wire[2:0] mem_read; wire[2:0] mem_read;
wire[2:0] mem_write; wire[2:0] mem_write;
wire[4:0] rd; wire[4:0] rd;
wire[1:0] wb; wire[1:0] wb;
wire[4:0] rs1; wire[4:0] rs1;
wire[4:0] rs2; wire[4:0] rs2;
wire[`NT_M1:0][31:0] rd2; wire[`NUM_THREADS-1:0][31:0] rd2;
wire[31:0] PC_next; wire[31:0] PC_next;
wire[31:0] curr_PC; wire[31:0] curr_PC;
wire[31:0] branch_offset; wire[31:0] branch_offset;
wire[2:0] branch_type; wire[2:0] branch_type;
wire[`NT_M1:0] valid; wire[`NUM_THREADS-1:0] valid;
wire[`NW_M1:0] warp_num; wire[`NW_BITS-1:0] warp_num;
endinterface endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_MW_WB_INTER `ifndef VX_MW_WB_INTER
@ -7,13 +7,13 @@
interface VX_mw_wb_inter (); interface VX_mw_wb_inter ();
wire[`NT_M1:0][31:0] alu_result; wire[`NUM_THREADS-1:0][31:0] alu_result;
wire[`NT_M1:0][31:0] mem_result; wire[`NUM_THREADS-1:0][31:0] mem_result;
wire[4:0] rd; wire[4:0] rd;
wire[1:0] wb; wire[1:0] wb;
wire[31:0] PC_next; wire[31:0] PC_next;
wire[`NT_M1:0] valid; wire[`NUM_THREADS-1:0] valid;
wire [`NW_M1:0] warp_num; wire [`NW_BITS-1:0] warp_num;
endinterface endinterface

View file

@ -1,5 +1,5 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_WARP_CTL_INTER `ifndef VX_WARP_CTL_INTER
@ -7,26 +7,26 @@
interface VX_warp_ctl_inter (); interface VX_warp_ctl_inter ();
wire[`NW_M1:0] warp_num; wire[`NW_BITS-1:0] warp_num;
wire change_mask; wire change_mask;
wire[`NT_M1:0] thread_mask; wire[`NUM_THREADS-1:0] thread_mask;
wire wspawn; wire wspawn;
wire[31:0] wspawn_pc; wire[31:0] wspawn_pc;
wire[`NW-1:0] wspawn_new_active; wire[`NUM_WARPS-1:0] wspawn_new_active;
wire ebreak; wire ebreak;
// barrier // barrier
wire is_barrier; wire is_barrier;
wire[31:0] barrier_id; wire[31:0] barrier_id;
wire[$clog2(`NW):0] num_warps; wire[$clog2(`NUM_WARPS):0] num_warps;
wire is_split; wire is_split;
wire dont_split; wire dont_split;
wire[`NW_M1:0] split_warp_num; wire[`NW_BITS-1:0] split_warp_num;
wire[`NT_M1:0] split_new_mask; wire[`NUM_THREADS-1:0] split_new_mask;
wire[`NT_M1:0] split_later_mask; wire[`NUM_THREADS-1:0] split_later_mask;
wire[31:0] split_save_pc; wire[31:0] split_save_pc;

View file

@ -1,4 +1,4 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_WB_INTER `ifndef VX_WB_INTER
@ -7,12 +7,12 @@
interface VX_wb_inter (); interface VX_wb_inter ();
wire[`NT_M1:0][31:0] write_data; wire[`NUM_THREADS-1:0][31:0] write_data;
wire[31:0] wb_pc; wire[31:0] wb_pc;
wire[4:0] rd; wire[4:0] rd;
wire[1:0] wb; wire[1:0] wb;
wire[`NT_M1:0] wb_valid; wire[`NUM_THREADS-1:0] wb_valid;
wire[`NW_M1:0] wb_warp_num; wire[`NW_BITS-1:0] wb_warp_num;
endinterface endinterface

View file

@ -1,4 +1,4 @@
`include "../VX_define.v" `include "../VX_define.vh"
`ifndef VX_WSTALL_INTER `ifndef VX_WSTALL_INTER
@ -7,7 +7,7 @@
interface VX_wstall_inter(); interface VX_wstall_inter();
wire wstall; wire wstall;
wire[`NW_M1:0] warp_num; wire[`NW_BITS-1:0] warp_num;
endinterface endinterface

View file

@ -1,4 +1,4 @@
`include "../VX_define.v" `include "../VX_define.vh"
module VX_d_e_reg ( module VX_d_e_reg (
input wire clk, input wire clk,
@ -16,7 +16,7 @@ module VX_d_e_reg (
wire flush = (in_branch_stall == `STALL); wire flush = (in_branch_stall == `STALL);
VX_generic_register #(.N(233 + `NW_M1 + 1 + `NT)) d_e_reg VX_generic_register #(.N(233 + `NW_BITS-1 + 1 + `NUM_THREADS)) d_e_reg
( (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),

View file

@ -1,4 +1,4 @@
`include "../VX_define.v" `include "../VX_define.vh"
module VX_f_d_reg ( module VX_f_d_reg (
input wire clk, input wire clk,
@ -13,7 +13,7 @@ module VX_f_d_reg (
wire flush = 1'b0; wire flush = 1'b0;
wire stall = in_freeze == 1'b1; wire stall = in_freeze == 1'b1;
VX_generic_register #( .N(64+`NW_M1+1+`NT) ) f_d_reg ( VX_generic_register #( .N(64+`NW_BITS-1+1+`NUM_THREADS) ) f_d_reg (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall), .stall(stall),

View file

@ -1,4 +1,4 @@
`include "../VX_define.v" `include "../VX_define.vh"
module VX_i_d_reg ( module VX_i_d_reg (
input wire clk, input wire clk,
@ -14,7 +14,7 @@ module VX_i_d_reg (
wire stall = in_freeze == 1'b1; wire stall = in_freeze == 1'b1;
VX_generic_register #( .N( 64 + `NW_M1 + 1 + `NT ) ) i_d_reg ( VX_generic_register #( .N( 64 + `NW_BITS-1 + 1 + `NUM_THREADS ) ) i_d_reg (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall), .stall(stall),

View file

@ -1,4 +1,4 @@
`include "../VX_define.v" `include "../VX_define.vh"
// Converts in_valids to bank_valids // Converts in_valids to bank_valids
module VX_bank_valids module VX_bank_valids
@ -7,16 +7,16 @@ module VX_bank_valids
parameter BITS_PER_BANK = 3 parameter BITS_PER_BANK = 3
) )
( (
input wire[`NT_M1:0] in_valids, input wire[`NUM_THREADS-1:0] in_valids,
input wire[`NT_M1:0][31:0] in_addr, input wire[`NUM_THREADS-1:0][31:0] in_addr,
output reg[NB:0][`NT_M1:0] bank_valids output reg[NB:0][`NUM_THREADS-1:0] bank_valids
); );
integer i, j; integer i, j;
always@(*) begin always@(*) begin
for(j = 0; j <= NB; j = j+1 ) begin for(j = 0; j <= NB; j = j+1 ) begin
for(i = 0; i <= `NT_M1; i = i+1) begin for(i = 0; i < `NUM_THREADS; i = i+1) begin
if(in_valids[i]) begin if(in_valids[i]) begin
if(in_addr[i][(2+BITS_PER_BANK-1):2] == j[BITS_PER_BANK-1:0]) begin if(in_addr[i][(2+BITS_PER_BANK-1):2] == j[BITS_PER_BANK-1:0]) begin
bank_valids[j][i] = 1'b1; bank_valids[j][i] = 1'b1;

Some files were not shown because too many files have changed in this diff Show more