mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
added config.vh
This commit is contained in:
parent
c913e542e9
commit
81745f08c9
109 changed files with 1426 additions and 1544 deletions
|
@ -9,11 +9,11 @@ extern int vx_dev_caps(int caps_id) {
|
|||
case VX_CAPS_VERSION:
|
||||
return 0;
|
||||
case VX_CAPS_MAX_CORES:
|
||||
return NUMBER_CORES;
|
||||
return NUM_CORES;
|
||||
case VX_CAPS_MAX_WARPS:
|
||||
return NW;
|
||||
return NUM_WARPS;
|
||||
case VX_CAPS_MAX_THREADS:
|
||||
return NT;
|
||||
return NUM_THREADS;
|
||||
case VX_CAPS_CACHE_LINESIZE:
|
||||
return GLOBAL_BLOCK_SIZE_BYTES;
|
||||
case VX_CAPS_LOCAL_MEM_SIZE:
|
||||
|
|
|
@ -142,7 +142,7 @@ public:
|
|||
private:
|
||||
|
||||
void run() {
|
||||
Harp::ArchDef arch("rv32i", NW, NT);
|
||||
Harp::ArchDef arch("rv32i", NUM_WARPS, NUM_THREADS);
|
||||
Harp::WordDecoder dec(arch);
|
||||
Harp::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true);
|
||||
Harp::Core core(arch, dec, mu);
|
||||
|
|
|
@ -33,7 +33,7 @@ THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu
|
|||
|
||||
.PHONY: build_config
|
||||
build_config:
|
||||
./gen_config.py --rtl_locations
|
||||
./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./simulate/VX_config.h
|
||||
|
||||
# -LDFLAGS '-lsystemc'
|
||||
VERILATOR: build_config
|
||||
|
|
|
@ -6,8 +6,9 @@ ALL:sim
|
|||
SRC = \
|
||||
vortex_dpi.cpp \
|
||||
vortex_tb.v \
|
||||
../rtl/VX_define.v \
|
||||
../rtl/VX_define_synth.v \
|
||||
../rtl/VX_user_config.vh \
|
||||
../rtl/VX_config.vh \
|
||||
../rtl/VX_define.vh \
|
||||
../rtl/interfaces/VX_branch_response_inter.v \
|
||||
../rtl/interfaces/VX_csr_req_inter.v \
|
||||
../rtl/interfaces/VX_csr_wb_inter.v \
|
||||
|
|
|
@ -2182,7 +2182,7 @@ Project_File_33 = ../rtl/shared_memory/VX_set_bit.v
|
|||
Project_File_P_33 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 vlog_noload 0 cover_branch 0 folder {Top Level} last_compile 0 cover_fsm 0 cover_excludedefault 0 vlog_enable0In 0 vlog_disableopt 0 cover_covercells 0 voptflow 1 cover_optlevel 3 vlog_showsource 0 vlog_hazard 0 toggle - vlog_0InOptions {} ood 1 cover_noshort 0 vlog_upper 0 compile_to work vlog_options {} compile_order 53 cover_expr 0 dont_compile 0 cover_stmt 0
|
||||
Project_File_34 = ../rtl/interfaces/VX_dcache_response_inter.v
|
||||
Project_File_P_34 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 vlog_noload 0 last_compile 1571845660 folder {Top Level} cover_branch 0 cover_fsm 0 vlog_enable0In 0 cover_excludedefault 0 vlog_disableopt 0 cover_covercells 0 vlog_hazard 0 vlog_showsource 0 cover_optlevel 3 voptflow 1 ood 0 vlog_0InOptions {} toggle - vlog_options {} compile_to work vlog_upper 0 cover_noshort 0 compile_order 27 dont_compile 0 cover_expr 0 cover_stmt 0
|
||||
Project_File_35 = ../rtl/VX_define.v
|
||||
Project_File_35 = ../rtl/VX_define.vh
|
||||
Project_File_P_35 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 folder {Top Level} cover_branch 0 cover_fsm 0 last_compile 1572058635 vlog_noload 0 cover_excludedefault 0 vlog_enable0In 0 vlog_disableopt 0 cover_covercells 0 voptflow 1 cover_optlevel 3 vlog_showsource 0 vlog_hazard 0 toggle - vlog_0InOptions {} ood 0 cover_noshort 0 vlog_upper 0 compile_to work vlog_options {} compile_order 7 cover_expr 0 dont_compile 0 cover_stmt 0
|
||||
Project_File_36 = ../rtl/interfaces/VX_csr_req_inter.v
|
||||
Project_File_P_36 = cover_toggle 0 vlog_protect 0 file_type verilog group_id 0 cover_exttoggle 0 cover_nofec 0 cover_cond 0 vlog_1995compat SV vlog_nodebug 0 vlog_noload 0 last_compile 1571845660 folder {Top Level} cover_branch 0 cover_fsm 0 vlog_enable0In 0 cover_excludedefault 0 vlog_disableopt 0 cover_covercells 0 vlog_hazard 0 vlog_showsource 0 cover_optlevel 3 voptflow 1 ood 0 vlog_0InOptions {} toggle - vlog_options {} compile_to work vlog_upper 0 cover_noshort 0 compile_order 24 dont_compile 0 cover_expr 0 cover_stmt 0
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
//`define NUMBER_BANKS 8
|
||||
//`define NUM_WORDS_PER_BLOCK 4
|
||||
|
|
|
@ -11,9 +11,10 @@ vortex_afu.json
|
|||
+incdir+../rtl/pipe_regs
|
||||
+incdir+../rtl/compat
|
||||
|
||||
../rtl/VX_define_synth.v
|
||||
../rtl/VX_define.v
|
||||
../rtl/generic_cache/VX_cache_config.v
|
||||
../rtl/VX_user_config.vh
|
||||
../rtl/VX_config.vh
|
||||
../rtl/VX_define.vh
|
||||
../rtl/generic_cache/VX_cache_config.vh
|
||||
../rtl/Vortex_Socket.v
|
||||
../rtl/Vortex_Cluster.v
|
||||
../rtl/Vortex.v
|
||||
|
|
5
hw/rtl/.gitignore
vendored
5
hw/rtl/.gitignore
vendored
|
@ -1,4 +1 @@
|
|||
/simulate/VX_define.h
|
||||
/simulate/VX_define_synth.h
|
||||
/VX_define_synth.v
|
||||
/results.txt
|
||||
/VX_user_config.vh
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_alu(
|
||||
input wire clk,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_back_end
|
||||
#(
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,4 +1,4 @@
|
|||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
module VX_csr_data (
|
||||
input wire clk, // Clock
|
||||
|
@ -19,17 +19,17 @@ module VX_csr_data (
|
|||
|
||||
/* verilator lint_off WIDTH */
|
||||
|
||||
// wire[`NT_M1:0][31:0] thread_ids;
|
||||
// wire[`NT_M1:0][31:0] warp_ids;
|
||||
// wire[`NUM_THREADS-1:0][31:0] thread_ids;
|
||||
// wire[`NUM_THREADS-1:0][31:0] warp_ids;
|
||||
|
||||
// genvar cur_t;
|
||||
// for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin
|
||||
// for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin
|
||||
// assign thread_ids[cur_t] = cur_t;
|
||||
// end
|
||||
|
||||
// genvar cur_tw;
|
||||
// for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin
|
||||
// assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, in_read_warp_num};
|
||||
// for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin
|
||||
// assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, in_read_warp_num};
|
||||
// end
|
||||
|
||||
reg[11:0] csr[1023:0];
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_csr_pipe
|
||||
#(
|
||||
|
@ -14,8 +14,8 @@ module VX_csr_pipe
|
|||
output wire stall_gpr_csr
|
||||
);
|
||||
|
||||
wire[`NT_M1:0] valid_s2;
|
||||
wire[`NW_M1:0] warp_num_s2;
|
||||
wire[`NUM_THREADS-1:0] valid_s2;
|
||||
wire[`NW_BITS-1:0] warp_num_s2;
|
||||
wire[4:0] rd_s2;
|
||||
wire[1:0] wb_s2;
|
||||
wire[4:0] alu_op_s2;
|
||||
|
@ -60,7 +60,7 @@ module VX_csr_pipe
|
|||
|
||||
wire zero = 0;
|
||||
|
||||
VX_generic_register #(.N(32 + 32 + 12 + 1 + 2 + 5 + (`NW_M1+1) + `NT)) csr_reg_s2 (
|
||||
VX_generic_register #(.N(32 + 32 + 12 + 1 + 2 + 5 + (`NW_BITS-1+1) + `NUM_THREADS)) csr_reg_s2 (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(no_slot_csr),
|
||||
|
@ -70,28 +70,26 @@ module VX_csr_pipe
|
|||
);
|
||||
|
||||
|
||||
wire[`NT_M1:0][31:0] final_csr_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] final_csr_data;
|
||||
|
||||
wire[`NT_M1:0][31:0] thread_ids;
|
||||
wire[`NT_M1:0][31:0] warp_ids;
|
||||
wire[`NT_M1:0][31:0] warp_idz;
|
||||
wire[`NT_M1:0][31:0] csr_vec_read_data_s2;
|
||||
wire[`NUM_THREADS-1:0][31:0] thread_ids;
|
||||
wire[`NUM_THREADS-1:0][31:0] warp_ids;
|
||||
wire[`NUM_THREADS-1:0][31:0] warp_idz;
|
||||
wire[`NUM_THREADS-1:0][31:0] csr_vec_read_data_s2;
|
||||
|
||||
genvar cur_t;
|
||||
for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin
|
||||
for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin
|
||||
assign thread_ids[cur_t] = cur_t;
|
||||
end
|
||||
|
||||
genvar cur_tw;
|
||||
for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin
|
||||
assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, warp_num_s2};
|
||||
assign warp_idz[cur_tw] = (warp_num_s2 + (CORE_ID*`NW));
|
||||
for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin
|
||||
assign warp_ids[cur_tw] = warp_num_s2;
|
||||
assign warp_idz[cur_tw] = 32'(warp_num_s2 + (CORE_ID * `NUM_WARPS));
|
||||
end
|
||||
|
||||
|
||||
|
||||
genvar cur_v;
|
||||
for (cur_v = 0; cur_v < `NT; cur_v = cur_v + 1) begin
|
||||
for (cur_v = 0; cur_v < `NUM_THREADS; cur_v = cur_v + 1) begin
|
||||
assign csr_vec_read_data_s2[cur_v] = csr_read_data_s2;
|
||||
end
|
||||
|
||||
|
@ -104,7 +102,6 @@ module VX_csr_pipe
|
|||
warp_id_select ? warp_idz :
|
||||
csr_vec_read_data_s2;
|
||||
|
||||
|
||||
assign VX_csr_wb.valid = valid_s2;
|
||||
assign VX_csr_wb.warp_num = warp_num_s2;
|
||||
assign VX_csr_wb.rd = rd_s2;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_csr_wrapper (
|
||||
VX_csr_req_inter VX_csr_req,
|
||||
|
@ -8,17 +8,17 @@ module VX_csr_wrapper (
|
|||
);
|
||||
|
||||
|
||||
wire[`NT_M1:0][31:0] thread_ids;
|
||||
wire[`NT_M1:0][31:0] warp_ids;
|
||||
wire[`NUM_THREADS-1:0][31:0] thread_ids;
|
||||
wire[`NUM_THREADS-1:0][31:0] warp_ids;
|
||||
|
||||
genvar cur_t, cur_tw;
|
||||
generate
|
||||
for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin : thread_ids_init
|
||||
for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin : thread_ids_init
|
||||
assign thread_ids[cur_t] = cur_t;
|
||||
end
|
||||
|
||||
for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin : warp_ids_init
|
||||
assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, VX_csr_req.warp_num};
|
||||
for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin : warp_ids_init
|
||||
// Zero-extend the warp number to the 32-bit warp_ids entry.
// The pad count is (32-`NW_BITS): the previous `31-`NW_BITS-1` evaluates to 30-`NW_BITS,
// which made the concatenation 30 bits wide instead of 32.
// NOTE(review): assumes VX_csr_req.warp_num is `NW_BITS wide, like the other warp_num signals — confirm.
assign warp_ids[cur_tw] = {{(32-`NW_BITS){1'b0}}, VX_csr_req.warp_num};
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_decode(
|
||||
// Fetch Inputs
|
||||
|
@ -16,11 +16,11 @@ module VX_decode(
|
|||
|
||||
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
|
||||
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
|
||||
wire[`NW_M1:0] in_warp_num = fd_inst_meta_de.warp_num;
|
||||
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
|
||||
|
||||
assign VX_frE_to_bckE_req.curr_PC = in_curr_PC;
|
||||
|
||||
wire[`NT_M1:0] in_valid = fd_inst_meta_de.valid;
|
||||
wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid;
|
||||
|
||||
wire[6:0] curr_opcode;
|
||||
|
||||
|
|
163
hw/rtl/VX_define.vh
Normal file
163
hw/rtl/VX_define.vh
Normal file
|
@ -0,0 +1,163 @@
|
|||
`ifndef VX_DEFINE
|
||||
`define VX_DEFINE
|
||||
|
||||
`include "./VX_config.vh"
|
||||
|
||||
// `define QUEUE_FORCE_MLAB 1
|
||||
|
||||
// `define SYN 1
|
||||
// `define ASIC 1
|
||||
// `define SYN_FUNC 1
|
||||
|
||||
// LOG2UP(x): $clog2(x) for x > 1, otherwise 1, so a vector declared
// [`LOG2UP(x)-1:0] is still at least one bit wide when x == 1.
// The argument is parenthesized in the comparison so that expression arguments
// (e.g. a ?: or bitwise expression) are compared as a whole after text substitution.
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
|
||||
|
||||
`define NUM_CORES_PER_CLUSTER (`NUM_CORES / `NUM_CLUSTERS)
|
||||
|
||||
`define NW_BITS `LOG2UP(`NUM_WARPS)
|
||||
|
||||
`define NT_BITS `LOG2UP(`NUM_THREADS)
|
||||
|
||||
`define NC_BITS `LOG2UP(`NUM_CORES)
|
||||
|
||||
`define R_INST 7'd51
|
||||
`define L_INST 7'd3
|
||||
`define ALU_INST 7'd19
|
||||
`define S_INST 7'd35
|
||||
`define B_INST 7'd99
|
||||
`define LUI_INST 7'd55
|
||||
`define AUIPC_INST 7'd23
|
||||
`define JAL_INST 7'd111
|
||||
`define JALR_INST 7'd103
|
||||
`define SYS_INST 7'd115
|
||||
`define GPGPU_INST 7'h6b
|
||||
|
||||
`define WB_ALU 2'h1
|
||||
`define WB_MEM 2'h2
|
||||
`define WB_JAL 2'h3
|
||||
`define NO_WB 2'h0
|
||||
|
||||
`define RS2_IMMED 1
|
||||
`define RS2_REG 0
|
||||
|
||||
`define NO_MEM_READ 3'h7
|
||||
`define LB_MEM_READ 3'h0
|
||||
`define LH_MEM_READ 3'h1
|
||||
`define LW_MEM_READ 3'h2
|
||||
`define LBU_MEM_READ 3'h4
|
||||
`define LHU_MEM_READ 3'h5
|
||||
|
||||
`define NO_MEM_WRITE 3'h7
|
||||
`define SB_MEM_WRITE 3'h0
|
||||
`define SH_MEM_WRITE 3'h1
|
||||
`define SW_MEM_WRITE 3'h2
|
||||
|
||||
`define NO_BRANCH 3'h0
|
||||
`define BEQ 3'h1
|
||||
`define BNE 3'h2
|
||||
`define BLT 3'h3
|
||||
`define BGT 3'h4
|
||||
`define BLTU 3'h5
|
||||
`define BGTU 3'h6
|
||||
|
||||
// NOTE(review): 5'd15 is also the encoding of `CSR_ALU_RC below — confirm the overlap is intentional.
`define NO_ALU 5'd15
|
||||
`define ADD 5'd0
|
||||
`define SUB 5'd1
|
||||
`define SLLA 5'd2
|
||||
`define SLT 5'd3
|
||||
`define SLTU 5'd4
|
||||
`define XOR 5'd5
|
||||
`define SRL 5'd6
|
||||
`define SRA 5'd7
|
||||
`define OR 5'd8
|
||||
`define AND 5'd9
|
||||
`define SUBU 5'd10
|
||||
`define LUI_ALU 5'd11
|
||||
`define AUIPC_ALU 5'd12
|
||||
`define CSR_ALU_RW 5'd13
|
||||
`define CSR_ALU_RS 5'd14
|
||||
`define CSR_ALU_RC 5'd15
|
||||
`define MUL 5'd16
|
||||
`define MULH 5'd17
|
||||
`define MULHSU 5'd18
|
||||
`define MULHU 5'd19
|
||||
`define DIV 5'd20
|
||||
`define DIVU 5'd21
|
||||
`define REM 5'd22
|
||||
`define REMU 5'd23
|
||||
|
||||
// WRITEBACK
|
||||
`define WB_ALU 2'h1
|
||||
`define WB_MEM 2'h2
|
||||
`define WB_JAL 2'h3
|
||||
`define NO_WB 2'h0
|
||||
|
||||
// JAL
|
||||
`define JUMP 1'h1
|
||||
`define NO_JUMP 1'h0
|
||||
|
||||
// STALLS
|
||||
`define STALL 1'h1
|
||||
`define NO_STALL 1'h0
|
||||
|
||||
`define TAKEN 1'h1
|
||||
`define NOT_TAKEN 1'h0
|
||||
|
||||
`define ZERO_REG 5'h0
|
||||
|
||||
// ======================= Dcache Configurable Knobs ==========================
|
||||
|
||||
// Function ID
|
||||
`define DFUNC_ID 0
|
||||
|
||||
// Size of line inside a bank in bits
|
||||
`define DBANK_LINE_SIZE (`DBANK_LINE_SIZE_BYTES * 8)
|
||||
|
||||
// Bank Number of words in a line
|
||||
`define DBANK_LINE_WORDS (`DBANK_LINE_SIZE_BYTES / `DWORD_SIZE_BYTES)
|
||||
|
||||
// ======================= Icache Configurable Knobs ==========================
|
||||
|
||||
// Function ID
|
||||
`define IFUNC_ID 1
|
||||
|
||||
// Size of line inside a bank in bits
|
||||
`define IBANK_LINE_SIZE (`IBANK_LINE_SIZE_BYTES * 8)
|
||||
|
||||
// Bank Number of words in a line
|
||||
`define IBANK_LINE_WORDS (`IBANK_LINE_SIZE_BYTES / `IWORD_SIZE_BYTES)
|
||||
|
||||
// ======================= SM Configurable Knobs ==============================
|
||||
|
||||
// Function ID
|
||||
`define SFUNC_ID 2
|
||||
|
||||
// Size of line inside a bank in bits
|
||||
`define SBANK_LINE_SIZE (`SBANK_LINE_SIZE_BYTES * 8)
|
||||
|
||||
// Bank Number of words in a line
|
||||
`define SBANK_LINE_WORDS (`SBANK_LINE_SIZE_BYTES / `SWORD_SIZE_BYTES)
|
||||
|
||||
// ======================= L2cache Configurable Knobs =========================
|
||||
|
||||
// Function ID
|
||||
`define L2FUNC_ID 3
|
||||
|
||||
// Size of line inside a bank in bits
|
||||
`define L2BANK_LINE_SIZE (`L2BANK_LINE_SIZE_BYTES * 8)
|
||||
|
||||
// Bank Number of words in a line
|
||||
`define L2BANK_LINE_WORDS (`L2BANK_LINE_SIZE_BYTES / `L2WORD_SIZE_BYTES)
|
||||
|
||||
// ======================= L3cache Configurable Knobs =========================
|
||||
|
||||
// Function ID
|
||||
// NOTE(review): same value as `L2FUNC_ID (3), while DFUNC/IFUNC/SFUNC/L2FUNC are 0/1/2/3 —
// confirm whether the function IDs are meant to be unique.
`define L3FUNC_ID 3
|
||||
|
||||
// Size of line inside a bank in bits
|
||||
`define L3BANK_LINE_SIZE (`L3BANK_LINE_SIZE_BYTES * 8)
|
||||
|
||||
// Bank Number of words in a line
|
||||
`define L3BANK_LINE_WORDS (`L3BANK_LINE_SIZE_BYTES / `L3WORD_SIZE_BYTES)
|
||||
|
||||
// VX_DEFINE
|
||||
`endif
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_dmem_controller (
|
||||
input wire clk,
|
||||
|
@ -36,7 +36,7 @@ module VX_dmem_controller (
|
|||
wire dcache_wants_wb = (|VX_dcache_rsp_dcache.core_wb_valid);
|
||||
|
||||
// Dcache Request
|
||||
assign VX_dcache_req_dcache.core_req_valid = VX_dcache_req.core_req_valid & {`NT{~to_shm}};
|
||||
assign VX_dcache_req_dcache.core_req_valid = VX_dcache_req.core_req_valid & {`NUM_THREADS{~to_shm}};
|
||||
assign VX_dcache_req_dcache.core_req_addr = VX_dcache_req.core_req_addr;
|
||||
assign VX_dcache_req_dcache.core_req_writedata = VX_dcache_req.core_req_writedata;
|
||||
assign VX_dcache_req_dcache.core_req_mem_read = VX_dcache_req.core_req_mem_read;
|
||||
|
@ -49,7 +49,7 @@ module VX_dmem_controller (
|
|||
|
||||
|
||||
// Shared Memory Request
|
||||
assign VX_dcache_req_smem.core_req_valid = VX_dcache_req.core_req_valid & {`NT{to_shm}};
|
||||
assign VX_dcache_req_smem.core_req_valid = VX_dcache_req.core_req_valid & {`NUM_THREADS{to_shm}};
|
||||
assign VX_dcache_req_smem.core_req_addr = VX_dcache_req.core_req_addr;
|
||||
assign VX_dcache_req_smem.core_req_writedata = VX_dcache_req.core_req_writedata;
|
||||
assign VX_dcache_req_smem.core_req_mem_read = VX_dcache_req.core_req_mem_read;
|
||||
|
@ -73,8 +73,8 @@ module VX_dmem_controller (
|
|||
|
||||
|
||||
|
||||
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_smem_dram_req();
|
||||
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_smem_dram_res();
|
||||
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_smem_dram_req();
|
||||
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_smem_dram_res();
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_execute_unit (
|
||||
input wire clk,
|
||||
|
@ -18,8 +18,8 @@ module VX_execute_unit (
|
|||
output wire out_delay
|
||||
);
|
||||
|
||||
wire[`NT_M1:0][31:0] in_a_reg_data;
|
||||
wire[`NT_M1:0][31:0] in_b_reg_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] in_a_reg_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] in_b_reg_data;
|
||||
wire[4:0] in_alu_op;
|
||||
wire in_rs2_src;
|
||||
wire[31:0] in_itype_immed;
|
||||
|
@ -41,11 +41,11 @@ module VX_execute_unit (
|
|||
assign in_curr_PC = VX_exec_unit_req.curr_PC;
|
||||
|
||||
|
||||
wire[`NT_M1:0][31:0] alu_result;
|
||||
wire[`NT_M1:0] alu_stall;
|
||||
wire[`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire[`NUM_THREADS-1:0] alu_stall;
|
||||
genvar index_out_reg;
|
||||
generate
|
||||
for (index_out_reg = 0; index_out_reg < `NT; index_out_reg = index_out_reg + 1) begin : alu_defs
|
||||
for (index_out_reg = 0; index_out_reg < `NUM_THREADS; index_out_reg = index_out_reg + 1) begin : alu_defs
|
||||
VX_alu vx_alu(
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
|
@ -69,9 +69,9 @@ module VX_execute_unit (
|
|||
assign out_delay = no_slot_exec || internal_stall;
|
||||
|
||||
|
||||
wire [$clog2(`NT)-1:0] jal_branch_use_index;
|
||||
// Index of the thread picked by the choose_alu_result priority encoder.
// Sized with `NT_BITS (`LOG2UP(`NUM_THREADS)) to match the encoder's index port;
// $clog2(`NUM_THREADS) would give a zero width (range [-1:0]) when `NUM_THREADS is 1.
wire [`NT_BITS-1:0] jal_branch_use_index;
|
||||
wire jal_branch_found_valid;
|
||||
VX_generic_priority_encoder #(.N(`NT)) choose_alu_result(
|
||||
VX_generic_priority_encoder #(.N(`NUM_THREADS)) choose_alu_result(
|
||||
.valids(VX_exec_unit_req.valid),
|
||||
.index (jal_branch_use_index),
|
||||
.found (jal_branch_found_valid)
|
||||
|
@ -95,10 +95,10 @@ module VX_execute_unit (
|
|||
end
|
||||
|
||||
|
||||
wire[`NT_M1:0][31:0] duplicate_PC_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] duplicate_PC_data;
|
||||
genvar i;
|
||||
generate
|
||||
for (i = 0; i < `NT; i=i+1) begin : pc_data_setup
|
||||
for (i = 0; i < `NUM_THREADS; i=i+1) begin : pc_data_setup
|
||||
assign duplicate_PC_data[i] = VX_exec_unit_req.PC_next;
|
||||
end
|
||||
endgenerate
|
||||
|
@ -113,7 +113,7 @@ module VX_execute_unit (
|
|||
// Actual Writeback
|
||||
assign VX_inst_exec_wb.rd = VX_exec_unit_req.rd;
|
||||
assign VX_inst_exec_wb.wb = VX_exec_unit_req.wb;
|
||||
assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid & {`NT{!internal_stall}};
|
||||
assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid & {`NUM_THREADS{!internal_stall}};
|
||||
assign VX_inst_exec_wb.wb_warp_num = VX_exec_unit_req.warp_num;
|
||||
assign VX_inst_exec_wb.alu_result = VX_exec_unit_req.jal ? duplicate_PC_data : alu_result;
|
||||
|
||||
|
@ -141,7 +141,7 @@ module VX_execute_unit (
|
|||
// .out ({VX_inst_exec_wb.rd , VX_inst_exec_wb.wb , VX_inst_exec_wb.wb_valid , VX_inst_exec_wb.wb_warp_num , VX_inst_exec_wb.alu_result , VX_inst_exec_wb.exec_wb_pc })
|
||||
// );
|
||||
|
||||
VX_generic_register #(.N(33 + `NW_M1 + 1)) jal_reg(
|
||||
VX_generic_register #(.N(33 + `NW_BITS-1 + 1)) jal_reg(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(zero),
|
||||
|
@ -150,7 +150,7 @@ module VX_execute_unit (
|
|||
.out ({VX_jal_rsp.jal , VX_jal_rsp.jal_dest , VX_jal_rsp.jal_warp_num})
|
||||
);
|
||||
|
||||
VX_generic_register #(.N(34 + `NW_M1 + 1)) branch_reg(
|
||||
VX_generic_register #(.N(34 + `NW_BITS-1 + 1)) branch_reg(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(zero),
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_fetch (
|
||||
input wire clk,
|
||||
|
@ -7,8 +7,8 @@ module VX_fetch (
|
|||
VX_join_inter VX_join,
|
||||
input wire schedule_delay,
|
||||
input wire icache_stage_delay,
|
||||
input wire[`NW_M1:0] icache_stage_wid,
|
||||
input wire[`NT-1:0] icache_stage_valids,
|
||||
input wire[`NW_BITS-1:0] icache_stage_wid,
|
||||
input wire[`NUM_THREADS-1:0] icache_stage_valids,
|
||||
|
||||
output wire out_ebreak,
|
||||
VX_jal_response_inter VX_jal_rsp,
|
||||
|
@ -17,8 +17,8 @@ module VX_fetch (
|
|||
VX_warp_ctl_inter VX_warp_ctl
|
||||
);
|
||||
|
||||
wire[`NT_M1:0] thread_mask;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[`NUM_THREADS-1:0] thread_mask;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire[31:0] warp_pc;
|
||||
wire scheduled_warp;
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_front_end (
|
||||
input wire clk,
|
||||
|
@ -37,8 +37,8 @@ wire icache_stage_delay;
|
|||
wire vortex_ebreak;
|
||||
wire terminate_sim;
|
||||
|
||||
wire[`NW_M1:0] icache_stage_wid;
|
||||
wire[`NT-1:0] icache_stage_valids;
|
||||
wire[`NW_BITS-1:0] icache_stage_wid;
|
||||
wire[`NUM_THREADS-1:0] icache_stage_valids;
|
||||
|
||||
reg old_ebreak; // This should be eventually removed
|
||||
always @(posedge clk) begin
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
`ifndef VX_GENERIC_PRIORITY_ENCODER
|
||||
`define VX_GENERIC_PRIORITY_ENCODER
|
||||
|
||||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_generic_priority_encoder
|
||||
#(
|
||||
|
@ -10,8 +10,8 @@ module VX_generic_priority_encoder
|
|||
(
|
||||
input wire[N-1:0] valids,
|
||||
//output reg[$clog2(N)-1:0] index,
|
||||
output reg[(`CLOG2(N))-1:0] index,
|
||||
//output reg[`CLOG2(N):0] index, // eh
|
||||
output reg[(`LOG2UP(N))-1:0] index,
|
||||
//output reg[`LOG2UP(N):0] index, // eh
|
||||
output reg found
|
||||
);
|
||||
|
||||
|
@ -22,7 +22,7 @@ module VX_generic_priority_encoder
|
|||
for (i = N-1; i >= 0; i = i - 1) begin
|
||||
if (valids[i]) begin
|
||||
//index = i[$clog2(N)-1:0];
|
||||
index = i[(`CLOG2(N))-1:0];
|
||||
index = i[(`LOG2UP(N))-1:0];
|
||||
found = 1;
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
`include "VX_define_synth.v"
|
||||
|
||||
module VX_generic_queue_ll
|
||||
#(
|
||||
parameter DATAW = 4,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_gpgpu_inst (
|
||||
// Input
|
||||
|
@ -9,14 +9,14 @@ module VX_gpgpu_inst (
|
|||
);
|
||||
|
||||
|
||||
wire[`NT_M1:0] curr_valids = VX_gpu_inst_req.valid;
|
||||
wire[`NUM_THREADS-1:0] curr_valids = VX_gpu_inst_req.valid;
|
||||
wire is_split = (VX_gpu_inst_req.is_split);
|
||||
|
||||
wire[`NT_M1:0] tmc_new_mask;
|
||||
wire all_threads = `NT < VX_gpu_inst_req.a_reg_data[0];
|
||||
wire[`NUM_THREADS-1:0] tmc_new_mask;
|
||||
wire all_threads = `NUM_THREADS < VX_gpu_inst_req.a_reg_data[0];
|
||||
genvar curr_t;
|
||||
generate
|
||||
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin : tmc_new_mask_init
|
||||
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin : tmc_new_mask_init
|
||||
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < VX_gpu_inst_req.a_reg_data[0];
|
||||
end
|
||||
endgenerate
|
||||
|
@ -33,11 +33,11 @@ module VX_gpgpu_inst (
|
|||
|
||||
wire wspawn = VX_gpu_inst_req.is_wspawn;
|
||||
wire[31:0] wspawn_pc = VX_gpu_inst_req.rd2;
|
||||
wire all_active = `NW < VX_gpu_inst_req.a_reg_data[0];
|
||||
wire[`NW-1:0] wspawn_new_active;
|
||||
wire all_active = `NUM_WARPS < VX_gpu_inst_req.a_reg_data[0];
|
||||
wire[`NUM_WARPS-1:0] wspawn_new_active;
|
||||
genvar curr_w;
|
||||
generate
|
||||
for (curr_w = 0; curr_w < `NW; curr_w=curr_w+1) begin : wspawn_new_active_init
|
||||
for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin : wspawn_new_active_init
|
||||
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < VX_gpu_inst_req.a_reg_data[0];
|
||||
end
|
||||
endgenerate
|
||||
|
@ -47,19 +47,19 @@ module VX_gpgpu_inst (
|
|||
assign VX_warp_ctl.barrier_id = VX_gpu_inst_req.a_reg_data[0];
|
||||
|
||||
wire[31:0] num_warps_m1 = VX_gpu_inst_req.rd2 - 1;
|
||||
assign VX_warp_ctl.num_warps = num_warps_m1[$clog2(`NW):0];
|
||||
assign VX_warp_ctl.num_warps = num_warps_m1[$clog2(`NUM_WARPS):0];
|
||||
|
||||
assign VX_warp_ctl.wspawn = wspawn;
|
||||
assign VX_warp_ctl.wspawn_pc = wspawn_pc;
|
||||
assign VX_warp_ctl.wspawn_new_active = wspawn_new_active;
|
||||
|
||||
wire[`NT_M1:0] split_new_use_mask;
|
||||
wire[`NT_M1:0] split_new_later_mask;
|
||||
wire[`NUM_THREADS-1:0] split_new_use_mask;
|
||||
wire[`NUM_THREADS-1:0] split_new_later_mask;
|
||||
|
||||
// VX_gpu_inst_req.pc
|
||||
genvar curr_s_t;
|
||||
generate
|
||||
for (curr_s_t = 0; curr_s_t < `NT; curr_s_t=curr_s_t+1) begin : masks_init
|
||||
for (curr_s_t = 0; curr_s_t < `NUM_THREADS; curr_s_t=curr_s_t+1) begin : masks_init
|
||||
wire curr_bool = (VX_gpu_inst_req.a_reg_data[curr_s_t] == 32'b1);
|
||||
|
||||
assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool);
|
||||
|
@ -67,18 +67,18 @@ module VX_gpgpu_inst (
|
|||
end
|
||||
endgenerate
|
||||
|
||||
wire[$clog2(`NT):0] num_valids;
|
||||
wire[$clog2(`NUM_THREADS):0] num_valids;
|
||||
|
||||
VX_countones #(.N(`NT)) valids_counter (
|
||||
VX_countones #(.N(`NUM_THREADS)) valids_counter (
|
||||
.valids(curr_valids),
|
||||
.count (num_valids)
|
||||
);
|
||||
|
||||
// wire[`NW_M1:0] num_valids = $countones(curr_valids);
|
||||
// wire[`NW_BITS-1:0] num_valids = $countones(curr_valids);
|
||||
|
||||
|
||||
assign VX_warp_ctl.is_split = is_split && (num_valids > 1);
|
||||
assign VX_warp_ctl.dont_split = VX_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NT{1'b1}}));
|
||||
assign VX_warp_ctl.dont_split = VX_warp_ctl.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}}));
|
||||
assign VX_warp_ctl.split_new_mask = split_new_use_mask;
|
||||
assign VX_warp_ctl.split_later_mask = split_new_later_mask;
|
||||
assign VX_warp_ctl.split_save_pc = VX_gpu_inst_req.pc_next;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_gpr (
|
||||
input wire clk,
|
||||
|
@ -8,8 +8,8 @@ module VX_gpr (
|
|||
VX_gpr_read_inter VX_gpr_read,
|
||||
VX_wb_inter VX_writeback_inter,
|
||||
|
||||
output reg[`NT_M1:0][31:0] out_a_reg_data,
|
||||
output reg[`NT_M1:0][31:0] out_b_reg_data
|
||||
output reg[`NUM_THREADS-1:0][31:0] out_a_reg_data,
|
||||
output reg[`NUM_THREADS-1:0][31:0] out_b_reg_data
|
||||
);
|
||||
|
||||
|
||||
|
@ -41,10 +41,10 @@ module VX_gpr (
|
|||
wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid);
|
||||
|
||||
|
||||
wire[`NT_M1:0][31:0] write_bit_mask;
|
||||
wire[`NUM_THREADS-1:0][31:0] write_bit_mask;
|
||||
|
||||
genvar curr_t;
|
||||
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin
|
||||
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin
|
||||
wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t];
|
||||
assign write_bit_mask[curr_t] = {32{~local_write}};
|
||||
end
|
||||
|
@ -59,14 +59,14 @@ module VX_gpr (
|
|||
wire cena_1 = 0;
|
||||
wire cena_2 = 0;
|
||||
|
||||
wire[`NT_M1:0][31:0] temp_a;
|
||||
wire[`NT_M1:0][31:0] temp_b;
|
||||
wire[`NUM_THREADS-1:0][31:0] temp_a;
|
||||
wire[`NUM_THREADS-1:0][31:0] temp_b;
|
||||
|
||||
|
||||
`ifndef SYN
|
||||
genvar thread;
|
||||
genvar curr_bit;
|
||||
for (thread = 0; thread < `NT; thread = thread + 1)
|
||||
for (thread = 0; thread < `NUM_THREADS; thread = thread + 1)
|
||||
begin
|
||||
for (curr_bit = 0; curr_bit < 32; curr_bit=curr_bit+1)
|
||||
begin
|
||||
|
@ -83,7 +83,7 @@ module VX_gpr (
|
|||
`endif
|
||||
|
||||
|
||||
wire[`NT_M1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0;
|
||||
wire[`NUM_THREADS-1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0;
|
||||
|
||||
genvar curr_base_thread;
|
||||
// Loop bound uses `NUM_THREADS: the previous 'NT token had an apostrophe instead of a
// backtick and referred to the macro name from before the NT -> NUM_THREADS rename.
for (curr_base_thread = 0; curr_base_thread < `NUM_THREADS; curr_base_thread=curr_base_thread+4)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_gpr_stage (
|
||||
input wire clk,
|
||||
|
@ -114,15 +114,15 @@ module VX_gpr_stage (
|
|||
);
|
||||
|
||||
|
||||
wire[`NT_M1:0][31:0] temp_store_data;
|
||||
wire[`NT_M1:0][31:0] temp_base_address; // A reg data
|
||||
wire[`NUM_THREADS-1:0][31:0] temp_store_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] temp_base_address; // A reg data
|
||||
|
||||
wire[`NT_M1:0][31:0] real_store_data;
|
||||
wire[`NT_M1:0][31:0] real_base_address; // A reg data
|
||||
wire[`NUM_THREADS-1:0][31:0] real_store_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] real_base_address; // A reg data
|
||||
|
||||
wire store_curr_real = !delayed_lsu_last_cycle && stall_lsu;
|
||||
|
||||
VX_generic_register #(.N(`NT*32*2)) lsu_data(
|
||||
VX_generic_register #(.N(`NUM_THREADS*32*2)) lsu_data(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(!store_curr_real),
|
||||
|
@ -139,7 +139,7 @@ module VX_gpr_stage (
|
|||
assign VX_lsu_req.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address;
|
||||
|
||||
|
||||
VX_generic_register #(.N(77 + `NW_M1 + 1 + (`NT))) lsu_reg(
|
||||
VX_generic_register #(.N(77 + `NW_BITS-1 + 1 + (`NUM_THREADS))) lsu_reg(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_lsu),
|
||||
|
@ -148,7 +148,7 @@ module VX_gpr_stage (
|
|||
.out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc ,VX_lsu_req.warp_num , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb })
|
||||
);
|
||||
|
||||
VX_generic_register #(.N(224 + `NW_M1 + 1 + (`NT))) exec_unit_reg(
|
||||
VX_generic_register #(.N(224 + `NW_BITS-1 + 1 + (`NUM_THREADS))) exec_unit_reg(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_exec),
|
||||
|
@ -160,7 +160,7 @@ module VX_gpr_stage (
|
|||
assign VX_exec_unit_req.a_reg_data = real_base_address;
|
||||
assign VX_exec_unit_req.b_reg_data = real_store_data;
|
||||
|
||||
VX_generic_register #(.N(36 + `NW_M1 + 1 + (`NT))) gpu_inst_reg(
|
||||
VX_generic_register #(.N(36 + `NW_BITS-1 + 1 + (`NUM_THREADS))) gpu_inst_reg(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_rest),
|
||||
|
@ -172,7 +172,7 @@ module VX_gpr_stage (
|
|||
assign VX_gpu_inst_req.a_reg_data = real_base_address;
|
||||
assign VX_gpu_inst_req.rd2 = real_store_data;
|
||||
|
||||
VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg(
|
||||
VX_generic_register #(.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)) csr_reg(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_gpr_csr),
|
||||
|
@ -187,7 +187,7 @@ module VX_gpr_stage (
|
|||
`else
|
||||
|
||||
// 341
|
||||
VX_generic_register #(.N(77 + `NW_M1 + 1 + 65*(`NT))) lsu_reg(
|
||||
VX_generic_register #(.N(77 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))) lsu_reg(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_lsu),
|
||||
|
@ -196,7 +196,7 @@ module VX_gpr_stage (
|
|||
.out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc , VX_lsu_req.warp_num , VX_lsu_req.store_data , VX_lsu_req.base_address , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb })
|
||||
);
|
||||
|
||||
VX_generic_register #(.N(224 + `NW_M1 + 1 + 65*(`NT))) exec_unit_reg(
|
||||
VX_generic_register #(.N(224 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))) exec_unit_reg(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_exec),
|
||||
|
@ -205,7 +205,7 @@ module VX_gpr_stage (
|
|||
.out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.a_reg_data , VX_exec_unit_req.b_reg_data , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask })
|
||||
);
|
||||
|
||||
VX_generic_register #(.N(68 + `NW_M1 + 1 + 33*(`NT))) gpu_inst_reg(
|
||||
VX_generic_register #(.N(68 + `NW_BITS-1 + 1 + 33*(`NUM_THREADS))) gpu_inst_reg(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_rest),
|
||||
|
@ -214,7 +214,7 @@ module VX_gpr_stage (
|
|||
.out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next , VX_gpu_inst_req.a_reg_data , VX_gpu_inst_req.rd2 })
|
||||
);
|
||||
|
||||
VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg(
|
||||
VX_generic_register #(.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)) csr_reg(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_gpr_csr),
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_gpr_wrapper (
|
||||
input wire clk,
|
||||
|
@ -7,23 +7,22 @@ module VX_gpr_wrapper (
|
|||
VX_wb_inter VX_writeback_inter,
|
||||
VX_gpr_jal_inter VX_gpr_jal,
|
||||
|
||||
output wire[`NT_M1:0][31:0] out_a_reg_data,
|
||||
output wire[`NT_M1:0][31:0] out_b_reg_data
|
||||
output wire[`NUM_THREADS-1:0][31:0] out_a_reg_data,
|
||||
output wire[`NUM_THREADS-1:0][31:0] out_b_reg_data
|
||||
|
||||
);
|
||||
|
||||
wire[`NW-1:0][`NT_M1:0][31:0] temp_a_reg_data;
|
||||
wire[`NW-1:0][`NT_M1:0][31:0] temp_b_reg_data;
|
||||
wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_a_reg_data;
|
||||
wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_b_reg_data;
|
||||
|
||||
wire[`NT_M1:0][31:0] jal_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] jal_data;
|
||||
genvar index;
|
||||
generate
|
||||
for (index = 0; index <= `NT_M1; index = index + 1) begin : jal_data_assign
|
||||
for (index = 0; index < `NUM_THREADS; index = index + 1) begin : jal_data_assign
|
||||
assign jal_data[index] = VX_gpr_jal.curr_PC;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
||||
`ifndef ASIC
|
||||
assign out_a_reg_data = (VX_gpr_jal.is_jal ? jal_data : (temp_a_reg_data[VX_gpr_read.warp_num]));
|
||||
assign out_b_reg_data = (temp_b_reg_data[VX_gpr_read.warp_num]);
|
||||
|
@ -31,8 +30,8 @@ module VX_gpr_wrapper (
|
|||
|
||||
wire zer = 0;
|
||||
|
||||
wire[`NW_M1:0] old_warp_num;
|
||||
VX_generic_register #(`NW_M1+1) store_wn(
|
||||
wire[`NW_BITS-1:0] old_warp_num;
|
||||
VX_generic_register #(`NW_BITS-1+1) store_wn(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(zer),
|
||||
|
@ -49,7 +48,7 @@ module VX_gpr_wrapper (
|
|||
genvar warp_index;
|
||||
generate
|
||||
|
||||
for (warp_index = 0; warp_index < `NW; warp_index = warp_index + 1) begin : warp_gprs
|
||||
for (warp_index = 0; warp_index < `NUM_WARPS; warp_index = warp_index + 1) begin : warp_gprs
|
||||
|
||||
wire valid_write_request = warp_index == VX_writeback_inter.wb_warp_num;
|
||||
VX_gpr vx_gpr(
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_icache_stage (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire total_freeze,
|
||||
output wire icache_stage_delay,
|
||||
output wire[`NW_M1:0] icache_stage_wid,
|
||||
output wire[`NT-1:0] icache_stage_valids,
|
||||
output wire[`NW_BITS-1:0] icache_stage_wid,
|
||||
output wire[`NUM_THREADS-1:0] icache_stage_valids,
|
||||
VX_inst_meta_inter fe_inst_meta_fi,
|
||||
VX_inst_meta_inter fe_inst_meta_id,
|
||||
|
||||
|
@ -14,7 +14,7 @@ module VX_icache_stage (
|
|||
VX_gpu_dcache_req_inter VX_icache_req
|
||||
);
|
||||
|
||||
reg[`NT-1:0] threads_active[`NW-1:0];
|
||||
reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0];
|
||||
|
||||
wire valid_inst = (|fe_inst_meta_fi.valid);
|
||||
|
||||
|
@ -39,7 +39,7 @@ module VX_icache_stage (
|
|||
/* verilator lint_off WIDTH */
|
||||
|
||||
assign icache_stage_wid = fe_inst_meta_id.warp_num;
|
||||
assign icache_stage_valids = fe_inst_meta_id.valid & {`NT{!icache_stage_delay}};
|
||||
assign icache_stage_valids = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}};
|
||||
|
||||
// Cache can't accept request
|
||||
assign icache_stage_delay = VX_icache_rsp.delay_req;
|
||||
|
@ -50,7 +50,7 @@ module VX_icache_stage (
|
|||
integer curr_w;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (curr_w = 0; curr_w < `NW; curr_w=curr_w+1) threads_active[curr_w] <= 0;
|
||||
for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) threads_active[curr_w] <= 0;
|
||||
end else begin
|
||||
if (valid_inst && !icache_stage_delay) begin
|
||||
/* verilator lint_off WIDTH */
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_inst_multiplex (
|
||||
// Inputs
|
||||
|
@ -12,9 +12,9 @@ module VX_inst_multiplex (
|
|||
VX_csr_req_inter VX_csr_req
|
||||
);
|
||||
|
||||
wire[`NT_M1:0] is_mem_mask;
|
||||
wire[`NT_M1:0] is_gpu_mask;
|
||||
wire[`NT_M1:0] is_csr_mask;
|
||||
wire[`NUM_THREADS-1:0] is_mem_mask;
|
||||
wire[`NUM_THREADS-1:0] is_gpu_mask;
|
||||
wire[`NUM_THREADS-1:0] is_csr_mask;
|
||||
|
||||
wire is_mem = (VX_bckE_req.mem_write != `NO_MEM_WRITE) || (VX_bckE_req.mem_read != `NO_MEM_READ);
|
||||
wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
|
||||
|
@ -23,7 +23,7 @@ module VX_inst_multiplex (
|
|||
|
||||
genvar currT;
|
||||
generate
|
||||
for (currT = 0; currT < `NT; currT = currT + 1) begin : mask_init
|
||||
for (currT = 0; currT < `NUM_THREADS; currT = currT + 1) begin : mask_init
|
||||
assign is_mem_mask[currT] = is_mem;
|
||||
assign is_gpu_mask[currT] = is_gpu;
|
||||
assign is_csr_mask[currT] = is_csr;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu (
|
||||
input wire clk,
|
||||
|
@ -15,7 +15,7 @@ module VX_lsu (
|
|||
);
|
||||
|
||||
// Generate Addresses
|
||||
wire[`NT_M1:0][31:0] address;
|
||||
wire[`NUM_THREADS-1:0][31:0] address;
|
||||
VX_lsu_addr_gen VX_lsu_addr_gen
|
||||
(
|
||||
.base_address(VX_lsu_req.base_address),
|
||||
|
@ -23,19 +23,19 @@ module VX_lsu (
|
|||
.address (address)
|
||||
);
|
||||
|
||||
wire[`NT_M1:0][31:0] use_address;
|
||||
wire[`NT_M1:0][31:0] use_store_data;
|
||||
wire[`NT_M1:0] use_valid;
|
||||
wire[`NUM_THREADS-1:0][31:0] use_address;
|
||||
wire[`NUM_THREADS-1:0][31:0] use_store_data;
|
||||
wire[`NUM_THREADS-1:0] use_valid;
|
||||
wire[2:0] use_mem_read;
|
||||
wire[2:0] use_mem_write;
|
||||
wire[4:0] use_rd;
|
||||
wire[`NW_M1:0] use_warp_num;
|
||||
wire[`NW_BITS-1:0] use_warp_num;
|
||||
wire[1:0] use_wb;
|
||||
wire[31:0] use_pc;
|
||||
|
||||
wire zero = 0;
|
||||
|
||||
VX_generic_register #(.N(45 + `NW_M1 + 1 + `NT*65)) lsu_buffer(
|
||||
VX_generic_register #(.N(45 + `NW_BITS-1 + 1 + `NUM_THREADS*65)) lsu_buffer(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(out_delay),
|
||||
|
@ -49,10 +49,10 @@ module VX_lsu (
|
|||
assign VX_dcache_req.core_req_valid = use_valid;
|
||||
assign VX_dcache_req.core_req_addr = use_address;
|
||||
assign VX_dcache_req.core_req_writedata = use_store_data;
|
||||
assign VX_dcache_req.core_req_mem_read = {`NT{use_mem_read}};
|
||||
assign VX_dcache_req.core_req_mem_write = {`NT{use_mem_write}};
|
||||
assign VX_dcache_req.core_req_mem_read = {`NUM_THREADS{use_mem_read}};
|
||||
assign VX_dcache_req.core_req_mem_write = {`NUM_THREADS{use_mem_write}};
|
||||
assign VX_dcache_req.core_req_rd = use_rd;
|
||||
assign VX_dcache_req.core_req_wb = {`NT{use_wb}};
|
||||
assign VX_dcache_req.core_req_wb = {`NUM_THREADS{use_wb}};
|
||||
assign VX_dcache_req.core_req_warp_num = use_warp_num;
|
||||
assign VX_dcache_req.core_req_pc = use_pc;
|
||||
|
||||
|
@ -70,9 +70,9 @@ module VX_lsu (
|
|||
assign VX_mem_wb.wb_warp_num = VX_dcache_rsp.core_wb_warp_num;
|
||||
assign VX_mem_wb.loaded_data = VX_dcache_rsp.core_wb_readdata;
|
||||
|
||||
wire[(`CLOG2(`NT))-1:0] use_pc_index;
|
||||
wire[(`LOG2UP(`NUM_THREADS))-1:0] use_pc_index;
|
||||
wire found;
|
||||
VX_generic_priority_encoder #(.N(`NT)) pick_first_pc(
|
||||
VX_generic_priority_encoder #(.N(`NUM_THREADS)) pick_first_pc(
|
||||
.valids(VX_dcache_rsp.core_wb_valid),
|
||||
.index (use_pc_index),
|
||||
.found (found)
|
||||
|
|
|
@ -1,17 +1,15 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_lsu_addr_gen (
|
||||
input wire[`NT_M1:0][31:0] base_address,
|
||||
input wire[`NUM_THREADS-1:0][31:0] base_address,
|
||||
input wire[31:0] offset,
|
||||
output wire[`NT_M1:0][31:0] address
|
||||
output wire[`NUM_THREADS-1:0][31:0] address
|
||||
|
||||
);
|
||||
|
||||
|
||||
genvar index;
|
||||
genvar i;
|
||||
generate
|
||||
for (index = 0; index < `NT; index = index + 1) begin : addresses
|
||||
assign address[index] = base_address[index] + offset;
|
||||
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : addresses
|
||||
assign address[i] = base_address[i] + offset;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_priority_encoder (
|
||||
input wire[`NW-1:0] valids,
|
||||
output reg[`NW_M1:0] index,
|
||||
input wire[`NUM_WARPS-1:0] valids,
|
||||
output reg[`NW_BITS-1:0] index,
|
||||
output reg found
|
||||
);
|
||||
|
||||
|
@ -10,9 +10,9 @@ module VX_priority_encoder (
|
|||
always @(*) begin
|
||||
index = 0;
|
||||
found = 0;
|
||||
for (i = `NW-1; i >= 0; i = i - 1) begin
|
||||
for (i = `NUM_WARPS-1; i >= 0; i = i - 1) begin
|
||||
if (valids[i]) begin
|
||||
index = i[`NW_M1:0];
|
||||
index = i[`NW_BITS-1:0];
|
||||
found = 1;
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
module VX_priority_encoder_w_mask
|
||||
#(
|
||||
parameter N = 10
|
||||
|
@ -7,8 +7,8 @@ module VX_priority_encoder_w_mask
|
|||
input wire[N-1:0] valids,
|
||||
output reg [N-1:0] mask,
|
||||
//output reg[$clog2(N)-1:0] index,
|
||||
output reg[(`CLOG2(N))-1:0] index,
|
||||
//output reg[`CLOG2(N):0] index, // eh
|
||||
output reg[(`LOG2UP(N))-1:0] index,
|
||||
//output reg[`LOG2UP(N):0] index, // eh
|
||||
output reg found
|
||||
);
|
||||
|
||||
|
@ -20,7 +20,7 @@ module VX_priority_encoder_w_mask
|
|||
for (i = 0; i < N; i=i+1) begin
|
||||
if (valids[i]) begin
|
||||
//index = i[$clog2(N)-1:0];
|
||||
index = i[(`CLOG2(N))-1:0];
|
||||
index = i[(`LOG2UP(N))-1:0];
|
||||
found = 1;
|
||||
// mask[index] = (1 << i);
|
||||
// $display("%h",(1 << i));
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_scheduler (
|
||||
input wire clk,
|
||||
|
@ -10,8 +10,7 @@ module VX_scheduler (
|
|||
VX_wb_inter VX_writeback_inter,
|
||||
|
||||
output wire schedule_delay,
|
||||
output wire is_empty
|
||||
|
||||
output wire is_empty
|
||||
);
|
||||
|
||||
/* verilator lint_off WIDTH */
|
||||
|
@ -19,7 +18,7 @@ module VX_scheduler (
|
|||
|
||||
assign is_empty = count_valid == 0;
|
||||
|
||||
reg[31:0][`NT-1:0] rename_table[`NW-1:0];
|
||||
reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];
|
||||
|
||||
wire valid_wb = (VX_writeback_inter.wb != 0) && (|VX_writeback_inter.wb_valid) && (VX_writeback_inter.rd != 0);
|
||||
wire wb_inc = (VX_bckE_req.wb != 0) && (VX_bckE_req.rd != 0);
|
||||
|
@ -32,13 +31,11 @@ module VX_scheduler (
|
|||
wire is_load = (VX_bckE_req.mem_read != `NO_MEM_READ);
|
||||
|
||||
// classify our next instruction.
|
||||
wire is_mem = is_store || is_load;
|
||||
wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
|
||||
wire is_csr = VX_bckE_req.is_csr;
|
||||
wire is_mem = is_store || is_load;
|
||||
wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
|
||||
wire is_csr = VX_bckE_req.is_csr;
|
||||
wire is_exec = !is_mem && !is_gpu && !is_csr;
|
||||
|
||||
|
||||
|
||||
// wire rs1_pass = 0;
|
||||
// wire rs2_pass = 0;
|
||||
|
||||
|
@ -48,7 +45,6 @@ module VX_scheduler (
|
|||
wire rs2_rename_qual = ((rs2_rename) && (VX_bckE_req.rs2 != 0 && using_rs2));
|
||||
wire rd_rename_qual = ((rd_rename ) && (VX_bckE_req.rd != 0));
|
||||
|
||||
|
||||
wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual;
|
||||
|
||||
assign schedule_delay = ((rename_valid) && (|VX_bckE_req.valid))
|
||||
|
@ -61,7 +57,7 @@ module VX_scheduler (
|
|||
always @(posedge clk or posedge reset) begin
|
||||
|
||||
if (reset) begin
|
||||
for (w = 0; w < `NW; w=w+1)
|
||||
for (w = 0; w < `NUM_WARPS; w=w+1)
|
||||
begin
|
||||
for (i = 0; i < 32; i = i + 1)
|
||||
begin
|
||||
|
@ -74,7 +70,6 @@ module VX_scheduler (
|
|||
|
||||
if (valid_wb && ((rename_table[VX_writeback_inter.wb_warp_num][VX_writeback_inter.rd] & (~VX_writeback_inter.wb_valid)) == 0)) count_valid = count_valid - 1;
|
||||
if (!schedule_delay && wb_inc) count_valid = count_valid + 1;
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
|
||||
module VX_warp (
|
||||
|
@ -6,7 +6,7 @@ module VX_warp (
|
|||
input wire reset,
|
||||
input wire stall,
|
||||
input wire remove,
|
||||
input wire[`NT_M1:0] in_thread_mask,
|
||||
input wire[`NUM_THREADS-1:0] in_thread_mask,
|
||||
input wire in_change_mask,
|
||||
input wire in_jal,
|
||||
input wire[31:0] in_jal_dest,
|
||||
|
@ -16,20 +16,20 @@ module VX_warp (
|
|||
input wire[31:0] in_wspawn_pc,
|
||||
|
||||
output wire[31:0] out_PC,
|
||||
output wire[`NT_M1:0] out_valid
|
||||
output wire[`NUM_THREADS-1:0] out_valid
|
||||
);
|
||||
|
||||
reg[31:0] real_PC;
|
||||
logic [31:0] temp_PC;
|
||||
logic [31:0] use_PC;
|
||||
reg[`NT_M1:0] valid;
|
||||
reg[`NUM_THREADS-1:0] valid;
|
||||
|
||||
reg[`NT_M1:0] valid_zero;
|
||||
reg[`NUM_THREADS-1:0] valid_zero;
|
||||
|
||||
integer ini_cur_th = 0;
|
||||
initial begin
|
||||
real_PC = 0;
|
||||
for (ini_cur_th = 1; ini_cur_th < `NT; ini_cur_th=ini_cur_th+1) begin
|
||||
for (ini_cur_th = 1; ini_cur_th < `NUM_THREADS; ini_cur_th=ini_cur_th+1) begin
|
||||
valid[ini_cur_th] = 0; // Thread 1 active
|
||||
valid_zero[ini_cur_th] = 0;
|
||||
end
|
||||
|
@ -49,7 +49,7 @@ module VX_warp (
|
|||
|
||||
genvar out_cur_th;
|
||||
generate
|
||||
for (out_cur_th = 0; out_cur_th < `NT; out_cur_th = out_cur_th+1) begin : out_valid_assign
|
||||
for (out_cur_th = 0; out_cur_th < `NUM_THREADS; out_cur_th = out_cur_th+1) begin : out_valid_assign
|
||||
assign out_valid[out_cur_th] = in_change_mask ? in_thread_mask[out_cur_th] : stall ? 1'b0 : valid[out_cur_th];
|
||||
end
|
||||
endgenerate
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_warp_scheduler (
|
||||
input wire clk, // Clock
|
||||
|
@ -7,57 +7,57 @@ module VX_warp_scheduler (
|
|||
// Wspawn
|
||||
input wire wspawn,
|
||||
input wire[31:0] wsapwn_pc,
|
||||
input wire[`NW-1:0] wspawn_new_active,
|
||||
input wire[`NUM_WARPS-1:0] wspawn_new_active,
|
||||
|
||||
// CTM
|
||||
input wire ctm,
|
||||
input wire[`NT_M1:0] ctm_mask,
|
||||
input wire[`NW_M1:0] ctm_warp_num,
|
||||
input wire[`NUM_THREADS-1:0] ctm_mask,
|
||||
input wire[`NW_BITS-1:0] ctm_warp_num,
|
||||
|
||||
// WHALT
|
||||
input wire whalt,
|
||||
input wire[`NW_M1:0] whalt_warp_num,
|
||||
input wire[`NW_BITS-1:0] whalt_warp_num,
|
||||
|
||||
input wire is_barrier,
|
||||
input wire[31:0] barrier_id,
|
||||
input wire[$clog2(`NW):0] num_warps,
|
||||
input wire[`NW_M1:0] barrier_warp_num,
|
||||
input wire[$clog2(`NUM_WARPS):0] num_warps,
|
||||
input wire[`NW_BITS-1:0] barrier_warp_num,
|
||||
|
||||
// WSTALL
|
||||
input wire wstall,
|
||||
input wire[`NW_M1:0] wstall_warp_num,
|
||||
input wire[`NW_BITS-1:0] wstall_warp_num,
|
||||
|
||||
// Split
|
||||
input wire is_split,
|
||||
input wire dont_split,
|
||||
input wire[`NT_M1:0] split_new_mask,
|
||||
input wire[`NT_M1:0] split_later_mask,
|
||||
input wire[`NUM_THREADS-1:0] split_new_mask,
|
||||
input wire[`NUM_THREADS-1:0] split_later_mask,
|
||||
input wire[31:0] split_save_pc,
|
||||
input wire[`NW_M1:0] split_warp_num,
|
||||
input wire[`NW_BITS-1:0] split_warp_num,
|
||||
|
||||
// Join
|
||||
input wire is_join,
|
||||
input wire[`NW_M1:0] join_warp_num,
|
||||
input wire[`NW_BITS-1:0] join_warp_num,
|
||||
|
||||
// JAL
|
||||
input wire jal,
|
||||
input wire[31:0] jal_dest,
|
||||
input wire[`NW_M1:0] jal_warp_num,
|
||||
input wire[`NW_BITS-1:0] jal_warp_num,
|
||||
|
||||
// Branch
|
||||
input wire branch_valid,
|
||||
input wire branch_dir,
|
||||
input wire[31:0] branch_dest,
|
||||
input wire[`NW_M1:0] branch_warp_num,
|
||||
input wire[`NW_BITS-1:0] branch_warp_num,
|
||||
|
||||
output wire[`NT_M1:0] thread_mask,
|
||||
output wire[`NW_M1:0] warp_num,
|
||||
output wire[`NUM_THREADS-1:0] thread_mask,
|
||||
output wire[`NW_BITS-1:0] warp_num,
|
||||
output wire[31:0] warp_pc,
|
||||
output wire out_ebreak,
|
||||
output wire scheduled_warp,
|
||||
|
||||
input wire[`NW_M1:0] icache_stage_wid,
|
||||
input wire[`NT-1:0] icache_stage_valids
|
||||
input wire[`NW_BITS-1:0] icache_stage_wid,
|
||||
input wire[`NUM_THREADS-1:0] icache_stage_valids
|
||||
|
||||
);
|
||||
|
||||
|
@ -66,41 +66,41 @@ module VX_warp_scheduler (
|
|||
|
||||
wire update_visible_active;
|
||||
|
||||
wire[(1+32+`NT_M1):0] d[`NW-1:0];
|
||||
wire[(1+32+`NUM_THREADS-1):0] d[`NUM_WARPS-1:0];
|
||||
|
||||
wire join_fall;
|
||||
wire[31:0] join_pc;
|
||||
wire[`NT_M1:0] join_tm;
|
||||
wire[`NUM_THREADS-1:0] join_tm;
|
||||
|
||||
wire in_wspawn = wspawn;
|
||||
wire in_ctm = ctm;
|
||||
wire in_whalt = whalt;
|
||||
wire in_wstall = wstall;
|
||||
|
||||
reg[`NW-1:0] warp_active;
|
||||
reg[`NW-1:0] warp_stalled;
|
||||
reg[`NUM_WARPS-1:0] warp_active;
|
||||
reg[`NUM_WARPS-1:0] warp_stalled;
|
||||
|
||||
reg [`NW-1:0] visible_active;
|
||||
wire[`NW-1:0] use_active;
|
||||
reg [`NUM_WARPS-1:0] visible_active;
|
||||
wire[`NUM_WARPS-1:0] use_active;
|
||||
|
||||
reg [`NW-1:0] warp_lock;
|
||||
reg [`NUM_WARPS-1:0] warp_lock;
|
||||
|
||||
wire wstall_this_cycle;
|
||||
|
||||
reg[`NT_M1:0] thread_masks[`NW-1:0];
|
||||
reg[31:0] warp_pcs[`NW-1:0];
|
||||
reg[`NUM_THREADS-1:0] thread_masks[`NUM_WARPS-1:0];
|
||||
reg[31:0] warp_pcs[`NUM_WARPS-1:0];
|
||||
|
||||
// barriers
|
||||
reg[`NW-1:0] barrier_stall_mask[(`NUM_BARRIERS-1):0];
|
||||
reg[`NUM_WARPS-1:0] barrier_stall_mask[(`NUM_BARRIERS-1):0];
|
||||
wire reached_barrier_limit;
|
||||
wire[`NW-1:0] curr_barrier_mask;
|
||||
wire[$clog2(`NW):0] curr_barrier_count;
|
||||
wire[`NUM_WARPS-1:0] curr_barrier_mask;
|
||||
wire[$clog2(`NUM_WARPS):0] curr_barrier_count;
|
||||
|
||||
// wsapwn
|
||||
reg[31:0] use_wsapwn_pc;
|
||||
reg[`NW-1:0] use_wsapwn;
|
||||
reg[`NUM_WARPS-1:0] use_wsapwn;
|
||||
|
||||
wire[`NW_M1:0] warp_to_schedule;
|
||||
wire[`NW_BITS-1:0] warp_to_schedule;
|
||||
wire schedule;
|
||||
|
||||
wire hazard;
|
||||
|
@ -110,12 +110,12 @@ module VX_warp_scheduler (
|
|||
|
||||
wire[31:0] new_pc;
|
||||
|
||||
reg[`NW-1:0] total_barrier_stall;
|
||||
reg[`NUM_WARPS-1:0] total_barrier_stall;
|
||||
|
||||
reg didnt_split;
|
||||
|
||||
/* verilator lint_off UNUSED */
|
||||
// wire[$clog2(`NW):0] num_active;
|
||||
// wire[$clog2(`NUM_WARPS):0] num_active;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
integer curr_w_help;
|
||||
|
@ -135,7 +135,7 @@ module VX_warp_scheduler (
|
|||
didnt_split <= 0;
|
||||
warp_lock <= 0;
|
||||
// total_barrier_stall = 0;
|
||||
for (curr_w_help = 1; curr_w_help < `NW; curr_w_help=curr_w_help+1) begin
|
||||
for (curr_w_help = 1; curr_w_help < `NUM_WARPS; curr_w_help=curr_w_help+1) begin
|
||||
warp_pcs[curr_w_help] <= 0;
|
||||
warp_active[curr_w_help] <= 0; // Activating first warp
|
||||
visible_active[curr_w_help] <= 0; // Activating first warp
|
||||
|
@ -147,7 +147,7 @@ module VX_warp_scheduler (
|
|||
if (wspawn) begin
|
||||
warp_active <= wspawn_new_active;
|
||||
use_wsapwn_pc <= wsapwn_pc;
|
||||
use_wsapwn <= wspawn_new_active & (~`NW'b1);
|
||||
use_wsapwn <= wspawn_new_active & (~`NUM_WARPS'b1);
|
||||
end
|
||||
|
||||
if (is_barrier) begin
|
||||
|
@ -219,30 +219,30 @@ module VX_warp_scheduler (
|
|||
// Lock/Release
|
||||
if (scheduled_warp && !stall) begin
|
||||
warp_lock[warp_num] <= 1'b1;
|
||||
// warp_lock <= {`NW{1'b1}};
|
||||
// warp_lock <= {`NUM_WARPS{1'b1}};
|
||||
end
|
||||
if (|icache_stage_valids && !stall) begin
|
||||
warp_lock[icache_stage_wid] <= 1'b0;
|
||||
// warp_lock <= {`NW{1'b0}};
|
||||
// warp_lock <= {`NUM_WARPS{1'b0}};
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
VX_countones #(.N(`NW)) barrier_count(
|
||||
VX_countones #(.N(`NUM_WARPS)) barrier_count(
|
||||
.valids(curr_barrier_mask),
|
||||
.count (curr_barrier_count)
|
||||
);
|
||||
|
||||
wire[$clog2(`NW):0] count_visible_active;
|
||||
VX_countones #(.N(`NW)) num_visible(
|
||||
wire[$clog2(`NUM_WARPS):0] count_visible_active;
|
||||
VX_countones #(.N(`NUM_WARPS)) num_visible(
|
||||
.valids(visible_active),
|
||||
.count (count_visible_active)
|
||||
);
|
||||
|
||||
// assign curr_barrier_count = $countones(curr_barrier_mask);
|
||||
|
||||
assign curr_barrier_mask = barrier_stall_mask[barrier_id][`NW-1:0];
|
||||
assign curr_barrier_mask = barrier_stall_mask[barrier_id][`NUM_WARPS-1:0];
|
||||
assign reached_barrier_limit = curr_barrier_count == (num_warps);
|
||||
|
||||
assign wstall_this_cycle = wstall && (wstall_warp_num == warp_to_schedule); // Maybe bug
|
||||
|
@ -253,15 +253,15 @@ module VX_warp_scheduler (
|
|||
// total_barrier_stall = 0;
|
||||
// for (curr_b = 0; curr_b < `NUM_BARRIERS; curr_b=curr_b+1)
|
||||
// begin
|
||||
// total_barrier_stall[`NW-1:0] = total_barrier_stall[`NW-1:0] | barrier_stall_mask[curr_b];
|
||||
// total_barrier_stall[`NUM_WARPS-1:0] = total_barrier_stall[`NUM_WARPS-1:0] | barrier_stall_mask[curr_b];
|
||||
// end
|
||||
// end
|
||||
|
||||
|
||||
assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join);
|
||||
|
||||
wire[(1+32+`NT_M1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]};
|
||||
wire[(1+32+`NT_M1):0] q2 = {1'b0, split_save_pc , split_later_mask};
|
||||
wire[(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]};
|
||||
wire[(1+32+`NUM_THREADS-1):0] q2 = {1'b0, split_save_pc , split_later_mask};
|
||||
|
||||
|
||||
assign {join_fall, join_pc, join_tm} = d[join_warp_num];
|
||||
|
@ -270,13 +270,13 @@ module VX_warp_scheduler (
|
|||
|
||||
genvar curr_warp;
|
||||
generate
|
||||
for (curr_warp = 0; curr_warp < `NW; curr_warp = curr_warp + 1) begin : stacks
|
||||
for (curr_warp = 0; curr_warp < `NUM_WARPS; curr_warp = curr_warp + 1) begin : stacks
|
||||
wire correct_warp_s = (curr_warp == split_warp_num);
|
||||
wire correct_warp_j = (curr_warp == join_warp_num);
|
||||
|
||||
wire push = (is_split && !dont_split) && correct_warp_s;
|
||||
wire pop = is_join && correct_warp_j;
|
||||
VX_generic_stack #(.WIDTH(1+32+`NT), .DEPTH($clog2(`NT)+1)) ipdom_stack(
|
||||
VX_generic_stack #(.WIDTH(1+32+`NUM_THREADS), .DEPTH($clog2(`NUM_THREADS)+1)) ipdom_stack(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.push (push),
|
||||
|
@ -304,7 +304,7 @@ module VX_warp_scheduler (
|
|||
wire real_use_wspawn = use_wsapwn[warp_to_schedule];
|
||||
|
||||
assign warp_pc = real_use_wspawn ? use_wsapwn_pc : warp_pcs[warp_to_schedule];
|
||||
assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NT'b1 : thread_masks[warp_to_schedule]);
|
||||
assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NUM_THREADS'b1 : thread_masks[warp_to_schedule]);
|
||||
assign warp_num = warp_to_schedule;
|
||||
|
||||
assign update_use_wspawn = use_wsapwn[warp_to_schedule] && !global_stall;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_writeback (
|
||||
input wire clk,
|
||||
|
@ -64,9 +64,9 @@ module VX_writeback (
|
|||
|
||||
wire zero = 0;
|
||||
|
||||
wire[`NT-1:0][31:0] use_wb_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] use_wb_data;
|
||||
|
||||
VX_generic_register #(.N(39 + `NW_M1 + 1 + `NT*33)) wb_register(
|
||||
VX_generic_register #(.N(39 + `NW_BITS-1 + 1 + `NUM_THREADS*33)) wb_register(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(zero),
|
||||
|
|
115
hw/rtl/Vortex.v
115
hw/rtl/Vortex.v
|
@ -1,5 +1,5 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_cache_config.v"
|
||||
`include "VX_define.vh"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module Vortex
|
||||
#(
|
||||
|
@ -13,24 +13,24 @@ module Vortex
|
|||
|
||||
// IO
|
||||
output wire io_valid,
|
||||
output wire[31:0] io_data,
|
||||
output wire [31:0] io_data,
|
||||
|
||||
// DRAM Dcache Req
|
||||
output wire dram_req,
|
||||
output wire dram_req_write,
|
||||
output wire dram_req_read,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [31:0] dram_req_size,
|
||||
output wire [31:0] dram_req_data[`DBANK_LINE_SIZE_RNG],
|
||||
output wire [31:0] dram_expected_lat,
|
||||
output wire dram_req,
|
||||
output wire dram_req_write,
|
||||
output wire dram_req_read,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [31:0] dram_req_size,
|
||||
output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
|
||||
output wire [31:0] dram_expected_lat,
|
||||
|
||||
input wire dram_req_delay,
|
||||
input wire dram_req_delay,
|
||||
|
||||
// DRAM Dcache Res
|
||||
output wire dram_fill_accept,
|
||||
input wire dram_fill_rsp,
|
||||
input wire [31:0] dram_fill_rsp_addr,
|
||||
input wire [31:0] dram_fill_rsp_data[`DBANK_LINE_SIZE_RNG],
|
||||
output wire dram_fill_accept,
|
||||
input wire dram_fill_rsp,
|
||||
input wire [31:0] dram_fill_rsp_addr,
|
||||
input wire [`DBANK_LINE_SIZE-1:0] dram_fill_rsp_data,
|
||||
|
||||
// DRAM Icache Req
|
||||
output wire I_dram_req,
|
||||
|
@ -38,25 +38,25 @@ module Vortex
|
|||
output wire I_dram_req_read,
|
||||
output wire [31:0] I_dram_req_addr,
|
||||
output wire [31:0] I_dram_req_size,
|
||||
output wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_req_data,
|
||||
output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data,
|
||||
output wire [31:0] I_dram_expected_lat,
|
||||
|
||||
// DRAM Icache Res
|
||||
output wire I_dram_fill_accept,
|
||||
input wire I_dram_fill_rsp,
|
||||
input wire [31:0] I_dram_fill_rsp_addr,
|
||||
input wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_fill_rsp_data,
|
||||
input wire [`IBANK_LINE_SIZE-1:0] I_dram_fill_rsp_data,
|
||||
|
||||
// LLC Snooping
|
||||
input wire snp_req,
|
||||
input wire [31:0] snp_req_addr,
|
||||
output wire snp_req_delay,
|
||||
input wire snp_req,
|
||||
input wire [31:0] snp_req_addr,
|
||||
output wire snp_req_delay,
|
||||
|
||||
input wire I_snp_req,
|
||||
input wire [31:0] I_snp_req_addr,
|
||||
output wire I_snp_req_delay,
|
||||
|
||||
output wire out_ebreak
|
||||
output wire out_ebreak
|
||||
|
||||
`else
|
||||
|
||||
|
@ -72,14 +72,14 @@ module Vortex
|
|||
output wire dram_req_read,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [31:0] dram_req_size,
|
||||
output wire [`DBANK_LINE_SIZE_RNG][31:0] dram_req_data,
|
||||
output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
|
||||
output wire [31:0] dram_expected_lat,
|
||||
|
||||
// DRAM Dcache Res
|
||||
output wire dram_fill_accept,
|
||||
input wire dram_fill_rsp,
|
||||
input wire [31:0] dram_fill_rsp_addr,
|
||||
input wire [`DBANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data,
|
||||
input wire [`DBANK_LINE_SIZE-1:0] dram_fill_rsp_data,
|
||||
|
||||
|
||||
// DRAM Icache Req
|
||||
|
@ -88,16 +88,16 @@ module Vortex
|
|||
output wire I_dram_req_read,
|
||||
output wire [31:0] I_dram_req_addr,
|
||||
output wire [31:0] I_dram_req_size,
|
||||
output wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_req_data,
|
||||
output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data,
|
||||
output wire [31:0] I_dram_expected_lat,
|
||||
|
||||
// DRAM Icache Res
|
||||
output wire I_dram_fill_accept,
|
||||
input wire I_dram_fill_rsp,
|
||||
input wire [31:0] I_dram_fill_rsp_addr,
|
||||
input wire [`IBANK_LINE_SIZE_RNG][31:0] I_dram_fill_rsp_data,
|
||||
input wire [`IBANK_LINE_SIZE-1:0] I_dram_fill_rsp_data,
|
||||
|
||||
input wire dram_req_delay,
|
||||
input wire dram_req_delay,
|
||||
|
||||
input wire snp_req,
|
||||
input wire [31:0] snp_req_addr,
|
||||
|
@ -110,27 +110,24 @@ module Vortex
|
|||
output wire out_ebreak
|
||||
`endif
|
||||
);
|
||||
|
||||
wire scheduler_empty;
|
||||
wire out_ebreak_unqual;
|
||||
|
||||
// assign out_ebreak = out_ebreak_unqual && (scheduler_empty && 1);
|
||||
assign out_ebreak = out_ebreak_unqual;
|
||||
|
||||
|
||||
wire memory_delay;
|
||||
wire exec_delay;
|
||||
wire gpr_stage_delay;
|
||||
wire schedule_delay;
|
||||
|
||||
|
||||
// Dcache Interface
|
||||
VX_gpu_dcache_res_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_rsp();
|
||||
VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_req();
|
||||
VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`DNUMBER_REQUESTS)) VX_dcache_req_qual();
|
||||
|
||||
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_dcache_dram_req();
|
||||
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_SIZE_WORDS(`DBANK_LINE_SIZE_WORDS)) VX_gpu_dcache_dram_res();
|
||||
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_dcache_dram_req();
|
||||
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) VX_gpu_dcache_dram_res();
|
||||
|
||||
|
||||
assign VX_gpu_dcache_dram_res.dram_fill_rsp = dram_fill_rsp;
|
||||
|
@ -146,36 +143,40 @@ module Vortex
|
|||
|
||||
assign VX_gpu_dcache_dram_req.dram_req_delay = dram_req_delay;
|
||||
|
||||
genvar wordy;
|
||||
genvar i;
|
||||
generate
|
||||
for (wordy = 0; wordy < `DBANK_LINE_SIZE_WORDS; wordy=wordy+1) begin
|
||||
assign VX_gpu_dcache_dram_res.dram_fill_rsp_data[wordy] = dram_fill_rsp_data[wordy];
|
||||
assign dram_req_data[wordy] = VX_gpu_dcache_dram_req.dram_req_data[wordy];
|
||||
for (i = 0; i < `DBANK_LINE_WORDS; i=i+1) begin
|
||||
assign VX_gpu_dcache_dram_res.dram_fill_rsp_data[i] = dram_fill_rsp_data[i * 32 +: 32];
|
||||
assign dram_req_data[i * 32 +: 32] = VX_gpu_dcache_dram_req.dram_req_data[i];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
wire temp_io_valid = (!memory_delay) && (|VX_dcache_req.core_req_valid) && (VX_dcache_req.core_req_mem_write[0] != `NO_MEM_WRITE) && (VX_dcache_req.core_req_addr[0] == 32'h00010000);
|
||||
wire temp_io_valid = (!memory_delay)
|
||||
&& (|VX_dcache_req.core_req_valid)
|
||||
&& (VX_dcache_req.core_req_mem_write[0] != `NO_MEM_WRITE)
|
||||
&& (VX_dcache_req.core_req_addr[0] == 32'h00010000);
|
||||
|
||||
wire[31:0] temp_io_data = VX_dcache_req.core_req_writedata[0];
|
||||
assign io_valid = temp_io_valid;
|
||||
assign io_data = temp_io_data;
|
||||
|
||||
assign VX_dcache_req_qual.core_req_valid = VX_dcache_req.core_req_valid & {`NT{~io_valid}};
|
||||
assign VX_dcache_req_qual.core_req_addr = VX_dcache_req.core_req_addr;
|
||||
assign VX_dcache_req_qual.core_req_writedata = VX_dcache_req.core_req_writedata;
|
||||
assign VX_dcache_req_qual.core_req_mem_read = VX_dcache_req.core_req_mem_read;
|
||||
assign VX_dcache_req_qual.core_req_mem_write = VX_dcache_req.core_req_mem_write;
|
||||
assign VX_dcache_req_qual.core_req_rd = VX_dcache_req.core_req_rd;
|
||||
assign VX_dcache_req_qual.core_req_wb = VX_dcache_req.core_req_wb;
|
||||
assign VX_dcache_req_qual.core_req_warp_num = VX_dcache_req.core_req_warp_num;
|
||||
assign VX_dcache_req_qual.core_req_pc = VX_dcache_req.core_req_pc;
|
||||
assign VX_dcache_req_qual.core_no_wb_slot = VX_dcache_req.core_no_wb_slot;
|
||||
assign VX_dcache_req_qual.core_req_valid = VX_dcache_req.core_req_valid & {`NUM_THREADS{~io_valid}};
|
||||
assign VX_dcache_req_qual.core_req_addr = VX_dcache_req.core_req_addr;
|
||||
assign VX_dcache_req_qual.core_req_writedata = VX_dcache_req.core_req_writedata;
|
||||
assign VX_dcache_req_qual.core_req_mem_read = VX_dcache_req.core_req_mem_read;
|
||||
assign VX_dcache_req_qual.core_req_mem_write = VX_dcache_req.core_req_mem_write;
|
||||
assign VX_dcache_req_qual.core_req_rd = VX_dcache_req.core_req_rd;
|
||||
assign VX_dcache_req_qual.core_req_wb = VX_dcache_req.core_req_wb;
|
||||
assign VX_dcache_req_qual.core_req_warp_num = VX_dcache_req.core_req_warp_num;
|
||||
assign VX_dcache_req_qual.core_req_pc = VX_dcache_req.core_req_pc;
|
||||
assign VX_dcache_req_qual.core_no_wb_slot = VX_dcache_req.core_no_wb_slot;
|
||||
|
||||
|
||||
VX_gpu_dcache_res_inter #(.NUMBER_REQUESTS(`INUMBER_REQUESTS)) VX_icache_rsp();
|
||||
VX_gpu_dcache_req_inter #(.NUMBER_REQUESTS(`INUMBER_REQUESTS)) VX_icache_req();
|
||||
|
||||
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_SIZE_WORDS(`IBANK_LINE_SIZE_WORDS)) VX_gpu_icache_dram_req();
|
||||
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_SIZE_WORDS(`IBANK_LINE_SIZE_WORDS)) VX_gpu_icache_dram_res();
|
||||
VX_gpu_dcache_dram_req_inter #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) VX_gpu_icache_dram_req();
|
||||
VX_gpu_dcache_dram_res_inter #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) VX_gpu_icache_dram_res();
|
||||
|
||||
|
||||
assign VX_gpu_icache_dram_res.dram_fill_rsp = I_dram_fill_rsp;
|
||||
|
@ -191,11 +192,11 @@ module Vortex
|
|||
|
||||
assign VX_gpu_icache_dram_req.dram_req_delay = dram_req_delay;
|
||||
|
||||
genvar iwordy;
|
||||
genvar j;
|
||||
generate
|
||||
for (iwordy = 0; iwordy < `IBANK_LINE_SIZE_WORDS; iwordy=iwordy+1) begin
|
||||
assign VX_gpu_icache_dram_res.dram_fill_rsp_data[iwordy] = I_dram_fill_rsp_data[iwordy];
|
||||
assign I_dram_req_data[iwordy] = VX_gpu_icache_dram_req.dram_req_data[iwordy];
|
||||
for (j = 0; j < `IBANK_LINE_WORDS; j = j + 1) begin
|
||||
assign VX_gpu_icache_dram_res.dram_fill_rsp_data[j] = I_dram_fill_rsp_data[j * 32 +: 32];
|
||||
assign I_dram_req_data[j * 32 +: 32] = VX_gpu_icache_dram_req.dram_req_data[j];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
@ -239,7 +240,7 @@ VX_front_end vx_front_end(
|
|||
.VX_jal_rsp (VX_jal_rsp),
|
||||
.VX_branch_rsp (VX_branch_rsp),
|
||||
.fetch_ebreak (out_ebreak_unqual)
|
||||
);
|
||||
);
|
||||
|
||||
VX_scheduler schedule(
|
||||
.clk (clk),
|
||||
|
@ -251,7 +252,7 @@ VX_scheduler schedule(
|
|||
.VX_writeback_inter(VX_writeback_inter),
|
||||
.schedule_delay (schedule_delay),
|
||||
.is_empty (scheduler_empty)
|
||||
);
|
||||
);
|
||||
|
||||
VX_back_end #(.CORE_ID(CORE_ID)) vx_back_end(
|
||||
.clk (clk),
|
||||
|
@ -267,7 +268,7 @@ VX_back_end #(.CORE_ID(CORE_ID)) vx_back_end(
|
|||
.out_mem_delay (memory_delay),
|
||||
.out_exec_delay (exec_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay)
|
||||
);
|
||||
);
|
||||
|
||||
|
||||
VX_dmem_controller VX_dmem_controller(
|
||||
|
@ -291,7 +292,7 @@ VX_dmem_controller VX_dmem_controller(
|
|||
// Core <-> Dcache
|
||||
.VX_dcache_req (VX_dcache_req_qual),
|
||||
.VX_dcache_rsp (VX_dcache_rsp)
|
||||
);
|
||||
);
|
||||
|
||||
// VX_csr_handler vx_csr_handler(
|
||||
// .clk (clk),
|
||||
|
@ -300,7 +301,7 @@ VX_dmem_controller VX_dmem_controller(
|
|||
// .in_wb_valid (VX_writeback_inter.wb_valid[0]),
|
||||
|
||||
// .out_decode_csr_data (csr_decode_csr_data)
|
||||
// );
|
||||
// );
|
||||
|
||||
endmodule // Vortex
|
||||
|
||||
|
|
|
@ -1,20 +1,18 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_cache_config.v"
|
||||
|
||||
`include "VX_define.vh"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module Vortex_Cluster
|
||||
#(
|
||||
parameter CLUSTER_ID = 0
|
||||
)
|
||||
(
|
||||
#(
|
||||
parameter CLUSTER_ID = 0
|
||||
) (
|
||||
|
||||
// Clock
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// IO
|
||||
output wire[`NUMBER_CORES_PER_CLUSTER-1:0] io_valid,
|
||||
output wire[`NUMBER_CORES_PER_CLUSTER-1:0][31:0] io_data,
|
||||
output wire[`NUM_CORES_PER_CLUSTER-1:0] io_valid,
|
||||
output wire[`NUM_CORES_PER_CLUSTER-1:0][31:0] io_data,
|
||||
|
||||
// DRAM Req
|
||||
output wire out_dram_req,
|
||||
|
@ -22,7 +20,7 @@ module Vortex_Cluster
|
|||
output wire out_dram_req_read,
|
||||
output wire [31:0] out_dram_req_addr,
|
||||
output wire [31:0] out_dram_req_size,
|
||||
output wire [31:0] out_dram_req_data[`DBANK_LINE_SIZE_RNG],
|
||||
output wire [31:0] out_dram_req_data[`DBANK_LINE_WORDS-1:0],
|
||||
output wire [31:0] out_dram_expected_lat,
|
||||
input wire out_dram_req_delay,
|
||||
|
||||
|
@ -30,8 +28,7 @@ module Vortex_Cluster
|
|||
output wire out_dram_fill_accept,
|
||||
input wire out_dram_fill_rsp,
|
||||
input wire [31:0] out_dram_fill_rsp_addr,
|
||||
input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_SIZE_RNG],
|
||||
|
||||
input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_WORDS-1:0],
|
||||
|
||||
// LLC Snooping
|
||||
input wire llc_snp_req,
|
||||
|
@ -40,142 +37,133 @@ module Vortex_Cluster
|
|||
|
||||
output wire out_ebreak
|
||||
);
|
||||
|
||||
// DRAM Dcache Req
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_req;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_req_write;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_req_read;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_addr;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_size;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0][`DBANK_LINE_SIZE_RNG][31:0] per_core_dram_req_data;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_expected_lat;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req_write;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_req_read;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_addr;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_req_size;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_core_dram_req_data;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_expected_lat;
|
||||
|
||||
// DRAM Dcache Res
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_fill_accept;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_dram_fill_rsp;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_fill_rsp_addr;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0][`DBANK_LINE_SIZE_RNG][31:0] per_core_dram_fill_rsp_data;
|
||||
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_fill_accept;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_dram_fill_rsp;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_dram_fill_rsp_addr;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_core_dram_fill_rsp_data;
|
||||
|
||||
// DRAM Icache Req
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_req;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_write;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_read;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_addr;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_size;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0][`IBANK_LINE_SIZE_RNG][31:0] per_core_I_dram_req_data;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_expected_lat;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_req;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_write;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_req_read;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_addr;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_req_size;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0][`IBANK_LINE_WORDS-1:0][31:0] per_core_I_dram_req_data;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_expected_lat;
|
||||
|
||||
// DRAM Icache Res
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_accept;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_rsp;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_fill_rsp_addr;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0][`IBANK_LINE_SIZE_RNG][31:0] per_core_I_dram_fill_rsp_data;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_accept;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_I_dram_fill_rsp;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] [31:0] per_core_I_dram_fill_rsp_addr;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0][`IBANK_LINE_WORDS-1:0][31:0] per_core_I_dram_fill_rsp_data;
|
||||
|
||||
// Out ebreak
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_out_ebreak;
|
||||
|
||||
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] per_core_io_valid;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0][31:0] per_core_io_data;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_out_ebreak;
|
||||
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] per_core_io_valid;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0][31:0] per_core_io_data;
|
||||
|
||||
wire l2c_core_accept;
|
||||
|
||||
|
||||
wire snp_fwd;
|
||||
wire[31:0] snp_fwd_addr;
|
||||
wire[`NUMBER_CORES_PER_CLUSTER-1:0] snp_fwd_delay;
|
||||
wire snp_fwd;
|
||||
wire[31:0] snp_fwd_addr;
|
||||
wire[`NUM_CORES_PER_CLUSTER-1:0] snp_fwd_delay;
|
||||
|
||||
assign out_ebreak = (&per_core_out_ebreak);
|
||||
|
||||
genvar curr_core;
|
||||
generate
|
||||
|
||||
for (curr_core = 0; curr_core < `NUMBER_CORES_PER_CLUSTER; curr_core=curr_core+1) begin
|
||||
for (curr_core = 0; curr_core < `NUM_CORES_PER_CLUSTER; curr_core=curr_core+1) begin
|
||||
|
||||
wire [`IBANK_LINE_SIZE_RNG][31:0] curr_core_I_dram_req_data;
|
||||
wire [`DBANK_LINE_SIZE_RNG][31:0] curr_core_dram_req_data ;
|
||||
wire [`IBANK_LINE_WORDS-1:0][31:0] curr_core_I_dram_req_data;
|
||||
wire [`DBANK_LINE_WORDS-1:0][31:0] curr_core_dram_req_data ;
|
||||
|
||||
assign io_valid[curr_core] = per_core_io_valid[curr_core];
|
||||
assign io_data [curr_core] = per_core_io_data [curr_core];
|
||||
|
||||
Vortex #(.CORE_ID(curr_core + (CLUSTER_ID * `NUMBER_CORES_PER_CLUSTER))) vortex_core(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.io_valid (per_core_io_valid [curr_core]),
|
||||
.io_data (per_core_io_data [curr_core]),
|
||||
.dram_req (per_core_dram_req [curr_core]),
|
||||
.dram_req_write (per_core_dram_req_write [curr_core]),
|
||||
.dram_req_read (per_core_dram_req_read [curr_core]),
|
||||
.dram_req_addr (per_core_dram_req_addr [curr_core]),
|
||||
.dram_req_size (per_core_dram_req_size [curr_core]),
|
||||
.dram_req_data (curr_core_dram_req_data ),
|
||||
.dram_expected_lat (per_core_dram_expected_lat [curr_core]),
|
||||
.dram_fill_accept (per_core_dram_fill_accept [curr_core]),
|
||||
.dram_fill_rsp (per_core_dram_fill_rsp [curr_core]),
|
||||
.dram_fill_rsp_addr (per_core_dram_fill_rsp_addr [curr_core]),
|
||||
.dram_fill_rsp_data (per_core_dram_fill_rsp_data [curr_core]),
|
||||
.I_dram_req (per_core_I_dram_req [curr_core]),
|
||||
.I_dram_req_write (per_core_I_dram_req_write [curr_core]),
|
||||
.I_dram_req_read (per_core_I_dram_req_read [curr_core]),
|
||||
.I_dram_req_addr (per_core_I_dram_req_addr [curr_core]),
|
||||
.I_dram_req_size (per_core_I_dram_req_size [curr_core]),
|
||||
.I_dram_req_data (curr_core_I_dram_req_data ),
|
||||
.I_dram_expected_lat (per_core_I_dram_expected_lat [curr_core]),
|
||||
.I_dram_fill_accept (per_core_I_dram_fill_accept [curr_core]),
|
||||
.I_dram_fill_rsp (per_core_I_dram_fill_rsp [curr_core]),
|
||||
.I_dram_fill_rsp_addr (per_core_I_dram_fill_rsp_addr[curr_core]),
|
||||
.I_dram_fill_rsp_data (per_core_I_dram_fill_rsp_data[curr_core]),
|
||||
.dram_req_delay (l2c_core_accept ),
|
||||
.out_ebreak (per_core_out_ebreak [curr_core]),
|
||||
.snp_req (snp_fwd),
|
||||
.snp_req_addr (snp_fwd_addr),
|
||||
.snp_req_delay (snp_fwd_delay[curr_core]),
|
||||
.I_snp_req (0),
|
||||
.I_snp_req_addr (),
|
||||
.I_snp_req_delay ()
|
||||
);
|
||||
Vortex #(
|
||||
.CORE_ID(curr_core + (CLUSTER_ID * `NUM_CORES_PER_CLUSTER))
|
||||
) vortex_core(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.io_valid (per_core_io_valid [curr_core]),
|
||||
.io_data (per_core_io_data [curr_core]),
|
||||
.dram_req (per_core_dram_req [curr_core]),
|
||||
.dram_req_write (per_core_dram_req_write [curr_core]),
|
||||
.dram_req_read (per_core_dram_req_read [curr_core]),
|
||||
.dram_req_addr (per_core_dram_req_addr [curr_core]),
|
||||
.dram_req_size (per_core_dram_req_size [curr_core]),
|
||||
.dram_req_data (curr_core_dram_req_data ),
|
||||
.dram_expected_lat (per_core_dram_expected_lat [curr_core]),
|
||||
.dram_fill_accept (per_core_dram_fill_accept [curr_core]),
|
||||
.dram_fill_rsp (per_core_dram_fill_rsp [curr_core]),
|
||||
.dram_fill_rsp_addr (per_core_dram_fill_rsp_addr [curr_core]),
|
||||
.dram_fill_rsp_data (per_core_dram_fill_rsp_data [curr_core]),
|
||||
.I_dram_req (per_core_I_dram_req [curr_core]),
|
||||
.I_dram_req_write (per_core_I_dram_req_write [curr_core]),
|
||||
.I_dram_req_read (per_core_I_dram_req_read [curr_core]),
|
||||
.I_dram_req_addr (per_core_I_dram_req_addr [curr_core]),
|
||||
.I_dram_req_size (per_core_I_dram_req_size [curr_core]),
|
||||
.I_dram_req_data (curr_core_I_dram_req_data ),
|
||||
.I_dram_expected_lat (per_core_I_dram_expected_lat [curr_core]),
|
||||
.I_dram_fill_accept (per_core_I_dram_fill_accept [curr_core]),
|
||||
.I_dram_fill_rsp (per_core_I_dram_fill_rsp [curr_core]),
|
||||
.I_dram_fill_rsp_addr (per_core_I_dram_fill_rsp_addr[curr_core]),
|
||||
.I_dram_fill_rsp_data (per_core_I_dram_fill_rsp_data[curr_core]),
|
||||
.dram_req_delay (l2c_core_accept ),
|
||||
.out_ebreak (per_core_out_ebreak [curr_core]),
|
||||
.snp_req (snp_fwd),
|
||||
.snp_req_addr (snp_fwd_addr),
|
||||
.snp_req_delay (snp_fwd_delay[curr_core]),
|
||||
.I_snp_req (0),
|
||||
.I_snp_req_addr (),
|
||||
.I_snp_req_delay ()
|
||||
);
|
||||
|
||||
assign per_core_dram_req_data [curr_core] = curr_core_dram_req_data;
|
||||
assign per_core_I_dram_req_data[curr_core] = curr_core_I_dram_req_data;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
||||
//////////////////// L2 Cache ////////////////////
|
||||
wire[`LLNUMBER_REQUESTS-1:0] l2c_core_req;
|
||||
wire[`LLNUMBER_REQUESTS-1:0][2:0] l2c_core_req_mem_write;
|
||||
wire[`LLNUMBER_REQUESTS-1:0][2:0] l2c_core_req_mem_read;
|
||||
wire[`LLNUMBER_REQUESTS-1:0][31:0] l2c_core_req_addr;
|
||||
wire[`LLNUMBER_REQUESTS-1:0][`IBANK_LINE_SIZE_RNG][31:0] l2c_core_req_data;
|
||||
wire[`LLNUMBER_REQUESTS-1:0][1:0] l2c_core_req_wb;
|
||||
wire[`L2NUMBER_REQUESTS-1:0] l2c_core_req;
|
||||
wire[`L2NUMBER_REQUESTS-1:0][2:0] l2c_core_req_mem_write;
|
||||
wire[`L2NUMBER_REQUESTS-1:0][2:0] l2c_core_req_mem_read;
|
||||
wire[`L2NUMBER_REQUESTS-1:0][31:0] l2c_core_req_addr;
|
||||
wire[`L2NUMBER_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l2c_core_req_data;
|
||||
wire[`L2NUMBER_REQUESTS-1:0][1:0] l2c_core_req_wb;
|
||||
|
||||
wire[`LLNUMBER_REQUESTS-1:0] l2c_core_no_wb_slot;
|
||||
wire[`L2NUMBER_REQUESTS-1:0] l2c_core_no_wb_slot;
|
||||
|
||||
wire[`L2NUMBER_REQUESTS-1:0] l2c_wb;
|
||||
wire[`L2NUMBER_REQUESTS-1:0] [31:0] l2c_wb_addr;
|
||||
wire[`L2NUMBER_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l2c_wb_data;
|
||||
|
||||
|
||||
wire[`LLNUMBER_REQUESTS-1:0] l2c_wb;
|
||||
wire[`LLNUMBER_REQUESTS-1:0] [31:0] l2c_wb_addr;
|
||||
wire[`LLNUMBER_REQUESTS-1:0][`IBANK_LINE_SIZE_RNG][31:0] l2c_wb_data;
|
||||
|
||||
|
||||
wire[`DBANK_LINE_SIZE_RNG][31:0] dram_req_data_port;
|
||||
wire[`DBANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data_port;
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_req_data_port;
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data_port;
|
||||
|
||||
genvar llb_index;
|
||||
generate
|
||||
for (llb_index = 0; llb_index < `DBANK_LINE_SIZE_WORDS; llb_index=llb_index+1) begin
|
||||
for (llb_index = 0; llb_index < `DBANK_LINE_WORDS; llb_index=llb_index+1) begin
|
||||
assign out_dram_req_data [llb_index] = dram_req_data_port[llb_index];
|
||||
assign dram_fill_rsp_data_port[llb_index] = out_dram_fill_rsp_data[llb_index];
|
||||
end
|
||||
endgenerate
|
||||
|
||||
|
||||
//
|
||||
genvar l2c_curr_core;
|
||||
generate
|
||||
for (l2c_curr_core = 0; l2c_curr_core < `LLNUMBER_REQUESTS; l2c_curr_core=l2c_curr_core+2) begin
|
||||
for (l2c_curr_core = 0; l2c_curr_core < `L2NUMBER_REQUESTS; l2c_curr_core=l2c_curr_core+2) begin
|
||||
// Core Request
|
||||
assign l2c_core_req [l2c_curr_core] = per_core_dram_req [(l2c_curr_core/2)];
|
||||
assign l2c_core_req [l2c_curr_core+1] = per_core_I_dram_req[(l2c_curr_core/2)];
|
||||
|
@ -214,29 +202,27 @@ module Vortex_Cluster
|
|||
wire dram_snp_full;
|
||||
wire dram_req_because_of_wb;
|
||||
VX_cache #(
|
||||
.CACHE_SIZE_BYTES (`LLCACHE_SIZE_BYTES),
|
||||
.BANK_LINE_SIZE_BYTES (`LLBANK_LINE_SIZE_BYTES),
|
||||
.NUMBER_BANKS (`LLNUMBER_BANKS),
|
||||
.WORD_SIZE_BYTES (`LLWORD_SIZE_BYTES),
|
||||
.NUMBER_REQUESTS (`LLNUMBER_REQUESTS),
|
||||
.STAGE_1_CYCLES (`LLSTAGE_1_CYCLES),
|
||||
.FUNC_ID (`LLFUNC_ID),
|
||||
.REQQ_SIZE (`LLREQQ_SIZE),
|
||||
.MRVQ_SIZE (`LLMRVQ_SIZE),
|
||||
.DFPQ_SIZE (`LLDFPQ_SIZE),
|
||||
.SNRQ_SIZE (`LLSNRQ_SIZE),
|
||||
.CWBQ_SIZE (`LLCWBQ_SIZE),
|
||||
.DWBQ_SIZE (`LLDWBQ_SIZE),
|
||||
.DFQQ_SIZE (`LLDFQQ_SIZE),
|
||||
.LLVQ_SIZE (`LLLLVQ_SIZE),
|
||||
.FFSQ_SIZE (`LLFFSQ_SIZE),
|
||||
.PRFQ_SIZE (`LLPRFQ_SIZE),
|
||||
.PRFQ_STRIDE (`LLPRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`LLFILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`LLSIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
gpu_l2cache
|
||||
(
|
||||
.CACHE_SIZE_BYTES (`L2CACHE_SIZE_BYTES),
|
||||
.BANK_LINE_SIZE_BYTES (`L2BANK_LINE_SIZE_BYTES),
|
||||
.NUMBER_BANKS (`L2NUMBER_BANKS),
|
||||
.WORD_SIZE_BYTES (`L2WORD_SIZE_BYTES),
|
||||
.NUMBER_REQUESTS (`L2NUMBER_REQUESTS),
|
||||
.STAGE_1_CYCLES (`L2STAGE_1_CYCLES),
|
||||
.FUNC_ID (`L2FUNC_ID),
|
||||
.REQQ_SIZE (`L2REQQ_SIZE),
|
||||
.MRVQ_SIZE (`L2MRVQ_SIZE),
|
||||
.DFPQ_SIZE (`L2DFPQ_SIZE),
|
||||
.SNRQ_SIZE (`L2SNRQ_SIZE),
|
||||
.CWBQ_SIZE (`L2CWBQ_SIZE),
|
||||
.DWBQ_SIZE (`L2DWBQ_SIZE),
|
||||
.DFQQ_SIZE (`L2DFQQ_SIZE),
|
||||
.LLVQ_SIZE (`L2LLVQ_SIZE),
|
||||
.FFSQ_SIZE (`L2FFSQ_SIZE),
|
||||
.PRFQ_SIZE (`L2PRFQ_SIZE),
|
||||
.PRFQ_STRIDE (`L2PRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`L2SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
) gpu_l2cache (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
|
@ -295,8 +281,6 @@ module Vortex_Cluster
|
|||
.snp_fwd (snp_fwd),
|
||||
.snp_fwd_addr (snp_fwd_addr),
|
||||
.snp_fwd_delay (|snp_fwd_delay)
|
||||
);
|
||||
|
||||
|
||||
);
|
||||
|
||||
endmodule
|
|
@ -1,5 +1,5 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_cache_config.v"
|
||||
`include "VX_define.vh"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module Vortex_Socket (
|
||||
|
||||
|
@ -8,8 +8,8 @@ module Vortex_Socket (
|
|||
input wire reset,
|
||||
|
||||
// IO
|
||||
output wire io_valid[`NUMBER_CORES-1:0],
|
||||
output wire[31:0] io_data [`NUMBER_CORES-1:0],
|
||||
output wire io_valid[`NUM_CORES-1:0],
|
||||
output wire[31:0] io_data [`NUM_CORES-1:0],
|
||||
|
||||
output wire[31:0] number_cores,
|
||||
|
||||
|
@ -19,7 +19,7 @@ module Vortex_Socket (
|
|||
output wire out_dram_req_read,
|
||||
output wire [31:0] out_dram_req_addr,
|
||||
output wire [31:0] out_dram_req_size,
|
||||
output wire [31:0] out_dram_req_data[`DBANK_LINE_SIZE_RNG],
|
||||
output wire [31:0] out_dram_req_data[`DBANK_LINE_WORDS-1:0],
|
||||
output wire [31:0] out_dram_expected_lat,
|
||||
input wire out_dram_req_delay,
|
||||
|
||||
|
@ -27,7 +27,7 @@ module Vortex_Socket (
|
|||
output wire out_dram_fill_accept,
|
||||
input wire out_dram_fill_rsp,
|
||||
input wire [31:0] out_dram_fill_rsp_addr,
|
||||
input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_SIZE_RNG],
|
||||
input wire [31:0] out_dram_fill_rsp_data[`DBANK_LINE_WORDS-1:0],
|
||||
|
||||
// LLC Snooping
|
||||
input wire llc_snp_req,
|
||||
|
@ -36,18 +36,16 @@ module Vortex_Socket (
|
|||
|
||||
output wire out_ebreak
|
||||
);
|
||||
assign number_cores = `NUM_CORES;
|
||||
|
||||
assign number_cores = `NUMBER_CORES;
|
||||
if (`NUM_CLUSTERS == 1) begin
|
||||
|
||||
|
||||
if (`NUMBER_CLUSTERS == 1) begin
|
||||
|
||||
wire[`NUMBER_CORES-1:0] cluster_io_valid;
|
||||
wire[`NUMBER_CORES-1:0][31:0] cluster_io_data;
|
||||
wire[`NUM_CORES-1:0] cluster_io_valid;
|
||||
wire[`NUM_CORES-1:0][31:0] cluster_io_data;
|
||||
|
||||
|
||||
genvar curr_c;
|
||||
for (curr_c = 0; curr_c < `NUMBER_CORES; curr_c=curr_c+1) begin
|
||||
for (curr_c = 0; curr_c < `NUM_CORES; curr_c=curr_c+1) begin
|
||||
assign io_valid[curr_c] = cluster_io_valid[curr_c];
|
||||
assign io_data [curr_c] = cluster_io_data [curr_c];
|
||||
end
|
||||
|
@ -76,62 +74,57 @@ module Vortex_Socket (
|
|||
.llc_snp_req_addr (llc_snp_req_addr),
|
||||
.llc_snp_req_delay (llc_snp_req_delay),
|
||||
.out_ebreak (out_ebreak)
|
||||
);
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
wire snp_fwd;
|
||||
wire[31:0] snp_fwd_addr;
|
||||
wire[`NUMBER_CLUSTERS-1:0] snp_fwd_delay;
|
||||
wire[`NUM_CLUSTERS-1:0] snp_fwd_delay;
|
||||
|
||||
wire[`NUMBER_CLUSTERS-1:0] per_cluster_out_ebreak;
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_out_ebreak;
|
||||
|
||||
assign out_ebreak = (&per_cluster_out_ebreak);
|
||||
|
||||
|
||||
// // DRAM Dcache Req
|
||||
wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_req;
|
||||
wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_req_write;
|
||||
wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_req_read;
|
||||
wire[`NUMBER_CLUSTERS-1:0] [31:0] per_cluster_dram_req_addr;
|
||||
wire[`NUMBER_CLUSTERS-1:0] [31:0] per_cluster_dram_req_size;
|
||||
wire[`NUMBER_CLUSTERS-1:0] [31:0] per_cluster_dram_expected_lat;
|
||||
wire[`NUMBER_CLUSTERS-1:0][`DBANK_LINE_SIZE_RNG][31:0] per_cluster_dram_req_data;
|
||||
wire[31:0] per_cluster_dram_req_data_up[`NUMBER_CLUSTERS-1:0][`DBANK_LINE_SIZE_RNG];
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req;
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_write;
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_read;
|
||||
wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_req_addr;
|
||||
wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_req_size;
|
||||
wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_expected_lat;
|
||||
wire[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_cluster_dram_req_data;
|
||||
wire[31:0] per_cluster_dram_req_data_up[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0];
|
||||
|
||||
wire l3c_core_accept;
|
||||
|
||||
// // DRAM Dcache Res
|
||||
wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_fill_accept;
|
||||
wire[`NUMBER_CLUSTERS-1:0] per_cluster_dram_fill_rsp;
|
||||
wire[`NUMBER_CLUSTERS-1:0] [31:0] per_cluster_dram_fill_rsp_addr;
|
||||
wire[`NUMBER_CLUSTERS-1:0][`DBANK_LINE_SIZE_RNG][31:0] per_cluster_dram_fill_rsp_data;
|
||||
wire[31:0] per_cluster_dram_fill_rsp_data_up[`NUMBER_CLUSTERS-1:0][`DBANK_LINE_SIZE_RNG];
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_fill_accept;
|
||||
wire[`NUM_CLUSTERS-1:0] per_cluster_dram_fill_rsp;
|
||||
wire[`NUM_CLUSTERS-1:0] [31:0] per_cluster_dram_fill_rsp_addr;
|
||||
wire[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0][31:0] per_cluster_dram_fill_rsp_data;
|
||||
wire[31:0] per_cluster_dram_fill_rsp_data_up[`NUM_CLUSTERS-1:0][`DBANK_LINE_WORDS-1:0];
|
||||
|
||||
wire[`NUMBER_CLUSTERS-1:0][`NUMBER_CORES_PER_CLUSTER-1:0] per_cluster_io_valid;
|
||||
wire[`NUMBER_CLUSTERS-1:0][`NUMBER_CORES_PER_CLUSTER-1:0][31:0] per_cluster_io_data;
|
||||
wire[`NUM_CLUSTERS-1:0][`NUM_CORES_PER_CLUSTER-1:0] per_cluster_io_valid;
|
||||
wire[`NUM_CLUSTERS-1:0][`NUM_CORES_PER_CLUSTER-1:0][31:0] per_cluster_io_data;
|
||||
|
||||
genvar curr_c;
|
||||
genvar curr_cc;
|
||||
genvar curr_word;
|
||||
for (curr_c = 0; curr_c < `NUMBER_CLUSTERS; curr_c =curr_c+1) begin
|
||||
for (curr_cc = 0; curr_cc < `NUMBER_CORES_PER_CLUSTER; curr_cc=curr_cc+1) begin
|
||||
assign io_valid[curr_cc+(curr_c*`NUMBER_CORES_PER_CLUSTER)] = per_cluster_io_valid[curr_c][curr_cc];
|
||||
assign io_data [curr_cc+(curr_c*`NUMBER_CORES_PER_CLUSTER)] = per_cluster_io_data [curr_c][curr_cc];
|
||||
genvar curr_c, curr_cc, curr_word;
|
||||
for (curr_c = 0; curr_c < `NUM_CLUSTERS; curr_c =curr_c+1) begin
|
||||
for (curr_cc = 0; curr_cc < `NUM_CORES_PER_CLUSTER; curr_cc=curr_cc+1) begin
|
||||
assign io_valid[curr_cc+(curr_c*`NUM_CORES_PER_CLUSTER)] = per_cluster_io_valid[curr_c][curr_cc];
|
||||
assign io_data [curr_cc+(curr_c*`NUM_CORES_PER_CLUSTER)] = per_cluster_io_data [curr_c][curr_cc];
|
||||
end
|
||||
|
||||
|
||||
for (curr_word = 0; curr_word < `DBANK_LINE_SIZE_WORDS; curr_word = curr_word+1) begin
|
||||
for (curr_word = 0; curr_word < `DBANK_LINE_WORDS; curr_word = curr_word+1) begin
|
||||
assign per_cluster_dram_req_data [curr_c][curr_word] = per_cluster_dram_req_data_up [curr_c][curr_word];
|
||||
assign per_cluster_dram_fill_rsp_data_up[curr_c][curr_word] = per_cluster_dram_fill_rsp_data[curr_c][curr_word];
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
||||
genvar curr_cluster;
|
||||
for (curr_cluster = 0; curr_cluster < `NUMBER_CLUSTERS; curr_cluster=curr_cluster+1) begin
|
||||
|
||||
for (curr_cluster = 0; curr_cluster < `NUM_CLUSTERS; curr_cluster=curr_cluster+1) begin
|
||||
|
||||
Vortex_Cluster #(.CLUSTER_ID(curr_cluster)) Vortex_Cluster(
|
||||
.clk (clk),
|
||||
|
@ -158,37 +151,33 @@ module Vortex_Socket (
|
|||
.llc_snp_req_delay (snp_fwd_delay[curr_cluster]),
|
||||
|
||||
.out_ebreak (per_cluster_out_ebreak [curr_cluster])
|
||||
);
|
||||
);
|
||||
end
|
||||
|
||||
|
||||
//////////////////// L3 Cache ////////////////////
|
||||
wire[`L3NUMBER_REQUESTS-1:0] l3c_core_req;
|
||||
wire[`L3NUMBER_REQUESTS-1:0][2:0] l3c_core_req_mem_write;
|
||||
wire[`L3NUMBER_REQUESTS-1:0][2:0] l3c_core_req_mem_read;
|
||||
wire[`L3NUMBER_REQUESTS-1:0][31:0] l3c_core_req_addr;
|
||||
wire[`L3NUMBER_REQUESTS-1:0][`IBANK_LINE_SIZE_RNG][31:0] l3c_core_req_data;
|
||||
wire[`L3NUMBER_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l3c_core_req_data;
|
||||
wire[`L3NUMBER_REQUESTS-1:0][1:0] l3c_core_req_wb;
|
||||
|
||||
wire[`L3NUMBER_REQUESTS-1:0] l3c_core_no_wb_slot;
|
||||
|
||||
|
||||
|
||||
wire[`L3NUMBER_REQUESTS-1:0] l3c_wb;
|
||||
wire[`L3NUMBER_REQUESTS-1:0] [31:0] l3c_wb_addr;
|
||||
wire[`L3NUMBER_REQUESTS-1:0][`IBANK_LINE_SIZE_RNG][31:0] l3c_wb_data;
|
||||
wire[`L3NUMBER_REQUESTS-1:0][`IBANK_LINE_WORDS-1:0][31:0] l3c_wb_data;
|
||||
|
||||
|
||||
wire[`DBANK_LINE_SIZE_RNG][31:0] dram_req_data_port;
|
||||
wire[`DBANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data_port;
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_req_data_port;
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data_port;
|
||||
|
||||
genvar llb_index;
|
||||
for (llb_index = 0; llb_index < `DBANK_LINE_SIZE_WORDS; llb_index=llb_index+1) begin
|
||||
for (llb_index = 0; llb_index < `DBANK_LINE_WORDS; llb_index=llb_index+1) begin
|
||||
assign out_dram_req_data [llb_index] = dram_req_data_port[llb_index];
|
||||
assign dram_fill_rsp_data_port[llb_index] = out_dram_fill_rsp_data[llb_index];
|
||||
end
|
||||
|
||||
|
||||
//
|
||||
genvar l3c_curr_cluster;
|
||||
for (l3c_curr_cluster = 0; l3c_curr_cluster < `L3NUMBER_REQUESTS; l3c_curr_cluster=l3c_curr_cluster+1) begin
|
||||
|
@ -212,7 +201,6 @@ module Vortex_Socket (
|
|||
assign per_cluster_dram_fill_rsp [l3c_curr_cluster] = l3c_wb [l3c_curr_cluster];
|
||||
assign per_cluster_dram_fill_rsp_data[l3c_curr_cluster] = l3c_wb_data[l3c_curr_cluster];
|
||||
assign per_cluster_dram_fill_rsp_addr[l3c_curr_cluster] = l3c_wb_addr[l3c_curr_cluster];
|
||||
|
||||
end
|
||||
|
||||
wire dram_snp_full;
|
||||
|
@ -224,7 +212,7 @@ module Vortex_Socket (
|
|||
.WORD_SIZE_BYTES (`L3WORD_SIZE_BYTES),
|
||||
.NUMBER_REQUESTS (`L3NUMBER_REQUESTS),
|
||||
.STAGE_1_CYCLES (`L3STAGE_1_CYCLES),
|
||||
.FUNC_ID (`LLFUNC_ID),
|
||||
.FUNC_ID (`L2FUNC_ID),
|
||||
.REQQ_SIZE (`L3REQQ_SIZE),
|
||||
.MRVQ_SIZE (`L3MRVQ_SIZE),
|
||||
.DFPQ_SIZE (`L3DFPQ_SIZE),
|
||||
|
@ -238,9 +226,7 @@ module Vortex_Socket (
|
|||
.PRFQ_STRIDE (`L3PRFQ_STRIDE),
|
||||
.FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE),
|
||||
.SIMULATED_DRAM_LATENCY_CYCLES(`L3SIMULATED_DRAM_LATENCY_CYCLES)
|
||||
)
|
||||
gpu_l3cache
|
||||
(
|
||||
) gpu_l3cache (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
|
@ -300,10 +286,8 @@ module Vortex_Socket (
|
|||
.snp_fwd (snp_fwd),
|
||||
.snp_fwd_addr (snp_fwd_addr),
|
||||
.snp_fwd_delay (|snp_fwd_delay)
|
||||
);
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
|
||||
|
||||
endmodule
|
|
@ -1,50 +1,45 @@
|
|||
|
||||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module byte_enabled_simple_dual_port_ram
|
||||
(
|
||||
input we, clk,
|
||||
input wire reset,
|
||||
input wire[4:0] waddr, raddr1, raddr2,
|
||||
input wire[`NT_M1:0] be,
|
||||
input wire[`NT_M1:0][31:0] wdata,
|
||||
output reg[`NT_M1:0][31:0] q1, q2
|
||||
input wire[`NUM_THREADS-1:0] be,
|
||||
input wire[`NUM_THREADS-1:0][31:0] wdata,
|
||||
output reg[`NUM_THREADS-1:0][31:0] q1, q2
|
||||
);
|
||||
|
||||
// integer regi;
|
||||
// integer threadi;
|
||||
// integer regi;
|
||||
// integer threadi;
|
||||
|
||||
// Thread Byte Bit
|
||||
logic [`NT_M1:0][3:0][7:0] GPR[31:0];
|
||||
// Thread Byte Bit
|
||||
logic [`NUM_THREADS-1:0][3:0][7:0] GPR[31:0];
|
||||
|
||||
// initial begin
|
||||
// for (ini = 0; ini < 32; ini = ini + 1) GPR[ini] = 0;
|
||||
// end
|
||||
|
||||
integer ini;
|
||||
always @(posedge clk, posedge reset) begin
|
||||
// TODO Clearing ram not currently supported on FPGA.
|
||||
if (reset) begin
|
||||
// `ifdef ASIC
|
||||
for (ini = 0; ini < 32; ini = ini + 1) GPR[ini] <= 0;
|
||||
// `endif
|
||||
end
|
||||
else if(we) begin
|
||||
always @(posedge clk) begin
|
||||
if (we) begin
|
||||
integer thread_ind;
|
||||
for (thread_ind = 0; thread_ind <= `NT_M1; thread_ind = thread_ind + 1) begin
|
||||
if(be[thread_ind]) GPR[waddr][thread_ind][0] <= wdata[thread_ind][7:0];
|
||||
if(be[thread_ind]) GPR[waddr][thread_ind][1] <= wdata[thread_ind][15:8];
|
||||
if(be[thread_ind]) GPR[waddr][thread_ind][2] <= wdata[thread_ind][23:16];
|
||||
if(be[thread_ind]) GPR[waddr][thread_ind][3] <= wdata[thread_ind][31:24];
|
||||
for (thread_ind = 0; thread_ind < `NUM_THREADS; thread_ind = thread_ind + 1) begin
|
||||
if (be[thread_ind]) begin
|
||||
GPR[waddr][thread_ind][0] <= wdata[thread_ind][7:0];
|
||||
GPR[waddr][thread_ind][1] <= wdata[thread_ind][15:8];
|
||||
GPR[waddr][thread_ind][2] <= wdata[thread_ind][23:16];
|
||||
GPR[waddr][thread_ind][3] <= wdata[thread_ind][31:24];
|
||||
end
|
||||
end
|
||||
end
|
||||
// $display("^^^^^^^^^^^^^^^^^^^^^^^");
|
||||
// for (regi = 0; regi <= 31; regi = regi + 1) begin
|
||||
// for (threadi = 0; threadi <= `NT_M1; threadi = threadi + 1) begin
|
||||
// for (threadi = 0; threadi < `NUM_THREADS; threadi = threadi + 1) begin
|
||||
// if (GPR[regi][threadi] != 0) $display("$%d: %h",regi, GPR[regi][threadi]);
|
||||
// end
|
||||
// end
|
||||
|
||||
end
|
||||
|
||||
assign q1 = GPR[raddr1];
|
||||
|
|
4
hw/rtl/cache/VX_Cache_Bank.v
vendored
4
hw/rtl/cache/VX_Cache_Bank.v
vendored
|
@ -2,7 +2,7 @@
|
|||
// Also add a bit about whether the "Way ID" is valid / being held or if it is just default
|
||||
// Also make sure all possible output states are transmitted back to the bank correctly
|
||||
|
||||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_Cache_Bank
|
||||
#(
|
||||
|
@ -67,7 +67,7 @@ module VX_Cache_Bank
|
|||
localparam RECIV_MEM_RSP = 2;
|
||||
|
||||
|
||||
localparam BLOCK_NUM_BITS = `CLOG2(CACHE_BLOCK);
|
||||
localparam BLOCK_NUM_BITS = `LOG2UP(CACHE_BLOCK);
|
||||
// Inputs
|
||||
input wire rst;
|
||||
input wire clk;
|
||||
|
|
2
hw/rtl/cache/VX_cache_bank_valid.v
vendored
2
hw/rtl/cache/VX_cache_bank_valid.v
vendored
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_cache_bank_valid
|
||||
#(
|
||||
|
|
2
hw/rtl/cache/VX_cache_data.v
vendored
2
hw/rtl/cache/VX_cache_data.v
vendored
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_cache_data
|
||||
#(
|
||||
|
|
2
hw/rtl/cache/VX_cache_data_per_index.v
vendored
2
hw/rtl/cache/VX_cache_data_per_index.v
vendored
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_cache_data_per_index
|
||||
#(
|
||||
|
|
2
hw/rtl/cache/VX_d_cache.v
vendored
2
hw/rtl/cache/VX_d_cache.v
vendored
|
@ -8,7 +8,7 @@
|
|||
// TO DO:
|
||||
// - Send in a response from memory of what the data is from the test bench
|
||||
|
||||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
//`include "VX_Cache_Bank.v"
|
||||
//`include "VX_cache_bank_valid.v"
|
||||
//`include "VX_priority_encoder.v"
|
||||
|
|
24
hw/rtl/cache/VX_d_cache_encapsulate.v
vendored
24
hw/rtl/cache/VX_d_cache_encapsulate.v
vendored
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
`define NUM_WORDS_PER_BLOCK 4
|
||||
|
||||
|
@ -33,17 +33,17 @@ module VX_d_cache_encapsulate (
|
|||
//parameter cache_entry = 9;
|
||||
input wire clk, rst;
|
||||
|
||||
input wire i_p_valid[`NT_M1:0];
|
||||
input wire [31:0] i_p_addr[`NT_M1:0];
|
||||
input wire i_p_valid[`NUM_THREADS-1:0];
|
||||
input wire [31:0] i_p_addr[`NUM_THREADS-1:0];
|
||||
input wire i_p_initial_request;
|
||||
input wire [31:0] i_p_writedata[`NT_M1:0];
|
||||
input wire [31:0] i_p_writedata[`NUM_THREADS-1:0];
|
||||
input wire i_p_read_or_write;
|
||||
|
||||
input wire [31:0] i_m_readdata[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0];
|
||||
input wire i_m_ready;
|
||||
|
||||
output reg [31:0] o_p_readdata[`NT_M1:0];
|
||||
output reg o_p_readdata_valid[`NT_M1:0] ;
|
||||
output reg [31:0] o_p_readdata[`NUM_THREADS-1:0];
|
||||
output reg o_p_readdata_valid[`NUM_THREADS-1:0] ;
|
||||
output reg o_p_waitrequest;
|
||||
|
||||
output reg [31:0] o_m_addr;
|
||||
|
@ -53,12 +53,12 @@ module VX_d_cache_encapsulate (
|
|||
|
||||
|
||||
// Inter
|
||||
wire [`NT_M1:0] i_p_valid_inter;
|
||||
wire [`NT_M1:0][31:0] i_p_addr_inter;
|
||||
wire [`NT_M1:0][31:0] i_p_writedata_inter;
|
||||
wire [`NUM_THREADS-1:0] i_p_valid_inter;
|
||||
wire [`NUM_THREADS-1:0][31:0] i_p_addr_inter;
|
||||
wire [`NUM_THREADS-1:0][31:0] i_p_writedata_inter;
|
||||
|
||||
reg [`NT_M1:0][31:0] o_p_readdata_inter;
|
||||
reg [`NT_M1:0] o_p_readdata_valid_inter;
|
||||
reg [`NUM_THREADS-1:0][31:0] o_p_readdata_inter;
|
||||
reg [`NUM_THREADS-1:0] o_p_readdata_valid_inter;
|
||||
|
||||
reg[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] o_m_writedata_inter;
|
||||
wire[NUMBER_BANKS - 1:0][`NUM_WORDS_PER_BLOCK-1:0][31:0] i_m_readdata_inter;
|
||||
|
@ -66,7 +66,7 @@ module VX_d_cache_encapsulate (
|
|||
|
||||
genvar curr_thraed, curr_bank, curr_word;
|
||||
generate
|
||||
for (curr_thraed = 0; curr_thraed < `NT; curr_thraed = curr_thraed + 1) begin : threads
|
||||
for (curr_thraed = 0; curr_thraed < `NUM_THREADS; curr_thraed = curr_thraed + 1) begin : threads
|
||||
assign i_p_valid_inter[curr_thraed] = i_p_valid[curr_thraed];
|
||||
assign i_p_addr_inter[curr_thraed] = i_p_addr[curr_thraed];
|
||||
assign i_p_writedata_inter[curr_thraed] = i_p_writedata[curr_thraed];
|
||||
|
|
12
hw/rtl/cache/VX_d_cache_tb.v
vendored
12
hw/rtl/cache/VX_d_cache_tb.v
vendored
|
@ -1,4 +1,4 @@
|
|||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
`include "VX_d_cache.v"
|
||||
|
||||
module VX_d_cache_tb;
|
||||
|
@ -6,13 +6,13 @@ module VX_d_cache_tb;
|
|||
parameter NUMBER_BANKS = 8;
|
||||
|
||||
reg clk, reset, im_ready;
|
||||
reg [`NT_M1:0] i_p_valid;
|
||||
reg [`NT_M1:0][13:0] i_p_addr; // FIXME
|
||||
reg [`NUM_THREADS-1:0] i_p_valid;
|
||||
reg [`NUM_THREADS-1:0][13:0] i_p_addr; // FIXME
|
||||
reg i_p_initial_request;
|
||||
reg [`NT_M1:0][31:0] i_p_writedata;
|
||||
reg [`NUM_THREADS-1:0][31:0] i_p_writedata;
|
||||
reg i_p_read_or_write; //, i_p_write;
|
||||
reg [`NT_M1:0][31:0] o_p_readdata;
|
||||
reg [`NT_M1:0] o_p_readdata_valid;
|
||||
reg [`NUM_THREADS-1:0][31:0] o_p_readdata;
|
||||
reg [`NUM_THREADS-1:0] o_p_readdata_valid;
|
||||
reg o_p_waitrequest;
|
||||
reg [13:0] o_m_addr; // Only one address is sent out at a time to memory
|
||||
reg o_m_valid;
|
||||
|
|
4
hw/rtl/cache/cache_set.v
vendored
4
hw/rtl/cache/cache_set.v
vendored
|
@ -2,7 +2,7 @@
|
|||
// Also add a bit about whether the "Way ID" is valid / being held or if it is just default
|
||||
// Also make sure all possible output states are transmitted back to the bank correctly
|
||||
|
||||
// `include "VX_define.v"
|
||||
// `include "VX_define.vh"
|
||||
module cache_set(clk,
|
||||
rst,
|
||||
// These next 4 are possible modes that the Set could be in, I am making them 4 different variables for indexing purposes
|
||||
|
@ -94,7 +94,7 @@ module cache_set(clk,
|
|||
readdata <= data[3];
|
||||
end
|
||||
end else if (access) begin
|
||||
//tag[`NT_M1:0] <= i_p_addr[`NT_M1:0][13:12];
|
||||
//tag[`NUM_THREADS-1:0] <= i_p_addr[`NUM_THREADS-1:0][13:12];
|
||||
counter <= ((counter + 1) ^ 3'b100); // Counter determining which to evict in the event of miss only increment when miss !!! NEED TO FIX LOGIC
|
||||
// Hit in First Column
|
||||
if (tag[0] == o_tag && valid[0]) begin
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
`include "VX_cache_config.v"
|
||||
`include "VX_define.v"
|
||||
`include "VX_cache_config.vh"
|
||||
`include "VX_define.vh"
|
||||
module VX_bank
|
||||
#(
|
||||
// Size of cache in bytes
|
||||
|
@ -60,7 +60,7 @@ module VX_bank
|
|||
input wire [4:0] bank_rd,
|
||||
input wire [NUMBER_REQUESTS-1:0][1:0] bank_wb,
|
||||
input wire [31:0] bank_pc,
|
||||
input wire [`NW_M1:0] bank_warp_num,
|
||||
input wire [`NW_BITS-1:0] bank_warp_num,
|
||||
input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_read,
|
||||
input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_write,
|
||||
output wire reqq_full,
|
||||
|
@ -71,7 +71,7 @@ module VX_bank
|
|||
output wire [`vx_clog2(NUMBER_REQUESTS)-1:0] bank_wb_tid,
|
||||
output wire [4:0] bank_wb_rd,
|
||||
output wire [1:0] bank_wb_wb,
|
||||
output wire [`NW_M1:0] bank_wb_warp_num,
|
||||
output wire [`NW_BITS-1:0] bank_wb_warp_num,
|
||||
output wire [`WORD_SIZE_RNG] bank_wb_data,
|
||||
output wire [31:0] bank_wb_pc,
|
||||
output wire [31:0] bank_wb_address,
|
||||
|
@ -86,14 +86,14 @@ module VX_bank
|
|||
// Dram Fill Response
|
||||
input wire dram_fill_rsp,
|
||||
input wire [31:0] dram_fill_addr,
|
||||
input wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dram_fill_rsp_data,
|
||||
input wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dram_fill_rsp_data,
|
||||
output wire dram_fill_accept,
|
||||
|
||||
// Dram WB Requests
|
||||
input wire dram_wb_queue_pop,
|
||||
output wire dram_wb_req,
|
||||
output wire[31:0] dram_wb_req_addr,
|
||||
output wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dram_wb_req_data,
|
||||
output wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dram_wb_req_data,
|
||||
|
||||
// Snp Request
|
||||
input wire snp_req,
|
||||
|
@ -112,7 +112,7 @@ module VX_bank
|
|||
if (reset) begin
|
||||
snoop_state <= 0;
|
||||
end else begin
|
||||
snoop_state <= (snoop_state | snp_req) && ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID));
|
||||
snoop_state <= (snoop_state | snp_req) && ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID));
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -139,11 +139,11 @@ module VX_bank
|
|||
wire dfpq_empty;
|
||||
wire dfpq_full;
|
||||
wire[31:0] dfpq_addr_st0;
|
||||
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dfpq_filldata_st0;
|
||||
wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dfpq_filldata_st0;
|
||||
|
||||
assign dram_fill_accept = !dfpq_full;
|
||||
|
||||
VX_generic_queue_ll #(.DATAW(32+(`BANK_LINE_SIZE_WORDS*`WORD_SIZE)), .SIZE(DFPQ_SIZE)) dfp_queue(
|
||||
VX_generic_queue_ll #(.DATAW(32+(`BANK_LINE_WORDS*`WORD_SIZE)), .SIZE(DFPQ_SIZE)) dfp_queue(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (dram_fill_rsp),
|
||||
|
@ -164,7 +164,7 @@ module VX_bank
|
|||
wire [`WORD_SIZE_RNG] reqq_req_writeword_st0;
|
||||
wire [4:0] reqq_req_rd_st0;
|
||||
wire [1:0] reqq_req_wb_st0;
|
||||
wire [`NW_M1:0] reqq_req_warp_num_st0;
|
||||
wire [`NW_BITS-1:0] reqq_req_warp_num_st0;
|
||||
wire [2:0] reqq_req_mem_read_st0;
|
||||
wire [2:0] reqq_req_mem_write_st0;
|
||||
wire [31:0] reqq_req_pc_st0;
|
||||
|
@ -231,7 +231,7 @@ module VX_bank
|
|||
wire [4:0] mrvq_rd_st0;
|
||||
wire [1:0] mrvq_wb_st0;
|
||||
wire [31:0] miss_resrv_pc_st0;
|
||||
wire [`NW_M1:0] mrvq_warp_num_st0;
|
||||
wire [`NW_BITS-1:0] mrvq_warp_num_st0;
|
||||
wire [2:0] mrvq_mem_read_st0;
|
||||
wire [2:0] mrvq_mem_write_st0;
|
||||
|
||||
|
@ -241,7 +241,7 @@ module VX_bank
|
|||
wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_add_tid;
|
||||
wire[4:0] miss_add_rd;
|
||||
wire[1:0] miss_add_wb;
|
||||
wire[`NW_M1:0] miss_add_warp_num;
|
||||
wire[`NW_BITS-1:0] miss_add_warp_num;
|
||||
wire[2:0] miss_add_mem_read;
|
||||
wire[2:0] miss_add_mem_write;
|
||||
|
||||
|
@ -336,7 +336,7 @@ module VX_bank
|
|||
wire qual_valid_st0;
|
||||
wire [31:0] qual_addr_st0;
|
||||
wire [`WORD_SIZE_RNG] qual_writeword_st0;
|
||||
wire [`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] qual_writedata_st0;
|
||||
wire [`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] qual_writedata_st0;
|
||||
wire [`REQ_INST_META_SIZE-1:0] qual_inst_meta_st0;
|
||||
wire qual_going_to_write_st0;
|
||||
wire qual_is_snp;
|
||||
|
@ -344,7 +344,7 @@ module VX_bank
|
|||
|
||||
wire [`WORD_SIZE_RNG] writeword_st1 [STAGE_1_CYCLES-1:0];
|
||||
wire [`REQ_INST_META_SIZE-1:0] inst_meta_st1 [STAGE_1_CYCLES-1:0];
|
||||
wire [`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] writedata_st1[STAGE_1_CYCLES-1:0];
|
||||
wire [`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] writedata_st1[STAGE_1_CYCLES-1:0];
|
||||
wire is_snp_st1 [STAGE_1_CYCLES-1:0];
|
||||
wire [31:0] pc_st1 [STAGE_1_CYCLES-1:0];
|
||||
|
||||
|
@ -387,7 +387,7 @@ module VX_bank
|
|||
reqq_pop ? reqq_req_writeword_st0 :
|
||||
0;
|
||||
|
||||
VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_c0 (
|
||||
VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_c0 (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_bank_pipe),
|
||||
|
@ -399,7 +399,7 @@ module VX_bank
|
|||
genvar curr_stage;
|
||||
generate
|
||||
for (curr_stage = 1; curr_stage < STAGE_1_CYCLES; curr_stage = curr_stage + 1) begin
|
||||
VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_SIZE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_cc (
|
||||
VX_generic_register #(.N( 1 + 1 + 1 + `WORD_SIZE + 32 + `REQ_INST_META_SIZE + (`BANK_LINE_WORDS*`WORD_SIZE) + 1 + 32)) s0_1_cc (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_bank_pipe),
|
||||
|
@ -412,7 +412,7 @@ module VX_bank
|
|||
|
||||
|
||||
wire[`WORD_SIZE_RNG] readword_st1e;
|
||||
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] readdata_st1e;
|
||||
wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] readdata_st1e;
|
||||
wire[`TAG_SELECT_SIZE_RNG] readtag_st1e;
|
||||
wire miss_st1e;
|
||||
wire dirty_st1e;
|
||||
|
@ -421,7 +421,7 @@ module VX_bank
|
|||
|
||||
wire [4:0] rd_st1e;
|
||||
wire [1:0] wb_st1e;
|
||||
wire [`NW_M1:0] warp_num_st1e;
|
||||
wire [`NW_BITS-1:0] warp_num_st1e;
|
||||
wire [2:0] mem_read_st1e;
|
||||
wire [2:0] mem_write_st1e;
|
||||
wire [`vx_clog2(NUMBER_REQUESTS)-1:0] tid_st1e;
|
||||
|
@ -488,7 +488,7 @@ module VX_bank
|
|||
wire valid_st2;
|
||||
wire[`WORD_SIZE_RNG] writeword_st2;
|
||||
wire[`WORD_SIZE_RNG] readword_st2;
|
||||
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] readdata_st2;
|
||||
wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] readdata_st2;
|
||||
wire miss_st2;
|
||||
wire dirty_st2;
|
||||
wire[`REQ_INST_META_SIZE-1:0] inst_meta_st2;
|
||||
|
@ -498,7 +498,7 @@ module VX_bank
|
|||
wire [31:0] pc_st2;
|
||||
|
||||
|
||||
VX_generic_register #(.N( 1+1+1+1+32+`WORD_SIZE+`WORD_SIZE+(`BANK_LINE_SIZE_WORDS * `WORD_SIZE) + `REQ_INST_META_SIZE + `TAG_SELECT_NUM_BITS + 32 + 2)) st_1e_2 (
|
||||
VX_generic_register #(.N( 1+1+1+1+32+`WORD_SIZE+`WORD_SIZE+(`BANK_LINE_WORDS * `WORD_SIZE) + `REQ_INST_META_SIZE + `TAG_SELECT_NUM_BITS + 32 + 2)) st_1e_2 (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_bank_pipe),
|
||||
|
@ -525,17 +525,17 @@ module VX_bank
|
|||
|
||||
|
||||
// Enqueue to CWB Queue
|
||||
wire cwbq_push = (valid_st2 && !miss_st2) && !cwbq_full && !((FUNC_ID == `LLFUNC_ID) && (miss_add_wb == 0)) && !((is_snp_st2 && valid_st2 && ffsq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
|
||||
wire cwbq_push = (valid_st2 && !miss_st2) && !cwbq_full && !((FUNC_ID == `L2FUNC_ID) && (miss_add_wb == 0)) && !((is_snp_st2 && valid_st2 && ffsq_full) || (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full) || (valid_st2 && miss_st2 && mrvq_full) || (valid_st2 && miss_st2 && !invalidate_fill && dram_fill_req_queue_full));
|
||||
wire [`WORD_SIZE_RNG] cwbq_data = readword_st2;
|
||||
wire [`vx_clog2(NUMBER_REQUESTS)-1:0] cwbq_tid = miss_add_tid;
|
||||
wire [4:0] cwbq_rd = miss_add_rd;
|
||||
wire [1:0] cwbq_wb = miss_add_wb;
|
||||
wire [`NW_M1:0] cwbq_warp_num = miss_add_warp_num;
|
||||
wire [`NW_BITS-1:0] cwbq_warp_num = miss_add_warp_num;
|
||||
wire [31:0] cwbq_pc = pc_st2;
|
||||
|
||||
wire cwbq_empty;
|
||||
assign bank_wb_valid = !cwbq_empty;
|
||||
VX_generic_queue_ll #(.DATAW( `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_M1+1) + `WORD_SIZE + 32 + 32), .SIZE(CWBQ_SIZE)) cwb_queue(
|
||||
VX_generic_queue_ll #(.DATAW( `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_BITS-1+1) + `WORD_SIZE + 32 + 32), .SIZE(CWBQ_SIZE)) cwb_queue(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
|
@ -554,8 +554,8 @@ module VX_bank
|
|||
wire[31:0] dwbq_req_addr;
|
||||
wire dwbq_empty;
|
||||
|
||||
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] dwbq_req_data;
|
||||
if ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
|
||||
wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] dwbq_req_data;
|
||||
if ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
|
||||
assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2;
|
||||
assign dwbq_req_addr = (should_flush && dwbq_push) ? (addr_st2) : ({readtag_st2, addr_st2[`LINE_SELECT_ADDR_END:0]} & `BASE_ADDR_MASK);
|
||||
end else begin
|
||||
|
@ -603,7 +603,7 @@ module VX_bank
|
|||
assign dram_fill_req_addr = addr_st2 & `BASE_ADDR_MASK;
|
||||
|
||||
assign dram_wb_req = !dwbq_empty;
|
||||
VX_generic_queue_ll #(.DATAW( 32 + (`BANK_LINE_SIZE_WORDS * `WORD_SIZE)), .SIZE(DWBQ_SIZE)) dwb_queue(
|
||||
VX_generic_queue_ll #(.DATAW( 32 + (`BANK_LINE_WORDS * `WORD_SIZE)), .SIZE(DWBQ_SIZE)) dwb_queue(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_cache_config.v"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache
|
||||
#(
|
||||
|
@ -66,7 +66,7 @@ module VX_cache
|
|||
// Req meta
|
||||
input wire [4:0] core_req_rd,
|
||||
input wire [NUMBER_REQUESTS-1:0][1:0] core_req_wb,
|
||||
input wire [`NW_M1:0] core_req_warp_num,
|
||||
input wire [`NW_BITS-1:0] core_req_warp_num,
|
||||
input wire [31:0] core_req_pc,
|
||||
output wire delay_req,
|
||||
|
||||
|
@ -75,7 +75,7 @@ module VX_cache
|
|||
output wire [NUMBER_REQUESTS-1:0] core_wb_valid,
|
||||
output wire [4:0] core_wb_req_rd,
|
||||
output wire [1:0] core_wb_req_wb,
|
||||
output wire [`NW_M1:0] core_wb_warp_num,
|
||||
output wire [`NW_BITS-1:0] core_wb_warp_num,
|
||||
output wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] core_wb_readdata,
|
||||
output wire [NUMBER_REQUESTS-1:0][31:0] core_wb_pc,
|
||||
output wire [NUMBER_REQUESTS-1:0][31:0] core_wb_address,
|
||||
|
@ -84,7 +84,7 @@ module VX_cache
|
|||
// Dram Fill Response
|
||||
input wire dram_fill_rsp,
|
||||
input wire [31:0] dram_fill_rsp_addr,
|
||||
input wire [`IBANK_LINE_SIZE_RNG][31:0] dram_fill_rsp_data,
|
||||
input wire [`IBANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data,
|
||||
output wire dram_fill_accept,
|
||||
|
||||
// Dram request
|
||||
|
@ -93,7 +93,7 @@ module VX_cache
|
|||
output wire dram_req_read,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [31:0] dram_req_size,
|
||||
output wire [`IBANK_LINE_SIZE_RNG][31:0] dram_req_data,
|
||||
output wire [`IBANK_LINE_WORDS-1:0][31:0] dram_req_data,
|
||||
output wire dram_req_because_of_wb,
|
||||
input wire dram_req_delay,
|
||||
|
||||
|
@ -119,7 +119,7 @@ module VX_cache
|
|||
wire [NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_wb_tid;
|
||||
wire [NUMBER_BANKS-1:0][4:0] per_bank_wb_rd;
|
||||
wire [NUMBER_BANKS-1:0][1:0] per_bank_wb_wb;
|
||||
wire [NUMBER_BANKS-1:0][`NW_M1:0] per_bank_wb_warp_num;
|
||||
wire [NUMBER_BANKS-1:0][`NW_BITS-1:0] per_bank_wb_warp_num;
|
||||
wire [NUMBER_BANKS-1:0][`WORD_SIZE_RNG] per_bank_wb_data;
|
||||
wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_pc;
|
||||
wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_address;
|
||||
|
@ -134,7 +134,7 @@ module VX_cache
|
|||
wire[NUMBER_BANKS-1:0] per_bank_dram_wb_req;
|
||||
wire[NUMBER_BANKS-1:0] per_bank_dram_because_of_snp;
|
||||
wire[NUMBER_BANKS-1:0][31:0] per_bank_dram_wb_req_addr;
|
||||
wire[NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] per_bank_dram_wb_req_data;
|
||||
wire[NUMBER_BANKS-1:0][`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] per_bank_dram_wb_req_data;
|
||||
|
||||
wire[NUMBER_BANKS-1:0] per_bank_reqq_full;
|
||||
|
||||
|
@ -287,7 +287,7 @@ module VX_cache
|
|||
wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] curr_bank_writedata;
|
||||
wire [4:0] curr_bank_rd;
|
||||
wire [NUMBER_REQUESTS-1:0][1:0] curr_bank_wb;
|
||||
wire [`NW_M1:0] curr_bank_warp_num;
|
||||
wire [`NW_BITS-1:0] curr_bank_warp_num;
|
||||
wire [NUMBER_REQUESTS-1:0][2:0] curr_bank_mem_read;
|
||||
wire [NUMBER_REQUESTS-1:0][2:0] curr_bank_mem_write;
|
||||
wire [31:0] curr_bank_pc;
|
||||
|
@ -298,13 +298,13 @@ module VX_cache
|
|||
wire [31:0] curr_bank_wb_pc;
|
||||
wire [4:0] curr_bank_wb_rd;
|
||||
wire [1:0] curr_bank_wb_wb;
|
||||
wire [`NW_M1:0] curr_bank_wb_warp_num;
|
||||
wire [`NW_BITS-1:0] curr_bank_wb_warp_num;
|
||||
wire [`WORD_SIZE_RNG] curr_bank_wb_data;
|
||||
wire [31:0] curr_bank_wb_address;
|
||||
|
||||
wire curr_bank_dram_fill_rsp;
|
||||
wire [31:0] curr_bank_dram_fill_rsp_addr;
|
||||
wire [`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] curr_bank_dram_fill_rsp_data;
|
||||
wire [`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] curr_bank_dram_fill_rsp_data;
|
||||
wire curr_bank_dram_fill_accept;
|
||||
|
||||
wire curr_bank_dfqq_full;
|
||||
|
@ -316,7 +316,7 @@ module VX_cache
|
|||
wire curr_bank_dram_wb_queue_pop;
|
||||
wire curr_bank_dram_wb_req;
|
||||
wire[31:0] curr_bank_dram_wb_req_addr;
|
||||
wire[`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] curr_bank_dram_wb_req_data;
|
||||
wire[`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] curr_bank_dram_wb_req_data;
|
||||
|
||||
wire curr_bank_snp_req;
|
||||
wire[31:0] curr_bank_snp_req_addr;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
`ifndef VX_CACHE_CONFIG
|
||||
`define VX_CACHE_CONFIG
|
||||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
|
||||
// data tid rd wb warp_num read write
|
||||
|
@ -10,10 +10,10 @@
|
|||
// vx_clog2(value): evaluates to 1 when value == 1, otherwise to $clog2(value).
// $clog2(1) is 0, so the special case keeps the result at least 1 for a
// single-element count (presumably so that ranges written as
// [`vx_clog2(N)-1:0] stay at least one bit wide -- confirm against users).
// NOTE(review): `value` is not parenthesized in the comparison; pass a
// parenthesized or simple expression to avoid operator-precedence surprises.
`define vx_clog2(value) ((value == 1) ? 1 : $clog2(value))
|
||||
|
||||
|
||||
`define MRVQ_METADATA_SIZE (`WORD_SIZE + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_M1 + 1) + 3 + 3)
|
||||
`define MRVQ_METADATA_SIZE (`WORD_SIZE + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_BITS-1 + 1) + 3 + 3)
|
||||
|
||||
// 5 + 2 + 4 + 3 + 3 + 1
|
||||
`define REQ_INST_META_SIZE (5 + 2 + (`NW_M1+1) + 3 + 3 + `vx_clog2(NUMBER_REQUESTS))
|
||||
`define REQ_INST_META_SIZE (5 + 2 + (`NW_BITS-1+1) + 3 + 3 + `vx_clog2(NUMBER_REQUESTS))
|
||||
|
||||
// `define vx_clog2_h(value, x) (value == (1 << x)) ? (x)
|
||||
|
||||
|
@ -60,9 +60,7 @@
|
|||
// 8
|
||||
`define BANK_LINE_COUNT (`BANK_SIZE_BYTES/BANK_LINE_SIZE_BYTES)
|
||||
// 4
|
||||
`define BANK_LINE_SIZE_WORDS (BANK_LINE_SIZE_BYTES / WORD_SIZE_BYTES)
|
||||
// 3:0
|
||||
`define BANK_LINE_SIZE_RNG `BANK_LINE_SIZE_WORDS-1:0
|
||||
`define BANK_LINE_WORDS (BANK_LINE_SIZE_BYTES / WORD_SIZE_BYTES)
|
||||
|
||||
// Offset is fixed
|
||||
`define OFFSET_ADDR_NUM_BITS 2
|
||||
|
@ -73,7 +71,7 @@
|
|||
`define OFFSET_SIZE_RNG `OFFSET_SIZE_END:0
|
||||
|
||||
// 2
|
||||
`define WORD_SELECT_NUM_BITS (`vx_clog2(`BANK_LINE_SIZE_WORDS))
|
||||
`define WORD_SELECT_NUM_BITS (`vx_clog2(`BANK_LINE_WORDS))
|
||||
// 2
|
||||
`define WORD_SELECT_SIZE_END (`WORD_SELECT_NUM_BITS)
|
||||
// 2
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "VX_cache_config.v"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_core_req_bank_sel
|
||||
#(
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_cache_config.v"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_dfq_queue
|
||||
#(
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_cache_config.v"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_dram_req_arb
|
||||
#(
|
||||
|
@ -62,7 +62,7 @@ module VX_cache_dram_req_arb
|
|||
output wire[NUMBER_BANKS-1:0] per_bank_dram_wb_queue_pop,
|
||||
input wire[NUMBER_BANKS-1:0] per_bank_dram_wb_req,
|
||||
input wire[NUMBER_BANKS-1:0][31:0] per_bank_dram_wb_req_addr,
|
||||
input wire[NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][`WORD_SIZE-1:0] per_bank_dram_wb_req_data,
|
||||
input wire[NUMBER_BANKS-1:0][`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] per_bank_dram_wb_req_data,
|
||||
input wire[NUMBER_BANKS-1:0] per_bank_dram_because_of_snp,
|
||||
|
||||
// real Dram request
|
||||
|
@ -71,7 +71,7 @@ module VX_cache_dram_req_arb
|
|||
output wire dram_req_read,
|
||||
output wire [31:0] dram_req_addr,
|
||||
output wire [31:0] dram_req_size,
|
||||
output wire [`IBANK_LINE_SIZE_RNG][31:0] dram_req_data,
|
||||
output wire [`IBANK_LINE_WORDS-1:0][31:0] dram_req_data,
|
||||
output wire dram_req_because_of_wb,
|
||||
|
||||
input wire dram_req_delay
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "VX_cache_config.v"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_miss_resrv
|
||||
#(
|
||||
|
@ -56,7 +56,7 @@ module VX_cache_miss_resrv
|
|||
input wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_add_tid,
|
||||
input wire[4:0] miss_add_rd,
|
||||
input wire[1:0] miss_add_wb,
|
||||
input wire[`NW_M1:0] miss_add_warp_num,
|
||||
input wire[`NW_BITS-1:0] miss_add_warp_num,
|
||||
input wire[2:0] miss_add_mem_read,
|
||||
input wire[2:0] miss_add_mem_write,
|
||||
input wire[31:0] miss_add_pc,
|
||||
|
@ -75,14 +75,14 @@ module VX_cache_miss_resrv
|
|||
output wire[`vx_clog2(NUMBER_REQUESTS)-1:0] miss_resrv_tid_st0,
|
||||
output wire[4:0] miss_resrv_rd_st0,
|
||||
output wire[1:0] miss_resrv_wb_st0,
|
||||
output wire[`NW_M1:0] miss_resrv_warp_num_st0,
|
||||
output wire[`NW_BITS-1:0] miss_resrv_warp_num_st0,
|
||||
output wire[2:0] miss_resrv_mem_read_st0,
|
||||
output wire[31:0] miss_resrv_pc_st0,
|
||||
output wire[2:0] miss_resrv_mem_write_st0
|
||||
|
||||
);
|
||||
|
||||
// Size of metadata = 32 + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_M1 + 1)
|
||||
// Size of metadata = 32 + `vx_clog2(NUMBER_REQUESTS) + 5 + 2 + (`NW_BITS-1 + 1)
|
||||
reg[`MRVQ_METADATA_SIZE-1:0] metadata_table[MRVQ_SIZE-1:0];
|
||||
reg[MRVQ_SIZE-1:0][31:0] addr_table;
|
||||
reg[MRVQ_SIZE-1:0][31:0] pc_table;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_cache_config.v"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_req_queue
|
||||
#(
|
||||
|
@ -55,7 +55,7 @@ module VX_cache_req_queue
|
|||
input wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] bank_writedata,
|
||||
input wire [4:0] bank_rd,
|
||||
input wire [NUMBER_REQUESTS-1:0][1:0] bank_wb,
|
||||
input wire [`NW_M1:0] bank_warp_num,
|
||||
input wire [`NW_BITS-1:0] bank_warp_num,
|
||||
input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_read,
|
||||
input wire [NUMBER_REQUESTS-1:0][2:0] bank_mem_write,
|
||||
input wire [31:0] bank_pc,
|
||||
|
@ -68,7 +68,7 @@ module VX_cache_req_queue
|
|||
output wire [`WORD_SIZE_RNG] reqq_req_writedata_st0,
|
||||
output wire [4:0] reqq_req_rd_st0,
|
||||
output wire [1:0] reqq_req_wb_st0,
|
||||
output wire [`NW_M1:0] reqq_req_warp_num_st0,
|
||||
output wire [`NW_BITS-1:0] reqq_req_warp_num_st0,
|
||||
output wire [2:0] reqq_req_mem_read_st0,
|
||||
output wire [2:0] reqq_req_mem_write_st0,
|
||||
output wire [31:0] reqq_req_pc_st0,
|
||||
|
@ -83,7 +83,7 @@ module VX_cache_req_queue
|
|||
wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] out_per_writedata;
|
||||
wire [4:0] out_per_rd;
|
||||
wire [NUMBER_REQUESTS-1:0][1:0] out_per_wb;
|
||||
wire [`NW_M1:0] out_per_warp_num;
|
||||
wire [`NW_BITS-1:0] out_per_warp_num;
|
||||
wire [NUMBER_REQUESTS-1:0][2:0] out_per_mem_read;
|
||||
wire [NUMBER_REQUESTS-1:0][2:0] out_per_mem_write;
|
||||
wire [31:0] out_per_pc;
|
||||
|
@ -95,7 +95,7 @@ module VX_cache_req_queue
|
|||
reg [4:0] use_per_rd;
|
||||
reg [NUMBER_REQUESTS-1:0][1:0] use_per_wb;
|
||||
reg [31:0] use_per_pc;
|
||||
reg [`NW_M1:0] use_per_warp_num;
|
||||
reg [`NW_BITS-1:0] use_per_warp_num;
|
||||
reg [NUMBER_REQUESTS-1:0][2:0] use_per_mem_read;
|
||||
reg [NUMBER_REQUESTS-1:0][2:0] use_per_mem_write;
|
||||
|
||||
|
@ -105,7 +105,7 @@ module VX_cache_req_queue
|
|||
wire [NUMBER_REQUESTS-1:0][`WORD_SIZE_RNG] qual_writedata;
|
||||
wire [4:0] qual_rd;
|
||||
wire [NUMBER_REQUESTS-1:0][1:0] qual_wb;
|
||||
wire [`NW_M1:0] qual_warp_num;
|
||||
wire [`NW_BITS-1:0] qual_warp_num;
|
||||
wire [NUMBER_REQUESTS-1:0][2:0] qual_mem_read;
|
||||
wire [NUMBER_REQUESTS-1:0][2:0] qual_mem_write;
|
||||
wire [31:0] qual_pc;
|
||||
|
@ -120,7 +120,7 @@ module VX_cache_req_queue
|
|||
wire push_qual = reqq_push && !reqq_full;
|
||||
wire pop_qual = !out_empty && use_empty;
|
||||
|
||||
VX_generic_queue_ll #(.DATAW( (NUMBER_REQUESTS * (1+32+`WORD_SIZE)) + 5 + (NUMBER_REQUESTS*2) + (`NW_M1+1) + (NUMBER_REQUESTS * (3 + 3)) + 32 ), .SIZE(REQQ_SIZE)) reqq_queue(
|
||||
VX_generic_queue_ll #(.DATAW( (NUMBER_REQUESTS * (1+32+`WORD_SIZE)) + 5 + (NUMBER_REQUESTS*2) + (`NW_BITS-1+1) + (NUMBER_REQUESTS * (3 + 3)) + 32 ), .SIZE(REQQ_SIZE)) reqq_queue(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (push_qual),
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_cache_config.v"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_cache_wb_sel_merge
|
||||
#(
|
||||
|
@ -53,7 +53,7 @@ module VX_cache_wb_sel_merge
|
|||
input wire [NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_wb_tid,
|
||||
input wire [NUMBER_BANKS-1:0][4:0] per_bank_wb_rd,
|
||||
input wire [NUMBER_BANKS-1:0][1:0] per_bank_wb_wb,
|
||||
input wire [NUMBER_BANKS-1:0][`NW_M1:0] per_bank_wb_warp_num,
|
||||
input wire [NUMBER_BANKS-1:0][`NW_BITS-1:0] per_bank_wb_warp_num,
|
||||
input wire [NUMBER_BANKS-1:0][`WORD_SIZE_RNG] per_bank_wb_data,
|
||||
input wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_pc,
|
||||
input wire [NUMBER_BANKS-1:0][31:0] per_bank_wb_address,
|
||||
|
@ -67,7 +67,7 @@ module VX_cache_wb_sel_merge
|
|||
output reg [NUMBER_REQUESTS-1:0][31:0] core_wb_pc,
|
||||
output wire [4:0] core_wb_req_rd,
|
||||
output wire [1:0] core_wb_req_wb,
|
||||
output wire [`NW_M1:0] core_wb_warp_num,
|
||||
output wire [`NW_BITS-1:0] core_wb_warp_num,
|
||||
output reg [NUMBER_REQUESTS-1:0][31:0] core_wb_address
|
||||
|
||||
);
|
||||
|
@ -105,7 +105,7 @@ module VX_cache_wb_sel_merge
|
|||
core_wb_pc = 0;
|
||||
core_wb_address = 0;
|
||||
for (this_bank = 0; this_bank < NUMBER_BANKS; this_bank = this_bank + 1) begin
|
||||
if ((FUNC_ID == `LLFUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
|
||||
if ((FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID)) begin
|
||||
|
||||
if (found_bank && !core_wb_valid[per_bank_wb_tid[this_bank]] && per_bank_wb_valid[this_bank] && ((this_bank == main_bank_index) || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index]))) begin
|
||||
core_wb_valid[per_bank_wb_tid[this_bank]] = 1;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_cache_config.v"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_dcache_llv_resp_bank_sel
|
||||
#(
|
||||
|
@ -48,13 +48,13 @@ module VX_dcache_llv_resp_bank_sel
|
|||
output reg [NUMBER_BANKS-1:0] per_bank_llvq_pop,
|
||||
input wire[NUMBER_BANKS-1:0] per_bank_llvq_valid,
|
||||
input wire[NUMBER_BANKS-1:0][31:0] per_bank_llvq_res_addr,
|
||||
input wire[NUMBER_BANKS-1:0][`BANK_LINE_SIZE_RNG][31:0] per_bank_llvq_res_data,
|
||||
input wire[NUMBER_BANKS-1:0][`BANK_LINE_WORDS-1:0][31:0] per_bank_llvq_res_data,
|
||||
input wire[NUMBER_BANKS-1:0][`vx_clog2(NUMBER_REQUESTS)-1:0] per_bank_llvq_res_tid,
|
||||
|
||||
input wire llvq_pop,
|
||||
output reg[NUMBER_REQUESTS-1:0] llvq_valid,
|
||||
output reg[NUMBER_REQUESTS-1:0][31:0] llvq_res_addr,
|
||||
output reg[NUMBER_REQUESTS-1:0][`BANK_LINE_SIZE_RNG][31:0] llvq_res_data
|
||||
output reg[NUMBER_REQUESTS-1:0][`BANK_LINE_WORDS-1:0][31:0] llvq_res_data
|
||||
|
||||
|
||||
);
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_cache_config.v"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_fill_invalidator
|
||||
#(
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_cache_config.v"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_prefetcher
|
||||
#(
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_cache_config.v"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_snp_fwd_arb
|
||||
#(
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_cache_config.v"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_tag_data_access
|
||||
#(
|
||||
|
@ -60,12 +60,12 @@ module VX_tag_data_access
|
|||
input wire writefill_st1e,
|
||||
input wire[31:0] writeaddr_st1e,
|
||||
input wire[`WORD_SIZE_RNG] writeword_st1e,
|
||||
input wire[`DBANK_LINE_SIZE_RNG][31:0] writedata_st1e,
|
||||
input wire[`DBANK_LINE_WORDS-1:0][31:0] writedata_st1e,
|
||||
input wire[2:0] mem_write_st1e,
|
||||
input wire[2:0] mem_read_st1e,
|
||||
|
||||
output wire[`WORD_SIZE_RNG] readword_st1e,
|
||||
output wire[`DBANK_LINE_SIZE_RNG][31:0] readdata_st1e,
|
||||
output wire[`DBANK_LINE_WORDS-1:0][31:0] readdata_st1e,
|
||||
output wire[`TAG_SELECT_SIZE_RNG] readtag_st1e,
|
||||
output wire miss_st1e,
|
||||
output wire dirty_st1e,
|
||||
|
@ -74,25 +74,25 @@ module VX_tag_data_access
|
|||
);
|
||||
|
||||
|
||||
reg[`DBANK_LINE_SIZE_RNG][31:0] readdata_st[STAGE_1_CYCLES-1:0];
|
||||
reg[`DBANK_LINE_WORDS-1:0][31:0] readdata_st[STAGE_1_CYCLES-1:0];
|
||||
|
||||
reg read_valid_st1c[STAGE_1_CYCLES-1:0];
|
||||
reg read_dirty_st1c[STAGE_1_CYCLES-1:0];
|
||||
reg[`TAG_SELECT_SIZE_RNG] read_tag_st1c [STAGE_1_CYCLES-1:0];
|
||||
reg[`DBANK_LINE_SIZE_RNG][31:0] read_data_st1c [STAGE_1_CYCLES-1:0];
|
||||
reg[`DBANK_LINE_WORDS-1:0][31:0] read_data_st1c [STAGE_1_CYCLES-1:0];
|
||||
|
||||
|
||||
wire qual_read_valid_st1;
|
||||
wire qual_read_dirty_st1;
|
||||
wire[`TAG_SELECT_SIZE_RNG] qual_read_tag_st1;
|
||||
wire[`DBANK_LINE_SIZE_RNG][31:0] qual_read_data_st1;
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] qual_read_data_st1;
|
||||
|
||||
wire use_read_valid_st1e;
|
||||
wire use_read_dirty_st1e;
|
||||
wire[`TAG_SELECT_SIZE_RNG] use_read_tag_st1e;
|
||||
wire[`DBANK_LINE_SIZE_RNG][31:0] use_read_data_st1e;
|
||||
wire[`DBANK_LINE_SIZE_RNG][3:0] use_write_enable;
|
||||
wire[`DBANK_LINE_SIZE_RNG][31:0] use_write_data;
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] use_read_data_st1e;
|
||||
wire[`DBANK_LINE_WORDS-1:0][3:0] use_write_enable;
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] use_write_data;
|
||||
|
||||
wire sw, sb, sh;
|
||||
|
||||
|
@ -140,8 +140,8 @@ module VX_tag_data_access
|
|||
.fill_sent (fill_sent)
|
||||
);
|
||||
|
||||
// VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) )) s0_1_c0 (
|
||||
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) ), .Valid(0)) s0_1_c0 (
|
||||
// VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32) )) s0_1_c0 (
|
||||
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32) ), .Valid(0)) s0_1_c0 (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall),
|
||||
|
@ -153,7 +153,7 @@ module VX_tag_data_access
|
|||
genvar curr_stage;
|
||||
generate
|
||||
for (curr_stage = 1; curr_stage < STAGE_1_CYCLES-1; curr_stage = curr_stage + 1) begin
|
||||
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_SIZE_WORDS*32) )) s0_1_cc (
|
||||
VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_NUM_BITS + (`DBANK_LINE_WORDS*32) )) s0_1_cc (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall),
|
||||
|
@ -170,7 +170,7 @@ module VX_tag_data_access
|
|||
assign use_read_tag_st1e = (FUNC_ID == `SFUNC_ID) ? writeaddr_st1e[`TAG_SELECT_ADDR_RNG] : read_tag_st1c [STAGE_1_CYCLES-1]; // Tag is always the same in SM
|
||||
|
||||
genvar curr_w;
|
||||
for (curr_w = 0; curr_w < `DBANK_LINE_SIZE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[STAGE_1_CYCLES-1][curr_w][31:0];
|
||||
for (curr_w = 0; curr_w < `DBANK_LINE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[STAGE_1_CYCLES-1][curr_w][31:0];
|
||||
// assign use_read_data_st1e = read_data_st1c [STAGE_1_CYCLES-1];
|
||||
|
||||
/////////////////////// LOAD LOGIC ///////////////////
|
||||
|
@ -243,23 +243,23 @@ module VX_tag_data_access
|
|||
wire should_write = (sw || sb || sh) && valid_req_st1e && use_read_valid_st1e && !miss_st1e && !is_snp_st1e;
|
||||
wire force_write = real_writefill;
|
||||
|
||||
wire[`DBANK_LINE_SIZE_RNG][3:0] we;
|
||||
wire[`DBANK_LINE_SIZE_RNG][31:0] data_write;
|
||||
wire[`DBANK_LINE_WORDS-1:0][3:0] we;
|
||||
wire[`DBANK_LINE_WORDS-1:0][31:0] data_write;
|
||||
genvar g;
|
||||
generate
|
||||
for (g = 0; g < `DBANK_LINE_SIZE_WORDS; g = g + 1) begin : write_enables
|
||||
for (g = 0; g < `DBANK_LINE_WORDS; g = g + 1) begin : write_enables
|
||||
wire normal_write = (block_offset == g[`WORD_SELECT_SIZE_RNG]) && should_write && !real_writefill;
|
||||
|
||||
assign we[g] = (force_write) ? 4'b1111 :
|
||||
(should_write && !real_writefill && (FUNC_ID == `LLFUNC_ID)) ? 4'b1111 :
|
||||
(should_write && !real_writefill && (FUNC_ID == `L2FUNC_ID)) ? 4'b1111 :
|
||||
(normal_write && sw) ? 4'b1111 :
|
||||
(normal_write && sb) ? sb_mask :
|
||||
(normal_write && sh) ? sh_mask :
|
||||
4'b0000;
|
||||
|
||||
if (!(FUNC_ID == `LLFUNC_ID)) assign data_write[g] = force_write ? writedata_st1e[g] : use_write_dat;
|
||||
if (!(FUNC_ID == `L2FUNC_ID)) assign data_write[g] = force_write ? writedata_st1e[g] : use_write_dat;
|
||||
end
|
||||
if ((FUNC_ID == `LLFUNC_ID)) begin
|
||||
if ((FUNC_ID == `L2FUNC_ID)) begin
|
||||
assign data_write = force_write ? writedata_st1e : writeword_st1e;
|
||||
end
|
||||
endgenerate
|
||||
|
@ -268,7 +268,7 @@ module VX_tag_data_access
|
|||
assign use_write_data = data_write;
|
||||
|
||||
///////////////////////
|
||||
if (FUNC_ID == `LLFUNC_ID) begin
|
||||
if (FUNC_ID == `L2FUNC_ID) begin
|
||||
assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-1];
|
||||
end else begin
|
||||
assign readword_st1e = data_Qual;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "VX_cache_config.v"
|
||||
`include "VX_cache_config.vh"
|
||||
|
||||
module VX_tag_data_structure
|
||||
#(
|
||||
|
@ -55,18 +55,18 @@ module VX_tag_data_structure
|
|||
output wire read_valid,
|
||||
output wire read_dirty,
|
||||
output wire[`TAG_SELECT_SIZE_RNG] read_tag,
|
||||
output wire[`DBANK_LINE_SIZE_RNG][31:0] read_data,
|
||||
output wire[`DBANK_LINE_WORDS-1:0][31:0] read_data,
|
||||
|
||||
input wire invalidate,
|
||||
input wire[`DBANK_LINE_SIZE_RNG][3:0] write_enable,
|
||||
input wire[`DBANK_LINE_WORDS-1:0][3:0] write_enable,
|
||||
input wire write_fill,
|
||||
input wire[31:0] write_addr,
|
||||
input wire[`DBANK_LINE_SIZE_RNG][31:0] write_data,
|
||||
input wire[`DBANK_LINE_WORDS-1:0][31:0] write_data,
|
||||
input wire fill_sent
|
||||
|
||||
);
|
||||
|
||||
reg[`DBANK_LINE_SIZE_RNG][3:0][7:0] data [`BANK_LINE_COUNT-1:0];
|
||||
reg[`DBANK_LINE_WORDS-1:0][3:0][7:0] data [`BANK_LINE_COUNT-1:0];
|
||||
reg[`TAG_SELECT_SIZE_RNG] tag [`BANK_LINE_COUNT-1:0];
|
||||
reg valid[`BANK_LINE_COUNT-1:0];
|
||||
reg dirty[`BANK_LINE_COUNT-1:0];
|
||||
|
@ -110,7 +110,7 @@ module VX_tag_data_structure
|
|||
valid[write_addr[`LINE_SELECT_ADDR_RNG]] <= 0;
|
||||
end
|
||||
|
||||
for (f = 0; f < `DBANK_LINE_SIZE_WORDS; f = f + 1) begin
|
||||
for (f = 0; f < `DBANK_LINE_WORDS; f = f + 1) begin
|
||||
if (write_enable[f][0]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][0] <= write_data[f][7 :0 ];
|
||||
if (write_enable[f][1]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][1] <= write_data[f][15:8 ];
|
||||
if (write_enable[f][2]) data[write_addr[`LINE_SELECT_ADDR_RNG]][f][2] <= write_data[f][23:16];
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_BRANCH_RSP
|
||||
|
||||
|
@ -9,7 +9,7 @@ interface VX_branch_response_inter ();
|
|||
wire valid_branch;
|
||||
wire branch_dir;
|
||||
wire[31:0] branch_dest;
|
||||
wire[`NW_M1:0] branch_warp_num;
|
||||
wire[`NW_BITS-1:0] branch_warp_num;
|
||||
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_CSR_REQ
|
||||
|
||||
|
@ -7,8 +7,8 @@
|
|||
|
||||
interface VX_csr_req_inter ();
|
||||
|
||||
wire[`NT_M1:0] valid;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[4:0] alu_op;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_CSR_WB_REQ
|
||||
|
||||
|
@ -7,15 +7,13 @@
|
|||
|
||||
interface VX_csr_wb_inter ();
|
||||
|
||||
wire[`NT_M1:0] valid;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
|
||||
wire[`NT_M1:0][31:0] csr_result;
|
||||
|
||||
wire[`NUM_THREADS-1:0][31:0] csr_result;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_DCACHE_REQ
|
||||
|
||||
|
@ -7,11 +7,11 @@
|
|||
|
||||
interface VX_dcache_request_inter ();
|
||||
|
||||
wire[`NT_M1:0][31:0] out_cache_driver_in_address;
|
||||
wire[`NUM_THREADS-1:0][31:0] out_cache_driver_in_address;
|
||||
wire[2:0] out_cache_driver_in_mem_read;
|
||||
wire[2:0] out_cache_driver_in_mem_write;
|
||||
wire[`NT_M1:0] out_cache_driver_in_valid;
|
||||
wire[`NT_M1:0][31:0] out_cache_driver_in_data;
|
||||
wire[`NUM_THREADS-1:0] out_cache_driver_in_valid;
|
||||
wire[`NUM_THREADS-1:0][31:0] out_cache_driver_in_data;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_DCACHE_RSP
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
|||
|
||||
interface VX_dcache_response_inter ();
|
||||
|
||||
wire[`NT_M1:0][31:0] in_cache_driver_out_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] in_cache_driver_out_data;
|
||||
wire delay;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_DRAM_REQ_RSP_INTER
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_EXE_UNIT_REQ_INTER
|
||||
|
||||
|
@ -8,8 +8,8 @@
|
|||
interface VX_exec_unit_req_inter ();
|
||||
|
||||
// Meta
|
||||
wire[`NT_M1:0] valid;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire[31:0] curr_PC;
|
||||
wire[31:0] PC_next;
|
||||
|
||||
|
@ -18,8 +18,8 @@ interface VX_exec_unit_req_inter ();
|
|||
wire[1:0] wb;
|
||||
|
||||
// Data and alu op
|
||||
wire[`NT_M1:0][31:0] a_reg_data;
|
||||
wire[`NT_M1:0][31:0] b_reg_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] a_reg_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] b_reg_data;
|
||||
wire[4:0] alu_op;
|
||||
wire[4:0] rs1;
|
||||
wire[4:0] rs2;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "VX_define.v"
|
||||
`include "VX_define.vh"
|
||||
|
||||
`ifndef VX_FrE_to_BE_INTER
|
||||
|
||||
|
@ -30,8 +30,8 @@ interface VX_frE_to_bckE_req_inter ();
|
|||
wire jal;
|
||||
wire[31:0] jal_offset;
|
||||
wire[31:0] PC_next;
|
||||
wire[`NT_M1:0] valid;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
|
||||
// GPGPU stuff
|
||||
wire is_wspawn;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_GPR_CLONE_INTER
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
|||
interface VX_gpr_clone_inter ();
|
||||
/* verilator lint_off UNUSED */
|
||||
wire is_clone;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
/* verilator lint_on UNUSED */
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_gpr_data_INTER
|
||||
|
||||
`define VX_gpr_data_INTER
|
||||
|
||||
interface VX_gpr_data_inter ();
|
||||
wire[`NT_M1:0][31:0] a_reg_data;
|
||||
wire[`NT_M1:0][31:0] b_reg_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] a_reg_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] b_reg_data;
|
||||
endinterface
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
`ifndef VX_GPR_JAL_INTER
|
||||
|
||||
`define VX_GPR_JAL_INTER
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
`ifndef VX_GPR_READ
|
||||
|
||||
`define VX_GPR_READ
|
||||
|
@ -8,7 +8,7 @@ interface VX_gpr_read_inter ();
|
|||
|
||||
wire[4:0] rs1;
|
||||
wire[4:0] rs2;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
`ifndef VX_GPR_WSPAWN_INTER
|
||||
|
||||
`define VX_GPR_WSPAWN_INTER
|
||||
|
@ -7,8 +7,8 @@
|
|||
interface VX_gpr_wspawn_inter ();
|
||||
/* verilator lint_off UNUSED */
|
||||
wire is_wspawn;
|
||||
wire[`NW_M1:0] which_wspawn;
|
||||
// wire[`NW_M1:0] warp_num;
|
||||
wire[`NW_BITS-1:0] which_wspawn;
|
||||
// wire[`NW_BITS-1:0] warp_num;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
|
||||
`include "../generic_cache/VX_cache_config.v"
|
||||
`include "../generic_cache/VX_cache_config.vh"
|
||||
|
||||
`ifndef VX_GPU_DRAM_DCACHE_REQ
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
|||
|
||||
interface VX_gpu_dcache_dram_req_inter
|
||||
#(
|
||||
parameter BANK_LINE_SIZE_WORDS = 2
|
||||
parameter BANK_LINE_WORDS = 2
|
||||
)
|
||||
();
|
||||
|
||||
|
@ -18,7 +18,7 @@ interface VX_gpu_dcache_dram_req_inter
|
|||
wire dram_req_read;
|
||||
wire [31:0] dram_req_addr;
|
||||
wire [31:0] dram_req_size;
|
||||
wire [BANK_LINE_SIZE_WORDS-1:0][31:0] dram_req_data;
|
||||
wire [BANK_LINE_WORDS-1:0][31:0] dram_req_data;
|
||||
|
||||
// Snoop
|
||||
wire dram_because_of_snp;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
|
||||
|
||||
`include "../generic_cache/VX_cache_config.v"
|
||||
`include "../generic_cache/VX_cache_config.vh"
|
||||
|
||||
`ifndef VX_GPU_DRAM_DCACHE_RES
|
||||
|
||||
|
@ -9,13 +9,13 @@
|
|||
|
||||
interface VX_gpu_dcache_dram_res_inter
|
||||
#(
|
||||
parameter BANK_LINE_SIZE_WORDS = 2
|
||||
parameter BANK_LINE_WORDS = 2
|
||||
)
|
||||
();
|
||||
// DRAM Rsponse
|
||||
wire dram_fill_rsp;
|
||||
wire [31:0] dram_fill_rsp_addr;
|
||||
wire [BANK_LINE_SIZE_WORDS-1:0][31:0] dram_fill_rsp_data;
|
||||
wire [BANK_LINE_WORDS-1:0][31:0] dram_fill_rsp_data;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
|
||||
`include "../generic_cache/VX_cache_config.v"
|
||||
`include "../generic_cache/VX_cache_config.vh"
|
||||
|
||||
`ifndef VX_GPU_DCACHE_REQ
|
||||
|
||||
|
@ -20,7 +20,7 @@ interface VX_gpu_dcache_req_inter
|
|||
wire [NUMBER_REQUESTS-1:0][2:0] core_req_mem_write;
|
||||
wire [4:0] core_req_rd;
|
||||
wire [NUMBER_REQUESTS-1:0][1:0] core_req_wb;
|
||||
wire [`NW_M1:0] core_req_warp_num;
|
||||
wire [`NW_BITS-1:0] core_req_warp_num;
|
||||
wire [31:0] core_req_pc;
|
||||
|
||||
// Can't WB
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
|
||||
`include "../generic_cache/VX_cache_config.v"
|
||||
`include "../generic_cache/VX_cache_config.vh"
|
||||
|
||||
`ifndef VX_GPU_DCACHE_RES
|
||||
|
||||
|
@ -16,7 +16,7 @@ interface VX_gpu_dcache_res_inter
|
|||
wire [NUMBER_REQUESTS-1:0] core_wb_valid;
|
||||
wire [4:0] core_wb_req_rd;
|
||||
wire [1:0] core_wb_req_wb;
|
||||
wire [`NW_M1:0] core_wb_warp_num;
|
||||
wire [`NW_BITS-1:0] core_wb_warp_num;
|
||||
wire [NUMBER_REQUESTS-1:0][31:0] core_wb_readdata;
|
||||
wire [NUMBER_REQUESTS-1:0][31:0] core_wb_pc;
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
|
||||
|
||||
`include "../generic_cache/VX_cache_config.v"
|
||||
`include "../generic_cache/VX_cache_config.vh"
|
||||
|
||||
`ifndef VX_GPU_SNP_REQ
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_GPU_INST_REQ_IN
|
||||
|
||||
|
@ -6,8 +6,8 @@
|
|||
|
||||
interface VX_gpu_inst_req_inter();
|
||||
|
||||
wire[`NT_M1:0] valid;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire is_wspawn;
|
||||
wire is_tmc;
|
||||
wire is_split;
|
||||
|
@ -16,7 +16,7 @@ interface VX_gpu_inst_req_inter();
|
|||
|
||||
wire[31:0] pc_next;
|
||||
|
||||
wire[`NT_M1:0][31:0] a_reg_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] a_reg_data;
|
||||
wire[31:0] rd2;
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "../generic_cache/VX_cache_config.v"
|
||||
`include "../generic_cache/VX_cache_config.vh"
|
||||
|
||||
`ifndef VX_GPU_SNP_REQ_RSP
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_ICACHE_REQ
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_ICACHE_RSP
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_EXEC_UNIT_WB_INST_INTER
|
||||
|
||||
|
@ -7,12 +7,12 @@
|
|||
|
||||
interface VX_inst_exec_wb_inter ();
|
||||
|
||||
wire[`NT_M1:0][31:0] alu_result;
|
||||
wire[`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire[31:0] exec_wb_pc;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[`NT_M1:0] wb_valid;
|
||||
wire[`NW_M1:0] wb_warp_num;
|
||||
wire[`NUM_THREADS-1:0] wb_valid;
|
||||
wire[`NW_BITS-1:0] wb_warp_num;
|
||||
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_MEM_WB_INST_INTER
|
||||
|
||||
|
@ -7,12 +7,12 @@
|
|||
|
||||
interface VX_inst_mem_wb_inter ();
|
||||
|
||||
wire[`NT_M1:0][31:0] loaded_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] loaded_data;
|
||||
wire[31:0] mem_wb_pc;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[`NT_M1:0] wb_valid;
|
||||
wire[`NW_M1:0] wb_warp_num;
|
||||
wire[`NUM_THREADS-1:0] wb_valid;
|
||||
wire[`NW_BITS-1:0] wb_warp_num;
|
||||
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_F_D_INTER
|
||||
|
||||
|
@ -7,8 +7,8 @@
|
|||
interface VX_inst_meta_inter ();
|
||||
wire[31:0] instruction;
|
||||
wire[31:0] inst_pc;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[`NT_M1:0] valid;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_JAL_RSP
|
||||
|
||||
|
@ -9,7 +9,7 @@ interface VX_jal_response_inter ();
|
|||
|
||||
wire jal;
|
||||
wire[31:0] jal_dest;
|
||||
wire[`NW_M1:0] jal_warp_num;
|
||||
wire[`NW_BITS-1:0] jal_warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_JOIN_INTER
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
|||
interface VX_join_inter ();
|
||||
|
||||
wire is_join;
|
||||
wire[`NW_M1:0] join_warp_num;
|
||||
wire[`NW_BITS-1:0] join_warp_num;
|
||||
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_LSU_REQ_INTER
|
||||
|
||||
|
@ -7,11 +7,11 @@
|
|||
|
||||
interface VX_lsu_req_inter ();
|
||||
|
||||
wire[`NT_M1:0] valid;
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire[31:0] lsu_pc;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[`NT_M1:0][31:0] store_data;
|
||||
wire[`NT_M1:0][31:0] base_address; // A reg data
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire[`NUM_THREADS-1:0][31:0] store_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] base_address; // A reg data
|
||||
wire[31:0] offset; // itype_immed
|
||||
wire[2:0] mem_read;
|
||||
wire[2:0] mem_write;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_MEM_REQ_IN
|
||||
|
||||
|
@ -6,20 +6,20 @@
|
|||
|
||||
interface VX_mem_req_inter ();
|
||||
|
||||
wire[`NT_M1:0][31:0] alu_result;
|
||||
wire[`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire[2:0] mem_read;
|
||||
wire[2:0] mem_write;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[4:0] rs1;
|
||||
wire[4:0] rs2;
|
||||
wire[`NT_M1:0][31:0] rd2;
|
||||
wire[`NUM_THREADS-1:0][31:0] rd2;
|
||||
wire[31:0] PC_next;
|
||||
wire[31:0] curr_PC;
|
||||
wire[31:0] branch_offset;
|
||||
wire[2:0] branch_type;
|
||||
wire[`NT_M1:0] valid;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_MW_WB_INTER
|
||||
|
||||
|
@ -7,13 +7,13 @@
|
|||
|
||||
interface VX_mw_wb_inter ();
|
||||
|
||||
wire[`NT_M1:0][31:0] alu_result;
|
||||
wire[`NT_M1:0][31:0] mem_result;
|
||||
wire[`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire[`NUM_THREADS-1:0][31:0] mem_result;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[31:0] PC_next;
|
||||
wire[`NT_M1:0] valid;
|
||||
wire [`NW_M1:0] warp_num;
|
||||
wire[`NUM_THREADS-1:0] valid;
|
||||
wire [`NW_BITS-1:0] warp_num;
|
||||
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_WARP_CTL_INTER
|
||||
|
||||
|
@ -7,26 +7,26 @@
|
|||
|
||||
interface VX_warp_ctl_inter ();
|
||||
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
wire change_mask;
|
||||
wire[`NT_M1:0] thread_mask;
|
||||
wire[`NUM_THREADS-1:0] thread_mask;
|
||||
|
||||
wire wspawn;
|
||||
wire[31:0] wspawn_pc;
|
||||
wire[`NW-1:0] wspawn_new_active;
|
||||
wire[`NUM_WARPS-1:0] wspawn_new_active;
|
||||
|
||||
wire ebreak;
|
||||
|
||||
// barrier
|
||||
wire is_barrier;
|
||||
wire[31:0] barrier_id;
|
||||
wire[$clog2(`NW):0] num_warps;
|
||||
wire[$clog2(`NUM_WARPS):0] num_warps;
|
||||
|
||||
wire is_split;
|
||||
wire dont_split;
|
||||
wire[`NW_M1:0] split_warp_num;
|
||||
wire[`NT_M1:0] split_new_mask;
|
||||
wire[`NT_M1:0] split_later_mask;
|
||||
wire[`NW_BITS-1:0] split_warp_num;
|
||||
wire[`NUM_THREADS-1:0] split_new_mask;
|
||||
wire[`NUM_THREADS-1:0] split_later_mask;
|
||||
wire[31:0] split_save_pc;
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_WB_INTER
|
||||
|
||||
|
@ -7,12 +7,12 @@
|
|||
|
||||
interface VX_wb_inter ();
|
||||
|
||||
wire[`NT_M1:0][31:0] write_data;
|
||||
wire[`NUM_THREADS-1:0][31:0] write_data;
|
||||
wire[31:0] wb_pc;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[`NT_M1:0] wb_valid;
|
||||
wire[`NW_M1:0] wb_warp_num;
|
||||
wire[`NUM_THREADS-1:0] wb_valid;
|
||||
wire[`NW_BITS-1:0] wb_warp_num;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
`ifndef VX_WSTALL_INTER
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
|||
|
||||
interface VX_wstall_inter();
|
||||
wire wstall;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[`NW_BITS-1:0] warp_num;
|
||||
endinterface
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
module VX_d_e_reg (
|
||||
input wire clk,
|
||||
|
@ -16,7 +16,7 @@ module VX_d_e_reg (
|
|||
wire flush = (in_branch_stall == `STALL);
|
||||
|
||||
|
||||
VX_generic_register #(.N(233 + `NW_M1 + 1 + `NT)) d_e_reg
|
||||
VX_generic_register #(.N(233 + `NW_BITS-1 + 1 + `NUM_THREADS)) d_e_reg
|
||||
(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
module VX_f_d_reg (
|
||||
input wire clk,
|
||||
|
@ -13,7 +13,7 @@ module VX_f_d_reg (
|
|||
wire flush = 1'b0;
|
||||
wire stall = in_freeze == 1'b1;
|
||||
|
||||
VX_generic_register #( .N(64+`NW_M1+1+`NT) ) f_d_reg (
|
||||
VX_generic_register #( .N(64+`NW_BITS-1+1+`NUM_THREADS) ) f_d_reg (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall),
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
module VX_i_d_reg (
|
||||
input wire clk,
|
||||
|
@ -14,7 +14,7 @@ module VX_i_d_reg (
|
|||
wire stall = in_freeze == 1'b1;
|
||||
|
||||
|
||||
VX_generic_register #( .N( 64 + `NW_M1 + 1 + `NT ) ) i_d_reg (
|
||||
VX_generic_register #( .N( 64 + `NW_BITS-1 + 1 + `NUM_THREADS ) ) i_d_reg (
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall),
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
`include "../VX_define.v"
|
||||
`include "../VX_define.vh"
|
||||
|
||||
// Converts in_valids to bank_valids
|
||||
module VX_bank_valids
|
||||
|
@ -7,16 +7,16 @@ module VX_bank_valids
|
|||
parameter BITS_PER_BANK = 3
|
||||
)
|
||||
(
|
||||
input wire[`NT_M1:0] in_valids,
|
||||
input wire[`NT_M1:0][31:0] in_addr,
|
||||
output reg[NB:0][`NT_M1:0] bank_valids
|
||||
input wire[`NUM_THREADS-1:0] in_valids,
|
||||
input wire[`NUM_THREADS-1:0][31:0] in_addr,
|
||||
output reg[NB:0][`NUM_THREADS-1:0] bank_valids
|
||||
);
|
||||
|
||||
|
||||
integer i, j;
|
||||
always@(*) begin
|
||||
for(j = 0; j <= NB; j = j+1 ) begin
|
||||
for(i = 0; i <= `NT_M1; i = i+1) begin
|
||||
for(i = 0; i < `NUM_THREADS; i = i+1) begin
|
||||
if(in_valids[i]) begin
|
||||
if(in_addr[i][(2+BITS_PER_BANK-1):2] == j[BITS_PER_BANK-1:0]) begin
|
||||
bank_valids[j][i] = 1'b1;
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue