mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 05:17:45 -04:00
Instruction Multiplex LSU/EXU 1 cycle DONE
This commit is contained in:
parent
62db9ae691
commit
6779d0fade
18 changed files with 712 additions and 491 deletions
34
rtl/Makefile
34
rtl/Makefile
|
@ -1,19 +1,45 @@
|
|||
all: RUNFILE
|
||||
|
||||
INCLUDE=-I. -I../models/memory/cln28hpc/rf2_32x128_wm1/ -I/usr/local/systemc/ -Iinterfaces/ -Ipipe_regs/
|
||||
|
||||
FILE=Vortex.v
|
||||
|
||||
EXE=--exe test_bench.cpp
|
||||
|
||||
COMP=--compiler gcc
|
||||
|
||||
WNO=-Wno-fatal -Wno-UNOPTFLAT -Wno-UNDRIVEN -Wno-UNSIGNED -Wno-lint --Wno-PINMISSING
|
||||
|
||||
LIB=-LDFLAGS '-L/usr/local/systemc/'
|
||||
|
||||
CF=-CFLAGS '-std=c++11 -O3'
|
||||
|
||||
DEB=--prof-cfuncs -DVL_DEBUG=1 --coverage --trace
|
||||
|
||||
|
||||
MAKECPP=(cd obj_dir && make -j -f VVortex.mk)
|
||||
|
||||
# -LDFLAGS '-lsystemc'
|
||||
VERILATOR:
|
||||
echo "#define VCD_OFF" > tb_debug.h
|
||||
verilator --compiler gcc --Wno-PINMISSING -cc Vortex.v -I. -I../models/memory/cln28hpc/rf2_32x128_wm1/ -I/usr/local/systemc/ -Iinterfaces/ -Ipipe_regs/ --exe test_bench.cpp -CFLAGS '-std=c++11 -O3' -LDFLAGS '-L/usr/local/systemc/'
|
||||
verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF)
|
||||
|
||||
VERILATORnoWarnings:
|
||||
echo "#define VCD_OFF" > tb_debug.h
|
||||
verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF) $(WNO)
|
||||
|
||||
compdebug:
|
||||
echo "#define VCD_OUTPUT" > tb_debug.h
|
||||
verilator --compiler gcc -Wno-fatal -Wno-UNOPTFLAT -Wno-UNDRIVEN -Wno-UNSIGNED -Wno-lint --prof-cfuncs -DVL_DEBUG=1 --coverage --trace -cc Vortex.v -I/usr/local/systemc/ -I. -I../models/memory/cln28hpc/rf2_32x128_wm1/ -Iinterfaces/ -Ipipe_regs/ --exe test_bench.cpp -CFLAGS '-std=c++11 -DVL_DEBUG' -LDFLAGS '-L/usr/local/systemc/'
|
||||
verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) -CFLAGS '-std=c++11 -DVL_DEBUG' $(WNO) $(DEB)
|
||||
|
||||
RUNFILE: VERILATOR
|
||||
(cd obj_dir && make -j -f VVortex.mk)
|
||||
$(MAKECPP)
|
||||
|
||||
debug: compdebug
|
||||
(cd obj_dir && make -j -f VVortex.mk)
|
||||
$(MAKECPP)
|
||||
|
||||
w: VERILATORnoWarnings
|
||||
$(MAKECPP)
|
||||
|
||||
clean:
|
||||
rm obj_dir/*
|
|
@ -4,31 +4,31 @@ module VX_back_end (
|
|||
input wire fetch_delay,
|
||||
input wire schedule_delay,
|
||||
|
||||
input wire[31:0] csr_decode_csr_data,
|
||||
output wire execute_branch_stall,
|
||||
input wire in_fwd_stall,
|
||||
input wire[31:0] csr_decode_csr_data,
|
||||
output wire execute_branch_stall,
|
||||
input wire in_fwd_stall,
|
||||
|
||||
output wire out_mem_delay,
|
||||
output wire out_gpr_stall,
|
||||
output wire out_mem_delay,
|
||||
output wire out_gpr_stall,
|
||||
|
||||
VX_jal_response_inter VX_jal_rsp,
|
||||
VX_branch_response_inter VX_branch_rsp,
|
||||
VX_jal_response_inter VX_jal_rsp,
|
||||
VX_branch_response_inter VX_branch_rsp,
|
||||
|
||||
|
||||
VX_frE_to_bckE_req_inter VX_bckE_req,
|
||||
VX_wb_inter VX_writeback_inter,
|
||||
VX_frE_to_bckE_req_inter VX_bckE_req,
|
||||
VX_wb_inter VX_writeback_inter,
|
||||
|
||||
VX_warp_ctl_inter VX_warp_ctl,
|
||||
VX_warp_ctl_inter VX_warp_ctl,
|
||||
|
||||
VX_dcache_response_inter VX_dcache_rsp,
|
||||
VX_dcache_request_inter VX_dcache_req,
|
||||
VX_dcache_response_inter VX_dcache_rsp,
|
||||
VX_dcache_request_inter VX_dcache_req,
|
||||
|
||||
VX_forward_reqeust_inter VX_fwd_req_de,
|
||||
VX_forward_response_inter VX_fwd_rsp,
|
||||
|
||||
VX_forward_exe_inter VX_fwd_exe,
|
||||
VX_forward_mem_inter VX_fwd_mem,
|
||||
VX_forward_wb_inter VX_fwd_wb,
|
||||
VX_forward_exe_inter VX_fwd_exe,
|
||||
VX_forward_mem_inter VX_fwd_mem,
|
||||
VX_forward_wb_inter VX_fwd_wb,
|
||||
|
||||
|
||||
VX_csr_write_request_inter VX_csr_w_req
|
||||
|
@ -52,7 +52,6 @@ wire[31:0] execute_jal_dest;
|
|||
|
||||
|
||||
VX_mw_wb_inter VX_mw_wb();
|
||||
VX_inst_mem_wb_inter VX_mem_wb();
|
||||
|
||||
|
||||
VX_mem_req_inter VX_exe_mem_req();
|
||||
|
@ -63,6 +62,14 @@ VX_gpr_data_inter VX_gpr_data();
|
|||
|
||||
VX_frE_to_bckE_req_inter VX_bckE_req_out();
|
||||
|
||||
// LSU input + output
|
||||
VX_lsu_req_inter VX_lsu_req();
|
||||
VX_inst_mem_wb_inter VX_mem_wb();
|
||||
|
||||
// Exec unit input + output
|
||||
VX_exec_unit_req_inter VX_exec_unit_req();
|
||||
VX_inst_exec_wb_inter VX_inst_exec_wb();
|
||||
|
||||
VX_gpr_stage VX_gpr_stage(
|
||||
.clk (clk),
|
||||
.schedule_delay (schedule_delay),
|
||||
|
@ -78,76 +85,118 @@ VX_gpr_stage VX_gpr_stage(
|
|||
);
|
||||
|
||||
|
||||
VX_execute vx_execute(
|
||||
.VX_bckE_req (VX_bckE_req_out),
|
||||
.VX_gpr_data (VX_gpr_data),
|
||||
.VX_fwd_exe (VX_fwd_exe),
|
||||
.in_csr_data (csr_decode_csr_data),
|
||||
|
||||
.VX_exe_mem_req (VX_exe_mem_req),
|
||||
.out_csr_address (execute_csr_address),
|
||||
.out_is_csr (execute_is_csr),
|
||||
.out_csr_result (execute_csr_result),
|
||||
.out_jal (execute_jal),
|
||||
.out_jal_dest (execute_jal_dest),
|
||||
.out_branch_stall (execute_branch_stall)
|
||||
VX_inst_multiplex VX_inst_mult(
|
||||
.VX_bckE_req (VX_bckE_req_out),
|
||||
.VX_gpr_data (VX_gpr_data),
|
||||
.VX_exec_unit_req(VX_exec_unit_req),
|
||||
.VX_lsu_req (VX_lsu_req)
|
||||
);
|
||||
|
||||
|
||||
assign VX_jal_rsp.jal_warp_num = VX_mem_req.warp_num;
|
||||
|
||||
VX_e_m_reg vx_e_m_reg(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.in_csr_address (execute_csr_address),
|
||||
.in_is_csr (execute_is_csr),
|
||||
.in_csr_result (execute_csr_result),
|
||||
.in_jal (execute_jal),
|
||||
.in_jal_dest (execute_jal_dest),
|
||||
.in_freeze (total_freeze),
|
||||
.VX_exe_mem_req (VX_exe_mem_req),
|
||||
|
||||
.VX_mem_req (VX_mem_req),
|
||||
.out_csr_address (VX_csr_w_req.csr_address),
|
||||
.out_is_csr (VX_csr_w_req.is_csr),
|
||||
.out_csr_result (VX_csr_w_req.csr_result),
|
||||
.out_jal (VX_jal_rsp.jal),
|
||||
.out_jal_dest (VX_jal_rsp.jal_dest)
|
||||
VX_lsu load_store_unit(
|
||||
// .clk (clk),
|
||||
.VX_lsu_req (VX_lsu_req),
|
||||
.VX_mem_wb (VX_mem_wb),
|
||||
.VX_fwd_mem (VX_fwd_mem),
|
||||
.VX_dcache_rsp(VX_dcache_rsp),
|
||||
.VX_dcache_req(VX_dcache_req),
|
||||
.out_delay (memory_delay)
|
||||
);
|
||||
|
||||
VX_memory vx_memory(
|
||||
.VX_mem_req (VX_mem_req),
|
||||
.VX_mem_wb (VX_mem_wb),
|
||||
.VX_fwd_mem (VX_fwd_mem),
|
||||
.out_delay (memory_delay),
|
||||
|
||||
.VX_branch_rsp (VX_branch_rsp),
|
||||
VX_execute_unit VX_execUnit(
|
||||
// .clk (clk),
|
||||
.VX_exec_unit_req(VX_exec_unit_req),
|
||||
.VX_inst_exec_wb (VX_inst_exec_wb),
|
||||
.VX_jal_rsp (VX_jal_rsp),
|
||||
.VX_branch_rsp (VX_branch_rsp),
|
||||
|
||||
.VX_dcache_rsp(VX_dcache_rsp),
|
||||
.VX_dcache_req (VX_dcache_req)
|
||||
.VX_fwd_exe (VX_fwd_exe),
|
||||
.in_csr_data (csr_decode_csr_data),
|
||||
.out_csr_address (VX_csr_w_req.csr_address),
|
||||
.out_is_csr (VX_csr_w_req.is_csr),
|
||||
.out_csr_result (VX_csr_w_req.csr_result),
|
||||
.out_branch_stall(execute_branch_stall)
|
||||
);
|
||||
|
||||
// VX_m_w_reg vx_m_w_reg(
|
||||
// .clk (clk),
|
||||
// .reset (reset),
|
||||
// .in_freeze (total_freeze),
|
||||
// .VX_mem_wb (VX_mem_wb),
|
||||
// .VX_mw_wb (VX_mw_wb)
|
||||
VX_writeback VX_wb(
|
||||
.VX_mem_wb (VX_mem_wb),
|
||||
.VX_inst_exec_wb (VX_inst_exec_wb),
|
||||
|
||||
.VX_fwd_wb (VX_fwd_wb),
|
||||
.VX_writeback_inter(VX_writeback_inter)
|
||||
);
|
||||
|
||||
// VX_execute vx_execute(
|
||||
// .VX_bckE_req (VX_bckE_req_out),
|
||||
// .VX_gpr_data (VX_gpr_data),
|
||||
// .VX_fwd_exe (VX_fwd_exe),
|
||||
// .in_csr_data (csr_decode_csr_data),
|
||||
|
||||
// .VX_exe_mem_req (VX_exe_mem_req),
|
||||
// .out_csr_address (execute_csr_address),
|
||||
// .out_is_csr (execute_is_csr),
|
||||
// .out_csr_result (execute_csr_result),
|
||||
// .out_jal (execute_jal),
|
||||
// .out_jal_dest (execute_jal_dest),
|
||||
// .out_branch_stall (execute_branch_stall)
|
||||
// );
|
||||
|
||||
assign VX_mw_wb.alu_result = VX_mem_wb.alu_result;
|
||||
assign VX_mw_wb.mem_result = VX_mem_wb.mem_result;
|
||||
assign VX_mw_wb.rd = VX_mem_wb.rd;
|
||||
assign VX_mw_wb.wb = VX_mem_wb.wb;
|
||||
assign VX_mw_wb.PC_next = VX_mem_wb.PC_next;
|
||||
assign VX_mw_wb.valid = VX_mem_wb.valid;
|
||||
assign VX_mw_wb.warp_num = VX_mem_wb.warp_num;
|
||||
|
||||
// assign VX_jal_rsp.jal_warp_num = VX_mem_req.warp_num;
|
||||
|
||||
// VX_e_m_reg vx_e_m_reg(
|
||||
// .clk (clk),
|
||||
// .reset (reset),
|
||||
// .in_csr_address (execute_csr_address),
|
||||
// .in_is_csr (execute_is_csr),
|
||||
// .in_csr_result (execute_csr_result),
|
||||
// .in_jal (execute_jal),
|
||||
// .in_jal_dest (execute_jal_dest),
|
||||
// .in_freeze (total_freeze),
|
||||
// .VX_exe_mem_req (VX_exe_mem_req),
|
||||
|
||||
// .VX_mem_req (VX_mem_req),
|
||||
// .out_csr_address (VX_csr_w_req.csr_address),
|
||||
// .out_is_csr (VX_csr_w_req.is_csr),
|
||||
// .out_csr_result (VX_csr_w_req.csr_result),
|
||||
// .out_jal (VX_jal_rsp.jal),
|
||||
// .out_jal_dest (VX_jal_rsp.jal_dest)
|
||||
// );
|
||||
|
||||
// VX_memory vx_memory(
|
||||
// .VX_mem_req (VX_mem_req),
|
||||
// .VX_mem_wb (VX_mem_wb),
|
||||
// .VX_fwd_mem (VX_fwd_mem),
|
||||
// .out_delay (memory_delay),
|
||||
|
||||
// .VX_branch_rsp (VX_branch_rsp),
|
||||
|
||||
// .VX_dcache_rsp(VX_dcache_rsp),
|
||||
// .VX_dcache_req (VX_dcache_req)
|
||||
// );
|
||||
|
||||
// // VX_m_w_reg vx_m_w_reg(
|
||||
// // .clk (clk),
|
||||
// // .reset (reset),
|
||||
// // .in_freeze (total_freeze),
|
||||
// // .VX_mem_wb (VX_mem_wb),
|
||||
// // .VX_mw_wb (VX_mw_wb)
|
||||
// // );
|
||||
|
||||
// assign VX_mw_wb.alu_result = VX_mem_wb.alu_result;
|
||||
// assign VX_mw_wb.mem_result = VX_mem_wb.mem_result;
|
||||
// assign VX_mw_wb.rd = VX_mem_wb.rd;
|
||||
// assign VX_mw_wb.wb = VX_mem_wb.wb;
|
||||
// assign VX_mw_wb.PC_next = VX_mem_wb.PC_next;
|
||||
// assign VX_mw_wb.valid = VX_mem_wb.valid;
|
||||
// assign VX_mw_wb.warp_num = VX_mem_wb.warp_num;
|
||||
|
||||
|
||||
VX_writeback vx_writeback(
|
||||
.VX_mw_wb (VX_mw_wb),
|
||||
.VX_fwd_wb (VX_fwd_wb),
|
||||
.VX_writeback_inter(VX_writeback_inter)
|
||||
);
|
||||
// VX_writeback vx_writeback(
|
||||
// .VX_mw_wb (VX_mw_wb),
|
||||
// .VX_fwd_wb (VX_fwd_wb),
|
||||
// .VX_writeback_inter(VX_writeback_inter)
|
||||
// );
|
||||
|
||||
endmodule
|
110
rtl/VX_execute.v
110
rtl/VX_execute.v
|
@ -1,110 +0,0 @@
|
|||
|
||||
`include "VX_define.v"
|
||||
|
||||
// VX_execute: purely combinational execute stage.
//
// Runs one VX_alu per thread on the GPR operands, derives the value to be
// written to a CSR, the JAL target and the branch/JAL stall request, and hands
// the remaining request fields to the next stage through VX_exe_mem_req. The
// ALU-side results are also mirrored onto VX_fwd_exe.
module VX_execute (
	VX_frE_to_bckE_req_inter VX_bckE_req,      // incoming instruction request
	VX_gpr_data_inter        VX_gpr_data,      // per-thread a/b register values
	VX_forward_exe_inter     VX_fwd_exe,       // forwarding info, driven by this module
	input wire[31:0]         in_csr_data,      // CSR value fed to the ALU and CSR logic

	VX_mem_req_inter         VX_exe_mem_req,   // request passed on to the memory stage
	output wire[11:0]        out_csr_address,
	output wire              out_is_csr,
	output reg[31:0]         out_csr_result,
	output wire              out_jal,
	output wire[31:0]        out_jal_dest,
	output wire              out_branch_stall
);

	// ------------------------------------------------------------------
	// Local views of the request fields used below
	// ------------------------------------------------------------------
	wire[`NT_M1:0][31:0] operand_a   = VX_gpr_data.a_reg_data;
	wire[`NT_M1:0][31:0] operand_b   = VX_gpr_data.b_reg_data;
	wire[4:0]            alu_op      = VX_bckE_req.alu_op;
	wire                 rs2_src     = VX_bckE_req.rs2_src;
	wire[31:0]           itype_immed = VX_bckE_req.itype_immed;
	wire[2:0]            branch_type = VX_bckE_req.branch_type;
	wire[19:0]           upper_immed = VX_bckE_req.upper_immed;
	wire[31:0]           csr_mask    = VX_bckE_req.csr_mask;
	wire                 is_jal      = VX_bckE_req.jal;
	wire[31:0]           jal_offset  = VX_bckE_req.jal_offset;
	wire[31:0]           curr_pc     = VX_bckE_req.curr_PC;

	// ------------------------------------------------------------------
	// One ALU per thread; every lane shares the same opcode and immediates
	// ------------------------------------------------------------------
	genvar lane;
	generate
		for (lane = 0; lane < `NT; lane = lane + 1)
		begin
			VX_alu vx_alu(
				.in_1          (operand_a[lane]),
				.in_2          (operand_b[lane]),
				.in_rs2_src    (rs2_src),
				.in_itype_immed(itype_immed),
				.in_upper_immed(upper_immed),
				.in_alu_op     (alu_op),
				.in_csr_data   (in_csr_data),
				.in_curr_PC    (curr_pc),
				.out_alu_result(VX_exe_mem_req.alu_result[lane])
			);
		end
	endgenerate

	// ------------------------------------------------------------------
	// JAL: the target is computed from thread 0's a-operand only
	// ------------------------------------------------------------------
	assign out_jal      = is_jal;
	assign out_jal_dest = $signed(operand_a[0]) + $signed(jal_offset);

	// ------------------------------------------------------------------
	// CSR write value: write / set bits / clear bits, selected by alu_op
	// ------------------------------------------------------------------
	always @(*) begin
		case (alu_op)
			`CSR_ALU_RW: out_csr_result = csr_mask;
			`CSR_ALU_RS: out_csr_result = in_csr_data | csr_mask;
			// ~csr_mask is the 32-bit equivalent of (32'hFFFFFFFF - csr_mask)
			`CSR_ALU_RC: out_csr_result = in_csr_data & ~csr_mask;
			default:     out_csr_result = 32'hdeadbeef; // marker value for non-CSR ops
		endcase
	end

	assign out_is_csr      = VX_bckE_req.is_csr;
	assign out_csr_address = VX_bckE_req.csr_address;

	// Any branch or JAL requests a stall.
	assign out_branch_stall = ((branch_type != `NO_BRANCH) || is_jal) ? `STALL : `NO_STALL;

	// ------------------------------------------------------------------
	// Fields passed through unchanged to the memory stage.
	// (wb is driven exactly once; the original drove it twice.)
	// ------------------------------------------------------------------
	assign VX_exe_mem_req.mem_read      = VX_bckE_req.mem_read;
	assign VX_exe_mem_req.mem_write     = VX_bckE_req.mem_write;
	assign VX_exe_mem_req.wb            = VX_bckE_req.wb;
	assign VX_exe_mem_req.rs1           = VX_bckE_req.rs1;
	assign VX_exe_mem_req.rs2           = VX_bckE_req.rs2;
	assign VX_exe_mem_req.rd            = VX_bckE_req.rd;
	assign VX_exe_mem_req.rd2           = VX_gpr_data.b_reg_data;
	assign VX_exe_mem_req.PC_next       = VX_bckE_req.PC_next;
	assign VX_exe_mem_req.curr_PC       = VX_bckE_req.curr_PC;
	assign VX_exe_mem_req.branch_offset = VX_bckE_req.itype_immed;
	assign VX_exe_mem_req.branch_type   = VX_bckE_req.branch_type;
	assign VX_exe_mem_req.valid         = VX_bckE_req.valid;
	assign VX_exe_mem_req.warp_num      = VX_bckE_req.warp_num;

	// ------------------------------------------------------------------
	// Forwarding view of this stage's results
	// ------------------------------------------------------------------
	assign VX_fwd_exe.dest       = VX_exe_mem_req.rd;
	assign VX_fwd_exe.wb         = VX_exe_mem_req.wb;
	assign VX_fwd_exe.alu_result = VX_exe_mem_req.alu_result;
	assign VX_fwd_exe.PC_next    = VX_exe_mem_req.PC_next;
	assign VX_fwd_exe.warp_num   = VX_exe_mem_req.warp_num;

endmodule // VX_execute
|
140
rtl/VX_execute_unit.v
Normal file
140
rtl/VX_execute_unit.v
Normal file
|
@ -0,0 +1,140 @@
|
|||
// The macros used below (`NT, `NT_M1, `BEQ, `STALL, `CSR_ALU_RW, ...) are not
// defined in this file; include the shared definitions so the module does not
// depend on compile order.
`include "VX_define.v"

// VX_execute_unit: combinational ALU / branch / JAL / CSR unit.
//
// Takes an execute request, runs one VX_alu per thread, and produces:
//   * the write-back record (VX_inst_exec_wb),
//   * the JAL response (VX_jal_rsp) and branch response (VX_branch_rsp),
//   * the CSR write request fields (out_csr_address / out_is_csr / out_csr_result),
//   * the branch/JAL stall request (out_branch_stall).
// The forwarding interface VX_fwd_exe is tied to zero.
module VX_execute_unit (
	// input wire               clk,
	// Input
		// Request
	VX_exec_unit_req_inter   VX_exec_unit_req,

	// Output
		// Writeback
	VX_inst_exec_wb_inter    VX_inst_exec_wb,
		// JAL Response
	VX_jal_response_inter    VX_jal_rsp,
		// Branch Response
	VX_branch_response_inter VX_branch_rsp,
		// Forward data
	VX_forward_exe_inter     VX_fwd_exe,

	input wire[31:0]         in_csr_data,
	output wire[11:0]        out_csr_address,
	output wire              out_is_csr,
	output reg[31:0]         out_csr_result,
	output wire              out_branch_stall
);

	// ------------------------------------------------------------------
	// Forwarding outputs are driven with constant zeros in this module
	// ------------------------------------------------------------------
	assign VX_fwd_exe.dest       = 0;
	assign VX_fwd_exe.wb         = 0;
	assign VX_fwd_exe.alu_result = 0;
	assign VX_fwd_exe.PC_next    = 0;
	assign VX_fwd_exe.warp_num   = 0;

	// ------------------------------------------------------------------
	// Local views of the request fields used below
	// ------------------------------------------------------------------
	wire[`NT_M1:0][31:0] operand_a   = VX_exec_unit_req.a_reg_data;
	wire[`NT_M1:0][31:0] operand_b   = VX_exec_unit_req.b_reg_data;
	wire[4:0]            alu_op      = VX_exec_unit_req.alu_op;
	wire                 rs2_src     = VX_exec_unit_req.rs2_src;
	wire[31:0]           itype_immed = VX_exec_unit_req.itype_immed;
	wire[2:0]            branch_type = VX_exec_unit_req.branch_type;
	wire[19:0]           upper_immed = VX_exec_unit_req.upper_immed;
	wire[31:0]           csr_mask    = VX_exec_unit_req.csr_mask;
	wire                 is_jal      = VX_exec_unit_req.jal;
	wire[31:0]           jal_offset  = VX_exec_unit_req.jal_offset;
	wire[31:0]           curr_pc     = VX_exec_unit_req.curr_PC;

	// ------------------------------------------------------------------
	// One ALU per thread; every lane shares the same opcode and immediates
	// ------------------------------------------------------------------
	wire[`NT_M1:0][31:0] alu_result;

	genvar lane;
	generate
		for (lane = 0; lane < `NT; lane = lane + 1)
		begin
			VX_alu vx_alu(
				.in_1          (operand_a[lane]),
				.in_2          (operand_b[lane]),
				.in_rs2_src    (rs2_src),
				.in_itype_immed(itype_immed),
				.in_upper_immed(upper_immed),
				.in_alu_op     (alu_op),
				.in_csr_data   (in_csr_data),
				.in_curr_PC    (curr_pc),
				.out_alu_result(alu_result[lane])
			);
		end
	endgenerate

	// ------------------------------------------------------------------
	// Branch direction, decided from thread 0's ALU result only
	// ------------------------------------------------------------------
	wire lane0_is_zero      = (alu_result[0] == 0);
	wire lane0_sign_is_zero = (alu_result[0][31] == 0);

	reg temp_branch_dir;
	always @(*)
	begin
		case (branch_type)
			`BEQ:    temp_branch_dir = lane0_is_zero      ? `TAKEN     : `NOT_TAKEN;
			`BNE:    temp_branch_dir = lane0_is_zero      ? `NOT_TAKEN : `TAKEN;
			`BLT:    temp_branch_dir = lane0_sign_is_zero ? `NOT_TAKEN : `TAKEN;
			`BGT:    temp_branch_dir = lane0_sign_is_zero ? `TAKEN     : `NOT_TAKEN;
			// NOTE(review): the unsigned compares use the same sign-bit test as
			// the signed ones; this relies on what VX_alu outputs for these
			// operations -- confirm against VX_alu.
			`BLTU:   temp_branch_dir = lane0_sign_is_zero ? `NOT_TAKEN : `TAKEN;
			`BGTU:   temp_branch_dir = lane0_sign_is_zero ? `TAKEN     : `NOT_TAKEN;
			// `NO_BRANCH and any unlisted encoding: not taken
			default: temp_branch_dir = `NOT_TAKEN;
		endcase
	end

	// ------------------------------------------------------------------
	// PC_next replicated to every lane (written back instead of the ALU
	// result when the instruction is a JAL)
	// ------------------------------------------------------------------
	wire[`NT_M1:0][31:0] duplicate_PC_data;

	genvar pc_lane;
	generate
		for (pc_lane = 0; pc_lane < `NT; pc_lane = pc_lane + 1)
		begin
			assign duplicate_PC_data[pc_lane] = VX_exec_unit_req.PC_next;
		end
	endgenerate

	// ------------------------------------------------------------------
	// Write-back record
	// ------------------------------------------------------------------
	assign VX_inst_exec_wb.rd          = VX_exec_unit_req.rd;
	assign VX_inst_exec_wb.wb          = VX_exec_unit_req.wb;
	assign VX_inst_exec_wb.wb_valid    = VX_exec_unit_req.valid;
	assign VX_inst_exec_wb.wb_warp_num = VX_exec_unit_req.warp_num;
	assign VX_inst_exec_wb.alu_result  = VX_exec_unit_req.jal ? duplicate_PC_data : alu_result;

	// ------------------------------------------------------------------
	// JAL response: target computed from thread 0's a-operand
	// ------------------------------------------------------------------
	assign VX_jal_rsp.jal          = is_jal;
	assign VX_jal_rsp.jal_dest     = $signed(operand_a[0]) + $signed(jal_offset);
	assign VX_jal_rsp.jal_warp_num = VX_exec_unit_req.warp_num;

	// ------------------------------------------------------------------
	// Branch response: valid only for a branch with at least one valid thread
	// ------------------------------------------------------------------
	assign VX_branch_rsp.valid_branch    = (VX_exec_unit_req.branch_type != `NO_BRANCH) && (|VX_exec_unit_req.valid);
	assign VX_branch_rsp.branch_dir      = temp_branch_dir;
	assign VX_branch_rsp.branch_warp_num = VX_exec_unit_req.warp_num;
	// itype_immed carries the branch offset; it is shifted left by one before the add
	assign VX_branch_rsp.branch_dest     = $signed(VX_exec_unit_req.curr_PC) + ($signed(VX_exec_unit_req.itype_immed) << 1);

	// Any branch or JAL requests a stall (not qualified by the valid mask).
	assign out_branch_stall = ((branch_type != `NO_BRANCH) || is_jal) ? `STALL : `NO_STALL;

	// ------------------------------------------------------------------
	// CSR write value: write / set bits / clear bits, selected by alu_op
	// ------------------------------------------------------------------
	always @(*) begin
		case (alu_op)
			`CSR_ALU_RW: out_csr_result = csr_mask;
			`CSR_ALU_RS: out_csr_result = in_csr_data | csr_mask;
			// ~csr_mask is the 32-bit equivalent of (32'hFFFFFFFF - csr_mask)
			`CSR_ALU_RC: out_csr_result = in_csr_data & ~csr_mask;
			default:     out_csr_result = 32'hdeadbeef; // marker value for non-CSR ops
		endcase
	end

	assign out_is_csr      = VX_exec_unit_req.is_csr;
	assign out_csr_address = VX_exec_unit_req.csr_address;

endmodule // VX_execute_unit
|
174
rtl/VX_gpr.v
174
rtl/VX_gpr.v
|
@ -15,17 +15,17 @@ module VX_gpr (
|
|||
|
||||
assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd != 5'h0));
|
||||
|
||||
// byte_enabled_simple_dual_port_ram first_ram(
|
||||
// .we (write_enable),
|
||||
// .clk (clk),
|
||||
// .waddr (VX_writeback_inter.rd),
|
||||
// .raddr1(VX_gpr_read.rs1),
|
||||
// .raddr2(VX_gpr_read.rs2),
|
||||
// .be (VX_writeback_inter.wb_valid),
|
||||
// .wdata (VX_writeback_inter.write_data),
|
||||
// .q1 (out_a_reg_data),
|
||||
// .q2 (out_b_reg_data)
|
||||
// );
|
||||
byte_enabled_simple_dual_port_ram first_ram(
|
||||
.we (write_enable),
|
||||
.clk (clk),
|
||||
.waddr (VX_writeback_inter.rd),
|
||||
.raddr1(VX_gpr_read.rs1),
|
||||
.raddr2(VX_gpr_read.rs2),
|
||||
.be (VX_writeback_inter.wb_valid),
|
||||
.wdata (VX_writeback_inter.write_data),
|
||||
.q1 (out_a_reg_data),
|
||||
.q2 (out_b_reg_data)
|
||||
);
|
||||
|
||||
// =======
|
||||
// byte_enabled_simple_dual_port_ram first_ram(
|
||||
|
@ -48,82 +48,82 @@ module VX_gpr (
|
|||
// .q1 (out_b_reg_data)
|
||||
// );
|
||||
|
||||
// wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}};
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_32x128_wm1 first_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(out_a_reg_data),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(1'b0),
|
||||
.AA(VX_gpr_read.rs1),
|
||||
.CLKB(clk),
|
||||
.CENB(1'b0),
|
||||
.WENB(write_bit_mask),
|
||||
.AB(VX_writeback_inter.rd),
|
||||
.DB(VX_writeback_inter.write_data),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
// // wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}};
|
||||
// /* verilator lint_off PINCONNECTEMPTY */
|
||||
// rf2_32x128_wm1 first_ram (
|
||||
// .CENYA(),
|
||||
// .AYA(),
|
||||
// .CENYB(),
|
||||
// .WENYB(),
|
||||
// .AYB(),
|
||||
// .QA(out_a_reg_data),
|
||||
// .SOA(),
|
||||
// .SOB(),
|
||||
// .CLKA(clk),
|
||||
// .CENA(1'b0),
|
||||
// .AA(VX_gpr_read.rs1),
|
||||
// .CLKB(clk),
|
||||
// .CENB(1'b0),
|
||||
// .WENB(write_bit_mask),
|
||||
// .AB(VX_writeback_inter.rd),
|
||||
// .DB(VX_writeback_inter.write_data),
|
||||
// .EMAA(3'b011),
|
||||
// .EMASA(1'b0),
|
||||
// .EMAB(3'b011),
|
||||
// .TENA(1'b1),
|
||||
// .TCENA(1'b0),
|
||||
// .TAA(5'b0),
|
||||
// .TENB(1'b1),
|
||||
// .TCENB(1'b0),
|
||||
// .TWENB(128'b0),
|
||||
// .TAB(5'b0),
|
||||
// .TDB(128'b0),
|
||||
// .RET1N(1'b1),
|
||||
// .SIA(2'b0),
|
||||
// .SEA(1'b0),
|
||||
// .DFTRAMBYP(1'b0),
|
||||
// .SIB(2'b0),
|
||||
// .SEB(1'b0),
|
||||
// .COLLDISN(1'b1)
|
||||
// );
|
||||
// /* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_32x128_wm1 second_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(out_b_reg_data),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(1'b0),
|
||||
.AA(VX_gpr_read.rs2),
|
||||
.CLKB(clk),
|
||||
.CENB(1'b0),
|
||||
.WENB(write_bit_mask),
|
||||
.AB(VX_writeback_inter.rd),
|
||||
.DB(VX_writeback_inter.write_data),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
// /* verilator lint_off PINCONNECTEMPTY */
|
||||
// rf2_32x128_wm1 second_ram (
|
||||
// .CENYA(),
|
||||
// .AYA(),
|
||||
// .CENYB(),
|
||||
// .WENYB(),
|
||||
// .AYB(),
|
||||
// .QA(out_b_reg_data),
|
||||
// .SOA(),
|
||||
// .SOB(),
|
||||
// .CLKA(clk),
|
||||
// .CENA(1'b0),
|
||||
// .AA(VX_gpr_read.rs2),
|
||||
// .CLKB(clk),
|
||||
// .CENB(1'b0),
|
||||
// .WENB(write_bit_mask),
|
||||
// .AB(VX_writeback_inter.rd),
|
||||
// .DB(VX_writeback_inter.write_data),
|
||||
// .EMAA(3'b011),
|
||||
// .EMASA(1'b0),
|
||||
// .EMAB(3'b011),
|
||||
// .TENA(1'b1),
|
||||
// .TCENA(1'b0),
|
||||
// .TAA(5'b0),
|
||||
// .TENB(1'b1),
|
||||
// .TCENB(1'b0),
|
||||
// .TWENB(128'b0),
|
||||
// .TAB(5'b0),
|
||||
// .TDB(128'b0),
|
||||
// .RET1N(1'b1),
|
||||
// .SIA(2'b0),
|
||||
// .SEA(1'b0),
|
||||
// .DFTRAMBYP(1'b0),
|
||||
// .SIB(2'b0),
|
||||
// .SEB(1'b0),
|
||||
// .COLLDISN(1'b1)
|
||||
// );
|
||||
|
||||
endmodule
|
||||
|
|
60
rtl/VX_inst_multiplex.v
Normal file
60
rtl/VX_inst_multiplex.v
Normal file
|
@ -0,0 +1,60 @@
|
|||
// The macros used below (`NT, `NT_M1, `NO_MEM_WRITE, `NO_MEM_READ) are not
// defined in this file; include the shared definitions so the module does not
// depend on compile order.
`include "VX_define.v"

// VX_inst_multiplex: routes one incoming request to either the load/store
// unit or the execute unit.
//
// Both output requests always carry the request fields; only the per-thread
// valid bits differ. An instruction that reads or writes memory keeps its
// valid bits on VX_lsu_req and has them cleared on VX_exec_unit_req; every
// other instruction gets the opposite treatment.
module VX_inst_multiplex (
	// Inputs
	VX_frE_to_bckE_req_inter VX_bckE_req,
	VX_gpr_data_inter        VX_gpr_data,

	// Outputs
	VX_exec_unit_req_inter   VX_exec_unit_req,
	VX_lsu_req_inter         VX_lsu_req
);

	// High when the instruction performs a memory read or a memory write.
	wire is_mem = (VX_bckE_req.mem_write != `NO_MEM_WRITE) || (VX_bckE_req.mem_read != `NO_MEM_READ);

	// is_mem replicated once per thread so it can gate the valid vector.
	wire[`NT_M1:0] is_mem_mask;

	genvar currT;
	generate
		for (currT = 0; currT < `NT; currT = currT + 1)
		begin
			assign is_mem_mask[currT] = is_mem;
		end
	endgenerate

	// ------------------------------------------------------------------
	// LSU request
	// ------------------------------------------------------------------
	assign VX_lsu_req.valid        = VX_bckE_req.valid & is_mem_mask;
	assign VX_lsu_req.warp_num     = VX_bckE_req.warp_num;
	assign VX_lsu_req.base_address = VX_gpr_data.a_reg_data;
	assign VX_lsu_req.store_data   = VX_gpr_data.b_reg_data;
	assign VX_lsu_req.offset       = VX_bckE_req.itype_immed;
	assign VX_lsu_req.mem_read     = VX_bckE_req.mem_read;
	assign VX_lsu_req.mem_write    = VX_bckE_req.mem_write;
	assign VX_lsu_req.rd           = VX_bckE_req.rd;
	assign VX_lsu_req.wb           = VX_bckE_req.wb;

	// ------------------------------------------------------------------
	// Execute unit request
	// ------------------------------------------------------------------
	assign VX_exec_unit_req.valid       = VX_bckE_req.valid & (~is_mem_mask);
	assign VX_exec_unit_req.warp_num    = VX_bckE_req.warp_num;
	assign VX_exec_unit_req.curr_PC     = VX_bckE_req.curr_PC;
	assign VX_exec_unit_req.PC_next     = VX_bckE_req.PC_next;
	assign VX_exec_unit_req.rd          = VX_bckE_req.rd;
	assign VX_exec_unit_req.wb          = VX_bckE_req.wb;
	assign VX_exec_unit_req.a_reg_data  = VX_gpr_data.a_reg_data;
	assign VX_exec_unit_req.b_reg_data  = VX_gpr_data.b_reg_data;
	assign VX_exec_unit_req.alu_op      = VX_bckE_req.alu_op;
	assign VX_exec_unit_req.rs1         = VX_bckE_req.rs1;
	assign VX_exec_unit_req.rs2         = VX_bckE_req.rs2;
	assign VX_exec_unit_req.rs2_src     = VX_bckE_req.rs2_src;
	assign VX_exec_unit_req.itype_immed = VX_bckE_req.itype_immed;
	assign VX_exec_unit_req.upper_immed = VX_bckE_req.upper_immed;
	assign VX_exec_unit_req.branch_type = VX_bckE_req.branch_type;
	assign VX_exec_unit_req.jalQual     = VX_bckE_req.jalQual;
	assign VX_exec_unit_req.jal         = VX_bckE_req.jal;
	assign VX_exec_unit_req.jal_offset  = VX_bckE_req.jal_offset;
	assign VX_exec_unit_req.wspawn      = VX_bckE_req.wspawn;
	assign VX_exec_unit_req.ebreak      = VX_bckE_req.ebreak;
	assign VX_exec_unit_req.is_csr      = VX_bckE_req.is_csr;
	assign VX_exec_unit_req.csr_address = VX_bckE_req.csr_address;
	assign VX_exec_unit_req.csr_immed   = VX_bckE_req.csr_immed;
	assign VX_exec_unit_req.csr_mask    = VX_bckE_req.csr_mask;

endmodule // VX_inst_multiplex
|
78
rtl/VX_lsu.v
Normal file
78
rtl/VX_lsu.v
Normal file
|
@ -0,0 +1,78 @@
|
|||
|
||||
`include "VX_define.v"
|
||||
|
||||
|
||||
// VX_lsu: combinational load/store unit.
//
// Computes one address per thread (base + shared offset), drives the data
// cache request with address / store data / valid bits, and builds the
// write-back record from the request fields and the data returned by the
// cache. out_delay is a constant zero and the forwarding interface is tied
// to zero.
module VX_lsu (
	// input wire               clk,
	VX_lsu_req_inter         VX_lsu_req,

	// Write back to GPR
	VX_inst_mem_wb_inter     VX_mem_wb,

	// FWD info
	VX_forward_mem_inter     VX_fwd_mem,

	VX_dcache_response_inter VX_dcache_rsp,
	VX_dcache_request_inter  VX_dcache_req,
	output wire              out_delay
);

	// VX_inst_mem_wb_inter VX_mem_wb_temp();

	// This unit never requests a delay.
	assign out_delay = 1'b0;

	// ------------------------------------------------------------------
	// Per-thread effective addresses
	// ------------------------------------------------------------------
	wire[`NT_M1:0][31:0] address;

	VX_lsu_addr_gen VX_lsu_addr_gen (
		.base_address(VX_lsu_req.base_address),
		.offset      (VX_lsu_req.offset),
		.address     (address)
	);

	// ------------------------------------------------------------------
	// Per-thread cache request signals and returned load data
	// ------------------------------------------------------------------
	genvar index;
	for (index = 0; index <= `NT_M1; index = index + 1) begin
		// request towards the data cache
		assign VX_dcache_req.out_cache_driver_in_valid[index]   = VX_lsu_req.valid[index];
		assign VX_dcache_req.out_cache_driver_in_address[index] = address[index];
		assign VX_dcache_req.out_cache_driver_in_data[index]    = VX_lsu_req.store_data[index];

		// data coming back from the data cache
		assign VX_mem_wb.loaded_data[index] = VX_dcache_rsp.in_cache_driver_out_data[index];
	end

	// Access type is shared by all threads of the request.
	assign VX_dcache_req.out_cache_driver_in_mem_read  = VX_lsu_req.mem_read;
	assign VX_dcache_req.out_cache_driver_in_mem_write = VX_lsu_req.mem_write;

	// ------------------------------------------------------------------
	// Write-back record (destination, write-back type, valid mask, warp)
	// ------------------------------------------------------------------
	assign VX_mem_wb.wb_warp_num = VX_lsu_req.warp_num;
	assign VX_mem_wb.wb_valid    = VX_lsu_req.valid;
	assign VX_mem_wb.wb          = VX_lsu_req.wb;
	assign VX_mem_wb.rd          = VX_lsu_req.rd;

	// Disabled: register stage between the computed write-back data and VX_mem_wb.
	// wire zero_temp = 0;
	// VX_generic_register #(.N(256)) register_wb_data
	// (
	// 	.clk  (clk),
	// 	.reset(zero_temp),
	// 	.stall(zero_temp),
	// 	.flush(zero_temp),
	// 	.in   ({VX_mem_wb_temp.loaded_data, VX_mem_wb_temp.rd, VX_mem_wb_temp.wb, VX_mem_wb_temp.wb_valid, VX_mem_wb_temp.wb_warp_num}),
	// 	.out  ({VX_mem_wb.loaded_data     , VX_mem_wb.rd     , VX_mem_wb.wb     , VX_mem_wb.wb_valid     , VX_mem_wb.wb_warp_num     })
	// );

	// ------------------------------------------------------------------
	// Forwarding outputs are tied to zero (marked "Delete" by the author)
	// ------------------------------------------------------------------
	assign VX_fwd_mem.warp_num   = 0;
	assign VX_fwd_mem.PC_next    = 0;
	assign VX_fwd_mem.mem_data   = 0;
	assign VX_fwd_mem.alu_result = 0;
	assign VX_fwd_mem.wb         = 0;
	assign VX_fwd_mem.dest       = 0;

endmodule // VX_lsu
|
||||
|
||||
|
15
rtl/VX_lsu_addr_gen.v
Normal file
15
rtl/VX_lsu_addr_gen.v
Normal file
|
@ -0,0 +1,15 @@
|
|||
// `NT and `NT_M1 are not defined in this file; include the shared definitions
// so the module does not depend on compile order.
`include "VX_define.v"

// VX_lsu_addr_gen: per-thread address generation for the load/store unit.
//
//   base_address : one 32-bit base value per thread
//   offset       : a single 32-bit offset shared by all threads
//   address      : base_address[i] + offset for every thread i
module VX_lsu_addr_gen (
	input  wire[`NT_M1:0][31:0] base_address,
	input  wire[31:0]           offset,
	output wire[`NT_M1:0][31:0] address
);

	genvar index;
	generate
		for (index = 0; index < `NT; index = index + 1)
		begin
			// 32-bit add; any carry out of bit 31 is discarded
			assign address[index] = base_address[index] + offset;
		end
	endgenerate

endmodule // VX_lsu_addr_gen
|
|
@ -1,78 +0,0 @@
|
|||
|
||||
`include "VX_define.v"
|
||||
|
||||
|
||||
// VX_memory: combinational memory stage.
//
// Uses the per-thread ALU results as data cache addresses, passes the request
// fields on as the write-back record, mirrors that record onto the forwarding
// interface, and resolves branches (direction from thread 0's ALU result,
// destination from curr_PC plus the shifted branch offset).
module VX_memory (
	VX_mem_req_inter         VX_mem_req,
	VX_inst_mem_wb_inter     VX_mem_wb,

	VX_forward_mem_inter     VX_fwd_mem,

	output wire              out_delay,

	VX_branch_response_inter VX_branch_rsp,

	VX_dcache_response_inter VX_dcache_rsp,
	VX_dcache_request_inter  VX_dcache_req
);

	// This stage never requests a delay.
	assign out_delay = 1'b0;

	// ------------------------------------------------------------------
	// Per-thread cache request signals and returned load data
	// ------------------------------------------------------------------
	genvar index;
	for (index = 0; index <= `NT_M1; index = index + 1) begin
		// request towards the data cache: address comes from the ALU result,
		// store data from rd2
		assign VX_dcache_req.out_cache_driver_in_valid[index]   = VX_mem_req.valid[index];
		assign VX_dcache_req.out_cache_driver_in_address[index] = VX_mem_req.alu_result[index];
		assign VX_dcache_req.out_cache_driver_in_data[index]    = VX_mem_req.rd2[index];

		// data coming back from the data cache
		assign VX_mem_wb.mem_result[index] = VX_dcache_rsp.in_cache_driver_out_data[index];
	end

	// Access type is shared by all threads of the request.
	assign VX_dcache_req.out_cache_driver_in_mem_read  = VX_mem_req.mem_read;
	assign VX_dcache_req.out_cache_driver_in_mem_write = VX_mem_req.mem_write;

	// ------------------------------------------------------------------
	// Write-back record: request fields passed straight through
	// ------------------------------------------------------------------
	assign VX_mem_wb.warp_num   = VX_mem_req.warp_num;
	assign VX_mem_wb.valid      = VX_mem_req.valid;
	assign VX_mem_wb.PC_next    = VX_mem_req.PC_next;
	assign VX_mem_wb.wb         = VX_mem_req.wb;
	assign VX_mem_wb.rd         = VX_mem_req.rd;
	assign VX_mem_wb.alu_result = VX_mem_req.alu_result;

	// ------------------------------------------------------------------
	// Forwarding view of the write-back record
	// ------------------------------------------------------------------
	assign VX_fwd_mem.warp_num   = VX_mem_wb.warp_num;
	assign VX_fwd_mem.PC_next    = VX_mem_wb.PC_next;
	assign VX_fwd_mem.mem_data   = VX_mem_wb.mem_result;
	assign VX_fwd_mem.alu_result = VX_mem_wb.alu_result;
	assign VX_fwd_mem.wb         = VX_mem_wb.wb;
	assign VX_fwd_mem.dest       = VX_mem_wb.rd;

	// ------------------------------------------------------------------
	// Branch resolution, decided from thread 0's ALU result only
	// ------------------------------------------------------------------
	wire lane0_is_zero      = (VX_mem_req.alu_result[0] == 0);
	wire lane0_sign_is_zero = (VX_mem_req.alu_result[0][31] == 0);

	reg temp_branch_dir;

	always @(*) begin
		case (VX_mem_req.branch_type)
			`BEQ:    temp_branch_dir = lane0_is_zero      ? `TAKEN     : `NOT_TAKEN;
			`BNE:    temp_branch_dir = lane0_is_zero      ? `NOT_TAKEN : `TAKEN;
			`BLT:    temp_branch_dir = lane0_sign_is_zero ? `NOT_TAKEN : `TAKEN;
			`BGT:    temp_branch_dir = lane0_sign_is_zero ? `TAKEN     : `NOT_TAKEN;
			`BLTU:   temp_branch_dir = lane0_sign_is_zero ? `NOT_TAKEN : `TAKEN;
			`BGTU:   temp_branch_dir = lane0_sign_is_zero ? `TAKEN     : `NOT_TAKEN;
			// `NO_BRANCH and any unlisted encoding: not taken
			default: temp_branch_dir = `NOT_TAKEN;
		endcase
	end

	// A branch response is valid only for a branch with at least one valid thread.
	assign VX_branch_rsp.valid_branch    = (VX_mem_req.branch_type != `NO_BRANCH) && (|VX_mem_req.valid);
	assign VX_branch_rsp.branch_dir      = temp_branch_dir;
	assign VX_branch_rsp.branch_warp_num = VX_mem_req.warp_num;
	// The branch offset is shifted left by one before being added to curr_PC.
	assign VX_branch_rsp.branch_dest     = $signed(VX_mem_req.curr_PC) + ($signed(VX_mem_req.branch_offset) << 1);

endmodule // Memory
|
||||
|
||||
|
|
@ -3,58 +3,87 @@
|
|||
|
||||
|
||||
module VX_writeback (
|
||||
VX_mw_wb_inter VX_mw_wb,
|
||||
VX_forward_wb_inter VX_fwd_wb,
|
||||
VX_wb_inter VX_writeback_inter
|
||||
// Mem WB info
|
||||
VX_inst_mem_wb_inter VX_mem_wb,
|
||||
// EXEC Unit WB info
|
||||
VX_inst_exec_wb_inter VX_inst_exec_wb,
|
||||
|
||||
VX_forward_wb_inter VX_fwd_wb,
|
||||
// Actual WB to GPR
|
||||
VX_wb_inter VX_writeback_inter
|
||||
);
|
||||
|
||||
|
||||
|
||||
wire[`NT_M1:0][31:0] in_alu_result = VX_mw_wb.alu_result;
|
||||
wire[`NT_M1:0][31:0] in_mem_result = VX_mw_wb.mem_result;
|
||||
wire[4:0] in_rd = VX_mw_wb.rd;
|
||||
wire[1:0] in_wb = VX_mw_wb.wb;
|
||||
wire[31:0] in_PC_next = VX_mw_wb.PC_next;
|
||||
wire[`NT_M1:0] in_valid = VX_mw_wb.valid;
|
||||
wire [`NW_M1:0] in_warp_num = VX_mw_wb.warp_num;
|
||||
|
||||
wire is_jal;
|
||||
wire uses_alu;
|
||||
|
||||
wire[`NT_M1:0][31:0] out_pc_data;
|
||||
assign VX_fwd_wb.dest = 0;
|
||||
assign VX_fwd_wb.wb = 0;
|
||||
assign VX_fwd_wb.alu_result = 0;
|
||||
assign VX_fwd_wb.mem_data = 0;
|
||||
assign VX_fwd_wb.PC_next = 0;
|
||||
assign VX_fwd_wb.warp_num = 0;
|
||||
|
||||
|
||||
genvar i;
|
||||
generate
|
||||
for (i = 0; i < `NT; i=i+1)
|
||||
begin
|
||||
assign out_pc_data[i] = in_PC_next;
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// assign out_pc_data[0] = in_PC_next;
|
||||
|
||||
// assign out_pc_data[1] = in_PC_next;
|
||||
|
||||
assign is_jal = in_wb == `WB_JAL;
|
||||
assign uses_alu = in_wb == `WB_ALU;
|
||||
|
||||
assign VX_writeback_inter.write_data = is_jal ? out_pc_data :
|
||||
uses_alu ? in_alu_result :
|
||||
in_mem_result;
|
||||
|
||||
assign VX_writeback_inter.wb_valid = in_valid;
|
||||
assign VX_writeback_inter.rd = in_rd;
|
||||
assign VX_writeback_inter.wb = in_wb;
|
||||
assign VX_writeback_inter.wb_warp_num = in_warp_num;
|
||||
wire exec_wb = (VX_inst_exec_wb.wb != 0) && (|VX_inst_exec_wb.wb_valid);
|
||||
wire mem_wb = (VX_mem_wb.wb != 0) && (|VX_mem_wb.wb_valid);
|
||||
|
||||
|
||||
assign VX_fwd_wb.dest = VX_writeback_inter.rd;
|
||||
assign VX_fwd_wb.wb = VX_writeback_inter.wb;
|
||||
assign VX_fwd_wb.alu_result = in_alu_result;
|
||||
assign VX_fwd_wb.mem_data = in_mem_result;
|
||||
assign VX_fwd_wb.PC_next = in_PC_next;
|
||||
assign VX_fwd_wb.warp_num = VX_writeback_inter.wb_warp_num;
|
||||
assign VX_writeback_inter.write_data = exec_wb ? VX_inst_exec_wb.alu_result :
|
||||
mem_wb ? VX_mem_wb.loaded_data :
|
||||
0;
|
||||
|
||||
|
||||
assign VX_writeback_inter.wb_valid = exec_wb ? VX_inst_exec_wb.wb_valid :
|
||||
mem_wb ? VX_mem_wb.wb_valid :
|
||||
0;
|
||||
|
||||
assign VX_writeback_inter.rd = exec_wb ? VX_inst_exec_wb.rd :
|
||||
mem_wb ? VX_mem_wb.rd :
|
||||
0;
|
||||
|
||||
assign VX_writeback_inter.wb = exec_wb ? VX_inst_exec_wb.wb :
|
||||
mem_wb ? VX_mem_wb.wb :
|
||||
0;
|
||||
|
||||
assign VX_writeback_inter.wb_warp_num = exec_wb ? VX_inst_exec_wb.wb_warp_num :
|
||||
mem_wb ? VX_mem_wb.wb_warp_num :
|
||||
0;
|
||||
|
||||
// wire[`NT_M1:0][31:0] in_alu_result = VX_mw_wb.alu_result;
|
||||
// wire[`NT_M1:0][31:0] in_mem_result = VX_mw_wb.mem_result;
|
||||
// wire[4:0] in_rd = VX_mw_wb.rd;
|
||||
// wire[1:0] in_wb = VX_mw_wb.wb;
|
||||
// wire[31:0] in_PC_next = VX_mw_wb.PC_next;
|
||||
// wire[`NT_M1:0] in_valid = VX_mw_wb.valid;
|
||||
// wire [`NW_M1:0] in_warp_num = VX_mw_wb.warp_num;
|
||||
|
||||
// wire is_jal;
|
||||
// wire uses_alu;
|
||||
|
||||
// wire[`NT_M1:0][31:0] out_pc_data;
|
||||
|
||||
|
||||
// genvar i;
|
||||
// generate
|
||||
// for (i = 0; i < `NT; i=i+1)
|
||||
// begin
|
||||
// assign out_pc_data[i] = in_PC_next;
|
||||
// end
|
||||
// endgenerate
|
||||
|
||||
// // assign out_pc_data[0] = in_PC_next;
|
||||
|
||||
// // assign out_pc_data[1] = in_PC_next;
|
||||
|
||||
// assign is_jal = in_wb == `WB_JAL;
|
||||
// assign uses_alu = in_wb == `WB_ALU;
|
||||
|
||||
// assign VX_writeback_inter.write_data = is_jal ? out_pc_data :
|
||||
// uses_alu ? in_alu_result :
|
||||
// in_mem_result;
|
||||
|
||||
// assign VX_writeback_inter.wb_valid = in_valid;
|
||||
// assign VX_writeback_inter.rd = in_rd;
|
||||
// assign VX_writeback_inter.wb = in_wb;
|
||||
// assign VX_writeback_inter.wb_warp_num = in_warp_num;
|
||||
|
||||
|
||||
endmodule // VX_writeback
|
53
rtl/interfaces/VX_exec_unit_req_inter.v
Normal file
53
rtl/interfaces/VX_exec_unit_req_inter.v
Normal file
|
@ -0,0 +1,53 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_EXE_UNIT_REQ_INTER
|
||||
|
||||
`define VX_EXE_UNIT_REQ_INTER
|
||||
|
||||
// Signal bundle named as the request interface for the execute unit.
// It groups instruction metadata, write-back destination, ALU operands and
// opcode, branch/JAL information and CSR fields. All members are plain wires
// and no modports are declared here, so signal direction is not enforced by
// this interface.
interface VX_exec_unit_req_inter ();
|
||||
|
||||
// Meta
|
||||
wire[`NT_M1:0] valid; // presumably one valid bit per thread lane -- TODO confirm `NT_M1 in VX_define.v
|
||||
wire[`NW_M1:0] warp_num; // warp identifier; width set by `NW_M1
|
||||
wire[31:0] curr_PC;
|
||||
wire[31:0] PC_next;
|
||||
|
||||
// Write Back Info
|
||||
wire[4:0] rd; // 5-bit destination register index
|
||||
wire[1:0] wb; // 2-bit write-back selector; encoding is defined outside this file
|
||||
|
||||
// Data and alu op
|
||||
wire[`NT_M1:0][31:0] a_reg_data; // one 32-bit operand word per `NT_M1+1 entries
|
||||
wire[`NT_M1:0][31:0] b_reg_data; // one 32-bit operand word per `NT_M1+1 entries
|
||||
wire[4:0] alu_op;
|
||||
wire[4:0] rs1;
|
||||
wire[4:0] rs2;
|
||||
wire rs2_src; // NOTE(review): presumably selects register vs. immediate for operand B -- confirm in the consumer
|
||||
wire[31:0] itype_immed;
|
||||
wire[19:0] upper_immed;
|
||||
|
||||
// Branch type
|
||||
wire[2:0] branch_type;
|
||||
|
||||
// Jal info
|
||||
wire jalQual;
|
||||
wire jal;
|
||||
wire[31:0] jal_offset;
|
||||
|
||||
// The lint pragmas below silence Verilator's UNUSED warning for the two wires they enclose.
/* verilator lint_off UNUSED */
|
||||
wire ebreak;
|
||||
wire wspawn;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
// CSR info
|
||||
wire is_csr;
|
||||
wire[11:0] csr_address; // 12-bit CSR address
|
||||
// UNUSED warning is suppressed for csr_immed only.
/* verilator lint_off UNUSED */
|
||||
wire csr_immed;
|
||||
/* verilator lint_on UNUSED */
|
||||
wire[31:0] csr_mask;
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
20
rtl/interfaces/VX_inst_exec_wb_inter.v
Normal file
20
rtl/interfaces/VX_inst_exec_wb_inter.v
Normal file
|
@ -0,0 +1,20 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_EXEC_UNIT_WB_INST_INTER
|
||||
|
||||
`define VX_EXEC_UNIT_WB_INST_INTER
|
||||
|
||||
// Write-back bundle produced on the execute-unit side and consumed by
// VX_writeback. No modports are declared, so direction is by convention.
interface VX_inst_exec_wb_inter ();
|
||||
|
||||
wire[`NT_M1:0][31:0] alu_result; // 32-bit result per entry; VX_writeback forwards this as write_data when this source is selected
|
||||
wire[4:0] rd; // 5-bit destination register index
|
||||
wire[1:0] wb; // write-back selector; VX_writeback treats a non-zero value as "write-back requested"
|
||||
wire[`NT_M1:0] wb_valid; // valid mask; VX_writeback OR-reduces it to detect any active entry
|
||||
wire[`NW_M1:0] wb_warp_num; // warp identifier accompanying the write-back
|
||||
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
|
@ -7,13 +7,11 @@
|
|||
|
||||
interface VX_inst_mem_wb_inter ();
|
||||
|
||||
wire[`NT_M1:0][31:0] alu_result;
|
||||
wire[`NT_M1:0][31:0] mem_result;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[31:0] PC_next;
|
||||
wire[`NT_M1:0] valid;
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[`NT_M1:0][31:0] loaded_data;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
wire[`NT_M1:0] wb_valid;
|
||||
wire[`NW_M1:0] wb_warp_num;
|
||||
|
||||
|
||||
endinterface
|
||||
|
|
23
rtl/interfaces/VX_lsu_req_inter.v
Normal file
23
rtl/interfaces/VX_lsu_req_inter.v
Normal file
|
@ -0,0 +1,23 @@
|
|||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_LSU_REQ_INTER
|
||||
|
||||
`define VX_LSU_REQ_INTER
|
||||
|
||||
// Signal bundle named as the request interface for the load/store unit.
// It carries the valid mask, warp id, store data, address components, memory
// operation codes and the write-back destination. No modports are declared,
// so direction is by convention of the connected modules.
interface VX_lsu_req_inter ();
|
||||
|
||||
wire[`NT_M1:0] valid; // presumably one valid bit per thread lane -- TODO confirm `NT_M1 in VX_define.v
|
||||
wire[`NW_M1:0] warp_num; // warp identifier; width set by `NW_M1
|
||||
wire[`NT_M1:0][31:0] store_data; // one 32-bit word per entry
|
||||
wire[`NT_M1:0][31:0] base_address; // A reg data (one 32-bit base per entry)
|
||||
wire[31:0] offset; // itype_immed (a single 32-bit value shared by all entries)
|
||||
wire[2:0] mem_read; // 3-bit read operation code; encoding is defined outside this file
|
||||
wire[2:0] mem_write; // 3-bit write operation code; encoding is defined outside this file
|
||||
wire[4:0] rd; // 5-bit destination register index
|
||||
wire[1:0] wb; // 2-bit write-back selector
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
|
@ -1,49 +0,0 @@
|
|||
|
||||
|
||||
`include "VX_define.v"
|
||||
|
||||
|
||||
// Pipeline register that latches the CSR/JAL side-band signals together with
// the VX_exe_mem_req bundle and presents them on the out_* ports and the
// VX_mem_req bundle. Storage is delegated to VX_generic_register.
module VX_e_m_reg (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire in_freeze, // drives the register's stall input
|
||||
input wire[11:0] in_csr_address,
|
||||
input wire in_is_csr,
|
||||
input wire[31:0] in_csr_result,
|
||||
input wire in_jal,
|
||||
input wire[31:0] in_jal_dest,
|
||||
VX_mem_req_inter VX_exe_mem_req,
|
||||
|
||||
|
||||
VX_mem_req_inter VX_mem_req,
|
||||
output wire[11:0] out_csr_address,
|
||||
output wire out_is_csr,
|
||||
output wire[31:0] out_csr_result,
|
||||
output wire out_jal,
|
||||
output wire[31:0] out_jal_dest
|
||||
);
|
||||
|
||||
|
||||
wire flush = 0; // flush is tied off: this stage is never flushed
|
||||
wire stall = in_freeze;
|
||||
|
||||
wire temp_out_jal; // registered copy of in_jal before qualification
|
||||
|
||||
// The latched jal is qualified with bit 0 of the valid mask only; other bits are ignored here.
assign out_jal = temp_out_jal && VX_mem_req.valid[0];
|
||||
|
||||
// N is hard-coded; presumably it must equal the total bit width of the
// concatenations below -- re-check whenever a field or `NT/`NW changes.
// NOTE(review): the instance name f_d_reg looks copied from another pipeline register.
VX_generic_register #(.N(463)) f_d_reg
|
||||
(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall),
|
||||
.flush(flush),
|
||||
.in ({in_csr_address , in_is_csr , in_csr_result , in_jal , in_jal_dest , VX_exe_mem_req.alu_result, VX_exe_mem_req.mem_read, VX_exe_mem_req.mem_write, VX_exe_mem_req.rd, VX_exe_mem_req.wb, VX_exe_mem_req.rs1, VX_exe_mem_req.rs2, VX_exe_mem_req.rd2, VX_exe_mem_req.PC_next, VX_exe_mem_req.curr_PC, VX_exe_mem_req.branch_offset, VX_exe_mem_req.branch_type, VX_exe_mem_req.valid, VX_exe_mem_req.warp_num}),
|
||||
.out ({out_csr_address, out_is_csr, out_csr_result, temp_out_jal, out_jal_dest, VX_mem_req.alu_result , VX_mem_req.mem_read , VX_mem_req.mem_write , VX_mem_req.rd , VX_mem_req.wb , VX_mem_req.rs1 , VX_mem_req.rs2 , VX_mem_req.rd2 , VX_mem_req.PC_next , VX_mem_req.curr_PC , VX_mem_req.branch_offset , VX_mem_req.branch_type , VX_mem_req.valid , VX_mem_req.warp_num})
|
||||
);
|
||||
|
||||
endmodule // VX_e_m_reg
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
|
||||
|
||||
`include "VX_define.v"
|
||||
|
||||
// Pipeline register that latches every field of the VX_mem_wb bundle and
// presents it on the VX_mw_wb bundle. Storage is delegated to
// VX_generic_register.
module VX_m_w_reg (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire in_freeze, // drives the register's stall input
|
||||
|
||||
VX_inst_mem_wb_inter VX_mem_wb,
|
||||
VX_mw_wb_inter VX_mw_wb
|
||||
);
|
||||
|
||||
wire flush = 0; // flush is tied off: this stage is never flushed
|
||||
wire stall = in_freeze;
|
||||
|
||||
|
||||
// N is hard-coded; presumably it must equal the total bit width of the
// concatenations below -- re-check whenever a field or `NT/`NW changes.
VX_generic_register #(.N(303)) m_w_reg
|
||||
(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall),
|
||||
.flush(flush),
|
||||
.in ({VX_mem_wb.alu_result, VX_mem_wb.mem_result, VX_mem_wb.rd, VX_mem_wb.wb, VX_mem_wb.PC_next, VX_mem_wb.valid, VX_mem_wb.warp_num}),
|
||||
.out ({VX_mw_wb.alu_result , VX_mw_wb.mem_result , VX_mw_wb.rd , VX_mw_wb.wb , VX_mw_wb.PC_next , VX_mw_wb.valid , VX_mw_wb.warp_num })
|
||||
);
|
||||
|
||||
|
||||
|
||||
endmodule // VX_m_w_reg
|
||||
|
||||
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
# Dynamic Instructions: 67875
|
||||
# of total cycles: 67891
|
||||
# Dynamic Instructions: 52683
|
||||
# of total cycles: 52699
|
||||
# of forwarding stalls: 0
|
||||
# of branch stalls: 0
|
||||
# CPI: 1.00024
|
||||
# CPI: 1.0003
|
||||
# time to simulate: 0 milliseconds
|
||||
# GRADE: Failed on test: 4294967295
|
||||
|
|
|
@ -3,7 +3,7 @@ set link_library [concat * sc12mc_cln28hpm_base_ulvt_c35_ssg_typical_max_0p81v_
|
|||
set symbol_library {}
|
||||
set target_library [concat sc12mc_cln28hpm_base_ulvt_c35_ssg_typical_max_0p81v_m40c.db]
|
||||
|
||||
set verilog_files [ list VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_execute.v VX_scheduler.v VX_fetch.v VX_forwarding.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_memory.v VX_one_counter.v VX_priority_encoder.v VX_warp.v VX_warp_scheduler.v VX_writeback.v Vortex.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_csr_write_request_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_forward_csr_response_inter.v VX_forward_exe_inter.v VX_forward_mem_inter.v VX_forward_reqeust_inter.v VX_forward_response_inter.v VX_forward_wb_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_e_m_reg.v VX_f_d_reg.v VX_m_w_reg.v \
|
||||
set verilog_files [ list VX_inst_exec_wb_inter.v VX_lsu.v VX_execute_unit.v VX_lsu_addr_gen.v VX_inst_multiplex.v VX_exec_unit_req_inter.v VX_lsu_req_inter.v VX_alu.v VX_back_end.v VX_gpr_stage.v VX_gpr_data_inter.v VX_csr_handler.v VX_decode.v VX_define.v VX_scheduler.v VX_fetch.v VX_forwarding.v VX_front_end.v VX_generic_register.v VX_gpr.v VX_gpr_wrapper.v VX_one_counter.v VX_priority_encoder.v VX_warp.v VX_warp_scheduler.v VX_writeback.v Vortex.v byte_enabled_simple_dual_port_ram.v VX_branch_response_inter.v VX_csr_write_request_inter.v VX_dcache_request_inter.v VX_dcache_response_inter.v VX_forward_csr_response_inter.v VX_forward_exe_inter.v VX_forward_mem_inter.v VX_forward_reqeust_inter.v VX_forward_response_inter.v VX_forward_wb_inter.v VX_frE_to_bckE_req_inter.v VX_gpr_clone_inter.v VX_gpr_jal_inter.v VX_gpr_read_inter.v VX_gpr_wspawn_inter.v VX_icache_request_inter.v VX_icache_response_inter.v VX_inst_mem_wb_inter.v VX_inst_meta_inter.v VX_jal_response_inter.v VX_mem_req_inter.v VX_mw_wb_inter.v VX_warp_ctl_inter.v VX_wb_inter.v VX_d_e_reg.v VX_f_d_reg.v \
|
||||
]
|
||||
|
||||
analyze -format sverilog $verilog_files
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue