modelsim fixes && pipeline optimization

This commit is contained in:
Blaise Tine 2020-07-28 14:20:23 -07:00
parent 1c9846d10b
commit c2dd0a8b39
38 changed files with 417 additions and 358 deletions

View file

@ -72,7 +72,7 @@ PROJECT = libvortex.so
all: $(PROJECT)
$(PROJECT): $(SRCS)
verilator --exe --cc $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
make -j -C obj_dir -f V$(TOP).mk
clean:

View file

@ -4,16 +4,18 @@ FPGA_BUILD_DIR=build_fpga
all: ase-1c
sources.txt:
./gen_sources.sh
sources.txt:
./gen_sources.sh > sources.txt
ase-1c: setup-ase-1c sources.txt
gen_sources: sources.txt
ase-1c: setup-ase-1c gen_sources
make -C $(ASE_BUILD_DIR)_1c
ase-2c: setup-ase-2c sources.txt
ase-2c: setup-ase-2c gen_sources
make -C $(ASE_BUILD_DIR)_2c
ase-4c: setup-ase-4c sources.txt
ase-4c: setup-ase-4c gen_sources
make -C $(ASE_BUILD_DIR)_4c
setup-ase-1c: $(ASE_BUILD_DIR)_1c/Makefile
@ -22,22 +24,22 @@ setup-ase-2c: $(ASE_BUILD_DIR)_2c/Makefile
setup-ase-4c: $(ASE_BUILD_DIR)_4c/Makefile
$(ASE_BUILD_DIR)_1c/Makefile:
$(ASE_BUILD_DIR)_1c/Makefile: sources.txt
afu_sim_setup -s sources_1c.txt $(ASE_BUILD_DIR)_1c
$(ASE_BUILD_DIR)_2c/Makefile:
$(ASE_BUILD_DIR)_2c/Makefile: sources.txt
afu_sim_setup -s sources_2c.txt $(ASE_BUILD_DIR)_2c
$(ASE_BUILD_DIR)_4c/Makefile:
$(ASE_BUILD_DIR)_4c/Makefile: sources.txt
afu_sim_setup -s sources_4c.txt $(ASE_BUILD_DIR)_4c
fpga-1c: setup-fpga-1c sources.txt
fpga-1c: setup-fpga-1c gen_sources
cd $(FPGA_BUILD_DIR)_1c && qsub-synth
fpga-2c: setup-fpga-2c sources.txt
fpga-2c: setup-fpga-2c gen_sources
cd $(FPGA_BUILD_DIR)_2c && qsub-synth
fpga-4c: setup-fpga-4c sources.txt
fpga-4c: setup-fpga-4c gen_sources
cd $(FPGA_BUILD_DIR)_4c && qsub-synth
setup-fpga-1c: $(FPGA_BUILD_DIR)_1c/build/dcp.qpf

View file

@ -1,21 +1,17 @@
#!/bin/bash
dir_list='../rtl/libs ../rtl/cache ../rtl/interfaces ../rtl'
dir_list='../rtl/libs ../rtl/cache ../rtl/interfaces ../rtl ../rtl/fp_cores/fpnew/src/common_cells/include ../rtl/fp_cores/fpnew/src/common_cells/src ../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl ../rtl/fp_cores/fpnew/src'
inc_list=""
for dir in $dir_list; do
inc_list="$inc_list -I$dir"
done
echo "inc_list=$inc_list"
{
# read design sources
for dir in $dir_list; do
echo "+incdir+$dir"
# Print every Verilog/SystemVerilog file that sits directly inside $dir.
# The two -name tests are grouped so that -type f applies to both of them:
# in a find expression the implicit "and" binds tighter than -o, so without
# the parentheses -type f would only restrict the '*.sv' pattern.
for file in $(find "$dir" -maxdepth 1 -type f \( -name '*.v' -o -name '*.sv' \))
do
    # Quote the path so it is printed verbatim (no second glob expansion).
    echo "$file"
done
# read design sources
for dir in $dir_list; do
echo "+incdir+$dir"
# Print every Verilog/SystemVerilog file that sits directly inside $dir.
# The two -name tests are grouped so that -type f applies to both of them:
# in a find expression the implicit "and" binds tighter than -o, so without
# the parentheses -type f would only restrict the '*.sv' pattern.
for file in $(find "$dir" -maxdepth 1 -type f \( -name '*.v' -o -name '*.sv' \))
do
    # Quote the path so it is printed verbatim (no second glob expansion).
    echo "$file"
done
} > sources.txt
done

View file

@ -65,8 +65,6 @@ module VX_commit #(
assign cmt_to_issue_if.fpu_tag = fpu_commit_if.issue_tag;
assign cmt_to_issue_if.gpu_tag = gpu_commit_if.issue_tag;
assign gpu_commit_if.ready = 1'b1; // doesn't writeback
VX_writeback #(
.CORE_ID(CORE_ID)
) writeback (
@ -77,9 +75,10 @@ module VX_commit #(
.lsu_commit_if (lsu_commit_if),
.csr_commit_if (csr_commit_if),
.mul_commit_if (mul_commit_if),
.fpu_commit_if (fpu_commit_if),
.cmt_to_issue_if(cmt_to_issue_if),
.fpu_commit_if (fpu_commit_if),
.gpu_commit_if (gpu_commit_if),
.cmt_to_issue_if(cmt_to_issue_if),
.writeback_if (writeback_if)
);

View file

@ -57,7 +57,7 @@
`define EXT_M_ENABLE
`define EXT_F_ENABLE
//`define EXT_F_ENABLE
// Configuration Values =======================================================

View file

@ -1,5 +1,5 @@
`include "VX_define.vh"
`include "VX_print_instr.vh"
module VX_decode #(
parameter CORE_ID = 0

View file

@ -1,69 +1,15 @@
`ifndef VX_DEFINE
`define VX_DEFINE
`include "VX_platform.vh"
`include "VX_config.vh"
`include "VX_scope.vh"
`define QUEUE_FORCE_MLAB 1
// `define SYNTHESIS 1
// `define ASIC 1
///////////////////////////////////////////////////////////////////////////////
`ifndef NDEBUG
`define DEBUG_BLOCK(x) /* verilator lint_off UNUSED */ \
x \
/* verilator lint_on UNUSED */
`else
`define DEBUG_BLOCK(x)
`endif
`define DEBUG_BEGIN /* verilator lint_off UNUSED */
`define DEBUG_END /* verilator lint_on UNUSED */
`define IGNORE_WARNINGS_BEGIN /* verilator lint_off UNUSED */ \
/* verilator lint_off PINCONNECTEMPTY */ \
/* verilator lint_off WIDTH */ \
/* verilator lint_off UNOPTFLAT */ \
/* verilator lint_off UNDRIVEN */ \
/* verilator lint_off DECLFILENAME */
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
/* verilator lint_on PINCONNECTEMPTY */ \
/* verilator lint_on WIDTH */ \
/* verilator lint_on UNOPTFLAT */ \
/* verilator lint_on UNDRIVEN */ \
/* verilator lint_on DECLFILENAME */
`define UNUSED_VAR(x) /* verilator lint_off UNUSED */ \
wire [$bits(x)-1:0] __``x``__ = x; \
/* verilator lint_on UNUSED */
`define UNUSED_PIN(x) /* verilator lint_off PINCONNECTEMPTY */ \
. x () \
/* verilator lint_on PINCONNECTEMPTY */
`define STRINGIFY(x) `"x`"
`define STATIC_ASSERT(cond, msg) \
generate \
if (!(cond)) $error(msg); \
endgenerate
`define ENABLE_TRACING /* verilator tracing_on */
`define DISABLE_TRACING /* verilator tracing_off */
`define CLOG2(x) $clog2(x)
`define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > (x)) ? 1 : 0))
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
`define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))
`define MIN(x, y) ((x < y) ? (x) : (y))
`define MAX(x, y) ((x > y) ? (x) : (y))
`define UP(x) (((x) > 0) ? x : 1)
`define QUEUE_FORCE_MLAB 1
// `define SYNTHESIS 1
// `define ASIC 1
///////////////////////////////////////////////////////////////////////////////
@ -87,7 +33,7 @@
`define ISTAG_BITS `LOG2UP(`ISSUEQ_SIZE)
`define LATENCY_IDIV 21
`define LATENCY_IDIV 23
`define LATENCY_IMUL 2
`define LATENCY_FMULADD 2
@ -457,146 +403,13 @@
///////////////////////////////////////////////////////////////////////////////
task print_ex_type;
input [`EX_BITS-1:0] ex;
begin
case (ex)
`EX_ALU: $write("ALU");
`EX_LSU: $write("LSU");
`EX_CSR: $write("CSR");
`EX_MUL: $write("MUL");
`EX_FPU: $write("FPU");
`EX_GPU: $write("GPU");
default: $write("NOP");
endcase
end
endtask
task print_ex_op;
input [`EX_BITS-1:0] ex;
input [`OP_BITS-1:0] op;
begin
case (ex)
`EX_ALU: begin
case (`ALU_BITS'(op))
`ALU_ADD: $write("ADD");
`ALU_SUB: $write("SUB");
`ALU_SLL: $write("SLL");
`ALU_SRL: $write("SRL");
`ALU_SRA: $write("SRA");
`ALU_SLT: $write("SLT");
`ALU_SLTU: $write("SLTU");
`ALU_XOR: $write("XOR");
`ALU_OR: $write("OR");
`ALU_AND: $write("AND");
`ALU_LUI: $write("LUI");
`ALU_AUIPC: $write("AUIPC");
`ALU_BEQ: $write("BEQ");
`ALU_BNE: $write("BNE");
`ALU_BLT: $write("BLT");
`ALU_BGE: $write("BGE");
`ALU_BLTU: $write("BLTU");
`ALU_BGEU: $write("BGEU");
`ALU_JAL: $write("JAL");
`ALU_JALR: $write("JALR");
`ALU_ECALL: $write("ECALL");
`ALU_EBREAK:$write("EBREAK");
`ALU_MRET: $write("MRET");
`ALU_SRET: $write("SRET");
`ALU_DRET: $write("DRET");
default: $write("?");
endcase
end
`EX_LSU: begin
case (`LSU_BITS'(op))
`LSU_LB: $write("LB");
`LSU_LH: $write("LH");
`LSU_LW: $write("LW");
`LSU_LBU: $write("LBU");
`LSU_LHU: $write("LHU");
`LSU_SB: $write("SB");
`LSU_SH: $write("SH");
`LSU_SW: $write("SW");
`LSU_SBU: $write("SBU");
`LSU_SHU: $write("SHU");
default: $write("?");
endcase
end
`EX_CSR: begin
case (`CSR_BITS'(op))
`CSR_RW: $write("CSRW");
`CSR_RS: $write("CSRS");
`CSR_RC: $write("CSRC");
default: $write("?");
endcase
end
`EX_MUL: begin
case (`MUL_BITS'(op))
`MUL_MUL: $write("MUL");
`MUL_MULH: $write("MULH");
`MUL_MULHSU:$write("MULHSU");
`MUL_MULHU: $write("MULHU");
`MUL_DIV: $write("DIV");
`MUL_DIVU: $write("DIVU");
`MUL_REM: $write("REM");
`MUL_REMU: $write("REMU");
default: $write("?");
endcase
end
`EX_FPU: begin
case (`FPU_BITS'(op))
`FPU_ADD: $write("ADD");
`FPU_SUB: $write("SUB");
`FPU_MUL: $write("MUL");
`FPU_DIV: $write("DIV");
`FPU_SQRT: $write("SQRT");
`FPU_MADD: $write("MADD");
`FPU_NMSUB: $write("NMSUB");
`FPU_NMADD: $write("NMADD");
`FPU_SGNJ: $write("SGNJ");
`FPU_SGNJN: $write("SGNJN");
`FPU_SGNJX: $write("SGNJX");
`FPU_MIN: $write("MIN");
`FPU_MAX: $write("MAX");
`FPU_CVTWS: $write("CVTWS");
`FPU_CVTWUS:$write("CVTWUS");
`FPU_CVTSW: $write("CVTSW");
`FPU_CVTSWU:$write("CVTSWU");
`FPU_MVXW: $write("MVXW");
`FPU_MVWX: $write("MVWX");
`FPU_CLASS: $write("CLASS");
`FPU_CMP: $write("CMP");
default: $write("?");
endcase
end
`EX_GPU: begin
case (`GPU_BITS'(op))
`GPU_TMC: $write("TMC");
`GPU_WSPAWN:$write("WSPAWN");
`GPU_SPLIT: $write("SPLIT");
`GPU_JOIN: $write("JOIN");
`GPU_BAR: $write("BAR");
default: $write("?");
endcase
end
default:;
endcase
end
endtask
task print_frm;
input [`FRM_BITS-1:0] frm;
begin
case (frm)
`FRM_RNE: $write("RNE");
`FRM_RTZ: $write("RTZ");
`FRM_RDN: $write("RDN");
`FRM_RUP: $write("RUP");
`FRM_RMM: $write("RMM");
`FRM_DYN: $write("DYN");
default: $write("?");
endcase
end
endtask
typedef struct packed {
logic [`NW_BITS-1:0] warp_num;
logic [`NUM_THREADS-1:0] thread_mask;
logic [31:0] curr_PC;
logic [`NR_BITS-1:0] rd;
logic rd_is_fp;
logic wb;
} is_data_t;
`endif

View file

@ -88,8 +88,10 @@ module VX_execute #(
.alu_commit_if (mul_commit_if)
);
`else
assign mul_req_if.ready = 0;
assign mul_commit_if.valid = 0;
assign mul_req_if.ready = 0;
assign mul_commit_if.valid = 0;
assign mul_commit_if.issue_tag = 0;
assign mul_commit_if.data = 0;
`endif
`ifdef EXT_F_ENABLE
@ -103,9 +105,16 @@ module VX_execute #(
.fpu_commit_if (fpu_commit_if)
);
`else
assign fpu_req_if.ready = 0;
assign fpu_commit_if.valid = 0;
assign fpu_to_csr_if.valid = 0;
assign fpu_req_if.ready = 0;
assign fpu_commit_if.valid = 0;
assign fpu_commit_if.issue_tag = 0;
assign fpu_commit_if.data = 0;
assign fpu_commit_if.upd_fflags = 0;
assign fpu_commit_if.fflags_NV = 0;
assign fpu_commit_if.fflags_DZ = 0;
assign fpu_commit_if.fflags_OF = 0;
assign fpu_commit_if.fflags_UF = 0;
assign fpu_commit_if.fflags_NX = 0;
`endif
VX_gpu_unit #(

View file

@ -121,12 +121,12 @@ module VX_fpu_unit #(
.clk_i (clk),
.rst_ni (1'b1),
.operands_i (fpu_operands),
.rnd_mode_i (fpu_rnd),
.op_i (fpu_op),
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)),
.op_i (fpnew_pkg::operation_e'(fpu_op)),
.op_mod_i (fpu_op_mod),
.src_fmt_i (fpu_src_fmt),
.dst_fmt_i (fpu_dst_fmt),
.int_fmt_i (fpu_int_fmt),
.src_fmt_i (fpnew_pkg::fp_format_e'(fpu_src_fmt)),
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)),
.vectorial_op_i (1'b1),
.tag_i ({fpu_in_tag, fflags_en, is_class_op_i}),
.in_valid_i (fpu_in_valid),
@ -149,9 +149,9 @@ module VX_fpu_unit #(
if (is_class_op_o) begin
integer i;
for (i = 0; i < `NUM_THREADS; i++) begin
integer l = i / 4;
integer w = i % 4;
integer class_mask = fpu_result[l][w * 8 +: 8];
automatic integer l = i / 4;
automatic integer w = i % 4;
automatic integer class_mask = fpu_result[l][w * 8 +: 8];
fpu_result_qual[i][0] = class_mask[7] & class_mask[0];
fpu_result_qual[i][1] = class_mask[7] & class_mask[1];

View file

@ -77,8 +77,18 @@ module VX_gpr_stage #(
assign gpr_read_if.rs1_data = rs1_int_data[gpr_read_if.warp_num];
assign gpr_read_if.rs2_data = rs2_int_data[gpr_read_if.warp_num];
assign gpr_read_if.rs3_data = 0;
assign gpr_delay = 0;
`UNUSED_VAR (schedule_delay)
assign gpr_read_if.ready = 1;
wire valid = gpr_read_if.valid;
wire rs1_is_fp = gpr_read_if.rs1_is_fp;
wire rs2_is_fp = gpr_read_if.rs2_is_fp;
wire use_rs3 = gpr_read_if.use_rs3;
wire [`NR_BITS-1:0] rs3 = gpr_read_if.rs3;
`UNUSED_VAR (valid);
`UNUSED_VAR (rs1_is_fp);
`UNUSED_VAR (rs2_is_fp);
`UNUSED_VAR (use_rs3);
`UNUSED_VAR (rs3);
`endif
assign writeback_if.ready = 1'b1;

View file

@ -51,13 +51,16 @@ module VX_issue_demux (
assign csr_req_if.is_io = 1'b0;
// MUL unit
`ifdef EXT_M_ENABLE
assign mul_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_MUL);
assign mul_req_if.issue_tag = issue_tag;
assign mul_req_if.mul_op = `MUL_OP(decode_if.ex_op);
assign mul_req_if.rs1_data = gpr_read_if.rs1_data;
assign mul_req_if.rs2_data = gpr_read_if.rs2_data;
`endif
// FPU unit
`ifdef EXT_F_ENABLE
assign fpu_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_FPU);
assign fpu_req_if.issue_tag = issue_tag;
assign fpu_req_if.warp_num = decode_if.warp_num;
@ -66,6 +69,7 @@ module VX_issue_demux (
assign fpu_req_if.rs2_data = gpr_read_if.rs2_data;
assign fpu_req_if.rs3_data = gpr_read_if.rs3_data;
assign fpu_req_if.frm = decode_if.frm;
`endif
// GPU unit
assign gpu_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_GPU);

58
hw/rtl/VX_platform.vh Normal file
View file

@ -0,0 +1,58 @@
`ifndef VX_PLATFORM
`define VX_PLATFORM
// DEBUG_BLOCK(x): when NDEBUG is not defined, emits x with Verilator's UNUSED
// lint warning switched off around it; when NDEBUG is defined, expands to
// nothing, so x is dropped from the build.
`ifndef NDEBUG
`define DEBUG_BLOCK(x) /* verilator lint_off UNUSED */ \
x \
/* verilator lint_on UNUSED */
`else
`define DEBUG_BLOCK(x)
`endif
// DEBUG_BEGIN / DEBUG_END: bracket a region of code in which Verilator's
// UNUSED lint warning is switched off (BEGIN) and back on (END).
`define DEBUG_BEGIN /* verilator lint_off UNUSED */
`define DEBUG_END /* verilator lint_on UNUSED */
// IGNORE_WARNINGS_BEGIN: switches off the Verilator lint warnings UNUSED,
// PINCONNECTEMPTY, WIDTH, UNOPTFLAT, UNDRIVEN and DECLFILENAME. Pair it with
// IGNORE_WARNINGS_END to limit the suppression to a region of code.
`define IGNORE_WARNINGS_BEGIN /* verilator lint_off UNUSED */ \
/* verilator lint_off PINCONNECTEMPTY */ \
/* verilator lint_off WIDTH */ \
/* verilator lint_off UNOPTFLAT */ \
/* verilator lint_off UNDRIVEN */ \
/* verilator lint_off DECLFILENAME */
// IGNORE_WARNINGS_END: switches the same six lint warnings back on.
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
/* verilator lint_on PINCONNECTEMPTY */ \
/* verilator lint_on WIDTH */ \
/* verilator lint_on UNOPTFLAT */ \
/* verilator lint_on UNDRIVEN */ \
/* verilator lint_on DECLFILENAME */
// UNUSED_VAR(x): declares a wire named __x__, $bits(x) wide, driven by x, with
// the UNUSED lint warning suppressed around the declaration. The expansion
// already ends its declaration with ';'.
// NOTE(review): x is token-pasted into the wire name, so it presumably has to
// be a plain identifier rather than a hierarchical or indexed expression.
`define UNUSED_VAR(x) /* verilator lint_off UNUSED */ \
wire [$bits(x)-1:0] __``x``__ = x; \
/* verilator lint_on UNUSED */
// UNUSED_PIN(x): expands to a named port connection for port x that is left
// empty, with the PINCONNECTEMPTY lint warning suppressed around it.
`define UNUSED_PIN(x) /* verilator lint_off PINCONNECTEMPTY */ \
. x () \
/* verilator lint_on PINCONNECTEMPTY */
// STRINGIFY(x): turns the macro argument text into a string literal by
// wrapping it in the `" macro-quote delimiters.
`define STRINGIFY(x) `"x`"
// STATIC_ASSERT(cond, msg): emits a generate region that calls $error(msg)
// when cond evaluates to false.
`define STATIC_ASSERT(cond, msg) \
generate \
if (!(cond)) $error(msg); \
endgenerate
// ENABLE_TRACING / DISABLE_TRACING: expand to the Verilator tracing_on /
// tracing_off pragma comments.
`define ENABLE_TRACING /* verilator tracing_on */
`define DISABLE_TRACING /* verilator tracing_off */
// Integer helper macros.
// CLOG2(x): ceiling of log2(x), taken directly from $clog2.
`define CLOG2(x) $clog2(x)
// FLOG2(x): floor of log2(x) - $clog2(x) minus one whenever 2**$clog2(x)
// overshoots x.
`define FLOG2(x) ($clog2(x) - (((1 << $clog2(x)) > (x)) ? 1 : 0))
// LOG2UP(x): $clog2(x) for x > 1, otherwise 1, so the result is never zero.
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
// ISPOW2(x): true when x is non-zero and has exactly one bit set.
`define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))
// MIN(x, y) / MAX(x, y): smaller / larger of the two arguments. Every use of
// an argument is parenthesized so that expression arguments (for example ones
// containing ?:, | or &) keep their intended precedence after expansion.
`define MIN(x, y) (((x) < (y)) ? (x) : (y))
`define MAX(x, y) (((x) > (y)) ? (x) : (y))
// UP(x): x when it is greater than zero, otherwise 1.
`define UP(x) (((x) > 0) ? (x) : 1)
`endif

148
hw/rtl/VX_print_instr.vh Normal file
View file

@ -0,0 +1,148 @@
`ifndef VX_PRINT_INSTR
`define VX_PRINT_INSTR
`include "VX_define.vh"
// Writes the mnemonic of an execution-unit type code using $write, so no
// newline is appended. Codes that match none of the EX_* values print "NOP".
task print_ex_type (
    input [`EX_BITS-1:0] ex
);
    case (ex)
        `EX_ALU: $write("ALU");
        `EX_LSU: $write("LSU");
        `EX_CSR: $write("CSR");
        `EX_MUL: $write("MUL");
        `EX_FPU: $write("FPU");
        `EX_GPU: $write("GPU");
        default: $write("NOP");
    endcase
endtask
// Writes the mnemonic of an operation using $write (no newline appended).
//   ex : execution-unit type code; selects which opcode table is consulted.
//   op : operation code, truncated to the selected unit's opcode width.
// An opcode unknown to the selected unit prints "?"; an unknown unit type
// prints nothing.
task print_ex_op (
    input [`EX_BITS-1:0] ex,
    input [`OP_BITS-1:0] op
);
    case (ex)
        // ALU and branch operations
        `EX_ALU:
            case (`ALU_BITS'(op))
                `ALU_ADD:   $write("ADD");
                `ALU_SUB:   $write("SUB");
                `ALU_SLL:   $write("SLL");
                `ALU_SRL:   $write("SRL");
                `ALU_SRA:   $write("SRA");
                `ALU_SLT:   $write("SLT");
                `ALU_SLTU:  $write("SLTU");
                `ALU_XOR:   $write("XOR");
                `ALU_OR:    $write("OR");
                `ALU_AND:   $write("AND");
                `ALU_LUI:   $write("LUI");
                `ALU_AUIPC: $write("AUIPC");
                `ALU_BEQ:   $write("BEQ");
                `ALU_BNE:   $write("BNE");
                `ALU_BLT:   $write("BLT");
                `ALU_BGE:   $write("BGE");
                `ALU_BLTU:  $write("BLTU");
                `ALU_BGEU:  $write("BGEU");
                `ALU_JAL:   $write("JAL");
                `ALU_JALR:  $write("JALR");
                `ALU_ECALL: $write("ECALL");
                `ALU_EBREAK:$write("EBREAK");
                `ALU_MRET:  $write("MRET");
                `ALU_SRET:  $write("SRET");
                `ALU_DRET:  $write("DRET");
                default:    $write("?");
            endcase
        // Load/store operations
        `EX_LSU:
            case (`LSU_BITS'(op))
                `LSU_LB:  $write("LB");
                `LSU_LH:  $write("LH");
                `LSU_LW:  $write("LW");
                `LSU_LBU: $write("LBU");
                `LSU_LHU: $write("LHU");
                `LSU_SB:  $write("SB");
                `LSU_SH:  $write("SH");
                `LSU_SW:  $write("SW");
                `LSU_SBU: $write("SBU");
                `LSU_SHU: $write("SHU");
                default:  $write("?");
            endcase
        // CSR operations
        `EX_CSR:
            case (`CSR_BITS'(op))
                `CSR_RW: $write("CSRW");
                `CSR_RS: $write("CSRS");
                `CSR_RC: $write("CSRC");
                default: $write("?");
            endcase
        // Multiply/divide operations
        `EX_MUL:
            case (`MUL_BITS'(op))
                `MUL_MUL:   $write("MUL");
                `MUL_MULH:  $write("MULH");
                `MUL_MULHSU:$write("MULHSU");
                `MUL_MULHU: $write("MULHU");
                `MUL_DIV:   $write("DIV");
                `MUL_DIVU:  $write("DIVU");
                `MUL_REM:   $write("REM");
                `MUL_REMU:  $write("REMU");
                default:    $write("?");
            endcase
        // Floating-point operations
        `EX_FPU:
            case (`FPU_BITS'(op))
                `FPU_ADD:   $write("ADD");
                `FPU_SUB:   $write("SUB");
                `FPU_MUL:   $write("MUL");
                `FPU_DIV:   $write("DIV");
                `FPU_SQRT:  $write("SQRT");
                `FPU_MADD:  $write("MADD");
                `FPU_NMSUB: $write("NMSUB");
                `FPU_NMADD: $write("NMADD");
                `FPU_SGNJ:  $write("SGNJ");
                `FPU_SGNJN: $write("SGNJN");
                `FPU_SGNJX: $write("SGNJX");
                `FPU_MIN:   $write("MIN");
                `FPU_MAX:   $write("MAX");
                `FPU_CVTWS: $write("CVTWS");
                `FPU_CVTWUS:$write("CVTWUS");
                `FPU_CVTSW: $write("CVTSW");
                `FPU_CVTSWU:$write("CVTSWU");
                `FPU_MVXW:  $write("MVXW");
                `FPU_MVWX:  $write("MVWX");
                `FPU_CLASS: $write("CLASS");
                `FPU_CMP:   $write("CMP");
                default:    $write("?");
            endcase
        // GPU (warp-control) operations
        `EX_GPU:
            case (`GPU_BITS'(op))
                `GPU_TMC:   $write("TMC");
                `GPU_WSPAWN:$write("WSPAWN");
                `GPU_SPLIT: $write("SPLIT");
                `GPU_JOIN:  $write("JOIN");
                `GPU_BAR:   $write("BAR");
                default:    $write("?");
            endcase
        // Unknown unit type: nothing is printed
        default: ;
    endcase
endtask
// Writes the mnemonic of a floating-point rounding-mode code using $write, so
// no newline is appended. Codes that match none of the FRM_* values print "?".
task print_frm (
    input [`FRM_BITS-1:0] frm
);
    case (frm)
        `FRM_RNE: $write("RNE");
        `FRM_RTZ: $write("RTZ");
        `FRM_RDN: $write("RDN");
        `FRM_RUP: $write("RUP");
        `FRM_RMM: $write("RMM");
        `FRM_DYN: $write("DYN");
        default:  $write("?");
    endcase
endtask
`endif

View file

@ -9,96 +9,114 @@ module VX_writeback #(
// inputs
VX_exu_to_cmt_if alu_commit_if,
VX_exu_to_cmt_if lsu_commit_if,
VX_exu_to_cmt_if mul_commit_if,
VX_fpu_to_cmt_if fpu_commit_if,
VX_exu_to_cmt_if csr_commit_if,
VX_exu_to_cmt_if mul_commit_if,
VX_fpu_to_cmt_if fpu_commit_if,
VX_exu_to_cmt_if gpu_commit_if,
VX_cmt_to_issue_if cmt_to_issue_if,
// outputs
VX_wb_if writeback_if
);
wire alu_valid = alu_commit_if.valid && cmt_to_issue_if.alu_data.wb;
wire lsu_valid = lsu_commit_if.valid && cmt_to_issue_if.lsu_data.wb;
wire csr_valid = csr_commit_if.valid && cmt_to_issue_if.csr_data.wb;
wire mul_valid = mul_commit_if.valid && cmt_to_issue_if.mul_data.wb;
wire fpu_valid = fpu_commit_if.valid && cmt_to_issue_if.fpu_data.wb;
reg [`NUM_THREADS-1:0][31:0] wb_data [`ISSUEQ_SIZE-1:0];
reg [`NW_BITS-1:0] wb_warp_num [`ISSUEQ_SIZE-1:0];
reg [`NUM_THREADS-1:0] wb_thread_mask [`ISSUEQ_SIZE-1:0];
reg [31:0] wb_curr_PC [`ISSUEQ_SIZE-1:0];
reg [`NR_BITS-1:0] wb_rd [`ISSUEQ_SIZE-1:0];
reg wb_rd_is_fp [`ISSUEQ_SIZE-1:0];
reg [`ISSUEQ_SIZE-1:0] wb_pending;
VX_wb_if writeback_tmp_if();
wire [`ISTAG_BITS-1:0] wb_index;
wire wb_valid, wb_valid_unqual;
assign writeback_tmp_if.valid = alu_valid ? alu_commit_if.valid :
lsu_valid ? lsu_commit_if.valid :
csr_valid ? csr_commit_if.valid :
mul_valid ? mul_commit_if.valid :
fpu_valid ? fpu_commit_if.valid :
0;
assign writeback_tmp_if.warp_num = alu_valid ? cmt_to_issue_if.alu_data.warp_num :
lsu_valid ? cmt_to_issue_if.lsu_data.warp_num :
csr_valid ? cmt_to_issue_if.csr_data.warp_num :
mul_valid ? cmt_to_issue_if.mul_data.warp_num :
fpu_valid ? cmt_to_issue_if.fpu_data.warp_num :
0;
assign writeback_tmp_if.curr_PC = alu_valid ? cmt_to_issue_if.alu_data.curr_PC :
lsu_valid ? cmt_to_issue_if.lsu_data.curr_PC :
csr_valid ? cmt_to_issue_if.csr_data.curr_PC :
mul_valid ? cmt_to_issue_if.mul_data.curr_PC :
fpu_valid ? cmt_to_issue_if.fpu_data.curr_PC :
0;
assign writeback_tmp_if.thread_mask = alu_valid ? cmt_to_issue_if.alu_data.thread_mask :
lsu_valid ? cmt_to_issue_if.lsu_data.thread_mask :
csr_valid ? cmt_to_issue_if.csr_data.thread_mask :
mul_valid ? cmt_to_issue_if.mul_data.thread_mask :
fpu_valid ? cmt_to_issue_if.fpu_data.thread_mask :
0;
assign writeback_tmp_if.rd = alu_valid ? cmt_to_issue_if.alu_data.rd :
lsu_valid ? cmt_to_issue_if.lsu_data.rd :
csr_valid ? cmt_to_issue_if.csr_data.rd :
mul_valid ? cmt_to_issue_if.mul_data.rd :
fpu_valid ? cmt_to_issue_if.fpu_data.rd :
0;
assign writeback_tmp_if.rd_is_fp = alu_valid ? 0 :
lsu_valid ? cmt_to_issue_if.lsu_data.rd_is_fp :
csr_valid ? 0 :
mul_valid ? 0 :
fpu_valid ? cmt_to_issue_if.fpu_data.rd_is_fp :
0;
assign writeback_tmp_if.data = alu_valid ? alu_commit_if.data :
lsu_valid ? lsu_commit_if.data :
csr_valid ? csr_commit_if.data :
mul_valid ? mul_commit_if.data :
fpu_valid ? fpu_commit_if.data :
0;
wire stall = ~writeback_if.ready && writeback_if.valid;
VX_generic_register #(
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32) + 1)
) wb_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (0),
.in ({writeback_tmp_if.valid, writeback_tmp_if.warp_num, writeback_tmp_if.curr_PC, writeback_tmp_if.thread_mask, writeback_tmp_if.rd, writeback_tmp_if.rd_is_fp, writeback_tmp_if.data}),
.out ({writeback_if.valid, writeback_if.warp_num, writeback_if.curr_PC, writeback_if.thread_mask, writeback_if.rd, writeback_if.rd_is_fp, writeback_if.data})
VX_priority_encoder #(
.N(`ISSUEQ_SIZE)
) free_slots_encoder (
.data_in (wb_pending),
.data_out (wb_index),
.valid_out (wb_valid_unqual)
);
assign alu_commit_if.ready = !stall;
assign lsu_commit_if.ready = !stall && !alu_valid;
assign csr_commit_if.ready = !stall && !alu_valid && !lsu_valid;
assign mul_commit_if.ready = !stall && !alu_valid && !lsu_valid && !csr_valid;
assign fpu_commit_if.ready = !stall && !alu_valid && !lsu_valid && !csr_valid && !mul_valid;
assign wb_valid = wb_valid_unqual && writeback_if.ready;
always @(posedge clk) begin
if (reset) begin
wb_pending <= 0;
end else begin
if (alu_commit_if.valid) begin
wb_data [alu_commit_if.issue_tag] <= alu_commit_if.data;
wb_warp_num [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.warp_num;
wb_thread_mask [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.thread_mask;
wb_curr_PC [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.curr_PC;
wb_rd [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.rd;
wb_rd_is_fp [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.rd_is_fp;
wb_pending [alu_commit_if.issue_tag] <= cmt_to_issue_if.alu_data.wb;
end
if (lsu_commit_if.valid) begin
wb_data [lsu_commit_if.issue_tag] <= lsu_commit_if.data;
wb_warp_num [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.warp_num;
wb_thread_mask [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.thread_mask;
wb_curr_PC [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.curr_PC;
wb_rd [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.rd;
wb_rd_is_fp [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.rd_is_fp;
wb_pending [lsu_commit_if.issue_tag] <= cmt_to_issue_if.lsu_data.wb;
end
if (csr_commit_if.valid) begin
wb_data [csr_commit_if.issue_tag] <= csr_commit_if.data;
wb_warp_num [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.warp_num;
wb_thread_mask [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.thread_mask;
wb_curr_PC [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.curr_PC;
wb_rd [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.rd;
wb_rd_is_fp [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.rd_is_fp;
wb_pending [csr_commit_if.issue_tag] <= cmt_to_issue_if.csr_data.wb;
end
if (mul_commit_if.valid) begin
wb_data [mul_commit_if.issue_tag] <= mul_commit_if.data;
wb_warp_num [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.warp_num;
wb_thread_mask [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.thread_mask;
wb_curr_PC [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.curr_PC;
wb_rd [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.rd;
wb_rd_is_fp [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.rd_is_fp;
wb_pending [mul_commit_if.issue_tag] <= cmt_to_issue_if.mul_data.wb;
end
if (fpu_commit_if.valid) begin
wb_data [fpu_commit_if.issue_tag] <= fpu_commit_if.data;
wb_warp_num [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.warp_num;
wb_thread_mask [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.thread_mask;
wb_curr_PC [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.curr_PC;
wb_rd [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.rd;
wb_rd_is_fp [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.rd_is_fp;
wb_pending [fpu_commit_if.issue_tag] <= cmt_to_issue_if.fpu_data.wb;
end
if (wb_valid) begin
wb_pending [wb_index] <= 0;
end
end
end
// writeback request
assign writeback_if.valid = wb_pending [wb_index];
assign writeback_if.warp_num = wb_warp_num [wb_index];
assign writeback_if.thread_mask = wb_thread_mask [wb_index];
assign writeback_if.curr_PC = wb_curr_PC [wb_index];
assign writeback_if.rd = wb_rd [wb_index];
assign writeback_if.rd_is_fp = wb_rd_is_fp [wb_index];
assign writeback_if.data = wb_data [wb_index];
// commit back-pressure
assign alu_commit_if.ready = 1'b1;
assign lsu_commit_if.ready = 1'b1;
assign csr_commit_if.ready = 1'b1;
assign mul_commit_if.ready = 1'b1;
assign fpu_commit_if.ready = 1'b1;
assign gpu_commit_if.ready = 1'b1;
// special workaround to get RISC-V tests Pass/Fail status
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;
always @(posedge clk) begin
if (writeback_tmp_if.valid && ~stall) begin
last_wb_value[writeback_tmp_if.rd] <= writeback_tmp_if.data[0];
if (writeback_if.valid) begin
last_wb_value[writeback_if.rd] <= writeback_if.data[0];
end
end

View file

@ -1,10 +1,13 @@
`ifndef VX_CACHE_CONFIG
`define VX_CACHE_CONFIG
`include "VX_define.vh"
`include "VX_platform.vh"
`include "VX_scope.vh"
`define REQ_TAG_WIDTH `MAX(CORE_TAG_WIDTH, SNP_REQ_TAG_WIDTH)
`define REQS_BITS `LOG2UP(NUM_REQUESTS)
// tag rw byteen tid
`define REQ_INST_META_WIDTH (`REQ_TAG_WIDTH + 1 + WORD_SIZE + `REQS_BITS)

View file

@ -1,4 +1,3 @@
`include "VX_cache_config.vh"
module VX_cache_core_req_bank_sel #(

View file

@ -141,11 +141,14 @@ module VX_tag_data_access #(
assign use_read_tag_st1e = DRAM_ENABLE ? read_tag_st1c[STAGE_1_CYCLES-1] : writetag_st1e; // Tag is always the same in SM
assign use_read_dirtyb_st1e= read_dirtyb_st1c[STAGE_1_CYCLES-1];
assign use_read_data_st1e = read_data_st1c[STAGE_1_CYCLES-1];
for (i = 0; i < WORD_SIZE; i++) begin
if (`WORD_SELECT_WIDTH != 0) begin
assign readword_st1e[i * 8 +: 8] = use_read_data_st1e[wordsel_st1e * `WORD_WIDTH +: `WORD_WIDTH][i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
end else begin
if (`WORD_SELECT_WIDTH != 0) begin
wire [`WORD_WIDTH-1:0] readword = use_read_data_st1e[wordsel_st1e * `WORD_WIDTH +: `WORD_WIDTH];
for (i = 0; i < WORD_SIZE; i++) begin
assign readword_st1e[i * 8 +: 8] = readword[i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
end
end else begin
for (i = 0; i < WORD_SIZE; i++) begin
assign readword_st1e[i * 8 +: 8] = use_read_data_st1e[i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
end
end

View file

@ -3,16 +3,6 @@
`include "VX_define.vh"
typedef struct packed {
logic [`NW_BITS-1:0] warp_num;
logic [`NUM_THREADS-1:0] thread_mask;
logic [31:0] curr_PC;
logic [`NR_BITS-1:0] rd;
logic rd_is_fp;
logic wb;
} is_data_t;
interface VX_cmt_to_issue_if ();
wire alu_valid;

View file

@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_platform.vh"
module VX_cam_buffer #(
parameter DATAW = 1,
@ -43,7 +43,7 @@ module VX_cam_buffer #(
if (release_slot[i]) begin
free_slots_n[read_addr[i]] = 1;
end
assign read_data[i] = entries[read_addr[i]];
read_data[i] = entries[read_addr[i]];
end
end

View file

@ -1,3 +1,6 @@
`include "VX_platform.vh"
module VX_countones #(
parameter N = 10
) (

View file

@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_platform.vh"
module VX_divide #(
parameter WIDTHN = 1,

View file

@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_platform.vh"
module VX_fair_arbiter #(
parameter N = 1

View file

@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_platform.vh"
module VX_fixed_arbiter #(
parameter N = 1

View file

@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_platform.vh"
module VX_generic_queue #(
parameter DATAW = 1,

View file

@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_platform.vh"
module VX_generic_register #(
parameter N = 1,

View file

@ -1,4 +1,6 @@
`include "VX_platform.vh"
module VX_generic_stack #(
parameter WIDTH = 1,
parameter DEPTH = 1

View file

@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_platform.vh"
module VX_index_queue #(
parameter DATAW = 1,

View file

@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_platform.vh"
module VX_matrix_arbiter #(
parameter N = 1

View file

@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_platform.vh"
module VX_multiplier #(
parameter WIDTHA = 1,

View file

@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_platform.vh"
module VX_onehot_encoder #(
parameter N = 6

View file

@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_platform.vh"
module VX_priority_encoder #(
parameter N = 1

View file

@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_platform.vh"
module VX_rr_arbiter #(
parameter N = 1

View file

@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_platform.vh"
module VX_scope #(
parameter DATAW = 64,

View file

@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_platform.vh"
module VX_shift_register #(
parameter DATAW = 1,

View file

@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_platform.vh"
module VX_tex_mgr (
input wire clk,

View file

@ -1,4 +1,4 @@
`include "VX_define.vh"
`include "VX_platform.vh"
module VX_tex_unit #(
parameter TADDRW = 32,

View file

@ -29,8 +29,8 @@ CF += -std=c++11 -fms-extensions -I../..
VF += --language 1800-2009 --assert -Wall -Wpedantic
VF += -Wno-DECLFILENAME
VF += --x-initial unique --x-assign unique
VF += -exe $(SRCS) $(INCLUDE)
VF += -cc Vortex.v -top-module Vortex
VF += --exe $(SRCS) $(INCLUDE)
VF += --cc Vortex.v --top-module Vortex
VF += verilator.vlt
DBG += -DVCD_OUTPUT $(DBG_FLAGS)

View file

@ -1,6 +1,8 @@
# Quartus project name, passed to project.tcl via -project; also the base name
# of the generated .qpf/.qsf files.
PROJECT = VX_pipeline
# Top-level entity passed to project.tcl via -top.
TOP_LEVEL_ENTITY = VX_pipeline
# Source file passed to project.tcl via -src.
SRC_FILE = VX_pipeline.v
# Semicolon-separated list of the fpnew FPU directories.
FPU_INCLUDE = ../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
# Complete include list passed to project.tcl via -inc: the FPU directories
# first, then the rtl, rtl/libs and rtl/interfaces directories.
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces
# Files produced by the project-initialization rule.
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Part, Family
@ -49,7 +51,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces"
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)"
syn.chg:
$(STAMP) syn.chg