minor updates
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions

This commit is contained in:
tinebp 2025-01-11 03:24:06 -08:00
parent 083cf04afd
commit 347889c504
11 changed files with 154 additions and 106 deletions

View file

@ -157,7 +157,7 @@
`ifdef QUARTUS
`define MAX_FANOUT 8
`define MAX_LUTRAM 1024
`define FORCE_BRAM(d,w) (d >= 16 || w >= 128 || (d * w) >= 256)
`define USE_BLOCK_BRAM (* ramstyle = "block" *)
`define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *)
`define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *)
@ -168,7 +168,7 @@
`define STRING string
`elsif VIVADO
`define MAX_FANOUT 8
`define MAX_LUTRAM 1024
`define FORCE_BRAM(d,w) (d >= 16 || w >= 128 || (d * w) >= 256)
`define USE_BLOCK_BRAM (* ram_style = "block" *)
`define USE_FAST_BRAM (* ram_style = "distributed" *)
`define NO_RW_RAM_CHECK (* rw_addr_collision = "no" *)
@ -179,7 +179,7 @@
`define STRING
`else
`define MAX_FANOUT 8
`define MAX_LUTRAM 1024
`define FORCE_BRAM(d,w) (d >= 16 || w >= 128 || (d * w) >= 256)
`define USE_BLOCK_BRAM
`define USE_FAST_BRAM
`define NO_RW_RAM_CHECK

View file

@ -106,10 +106,9 @@ module VX_cache import VX_gpu_pkg::*; #(
localparam MEM_ARB_SEL_BITS = `CLOG2(`CDIV(NUM_BANKS, MEM_PORTS));
localparam MEM_ARB_SEL_WIDTH = `UP(MEM_ARB_SEL_BITS);
localparam CORE_RSP_REG_DISABLE = (NUM_BANKS != 1) || (NUM_REQS != 1);
localparam MEM_REQ_REG_DISABLE = (NUM_BANKS != 1);
localparam REQ_XBAR_BUF = (NUM_REQS > 4) ? 2 : 0;
localparam REQ_XBAR_BUF = (NUM_REQS > 2) ? 2 : 0;
localparam CORE_RSP_BUF_ENABLE = (NUM_BANKS != 1) || (NUM_REQS != 1);
localparam MEM_REQ_BUF_ENABLE = (NUM_BANKS != 1);
`ifdef PERF_ENABLE
wire [NUM_BANKS-1:0] perf_read_miss_per_bank;
@ -133,7 +132,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.NUM_BANKS (NUM_BANKS),
.UUID_WIDTH(UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // bank xbar latency
.BANK_SEL_LATENCY (`TO_OUT_BUF_REG(REQ_XBAR_BUF)) // request xbar latency
) flush_unit (
.clk (clk),
.reset (reset),
@ -387,8 +386,8 @@ module VX_cache import VX_gpu_pkg::*; #(
.UUID_WIDTH (UUID_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.FLAGS_WIDTH (FLAGS_WIDTH),
.CORE_OUT_REG (CORE_RSP_REG_DISABLE ? 0 : 1),
.MEM_OUT_REG (MEM_REQ_REG_DISABLE ? 0 : 1)
.CORE_OUT_REG (CORE_RSP_BUF_ENABLE ? 0 : `TO_OUT_BUF_REG(CORE_OUT_BUF)),
.MEM_OUT_REG (MEM_REQ_BUF_ENABLE ? 0 : `TO_OUT_BUF_REG(MEM_OUT_BUF))
) bank (
.clk (clk),
.reset (reset),
@ -481,7 +480,7 @@ module VX_cache import VX_gpu_pkg::*; #(
for (genvar i = 0; i < NUM_REQS; ++i) begin : g_core_rsp_buf
VX_elastic_buffer #(
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
.SIZE (CORE_RSP_REG_DISABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
.SIZE (CORE_RSP_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
.OUT_REG (`TO_OUT_BUF_REG(CORE_OUT_BUF))
) core_rsp_buf (
.clk (clk),
@ -578,7 +577,7 @@ module VX_cache import VX_gpu_pkg::*; #(
VX_elastic_buffer #(
.DATAW (1 + LINE_SIZE + `CS_MEM_ADDR_WIDTH + `CS_LINE_WIDTH + MEM_TAG_WIDTH + `UP(FLAGS_WIDTH)),
.SIZE (MEM_REQ_REG_DISABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
.SIZE (MEM_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(MEM_OUT_BUF) : 0),
.OUT_REG (`TO_OUT_BUF_REG(MEM_OUT_BUF))
) mem_req_buf (
.clk (clk),

View file

@ -153,7 +153,7 @@ module VX_cache_top import VX_gpu_pkg::*; #(
assign mem_rsp_ready[i] = mem_bus_if[i].rsp_ready;
end
VX_cache #(
VX_cache_wrap #(
.INSTANCE_ID (INSTANCE_ID),
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),

View file

@ -8,4 +8,5 @@ FPU_INCLUDE = -I$(RTL_DIR)/fpu
ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
FPU_INCLUDE += -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/include -J$(THIRD_PARTY_DIR)/cvfpu/src/common_cells/src -J$(THIRD_PARTY_DIR)/cvfpu/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/cvfpu/src
endif
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE)
RTL_INCLUDE = -I$(RTL_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(IP_CACHE_DIR) $(FPU_INCLUDE)
RTL_INCLUDE = -I..

View file

@ -5,7 +5,6 @@ DEVICE_FAMILY ?= arria10
PREFIX ?= build$(XLEN)
TARGET ?= fpga
NUM_CORES ?= 1
SRC_DIR := $(VORTEX_HOME)/hw/syn/altera/opae
@ -44,6 +43,7 @@ ifeq ($(DEVICE_FAMILY), arria10)
CONFIGS += -DALTERA_A10
endif
ifdef NUM_CORES
# cluster configuration
CONFIGS_1c := -DNUM_CLUSTERS=1 -DNUM_CORES=1
CONFIGS_2c := -DNUM_CLUSTERS=1 -DNUM_CORES=2
@ -53,6 +53,7 @@ CONFIGS_16c := -DNUM_CLUSTERS=1 -DNUM_CORES=16
CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16
CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16
CONFIGS += $(CONFIGS_$(NUM_CORES)c)
endif
# include sources
RTL_PKGS = $(AFU_DIR)/local_mem_cfg_pkg.sv $(AFU_DIR)/ccip/ccip_if_pkg.sv

View file

@ -47,14 +47,18 @@ TARGET=hw PLATFORM=xilinx_u50_gen3x16_xdma_5_202210_1 make chipscope
# analyze build report
vitis_analyzer build_xilinx_u50_gen3x16_xdma_5_202210_1_hw_4c/bin/vortex_afu.xclbin.link_summary
# resuming build for routing
# resuming builds
TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 VPP_FLAGS="--from_step vpl.synth" make > build.log 2>&1 &
TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 VPP_FLAGS="--from_step vpl.impl" make > build.log 2>&1 &
TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 VPP_FLAGS="--from_step vpl.impl.opt_design" make > build.log 2>&1 &
TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 VPP_FLAGS="--from_step vpl.impl.place_design" make > build.log 2>&1 &
TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 VPP_FLAGS="--from_step vpl.impl.phys_opt_design" make > build.log 2>&1 &
TARGET=hw PLATFORM=xilinx_u55c_gen3x16_xdma_3_202210_1 VPP_FLAGS="--from_step vpl.impl.route_design" make > build.log 2>&1 &
# running test
FPGA_BIN_DIR=<bin_dir> TARGET=hw_emu ./ci/blackbox.sh --driver=xrt --app=demo
FPGA_BIN_DIR=<bin_dir> TARGET=hw ./ci/blackbox.sh --driver=xrt --app=demo
FPGA_BIN_DIR=<bin_dir> TARGET=hw_emu ./ci/blackbox.sh --driver=xrt --app=demo
FPGA_BIN_DIR=<bin_dir> XRT_DEVICE_INDEX=1 TARGET=hw ./ci/blackbox.sh --driver=xrt --app=demo
FPGA_BIN_DIR=<bin_dir> TARGET=hw ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n1024"
FPGA_BIN_DIR=<bin_dir> XRT_DEVICE_INDEX=1 TARGET=hw ./ci/blackbox.sh --driver=xrt --app=sgemm --args="-n1024"
# build report logs
<build_dir>/bin/vortex_afu.xclbin.info

View file

@ -37,10 +37,15 @@ else
endif
clean:
ifndef RESUME
rm -rf project_1
rm -rf .Xil
rm -f *.rpt
rm -f vivado*.log
rm -f vivado*.jou
rm -f *.log
rm -f *.jou
rm -f *.dcp
else
@echo "RESUME is defined, skipping clean."
endif
.PHONY: all gen-sources build clean

View file

@ -11,9 +11,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Start time
set start_time [clock seconds]
if { $::argc != 4 } {
puts "ERROR: Program \"$::argv0\" requires 4 arguments!\n"
puts "Usage: $::argv0 <top_module> <device_part> <vcs_file> <xdc_file>\n"
@ -46,95 +43,135 @@ if {[info exists ::env(MAX_JOBS)]} {
set num_jobs 0
}
# create fpu ip
if {[info exists ::env(FPU_IP)]} {
set ip_dir $::env(FPU_IP)
set argv [list $ip_dir $device_part]
set argc 2
source ${script_dir}/xilinx_ip_gen.tcl
# run_setup: create the project and configure its synthesis/implementation runs.
#
# Takes no arguments; all inputs come from global variables set by the
# top-level script (project_name, top_module, device_part, vcs_file, xdc_file,
# script_dir) and from the optional FPU_IP environment variable.
#
# NOTE(review): both helper scripts below are sourced from inside this proc,
# so their top-level commands execute in this proc's local scope. Local
# variable names here may therefore be visible to them; verify against those
# scripts before renaming anything.
proc run_setup {} {
global project_name
global top_module device_part vcs_file xdc_file
global script_dir source_dir
global num_jobs
global argv argc ;# argv/argc are overwritten below to hand arguments to the sourced IP script
# Create the FPU IP (only when the FPU_IP environment variable is set).
# The source command accepts no script arguments, so the IP directory and
# device part are passed through the global argv/argc variables instead.
# NOTE(review): presumably xilinx_ip_gen.tcl generates the xil_* IP cores
# added further below; confirm against that script.
if {[info exists ::env(FPU_IP)]} {
set ip_dir $::env(FPU_IP)
set argv [list $ip_dir $device_part]
set argc 2
source ${script_dir}/xilinx_ip_gen.tcl
}
# Load the parse_vcs_list helper and parse the VCS file list. The result is
# used as a three-element list: sources, include paths, defines.
source "${script_dir}/parse_vcs_list.tcl"
set vlist [parse_vcs_list "${vcs_file}"]
set vsources_list [lindex $vlist 0]
# vincludes_list is extracted but only referenced by the debug print below.
set vincludes_list [lindex $vlist 1]
set vdefines_list [lindex $vlist 2]
# Debug prints (disabled)
#puts $vsources_list
#puts $vincludes_list
#puts $vdefines_list
# Create project (project name doubles as the project directory; -force
# is passed so an existing project is replaced)
create_project $project_name $project_name -force -part $device_part
# Add constraints file
read_xdc $xdc_file
# Add the design sources
add_files -norecurse -verbose $vsources_list
# Apply the parsed defines to the current fileset
set_property verilog_define ${vdefines_list} [current_fileset]
# Add the FPU IP cores (fma, fdiv, fsqrt) from the FPU_IP directory
if {[info exists ::env(FPU_IP)]} {
set ip_dir $::env(FPU_IP)
add_files -norecurse -verbose ${ip_dir}/xil_fma/xil_fma.xci
add_files -norecurse -verbose ${ip_dir}/xil_fdiv/xil_fdiv.xci
add_files -norecurse -verbose ${ip_dir}/xil_fsqrt/xil_fsqrt.xci
}
# Synthesis settings: select the top module, then pass extra synth_design
# options (out-of-context mode, rebuilt hierarchy flattening) to run synth_1
set_property top $top_module [current_fileset]
set_property \
-name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} \
-value {-mode out_of_context -flatten_hierarchy "rebuilt"} \
-objects [get_runs synth_1]
# Register compilation hooks. Only the pre-opt_design hook on impl_1 is
# active; the remaining hooks are kept commented out for reference.
#set_property STEPS.SYNTH_DESIGN.TCL.PRE ${source_dir}/pre_synth_hook.tcl [get_runs synth_1]
#set_property STEPS.SYNTH_DESIGN.TCL.POST ${source_dir}/post_synth_hook.tcl [get_runs synth_1]
set_property STEPS.OPT_DESIGN.TCL.PRE ${script_dir}/xilinx_async_bram_patch.tcl [get_runs impl_1]
#set_property STEPS.OPT_DESIGN.TCL.POST ${source_dir}/post_opt_hook.tcl [get_runs impl_1]
#set_property STEPS.ROUTE_DESIGN.TCL.PRE ${source_dir}/pre_route_hook.tcl [get_runs impl_1]
#set_property STEPS.ROUTE_DESIGN.TCL.POST ${source_dir}/post_route_hook.tcl [get_runs impl_1]
# Refresh the compile order of the sources_1 fileset after all files and
# properties have been set
update_compile_order -fileset sources_1
}
source "${script_dir}/parse_vcs_list.tcl"
set vlist [parse_vcs_list "${vcs_file}"]
proc run_synthesis {} {
global num_jobs
set vsources_list [lindex $vlist 0]
set vincludes_list [lindex $vlist 1]
set vdefines_list [lindex $vlist 2]
#puts $vsources_list
#puts $vincludes_list
#puts $vdefines_list
# Create project
create_project $project_name $project_name -force -part $device_part
# Add constraints file
read_xdc $xdc_file
# Add the design sources
add_files -norecurse -verbose $vsources_list
# process defines
set_property verilog_define ${vdefines_list} [current_fileset]
# add fpu ip
if {[info exists ::env(FPU_IP)]} {
set ip_dir $::env(FPU_IP)
add_files -norecurse -verbose ${ip_dir}/xil_fma/xil_fma.xci
add_files -norecurse -verbose ${ip_dir}/xil_fdiv/xil_fdiv.xci
add_files -norecurse -verbose ${ip_dir}/xil_fsqrt/xil_fsqrt.xci
if {$num_jobs != 0} {
launch_runs synth_1 -verbose -jobs $num_jobs
} else {
launch_runs synth_1 -verbose
}
wait_on_run synth_1
open_run synth_1
report_utilization -file post_synth_util.rpt -hierarchical -hierarchical_percentages
write_checkpoint -force post_synth.dcp
}
update_compile_order -fileset sources_1
proc run_implementation {} {
global num_jobs
# Synthesis
set_property top $top_module [current_fileset]
if {$num_jobs != 0} {
launch_runs impl_1 -verbose -jobs $num_jobs
} else {
launch_runs impl_1 -verbose
}
wait_on_run impl_1
open_run impl_1
report_utilization -file post_impl_util.rpt -hierarchical -hierarchical_percentages
write_checkpoint -force post_impl.dcp
}
set_property \
-name {STEPS.SYNTH_DESIGN.ARGS.MORE OPTIONS} \
-value {-mode out_of_context -flatten_hierarchy "rebuilt"} \
-objects [get_runs synth_1]
proc run_report {} {
# Generate placement status, routing status, and timing summary reports
report_place_status -file place.rpt
report_route_status -file route.rpt
report_timing_summary -file timing.rpt
# register compilation hooks
#set_property STEPS.SYNTH_DESIGN.TCL.PRE ${source_dir}/pre_synth_hook.tcl [get_runs synth_1]
#set_property STEPS.SYNTH_DESIGN.TCL.POST ${source_dir}/post_synth_hook.tcl [get_runs synth_1]
set_property STEPS.OPT_DESIGN.TCL.PRE ${script_dir}/xilinx_async_bram_patch.tcl [get_runs impl_1]
#set_property STEPS.OPT_DESIGN.TCL.POST ${source_dir}/post_opt_hook.tcl [get_runs impl_1]
#set_property STEPS.ROUTE_DESIGN.TCL.PRE ${source_dir}/pre_route_hook.tcl [get_runs impl_1]
#set_property STEPS.ROUTE_DESIGN.TCL.POST ${source_dir}/post_route_hook.tcl [get_runs impl_1]
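# Generate detailed timing report
# NOTE(review): this targets timing.rpt, the same file written by report_timing_summary above; without -append the summary is replaced - confirm this is intended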
report_timing -nworst 100 -delay_type max -sort_by group -file timing.rpt
if {$num_jobs != 0} {
launch_runs synth_1 -verbose -jobs $num_jobs
# Generate power and drc reports
report_power -file power.rpt
report_drc -file drc.rpt
}
###############################################################################
# Start time
set start_time [clock seconds]
set checkpoint_synth "post_synth.dcp"
set checkpoint_impl "post_impl.dcp"
if { [file exists $checkpoint_impl] } {
puts "Resuming from post-implementation checkpoint: $checkpoint_impl"
open_checkpoint $checkpoint_impl
run_report
} elseif { [file exists $checkpoint_synth] } {
puts "Resuming from post-synthesis checkpoint: $checkpoint_synth"
open_checkpoint $checkpoint_synth
run_implementation
run_report
} else {
launch_runs synth_1 -verbose
# Execute full pipeline
run_setup
run_synthesis
run_implementation
run_report
}
wait_on_run synth_1
open_run synth_1
write_checkpoint -force post_synth.dcp
report_utilization -file post_synth_util.rpt -hierarchical -hierarchical_percentages
# Implementation
if {$num_jobs != 0} {
launch_runs impl_1 -verbose -jobs $num_jobs
} else {
launch_runs impl_1 -verbose
}
wait_on_run impl_1
open_run impl_1
write_checkpoint -force post_impl.dcp
report_utilization -file post_impl_util.rpt -hierarchical -hierarchical_percentages
# Generate the synthesis report
report_place_status -file place.rpt
report_route_status -file route.rpt
report_timing_summary -file timing.rpt
# Generate timing report
report_timing -nworst 10 -delay_type max -sort_by group -file timing.rpt
# Generate power and drc reports
report_power -file power.rpt
report_drc -file drc.rpt
# End time and calculation
set elapsed_time [expr {[clock seconds] - $start_time}]

View file

@ -458,7 +458,7 @@ if { [file exists post_impl.dcp] } {
run_implementation
run_report
} else {
# execute full pipeline
# Execute full pipeline
run_setup
run_synthesis
run_implementation

View file

@ -15,7 +15,6 @@ endif
TARGET ?= hw
PLATFORM ?=
NUM_CORES ?= 1
PREFIX ?= build$(XLEN)
MAX_JOBS ?= 8
@ -64,6 +63,7 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE
DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
ifdef NUM_CORES
# cluster configuration
CONFIGS_1c := -DNUM_CLUSTERS=1 -DNUM_CORES=1
CONFIGS_2c := -DNUM_CLUSTERS=1 -DNUM_CORES=2
@ -73,6 +73,7 @@ CONFIGS_16c := -DNUM_CLUSTERS=1 -DNUM_CORES=16
CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16
CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16
CONFIGS += $(CONFIGS_$(NUM_CORES)c)
endif
# include sources
RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv

View file

@ -5,7 +5,6 @@ SRC_DIR := $(VORTEX_HOME)/hw/syn/yosys
TOP_LEVEL_ENTITY ?= Vortex
PREFIX ?= build
NUM_CORES ?= 1
SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts
RTL_DIR := $(VORTEX_HOME)/hw/rtl
@ -30,7 +29,7 @@ DBG_SCOPE_FLAGS += -DDBG_SCOPE_ISSUE
DBG_SCOPE_FLAGS += -DDBG_SCOPE_FETCH
DBG_SCOPE_FLAGS += -DDBG_SCOPE_LSU
ifdef NUM_CORES
# cluster configuration
CONFIGS_1c := -DNUM_CLUSTERS=1 -DNUM_CORES=1
CONFIGS_2c := -DNUM_CLUSTERS=1 -DNUM_CORES=2
@ -40,6 +39,7 @@ CONFIGS_16c := -DNUM_CLUSTERS=1 -DNUM_CORES=16 -DL2_ENABLE
CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16 -DL2_ENABLE
CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16 -DL2_ENABLE
CONFIGS += $(CONFIGS_$(NUM_CORES)c)
endif
# include paths
FPU_INCLUDE = -I$(RTL_DIR)/fpu