mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'master' of https://github.gatech.edu/casl/Vortex
This commit is contained in:
commit
fe86fd7936
640 changed files with 394599 additions and 653711 deletions
10
Makefile
10
Makefile
|
@ -4,17 +4,11 @@ all:
|
|||
$(MAKE) -C driver
|
||||
$(MAKE) -C runtime
|
||||
$(MAKE) -C simX
|
||||
$(MAKE) -C benchmarks/opencl
|
||||
|
||||
perf-demo:
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C driver rtlsim
|
||||
$(MAKE) -C driver/tests/demo/ run-rtlsim
|
||||
$(MAKE) -C tests
|
||||
|
||||
clean:
|
||||
$(MAKE) -C hw clean
|
||||
$(MAKE) -C driver clean
|
||||
$(MAKE) -C simX clean
|
||||
$(MAKE) -C runtime clean
|
||||
$(MAKE) -C benchmarks/opencl clean
|
||||
|
||||
$(MAKE) -C tests clean
|
|
@ -120,12 +120,12 @@ case $DRIVER in
|
|||
;;
|
||||
esac
|
||||
|
||||
if [ -d "$VORTEX_HOME/driver/tests/$APP" ];
|
||||
if [ -d "$VORTEX_HOME/tests/opencl/$APP" ];
|
||||
then
|
||||
APP_PATH=$VORTEX_HOME/driver/tests/$APP
|
||||
elif [ -d "$VORTEX_HOME/benchmarks/opencl/$APP" ];
|
||||
APP_PATH=$VORTEX_HOME/tests/opencl/$APP
|
||||
elif [ -d "$VORTEX_HOME/tests/regression/$APP" ];
|
||||
then
|
||||
APP_PATH=$VORTEX_HOME/benchmarks/opencl/$APP
|
||||
APP_PATH=$VORTEX_HOME/tests/regression/$APP
|
||||
else
|
||||
echo "Application folder found: $APP"
|
||||
exit -1
|
||||
|
|
|
@ -5,12 +5,11 @@ set -e
|
|||
|
||||
make -s
|
||||
|
||||
# Dogfood tests
|
||||
./ci/test_runtime.sh
|
||||
./ci/test_riscv_isa.sh
|
||||
./ci/test_opencl.sh
|
||||
./ci/test_driver.sh
|
||||
./ci/test_simx.sh
|
||||
# coverage tests
|
||||
make -C tests/runtime run
|
||||
make -C tests/riscv/isa run
|
||||
make -C tests/opencl run
|
||||
make -C simX run-tests
|
||||
|
||||
# warp/threads configurations
|
||||
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=2 --app=demo
|
||||
|
|
|
@ -7,12 +7,15 @@ set -e
|
|||
make -C runtime clean
|
||||
make -C runtime
|
||||
|
||||
# clear POCL cache
|
||||
rm -rf ~/.cache/pocl
|
||||
|
||||
# rebuild native kernel
|
||||
make -C driver/tests/dogfood clean-all
|
||||
make -C driver/tests/dogfood
|
||||
make -C tests/driver/dogfood clean-all
|
||||
make -C tests/driver/dogfood
|
||||
./ci/blackbox.sh --driver=vlsim --cores=1 --app=dogfood
|
||||
|
||||
# rebuild opencl kernel
|
||||
make -C benchmarks/opencl/sgemm clean-all
|
||||
make -C benchmarks/opencl/sgemm
|
||||
make -C tests/opencl/sgemm clean-all
|
||||
make -C tests/opencl/sgemm
|
||||
./ci/blackbox.sh --driver=vlsim --cores=1 --app=sgemm
|
|
@ -1,6 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
make -C driver/tests run
|
|
@ -1,6 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
make -C benchmarks/opencl run
|
|
@ -1,6 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
make -C benchmarks/riscv_tests/isa run
|
|
@ -1,6 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
make -C runtime/tests run
|
|
@ -1,6 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# exit when any command fails
|
||||
set -e
|
||||
|
||||
make -C simX run
|
|
@ -2,34 +2,33 @@
|
|||
|
||||
The directory/file layout of the Vortex codebase is as follows:
|
||||
|
||||
- `benchmark`: contains opencl, risc-v, and vector tests
|
||||
- `opencl`: contains basic kernel operation tests (e.g., vector add, transpose, dot product)
|
||||
- `riscv`: contains official riscv tests which are pre-compiled into binaries
|
||||
- `vector`: tests for vector instructions (not yet implemented)
|
||||
- `ci`: contains tests to be run during continuous integration (Travis CI)
|
||||
- driver, opencl, riscv_isa, and runtime tests
|
||||
- `driver`: contains driver software implementation (software that is run on the host to communicate with the vortex processor)
|
||||
- `opae`: contains code for driver that runs on FPGA
|
||||
- `rtlsim`: contains code for driver that runs on local machine (driver built using verilator which converts rtl to c++ binary)
|
||||
- `simx`: contains code for driver that runs on local machine (vortex)
|
||||
- `include`: contains vortex.h which has the vortex API that is used by the drivers
|
||||
- `runtime`: contains software used inside kernel programs to expose GPGPU capabilities
|
||||
- `include`: contains vortex API needed for runtime
|
||||
- `linker`: contains linker file for compiling kernels
|
||||
- `src`: contains implementation of vortex API (from include folder)
|
||||
- `tests`: contains runtime tests
|
||||
- `simple`: contains test for GPGPU functionality allowed in vortex
|
||||
- `simx`: contains simX, the cycle approximate simulator for vortex
|
||||
- `miscs`: contains old code that is no longer used
|
||||
- `hw`:
|
||||
- `unit_tests`: contains unit test for RTL of cache and queue
|
||||
- `syn`: contains all synthesis scripts (quartus and yosys)
|
||||
- `quartus`: contains code to synthesize cache, core, pipeline, top, and vortex stand-alone
|
||||
- `simulate`: contains RTL simulator (verilator)
|
||||
- `testbench.cpp`: runs either the riscv, runtime, or opencl tests
|
||||
- `opae`: contains source code for the accelerator functional unit (AFU) and code which programs the fpga
|
||||
- `quartus`: contains synthesis scripts for Intel Quartus toolchain
|
||||
- `opae`: contains synthesis scripts for Intel OPAE FPGA
|
||||
- `simulate`: contains RTL simulator (verilator)
|
||||
- `rtl`: contains rtl source code
|
||||
- `cache`: contains cache subsystem code
|
||||
- `fp_cores`: contains floating point unit code
|
||||
- `interfaces`: contains code that handles communication for each of the units of the microarchitecture
|
||||
- `libs`: contains general-purpose modules (e.g., buffers, encoders, arbiters, pipe registers)
|
||||
- `libs`: contains general-purpose modules (e.g., buffers, encoders, arbiters, pipe registers)
|
||||
- `driver`: contains driver software implementation (software that is run on the host to communicate with the vortex processor)
|
||||
- `include`: contains vortex.h which has the vortex API that is used by the drivers
|
||||
- `opae`: contains code for driver that runs on FPGA
|
||||
- `rtlsim`: contains code for driver that runs on local machine (driver built using verilator which converts rtl to c++ binary)
|
||||
- `simx`: contains code for driver that runs on local machine (vortex)
|
||||
- `runtime`: contains software used inside kernel programs to expose GPGPU capabilities
|
||||
- `include`: contains vortex API needed for runtime
|
||||
- `linker`: contains linker file for compiling kernels
|
||||
- `src`: contains implementation of vortex API (from include folder)
|
||||
- `simX`: contains simX, the cycle approximate simulator for vortex
|
||||
- `tests`: contains the test suite
|
||||
- `runtime`: contains vortex runtime tests
|
||||
- `driver`: contains vortex driver tests
|
||||
- `opencl`: contains opencl tests and benchmarks
|
||||
- `riscv`: contains official riscv tests
|
||||
- `regression`: contains regression tests
|
||||
- `vector`: tests for vector instructions (not yet implemented)
|
||||
- `ci`: contains tests to be run during continuous integration (Travis CI)
|
||||
- `miscs`: contains miscellaneous stuff
|
|
@ -1,4 +1,4 @@
|
|||
# Flubber FPGA Startup and Configuration Guide
|
||||
# FPGA Startup and Configuration Guide
|
||||
|
||||
OPAE Environment Setup
|
||||
----------------------
|
||||
|
@ -27,7 +27,7 @@ To enable L3 cache and profile counters for a build, simply uncomment the defini
|
|||
OPAE Build
|
||||
------------------
|
||||
|
||||
The Flubber FPGA has the following configuration options:
|
||||
The FPGA has the following configuration options:
|
||||
- 1 core fpga (fpga-1c)
|
||||
- 2 cores fpga (fpga-2c)
|
||||
- 4 cores fpga (fpga-4c)
|
|
@ -10,7 +10,7 @@ SimX is a C++ cycle-level in-house simulator developed for Vortex. The relevant
|
|||
|
||||
### FPGA Simulation
|
||||
|
||||
The current target FPGA for simulation is the Arria10 Intel Accelerator Card v1.0. The guide to build the fpga with specific configurations is located [here.](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/Flubber_FPGA_Startup_Guide.md)
|
||||
The current target FPGA for simulation is the Arria10 Intel Accelerator Card v1.0. The guide to build the fpga with specific configurations is located [here.](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/FPGA_Startup_Guide.md)
|
||||
|
||||
### How to Test
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
- [Vortex Cache Subsystem](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/Cache_Subsystem.md)
|
||||
- Vortex Software
|
||||
- [Vortex Simulation](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/Simulation.md)
|
||||
- [FPGA Configuration, Program and Test](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/Flubber_FPGA_Startup_Guide.md)
|
||||
- [FPGA Configuration, Program and Test](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/FPGA_Startup_Guide.md)
|
||||
- Debugging
|
||||
- Useful Links
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
all: stub rtlsim simx opae tests
|
||||
all: stub rtlsim simx opae
|
||||
|
||||
stub:
|
||||
$(MAKE) -C stub
|
||||
|
@ -12,14 +12,10 @@ rtlsim:
|
|||
simx:
|
||||
$(MAKE) -C simx
|
||||
|
||||
tests:
|
||||
$(MAKE) -C tests
|
||||
|
||||
clean:
|
||||
$(MAKE) clean -C stub
|
||||
$(MAKE) clean -C opae
|
||||
$(MAKE) clean -C rtlsim
|
||||
$(MAKE) clean -C simx
|
||||
$(MAKE) clean -C tests
|
||||
|
||||
.PHONY: all stub opae rtlsim simx tests clean
|
||||
.PHONY: all stub opae rtlsim simx clean
|
|
@ -1,54 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import pandas as pd
|
||||
from os import path
|
||||
from glob import glob
|
||||
|
||||
if len(sys.argv) < 2:
|
||||
print('usage: python3 ' + sys.argv[0] + ' <path to test_outputs>')
|
||||
exit()
|
||||
|
||||
output_dir = sys.argv[1]
|
||||
|
||||
|
||||
config_names = []
|
||||
test_names = []
|
||||
cycle_counts = []
|
||||
|
||||
for filename in glob(path.join(output_dir, '*.log')):
|
||||
cycle_line = None
|
||||
with open(filename, 'r') as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line.startswith('[sim] total cycles:'):
|
||||
cycle_line = line
|
||||
print(filename, cycle_line)
|
||||
|
||||
full_name, _, _ = path.basename(filename).partition('.')
|
||||
|
||||
if cycle_line is None:
|
||||
count = None
|
||||
else:
|
||||
_, _, count = cycle_line.partition(':')
|
||||
count = int(count.strip())
|
||||
|
||||
config, test = full_name.rsplit('-', 1)
|
||||
config_names.append(config)
|
||||
test_names.append(test)
|
||||
cycle_counts.append(count)
|
||||
|
||||
df = pd.DataFrame({
|
||||
'config': config_names,
|
||||
'test': test_names,
|
||||
'cycle_count': cycle_counts,
|
||||
})
|
||||
|
||||
print(df.head())
|
||||
|
||||
pivot = pd.pivot_table(df, values='cycle_count', index=['config'], columns=['test'])
|
||||
|
||||
print(pivot.head())
|
||||
|
||||
pivot.to_csv('results.csv')
|
||||
print('Table written to results.csv')
|
||||
|
|
@ -1,101 +0,0 @@
|
|||
|
||||
Warps: 2, Threads: 2
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
292351
|
||||
|
||||
Warps: 2, Threads: 4
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
174990
|
||||
|
||||
Warps: 2, Threads: 8
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
87686
|
||||
|
||||
Warps: 2, Threads: 16
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
44034
|
||||
|
||||
Warps: 2, Threads: 32
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
29981
|
||||
|
||||
Warps: 4, Threads: 2
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
751528
|
||||
|
||||
Warps: 4, Threads: 4
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
490532
|
||||
|
||||
Warps: 4, Threads: 8
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
245460
|
||||
|
||||
Warps: 4, Threads: 16
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
122924
|
||||
|
||||
Warps: 4, Threads: 32
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
61656
|
||||
|
||||
Warps: 8, Threads: 2
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
609347
|
||||
|
||||
Warps: 8, Threads: 4
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
337922
|
||||
|
||||
Warps: 8, Threads: 8
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
103378
|
||||
|
||||
Warps: 8, Threads: 16
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
44932
|
||||
|
||||
Warps: 8, Threads: 32
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
30426
|
||||
|
||||
Warps: 16, Threads: 2
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
446749
|
||||
|
||||
Warps: 16, Threads: 4
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
197890
|
||||
|
||||
Warps: 16, Threads: 8
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
106086
|
||||
|
||||
Warps: 16, Threads: 16
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
46464
|
||||
|
||||
Warps: 16, Threads: 32
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
31573
|
||||
|
||||
Warps: 32, Threads: 2
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
274885
|
||||
|
||||
Warps: 32, Threads: 4
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
185833
|
||||
|
||||
Warps: 32, Threads: 8
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
95355
|
||||
|
||||
Warps: 32, Threads: 16
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
49745
|
||||
|
||||
Warps: 32, Threads: 32
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
|
||||
33326
|
||||
|
|
@ -1,80 +0,0 @@
|
|||
|
||||
Warps: 2, Threads: 2
|
||||
2037907
|
||||
|
||||
Warps: 2, Threads: 4
|
||||
1205061
|
||||
|
||||
Warps: 2, Threads: 8
|
||||
583051
|
||||
|
||||
Warps: 2, Threads: 16
|
||||
358821
|
||||
|
||||
Warps: 2, Threads: 32
|
||||
168914
|
||||
|
||||
Warps: 4, Threads: 2
|
||||
1647415
|
||||
|
||||
Warps: 4, Threads: 4
|
||||
Warps: 4, Threads: 2
|
||||
1719354
|
||||
|
||||
Warps: 4, Threads: 4
|
||||
837672
|
||||
|
||||
Warps: 4, Threads: 8
|
||||
358354
|
||||
|
||||
Warps: 4, Threads: 16
|
||||
218991
|
||||
|
||||
Warps: 4, Threads: 32
|
||||
174153
|
||||
|
||||
Warps: 8, Threads: 2
|
||||
1684691
|
||||
|
||||
Warps: 8, Threads: 4
|
||||
1035207
|
||||
|
||||
Warps: 8, Threads: 8
|
||||
552477
|
||||
|
||||
Warps: 8, Threads: 16
|
||||
316346
|
||||
|
||||
Warps: 8, Threads: 32
|
||||
128139
|
||||
|
||||
Warps: 16, Threads: 2
|
||||
1666519
|
||||
|
||||
Warps: 16, Threads: 4
|
||||
1043940
|
||||
|
||||
Warps: 16, Threads: 8
|
||||
554168
|
||||
|
||||
Warps: 16, Threads: 16
|
||||
316615
|
||||
|
||||
Warps: 16, Threads: 32
|
||||
131018
|
||||
|
||||
Warps: 32, Threads: 2
|
||||
1637051
|
||||
|
||||
Warps: 32, Threads: 4
|
||||
1036768
|
||||
|
||||
Warps: 32, Threads: 8
|
||||
544135
|
||||
|
||||
Warps: 32, Threads: 16
|
||||
310251
|
||||
|
||||
Warps: 32, Threads: 32
|
||||
157421
|
||||
|
|
@ -1,276 +0,0 @@
|
|||
|
||||
Warps: 2, Threads: 2
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
14663775
|
||||
Warps: 2, Threads: 4
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
10280838
|
||||
Warps: 2, Threads: 8
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
5133778
|
||||
Warps: 2, Threads: 16
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
2670416
|
||||
Warps: 2, Threads: 32
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
1353300
|
||||
Warps: 4, Threads: 2
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
14014523
|
||||
Warps: 4, Threads: 4
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
6700429
|
||||
Warps: 4, Threads: 8
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
4196995
|
||||
Warps: 4, Threads: 16
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
2179254
|
||||
Warps: 4, Threads: 32
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
1303963
|
||||
Warps: 8, Threads: 2
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
8146968
|
||||
Warps: 8, Threads: 4
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
4180557
|
||||
Warps: 8, Threads: 8
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
1946300
|
||||
Warps: 8, Threads: 16
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
1056178
|
||||
Warps: 8, Threads: 32
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
449062
|
||||
Warps: 16, Threads: 2
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
4103843
|
||||
Warps: 16, Threads: 4
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
2198894
|
||||
Warps: 16, Threads: 8
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
1080948
|
||||
Warps: 16, Threads: 16
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
630038
|
||||
Warps: 16, Threads: 32
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
448537
|
||||
Warps: 32, Threads: 2
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
2512219
|
||||
Warps: 32, Threads: 4
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
1524192
|
||||
Warps: 32, Threads: 8
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
936191
|
||||
Warps: 32, Threads: 16
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
446168
|
||||
Warps: 32, Threads: 32
|
||||
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
|
||||
ctx->num_groups[0]: 64
|
||||
ctx->num_groups[1]: 64
|
||||
ctx->num_groups[2]: 64
|
||||
|
||||
|
||||
ctx->local_size[0]: 1
|
||||
ctx->local_size[1]: 1
|
||||
ctx->local_size[2]: 1
|
||||
380334
|
|
@ -1,51 +0,0 @@
|
|||
|
||||
Warps: 2, Threads: 2
|
||||
1.313778
|
||||
Warps: 2, Threads: 4
|
||||
1.869814
|
||||
Warps: 2, Threads: 8
|
||||
3.794385
|
||||
Warps: 2, Threads: 16
|
||||
7.532425
|
||||
Warps: 2, Threads: 32
|
||||
15.194329
|
||||
Warps: 4, Threads: 2
|
||||
1.373928
|
||||
Warps: 4, Threads: 4
|
||||
2.106374
|
||||
Warps: 4, Threads: 8
|
||||
4.214628
|
||||
Warps: 4, Threads: 16
|
||||
8.372964
|
||||
Warps: 4, Threads: 32
|
||||
16.604193
|
||||
Warps: 8, Threads: 2
|
||||
0.647895
|
||||
Warps: 8, Threads: 4
|
||||
1.232910
|
||||
Warps: 8, Threads: 8
|
||||
2.505588
|
||||
Warps: 8, Threads: 16
|
||||
5.622365
|
||||
Warps: 8, Threads: 32
|
||||
13.141898
|
||||
Warps: 16, Threads: 2
|
||||
0.683937
|
||||
Warps: 16, Threads: 4
|
||||
1.362874
|
||||
Warps: 16, Threads: 8
|
||||
2.877766
|
||||
Warps: 16, Threads: 16
|
||||
7.303546
|
||||
Warps: 16, Threads: 32
|
||||
12.981466
|
||||
Warps: 32, Threads: 2
|
||||
0.919473
|
||||
Warps: 32, Threads: 4
|
||||
1.601678
|
||||
Warps: 32, Threads: 8
|
||||
3.462736
|
||||
Warps: 32, Threads: 16
|
||||
7.460658
|
||||
Warps: 32, Threads: 32
|
||||
14.898925
|
|
@ -1,27 +0,0 @@
|
|||
|
||||
|
||||
for PROJECT in sfilter; do
|
||||
echo "" > $PROJECT.result
|
||||
for number_of_warps in 2 4 8 16 32; do
|
||||
for number_of_threads in 2 4 8 16 32; do
|
||||
|
||||
echo "$PROJECT = Warp Count: $number_of_warps Thread Count: $number_of_threads Launched"
|
||||
echo "#define TOTAL_THREADS $number_of_threads" > ../../runtime/config.h
|
||||
echo "#define TOTAL_WARPS $number_of_warps" >> ../../runtime/config.h
|
||||
|
||||
cd ../opencl/$PROJECT
|
||||
make clean &>> /dev/null
|
||||
make &>> /dev/null
|
||||
cd ../../test_benchmark
|
||||
|
||||
echo "Warps: $number_of_warps, Threads: $number_of_threads" >> $PROJECT.result
|
||||
|
||||
# echo ../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/$PROJECT/$PROJECT.hex -s -b &>> $PROJECT.result
|
||||
|
||||
../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/$PROJECT/$PROJECT.hex -s -b &>> $PROJECT.result
|
||||
|
||||
|
||||
done
|
||||
done
|
||||
|
||||
done
|
|
@ -1,24 +0,0 @@
|
|||
|
||||
PROJECT=sgemm
|
||||
|
||||
echo "" > $PROJECT.result
|
||||
|
||||
for number_of_warps in 2 4 8 16 32; do
|
||||
for number_of_threads in 2 4 8 16 32; do
|
||||
|
||||
echo "Warp Count: $number_of_warps Thread Count: $number_of_threads Launched"
|
||||
echo "#define TOTAL_THREADS $number_of_threads" > ../../runtime/config.h
|
||||
echo "#define TOTAL_WARPS $number_of_warps" >> ../../runtime/config.h
|
||||
|
||||
cd ../opencl/$PROJECT
|
||||
make clean &>> /dev/null
|
||||
make &>> /dev/null
|
||||
cd ../../test_benchmark
|
||||
|
||||
echo "Warps: $number_of_warps, Threads: $number_of_threads" >> $PROJECT.result
|
||||
|
||||
../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/$PROJECT/$PROJECT.hex -s -b &>> $PROJECT.result
|
||||
|
||||
|
||||
done
|
||||
done
|
|
@ -1,17 +0,0 @@
|
|||
Fitter Status : Successful - Sat Mar 6 08:45:37 2021
|
||||
Quartus Prime Version : 19.2.0 Build 57 06/24/2019 Patches 0.01rc SJ Pro Edition
|
||||
Revision Name : afu_default
|
||||
Top-level Entity Name : dcp_top
|
||||
Family : Arria 10
|
||||
Device : 10AX115N2F40E2LG
|
||||
Timing Models : Final
|
||||
Logic utilization (in ALMs) : 359,139 / 427,200 ( 84 % )
|
||||
Total registers : 546782
|
||||
Total pins : 310 / 826 ( 38 % )
|
||||
Total virtual pins : 0
|
||||
Total block memory bits : 12,692,200 / 55,562,240 ( 23 % )
|
||||
Total RAM Blocks : 2,285 / 2,713 ( 84 % )
|
||||
Total DSP Blocks : 448 / 1,518 ( 30 % )
|
||||
Total HSSI RX channels : 12 / 48 ( 25 % )
|
||||
Total HSSI TX channels : 12 / 48 ( 25 % )
|
||||
Total PLLs : 25 / 112 ( 22 % )
|
File diff suppressed because it is too large
Load diff
|
@ -1,4 +0,0 @@
|
|||
Synthesis Status : Successful - Sat Mar 6 05:12:07 2021
|
||||
Revision Name : afu_default
|
||||
Top-level Entity Name : dcp_top
|
||||
Family : Arria 10
|
File diff suppressed because it is too large
Load diff
|
@ -1,29 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./guassian
|
||||
enter demo main
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=16, num_warps=4, num_threads=4
|
||||
OK
|
||||
The result of matrix m is:
|
||||
0.00 0.00 0.00 0.00
|
||||
0.50 0.00 0.00 0.00
|
||||
0.67 0.26 0.00 0.00
|
||||
-0.00 0.15 -0.28 0.00
|
||||
|
||||
The result of matrix a is:
|
||||
-0.60 -0.50 0.70 0.30
|
||||
0.00 -0.65 -0.05 0.55
|
||||
0.00 0.00 -0.75 -1.14
|
||||
0.00 0.00 0.00 0.50
|
||||
|
||||
The result of array b is:
|
||||
-0.85 -0.25 0.87 -0.25
|
||||
|
||||
The final solution is:
|
||||
0.70 0.00 -0.40 -0.50
|
||||
|
||||
Passed!
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'
|
|
@ -1,19 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./nearn
|
||||
loading db: cane4_0.db
|
||||
loading db: cane4_1.db
|
||||
loading db: cane4_2.db
|
||||
Number of records: 1500
|
||||
Finding the 5 closest neighbors.
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=16, num_warps=4, num_threads=4
|
||||
1974 12 22 18 24 JOYCE 30.6 89.9 80 593 --> Distance=0.608276
|
||||
1965 5 13 0 17 TONY 27.8 89.0 122 260 --> Distance=2.416610
|
||||
1991 3 18 12 19 DEBBY 28.5 87.8 107 850 --> Distance=2.662703
|
||||
1957 4 17 6 12 ALBERTO 32.5 87.8 54 510 --> Distance=3.330163
|
||||
1964 8 5 6 9 FLORENCE 31.5 86.3 18 242 --> Distance=3.992490
|
||||
Passed!
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'
|
|
@ -1,19 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./saxpy
|
||||
enter demo main
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=16, num_warps=4, num_threads=4
|
||||
Attempting to create program from binary...
|
||||
Read program from binary.
|
||||
attempting to create input buffer
|
||||
attempting to create output buffer
|
||||
attempting to create kernel
|
||||
setting up kernel args
|
||||
attempting to enqueue write buffer
|
||||
attempting to enqueue kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'
|
|
@ -1,19 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sfilter
|
||||
enter demo main
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=16, num_warps=4, num_threads=4
|
||||
Attempting to create program from binary...
|
||||
Read program from binary.
|
||||
attempting to create input buffer
|
||||
attempting to create output buffer
|
||||
attempting to create kernel
|
||||
setting up kernel args
|
||||
attempting to enqueue write buffer
|
||||
attempting to enqueue kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'
|
|
@ -1,458 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sgemm -n32
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=16, num_warps=4, num_threads=4
|
||||
Create context
|
||||
Create program from kernel source
|
||||
Upload source buffers
|
||||
Execute the kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
Verify result
|
||||
PASSED!
|
||||
PERF: core0: instrs=23498, cycles=16249, IPC=1.446120
|
||||
PERF: core0: ibuffer stalls=2272
|
||||
PERF: core0: scoreboard stalls=4197
|
||||
PERF: core0: alu unit stalls=737
|
||||
PERF: core0: lsu unit stalls=355
|
||||
PERF: core0: csr unit stalls=0
|
||||
PERF: core0: fpu unit stalls=3
|
||||
PERF: core0: gpu unit stalls=0
|
||||
PERF: core0: icache reads=6155
|
||||
PERF: core0: icache read misses=73 (hit ratio=98%)
|
||||
PERF: core0: icache pipeline stalls=2466
|
||||
PERF: core0: icache reponse stalls=2272
|
||||
PERF: core0: dcache reads=2862
|
||||
PERF: core0: dcache writes=101
|
||||
PERF: core0: dcache read misses=634 (hit ratio=77%)
|
||||
PERF: core0: dcache write misses=97 (hit ratio=3%)
|
||||
PERF: core0: dcache bank stalls=2189 (utilization=57%)
|
||||
PERF: core0: dcache mshr stalls=2617
|
||||
PERF: core0: dcache pipeline stalls=4967
|
||||
PERF: core0: dcache reponse stalls=16
|
||||
PERF: core0: smem reads=538
|
||||
PERF: core0: smem writes=447
|
||||
PERF: core0: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core0: dram requests=226 (reads=125, writes=101)
|
||||
PERF: core0: dram stalls=1211 (utilization=15%)
|
||||
PERF: core0: dram average latency=31 cycles
|
||||
PERF: core1: instrs=23498, cycles=16180, IPC=1.452287
|
||||
PERF: core1: ibuffer stalls=2244
|
||||
PERF: core1: scoreboard stalls=4144
|
||||
PERF: core1: alu unit stalls=735
|
||||
PERF: core1: lsu unit stalls=399
|
||||
PERF: core1: csr unit stalls=0
|
||||
PERF: core1: fpu unit stalls=1
|
||||
PERF: core1: gpu unit stalls=0
|
||||
PERF: core1: icache reads=6155
|
||||
PERF: core1: icache read misses=73 (hit ratio=98%)
|
||||
PERF: core1: icache pipeline stalls=2462
|
||||
PERF: core1: icache reponse stalls=2244
|
||||
PERF: core1: dcache reads=2862
|
||||
PERF: core1: dcache writes=101
|
||||
PERF: core1: dcache read misses=635 (hit ratio=77%)
|
||||
PERF: core1: dcache write misses=97 (hit ratio=3%)
|
||||
PERF: core1: dcache bank stalls=2190 (utilization=57%)
|
||||
PERF: core1: dcache mshr stalls=2515
|
||||
PERF: core1: dcache pipeline stalls=4793
|
||||
PERF: core1: dcache reponse stalls=16
|
||||
PERF: core1: smem reads=538
|
||||
PERF: core1: smem writes=447
|
||||
PERF: core1: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core1: dram requests=227 (reads=126, writes=101)
|
||||
PERF: core1: dram stalls=1257 (utilization=15%)
|
||||
PERF: core1: dram average latency=30 cycles
|
||||
PERF: core2: instrs=23498, cycles=16179, IPC=1.452376
|
||||
PERF: core2: ibuffer stalls=2224
|
||||
PERF: core2: scoreboard stalls=4120
|
||||
PERF: core2: alu unit stalls=730
|
||||
PERF: core2: lsu unit stalls=423
|
||||
PERF: core2: csr unit stalls=0
|
||||
PERF: core2: fpu unit stalls=2
|
||||
PERF: core2: gpu unit stalls=0
|
||||
PERF: core2: icache reads=6155
|
||||
PERF: core2: icache read misses=73 (hit ratio=98%)
|
||||
PERF: core2: icache pipeline stalls=2455
|
||||
PERF: core2: icache reponse stalls=2224
|
||||
PERF: core2: dcache reads=2862
|
||||
PERF: core2: dcache writes=101
|
||||
PERF: core2: dcache read misses=634 (hit ratio=77%)
|
||||
PERF: core2: dcache write misses=97 (hit ratio=3%)
|
||||
PERF: core2: dcache bank stalls=2187 (utilization=57%)
|
||||
PERF: core2: dcache mshr stalls=2417
|
||||
PERF: core2: dcache pipeline stalls=4427
|
||||
PERF: core2: dcache reponse stalls=16
|
||||
PERF: core2: smem reads=538
|
||||
PERF: core2: smem writes=447
|
||||
PERF: core2: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core2: dram requests=226 (reads=125, writes=101)
|
||||
PERF: core2: dram stalls=1123 (utilization=16%)
|
||||
PERF: core2: dram average latency=31 cycles
|
||||
PERF: core3: instrs=23498, cycles=16102, IPC=1.459322
|
||||
PERF: core3: ibuffer stalls=2190
|
||||
PERF: core3: scoreboard stalls=4072
|
||||
PERF: core3: alu unit stalls=741
|
||||
PERF: core3: lsu unit stalls=410
|
||||
PERF: core3: csr unit stalls=0
|
||||
PERF: core3: fpu unit stalls=1
|
||||
PERF: core3: gpu unit stalls=0
|
||||
PERF: core3: icache reads=6155
|
||||
PERF: core3: icache read misses=73 (hit ratio=98%)
|
||||
PERF: core3: icache pipeline stalls=2380
|
||||
PERF: core3: icache reponse stalls=2190
|
||||
PERF: core3: dcache reads=2862
|
||||
PERF: core3: dcache writes=101
|
||||
PERF: core3: dcache read misses=634 (hit ratio=77%)
|
||||
PERF: core3: dcache write misses=97 (hit ratio=3%)
|
||||
PERF: core3: dcache bank stalls=2192 (utilization=57%)
|
||||
PERF: core3: dcache mshr stalls=2345
|
||||
PERF: core3: dcache pipeline stalls=3768
|
||||
PERF: core3: dcache reponse stalls=16
|
||||
PERF: core3: smem reads=538
|
||||
PERF: core3: smem writes=447
|
||||
PERF: core3: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core3: dram requests=226 (reads=125, writes=101)
|
||||
PERF: core3: dram stalls=699 (utilization=24%)
|
||||
PERF: core3: dram average latency=30 cycles
|
||||
PERF: core4: instrs=23498, cycles=16254, IPC=1.445675
|
||||
PERF: core4: ibuffer stalls=2311
|
||||
PERF: core4: scoreboard stalls=4269
|
||||
PERF: core4: alu unit stalls=733
|
||||
PERF: core4: lsu unit stalls=377
|
||||
PERF: core4: csr unit stalls=0
|
||||
PERF: core4: fpu unit stalls=0
|
||||
PERF: core4: gpu unit stalls=0
|
||||
PERF: core4: icache reads=6155
|
||||
PERF: core4: icache read misses=73 (hit ratio=98%)
|
||||
PERF: core4: icache pipeline stalls=2532
|
||||
PERF: core4: icache reponse stalls=2311
|
||||
PERF: core4: dcache reads=2862
|
||||
PERF: core4: dcache writes=101
|
||||
PERF: core4: dcache read misses=653 (hit ratio=77%)
|
||||
PERF: core4: dcache write misses=97 (hit ratio=3%)
|
||||
PERF: core4: dcache bank stalls=2189 (utilization=57%)
|
||||
PERF: core4: dcache mshr stalls=2519
|
||||
PERF: core4: dcache pipeline stalls=4555
|
||||
PERF: core4: dcache reponse stalls=16
|
||||
PERF: core4: smem reads=538
|
||||
PERF: core4: smem writes=447
|
||||
PERF: core4: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core4: dram requests=233 (reads=132, writes=101)
|
||||
PERF: core4: dram stalls=1018 (utilization=18%)
|
||||
PERF: core4: dram average latency=30 cycles
|
||||
PERF: core5: instrs=23498, cycles=16177, IPC=1.452556
|
||||
PERF: core5: ibuffer stalls=2232
|
||||
PERF: core5: scoreboard stalls=4137
|
||||
PERF: core5: alu unit stalls=730
|
||||
PERF: core5: lsu unit stalls=411
|
||||
PERF: core5: csr unit stalls=0
|
||||
PERF: core5: fpu unit stalls=1
|
||||
PERF: core5: gpu unit stalls=0
|
||||
PERF: core5: icache reads=6155
|
||||
PERF: core5: icache read misses=73 (hit ratio=98%)
|
||||
PERF: core5: icache pipeline stalls=2454
|
||||
PERF: core5: icache reponse stalls=2232
|
||||
PERF: core5: dcache reads=2862
|
||||
PERF: core5: dcache writes=101
|
||||
PERF: core5: dcache read misses=634 (hit ratio=77%)
|
||||
PERF: core5: dcache write misses=97 (hit ratio=3%)
|
||||
PERF: core5: dcache bank stalls=2184 (utilization=57%)
|
||||
PERF: core5: dcache mshr stalls=2446
|
||||
PERF: core5: dcache pipeline stalls=4560
|
||||
PERF: core5: dcache reponse stalls=16
|
||||
PERF: core5: smem reads=538
|
||||
PERF: core5: smem writes=447
|
||||
PERF: core5: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core5: dram requests=226 (reads=125, writes=101)
|
||||
PERF: core5: dram stalls=1086 (utilization=17%)
|
||||
PERF: core5: dram average latency=30 cycles
|
||||
PERF: core6: instrs=23498, cycles=16164, IPC=1.453724
|
||||
PERF: core6: ibuffer stalls=2228
|
||||
PERF: core6: scoreboard stalls=4108
|
||||
PERF: core6: alu unit stalls=727
|
||||
PERF: core6: lsu unit stalls=419
|
||||
PERF: core6: csr unit stalls=0
|
||||
PERF: core6: fpu unit stalls=3
|
||||
PERF: core6: gpu unit stalls=0
|
||||
PERF: core6: icache reads=6155
|
||||
PERF: core6: icache read misses=73 (hit ratio=98%)
|
||||
PERF: core6: icache pipeline stalls=2434
|
||||
PERF: core6: icache reponse stalls=2228
|
||||
PERF: core6: dcache reads=2862
|
||||
PERF: core6: dcache writes=101
|
||||
PERF: core6: dcache read misses=634 (hit ratio=77%)
|
||||
PERF: core6: dcache write misses=97 (hit ratio=3%)
|
||||
PERF: core6: dcache bank stalls=2190 (utilization=57%)
|
||||
PERF: core6: dcache mshr stalls=2451
|
||||
PERF: core6: dcache pipeline stalls=4321
|
||||
PERF: core6: dcache reponse stalls=16
|
||||
PERF: core6: smem reads=538
|
||||
PERF: core6: smem writes=447
|
||||
PERF: core6: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core6: dram requests=226 (reads=125, writes=101)
|
||||
PERF: core6: dram stalls=930 (utilization=19%)
|
||||
PERF: core6: dram average latency=31 cycles
|
||||
PERF: core7: instrs=23498, cycles=16105, IPC=1.459050
|
||||
PERF: core7: ibuffer stalls=2189
|
||||
PERF: core7: scoreboard stalls=4068
|
||||
PERF: core7: alu unit stalls=746
|
||||
PERF: core7: lsu unit stalls=411
|
||||
PERF: core7: csr unit stalls=0
|
||||
PERF: core7: fpu unit stalls=0
|
||||
PERF: core7: gpu unit stalls=0
|
||||
PERF: core7: icache reads=6155
|
||||
PERF: core7: icache read misses=73 (hit ratio=98%)
|
||||
PERF: core7: icache pipeline stalls=2369
|
||||
PERF: core7: icache reponse stalls=2189
|
||||
PERF: core7: dcache reads=2862
|
||||
PERF: core7: dcache writes=101
|
||||
PERF: core7: dcache read misses=634 (hit ratio=77%)
|
||||
PERF: core7: dcache write misses=97 (hit ratio=3%)
|
||||
PERF: core7: dcache bank stalls=2189 (utilization=57%)
|
||||
PERF: core7: dcache mshr stalls=2357
|
||||
PERF: core7: dcache pipeline stalls=3798
|
||||
PERF: core7: dcache reponse stalls=16
|
||||
PERF: core7: smem reads=538
|
||||
PERF: core7: smem writes=447
|
||||
PERF: core7: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core7: dram requests=226 (reads=125, writes=101)
|
||||
PERF: core7: dram stalls=763 (utilization=22%)
|
||||
PERF: core7: dram average latency=30 cycles
|
||||
PERF: core8: instrs=23498, cycles=16256, IPC=1.445497
|
||||
PERF: core8: ibuffer stalls=2249
|
||||
PERF: core8: scoreboard stalls=4153
|
||||
PERF: core8: alu unit stalls=740
|
||||
PERF: core8: lsu unit stalls=382
|
||||
PERF: core8: csr unit stalls=0
|
||||
PERF: core8: fpu unit stalls=4
|
||||
PERF: core8: gpu unit stalls=0
|
||||
PERF: core8: icache reads=6155
|
||||
PERF: core8: icache read misses=73 (hit ratio=98%)
|
||||
PERF: core8: icache pipeline stalls=2457
|
||||
PERF: core8: icache reponse stalls=2249
|
||||
PERF: core8: dcache reads=2862
|
||||
PERF: core8: dcache writes=101
|
||||
PERF: core8: dcache read misses=634 (hit ratio=77%)
|
||||
PERF: core8: dcache write misses=97 (hit ratio=3%)
|
||||
PERF: core8: dcache bank stalls=2193 (utilization=57%)
|
||||
PERF: core8: dcache mshr stalls=2563
|
||||
PERF: core8: dcache pipeline stalls=5209
|
||||
PERF: core8: dcache reponse stalls=15
|
||||
PERF: core8: smem reads=538
|
||||
PERF: core8: smem writes=447
|
||||
PERF: core8: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core8: dram requests=226 (reads=125, writes=101)
|
||||
PERF: core8: dram stalls=1474 (utilization=13%)
|
||||
PERF: core8: dram average latency=31 cycles
|
||||
PERF: core9: instrs=23498, cycles=16264, IPC=1.444786
|
||||
PERF: core9: ibuffer stalls=2245
|
||||
PERF: core9: scoreboard stalls=4151
|
||||
PERF: core9: alu unit stalls=742
|
||||
PERF: core9: lsu unit stalls=385
|
||||
PERF: core9: csr unit stalls=0
|
||||
PERF: core9: fpu unit stalls=2
|
||||
PERF: core9: gpu unit stalls=0
|
||||
PERF: core9: icache reads=6155
|
||||
PERF: core9: icache read misses=73 (hit ratio=98%)
|
||||
PERF: core9: icache pipeline stalls=2471
|
||||
PERF: core9: icache reponse stalls=2245
|
||||
PERF: core9: dcache reads=2862
|
||||
PERF: core9: dcache writes=101
|
||||
PERF: core9: dcache read misses=634 (hit ratio=77%)
|
||||
PERF: core9: dcache write misses=97 (hit ratio=3%)
|
||||
PERF: core9: dcache bank stalls=2200 (utilization=57%)
|
||||
PERF: core9: dcache mshr stalls=2548
|
||||
PERF: core9: dcache pipeline stalls=5160
|
||||
PERF: core9: dcache reponse stalls=16
|
||||
PERF: core9: smem reads=538
|
||||
PERF: core9: smem writes=447
|
||||
PERF: core9: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core9: dram requests=226 (reads=125, writes=101)
|
||||
PERF: core9: dram stalls=1449 (utilization=13%)
|
||||
PERF: core9: dram average latency=31 cycles
|
||||
PERF: core10: instrs=23498, cycles=16253, IPC=1.445764
|
||||
PERF: core10: ibuffer stalls=2228
|
||||
PERF: core10: scoreboard stalls=4119
|
||||
PERF: core10: alu unit stalls=724
|
||||
PERF: core10: lsu unit stalls=420
|
||||
PERF: core10: csr unit stalls=0
|
||||
PERF: core10: fpu unit stalls=4
|
||||
PERF: core10: gpu unit stalls=0
|
||||
PERF: core10: icache reads=6155
|
||||
PERF: core10: icache read misses=73 (hit ratio=98%)
|
||||
PERF: core10: icache pipeline stalls=2457
|
||||
PERF: core10: icache reponse stalls=2228
|
||||
PERF: core10: dcache reads=2862
|
||||
PERF: core10: dcache writes=101
|
||||
PERF: core10: dcache read misses=634 (hit ratio=77%)
|
||||
PERF: core10: dcache write misses=97 (hit ratio=3%)
|
||||
PERF: core10: dcache bank stalls=2182 (utilization=57%)
|
||||
PERF: core10: dcache mshr stalls=2427
|
||||
PERF: core10: dcache pipeline stalls=4855
|
||||
PERF: core10: dcache reponse stalls=16
|
||||
PERF: core10: smem reads=538
|
||||
PERF: core10: smem writes=447
|
||||
PERF: core10: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core10: dram requests=226 (reads=125, writes=101)
|
||||
PERF: core10: dram stalls=1326 (utilization=14%)
|
||||
PERF: core10: dram average latency=31 cycles
|
||||
PERF: core11: instrs=23498, cycles=16175, IPC=1.452736
|
||||
PERF: core11: ibuffer stalls=2225
|
||||
PERF: core11: scoreboard stalls=4114
|
||||
PERF: core11: alu unit stalls=734
|
||||
PERF: core11: lsu unit stalls=425
|
||||
PERF: core11: csr unit stalls=0
|
||||
PERF: core11: fpu unit stalls=0
|
||||
PERF: core11: gpu unit stalls=0
|
||||
PERF: core11: icache reads=6155
|
||||
PERF: core11: icache read misses=73 (hit ratio=98%)
|
||||
PERF: core11: icache pipeline stalls=2448
|
||||
PERF: core11: icache reponse stalls=2225
|
||||
PERF: core11: dcache reads=2862
|
||||
PERF: core11: dcache writes=101
|
||||
PERF: core11: dcache read misses=634 (hit ratio=77%)
|
||||
PERF: core11: dcache write misses=97 (hit ratio=3%)
|
||||
PERF: core11: dcache bank stalls=2195 (utilization=57%)
|
||||
PERF: core11: dcache mshr stalls=2455
|
||||
PERF: core11: dcache pipeline stalls=4007
|
||||
PERF: core11: dcache reponse stalls=15
|
||||
PERF: core11: smem reads=538
|
||||
PERF: core11: smem writes=447
|
||||
PERF: core11: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core11: dram requests=226 (reads=125, writes=101)
|
||||
PERF: core11: dram stalls=967 (utilization=18%)
|
||||
PERF: core11: dram average latency=31 cycles
|
||||
PERF: core12: instrs=23498, cycles=16248, IPC=1.446209
|
||||
PERF: core12: ibuffer stalls=2243
|
||||
PERF: core12: scoreboard stalls=4147
|
||||
PERF: core12: alu unit stalls=745
|
||||
PERF: core12: lsu unit stalls=391
|
||||
PERF: core12: csr unit stalls=0
|
||||
PERF: core12: fpu unit stalls=2
|
||||
PERF: core12: gpu unit stalls=0
|
||||
PERF: core12: icache reads=6155
|
||||
PERF: core12: icache read misses=73 (hit ratio=98%)
|
||||
PERF: core12: icache pipeline stalls=2456
|
||||
PERF: core12: icache reponse stalls=2243
|
||||
PERF: core12: dcache reads=2862
|
||||
PERF: core12: dcache writes=101
|
||||
PERF: core12: dcache read misses=634 (hit ratio=77%)
|
||||
PERF: core12: dcache write misses=97 (hit ratio=3%)
|
||||
PERF: core12: dcache bank stalls=2198 (utilization=57%)
|
||||
PERF: core12: dcache mshr stalls=2515
|
||||
PERF: core12: dcache pipeline stalls=4956
|
||||
PERF: core12: dcache reponse stalls=16
|
||||
PERF: core12: smem reads=538
|
||||
PERF: core12: smem writes=447
|
||||
PERF: core12: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core12: dram requests=226 (reads=125, writes=101)
|
||||
PERF: core12: dram stalls=1387 (utilization=14%)
|
||||
PERF: core12: dram average latency=31 cycles
|
||||
PERF: core13: instrs=23498, cycles=16176, IPC=1.452646
|
||||
PERF: core13: ibuffer stalls=2224
|
||||
PERF: core13: scoreboard stalls=4117
|
||||
PERF: core13: alu unit stalls=732
|
||||
PERF: core13: lsu unit stalls=431
|
||||
PERF: core13: csr unit stalls=0
|
||||
PERF: core13: fpu unit stalls=3
|
||||
PERF: core13: gpu unit stalls=0
|
||||
PERF: core13: icache reads=6155
|
||||
PERF: core13: icache read misses=73 (hit ratio=98%)
|
||||
PERF: core13: icache pipeline stalls=2446
|
||||
PERF: core13: icache reponse stalls=2224
|
||||
PERF: core13: dcache reads=2862
|
||||
PERF: core13: dcache writes=101
|
||||
PERF: core13: dcache read misses=634 (hit ratio=77%)
|
||||
PERF: core13: dcache write misses=97 (hit ratio=3%)
|
||||
PERF: core13: dcache bank stalls=2193 (utilization=57%)
|
||||
PERF: core13: dcache mshr stalls=2425
|
||||
PERF: core13: dcache pipeline stalls=4623
|
||||
PERF: core13: dcache reponse stalls=15
|
||||
PERF: core13: smem reads=538
|
||||
PERF: core13: smem writes=447
|
||||
PERF: core13: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core13: dram requests=226 (reads=125, writes=101)
|
||||
PERF: core13: dram stalls=1260 (utilization=15%)
|
||||
PERF: core13: dram average latency=31 cycles
|
||||
PERF: core14: instrs=23498, cycles=16165, IPC=1.453634
|
||||
PERF: core14: ibuffer stalls=2233
|
||||
PERF: core14: scoreboard stalls=4091
|
||||
PERF: core14: alu unit stalls=742
|
||||
PERF: core14: lsu unit stalls=428
|
||||
PERF: core14: csr unit stalls=0
|
||||
PERF: core14: fpu unit stalls=2
|
||||
PERF: core14: gpu unit stalls=0
|
||||
PERF: core14: icache reads=6155
|
||||
PERF: core14: icache read misses=73 (hit ratio=98%)
|
||||
PERF: core14: icache pipeline stalls=2452
|
||||
PERF: core14: icache reponse stalls=2233
|
||||
PERF: core14: dcache reads=2862
|
||||
PERF: core14: dcache writes=101
|
||||
PERF: core14: dcache read misses=634 (hit ratio=77%)
|
||||
PERF: core14: dcache write misses=97 (hit ratio=3%)
|
||||
PERF: core14: dcache bank stalls=2193 (utilization=57%)
|
||||
PERF: core14: dcache mshr stalls=2426
|
||||
PERF: core14: dcache pipeline stalls=3984
|
||||
PERF: core14: dcache reponse stalls=15
|
||||
PERF: core14: smem reads=538
|
||||
PERF: core14: smem writes=447
|
||||
PERF: core14: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core14: dram requests=226 (reads=125, writes=101)
|
||||
PERF: core14: dram stalls=952 (utilization=19%)
|
||||
PERF: core14: dram average latency=30 cycles
|
||||
PERF: core15: instrs=23500, cycles=16251, IPC=1.446065
|
||||
PERF: core15: ibuffer stalls=2268
|
||||
PERF: core15: scoreboard stalls=4241
|
||||
PERF: core15: alu unit stalls=745
|
||||
PERF: core15: lsu unit stalls=374
|
||||
PERF: core15: csr unit stalls=0
|
||||
PERF: core15: fpu unit stalls=1
|
||||
PERF: core15: gpu unit stalls=0
|
||||
PERF: core15: icache reads=6157
|
||||
PERF: core15: icache read misses=73 (hit ratio=98%)
|
||||
PERF: core15: icache pipeline stalls=2455
|
||||
PERF: core15: icache reponse stalls=2268
|
||||
PERF: core15: dcache reads=2862
|
||||
PERF: core15: dcache writes=101
|
||||
PERF: core15: dcache read misses=634 (hit ratio=77%)
|
||||
PERF: core15: dcache write misses=97 (hit ratio=3%)
|
||||
PERF: core15: dcache bank stalls=2195 (utilization=57%)
|
||||
PERF: core15: dcache mshr stalls=2567
|
||||
PERF: core15: dcache pipeline stalls=5084
|
||||
PERF: core15: dcache reponse stalls=16
|
||||
PERF: core15: smem reads=538
|
||||
PERF: core15: smem writes=447
|
||||
PERF: core15: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core15: dram requests=226 (reads=125, writes=101)
|
||||
PERF: core15: dram stalls=1220 (utilization=15%)
|
||||
PERF: core15: dram average latency=31 cycles
|
||||
PERF: instrs=375970, cycles=16264, IPC=23.116699
|
||||
PERF: ibuffer stalls=35805
|
||||
PERF: scoreboard stalls=66248
|
||||
PERF: alu unit stalls=11783
|
||||
PERF: lsu unit stalls=6441
|
||||
PERF: csr unit stalls=0
|
||||
PERF: fpu unit stalls=29
|
||||
PERF: gpu unit stalls=0
|
||||
PERF: icache reads=98482
|
||||
PERF: icache read misses=1168 (hit ratio=98%)
|
||||
PERF: icache pipeline stalls=39194
|
||||
PERF: icache reponse stalls=35805
|
||||
PERF: dcache reads=45792
|
||||
PERF: dcache writes=1616
|
||||
PERF: dcache read misses=10164 (hit ratio=77%)
|
||||
PERF: dcache write misses=1552 (hit ratio=3%)
|
||||
PERF: dcache bank stalls=35059 (utilization=57%)
|
||||
PERF: dcache mshr stalls=39593
|
||||
PERF: dcache pipeline stalls=73067
|
||||
PERF: dcache reponse stalls=252
|
||||
PERF: smem reads=8608
|
||||
PERF: smem writes=7152
|
||||
PERF: smem bank stalls=0 (utilization=100%)
|
||||
PERF: dram requests=3624 (reads=2008, writes=1616)
|
||||
PERF: dram stalls=18122 (utilization=16%)
|
||||
PERF: dram average latency=31 cycles
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'
|
|
@ -1,3 +0,0 @@
|
|||
# Generated by Platform Interface Manager user_clock_config.tcl
|
||||
afu-image/clock-frequency-low:83.5
|
||||
afu-image/clock-frequency-high:167
|
|
@ -1,459 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./vecadd -n64
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=16, num_warps=4, num_threads=4
|
||||
Create context
|
||||
Allocate device buffers
|
||||
Create program from kernel source
|
||||
Upload source buffers
|
||||
Execute the kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
Verify result
|
||||
PASSED!
|
||||
PERF: core0: instrs=2019, cycles=5194, IPC=0.388718
|
||||
PERF: core0: ibuffer stalls=89
|
||||
PERF: core0: scoreboard stalls=493
|
||||
PERF: core0: alu unit stalls=68
|
||||
PERF: core0: lsu unit stalls=50
|
||||
PERF: core0: csr unit stalls=0
|
||||
PERF: core0: fpu unit stalls=0
|
||||
PERF: core0: gpu unit stalls=0
|
||||
PERF: core0: icache reads=804
|
||||
PERF: core0: icache read misses=65 (hit ratio=91%)
|
||||
PERF: core0: icache pipeline stalls=444
|
||||
PERF: core0: icache reponse stalls=89
|
||||
PERF: core0: dcache reads=114
|
||||
PERF: core0: dcache writes=65
|
||||
PERF: core0: dcache read misses=28 (hit ratio=75%)
|
||||
PERF: core0: dcache write misses=60 (hit ratio=7%)
|
||||
PERF: core0: dcache bank stalls=72 (utilization=71%)
|
||||
PERF: core0: dcache mshr stalls=58
|
||||
PERF: core0: dcache pipeline stalls=596
|
||||
PERF: core0: dcache reponse stalls=1
|
||||
PERF: core0: smem reads=70
|
||||
PERF: core0: smem writes=63
|
||||
PERF: core0: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core0: dram requests=109 (reads=44, writes=65)
|
||||
PERF: core0: dram stalls=780 (utilization=12%)
|
||||
PERF: core0: dram average latency=31 cycles
|
||||
PERF: core1: instrs=2019, cycles=5191, IPC=0.388942
|
||||
PERF: core1: ibuffer stalls=89
|
||||
PERF: core1: scoreboard stalls=494
|
||||
PERF: core1: alu unit stalls=68
|
||||
PERF: core1: lsu unit stalls=48
|
||||
PERF: core1: csr unit stalls=0
|
||||
PERF: core1: fpu unit stalls=0
|
||||
PERF: core1: gpu unit stalls=0
|
||||
PERF: core1: icache reads=804
|
||||
PERF: core1: icache read misses=65 (hit ratio=91%)
|
||||
PERF: core1: icache pipeline stalls=455
|
||||
PERF: core1: icache reponse stalls=89
|
||||
PERF: core1: dcache reads=114
|
||||
PERF: core1: dcache writes=65
|
||||
PERF: core1: dcache read misses=28 (hit ratio=75%)
|
||||
PERF: core1: dcache write misses=60 (hit ratio=7%)
|
||||
PERF: core1: dcache bank stalls=72 (utilization=71%)
|
||||
PERF: core1: dcache mshr stalls=58
|
||||
PERF: core1: dcache pipeline stalls=596
|
||||
PERF: core1: dcache reponse stalls=1
|
||||
PERF: core1: smem reads=70
|
||||
PERF: core1: smem writes=63
|
||||
PERF: core1: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core1: dram requests=109 (reads=44, writes=65)
|
||||
PERF: core1: dram stalls=774 (utilization=12%)
|
||||
PERF: core1: dram average latency=31 cycles
|
||||
PERF: core2: instrs=2019, cycles=5110, IPC=0.395108
|
||||
PERF: core2: ibuffer stalls=89
|
||||
PERF: core2: scoreboard stalls=485
|
||||
PERF: core2: alu unit stalls=68
|
||||
PERF: core2: lsu unit stalls=53
|
||||
PERF: core2: csr unit stalls=0
|
||||
PERF: core2: fpu unit stalls=0
|
||||
PERF: core2: gpu unit stalls=0
|
||||
PERF: core2: icache reads=804
|
||||
PERF: core2: icache read misses=65 (hit ratio=91%)
|
||||
PERF: core2: icache pipeline stalls=401
|
||||
PERF: core2: icache reponse stalls=89
|
||||
PERF: core2: dcache reads=114
|
||||
PERF: core2: dcache writes=65
|
||||
PERF: core2: dcache read misses=28 (hit ratio=75%)
|
||||
PERF: core2: dcache write misses=60 (hit ratio=7%)
|
||||
PERF: core2: dcache bank stalls=72 (utilization=71%)
|
||||
PERF: core2: dcache mshr stalls=60
|
||||
PERF: core2: dcache pipeline stalls=541
|
||||
PERF: core2: dcache reponse stalls=1
|
||||
PERF: core2: smem reads=70
|
||||
PERF: core2: smem writes=63
|
||||
PERF: core2: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core2: dram requests=109 (reads=44, writes=65)
|
||||
PERF: core2: dram stalls=731 (utilization=12%)
|
||||
PERF: core2: dram average latency=30 cycles
|
||||
PERF: core3: instrs=2019, cycles=5101, IPC=0.395805
|
||||
PERF: core3: ibuffer stalls=89
|
||||
PERF: core3: scoreboard stalls=486
|
||||
PERF: core3: alu unit stalls=68
|
||||
PERF: core3: lsu unit stalls=52
|
||||
PERF: core3: csr unit stalls=0
|
||||
PERF: core3: fpu unit stalls=0
|
||||
PERF: core3: gpu unit stalls=0
|
||||
PERF: core3: icache reads=804
|
||||
PERF: core3: icache read misses=65 (hit ratio=91%)
|
||||
PERF: core3: icache pipeline stalls=401
|
||||
PERF: core3: icache reponse stalls=89
|
||||
PERF: core3: dcache reads=114
|
||||
PERF: core3: dcache writes=65
|
||||
PERF: core3: dcache read misses=28 (hit ratio=75%)
|
||||
PERF: core3: dcache write misses=60 (hit ratio=7%)
|
||||
PERF: core3: dcache bank stalls=72 (utilization=71%)
|
||||
PERF: core3: dcache mshr stalls=58
|
||||
PERF: core3: dcache pipeline stalls=532
|
||||
PERF: core3: dcache reponse stalls=1
|
||||
PERF: core3: smem reads=70
|
||||
PERF: core3: smem writes=63
|
||||
PERF: core3: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core3: dram requests=109 (reads=44, writes=65)
|
||||
PERF: core3: dram stalls=731 (utilization=12%)
|
||||
PERF: core3: dram average latency=29 cycles
|
||||
PERF: core4: instrs=495, cycles=3605, IPC=0.137309
|
||||
PERF: core4: ibuffer stalls=0
|
||||
PERF: core4: scoreboard stalls=267
|
||||
PERF: core4: alu unit stalls=0
|
||||
PERF: core4: lsu unit stalls=0
|
||||
PERF: core4: csr unit stalls=0
|
||||
PERF: core4: fpu unit stalls=0
|
||||
PERF: core4: gpu unit stalls=0
|
||||
PERF: core4: icache reads=348
|
||||
PERF: core4: icache read misses=31 (hit ratio=91%)
|
||||
PERF: core4: icache pipeline stalls=63
|
||||
PERF: core4: icache reponse stalls=0
|
||||
PERF: core4: dcache reads=18
|
||||
PERF: core4: dcache writes=48
|
||||
PERF: core4: dcache read misses=8 (hit ratio=55%)
|
||||
PERF: core4: dcache write misses=44 (hit ratio=8%)
|
||||
PERF: core4: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: core4: dcache mshr stalls=0
|
||||
PERF: core4: dcache pipeline stalls=525
|
||||
PERF: core4: dcache reponse stalls=0
|
||||
PERF: core4: smem reads=23
|
||||
PERF: core4: smem writes=25
|
||||
PERF: core4: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core4: dram requests=79 (reads=31, writes=48)
|
||||
PERF: core4: dram stalls=765 (utilization=9%)
|
||||
PERF: core4: dram average latency=31 cycles
|
||||
PERF: core5: instrs=495, cycles=3603, IPC=0.137386
|
||||
PERF: core5: ibuffer stalls=0
|
||||
PERF: core5: scoreboard stalls=269
|
||||
PERF: core5: alu unit stalls=0
|
||||
PERF: core5: lsu unit stalls=0
|
||||
PERF: core5: csr unit stalls=0
|
||||
PERF: core5: fpu unit stalls=0
|
||||
PERF: core5: gpu unit stalls=0
|
||||
PERF: core5: icache reads=348
|
||||
PERF: core5: icache read misses=31 (hit ratio=91%)
|
||||
PERF: core5: icache pipeline stalls=63
|
||||
PERF: core5: icache reponse stalls=0
|
||||
PERF: core5: dcache reads=18
|
||||
PERF: core5: dcache writes=48
|
||||
PERF: core5: dcache read misses=8 (hit ratio=55%)
|
||||
PERF: core5: dcache write misses=44 (hit ratio=8%)
|
||||
PERF: core5: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: core5: dcache mshr stalls=0
|
||||
PERF: core5: dcache pipeline stalls=514
|
||||
PERF: core5: dcache reponse stalls=0
|
||||
PERF: core5: smem reads=23
|
||||
PERF: core5: smem writes=25
|
||||
PERF: core5: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core5: dram requests=79 (reads=31, writes=48)
|
||||
PERF: core5: dram stalls=758 (utilization=9%)
|
||||
PERF: core5: dram average latency=31 cycles
|
||||
PERF: core6: instrs=495, cycles=3587, IPC=0.137998
|
||||
PERF: core6: ibuffer stalls=0
|
||||
PERF: core6: scoreboard stalls=260
|
||||
PERF: core6: alu unit stalls=0
|
||||
PERF: core6: lsu unit stalls=0
|
||||
PERF: core6: csr unit stalls=0
|
||||
PERF: core6: fpu unit stalls=0
|
||||
PERF: core6: gpu unit stalls=0
|
||||
PERF: core6: icache reads=348
|
||||
PERF: core6: icache read misses=31 (hit ratio=91%)
|
||||
PERF: core6: icache pipeline stalls=63
|
||||
PERF: core6: icache reponse stalls=0
|
||||
PERF: core6: dcache reads=18
|
||||
PERF: core6: dcache writes=48
|
||||
PERF: core6: dcache read misses=8 (hit ratio=55%)
|
||||
PERF: core6: dcache write misses=44 (hit ratio=8%)
|
||||
PERF: core6: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: core6: dcache mshr stalls=0
|
||||
PERF: core6: dcache pipeline stalls=472
|
||||
PERF: core6: dcache reponse stalls=0
|
||||
PERF: core6: smem reads=23
|
||||
PERF: core6: smem writes=25
|
||||
PERF: core6: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core6: dram requests=79 (reads=31, writes=48)
|
||||
PERF: core6: dram stalls=727 (utilization=9%)
|
||||
PERF: core6: dram average latency=31 cycles
|
||||
PERF: core7: instrs=495, cycles=3573, IPC=0.138539
|
||||
PERF: core7: ibuffer stalls=0
|
||||
PERF: core7: scoreboard stalls=260
|
||||
PERF: core7: alu unit stalls=0
|
||||
PERF: core7: lsu unit stalls=0
|
||||
PERF: core7: csr unit stalls=0
|
||||
PERF: core7: fpu unit stalls=0
|
||||
PERF: core7: gpu unit stalls=0
|
||||
PERF: core7: icache reads=348
|
||||
PERF: core7: icache read misses=31 (hit ratio=91%)
|
||||
PERF: core7: icache pipeline stalls=63
|
||||
PERF: core7: icache reponse stalls=0
|
||||
PERF: core7: dcache reads=18
|
||||
PERF: core7: dcache writes=48
|
||||
PERF: core7: dcache read misses=8 (hit ratio=55%)
|
||||
PERF: core7: dcache write misses=44 (hit ratio=8%)
|
||||
PERF: core7: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: core7: dcache mshr stalls=0
|
||||
PERF: core7: dcache pipeline stalls=474
|
||||
PERF: core7: dcache reponse stalls=0
|
||||
PERF: core7: smem reads=23
|
||||
PERF: core7: smem writes=25
|
||||
PERF: core7: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core7: dram requests=79 (reads=31, writes=48)
|
||||
PERF: core7: dram stalls=728 (utilization=9%)
|
||||
PERF: core7: dram average latency=31 cycles
|
||||
PERF: core8: instrs=495, cycles=3604, IPC=0.137347
|
||||
PERF: core8: ibuffer stalls=0
|
||||
PERF: core8: scoreboard stalls=268
|
||||
PERF: core8: alu unit stalls=0
|
||||
PERF: core8: lsu unit stalls=0
|
||||
PERF: core8: csr unit stalls=0
|
||||
PERF: core8: fpu unit stalls=0
|
||||
PERF: core8: gpu unit stalls=0
|
||||
PERF: core8: icache reads=348
|
||||
PERF: core8: icache read misses=31 (hit ratio=91%)
|
||||
PERF: core8: icache pipeline stalls=63
|
||||
PERF: core8: icache reponse stalls=0
|
||||
PERF: core8: dcache reads=18
|
||||
PERF: core8: dcache writes=48
|
||||
PERF: core8: dcache read misses=8 (hit ratio=55%)
|
||||
PERF: core8: dcache write misses=44 (hit ratio=8%)
|
||||
PERF: core8: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: core8: dcache mshr stalls=0
|
||||
PERF: core8: dcache pipeline stalls=525
|
||||
PERF: core8: dcache reponse stalls=0
|
||||
PERF: core8: smem reads=23
|
||||
PERF: core8: smem writes=25
|
||||
PERF: core8: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core8: dram requests=79 (reads=31, writes=48)
|
||||
PERF: core8: dram stalls=764 (utilization=9%)
|
||||
PERF: core8: dram average latency=31 cycles
|
||||
PERF: core9: instrs=495, cycles=3600, IPC=0.137500
|
||||
PERF: core9: ibuffer stalls=0
|
||||
PERF: core9: scoreboard stalls=268
|
||||
PERF: core9: alu unit stalls=0
|
||||
PERF: core9: lsu unit stalls=0
|
||||
PERF: core9: csr unit stalls=0
|
||||
PERF: core9: fpu unit stalls=0
|
||||
PERF: core9: gpu unit stalls=0
|
||||
PERF: core9: icache reads=348
|
||||
PERF: core9: icache read misses=31 (hit ratio=91%)
|
||||
PERF: core9: icache pipeline stalls=63
|
||||
PERF: core9: icache reponse stalls=0
|
||||
PERF: core9: dcache reads=18
|
||||
PERF: core9: dcache writes=48
|
||||
PERF: core9: dcache read misses=8 (hit ratio=55%)
|
||||
PERF: core9: dcache write misses=44 (hit ratio=8%)
|
||||
PERF: core9: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: core9: dcache mshr stalls=0
|
||||
PERF: core9: dcache pipeline stalls=514
|
||||
PERF: core9: dcache reponse stalls=0
|
||||
PERF: core9: smem reads=23
|
||||
PERF: core9: smem writes=25
|
||||
PERF: core9: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core9: dram requests=79 (reads=31, writes=48)
|
||||
PERF: core9: dram stalls=756 (utilization=9%)
|
||||
PERF: core9: dram average latency=31 cycles
|
||||
PERF: core10: instrs=495, cycles=3585, IPC=0.138075
|
||||
PERF: core10: ibuffer stalls=0
|
||||
PERF: core10: scoreboard stalls=261
|
||||
PERF: core10: alu unit stalls=0
|
||||
PERF: core10: lsu unit stalls=0
|
||||
PERF: core10: csr unit stalls=0
|
||||
PERF: core10: fpu unit stalls=0
|
||||
PERF: core10: gpu unit stalls=0
|
||||
PERF: core10: icache reads=348
|
||||
PERF: core10: icache read misses=31 (hit ratio=91%)
|
||||
PERF: core10: icache pipeline stalls=63
|
||||
PERF: core10: icache reponse stalls=0
|
||||
PERF: core10: dcache reads=18
|
||||
PERF: core10: dcache writes=48
|
||||
PERF: core10: dcache read misses=8 (hit ratio=55%)
|
||||
PERF: core10: dcache write misses=44 (hit ratio=8%)
|
||||
PERF: core10: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: core10: dcache mshr stalls=0
|
||||
PERF: core10: dcache pipeline stalls=472
|
||||
PERF: core10: dcache reponse stalls=0
|
||||
PERF: core10: smem reads=23
|
||||
PERF: core10: smem writes=25
|
||||
PERF: core10: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core10: dram requests=79 (reads=31, writes=48)
|
||||
PERF: core10: dram stalls=728 (utilization=9%)
|
||||
PERF: core10: dram average latency=31 cycles
|
||||
PERF: core11: instrs=495, cycles=3572, IPC=0.138578
|
||||
PERF: core11: ibuffer stalls=0
|
||||
PERF: core11: scoreboard stalls=259
|
||||
PERF: core11: alu unit stalls=0
|
||||
PERF: core11: lsu unit stalls=0
|
||||
PERF: core11: csr unit stalls=0
|
||||
PERF: core11: fpu unit stalls=0
|
||||
PERF: core11: gpu unit stalls=0
|
||||
PERF: core11: icache reads=348
|
||||
PERF: core11: icache read misses=31 (hit ratio=91%)
|
||||
PERF: core11: icache pipeline stalls=63
|
||||
PERF: core11: icache reponse stalls=0
|
||||
PERF: core11: dcache reads=18
|
||||
PERF: core11: dcache writes=48
|
||||
PERF: core11: dcache read misses=8 (hit ratio=55%)
|
||||
PERF: core11: dcache write misses=44 (hit ratio=8%)
|
||||
PERF: core11: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: core11: dcache mshr stalls=0
|
||||
PERF: core11: dcache pipeline stalls=474
|
||||
PERF: core11: dcache reponse stalls=0
|
||||
PERF: core11: smem reads=23
|
||||
PERF: core11: smem writes=25
|
||||
PERF: core11: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core11: dram requests=79 (reads=31, writes=48)
|
||||
PERF: core11: dram stalls=728 (utilization=9%)
|
||||
PERF: core11: dram average latency=31 cycles
|
||||
PERF: core12: instrs=495, cycles=3599, IPC=0.137538
|
||||
PERF: core12: ibuffer stalls=0
|
||||
PERF: core12: scoreboard stalls=261
|
||||
PERF: core12: alu unit stalls=0
|
||||
PERF: core12: lsu unit stalls=0
|
||||
PERF: core12: csr unit stalls=0
|
||||
PERF: core12: fpu unit stalls=0
|
||||
PERF: core12: gpu unit stalls=0
|
||||
PERF: core12: icache reads=348
|
||||
PERF: core12: icache read misses=31 (hit ratio=91%)
|
||||
PERF: core12: icache pipeline stalls=63
|
||||
PERF: core12: icache reponse stalls=0
|
||||
PERF: core12: dcache reads=18
|
||||
PERF: core12: dcache writes=48
|
||||
PERF: core12: dcache read misses=8 (hit ratio=55%)
|
||||
PERF: core12: dcache write misses=44 (hit ratio=8%)
|
||||
PERF: core12: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: core12: dcache mshr stalls=0
|
||||
PERF: core12: dcache pipeline stalls=533
|
||||
PERF: core12: dcache reponse stalls=0
|
||||
PERF: core12: smem reads=23
|
||||
PERF: core12: smem writes=25
|
||||
PERF: core12: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core12: dram requests=79 (reads=31, writes=48)
|
||||
PERF: core12: dram stalls=762 (utilization=9%)
|
||||
PERF: core12: dram average latency=31 cycles
|
||||
PERF: core13: instrs=495, cycles=3589, IPC=0.137921
|
||||
PERF: core13: ibuffer stalls=0
|
||||
PERF: core13: scoreboard stalls=257
|
||||
PERF: core13: alu unit stalls=0
|
||||
PERF: core13: lsu unit stalls=0
|
||||
PERF: core13: csr unit stalls=0
|
||||
PERF: core13: fpu unit stalls=0
|
||||
PERF: core13: gpu unit stalls=0
|
||||
PERF: core13: icache reads=348
|
||||
PERF: core13: icache read misses=31 (hit ratio=91%)
|
||||
PERF: core13: icache pipeline stalls=63
|
||||
PERF: core13: icache reponse stalls=0
|
||||
PERF: core13: dcache reads=18
|
||||
PERF: core13: dcache writes=48
|
||||
PERF: core13: dcache read misses=8 (hit ratio=55%)
|
||||
PERF: core13: dcache write misses=44 (hit ratio=8%)
|
||||
PERF: core13: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: core13: dcache mshr stalls=0
|
||||
PERF: core13: dcache pipeline stalls=478
|
||||
PERF: core13: dcache reponse stalls=0
|
||||
PERF: core13: smem reads=23
|
||||
PERF: core13: smem writes=25
|
||||
PERF: core13: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core13: dram requests=79 (reads=31, writes=48)
|
||||
PERF: core13: dram stalls=736 (utilization=9%)
|
||||
PERF: core13: dram average latency=31 cycles
|
||||
PERF: core14: instrs=495, cycles=3584, IPC=0.138114
|
||||
PERF: core14: ibuffer stalls=0
|
||||
PERF: core14: scoreboard stalls=255
|
||||
PERF: core14: alu unit stalls=0
|
||||
PERF: core14: lsu unit stalls=0
|
||||
PERF: core14: csr unit stalls=0
|
||||
PERF: core14: fpu unit stalls=0
|
||||
PERF: core14: gpu unit stalls=0
|
||||
PERF: core14: icache reads=348
|
||||
PERF: core14: icache read misses=31 (hit ratio=91%)
|
||||
PERF: core14: icache pipeline stalls=63
|
||||
PERF: core14: icache reponse stalls=0
|
||||
PERF: core14: dcache reads=18
|
||||
PERF: core14: dcache writes=48
|
||||
PERF: core14: dcache read misses=8 (hit ratio=55%)
|
||||
PERF: core14: dcache write misses=44 (hit ratio=8%)
|
||||
PERF: core14: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: core14: dcache mshr stalls=0
|
||||
PERF: core14: dcache pipeline stalls=480
|
||||
PERF: core14: dcache reponse stalls=0
|
||||
PERF: core14: smem reads=23
|
||||
PERF: core14: smem writes=25
|
||||
PERF: core14: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core14: dram requests=79 (reads=31, writes=48)
|
||||
PERF: core14: dram stalls=734 (utilization=9%)
|
||||
PERF: core14: dram average latency=31 cycles
|
||||
PERF: core15: instrs=495, cycles=3570, IPC=0.138655
|
||||
PERF: core15: ibuffer stalls=0
|
||||
PERF: core15: scoreboard stalls=241
|
||||
PERF: core15: alu unit stalls=0
|
||||
PERF: core15: lsu unit stalls=0
|
||||
PERF: core15: csr unit stalls=0
|
||||
PERF: core15: fpu unit stalls=0
|
||||
PERF: core15: gpu unit stalls=0
|
||||
PERF: core15: icache reads=348
|
||||
PERF: core15: icache read misses=31 (hit ratio=91%)
|
||||
PERF: core15: icache pipeline stalls=62
|
||||
PERF: core15: icache reponse stalls=0
|
||||
PERF: core15: dcache reads=18
|
||||
PERF: core15: dcache writes=48
|
||||
PERF: core15: dcache read misses=8 (hit ratio=55%)
|
||||
PERF: core15: dcache write misses=44 (hit ratio=8%)
|
||||
PERF: core15: dcache bank stalls=0 (utilization=100%)
|
||||
PERF: core15: dcache mshr stalls=0
|
||||
PERF: core15: dcache pipeline stalls=419
|
||||
PERF: core15: dcache reponse stalls=0
|
||||
PERF: core15: smem reads=23
|
||||
PERF: core15: smem writes=25
|
||||
PERF: core15: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core15: dram requests=79 (reads=31, writes=48)
|
||||
PERF: core15: dram stalls=667 (utilization=10%)
|
||||
PERF: core15: dram average latency=31 cycles
|
||||
PERF: instrs=14016, cycles=5194, IPC=2.698498
|
||||
PERF: ibuffer stalls=356
|
||||
PERF: scoreboard stalls=5084
|
||||
PERF: alu unit stalls=272
|
||||
PERF: lsu unit stalls=203
|
||||
PERF: csr unit stalls=0
|
||||
PERF: fpu unit stalls=0
|
||||
PERF: gpu unit stalls=0
|
||||
PERF: icache reads=7392
|
||||
PERF: icache read misses=632 (hit ratio=91%)
|
||||
PERF: icache pipeline stalls=2456
|
||||
PERF: icache reponse stalls=356
|
||||
PERF: dcache reads=672
|
||||
PERF: dcache writes=836
|
||||
PERF: dcache read misses=208 (hit ratio=69%)
|
||||
PERF: dcache write misses=768 (hit ratio=8%)
|
||||
PERF: dcache bank stalls=288 (utilization=83%)
|
||||
PERF: dcache mshr stalls=234
|
||||
PERF: dcache pipeline stalls=8145
|
||||
PERF: dcache reponse stalls=4
|
||||
PERF: smem reads=556
|
||||
PERF: smem writes=552
|
||||
PERF: smem bank stalls=0 (utilization=100%)
|
||||
PERF: dram requests=1384 (reads=548, writes=836)
|
||||
PERF: dram stalls=11869 (utilization=10%)
|
||||
PERF: dram average latency=31 cycles
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'
|
|
@ -1,17 +0,0 @@
|
|||
Fitter Status : Successful - Sat Mar 6 19:19:28 2021
|
||||
Quartus Prime Version : 19.2.0 Build 57 06/24/2019 Patches 0.01rc SJ Pro Edition
|
||||
Revision Name : afu_default
|
||||
Top-level Entity Name : dcp_top
|
||||
Family : Arria 10
|
||||
Device : 10AX115N2F40E2LG
|
||||
Timing Models : Final
|
||||
Logic utilization (in ALMs) : 55,747 / 427,200 ( 13 % )
|
||||
Total registers : 79974
|
||||
Total pins : 310 / 826 ( 38 % )
|
||||
Total virtual pins : 0
|
||||
Total block memory bits : 2,272,720 / 55,562,240 ( 4 % )
|
||||
Total RAM Blocks : 320 / 2,713 ( 12 % )
|
||||
Total DSP Blocks : 28 / 1,518 ( 2 % )
|
||||
Total HSSI RX channels : 12 / 48 ( 25 % )
|
||||
Total HSSI TX channels : 12 / 48 ( 25 % )
|
||||
Total PLLs : 25 / 112 ( 22 % )
|
File diff suppressed because it is too large
Load diff
|
@ -1,4 +0,0 @@
|
|||
Synthesis Status : Successful - Sat Mar 6 18:56:26 2021
|
||||
Revision Name : afu_default
|
||||
Top-level Entity Name : dcp_top
|
||||
Family : Arria 10
|
File diff suppressed because it is too large
Load diff
|
@ -1,29 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./guassian
|
||||
enter demo main
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=1, num_warps=4, num_threads=4
|
||||
OK
|
||||
The result of matrix m is:
|
||||
0.00 0.00 0.00 0.00
|
||||
0.50 0.00 0.00 0.00
|
||||
0.67 0.26 0.00 0.00
|
||||
-0.00 0.15 -0.28 0.00
|
||||
|
||||
The result of matrix a is:
|
||||
-0.60 -0.50 0.70 0.30
|
||||
0.00 -0.65 -0.05 0.55
|
||||
0.00 0.00 -0.75 -1.14
|
||||
0.00 0.00 0.00 0.50
|
||||
|
||||
The result of array b is:
|
||||
-0.85 -0.25 0.87 -0.25
|
||||
|
||||
The final solution is:
|
||||
0.70 0.00 -0.40 -0.50
|
||||
|
||||
Passed!
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'
|
|
@ -1,19 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./nearn
|
||||
loading db: cane4_0.db
|
||||
loading db: cane4_1.db
|
||||
loading db: cane4_2.db
|
||||
Number of records: 1500
|
||||
Finding the 5 closest neighbors.
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=1, num_warps=4, num_threads=4
|
||||
1974 12 22 18 24 JOYCE 30.6 89.9 80 593 --> Distance=0.608276
|
||||
1965 5 13 0 17 TONY 27.8 89.0 122 260 --> Distance=2.416610
|
||||
1991 3 18 12 19 DEBBY 28.5 87.8 107 850 --> Distance=2.662703
|
||||
1957 4 17 6 12 ALBERTO 32.5 87.8 54 510 --> Distance=3.330163
|
||||
1964 8 5 6 9 FLORENCE 31.5 86.3 18 242 --> Distance=3.992490
|
||||
Passed!
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'
|
|
@ -1,19 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./saxpy
|
||||
enter demo main
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=1, num_warps=4, num_threads=4
|
||||
Attempting to create program from binary...
|
||||
Read program from binary.
|
||||
attempting to create input buffer
|
||||
attempting to create output buffer
|
||||
attempting to create kernel
|
||||
setting up kernel args
|
||||
attempting to enqueue write buffer
|
||||
attempting to enqueue kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'
|
|
@ -1,19 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sfilter
|
||||
enter demo main
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=1, num_warps=4, num_threads=4
|
||||
Attempting to create program from binary...
|
||||
Read program from binary.
|
||||
attempting to create input buffer
|
||||
attempting to create output buffer
|
||||
attempting to create kernel
|
||||
setting up kernel args
|
||||
attempting to enqueue write buffer
|
||||
attempting to enqueue kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'
|
|
@ -1,42 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sgemm -n32
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=1, num_warps=4, num_threads=4
|
||||
Create context
|
||||
Create program from kernel source
|
||||
Upload source buffers
|
||||
Execute the kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
Verify result
|
||||
PASSED!
|
||||
PERF: instrs=360460, cycles=175991, IPC=2.048173
|
||||
PERF: ibuffer stalls=20439
|
||||
PERF: scoreboard stalls=50656
|
||||
PERF: alu unit stalls=7129
|
||||
PERF: lsu unit stalls=16771
|
||||
PERF: csr unit stalls=0
|
||||
PERF: fpu unit stalls=0
|
||||
PERF: gpu unit stalls=0
|
||||
PERF: icache reads=90397
|
||||
PERF: icache read misses=73 (hit ratio=99%)
|
||||
PERF: icache pipeline stalls=12325
|
||||
PERF: icache reponse stalls=20439
|
||||
PERF: dcache reads=45342
|
||||
PERF: dcache writes=1061
|
||||
PERF: dcache read misses=1252 (hit ratio=97%)
|
||||
PERF: dcache write misses=1057 (hit ratio=0%)
|
||||
PERF: dcache bank stalls=50688 (utilization=47%)
|
||||
PERF: dcache mshr stalls=2005
|
||||
PERF: dcache pipeline stalls=2034
|
||||
PERF: dcache reponse stalls=192
|
||||
PERF: smem reads=7978
|
||||
PERF: smem writes=6207
|
||||
PERF: smem bank stalls=0 (utilization=100%)
|
||||
PERF: dram requests=1423 (reads=362, writes=1061)
|
||||
PERF: dram stalls=0 (utilization=100%)
|
||||
PERF: dram average latency=26 cycles
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'
|
|
@ -1,3 +0,0 @@
|
|||
# Generated by Platform Interface Manager user_clock_config.tcl
|
||||
afu-image/clock-frequency-low:88.5
|
||||
afu-image/clock-frequency-high:177
|
|
@ -1,43 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./vecadd -n64
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=1, num_warps=4, num_threads=4
|
||||
Create context
|
||||
Allocate device buffers
|
||||
Create program from kernel source
|
||||
Upload source buffers
|
||||
Execute the kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
Verify result
|
||||
PASSED!
|
||||
PERF: instrs=4908, cycles=6173, IPC=0.795075
|
||||
PERF: ibuffer stalls=247
|
||||
PERF: scoreboard stalls=629
|
||||
PERF: alu unit stalls=130
|
||||
PERF: lsu unit stalls=204
|
||||
PERF: csr unit stalls=0
|
||||
PERF: fpu unit stalls=0
|
||||
PERF: gpu unit stalls=0
|
||||
PERF: icache reads=1528
|
||||
PERF: icache read misses=65 (hit ratio=95%)
|
||||
PERF: icache pipeline stalls=546
|
||||
PERF: icache reponse stalls=247
|
||||
PERF: dcache reads=371
|
||||
PERF: dcache writes=113
|
||||
PERF: dcache read misses=105 (hit ratio=71%)
|
||||
PERF: dcache write misses=108 (hit ratio=4%)
|
||||
PERF: dcache bank stalls=184 (utilization=72%)
|
||||
PERF: dcache mshr stalls=125
|
||||
PERF: dcache pipeline stalls=259
|
||||
PERF: dcache reponse stalls=15
|
||||
PERF: smem reads=154
|
||||
PERF: smem writes=63
|
||||
PERF: smem bank stalls=0 (utilization=100%)
|
||||
PERF: dram requests=175 (reads=62, writes=113)
|
||||
PERF: dram stalls=0 (utilization=100%)
|
||||
PERF: dram average latency=26 cycles
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'
|
|
@ -1,17 +0,0 @@
|
|||
Fitter Status : Successful - Sat Mar 6 01:44:47 2021
|
||||
Quartus Prime Version : 19.2.0 Build 57 06/24/2019 Patches 0.01rc SJ Pro Edition
|
||||
Revision Name : afu_default
|
||||
Top-level Entity Name : dcp_top
|
||||
Family : Arria 10
|
||||
Device : 10AX115N2F40E2LG
|
||||
Timing Models : Final
|
||||
Logic utilization (in ALMs) : 74,001 / 427,200 ( 17 % )
|
||||
Total registers : 109164
|
||||
Total pins : 310 / 826 ( 38 % )
|
||||
Total virtual pins : 0
|
||||
Total block memory bits : 2,967,352 / 55,562,240 ( 5 % )
|
||||
Total RAM Blocks : 451 / 2,713 ( 17 % )
|
||||
Total DSP Blocks : 56 / 1,518 ( 4 % )
|
||||
Total HSSI RX channels : 12 / 48 ( 25 % )
|
||||
Total HSSI TX channels : 12 / 48 ( 25 % )
|
||||
Total PLLs : 25 / 112 ( 22 % )
|
File diff suppressed because it is too large
Load diff
|
@ -1,4 +0,0 @@
|
|||
Synthesis Status : Successful - Sat Mar 6 01:12:13 2021
|
||||
Revision Name : afu_default
|
||||
Top-level Entity Name : dcp_top
|
||||
Family : Arria 10
|
File diff suppressed because it is too large
Load diff
|
@ -1,29 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./guassian
|
||||
enter demo main
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=2, num_warps=4, num_threads=4
|
||||
OK
|
||||
The result of matrix m is:
|
||||
0.00 0.00 0.00 0.00
|
||||
0.50 0.00 0.00 0.00
|
||||
0.67 0.26 0.00 0.00
|
||||
-0.00 0.15 -0.28 0.00
|
||||
|
||||
The result of matrix a is:
|
||||
-0.60 -0.50 0.70 0.30
|
||||
0.00 -0.65 -0.05 0.55
|
||||
0.00 0.00 -0.75 -1.14
|
||||
0.00 0.00 0.00 0.50
|
||||
|
||||
The result of array b is:
|
||||
-0.85 -0.25 0.87 -0.25
|
||||
|
||||
The final solution is:
|
||||
0.70 0.00 -0.40 -0.50
|
||||
|
||||
Passed!
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'
|
|
@ -1,19 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./nearn
|
||||
loading db: cane4_0.db
|
||||
loading db: cane4_1.db
|
||||
loading db: cane4_2.db
|
||||
Number of records: 1500
|
||||
Finding the 5 closest neighbors.
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=2, num_warps=4, num_threads=4
|
||||
1974 12 22 18 24 JOYCE 30.6 89.9 80 593 --> Distance=0.608276
|
||||
1965 5 13 0 17 TONY 27.8 89.0 122 260 --> Distance=2.416610
|
||||
1991 3 18 12 19 DEBBY 28.5 87.8 107 850 --> Distance=2.662703
|
||||
1957 4 17 6 12 ALBERTO 32.5 87.8 54 510 --> Distance=3.330163
|
||||
1964 8 5 6 9 FLORENCE 31.5 86.3 18 242 --> Distance=3.992490
|
||||
Passed!
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'
|
|
@ -1,19 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./saxpy
|
||||
enter demo main
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=2, num_warps=4, num_threads=4
|
||||
Attempting to create program from binary...
|
||||
Read program from binary.
|
||||
attempting to create input buffer
|
||||
attempting to create output buffer
|
||||
attempting to create kernel
|
||||
setting up kernel args
|
||||
attempting to enqueue write buffer
|
||||
attempting to enqueue kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'
|
|
@ -1,19 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sfilter
|
||||
enter demo main
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=2, num_warps=4, num_threads=4
|
||||
Attempting to create program from binary...
|
||||
Read program from binary.
|
||||
attempting to create input buffer
|
||||
attempting to create output buffer
|
||||
attempting to create kernel
|
||||
setting up kernel args
|
||||
attempting to enqueue write buffer
|
||||
attempting to enqueue kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'
|
|
@ -1,94 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sgemm -n32
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=2, num_warps=4, num_threads=4
|
||||
Create context
|
||||
Create program from kernel source
|
||||
Upload source buffers
|
||||
Execute the kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
Verify result
|
||||
PASSED!
|
||||
PERF: core0: instrs=180750, cycles=84306, IPC=2.143975
|
||||
PERF: core0: ibuffer stalls=0
|
||||
PERF: core0: scoreboard stalls=0
|
||||
PERF: core0: alu unit stalls=0
|
||||
PERF: core0: lsu unit stalls=0
|
||||
PERF: core0: csr unit stalls=0
|
||||
PERF: core0: fpu unit stalls=0
|
||||
PERF: core0: gpu unit stalls=0
|
||||
PERF: core0: icache reads=0
|
||||
PERF: core0: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core0: icache pipeline stalls=0
|
||||
PERF: core0: icache reponse stalls=0
|
||||
PERF: core0: dcache reads=0
|
||||
PERF: core0: dcache writes=0
|
||||
PERF: core0: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core0: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core0: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core0: dcache mshr stalls=0
|
||||
PERF: core0: dcache pipeline stalls=0
|
||||
PERF: core0: dcache reponse stalls=0
|
||||
PERF: core0: smem reads=0
|
||||
PERF: core0: smem writes=0
|
||||
PERF: core0: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core0: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core0: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core0: dram average latency=-2147483648 cycles
|
||||
PERF: core1: instrs=180752, cycles=84131, IPC=2.148459
|
||||
PERF: core1: ibuffer stalls=0
|
||||
PERF: core1: scoreboard stalls=0
|
||||
PERF: core1: alu unit stalls=0
|
||||
PERF: core1: lsu unit stalls=0
|
||||
PERF: core1: csr unit stalls=0
|
||||
PERF: core1: fpu unit stalls=0
|
||||
PERF: core1: gpu unit stalls=0
|
||||
PERF: core1: icache reads=0
|
||||
PERF: core1: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core1: icache pipeline stalls=0
|
||||
PERF: core1: icache reponse stalls=0
|
||||
PERF: core1: dcache reads=0
|
||||
PERF: core1: dcache writes=0
|
||||
PERF: core1: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core1: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core1: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core1: dcache mshr stalls=0
|
||||
PERF: core1: dcache pipeline stalls=0
|
||||
PERF: core1: dcache reponse stalls=0
|
||||
PERF: core1: smem reads=0
|
||||
PERF: core1: smem writes=0
|
||||
PERF: core1: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core1: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core1: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core1: dram average latency=-2147483648 cycles
|
||||
PERF: instrs=361502, cycles=84306, IPC=4.287975
|
||||
PERF: ibuffer stalls=0
|
||||
PERF: scoreboard stalls=0
|
||||
PERF: alu unit stalls=0
|
||||
PERF: lsu unit stalls=0
|
||||
PERF: csr unit stalls=0
|
||||
PERF: fpu unit stalls=0
|
||||
PERF: gpu unit stalls=0
|
||||
PERF: icache reads=0
|
||||
PERF: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: icache pipeline stalls=0
|
||||
PERF: icache reponse stalls=0
|
||||
PERF: dcache reads=0
|
||||
PERF: dcache writes=0
|
||||
PERF: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: dcache mshr stalls=0
|
||||
PERF: dcache pipeline stalls=0
|
||||
PERF: dcache reponse stalls=0
|
||||
PERF: smem reads=0
|
||||
PERF: smem writes=0
|
||||
PERF: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: dram requests=0 (reads=0, writes=0)
|
||||
PERF: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: dram average latency=-2147483648 cycles
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'
|
|
@ -1,3 +0,0 @@
|
|||
# Generated by Platform Interface Manager user_clock_config.tcl
|
||||
afu-image/clock-frequency-low:92.0
|
||||
afu-image/clock-frequency-high:184
|
|
@ -1,95 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./vecadd -n64
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=2, num_warps=4, num_threads=4
|
||||
Create context
|
||||
Allocate device buffers
|
||||
Create program from kernel source
|
||||
Upload source buffers
|
||||
Execute the kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
Verify result
|
||||
PASSED!
|
||||
PERF: core0: instrs=2981, cycles=5416, IPC=0.550406
|
||||
PERF: core0: ibuffer stalls=0
|
||||
PERF: core0: scoreboard stalls=0
|
||||
PERF: core0: alu unit stalls=0
|
||||
PERF: core0: lsu unit stalls=0
|
||||
PERF: core0: csr unit stalls=0
|
||||
PERF: core0: fpu unit stalls=0
|
||||
PERF: core0: gpu unit stalls=0
|
||||
PERF: core0: icache reads=0
|
||||
PERF: core0: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core0: icache pipeline stalls=0
|
||||
PERF: core0: icache reponse stalls=0
|
||||
PERF: core0: dcache reads=0
|
||||
PERF: core0: dcache writes=0
|
||||
PERF: core0: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core0: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core0: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core0: dcache mshr stalls=0
|
||||
PERF: core0: dcache pipeline stalls=0
|
||||
PERF: core0: dcache reponse stalls=0
|
||||
PERF: core0: smem reads=0
|
||||
PERF: core0: smem writes=0
|
||||
PERF: core0: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core0: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core0: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core0: dram average latency=-2147483648 cycles
|
||||
PERF: core1: instrs=2983, cycles=5353, IPC=0.557258
|
||||
PERF: core1: ibuffer stalls=0
|
||||
PERF: core1: scoreboard stalls=0
|
||||
PERF: core1: alu unit stalls=0
|
||||
PERF: core1: lsu unit stalls=0
|
||||
PERF: core1: csr unit stalls=0
|
||||
PERF: core1: fpu unit stalls=0
|
||||
PERF: core1: gpu unit stalls=0
|
||||
PERF: core1: icache reads=0
|
||||
PERF: core1: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core1: icache pipeline stalls=0
|
||||
PERF: core1: icache reponse stalls=0
|
||||
PERF: core1: dcache reads=0
|
||||
PERF: core1: dcache writes=0
|
||||
PERF: core1: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core1: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core1: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core1: dcache mshr stalls=0
|
||||
PERF: core1: dcache pipeline stalls=0
|
||||
PERF: core1: dcache reponse stalls=0
|
||||
PERF: core1: smem reads=0
|
||||
PERF: core1: smem writes=0
|
||||
PERF: core1: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core1: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core1: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core1: dram average latency=-2147483648 cycles
|
||||
PERF: instrs=5964, cycles=5416, IPC=1.101182
|
||||
PERF: ibuffer stalls=0
|
||||
PERF: scoreboard stalls=0
|
||||
PERF: alu unit stalls=0
|
||||
PERF: lsu unit stalls=0
|
||||
PERF: csr unit stalls=0
|
||||
PERF: fpu unit stalls=0
|
||||
PERF: gpu unit stalls=0
|
||||
PERF: icache reads=0
|
||||
PERF: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: icache pipeline stalls=0
|
||||
PERF: icache reponse stalls=0
|
||||
PERF: dcache reads=0
|
||||
PERF: dcache writes=0
|
||||
PERF: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: dcache mshr stalls=0
|
||||
PERF: dcache pipeline stalls=0
|
||||
PERF: dcache reponse stalls=0
|
||||
PERF: smem reads=0
|
||||
PERF: smem writes=0
|
||||
PERF: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: dram requests=0 (reads=0, writes=0)
|
||||
PERF: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: dram average latency=-2147483648 cycles
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'
|
|
@ -1,17 +0,0 @@
|
|||
Fitter Status : Successful - Sat Mar 6 02:49:17 2021
|
||||
Quartus Prime Version : 19.2.0 Build 57 06/24/2019 Patches 0.01rc SJ Pro Edition
|
||||
Revision Name : afu_default
|
||||
Top-level Entity Name : dcp_top
|
||||
Family : Arria 10
|
||||
Device : 10AX115N2F40E2LG
|
||||
Timing Models : Final
|
||||
Logic utilization (in ALMs) : 117,451 / 427,200 ( 27 % )
|
||||
Total registers : 173797
|
||||
Total pins : 310 / 826 ( 38 % )
|
||||
Total virtual pins : 0
|
||||
Total block memory bits : 4,356,616 / 55,562,240 ( 8 % )
|
||||
Total RAM Blocks : 713 / 2,713 ( 26 % )
|
||||
Total DSP Blocks : 112 / 1,518 ( 7 % )
|
||||
Total HSSI RX channels : 12 / 48 ( 25 % )
|
||||
Total HSSI TX channels : 12 / 48 ( 25 % )
|
||||
Total PLLs : 25 / 112 ( 22 % )
|
File diff suppressed because it is too large
Load diff
|
@ -1,4 +0,0 @@
|
|||
Synthesis Status : Successful - Sat Mar 6 01:57:55 2021
|
||||
Revision Name : afu_default
|
||||
Top-level Entity Name : dcp_top
|
||||
Family : Arria 10
|
File diff suppressed because it is too large
Load diff
|
@ -1,29 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./guassian
|
||||
enter demo main
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=4, num_warps=4, num_threads=4
|
||||
OK
|
||||
The result of matrix m is:
|
||||
0.00 0.00 0.00 0.00
|
||||
0.50 0.00 0.00 0.00
|
||||
0.67 0.26 0.00 0.00
|
||||
-0.00 0.15 -0.28 0.00
|
||||
|
||||
The result of matrix a is:
|
||||
-0.60 -0.50 0.70 0.30
|
||||
0.00 -0.65 -0.05 0.55
|
||||
0.00 0.00 -0.75 -1.14
|
||||
0.00 0.00 0.00 0.50
|
||||
|
||||
The result of array b is:
|
||||
-0.85 -0.25 0.87 -0.25
|
||||
|
||||
The final solution is:
|
||||
0.70 0.00 -0.40 -0.50
|
||||
|
||||
Passed!
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'
|
|
@ -1,19 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./nearn
|
||||
loading db: cane4_0.db
|
||||
loading db: cane4_1.db
|
||||
loading db: cane4_2.db
|
||||
Number of records: 1500
|
||||
Finding the 5 closest neighbors.
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=4, num_warps=4, num_threads=4
|
||||
1974 12 22 18 24 JOYCE 30.6 89.9 80 593 --> Distance=0.608276
|
||||
1965 5 13 0 17 TONY 27.8 89.0 122 260 --> Distance=2.416610
|
||||
1991 3 18 12 19 DEBBY 28.5 87.8 107 850 --> Distance=2.662703
|
||||
1957 4 17 6 12 ALBERTO 32.5 87.8 54 510 --> Distance=3.330163
|
||||
1964 8 5 6 9 FLORENCE 31.5 86.3 18 242 --> Distance=3.992490
|
||||
Passed!
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'
|
|
@ -1,19 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./saxpy
|
||||
enter demo main
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=4, num_warps=4, num_threads=4
|
||||
Attempting to create program from binary...
|
||||
Read program from binary.
|
||||
attempting to create input buffer
|
||||
attempting to create output buffer
|
||||
attempting to create kernel
|
||||
setting up kernel args
|
||||
attempting to enqueue write buffer
|
||||
attempting to enqueue kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'
|
|
@ -1,19 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sfilter
|
||||
enter demo main
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=4, num_warps=4, num_threads=4
|
||||
Attempting to create program from binary...
|
||||
Read program from binary.
|
||||
attempting to create input buffer
|
||||
attempting to create output buffer
|
||||
attempting to create kernel
|
||||
setting up kernel args
|
||||
attempting to enqueue write buffer
|
||||
attempting to enqueue kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'
|
|
@ -1,146 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sgemm -n32
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=4, num_warps=4, num_threads=4
|
||||
Create context
|
||||
Create program from kernel source
|
||||
Upload source buffers
|
||||
Execute the kernel
|
||||
Elapsed time: 3 ms
|
||||
Download destination buffer
|
||||
Verify result
|
||||
PASSED!
|
||||
PERF: core0: instrs=90890, cycles=51133, IPC=1.777521
|
||||
PERF: core0: ibuffer stalls=10132
|
||||
PERF: core0: scoreboard stalls=15251
|
||||
PERF: core0: alu unit stalls=2423
|
||||
PERF: core0: lsu unit stalls=3859
|
||||
PERF: core0: csr unit stalls=0
|
||||
PERF: core0: fpu unit stalls=0
|
||||
PERF: core0: gpu unit stalls=0
|
||||
PERF: core0: icache reads=23003
|
||||
PERF: core0: icache read misses=73 (hit ratio=99%)
|
||||
PERF: core0: icache pipeline stalls=7639
|
||||
PERF: core0: icache reponse stalls=10132
|
||||
PERF: core0: dcache reads=17502
|
||||
PERF: core0: dcache writes=293
|
||||
PERF: core0: dcache read misses=1041 (hit ratio=94%)
|
||||
PERF: core0: dcache write misses=289 (hit ratio=1%)
|
||||
PERF: core0: dcache bank stalls=8464 (utilization=67%)
|
||||
PERF: core0: dcache mshr stalls=4228
|
||||
PERF: core0: dcache pipeline stalls=9676
|
||||
PERF: core0: dcache reponse stalls=76
|
||||
PERF: core0: smem reads=2026
|
||||
PERF: core0: smem writes=1599
|
||||
PERF: core0: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core0: dram requests=479 (reads=186, writes=293)
|
||||
PERF: core0: dram stalls=789 (utilization=37%)
|
||||
PERF: core0: dram average latency=32 cycles
|
||||
PERF: core1: instrs=90890, cycles=51143, IPC=1.777174
|
||||
PERF: core1: ibuffer stalls=10158
|
||||
PERF: core1: scoreboard stalls=15244
|
||||
PERF: core1: alu unit stalls=2440
|
||||
PERF: core1: lsu unit stalls=3894
|
||||
PERF: core1: csr unit stalls=0
|
||||
PERF: core1: fpu unit stalls=0
|
||||
PERF: core1: gpu unit stalls=0
|
||||
PERF: core1: icache reads=23003
|
||||
PERF: core1: icache read misses=73 (hit ratio=99%)
|
||||
PERF: core1: icache pipeline stalls=7685
|
||||
PERF: core1: icache reponse stalls=10158
|
||||
PERF: core1: dcache reads=17502
|
||||
PERF: core1: dcache writes=293
|
||||
PERF: core1: dcache read misses=1101 (hit ratio=93%)
|
||||
PERF: core1: dcache write misses=289 (hit ratio=1%)
|
||||
PERF: core1: dcache bank stalls=8464 (utilization=67%)
|
||||
PERF: core1: dcache mshr stalls=4330
|
||||
PERF: core1: dcache pipeline stalls=9347
|
||||
PERF: core1: dcache reponse stalls=67
|
||||
PERF: core1: smem reads=2026
|
||||
PERF: core1: smem writes=1599
|
||||
PERF: core1: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core1: dram requests=509 (reads=216, writes=293)
|
||||
PERF: core1: dram stalls=715 (utilization=41%)
|
||||
PERF: core1: dram average latency=32 cycles
|
||||
PERF: core2: instrs=90890, cycles=51135, IPC=1.777452
|
||||
PERF: core2: ibuffer stalls=10120
|
||||
PERF: core2: scoreboard stalls=15237
|
||||
PERF: core2: alu unit stalls=2406
|
||||
PERF: core2: lsu unit stalls=3881
|
||||
PERF: core2: csr unit stalls=0
|
||||
PERF: core2: fpu unit stalls=0
|
||||
PERF: core2: gpu unit stalls=0
|
||||
PERF: core2: icache reads=23003
|
||||
PERF: core2: icache read misses=73 (hit ratio=99%)
|
||||
PERF: core2: icache pipeline stalls=7651
|
||||
PERF: core2: icache reponse stalls=10120
|
||||
PERF: core2: dcache reads=17502
|
||||
PERF: core2: dcache writes=293
|
||||
PERF: core2: dcache read misses=1040 (hit ratio=94%)
|
||||
PERF: core2: dcache write misses=289 (hit ratio=1%)
|
||||
PERF: core2: dcache bank stalls=8464 (utilization=67%)
|
||||
PERF: core2: dcache mshr stalls=4234
|
||||
PERF: core2: dcache pipeline stalls=9580
|
||||
PERF: core2: dcache reponse stalls=75
|
||||
PERF: core2: smem reads=2026
|
||||
PERF: core2: smem writes=1599
|
||||
PERF: core2: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core2: dram requests=478 (reads=185, writes=293)
|
||||
PERF: core2: dram stalls=776 (utilization=38%)
|
||||
PERF: core2: dram average latency=32 cycles
|
||||
PERF: core3: instrs=90892, cycles=51134, IPC=1.777526
|
||||
PERF: core3: ibuffer stalls=10116
|
||||
PERF: core3: scoreboard stalls=15282
|
||||
PERF: core3: alu unit stalls=2380
|
||||
PERF: core3: lsu unit stalls=3862
|
||||
PERF: core3: csr unit stalls=0
|
||||
PERF: core3: fpu unit stalls=0
|
||||
PERF: core3: gpu unit stalls=0
|
||||
PERF: core3: icache reads=23005
|
||||
PERF: core3: icache read misses=73 (hit ratio=99%)
|
||||
PERF: core3: icache pipeline stalls=7688
|
||||
PERF: core3: icache reponse stalls=10116
|
||||
PERF: core3: dcache reads=17502
|
||||
PERF: core3: dcache writes=293
|
||||
PERF: core3: dcache read misses=1040 (hit ratio=94%)
|
||||
PERF: core3: dcache write misses=289 (hit ratio=1%)
|
||||
PERF: core3: dcache bank stalls=8464 (utilization=67%)
|
||||
PERF: core3: dcache mshr stalls=4421
|
||||
PERF: core3: dcache pipeline stalls=9647
|
||||
PERF: core3: dcache reponse stalls=76
|
||||
PERF: core3: smem reads=2026
|
||||
PERF: core3: smem writes=1599
|
||||
PERF: core3: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core3: dram requests=478 (reads=185, writes=293)
|
||||
PERF: core3: dram stalls=684 (utilization=41%)
|
||||
PERF: core3: dram average latency=32 cycles
|
||||
PERF: instrs=363562, cycles=51143, IPC=7.108734
|
||||
PERF: ibuffer stalls=40526
|
||||
PERF: scoreboard stalls=61014
|
||||
PERF: alu unit stalls=9649
|
||||
PERF: lsu unit stalls=15496
|
||||
PERF: csr unit stalls=0
|
||||
PERF: fpu unit stalls=0
|
||||
PERF: gpu unit stalls=0
|
||||
PERF: icache reads=92014
|
||||
PERF: icache read misses=292 (hit ratio=99%)
|
||||
PERF: icache pipeline stalls=30663
|
||||
PERF: icache reponse stalls=40526
|
||||
PERF: dcache reads=70008
|
||||
PERF: dcache writes=1172
|
||||
PERF: dcache read misses=4222 (hit ratio=93%)
|
||||
PERF: dcache write misses=1156 (hit ratio=1%)
|
||||
PERF: dcache bank stalls=33856 (utilization=67%)
|
||||
PERF: dcache mshr stalls=17213
|
||||
PERF: dcache pipeline stalls=38250
|
||||
PERF: dcache reponse stalls=294
|
||||
PERF: smem reads=8104
|
||||
PERF: smem writes=6396
|
||||
PERF: smem bank stalls=0 (utilization=100%)
|
||||
PERF: dram requests=1944 (reads=772, writes=1172)
|
||||
PERF: dram stalls=2964 (utilization=39%)
|
||||
PERF: dram average latency=32 cycles
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'
|
|
@ -1,3 +0,0 @@
|
|||
# Generated by Platform Interface Manager user_clock_config.tcl
|
||||
afu-image/clock-frequency-low:93.0
|
||||
afu-image/clock-frequency-high:186
|
|
@ -1,147 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./vecadd -n64
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=4, num_warps=4, num_threads=4
|
||||
Create context
|
||||
Allocate device buffers
|
||||
Create program from kernel source
|
||||
Upload source buffers
|
||||
Execute the kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
Verify result
|
||||
PASSED!
|
||||
PERF: core0: instrs=2019, cycles=5042, IPC=0.400436
|
||||
PERF: core0: ibuffer stalls=86
|
||||
PERF: core0: scoreboard stalls=451
|
||||
PERF: core0: alu unit stalls=68
|
||||
PERF: core0: lsu unit stalls=53
|
||||
PERF: core0: csr unit stalls=0
|
||||
PERF: core0: fpu unit stalls=0
|
||||
PERF: core0: gpu unit stalls=0
|
||||
PERF: core0: icache reads=804
|
||||
PERF: core0: icache read misses=65 (hit ratio=91%)
|
||||
PERF: core0: icache pipeline stalls=469
|
||||
PERF: core0: icache reponse stalls=86
|
||||
PERF: core0: dcache reads=114
|
||||
PERF: core0: dcache writes=65
|
||||
PERF: core0: dcache read misses=28 (hit ratio=75%)
|
||||
PERF: core0: dcache write misses=60 (hit ratio=7%)
|
||||
PERF: core0: dcache bank stalls=72 (utilization=71%)
|
||||
PERF: core0: dcache mshr stalls=56
|
||||
PERF: core0: dcache pipeline stalls=88
|
||||
PERF: core0: dcache reponse stalls=1
|
||||
PERF: core0: smem reads=70
|
||||
PERF: core0: smem writes=63
|
||||
PERF: core0: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core0: dram requests=109 (reads=44, writes=65)
|
||||
PERF: core0: dram stalls=53 (utilization=67%)
|
||||
PERF: core0: dram average latency=31 cycles
|
||||
PERF: core1: instrs=2019, cycles=5041, IPC=0.400516
|
||||
PERF: core1: ibuffer stalls=86
|
||||
PERF: core1: scoreboard stalls=451
|
||||
PERF: core1: alu unit stalls=68
|
||||
PERF: core1: lsu unit stalls=53
|
||||
PERF: core1: csr unit stalls=0
|
||||
PERF: core1: fpu unit stalls=0
|
||||
PERF: core1: gpu unit stalls=0
|
||||
PERF: core1: icache reads=804
|
||||
PERF: core1: icache read misses=65 (hit ratio=91%)
|
||||
PERF: core1: icache pipeline stalls=470
|
||||
PERF: core1: icache reponse stalls=86
|
||||
PERF: core1: dcache reads=114
|
||||
PERF: core1: dcache writes=65
|
||||
PERF: core1: dcache read misses=28 (hit ratio=75%)
|
||||
PERF: core1: dcache write misses=60 (hit ratio=7%)
|
||||
PERF: core1: dcache bank stalls=72 (utilization=71%)
|
||||
PERF: core1: dcache mshr stalls=56
|
||||
PERF: core1: dcache pipeline stalls=88
|
||||
PERF: core1: dcache reponse stalls=1
|
||||
PERF: core1: smem reads=70
|
||||
PERF: core1: smem writes=63
|
||||
PERF: core1: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core1: dram requests=109 (reads=44, writes=65)
|
||||
PERF: core1: dram stalls=52 (utilization=67%)
|
||||
PERF: core1: dram average latency=31 cycles
|
||||
PERF: core2: instrs=2019, cycles=5040, IPC=0.400595
|
||||
PERF: core2: ibuffer stalls=86
|
||||
PERF: core2: scoreboard stalls=451
|
||||
PERF: core2: alu unit stalls=68
|
||||
PERF: core2: lsu unit stalls=53
|
||||
PERF: core2: csr unit stalls=0
|
||||
PERF: core2: fpu unit stalls=0
|
||||
PERF: core2: gpu unit stalls=0
|
||||
PERF: core2: icache reads=804
|
||||
PERF: core2: icache read misses=65 (hit ratio=91%)
|
||||
PERF: core2: icache pipeline stalls=470
|
||||
PERF: core2: icache reponse stalls=86
|
||||
PERF: core2: dcache reads=114
|
||||
PERF: core2: dcache writes=65
|
||||
PERF: core2: dcache read misses=28 (hit ratio=75%)
|
||||
PERF: core2: dcache write misses=60 (hit ratio=7%)
|
||||
PERF: core2: dcache bank stalls=72 (utilization=71%)
|
||||
PERF: core2: dcache mshr stalls=56
|
||||
PERF: core2: dcache pipeline stalls=88
|
||||
PERF: core2: dcache reponse stalls=1
|
||||
PERF: core2: smem reads=70
|
||||
PERF: core2: smem writes=63
|
||||
PERF: core2: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core2: dram requests=109 (reads=44, writes=65)
|
||||
PERF: core2: dram stalls=51 (utilization=68%)
|
||||
PERF: core2: dram average latency=31 cycles
|
||||
PERF: core3: instrs=2021, cycles=5043, IPC=0.400754
|
||||
PERF: core3: ibuffer stalls=102
|
||||
PERF: core3: scoreboard stalls=496
|
||||
PERF: core3: alu unit stalls=73
|
||||
PERF: core3: lsu unit stalls=53
|
||||
PERF: core3: csr unit stalls=0
|
||||
PERF: core3: fpu unit stalls=0
|
||||
PERF: core3: gpu unit stalls=0
|
||||
PERF: core3: icache reads=806
|
||||
PERF: core3: icache read misses=65 (hit ratio=91%)
|
||||
PERF: core3: icache pipeline stalls=439
|
||||
PERF: core3: icache reponse stalls=102
|
||||
PERF: core3: dcache reads=114
|
||||
PERF: core3: dcache writes=65
|
||||
PERF: core3: dcache read misses=28 (hit ratio=75%)
|
||||
PERF: core3: dcache write misses=60 (hit ratio=7%)
|
||||
PERF: core3: dcache bank stalls=72 (utilization=71%)
|
||||
PERF: core3: dcache mshr stalls=56
|
||||
PERF: core3: dcache pipeline stalls=88
|
||||
PERF: core3: dcache reponse stalls=1
|
||||
PERF: core3: smem reads=70
|
||||
PERF: core3: smem writes=63
|
||||
PERF: core3: smem bank stalls=0 (utilization=100%)
|
||||
PERF: core3: dram requests=109 (reads=44, writes=65)
|
||||
PERF: core3: dram stalls=50 (utilization=68%)
|
||||
PERF: core3: dram average latency=30 cycles
|
||||
PERF: instrs=8078, cycles=5043, IPC=1.601824
|
||||
PERF: ibuffer stalls=360
|
||||
PERF: scoreboard stalls=1849
|
||||
PERF: alu unit stalls=277
|
||||
PERF: lsu unit stalls=212
|
||||
PERF: csr unit stalls=0
|
||||
PERF: fpu unit stalls=0
|
||||
PERF: gpu unit stalls=0
|
||||
PERF: icache reads=3218
|
||||
PERF: icache read misses=260 (hit ratio=91%)
|
||||
PERF: icache pipeline stalls=1848
|
||||
PERF: icache reponse stalls=360
|
||||
PERF: dcache reads=456
|
||||
PERF: dcache writes=260
|
||||
PERF: dcache read misses=112 (hit ratio=75%)
|
||||
PERF: dcache write misses=240 (hit ratio=7%)
|
||||
PERF: dcache bank stalls=288 (utilization=71%)
|
||||
PERF: dcache mshr stalls=224
|
||||
PERF: dcache pipeline stalls=352
|
||||
PERF: dcache reponse stalls=4
|
||||
PERF: smem reads=280
|
||||
PERF: smem writes=252
|
||||
PERF: smem bank stalls=0 (utilization=100%)
|
||||
PERF: dram requests=436 (reads=176, writes=260)
|
||||
PERF: dram stalls=206 (utilization=67%)
|
||||
PERF: dram average latency=30 cycles
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'
|
|
@ -1,17 +0,0 @@
|
|||
Fitter Status : Successful - Sat Mar 6 04:32:43 2021
|
||||
Quartus Prime Version : 19.2.0 Build 57 06/24/2019 Patches 0.01rc SJ Pro Edition
|
||||
Revision Name : afu_default
|
||||
Top-level Entity Name : dcp_top
|
||||
Family : Arria 10
|
||||
Device : 10AX115N2F40E2LG
|
||||
Timing Models : Final
|
||||
Logic utilization (in ALMs) : 190,373 / 427,200 ( 45 % )
|
||||
Total registers : 288074
|
||||
Total pins : 310 / 826 ( 38 % )
|
||||
Total virtual pins : 0
|
||||
Total block memory bits : 7,135,144 / 55,562,240 ( 13 % )
|
||||
Total RAM Blocks : 1,237 / 2,713 ( 46 % )
|
||||
Total DSP Blocks : 224 / 1,518 ( 15 % )
|
||||
Total HSSI RX channels : 12 / 48 ( 25 % )
|
||||
Total HSSI TX channels : 12 / 48 ( 25 % )
|
||||
Total PLLs : 25 / 112 ( 22 % )
|
File diff suppressed because it is too large
Load diff
|
@ -1,4 +0,0 @@
|
|||
Synthesis Status : Successful - Sat Mar 6 03:10:30 2021
|
||||
Revision Name : afu_default
|
||||
Top-level Entity Name : dcp_top
|
||||
Family : Arria 10
|
File diff suppressed because it is too large
Load diff
|
@ -1,29 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./guassian
|
||||
enter demo main
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=8, num_warps=4, num_threads=4
|
||||
OK
|
||||
The result of matrix m is:
|
||||
0.00 0.00 0.00 0.00
|
||||
0.50 0.00 0.00 0.00
|
||||
0.67 0.26 0.00 0.00
|
||||
-0.00 0.15 -0.28 0.00
|
||||
|
||||
The result of matrix a is:
|
||||
-0.60 -0.50 0.70 0.30
|
||||
0.00 -0.65 -0.05 0.55
|
||||
0.00 0.00 -0.75 -1.14
|
||||
0.00 0.00 0.00 0.50
|
||||
|
||||
The result of array b is:
|
||||
-0.85 -0.25 0.87 -0.25
|
||||
|
||||
The final solution is:
|
||||
0.70 0.00 -0.40 -0.50
|
||||
|
||||
Passed!
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'
|
|
@ -1,19 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./nearn
|
||||
loading db: cane4_0.db
|
||||
loading db: cane4_1.db
|
||||
loading db: cane4_2.db
|
||||
Number of records: 1500
|
||||
Finding the 5 closest neighbors.
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=8, num_warps=4, num_threads=4
|
||||
1974 12 22 18 24 JOYCE 30.6 89.9 80 593 --> Distance=0.608276
|
||||
1965 5 13 0 17 TONY 27.8 89.0 122 260 --> Distance=2.416610
|
||||
1991 3 18 12 19 DEBBY 28.5 87.8 107 850 --> Distance=2.662703
|
||||
1957 4 17 6 12 ALBERTO 32.5 87.8 54 510 --> Distance=3.330163
|
||||
1964 8 5 6 9 FLORENCE 31.5 86.3 18 242 --> Distance=3.992490
|
||||
Passed!
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'
|
|
@ -1,19 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./saxpy
|
||||
enter demo main
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=8, num_warps=4, num_threads=4
|
||||
Attempting to create program from binary...
|
||||
Read program from binary.
|
||||
attempting to create input buffer
|
||||
attempting to create output buffer
|
||||
attempting to create kernel
|
||||
setting up kernel args
|
||||
attempting to enqueue write buffer
|
||||
attempting to enqueue kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'
|
|
@ -1,19 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sfilter
|
||||
enter demo main
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=8, num_warps=4, num_threads=4
|
||||
Attempting to create program from binary...
|
||||
Read program from binary.
|
||||
attempting to create input buffer
|
||||
attempting to create output buffer
|
||||
attempting to create kernel
|
||||
setting up kernel args
|
||||
attempting to enqueue write buffer
|
||||
attempting to enqueue kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'
|
|
@ -1,250 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sgemm -n32
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=8, num_warps=4, num_threads=4
|
||||
Create context
|
||||
Create program from kernel source
|
||||
Upload source buffers
|
||||
Execute the kernel
|
||||
Elapsed time: 4 ms
|
||||
Download destination buffer
|
||||
Verify result
|
||||
PASSED!
|
||||
PERF: core0: instrs=45962, cycles=25060, IPC=1.834078
|
||||
PERF: core0: ibuffer stalls=0
|
||||
PERF: core0: scoreboard stalls=0
|
||||
PERF: core0: alu unit stalls=0
|
||||
PERF: core0: lsu unit stalls=0
|
||||
PERF: core0: csr unit stalls=0
|
||||
PERF: core0: fpu unit stalls=0
|
||||
PERF: core0: gpu unit stalls=0
|
||||
PERF: core0: icache reads=0
|
||||
PERF: core0: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core0: icache pipeline stalls=0
|
||||
PERF: core0: icache reponse stalls=0
|
||||
PERF: core0: dcache reads=0
|
||||
PERF: core0: dcache writes=0
|
||||
PERF: core0: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core0: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core0: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core0: dcache mshr stalls=0
|
||||
PERF: core0: dcache pipeline stalls=0
|
||||
PERF: core0: dcache reponse stalls=0
|
||||
PERF: core0: smem reads=0
|
||||
PERF: core0: smem writes=0
|
||||
PERF: core0: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core0: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core0: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core0: dram average latency=-2147483648 cycles
|
||||
PERF: core1: instrs=45962, cycles=25057, IPC=1.834298
|
||||
PERF: core1: ibuffer stalls=0
|
||||
PERF: core1: scoreboard stalls=0
|
||||
PERF: core1: alu unit stalls=0
|
||||
PERF: core1: lsu unit stalls=0
|
||||
PERF: core1: csr unit stalls=0
|
||||
PERF: core1: fpu unit stalls=0
|
||||
PERF: core1: gpu unit stalls=0
|
||||
PERF: core1: icache reads=0
|
||||
PERF: core1: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core1: icache pipeline stalls=0
|
||||
PERF: core1: icache reponse stalls=0
|
||||
PERF: core1: dcache reads=0
|
||||
PERF: core1: dcache writes=0
|
||||
PERF: core1: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core1: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core1: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core1: dcache mshr stalls=0
|
||||
PERF: core1: dcache pipeline stalls=0
|
||||
PERF: core1: dcache reponse stalls=0
|
||||
PERF: core1: smem reads=0
|
||||
PERF: core1: smem writes=0
|
||||
PERF: core1: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core1: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core1: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core1: dram average latency=-2147483648 cycles
|
||||
PERF: core2: instrs=45962, cycles=25062, IPC=1.833932
|
||||
PERF: core2: ibuffer stalls=0
|
||||
PERF: core2: scoreboard stalls=0
|
||||
PERF: core2: alu unit stalls=0
|
||||
PERF: core2: lsu unit stalls=0
|
||||
PERF: core2: csr unit stalls=0
|
||||
PERF: core2: fpu unit stalls=0
|
||||
PERF: core2: gpu unit stalls=0
|
||||
PERF: core2: icache reads=0
|
||||
PERF: core2: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core2: icache pipeline stalls=0
|
||||
PERF: core2: icache reponse stalls=0
|
||||
PERF: core2: dcache reads=0
|
||||
PERF: core2: dcache writes=0
|
||||
PERF: core2: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core2: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core2: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core2: dcache mshr stalls=0
|
||||
PERF: core2: dcache pipeline stalls=0
|
||||
PERF: core2: dcache reponse stalls=0
|
||||
PERF: core2: smem reads=0
|
||||
PERF: core2: smem writes=0
|
||||
PERF: core2: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core2: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core2: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core2: dram average latency=-2147483648 cycles
|
||||
PERF: core3: instrs=45962, cycles=25054, IPC=1.834517
|
||||
PERF: core3: ibuffer stalls=0
|
||||
PERF: core3: scoreboard stalls=0
|
||||
PERF: core3: alu unit stalls=0
|
||||
PERF: core3: lsu unit stalls=0
|
||||
PERF: core3: csr unit stalls=0
|
||||
PERF: core3: fpu unit stalls=0
|
||||
PERF: core3: gpu unit stalls=0
|
||||
PERF: core3: icache reads=0
|
||||
PERF: core3: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core3: icache pipeline stalls=0
|
||||
PERF: core3: icache reponse stalls=0
|
||||
PERF: core3: dcache reads=0
|
||||
PERF: core3: dcache writes=0
|
||||
PERF: core3: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core3: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core3: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core3: dcache mshr stalls=0
|
||||
PERF: core3: dcache pipeline stalls=0
|
||||
PERF: core3: dcache reponse stalls=0
|
||||
PERF: core3: smem reads=0
|
||||
PERF: core3: smem writes=0
|
||||
PERF: core3: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core3: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core3: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core3: dram average latency=-2147483648 cycles
|
||||
PERF: core4: instrs=45962, cycles=25056, IPC=1.834371
|
||||
PERF: core4: ibuffer stalls=0
|
||||
PERF: core4: scoreboard stalls=0
|
||||
PERF: core4: alu unit stalls=0
|
||||
PERF: core4: lsu unit stalls=0
|
||||
PERF: core4: csr unit stalls=0
|
||||
PERF: core4: fpu unit stalls=0
|
||||
PERF: core4: gpu unit stalls=0
|
||||
PERF: core4: icache reads=0
|
||||
PERF: core4: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core4: icache pipeline stalls=0
|
||||
PERF: core4: icache reponse stalls=0
|
||||
PERF: core4: dcache reads=0
|
||||
PERF: core4: dcache writes=0
|
||||
PERF: core4: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core4: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core4: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core4: dcache mshr stalls=0
|
||||
PERF: core4: dcache pipeline stalls=0
|
||||
PERF: core4: dcache reponse stalls=0
|
||||
PERF: core4: smem reads=0
|
||||
PERF: core4: smem writes=0
|
||||
PERF: core4: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core4: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core4: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core4: dram average latency=-2147483648 cycles
|
||||
PERF: core5: instrs=45962, cycles=25066, IPC=1.833639
|
||||
PERF: core5: ibuffer stalls=0
|
||||
PERF: core5: scoreboard stalls=0
|
||||
PERF: core5: alu unit stalls=0
|
||||
PERF: core5: lsu unit stalls=0
|
||||
PERF: core5: csr unit stalls=0
|
||||
PERF: core5: fpu unit stalls=0
|
||||
PERF: core5: gpu unit stalls=0
|
||||
PERF: core5: icache reads=0
|
||||
PERF: core5: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core5: icache pipeline stalls=0
|
||||
PERF: core5: icache reponse stalls=0
|
||||
PERF: core5: dcache reads=0
|
||||
PERF: core5: dcache writes=0
|
||||
PERF: core5: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core5: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core5: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core5: dcache mshr stalls=0
|
||||
PERF: core5: dcache pipeline stalls=0
|
||||
PERF: core5: dcache reponse stalls=0
|
||||
PERF: core5: smem reads=0
|
||||
PERF: core5: smem writes=0
|
||||
PERF: core5: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core5: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core5: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core5: dram average latency=-2147483648 cycles
|
||||
PERF: core6: instrs=45962, cycles=25058, IPC=1.834225
|
||||
PERF: core6: ibuffer stalls=0
|
||||
PERF: core6: scoreboard stalls=0
|
||||
PERF: core6: alu unit stalls=0
|
||||
PERF: core6: lsu unit stalls=0
|
||||
PERF: core6: csr unit stalls=0
|
||||
PERF: core6: fpu unit stalls=0
|
||||
PERF: core6: gpu unit stalls=0
|
||||
PERF: core6: icache reads=0
|
||||
PERF: core6: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core6: icache pipeline stalls=0
|
||||
PERF: core6: icache reponse stalls=0
|
||||
PERF: core6: dcache reads=0
|
||||
PERF: core6: dcache writes=0
|
||||
PERF: core6: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core6: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core6: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core6: dcache mshr stalls=0
|
||||
PERF: core6: dcache pipeline stalls=0
|
||||
PERF: core6: dcache reponse stalls=0
|
||||
PERF: core6: smem reads=0
|
||||
PERF: core6: smem writes=0
|
||||
PERF: core6: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core6: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core6: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core6: dram average latency=-2147483648 cycles
|
||||
PERF: core7: instrs=45964, cycles=25061, IPC=1.834085
|
||||
PERF: core7: ibuffer stalls=0
|
||||
PERF: core7: scoreboard stalls=0
|
||||
PERF: core7: alu unit stalls=0
|
||||
PERF: core7: lsu unit stalls=0
|
||||
PERF: core7: csr unit stalls=0
|
||||
PERF: core7: fpu unit stalls=0
|
||||
PERF: core7: gpu unit stalls=0
|
||||
PERF: core7: icache reads=0
|
||||
PERF: core7: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core7: icache pipeline stalls=0
|
||||
PERF: core7: icache reponse stalls=0
|
||||
PERF: core7: dcache reads=0
|
||||
PERF: core7: dcache writes=0
|
||||
PERF: core7: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core7: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core7: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core7: dcache mshr stalls=0
|
||||
PERF: core7: dcache pipeline stalls=0
|
||||
PERF: core7: dcache reponse stalls=0
|
||||
PERF: core7: smem reads=0
|
||||
PERF: core7: smem writes=0
|
||||
PERF: core7: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core7: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core7: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core7: dram average latency=-2147483648 cycles
|
||||
PERF: instrs=367698, cycles=25066, IPC=14.669193
|
||||
PERF: ibuffer stalls=0
|
||||
PERF: scoreboard stalls=0
|
||||
PERF: alu unit stalls=0
|
||||
PERF: lsu unit stalls=0
|
||||
PERF: csr unit stalls=0
|
||||
PERF: fpu unit stalls=0
|
||||
PERF: gpu unit stalls=0
|
||||
PERF: icache reads=0
|
||||
PERF: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: icache pipeline stalls=0
|
||||
PERF: icache reponse stalls=0
|
||||
PERF: dcache reads=0
|
||||
PERF: dcache writes=0
|
||||
PERF: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: dcache mshr stalls=0
|
||||
PERF: dcache pipeline stalls=0
|
||||
PERF: dcache reponse stalls=0
|
||||
PERF: smem reads=0
|
||||
PERF: smem writes=0
|
||||
PERF: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: dram requests=0 (reads=0, writes=0)
|
||||
PERF: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: dram average latency=-2147483648 cycles
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'
|
|
@ -1,3 +0,0 @@
|
|||
# Generated by Platform Interface Manager user_clock_config.tcl
|
||||
afu-image/clock-frequency-low:90.0
|
||||
afu-image/clock-frequency-high:180
|
|
@ -1,251 +0,0 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
rm -rf libvortex.so *.o .depend
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
|
||||
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'
|
||||
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./vecadd -n64
|
||||
[VXDRV] DEVCAPS: version=0, num_cores=8, num_warps=4, num_threads=4
|
||||
Create context
|
||||
Allocate device buffers
|
||||
Create program from kernel source
|
||||
Upload source buffers
|
||||
Execute the kernel
|
||||
Elapsed time: 3 ms
|
||||
Download destination buffer
|
||||
Verify result
|
||||
PASSED!
|
||||
PERF: core0: instrs=2019, cycles=4958, IPC=0.407221
|
||||
PERF: core0: ibuffer stalls=0
|
||||
PERF: core0: scoreboard stalls=0
|
||||
PERF: core0: alu unit stalls=0
|
||||
PERF: core0: lsu unit stalls=0
|
||||
PERF: core0: csr unit stalls=0
|
||||
PERF: core0: fpu unit stalls=0
|
||||
PERF: core0: gpu unit stalls=0
|
||||
PERF: core0: icache reads=0
|
||||
PERF: core0: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core0: icache pipeline stalls=0
|
||||
PERF: core0: icache reponse stalls=0
|
||||
PERF: core0: dcache reads=0
|
||||
PERF: core0: dcache writes=0
|
||||
PERF: core0: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core0: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core0: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core0: dcache mshr stalls=0
|
||||
PERF: core0: dcache pipeline stalls=0
|
||||
PERF: core0: dcache reponse stalls=0
|
||||
PERF: core0: smem reads=0
|
||||
PERF: core0: smem writes=0
|
||||
PERF: core0: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core0: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core0: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core0: dram average latency=-2147483648 cycles
|
||||
PERF: core1: instrs=2019, cycles=4957, IPC=0.407303
|
||||
PERF: core1: ibuffer stalls=0
|
||||
PERF: core1: scoreboard stalls=0
|
||||
PERF: core1: alu unit stalls=0
|
||||
PERF: core1: lsu unit stalls=0
|
||||
PERF: core1: csr unit stalls=0
|
||||
PERF: core1: fpu unit stalls=0
|
||||
PERF: core1: gpu unit stalls=0
|
||||
PERF: core1: icache reads=0
|
||||
PERF: core1: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core1: icache pipeline stalls=0
|
||||
PERF: core1: icache reponse stalls=0
|
||||
PERF: core1: dcache reads=0
|
||||
PERF: core1: dcache writes=0
|
||||
PERF: core1: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core1: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core1: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core1: dcache mshr stalls=0
|
||||
PERF: core1: dcache pipeline stalls=0
|
||||
PERF: core1: dcache reponse stalls=0
|
||||
PERF: core1: smem reads=0
|
||||
PERF: core1: smem writes=0
|
||||
PERF: core1: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core1: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core1: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core1: dram average latency=-2147483648 cycles
|
||||
PERF: core2: instrs=2019, cycles=4955, IPC=0.407467
|
||||
PERF: core2: ibuffer stalls=0
|
||||
PERF: core2: scoreboard stalls=0
|
||||
PERF: core2: alu unit stalls=0
|
||||
PERF: core2: lsu unit stalls=0
|
||||
PERF: core2: csr unit stalls=0
|
||||
PERF: core2: fpu unit stalls=0
|
||||
PERF: core2: gpu unit stalls=0
|
||||
PERF: core2: icache reads=0
|
||||
PERF: core2: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core2: icache pipeline stalls=0
|
||||
PERF: core2: icache reponse stalls=0
|
||||
PERF: core2: dcache reads=0
|
||||
PERF: core2: dcache writes=0
|
||||
PERF: core2: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core2: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core2: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core2: dcache mshr stalls=0
|
||||
PERF: core2: dcache pipeline stalls=0
|
||||
PERF: core2: dcache reponse stalls=0
|
||||
PERF: core2: smem reads=0
|
||||
PERF: core2: smem writes=0
|
||||
PERF: core2: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core2: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core2: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core2: dram average latency=-2147483648 cycles
|
||||
PERF: core3: instrs=2019, cycles=4953, IPC=0.407632
|
||||
PERF: core3: ibuffer stalls=0
|
||||
PERF: core3: scoreboard stalls=0
|
||||
PERF: core3: alu unit stalls=0
|
||||
PERF: core3: lsu unit stalls=0
|
||||
PERF: core3: csr unit stalls=0
|
||||
PERF: core3: fpu unit stalls=0
|
||||
PERF: core3: gpu unit stalls=0
|
||||
PERF: core3: icache reads=0
|
||||
PERF: core3: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core3: icache pipeline stalls=0
|
||||
PERF: core3: icache reponse stalls=0
|
||||
PERF: core3: dcache reads=0
|
||||
PERF: core3: dcache writes=0
|
||||
PERF: core3: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core3: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core3: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core3: dcache mshr stalls=0
|
||||
PERF: core3: dcache pipeline stalls=0
|
||||
PERF: core3: dcache reponse stalls=0
|
||||
PERF: core3: smem reads=0
|
||||
PERF: core3: smem writes=0
|
||||
PERF: core3: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core3: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core3: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core3: dram average latency=-2147483648 cycles
|
||||
PERF: core4: instrs=495, cycles=3388, IPC=0.146104
|
||||
PERF: core4: ibuffer stalls=0
|
||||
PERF: core4: scoreboard stalls=0
|
||||
PERF: core4: alu unit stalls=0
|
||||
PERF: core4: lsu unit stalls=0
|
||||
PERF: core4: csr unit stalls=0
|
||||
PERF: core4: fpu unit stalls=0
|
||||
PERF: core4: gpu unit stalls=0
|
||||
PERF: core4: icache reads=0
|
||||
PERF: core4: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core4: icache pipeline stalls=0
|
||||
PERF: core4: icache reponse stalls=0
|
||||
PERF: core4: dcache reads=0
|
||||
PERF: core4: dcache writes=0
|
||||
PERF: core4: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core4: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core4: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core4: dcache mshr stalls=0
|
||||
PERF: core4: dcache pipeline stalls=0
|
||||
PERF: core4: dcache reponse stalls=0
|
||||
PERF: core4: smem reads=0
|
||||
PERF: core4: smem writes=0
|
||||
PERF: core4: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core4: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core4: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core4: dram average latency=-2147483648 cycles
|
||||
PERF: core5: instrs=495, cycles=3387, IPC=0.146147
|
||||
PERF: core5: ibuffer stalls=0
|
||||
PERF: core5: scoreboard stalls=0
|
||||
PERF: core5: alu unit stalls=0
|
||||
PERF: core5: lsu unit stalls=0
|
||||
PERF: core5: csr unit stalls=0
|
||||
PERF: core5: fpu unit stalls=0
|
||||
PERF: core5: gpu unit stalls=0
|
||||
PERF: core5: icache reads=0
|
||||
PERF: core5: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core5: icache pipeline stalls=0
|
||||
PERF: core5: icache reponse stalls=0
|
||||
PERF: core5: dcache reads=0
|
||||
PERF: core5: dcache writes=0
|
||||
PERF: core5: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core5: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core5: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core5: dcache mshr stalls=0
|
||||
PERF: core5: dcache pipeline stalls=0
|
||||
PERF: core5: dcache reponse stalls=0
|
||||
PERF: core5: smem reads=0
|
||||
PERF: core5: smem writes=0
|
||||
PERF: core5: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core5: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core5: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core5: dram average latency=-2147483648 cycles
|
||||
PERF: core6: instrs=495, cycles=3386, IPC=0.146190
|
||||
PERF: core6: ibuffer stalls=0
|
||||
PERF: core6: scoreboard stalls=0
|
||||
PERF: core6: alu unit stalls=0
|
||||
PERF: core6: lsu unit stalls=0
|
||||
PERF: core6: csr unit stalls=0
|
||||
PERF: core6: fpu unit stalls=0
|
||||
PERF: core6: gpu unit stalls=0
|
||||
PERF: core6: icache reads=0
|
||||
PERF: core6: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core6: icache pipeline stalls=0
|
||||
PERF: core6: icache reponse stalls=0
|
||||
PERF: core6: dcache reads=0
|
||||
PERF: core6: dcache writes=0
|
||||
PERF: core6: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core6: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core6: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core6: dcache mshr stalls=0
|
||||
PERF: core6: dcache pipeline stalls=0
|
||||
PERF: core6: dcache reponse stalls=0
|
||||
PERF: core6: smem reads=0
|
||||
PERF: core6: smem writes=0
|
||||
PERF: core6: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core6: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core6: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core6: dram average latency=-2147483648 cycles
|
||||
PERF: core7: instrs=495, cycles=3384, IPC=0.146277
|
||||
PERF: core7: ibuffer stalls=0
|
||||
PERF: core7: scoreboard stalls=0
|
||||
PERF: core7: alu unit stalls=0
|
||||
PERF: core7: lsu unit stalls=0
|
||||
PERF: core7: csr unit stalls=0
|
||||
PERF: core7: fpu unit stalls=0
|
||||
PERF: core7: gpu unit stalls=0
|
||||
PERF: core7: icache reads=0
|
||||
PERF: core7: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core7: icache pipeline stalls=0
|
||||
PERF: core7: icache reponse stalls=0
|
||||
PERF: core7: dcache reads=0
|
||||
PERF: core7: dcache writes=0
|
||||
PERF: core7: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core7: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: core7: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core7: dcache mshr stalls=0
|
||||
PERF: core7: dcache pipeline stalls=0
|
||||
PERF: core7: dcache reponse stalls=0
|
||||
PERF: core7: smem reads=0
|
||||
PERF: core7: smem writes=0
|
||||
PERF: core7: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: core7: dram requests=0 (reads=0, writes=0)
|
||||
PERF: core7: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: core7: dram average latency=-2147483648 cycles
|
||||
PERF: instrs=10056, cycles=4958, IPC=2.028237
|
||||
PERF: ibuffer stalls=0
|
||||
PERF: scoreboard stalls=0
|
||||
PERF: alu unit stalls=0
|
||||
PERF: lsu unit stalls=0
|
||||
PERF: csr unit stalls=0
|
||||
PERF: fpu unit stalls=0
|
||||
PERF: gpu unit stalls=0
|
||||
PERF: icache reads=0
|
||||
PERF: icache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: icache pipeline stalls=0
|
||||
PERF: icache reponse stalls=0
|
||||
PERF: dcache reads=0
|
||||
PERF: dcache writes=0
|
||||
PERF: dcache read misses=0 (hit ratio=-2147483648%)
|
||||
PERF: dcache write misses=0 (hit ratio=-2147483648%)
|
||||
PERF: dcache bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: dcache mshr stalls=0
|
||||
PERF: dcache pipeline stalls=0
|
||||
PERF: dcache reponse stalls=0
|
||||
PERF: smem reads=0
|
||||
PERF: smem writes=0
|
||||
PERF: smem bank stalls=0 (utilization=-2147483648%)
|
||||
PERF: dram requests=0 (reads=0, writes=0)
|
||||
PERF: dram stalls=0 (utilization=-2147483648%)
|
||||
PERF: dram average latency=-2147483648 cycles
|
||||
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'
|
|
@ -1,51 +0,0 @@
|
|||
#!/bin/bash
# Runs every OpenCL benchmark under rtlsim once per generated hardware
# configuration.
#
# For each rtl/configs/*.sh file the script sources the config, rebuilds hw,
# runtime and the rtlsim driver, then launches all benchmarks in parallel and
# stores each benchmark's output in test_outputs/<config>-<benchmark>.log.
#
# Exit status: 0 when every benchmark run succeeded, 1 when at least one
# benchmark run failed. Build failures of hw/runtime/driver abort immediately
# because of `set -e`.

set -e
# Without pipefail, `make run-rtlsim | tee ...` reports tee's exit status and
# a failing make would go unnoticed.
set -o pipefail

output_dir="$(pwd)/test_outputs"
mkdir -p "$output_dir"

# Generate the per-configuration shell fragments and list them in the output.
(cd rtl && python3 gen_synth_configs.py && ls -l configs)

config_location=rtl/configs

declare -a test_names=("sgemm" "saxpy" "bfs" "guassian" "vecadd" "nearn" "sfilter")

# Fail early if any benchmark directory is missing.
for test_name in "${test_names[@]}"; do
    if [ ! -d "benchmarks/new_opencl/$test_name" ]; then
        echo "Unknown benchmark $test_name"
        exit 1
    fi
done

failed=0

for filename in "$config_location"/*.sh; do

    # Strip the directory and the extension to get the configuration name.
    name=${filename##*/}
    base=${name%.*}

    # NOTE(review): presumably the sourced file sets the variables consumed by
    # the build_config targets below -- confirm against gen_synth_configs.py.
    . "$filename"

    make -C hw build_config
    make -C runtime build_config
    make -C driver/rtlsim

    pids=()
    labels=()

    for test_name in "${test_names[@]}"; do

        (
            echo "Running $base-$test_name..."

            cd "benchmarks/new_opencl/$test_name"
            make clean
            make
            make run-rtlsim 2>&1 | tee "$output_dir/$base-$test_name.log"
        ) &

        pids+=("$!")
        labels+=("$base-$test_name")

    done # test_name

    # A bare `wait` always returns 0, so wait on each job individually to
    # observe its exit status. All jobs of this configuration are still
    # allowed to finish before the next configuration is built.
    for i in "${!pids[@]}"; do
        if ! wait "${pids[$i]}"; then
            echo "FAILED: ${labels[$i]}" >&2
            failed=1
        fi
    done

done # config

exit "$failed"
|
||||
|
||||
|
|
@ -1,116 +0,0 @@
|
|||
-build.sh-
|
||||
|
||||
Description: Makes the build in the opae directory with the specified core
|
||||
count and optional performance profiling. If a build already
|
||||
exists, a make clean command is run before the build. Script waits
|
||||
until the inteldev script or quartus program is finished running.
|
||||
|
||||
Usage: ./build.sh -c [1|2|4|8|16] [-p [y|n]]
|
||||
|
||||
Options:
|
||||
-c
|
||||
Core count (1, 2, 4, 8, or 16).
|
||||
|
||||
-p
|
||||
Performance profiling enable (y or n). Changes the source file in the
|
||||
opae directory to include/exclude "+define+PERF_ENABLE".
|
||||
|
||||
_______________________________________________________________________________
|
||||
|
||||
|
||||
-build_all_perf.sh-
|
||||
|
||||
Description: Runs build.sh with performance profiling enabled for all valid
|
||||
core configurations.
|
||||
|
||||
_______________________________________________________________________________
|
||||
_______________________________________________________________________________
|
||||
|
||||
|
||||
-program_fpga.sh-
|
||||
|
||||
Description: Signs and programs the fpga for a specified core count. Prompts
|
||||
for PACSign are all automatically answered 'yes'.
|
||||
|
||||
Usage: ./program_fpga.sh -c [1|2|4|8|16]
|
||||
|
||||
Options:
|
||||
-c
|
||||
Core count (1, 2, 4, 8, or 16).
|
||||
|
||||
_______________________________________________________________________________
|
||||
_______________________________________________________________________________
|
||||
|
||||
|
||||
-gather_perf_results.sh-
|
||||
|
||||
Description: Creates directory named perf_YYYY_MM_DD and core subfolders in
|
||||
evaluation. Copies relevant build output files to specified core
|
||||
directory. Runs and redirects outputs of sgemm, vecadd, saxpy,
|
||||
sfilter, nearn, and gaussian benchmarks to specified core
|
||||
directory. Build should already be made before running this.
|
||||
|
||||
Usage: ./gather_perf_results.sh -c [1|2|4|8|16]
|
||||
|
||||
Options:
|
||||
-c
|
||||
Core count (1, 2, 4, 8, or 16).
|
||||
|
||||
_______________________________________________________________________________
|
||||
|
||||
|
||||
-gather_all_perf_results.sh-
|
||||
|
||||
Description: Programs fpga and runs gather_perf_results.sh for all valid core
|
||||
configurations. All builds should already be made before running
|
||||
this.
|
||||
|
||||
_______________________________________________________________________________
|
||||
_______________________________________________________________________________
|
||||
|
||||
|
||||
-export_csv.sh-
|
||||
|
||||
Description: Creates specified .csv output file from an input directory, file,
|
||||
and parameter. The .csv file contains two columns: cores, and the input
|
||||
parameter. The output file is located within the directory specified with -d.
|
||||
|
||||
Usage: ./export_csv.sh -c [cores] -d [directory] -i [input filename] -o
|
||||
[output filename] -p '[parameter]'
|
||||
|
||||
Example: ./export_csv.sh -c 16 -d perf_2021_03_07 -i sgemm.result -o output.csv
|
||||
-p 'PERF: scoreboard stalls'
|
||||
|
||||
Options:
|
||||
-c
|
||||
Upper limit of cores to be read in. Core directories should exist in
|
||||
the directory specified by -d e.g. 1c, 2c, 4c for -c 4.
|
||||
|
||||
-d
|
||||
The directory of the form perf_{date} located in the evaluation
|
||||
directory.
|
||||
|
||||
-i
|
||||
The input filename located in each core directory within the
|
||||
directory specified by -d.
|
||||
|
||||
-o
|
||||
The output filename to be created within the directory specified
|
||||
by -d.
|
||||
|
||||
-p
|
||||
The parameter corresponding to the core count in the .csv file. The
|
||||
full name of the parameter from the start of the line should be
|
||||
inputted to avoid the parameter name being matched multiple times.
|
||||
|
||||
_______________________________________________________________________________
|
||||
|
||||
|
||||
-export_ipc_csv.sh-
|
||||
|
||||
Description: Runs export_csv.sh for the parameter IPC.
|
||||
|
||||
Usage: ./export_ipc_csv.sh -c [cores] -d [directory] -i [input filename] -o
|
||||
[output filename]
|
||||
|
||||
Example: ./export_ipc_csv.sh -c 16 -d perf_2021_03_07 -i sgemm.result -o output.csv
|
|
@ -1,49 +0,0 @@
|
|||
#!/bin/bash
# Builds the OPAE FPGA target for a given core count.
#
# Usage: build.sh -c <cores> [-p] [-w] [-h]
#   -c  core count: 1, 2, 4, 8 or 16 (required)
#   -p  run make with PERF=1 in its environment
#   -w  after make returns, block until no process matching
#       $OPAE_PLATFORM_ROOT or "quartus" is left running
#   -h  print usage and exit

BUILD_DIR=../../hw/syn/opae

perf=0
wait=0

# Leading ':' selects silent mode so that $OPTARG holds the offending option
# letter in the '?' and ':' cases below.
while getopts :c:pwh flag
do
    case "${flag}" in
        c) cores=${OPTARG};; #1, 2, 4, 8, 16
        p) perf=1;; #perf counters enable
        w) wait=1;; # wait for build to complete
        h) echo "Usage: -c <cores> [-p perf] [-w wait] [-h help]"
           exit 0
        ;;
        :)
           echo "Option -$OPTARG requires an argument" 1>&2
           exit 1
        ;;
        \?)
           echo "Invalid option: -$OPTARG" 1>&2
           exit 1
        ;;
    esac
done

if [[ ! "$cores" =~ ^(1|2|4|8|16)$ ]]; then
    echo 'Invalid parameter for argument -c (1, 2, 4, 8, or 16 expected)'
    exit 1
fi

# Do not run make from the wrong directory if the build tree is missing.
cd "${BUILD_DIR}" || exit 1

# Clean a previous build of the same core count before rebuilding.
if [ -d "./build_fpga_${cores}c" ]; then
    make "clean-fpga-${cores}c"
fi

if [ ${perf} = 1 ]; then
    PERF=1 make "fpga-${cores}c"
else
    make "fpga-${cores}c"
fi

if [ ${wait} = 1 ]; then
    sleep 30
    # Only include OPAE_PLATFORM_ROOT in the pattern when it is set; an empty
    # alternative ("|quartus") would match every process.
    pattern="quartus"
    if [ -n "${OPAE_PLATFORM_ROOT}" ]; then
        pattern="${OPAE_PLATFORM_ROOT}|quartus"
    fi
    pids=($(pgrep -f "${pattern}"))
    # Poll each matching process until it has exited.
    for pid in "${pids[@]}"; do
        while kill -0 "${pid}" 2> /dev/null; do
            sleep 30
        done
    done
fi
|
|
@ -1,7 +0,0 @@
|
|||
#!/bin/bash
# Runs build.sh with perf counters enabled (-p) and waiting for completion (-w)
# for each core count 1, 2, 4, 8 and 16, one after another.

for cores in 1 2 4 8 16
do
    echo "Building ${cores} core build..."
    ./build.sh -c ${cores} -p -w
    echo "Done ${cores} core build."
done
|
|
@ -1,33 +0,0 @@
|
|||
#!/bin/bash
# Writes ../<dir>/<ofile> as a two-column CSV: core count and the value of one
# "<param>=<value>" line taken from ../<dir>/<N>c/<ifile>, for N = 1, 2, 4, ...
# up to the -c limit.
#
# Usage: export_csv.sh -c <cores> -d <dir> -i <input file> -o <output file> -p '<param>'
#
# NOTE: <param> is inserted into a sed pattern unescaped, so regex
# metacharacters or '/' in it will change or break the match.

while getopts c:d:i:o:p: flag
do
    case "${flag}" in
        c) cores=${OPTARG};; #1, 2, 4, 8, 16
        d) dir=${OPTARG};; #directory name (e.g. perf_2021_03_07)
        i) ifile=${OPTARG};; #input filename
        o) ofile=${OPTARG};; #output filename
        p) param=${OPTARG};; #parameter to be made into csv
    esac
done

if [[ ! "$cores" =~ ^(1|2|4|8|16)$ ]]; then
    echo 'Invalid parameter for argument -c (1, 2, 4, 8, or 16 expected)'
    exit 1
fi

if [ -z "$ifile" ]; then
    echo 'No input filename given for argument -i'
    exit 1
fi

if [ -z "$ofile" ]; then
    echo 'No output filename given for argument -o'
    exit 1
fi

if [ -z "$dir" ]; then
    echo 'No directory given for argument -d'
    exit 1
fi

out="../${dir}/${ofile}"

# Pass values as printf arguments, never as the format string, so a '%' in
# the parameter name is written literally.
printf '%s\n' "cores,${param}" > "${out}"
for ((i=1; i<=cores; i=i*2)); do
    value=$(sed -n "s/${param}=\(.*\)/\1/p" < "../${dir}/${i}c/${ifile}")
    # Always terminate the row, even when the parameter was not found.
    printf '%s,%s\n' "${i}" "${value}" >> "${out}"
done
|
|
@ -1,32 +0,0 @@
|
|||
#!/bin/bash
# Writes ../<dir>/<ofile> as a two-column CSV: core count and the IPC value
# taken from ../<dir>/<N>c/<ifile>, for N = 1, 2, 4, ... up to the -c limit.
#
# Usage: export_ipc_csv.sh -c <cores> -d <dir> -i <input file> -o <output file>
# (-f is accepted as an alias of -i.)

# The option string must list i: for the i) branch below to be reachable.
while getopts c:d:f:i:o: flag
do
    case "${flag}" in
        c) cores=${OPTARG};; #1, 2, 4, 8, 16
        d) dir=${OPTARG};; #directory name (e.g. perf_2021_03_07)
        i|f) ifile=${OPTARG};; #input filename
        o) ofile=${OPTARG};; #output filename
    esac
done

if [[ ! "$cores" =~ ^(1|2|4|8|16)$ ]]; then
    echo 'Invalid parameter for argument -c (1, 2, 4, 8, or 16 expected)'
    exit 1
fi

if [ -z "$ifile" ]; then
    echo 'No input filename given for argument -i'
    exit 1
fi

if [ -z "$ofile" ]; then
    echo 'No output filename given for argument -o'
    exit 1
fi

if [ -z "$dir" ]; then
    echo 'No directory given for argument -d'
    exit 1
fi

out="../${dir}/${ofile}"

# Header row; the newline keeps the first data row on its own line.
printf 'cores,IPC\n' > "${out}"
for ((i=1; i<=cores; i=i*2)); do
    printf '%s,' "${i}" >> "${out}"
    # Keep only the last whitespace-separated field of the last "IPC=" match.
    (sed -n "s/IPC=\(.*\)/\1/p" < "../${dir}/${i}c/${ifile}" | awk 'END {print $NF}') >> "${out}"
done
|
|
@ -1,35 +0,0 @@
|
|||
#!/bin/bash
# For each core count (1, 2, 4, 8, 16): copies the build log and summary files
# from hw/syn/opae/build_fpga_<N>c into evaluation/perf_<date>/<N>c, programs
# the FPGA with program_fpga.sh, then runs each benchmark through
# ci/blackbox.sh with --perf and stores its stdout as <app>.result.
#
# Paths are relative: start this from evaluation/scripts.

core_counts=(1 2 4 8 16)
apps=(sgemm vecadd saxpy sfilter nearn guassian)
build_outputs=(afu_default.syn.summary afu_default.fit.summary afu_default.sta.summary user_clock_freq.txt)

# Abort instead of copying and redirecting relative to the wrong directory.
cd ../../hw/syn/opae/ || exit 1

date=$(date +%Y_%m_%d)
results_dir="../../../evaluation/perf_${date}"
mkdir -p "${results_dir}"

for i in "${core_counts[@]}"; do
    mkdir -p "${results_dir}/${i}c"
done

for i in "${core_counts[@]}"; do
    cp "./build_fpga_${i}c/build.log" "${results_dir}/${i}c/build.log"
    for f in "${build_outputs[@]}"; do
        cp "./build_fpga_${i}c/build/output_files/${f}" "${results_dir}/${i}c/${f}"
    done
done

cd ../../../evaluation/scripts || exit 1
# Same results directory, now expressed relative to evaluation/scripts.
results_dir="../perf_${date}"

for i in "${core_counts[@]}"; do
    echo "Programming fpga for ${i} core build..."
    ./program_fpga.sh -c ${i}
    echo "Running tests for ${i} core build..."
    for app in "${apps[@]}"; do
        ../../ci/blackbox.sh --driver=fpga --app=${app} --perf > "${results_dir}/${i}c/${app}.result"
    done
    echo "Done ${i} core build."
done
|
|
@ -1,34 +0,0 @@
|
|||
#!/bin/bash
# Copies the build log and summary files of one core configuration from
# hw/syn/opae/build_fpga_<N>c into evaluation/perf_<date>/<N>c, then runs each
# benchmark through ci/blackbox.sh with --perf and stores its stdout as
# <app>.result in the same directory.
#
# Usage: gather_perf_results.sh -c <1|2|4|8|16>
# Paths are relative: start this from evaluation/scripts.

while getopts c: flag
do
    case "${flag}" in
        c) i=${OPTARG};; #cores: 1, 2, 4, 8, 16
    esac
done

if [[ ! "$i" =~ ^(1|2|4|8|16)$ ]]; then
    echo 'Invalid parameter for argument -c (1, 2, 4, 8, or 16 expected)'
    exit 1
fi

# Abort instead of copying and redirecting relative to the wrong directory.
cd ../../hw/syn/opae/ || exit 1

date=$(date +%Y_%m_%d)
results_dir="../../../evaluation/perf_${date}"
mkdir -p "${results_dir}"

mkdir -p "${results_dir}/${i}c"

cp "./build_fpga_${i}c/build.log" "${results_dir}/${i}c/build.log"
for f in afu_default.syn.summary afu_default.fit.summary afu_default.sta.summary user_clock_freq.txt; do
    cp "./build_fpga_${i}c/build/output_files/${f}" "${results_dir}/${i}c/${f}"
done

for app in sgemm vecadd saxpy sfilter nearn guassian; do
    ../../../ci/blackbox.sh --driver=fpga --app=${app} --perf > "${results_dir}/${i}c/${app}.result"
done
|
|
@ -1,19 +0,0 @@
|
|||
#!/bin/bash
# Signs vortex_afu.gbs for the given core count with PACSign and loads the
# result with fpgasupdate.
#
# Usage: program_fpga.sh -c <1|2|4|8|16>

while getopts c: flag
do
    case "${flag}" in
        c) i=${OPTARG};; #cores: 1, 2, 4, 8, 16
    esac
done

if [[ ! "$i" =~ ^(1|2|4|8|16)$ ]]; then
    echo 'Invalid parameter for argument -c (1, 2, 4, 8, or 16 expected)'
    exit 1
fi

# Stop if the build directory is missing rather than signing from elsewhere.
cd "../../hw/syn/opae/build_fpga_${i}c" || exit 1

# Feed "y" to PACSign's confirmation prompts. Abort on failure so that an
# older vortex_afu_unsigned_ssl.gbs is never programmed by mistake.
printf "y\ny\ny\n" | PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs > /dev/null || exit 1

fpgasupdate vortex_afu_unsigned_ssl.gbs
|
|
@ -1,8 +0,0 @@
|
|||
build_name,Fmax_Slow_900mV_100C,m20k,logic_utilization,total_registers,total_power,static_power
|
||||
1cl-2c-8w-4t-8Kl2-4Kd-1Ki,154.01,2.3590121636564687,16.956694756554306,99408.0,3.64785,1.88908
|
||||
1cl-2c-8w-8t-16Kl2-8Kd-1Ki,137.78,2.3590121636564687,23.021769662921347,134668.0,4.29923,1.95073
|
||||
1cl-2c-8w-8t-8Kl2-4Kd-1Ki,131.22,2.3590121636564687,23.179541198501873,134129.0,4.31822,1.94177
|
||||
1cl-4c-16w-8t-16Kl2-8Kd-1Ki,106.37,4.128271286398821,62.116573033707866,356954.0,7.92994,2.28735
|
||||
1cl-4c-8w-8t-16Kl2-4Kd-1Ki,118.51,4.128271286398821,43.340823970037455,251029.0,6.34737,2.09685
|
||||
1cl-4c-8w-8t-16Kl2-8Kd-1Ki,123.49,4.128271286398821,43.34199438202247,250895.0,6.34987,2.11429
|
||||
2cl-4c-8w-4t-8Kl2-4Kd-1Ki,132.75,6.1924069295982305,56.91058052434457,322475.0,7.63097,2.27641
|
|
|
@ -1,71 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// VX_csr_io_arb: merges CSR requests from the core and from the IO bus into a
// single CSR pipeline request, and steers the pipeline response either to the
// core commit interface or to the IO response interface based on select_io_rsp.
// A valid core request always wins over a simultaneous IO request.
module VX_csr_io_arb (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// bus select
// 1: pipeline response goes to the IO bus, 0: it goes to the core commit path
input wire select_io_rsp,
|
||||
|
||||
// input requests
VX_csr_req_if csr_core_req_if,
|
||||
VX_csr_io_req_if csr_io_req_if,
|
||||
|
||||
// output request
VX_csr_pipe_req_if csr_pipe_req_if,
|
||||
|
||||
// input response
VX_commit_if csr_pipe_rsp_if,
|
||||
|
||||
// output responses
VX_commit_if csr_commit_if,
|
||||
VX_csr_io_rsp_if csr_io_rsp_if
|
||||
);
|
||||
// NOTE(review): clk and reset are marked unused here but are connected to
// csr_io_out_buffer further down - confirm these markers are still wanted.
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
// Core write data: rs1 cast to 32 bits when use_imm is set, otherwise rs1_data.
wire [31:0] csr_core_req_data = csr_core_req_if.use_imm ? 32'(csr_core_req_if.rs1) : csr_core_req_if.rs1_data;
|
||||
|
||||
// requests
// The pipeline request is valid when either source has a request.
assign csr_pipe_req_if.valid = csr_core_req_if.valid || csr_io_req_if.valid;
|
||||
// wid, tmask, PC, rd and wb are always taken from the core request, including
// when the request being forwarded comes from the IO bus.
assign csr_pipe_req_if.wid = csr_core_req_if.wid;
|
||||
assign csr_pipe_req_if.tmask = csr_core_req_if.tmask;
|
||||
assign csr_pipe_req_if.PC = csr_core_req_if.PC;
|
||||
// IO requests map to `CSR_RW when rw is set and to `CSR_RS otherwise.
assign csr_pipe_req_if.op_type = csr_core_req_if.valid ? csr_core_req_if.op_type : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
|
||||
assign csr_pipe_req_if.addr = csr_core_req_if.valid ? csr_core_req_if.addr : csr_io_req_if.addr;
|
||||
// An IO request with rw clear sends zero as its data operand.
assign csr_pipe_req_if.data = csr_core_req_if.valid ? csr_core_req_data : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
|
||||
assign csr_pipe_req_if.rd = csr_core_req_if.rd;
|
||||
assign csr_pipe_req_if.wb = csr_core_req_if.wb;
|
||||
// is_io is simply "no core request this cycle"; it is also high when neither
// source is valid.
assign csr_pipe_req_if.is_io = !csr_core_req_if.valid;
|
||||
|
||||
// core always takes priority over IO bus
assign csr_core_req_if.ready = csr_pipe_req_if.ready;
|
||||
assign csr_io_req_if.ready = csr_pipe_req_if.ready && !csr_core_req_if.valid;
|
||||
|
||||
// responses
// Ready signal from the IO response buffer back towards the pipeline.
wire csr_io_rsp_ready;
|
||||
// Buffers the 32-bit IO response between the pipeline and the IO bus.
VX_skid_buffer #(
|
||||
.DATAW (32)
|
||||
) csr_io_out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (csr_pipe_rsp_if.valid & select_io_rsp),
|
||||
// Only element 0 of the response data is returned to the IO bus
// (presumably thread 0's word - TODO confirm against VX_commit_if).
.data_in (csr_pipe_rsp_if.data[0]),
|
||||
.ready_in (csr_io_rsp_ready),
|
||||
.valid_out (csr_io_rsp_if.valid),
|
||||
.data_out (csr_io_rsp_if.data),
|
||||
.ready_out (csr_io_rsp_if.ready)
|
||||
);
|
||||
|
||||
// Core commit path: valid only when the response is not selected for the IO bus;
// all other fields are forwarded unchanged.
assign csr_commit_if.valid = csr_pipe_rsp_if.valid & ~select_io_rsp;
|
||||
assign csr_commit_if.wid = csr_pipe_rsp_if.wid;
|
||||
assign csr_commit_if.tmask = csr_pipe_rsp_if.tmask;
|
||||
assign csr_commit_if.PC = csr_pipe_rsp_if.PC;
|
||||
assign csr_commit_if.rd = csr_pipe_rsp_if.rd;
|
||||
assign csr_commit_if.wb = csr_pipe_rsp_if.wb;
|
||||
assign csr_commit_if.eop = csr_pipe_rsp_if.eop;
|
||||
assign csr_commit_if.data = csr_pipe_rsp_if.data;
|
||||
|
||||
// Back-pressure to the pipeline comes from whichever sink is selected.
assign csr_pipe_rsp_if.ready = select_io_rsp ? csr_io_rsp_ready : csr_commit_if.ready;
|
||||
|
||||
endmodule
|
|
@ -39,7 +39,7 @@ module VX_instr_demux (
|
|||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.BUFFERED (1)
|
||||
.USE_FASTREG (1)
|
||||
) alu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -57,7 +57,7 @@ module VX_instr_demux (
|
|||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
||||
.BUFFERED (1)
|
||||
.USE_FASTREG (1)
|
||||
) lsu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -75,7 +75,7 @@ module VX_instr_demux (
|
|||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
|
||||
.BUFFERED (1)
|
||||
.USE_FASTREG (1)
|
||||
) csr_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -94,7 +94,7 @@ module VX_instr_demux (
|
|||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
.BUFFERED (1)
|
||||
.USE_FASTREG (1)
|
||||
) fpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -116,7 +116,7 @@ module VX_instr_demux (
|
|||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
|
||||
.BUFFERED (1)
|
||||
.USE_FASTREG (1)
|
||||
) gpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -256,26 +256,19 @@ module VX_mem_unit # (
|
|||
);
|
||||
end else begin
|
||||
// core to D-cache request
|
||||
for (genvar i = 0; i < `DNUM_REQS; ++i) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`DCORE_ADDR_WIDTH + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH)
|
||||
) core_req_buf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (dcache_core_req_if.valid[i]),
|
||||
.data_in ({dcache_core_req_if.addr[i], dcache_core_req_if.rw[i], dcache_core_req_if.byteen[i], dcache_core_req_if.data[i], dcache_core_req_if.tag[i]}),
|
||||
.ready_in (dcache_core_req_if.ready[i]),
|
||||
.valid_out (dcache_req_if.valid[i]),
|
||||
.data_out ({dcache_req_if.addr[i], dcache_req_if.rw[i], dcache_req_if.byteen[i], dcache_req_if.data[i], dcache_req_if.tag[i]}),
|
||||
.ready_out (dcache_req_if.ready[i])
|
||||
);
|
||||
end
|
||||
assign dcache_req_if.valid = dcache_core_req_if.valid;
|
||||
assign dcache_req_if.addr = dcache_core_req_if.addr;
|
||||
assign dcache_req_if.rw = dcache_core_req_if.rw;
|
||||
assign dcache_req_if.byteen = dcache_core_req_if.byteen;
|
||||
assign dcache_req_if.data = dcache_core_req_if.data;
|
||||
assign dcache_req_if.tag = dcache_core_req_if.tag;
|
||||
assign dcache_core_req_if.ready = dcache_req_if.ready;
|
||||
|
||||
// D-cache to core reponse
|
||||
assign dcache_core_rsp_if.valid = dcache_rsp_if.valid;
|
||||
assign dcache_core_rsp_if.tag = dcache_rsp_if.tag;
|
||||
assign dcache_core_rsp_if.data = dcache_rsp_if.data;
|
||||
assign dcache_rsp_if.ready = dcache_core_rsp_if.ready;
|
||||
assign dcache_rsp_if.ready = dcache_core_rsp_if.ready;
|
||||
end
|
||||
|
||||
wire [`DMEM_TAG_WIDTH-1:0] icache_mem_req_tag = `DMEM_TAG_WIDTH'(icache_mem_req_if.tag);
|
||||
|
|
2
hw/rtl/cache/VX_bank.v
vendored
2
hw/rtl/cache/VX_bank.v
vendored
|
@ -476,7 +476,7 @@ module VX_bank #(
|
|||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
|
||||
.BUFFERED (NUM_BANKS == 1)
|
||||
.USE_FASTREG (NUM_BANKS == 1)
|
||||
) core_rsp_req (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
4
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
4
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
|
@ -107,7 +107,7 @@ module VX_cache_core_rsp_merge #(
|
|||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)),
|
||||
.BUFFERED (1)
|
||||
.USE_FASTREG (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -156,7 +156,7 @@ module VX_cache_core_rsp_merge #(
|
|||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH),
|
||||
.BUFFERED (1)
|
||||
.USE_FASTREG (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -4,7 +4,7 @@ module VX_skid_buffer #(
|
|||
parameter DATAW = 1,
|
||||
parameter PASSTHRU = 0,
|
||||
parameter NOBACKPRESSURE = 0,
|
||||
parameter BUFFERED = 0
|
||||
parameter USE_FASTREG = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
@ -50,7 +50,7 @@ module VX_skid_buffer #(
|
|||
|
||||
end else begin
|
||||
|
||||
if (BUFFERED) begin
|
||||
if (USE_FASTREG) begin
|
||||
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
reg [DATAW-1:0] buffer;
|
||||
|
|
|
@ -39,69 +39,69 @@ int main(int argc, char **argv) {
|
|||
if (argc == 1) {
|
||||
#ifdef ALL_TESTS
|
||||
std::string tests[] = {
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-add.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-addi.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-and.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-andi.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-auipc.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-beq.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-bge.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-bgeu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-blt.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-bltu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-bne.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-jal.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-jalr.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lb.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lbu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lh.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lhu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lui.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lw.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-or.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-ori.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sb.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sh.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-simple.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sll.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-slli.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-slt.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-slti.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sltiu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sltu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sra.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-srai.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-srl.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-srli.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sub.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sw.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-xor.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32ui-p-xori.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-add.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-addi.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-and.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-andi.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-auipc.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-beq.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-bge.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-bgeu.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-blt.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-bltu.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-bne.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-jal.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-jalr.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-lb.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-lbu.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-lh.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-lhu.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-lui.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-lw.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-or.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-ori.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-sb.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-sh.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-simple.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-sll.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-slli.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-slt.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-slti.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-sltiu.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-sltu.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-sra.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-srai.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-srl.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-srli.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-sub.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-sw.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-xor.hex",
|
||||
"../../../tests/riscv/isa/rv32ui-p-xori.hex",
|
||||
#ifdef EXT_M_ENABLE
|
||||
"../../../benchmarks/riscv_tests/isa/rv32um-p-div.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32um-p-divu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32um-p-mul.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32um-p-mulh.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32um-p-mulhsu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32um-p-mulhu.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32um-p-rem.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32um-p-remu.hex",
|
||||
"../../../tests/riscv/isa/rv32um-p-div.hex",
|
||||
"../../../tests/riscv/isa/rv32um-p-divu.hex",
|
||||
"../../../tests/riscv/isa/rv32um-p-mul.hex",
|
||||
"../../../tests/riscv/isa/rv32um-p-mulh.hex",
|
||||
"../../../tests/riscv/isa/rv32um-p-mulhsu.hex",
|
||||
"../../../tests/riscv/isa/rv32um-p-mulhu.hex",
|
||||
"../../../tests/riscv/isa/rv32um-p-rem.hex",
|
||||
"../../../tests/riscv/isa/rv32um-p-remu.hex",
|
||||
#endif
|
||||
};
|
||||
|
||||
std::string tests_fp[] = {
|
||||
#ifdef EXT_F_ENABLE
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fadd.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fmadd.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fmin.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fcmp.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-ldst.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fcvt.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fcvt_w.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-move.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-recoding.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fdiv.hex",
|
||||
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fclass.hex",
|
||||
"../../../tests/riscv/isa/rv32uf-p-fadd.hex",
|
||||
"../../../tests/riscv/isa/rv32uf-p-fmadd.hex",
|
||||
"../../../tests/riscv/isa/rv32uf-p-fmin.hex",
|
||||
"../../../tests/riscv/isa/rv32uf-p-fcmp.hex",
|
||||
"../../../tests/riscv/isa/rv32uf-p-ldst.hex",
|
||||
"../../../tests/riscv/isa/rv32uf-p-fcvt.hex",
|
||||
"../../../tests/riscv/isa/rv32uf-p-fcvt_w.hex",
|
||||
"../../../tests/riscv/isa/rv32uf-p-move.hex",
|
||||
"../../../tests/riscv/isa/rv32uf-p-recoding.hex",
|
||||
"../../../tests/riscv/isa/rv32uf-p-fdiv.hex",
|
||||
"../../../tests/riscv/isa/rv32uf-p-fclass.hex",
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -152,7 +152,7 @@ int main(int argc, char **argv) {
|
|||
|
||||
#else
|
||||
|
||||
char test[] = "../../../runtime/tests/simple/vx_simple.hex";
|
||||
char test[] = "../../../tests/runtime/simple/vx_simple.hex";
|
||||
|
||||
std::cout << test << std::endl;
|
||||
|
||||
|
|
|
@ -76,16 +76,16 @@ $(FPGA_BUILD_DIR)_4c/build/dcp.qpf:
|
|||
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_4c
|
||||
|
||||
$(FPGA_BUILD_DIR)_8c/build/dcp.qpf:
|
||||
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_8c
|
||||
afu_synth_setup -s setup8.cfg $(FPGA_BUILD_DIR)_8c
|
||||
|
||||
$(FPGA_BUILD_DIR)_16c/build/dcp.qpf:
|
||||
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_16c
|
||||
afu_synth_setup -s setup16.cfg $(FPGA_BUILD_DIR)_16c
|
||||
|
||||
$(FPGA_BUILD_DIR)_32c/build/dcp.qpf:
|
||||
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_32c
|
||||
afu_synth_setup -s setup16.cfg $(FPGA_BUILD_DIR)_32c
|
||||
|
||||
$(FPGA_BUILD_DIR)_64c/build/dcp.qpf:
|
||||
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_64c
|
||||
afu_synth_setup -s setup16.cfg $(FPGA_BUILD_DIR)_64c
|
||||
|
||||
gen-sources-1c:
|
||||
./gen_sources.sh $(CFLAGS) $(CONFIG1) > sources.txt
|
||||
|
|
7
hw/syn/opae/setup16.cfg
Normal file
7
hw/syn/opae/setup16.cfg
Normal file
|
@ -0,0 +1,7 @@
|
|||
+define+SYNTHESIS
|
||||
+define+QUARTUS
|
||||
|
||||
vortex_afu16.json
|
||||
QI:vortex_afu.qsf
|
||||
|
||||
C:sources.txt
|
7
hw/syn/opae/setup8.cfg
Normal file
7
hw/syn/opae/setup8.cfg
Normal file
|
@ -0,0 +1,7 @@
|
|||
+define+SYNTHESIS
|
||||
+define+QUARTUS
|
||||
|
||||
vortex_afu8.json
|
||||
QI:vortex_afu.qsf
|
||||
|
||||
C:sources.txt
|
|
@ -18,10 +18,7 @@
|
|||
"mmio-status": 18,
|
||||
"mmio-scope-read": 20,
|
||||
"mmio-scope-write": 22,
|
||||
"mmio-csr-core": 24,
|
||||
"mmio-csr-addr": 26,
|
||||
"mmio-csr-data": 28,
|
||||
"mmio-csr-read": 30,
|
||||
"mmio-dev-caps": 24,
|
||||
|
||||
"afu-top-interface":
|
||||
{
|
||||
|
|
|
@ -18,10 +18,7 @@
|
|||
"mmio-status": 18,
|
||||
"mmio-scope-read": 20,
|
||||
"mmio-scope-write": 22,
|
||||
"mmio-csr-core": 24,
|
||||
"mmio-csr-addr": 26,
|
||||
"mmio-csr-data": 28,
|
||||
"mmio-csr-read": 30,
|
||||
"mmio-dev-caps": 24,
|
||||
|
||||
"afu-top-interface":
|
||||
{
|
||||
|
|
|
@ -41,29 +41,29 @@ set_global_assignment -name VERILOG_MACRO NDEBUG
|
|||
set_global_assignment -name MESSAGE_DISABLE 16818
|
||||
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
|
||||
|
||||
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
|
||||
set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0
|
||||
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
|
||||
set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
|
||||
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
|
||||
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
||||
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||
#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
#set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
|
||||
#set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0
|
||||
#set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||
#set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||
#set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||
#set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
|
||||
#set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
|
||||
#set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
|
||||
#set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
||||
#set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||
|
||||
set_global_assignment -name USE_HIGH_SPEED_ADDER ON
|
||||
set_global_assignment -name MUX_RESTRUCTURE ON
|
||||
set_global_assignment -name ADV_NETLIST_OPT_SYNTH_WYSIWYG_REMAP ON
|
||||
set_global_assignment -name PROGRAMMABLE_POWER_TECHNOLOGY_SETTING "FORCE ALL TILES WITH FAILING TIMING PATHS TO HIGH SPEED"
|
||||
set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC ON
|
||||
set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING ON
|
||||
#set_global_assignment -name USE_HIGH_SPEED_ADDER ON
|
||||
#set_global_assignment -name MUX_RESTRUCTURE ON
|
||||
#set_global_assignment -name ADV_NETLIST_OPT_SYNTH_WYSIWYG_REMAP ON
|
||||
#set_global_assignment -name PROGRAMMABLE_POWER_TECHNOLOGY_SETTING "FORCE ALL TILES WITH FAILING TIMING PATHS TO HIGH SPEED"
|
||||
#set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC ON
|
||||
#set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING ON
|
||||
|
||||
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||
set_global_assignment -name SEED 1
|
||||
#set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||
#set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||
#set_global_assignment -name SEED 1
|
||||
|
||||
switch $opts(family) {
|
||||
"Arria 10" {
|
||||
|
|
File diff suppressed because it is too large
Load diff
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue