This commit is contained in:
Blaise Tine 2021-06-13 21:44:46 -04:00
commit fe86fd7936
640 changed files with 394599 additions and 653711 deletions

View file

@ -4,17 +4,11 @@ all:
$(MAKE) -C driver
$(MAKE) -C runtime
$(MAKE) -C simX
$(MAKE) -C benchmarks/opencl
perf-demo:
$(MAKE) -C hw
$(MAKE) -C driver rtlsim
$(MAKE) -C driver/tests/demo/ run-rtlsim
$(MAKE) -C tests
clean:
$(MAKE) -C hw clean
$(MAKE) -C driver clean
$(MAKE) -C simX clean
$(MAKE) -C runtime clean
$(MAKE) -C benchmarks/opencl clean
$(MAKE) -C tests clean

View file

@ -120,12 +120,12 @@ case $DRIVER in
;;
esac
if [ -d "$VORTEX_HOME/driver/tests/$APP" ];
if [ -d "$VORTEX_HOME/tests/opencl/$APP" ];
then
APP_PATH=$VORTEX_HOME/driver/tests/$APP
elif [ -d "$VORTEX_HOME/benchmarks/opencl/$APP" ];
APP_PATH=$VORTEX_HOME/tests/opencl/$APP
elif [ -d "$VORTEX_HOME/tests/regression/$APP" ];
then
APP_PATH=$VORTEX_HOME/benchmarks/opencl/$APP
APP_PATH=$VORTEX_HOME/tests/regression/$APP
else
# Neither candidate directory exists for $APP: report the failed lookup.
echo "Application folder not found: $APP"
exit -1

View file

@ -5,12 +5,11 @@ set -e
make -s
# Dogfood tests
./ci/test_runtime.sh
./ci/test_riscv_isa.sh
./ci/test_opencl.sh
./ci/test_driver.sh
./ci/test_simx.sh
# coverage tests
make -C tests/runtime run
make -C tests/riscv/isa run
make -C tests/opencl run
make -C simX run-tests
# warp/threads configurations
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=2 --app=demo

View file

@ -7,12 +7,15 @@ set -e
make -C runtime clean
make -C runtime
# clear POCL cache
rm -rf ~/.cache/pocl
# rebuild native kernel
make -C driver/tests/dogfood clean-all
make -C driver/tests/dogfood
make -C tests/driver/dogfood clean-all
make -C tests/driver/dogfood
./ci/blackbox.sh --driver=vlsim --cores=1 --app=dogfood
# rebuild opencl kernel
make -C benchmarks/opencl/sgemm clean-all
make -C benchmarks/opencl/sgemm
make -C tests/opencl/sgemm clean-all
make -C tests/opencl/sgemm
./ci/blackbox.sh --driver=vlsim --cores=1 --app=sgemm

View file

@ -1,6 +0,0 @@
#!/bin/bash
# CI helper: invokes the `run` target of the Makefile in driver/tests.
# exit when any command fails
set -e
make -C driver/tests run

View file

@ -1,6 +0,0 @@
#!/bin/sh
# CI helper: invokes the `run` target of the Makefile in benchmarks/opencl.
# exit when any command fails
set -e
make -C benchmarks/opencl run

View file

@ -1,6 +0,0 @@
#!/bin/bash
# CI helper: invokes the `run` target of the Makefile in benchmarks/riscv_tests/isa.
# exit when any command fails
set -e
make -C benchmarks/riscv_tests/isa run

View file

@ -1,6 +0,0 @@
#!/bin/bash
# CI helper: invokes the `run` target of the Makefile in runtime/tests.
# exit when any command fails
set -e
make -C runtime/tests run

View file

@ -1,6 +0,0 @@
#!/bin/bash
# CI helper: invokes the `run` target of the Makefile in simX.
# exit when any command fails
set -e
make -C simX run

View file

@ -2,34 +2,33 @@
The directory/file layout of the Vortex codebase is as follows:
- `benchmark`: contains opencl, risc-v, and vector tests
- `opencl`: contains basic kernel operation tests (i.e. vector add, transpose, dot product)
- `riscv`: contains official riscv tests which are pre-compiled into binaries
- `vector`: tests for vector instructions (not yet implemented)
- `ci`: contain tests to be run during continuous integration (Travis CI)
- driver, opencl, riscv_isa, and runtime tests
- `driver`: contains driver software implementation (software that is run on the host to communicate with the vortex processor)
- `opae`: contains code for driver that runs on FPGA
- `rtlsim`: contains code for driver that runs on local machine (driver built using verilator which converts rtl to c++ binary)
- `simx`: contains code for driver that runs on local machine (vortex)
- `include`: contains vortex.h which has the vortex API that is used by the drivers
- `runtime`: contains software used inside kernel programs to expose GPGPU capabilities
- `include`: contains vortex API needed for runtime
- `linker`: contains linker file for compiling kernels
- `src`: contains implementation of vortex API (from include folder)
- `tests`: contains runtime tests
- `simple`: contains test for GPGPU functionality allowed in vortex
- `simx`: contains simX, the cycle approximate simulator for vortex
- `miscs`: contains old code that is no longer used
- `hw`:
- `unit_tests`: contains unit test for RTL of cache and queue
- `syn`: contains all synthesis scripts (quartus and yosys)
- `quartus`: contains code to synthesis cache, core, pipeline, top, and vortex stand-alone
- `simulate`: contains RTL simulator (verilator)
- `testbench.cpp`: runs either the riscv, runtime, or opencl tests
- `opae`: contains source code for the accelerator functional unit (AFU) and code which programs the fpga
- `quartus`: contains synthesis scripts for Intel Quartus toolchain
- `opae`: contains synthesis scripts for Intel OPAE FPGA
- `simulate`: contains RTL simulator (verilator)
- `rtl`: contains rtl source code
- `cache`: contains cache subsystem code
- `fp_cores`: contains floating point unit code
- `interfaces`: contains code that handles communication for each of the units of the microarchitecture
- `libs`: contains general-purpose modules (i.e., buffers, encoders, arbiters, pipe registers)
- `libs`: contains general-purpose modules (i.e., buffers, encoders, arbiters, pipe registers)
- `driver`: contains driver software implementation (software that is run on the host to communicate with the vortex processor)
- `include`: contains vortex.h which has the vortex API that is used by the drivers
- `opae`: contains code for driver that runs on FPGA
- `rtlsim`: contains code for driver that runs on local machine (driver built using verilator which converts rtl to c++ binary)
- `simx`: contains code for driver that runs on local machine (vortex)
- `runtime`: contains software used inside kernel programs to expose GPGPU capabilities
- `include`: contains vortex API needed for runtime
- `linker`: contains linker file for compiling kernels
- `src`: contains implementation of vortex API (from include folder)
- `simX`: contains simX, the cycle approximate simulator for vortex
- `tests`: contains the test suites
- `runtime`: contains vortex runtime tests
- `driver`: contains vortex driver tests
- `opencl`: contains opencl tests and benchmarks
- `riscv`: contains official riscv tests
- `regression`: contains regression tests
- `vector`: tests for vector instructions (not yet implemented)
- `ci`: contains tests to be run during continuous integration (Travis CI)
- `miscs`: contains miscellaneous items

View file

@ -1,4 +1,4 @@
# Flubber FPGA Startup and Configuration Guide
# FPGA Startup and Configuration Guide
OPAE Environment Setup
----------------------
@ -27,7 +27,7 @@ To enable L3 cache and profile counters for a build, simply uncomment the defini
OPAE Build
------------------
The Flubber FPGA has to following configuration options:
The FPGA has the following configuration options:
- 1 core fpga (fpga-1c)
- 2 cores fpga (fpga-2c)
- 4 cores fpga (fpga-4c)

View file

@ -10,7 +10,7 @@ SimX is a C++ cycle-level in-house simulator developed for Vortex. The relevant
### FPGA Simulation
The current target FPGA for simulation is the Arria10 Intel Accelerator Card v1.0. The guide to build the fpga with specific configurations is located [here.](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/Flubber_FPGA_Startup_Guide.md)
The current target FPGA for simulation is the Arria10 Intel Accelerator Card v1.0. The guide to build the fpga with specific configurations is located [here.](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/FPGA_Startup_Guide.md)
### How to Test

View file

@ -7,7 +7,7 @@
- [Vortex Cache Subsystem](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/Cache_Subsystem.md)
- Vortex Software
- [Vortex Simulation](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/Simulation.md)
- [FPGA Configuration, Program and Test](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/Flubber_FPGA_Startup_Guide.md)
- [FPGA Configuration, Program and Test](https://github.com/vortexgpgpu/vortex-dev/blob/master/doc/FPGA_Startup_Guide.md)
- Debugging
- Useful Links

View file

@ -1,4 +1,4 @@
all: stub rtlsim simx opae tests
all: stub rtlsim simx opae
stub:
$(MAKE) -C stub
@ -12,14 +12,10 @@ rtlsim:
simx:
$(MAKE) -C simx
tests:
$(MAKE) -C tests
clean:
$(MAKE) clean -C stub
$(MAKE) clean -C opae
$(MAKE) clean -C rtlsim
$(MAKE) clean -C simx
$(MAKE) clean -C tests
.PHONY: all stub opae rtlsim simx tests clean
.PHONY: all stub opae rtlsim simx clean

View file

@ -1,54 +0,0 @@
#!/usr/bin/env python3
import sys
import pandas as pd
from os import path
from glob import glob
if len(sys.argv) < 2:
print('usage: python3 ' + sys.argv[0] + ' <path to test_outputs>')
exit()
output_dir = sys.argv[1]
config_names = []
test_names = []
cycle_counts = []
for filename in glob(path.join(output_dir, '*.log')):
cycle_line = None
with open(filename, 'r') as f:
for line in f:
line = line.strip()
if line.startswith('[sim] total cycles:'):
cycle_line = line
print(filename, cycle_line)
full_name, _, _ = path.basename(filename).partition('.')
if cycle_line is None:
count = None
else:
_, _, count = cycle_line.partition(':')
count = int(count.strip())
config, test = full_name.rsplit('-', 1)
config_names.append(config)
test_names.append(test)
cycle_counts.append(count)
df = pd.DataFrame({
'config': config_names,
'test': test_names,
'cycle_count': cycle_counts,
})
print(df.head())
pivot = pd.pivot_table(df, values='cycle_count', index=['config'], columns=['test'])
print(pivot.head())
pivot.to_csv('results.csv')
print('Table written to results.csv')

View file

@ -1,101 +0,0 @@
Warps: 2, Threads: 2
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
292351
Warps: 2, Threads: 4
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
174990
Warps: 2, Threads: 8
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
87686
Warps: 2, Threads: 16
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
44034
Warps: 2, Threads: 32
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
29981
Warps: 4, Threads: 2
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
751528
Warps: 4, Threads: 4
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
490532
Warps: 4, Threads: 8
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
245460
Warps: 4, Threads: 16
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
122924
Warps: 4, Threads: 32
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
61656
Warps: 8, Threads: 2
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
609347
Warps: 8, Threads: 4
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
337922
Warps: 8, Threads: 8
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
103378
Warps: 8, Threads: 16
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
44932
Warps: 8, Threads: 32
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
30426
Warps: 16, Threads: 2
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
446749
Warps: 16, Threads: 4
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
197890
Warps: 16, Threads: 8
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
106086
Warps: 16, Threads: 16
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
46464
Warps: 16, Threads: 32
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
31573
Warps: 32, Threads: 2
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
274885
Warps: 32, Threads: 4
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
185833
Warps: 32, Threads: 8
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
95355
Warps: 32, Threads: 16
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
49745
Warps: 32, Threads: 32
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/saxpy/saxpy.hex -s -b
33326

View file

@ -1,80 +0,0 @@
Warps: 2, Threads: 2
2037907
Warps: 2, Threads: 4
1205061
Warps: 2, Threads: 8
583051
Warps: 2, Threads: 16
358821
Warps: 2, Threads: 32
168914
Warps: 4, Threads: 2
1647415
Warps: 4, Threads: 4
Warps: 4, Threads: 2
1719354
Warps: 4, Threads: 4
837672
Warps: 4, Threads: 8
358354
Warps: 4, Threads: 16
218991
Warps: 4, Threads: 32
174153
Warps: 8, Threads: 2
1684691
Warps: 8, Threads: 4
1035207
Warps: 8, Threads: 8
552477
Warps: 8, Threads: 16
316346
Warps: 8, Threads: 32
128139
Warps: 16, Threads: 2
1666519
Warps: 16, Threads: 4
1043940
Warps: 16, Threads: 8
554168
Warps: 16, Threads: 16
316615
Warps: 16, Threads: 32
131018
Warps: 32, Threads: 2
1637051
Warps: 32, Threads: 4
1036768
Warps: 32, Threads: 8
544135
Warps: 32, Threads: 16
310251
Warps: 32, Threads: 32
157421

View file

@ -1,276 +0,0 @@
Warps: 2, Threads: 2
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
14663775
Warps: 2, Threads: 4
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
10280838
Warps: 2, Threads: 8
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
5133778
Warps: 2, Threads: 16
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
2670416
Warps: 2, Threads: 32
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
1353300
Warps: 4, Threads: 2
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
14014523
Warps: 4, Threads: 4
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
6700429
Warps: 4, Threads: 8
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
4196995
Warps: 4, Threads: 16
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
2179254
Warps: 4, Threads: 32
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
1303963
Warps: 8, Threads: 2
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
8146968
Warps: 8, Threads: 4
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
4180557
Warps: 8, Threads: 8
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
1946300
Warps: 8, Threads: 16
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
1056178
Warps: 8, Threads: 32
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
449062
Warps: 16, Threads: 2
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
4103843
Warps: 16, Threads: 4
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
2198894
Warps: 16, Threads: 8
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
1080948
Warps: 16, Threads: 16
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
630038
Warps: 16, Threads: 32
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
448537
Warps: 32, Threads: 2
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
2512219
Warps: 32, Threads: 4
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
1524192
Warps: 32, Threads: 8
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
936191
Warps: 32, Threads: 16
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
446168
Warps: 32, Threads: 32
../../../simX/obj_dir/Vcache_simX -E -a rv32i --core ../opencl/sgemm/sgemm.hex -s -b
ctx->num_groups[0]: 64
ctx->num_groups[1]: 64
ctx->num_groups[2]: 64
ctx->local_size[0]: 1
ctx->local_size[1]: 1
ctx->local_size[2]: 1
380334

View file

@ -1,51 +0,0 @@
Warps: 2, Threads: 2
1.313778
Warps: 2, Threads: 4
1.869814
Warps: 2, Threads: 8
3.794385
Warps: 2, Threads: 16
7.532425
Warps: 2, Threads: 32
15.194329
Warps: 4, Threads: 2
1.373928
Warps: 4, Threads: 4
2.106374
Warps: 4, Threads: 8
4.214628
Warps: 4, Threads: 16
8.372964
Warps: 4, Threads: 32
16.604193
Warps: 8, Threads: 2
0.647895
Warps: 8, Threads: 4
1.232910
Warps: 8, Threads: 8
2.505588
Warps: 8, Threads: 16
5.622365
Warps: 8, Threads: 32
13.141898
Warps: 16, Threads: 2
0.683937
Warps: 16, Threads: 4
1.362874
Warps: 16, Threads: 8
2.877766
Warps: 16, Threads: 16
7.303546
Warps: 16, Threads: 32
12.981466
Warps: 32, Threads: 2
0.919473
Warps: 32, Threads: 4
1.601678
Warps: 32, Threads: 8
3.462736
Warps: 32, Threads: 16
7.460658
Warps: 32, Threads: 32
14.898925

View file

@ -1,27 +0,0 @@
#!/bin/bash
# Sweeps warp/thread configurations for each listed OpenCL project and appends
# the simulator output of every run to <project>.result.
#
# bash is required: the `&>>` redirection used below is not POSIX sh.
# All paths are relative; presumably the script is meant to be launched from
# the test_benchmark directory (it cd's back to ../../test_benchmark) -- confirm.
for PROJECT in sfilter; do
    # Start each project with a fresh result file.
    echo "" > "$PROJECT.result"
    for number_of_warps in 2 4 8 16 32; do
        for number_of_threads in 2 4 8 16 32; do
            echo "$PROJECT = Warp Count: $number_of_warps Thread Count: $number_of_threads Launched"

            # Regenerate the runtime configuration header for this combination.
            echo "#define TOTAL_THREADS $number_of_threads" > ../../runtime/config.h
            echo "#define TOTAL_WARPS $number_of_warps" >> ../../runtime/config.h

            # Rebuild the project against the new configuration. Abort if the
            # directory change fails so make never runs in the wrong place.
            cd "../opencl/$PROJECT" || exit 1
            make clean &>> /dev/null
            make &>> /dev/null
            cd ../../test_benchmark || exit 1

            # Label the run, then append the simulator's stdout and stderr.
            echo "Warps: $number_of_warps, Threads: $number_of_threads" >> "$PROJECT.result"
            ../../simX/obj_dir/Vcache_simX -E -a rv32i --core "../opencl/$PROJECT/$PROJECT.hex" -s -b &>> "$PROJECT.result"
        done
    done
done

View file

@ -1,24 +0,0 @@
#!/bin/bash
# Sweeps warp/thread configurations for the sgemm OpenCL project and appends
# the simulator output of every run to sgemm.result.
#
# bash is required: the `&>>` redirection used below is not POSIX sh.
# All paths are relative; presumably the script is meant to be launched from
# the test_benchmark directory (it cd's back to ../../test_benchmark) -- confirm.
PROJECT=sgemm

# Start with a fresh result file.
echo "" > "$PROJECT.result"

for number_of_warps in 2 4 8 16 32; do
    for number_of_threads in 2 4 8 16 32; do
        echo "Warp Count: $number_of_warps Thread Count: $number_of_threads Launched"

        # Regenerate the runtime configuration header for this combination.
        echo "#define TOTAL_THREADS $number_of_threads" > ../../runtime/config.h
        echo "#define TOTAL_WARPS $number_of_warps" >> ../../runtime/config.h

        # Rebuild the project against the new configuration. Abort if the
        # directory change fails so make never runs in the wrong place.
        cd "../opencl/$PROJECT" || exit 1
        make clean &>> /dev/null
        make &>> /dev/null
        cd ../../test_benchmark || exit 1

        # Label the run, then append the simulator's stdout and stderr.
        echo "Warps: $number_of_warps, Threads: $number_of_threads" >> "$PROJECT.result"
        ../../simX/obj_dir/Vcache_simX -E -a rv32i --core "../opencl/$PROJECT/$PROJECT.hex" -s -b &>> "$PROJECT.result"
    done
done

View file

@ -1,17 +0,0 @@
Fitter Status : Successful - Sat Mar 6 08:45:37 2021
Quartus Prime Version : 19.2.0 Build 57 06/24/2019 Patches 0.01rc SJ Pro Edition
Revision Name : afu_default
Top-level Entity Name : dcp_top
Family : Arria 10
Device : 10AX115N2F40E2LG
Timing Models : Final
Logic utilization (in ALMs) : 359,139 / 427,200 ( 84 % )
Total registers : 546782
Total pins : 310 / 826 ( 38 % )
Total virtual pins : 0
Total block memory bits : 12,692,200 / 55,562,240 ( 23 % )
Total RAM Blocks : 2,285 / 2,713 ( 84 % )
Total DSP Blocks : 448 / 1,518 ( 30 % )
Total HSSI RX channels : 12 / 48 ( 25 % )
Total HSSI TX channels : 12 / 48 ( 25 % )
Total PLLs : 25 / 112 ( 22 % )

File diff suppressed because it is too large Load diff

View file

@ -1,4 +0,0 @@
Synthesis Status : Successful - Sat Mar 6 05:12:07 2021
Revision Name : afu_default
Top-level Entity Name : dcp_top
Family : Arria 10

File diff suppressed because it is too large Load diff

View file

@ -1,29 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./guassian
enter demo main
[VXDRV] DEVCAPS: version=0, num_cores=16, num_warps=4, num_threads=4
OK
The result of matrix m is:
0.00 0.00 0.00 0.00
0.50 0.00 0.00 0.00
0.67 0.26 0.00 0.00
-0.00 0.15 -0.28 0.00
The result of matrix a is:
-0.60 -0.50 0.70 0.30
0.00 -0.65 -0.05 0.55
0.00 0.00 -0.75 -1.14
0.00 0.00 0.00 0.50
The result of array b is:
-0.85 -0.25 0.87 -0.25
The final solution is:
0.70 0.00 -0.40 -0.50
Passed!
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'

View file

@ -1,19 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./nearn
loading db: cane4_0.db
loading db: cane4_1.db
loading db: cane4_2.db
Number of records: 1500
Finding the 5 closest neighbors.
[VXDRV] DEVCAPS: version=0, num_cores=16, num_warps=4, num_threads=4
1974 12 22 18 24 JOYCE 30.6 89.9 80 593 --> Distance=0.608276
1965 5 13 0 17 TONY 27.8 89.0 122 260 --> Distance=2.416610
1991 3 18 12 19 DEBBY 28.5 87.8 107 850 --> Distance=2.662703
1957 4 17 6 12 ALBERTO 32.5 87.8 54 510 --> Distance=3.330163
1964 8 5 6 9 FLORENCE 31.5 86.3 18 242 --> Distance=3.992490
Passed!
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'

View file

@ -1,19 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./saxpy
enter demo main
[VXDRV] DEVCAPS: version=0, num_cores=16, num_warps=4, num_threads=4
Attempting to create program from binary...
Read program from binary.
attempting to create input buffer
attempting to create output buffer
attempting to create kernel
setting up kernel args
attempting to enqueue write buffer
attempting to enqueue kernel
Elapsed time: 4 ms
Download destination buffer
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'

View file

@ -1,19 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sfilter
enter demo main
[VXDRV] DEVCAPS: version=0, num_cores=16, num_warps=4, num_threads=4
Attempting to create program from binary...
Read program from binary.
attempting to create input buffer
attempting to create output buffer
attempting to create kernel
setting up kernel args
attempting to enqueue write buffer
attempting to enqueue kernel
Elapsed time: 4 ms
Download destination buffer
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'

View file

@ -1,458 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sgemm -n32
[VXDRV] DEVCAPS: version=0, num_cores=16, num_warps=4, num_threads=4
Create context
Create program from kernel source
Upload source buffers
Execute the kernel
Elapsed time: 4 ms
Download destination buffer
Verify result
PASSED!
PERF: core0: instrs=23498, cycles=16249, IPC=1.446120
PERF: core0: ibuffer stalls=2272
PERF: core0: scoreboard stalls=4197
PERF: core0: alu unit stalls=737
PERF: core0: lsu unit stalls=355
PERF: core0: csr unit stalls=0
PERF: core0: fpu unit stalls=3
PERF: core0: gpu unit stalls=0
PERF: core0: icache reads=6155
PERF: core0: icache read misses=73 (hit ratio=98%)
PERF: core0: icache pipeline stalls=2466
PERF: core0: icache reponse stalls=2272
PERF: core0: dcache reads=2862
PERF: core0: dcache writes=101
PERF: core0: dcache read misses=634 (hit ratio=77%)
PERF: core0: dcache write misses=97 (hit ratio=3%)
PERF: core0: dcache bank stalls=2189 (utilization=57%)
PERF: core0: dcache mshr stalls=2617
PERF: core0: dcache pipeline stalls=4967
PERF: core0: dcache reponse stalls=16
PERF: core0: smem reads=538
PERF: core0: smem writes=447
PERF: core0: smem bank stalls=0 (utilization=100%)
PERF: core0: dram requests=226 (reads=125, writes=101)
PERF: core0: dram stalls=1211 (utilization=15%)
PERF: core0: dram average latency=31 cycles
PERF: core1: instrs=23498, cycles=16180, IPC=1.452287
PERF: core1: ibuffer stalls=2244
PERF: core1: scoreboard stalls=4144
PERF: core1: alu unit stalls=735
PERF: core1: lsu unit stalls=399
PERF: core1: csr unit stalls=0
PERF: core1: fpu unit stalls=1
PERF: core1: gpu unit stalls=0
PERF: core1: icache reads=6155
PERF: core1: icache read misses=73 (hit ratio=98%)
PERF: core1: icache pipeline stalls=2462
PERF: core1: icache reponse stalls=2244
PERF: core1: dcache reads=2862
PERF: core1: dcache writes=101
PERF: core1: dcache read misses=635 (hit ratio=77%)
PERF: core1: dcache write misses=97 (hit ratio=3%)
PERF: core1: dcache bank stalls=2190 (utilization=57%)
PERF: core1: dcache mshr stalls=2515
PERF: core1: dcache pipeline stalls=4793
PERF: core1: dcache reponse stalls=16
PERF: core1: smem reads=538
PERF: core1: smem writes=447
PERF: core1: smem bank stalls=0 (utilization=100%)
PERF: core1: dram requests=227 (reads=126, writes=101)
PERF: core1: dram stalls=1257 (utilization=15%)
PERF: core1: dram average latency=30 cycles
PERF: core2: instrs=23498, cycles=16179, IPC=1.452376
PERF: core2: ibuffer stalls=2224
PERF: core2: scoreboard stalls=4120
PERF: core2: alu unit stalls=730
PERF: core2: lsu unit stalls=423
PERF: core2: csr unit stalls=0
PERF: core2: fpu unit stalls=2
PERF: core2: gpu unit stalls=0
PERF: core2: icache reads=6155
PERF: core2: icache read misses=73 (hit ratio=98%)
PERF: core2: icache pipeline stalls=2455
PERF: core2: icache reponse stalls=2224
PERF: core2: dcache reads=2862
PERF: core2: dcache writes=101
PERF: core2: dcache read misses=634 (hit ratio=77%)
PERF: core2: dcache write misses=97 (hit ratio=3%)
PERF: core2: dcache bank stalls=2187 (utilization=57%)
PERF: core2: dcache mshr stalls=2417
PERF: core2: dcache pipeline stalls=4427
PERF: core2: dcache reponse stalls=16
PERF: core2: smem reads=538
PERF: core2: smem writes=447
PERF: core2: smem bank stalls=0 (utilization=100%)
PERF: core2: dram requests=226 (reads=125, writes=101)
PERF: core2: dram stalls=1123 (utilization=16%)
PERF: core2: dram average latency=31 cycles
PERF: core3: instrs=23498, cycles=16102, IPC=1.459322
PERF: core3: ibuffer stalls=2190
PERF: core3: scoreboard stalls=4072
PERF: core3: alu unit stalls=741
PERF: core3: lsu unit stalls=410
PERF: core3: csr unit stalls=0
PERF: core3: fpu unit stalls=1
PERF: core3: gpu unit stalls=0
PERF: core3: icache reads=6155
PERF: core3: icache read misses=73 (hit ratio=98%)
PERF: core3: icache pipeline stalls=2380
PERF: core3: icache reponse stalls=2190
PERF: core3: dcache reads=2862
PERF: core3: dcache writes=101
PERF: core3: dcache read misses=634 (hit ratio=77%)
PERF: core3: dcache write misses=97 (hit ratio=3%)
PERF: core3: dcache bank stalls=2192 (utilization=57%)
PERF: core3: dcache mshr stalls=2345
PERF: core3: dcache pipeline stalls=3768
PERF: core3: dcache reponse stalls=16
PERF: core3: smem reads=538
PERF: core3: smem writes=447
PERF: core3: smem bank stalls=0 (utilization=100%)
PERF: core3: dram requests=226 (reads=125, writes=101)
PERF: core3: dram stalls=699 (utilization=24%)
PERF: core3: dram average latency=30 cycles
PERF: core4: instrs=23498, cycles=16254, IPC=1.445675
PERF: core4: ibuffer stalls=2311
PERF: core4: scoreboard stalls=4269
PERF: core4: alu unit stalls=733
PERF: core4: lsu unit stalls=377
PERF: core4: csr unit stalls=0
PERF: core4: fpu unit stalls=0
PERF: core4: gpu unit stalls=0
PERF: core4: icache reads=6155
PERF: core4: icache read misses=73 (hit ratio=98%)
PERF: core4: icache pipeline stalls=2532
PERF: core4: icache reponse stalls=2311
PERF: core4: dcache reads=2862
PERF: core4: dcache writes=101
PERF: core4: dcache read misses=653 (hit ratio=77%)
PERF: core4: dcache write misses=97 (hit ratio=3%)
PERF: core4: dcache bank stalls=2189 (utilization=57%)
PERF: core4: dcache mshr stalls=2519
PERF: core4: dcache pipeline stalls=4555
PERF: core4: dcache reponse stalls=16
PERF: core4: smem reads=538
PERF: core4: smem writes=447
PERF: core4: smem bank stalls=0 (utilization=100%)
PERF: core4: dram requests=233 (reads=132, writes=101)
PERF: core4: dram stalls=1018 (utilization=18%)
PERF: core4: dram average latency=30 cycles
PERF: core5: instrs=23498, cycles=16177, IPC=1.452556
PERF: core5: ibuffer stalls=2232
PERF: core5: scoreboard stalls=4137
PERF: core5: alu unit stalls=730
PERF: core5: lsu unit stalls=411
PERF: core5: csr unit stalls=0
PERF: core5: fpu unit stalls=1
PERF: core5: gpu unit stalls=0
PERF: core5: icache reads=6155
PERF: core5: icache read misses=73 (hit ratio=98%)
PERF: core5: icache pipeline stalls=2454
PERF: core5: icache reponse stalls=2232
PERF: core5: dcache reads=2862
PERF: core5: dcache writes=101
PERF: core5: dcache read misses=634 (hit ratio=77%)
PERF: core5: dcache write misses=97 (hit ratio=3%)
PERF: core5: dcache bank stalls=2184 (utilization=57%)
PERF: core5: dcache mshr stalls=2446
PERF: core5: dcache pipeline stalls=4560
PERF: core5: dcache reponse stalls=16
PERF: core5: smem reads=538
PERF: core5: smem writes=447
PERF: core5: smem bank stalls=0 (utilization=100%)
PERF: core5: dram requests=226 (reads=125, writes=101)
PERF: core5: dram stalls=1086 (utilization=17%)
PERF: core5: dram average latency=30 cycles
PERF: core6: instrs=23498, cycles=16164, IPC=1.453724
PERF: core6: ibuffer stalls=2228
PERF: core6: scoreboard stalls=4108
PERF: core6: alu unit stalls=727
PERF: core6: lsu unit stalls=419
PERF: core6: csr unit stalls=0
PERF: core6: fpu unit stalls=3
PERF: core6: gpu unit stalls=0
PERF: core6: icache reads=6155
PERF: core6: icache read misses=73 (hit ratio=98%)
PERF: core6: icache pipeline stalls=2434
PERF: core6: icache reponse stalls=2228
PERF: core6: dcache reads=2862
PERF: core6: dcache writes=101
PERF: core6: dcache read misses=634 (hit ratio=77%)
PERF: core6: dcache write misses=97 (hit ratio=3%)
PERF: core6: dcache bank stalls=2190 (utilization=57%)
PERF: core6: dcache mshr stalls=2451
PERF: core6: dcache pipeline stalls=4321
PERF: core6: dcache reponse stalls=16
PERF: core6: smem reads=538
PERF: core6: smem writes=447
PERF: core6: smem bank stalls=0 (utilization=100%)
PERF: core6: dram requests=226 (reads=125, writes=101)
PERF: core6: dram stalls=930 (utilization=19%)
PERF: core6: dram average latency=31 cycles
PERF: core7: instrs=23498, cycles=16105, IPC=1.459050
PERF: core7: ibuffer stalls=2189
PERF: core7: scoreboard stalls=4068
PERF: core7: alu unit stalls=746
PERF: core7: lsu unit stalls=411
PERF: core7: csr unit stalls=0
PERF: core7: fpu unit stalls=0
PERF: core7: gpu unit stalls=0
PERF: core7: icache reads=6155
PERF: core7: icache read misses=73 (hit ratio=98%)
PERF: core7: icache pipeline stalls=2369
PERF: core7: icache reponse stalls=2189
PERF: core7: dcache reads=2862
PERF: core7: dcache writes=101
PERF: core7: dcache read misses=634 (hit ratio=77%)
PERF: core7: dcache write misses=97 (hit ratio=3%)
PERF: core7: dcache bank stalls=2189 (utilization=57%)
PERF: core7: dcache mshr stalls=2357
PERF: core7: dcache pipeline stalls=3798
PERF: core7: dcache reponse stalls=16
PERF: core7: smem reads=538
PERF: core7: smem writes=447
PERF: core7: smem bank stalls=0 (utilization=100%)
PERF: core7: dram requests=226 (reads=125, writes=101)
PERF: core7: dram stalls=763 (utilization=22%)
PERF: core7: dram average latency=30 cycles
PERF: core8: instrs=23498, cycles=16256, IPC=1.445497
PERF: core8: ibuffer stalls=2249
PERF: core8: scoreboard stalls=4153
PERF: core8: alu unit stalls=740
PERF: core8: lsu unit stalls=382
PERF: core8: csr unit stalls=0
PERF: core8: fpu unit stalls=4
PERF: core8: gpu unit stalls=0
PERF: core8: icache reads=6155
PERF: core8: icache read misses=73 (hit ratio=98%)
PERF: core8: icache pipeline stalls=2457
PERF: core8: icache reponse stalls=2249
PERF: core8: dcache reads=2862
PERF: core8: dcache writes=101
PERF: core8: dcache read misses=634 (hit ratio=77%)
PERF: core8: dcache write misses=97 (hit ratio=3%)
PERF: core8: dcache bank stalls=2193 (utilization=57%)
PERF: core8: dcache mshr stalls=2563
PERF: core8: dcache pipeline stalls=5209
PERF: core8: dcache reponse stalls=15
PERF: core8: smem reads=538
PERF: core8: smem writes=447
PERF: core8: smem bank stalls=0 (utilization=100%)
PERF: core8: dram requests=226 (reads=125, writes=101)
PERF: core8: dram stalls=1474 (utilization=13%)
PERF: core8: dram average latency=31 cycles
PERF: core9: instrs=23498, cycles=16264, IPC=1.444786
PERF: core9: ibuffer stalls=2245
PERF: core9: scoreboard stalls=4151
PERF: core9: alu unit stalls=742
PERF: core9: lsu unit stalls=385
PERF: core9: csr unit stalls=0
PERF: core9: fpu unit stalls=2
PERF: core9: gpu unit stalls=0
PERF: core9: icache reads=6155
PERF: core9: icache read misses=73 (hit ratio=98%)
PERF: core9: icache pipeline stalls=2471
PERF: core9: icache reponse stalls=2245
PERF: core9: dcache reads=2862
PERF: core9: dcache writes=101
PERF: core9: dcache read misses=634 (hit ratio=77%)
PERF: core9: dcache write misses=97 (hit ratio=3%)
PERF: core9: dcache bank stalls=2200 (utilization=57%)
PERF: core9: dcache mshr stalls=2548
PERF: core9: dcache pipeline stalls=5160
PERF: core9: dcache reponse stalls=16
PERF: core9: smem reads=538
PERF: core9: smem writes=447
PERF: core9: smem bank stalls=0 (utilization=100%)
PERF: core9: dram requests=226 (reads=125, writes=101)
PERF: core9: dram stalls=1449 (utilization=13%)
PERF: core9: dram average latency=31 cycles
PERF: core10: instrs=23498, cycles=16253, IPC=1.445764
PERF: core10: ibuffer stalls=2228
PERF: core10: scoreboard stalls=4119
PERF: core10: alu unit stalls=724
PERF: core10: lsu unit stalls=420
PERF: core10: csr unit stalls=0
PERF: core10: fpu unit stalls=4
PERF: core10: gpu unit stalls=0
PERF: core10: icache reads=6155
PERF: core10: icache read misses=73 (hit ratio=98%)
PERF: core10: icache pipeline stalls=2457
PERF: core10: icache reponse stalls=2228
PERF: core10: dcache reads=2862
PERF: core10: dcache writes=101
PERF: core10: dcache read misses=634 (hit ratio=77%)
PERF: core10: dcache write misses=97 (hit ratio=3%)
PERF: core10: dcache bank stalls=2182 (utilization=57%)
PERF: core10: dcache mshr stalls=2427
PERF: core10: dcache pipeline stalls=4855
PERF: core10: dcache reponse stalls=16
PERF: core10: smem reads=538
PERF: core10: smem writes=447
PERF: core10: smem bank stalls=0 (utilization=100%)
PERF: core10: dram requests=226 (reads=125, writes=101)
PERF: core10: dram stalls=1326 (utilization=14%)
PERF: core10: dram average latency=31 cycles
PERF: core11: instrs=23498, cycles=16175, IPC=1.452736
PERF: core11: ibuffer stalls=2225
PERF: core11: scoreboard stalls=4114
PERF: core11: alu unit stalls=734
PERF: core11: lsu unit stalls=425
PERF: core11: csr unit stalls=0
PERF: core11: fpu unit stalls=0
PERF: core11: gpu unit stalls=0
PERF: core11: icache reads=6155
PERF: core11: icache read misses=73 (hit ratio=98%)
PERF: core11: icache pipeline stalls=2448
PERF: core11: icache reponse stalls=2225
PERF: core11: dcache reads=2862
PERF: core11: dcache writes=101
PERF: core11: dcache read misses=634 (hit ratio=77%)
PERF: core11: dcache write misses=97 (hit ratio=3%)
PERF: core11: dcache bank stalls=2195 (utilization=57%)
PERF: core11: dcache mshr stalls=2455
PERF: core11: dcache pipeline stalls=4007
PERF: core11: dcache reponse stalls=15
PERF: core11: smem reads=538
PERF: core11: smem writes=447
PERF: core11: smem bank stalls=0 (utilization=100%)
PERF: core11: dram requests=226 (reads=125, writes=101)
PERF: core11: dram stalls=967 (utilization=18%)
PERF: core11: dram average latency=31 cycles
PERF: core12: instrs=23498, cycles=16248, IPC=1.446209
PERF: core12: ibuffer stalls=2243
PERF: core12: scoreboard stalls=4147
PERF: core12: alu unit stalls=745
PERF: core12: lsu unit stalls=391
PERF: core12: csr unit stalls=0
PERF: core12: fpu unit stalls=2
PERF: core12: gpu unit stalls=0
PERF: core12: icache reads=6155
PERF: core12: icache read misses=73 (hit ratio=98%)
PERF: core12: icache pipeline stalls=2456
PERF: core12: icache reponse stalls=2243
PERF: core12: dcache reads=2862
PERF: core12: dcache writes=101
PERF: core12: dcache read misses=634 (hit ratio=77%)
PERF: core12: dcache write misses=97 (hit ratio=3%)
PERF: core12: dcache bank stalls=2198 (utilization=57%)
PERF: core12: dcache mshr stalls=2515
PERF: core12: dcache pipeline stalls=4956
PERF: core12: dcache reponse stalls=16
PERF: core12: smem reads=538
PERF: core12: smem writes=447
PERF: core12: smem bank stalls=0 (utilization=100%)
PERF: core12: dram requests=226 (reads=125, writes=101)
PERF: core12: dram stalls=1387 (utilization=14%)
PERF: core12: dram average latency=31 cycles
PERF: core13: instrs=23498, cycles=16176, IPC=1.452646
PERF: core13: ibuffer stalls=2224
PERF: core13: scoreboard stalls=4117
PERF: core13: alu unit stalls=732
PERF: core13: lsu unit stalls=431
PERF: core13: csr unit stalls=0
PERF: core13: fpu unit stalls=3
PERF: core13: gpu unit stalls=0
PERF: core13: icache reads=6155
PERF: core13: icache read misses=73 (hit ratio=98%)
PERF: core13: icache pipeline stalls=2446
PERF: core13: icache reponse stalls=2224
PERF: core13: dcache reads=2862
PERF: core13: dcache writes=101
PERF: core13: dcache read misses=634 (hit ratio=77%)
PERF: core13: dcache write misses=97 (hit ratio=3%)
PERF: core13: dcache bank stalls=2193 (utilization=57%)
PERF: core13: dcache mshr stalls=2425
PERF: core13: dcache pipeline stalls=4623
PERF: core13: dcache reponse stalls=15
PERF: core13: smem reads=538
PERF: core13: smem writes=447
PERF: core13: smem bank stalls=0 (utilization=100%)
PERF: core13: dram requests=226 (reads=125, writes=101)
PERF: core13: dram stalls=1260 (utilization=15%)
PERF: core13: dram average latency=31 cycles
PERF: core14: instrs=23498, cycles=16165, IPC=1.453634
PERF: core14: ibuffer stalls=2233
PERF: core14: scoreboard stalls=4091
PERF: core14: alu unit stalls=742
PERF: core14: lsu unit stalls=428
PERF: core14: csr unit stalls=0
PERF: core14: fpu unit stalls=2
PERF: core14: gpu unit stalls=0
PERF: core14: icache reads=6155
PERF: core14: icache read misses=73 (hit ratio=98%)
PERF: core14: icache pipeline stalls=2452
PERF: core14: icache reponse stalls=2233
PERF: core14: dcache reads=2862
PERF: core14: dcache writes=101
PERF: core14: dcache read misses=634 (hit ratio=77%)
PERF: core14: dcache write misses=97 (hit ratio=3%)
PERF: core14: dcache bank stalls=2193 (utilization=57%)
PERF: core14: dcache mshr stalls=2426
PERF: core14: dcache pipeline stalls=3984
PERF: core14: dcache reponse stalls=15
PERF: core14: smem reads=538
PERF: core14: smem writes=447
PERF: core14: smem bank stalls=0 (utilization=100%)
PERF: core14: dram requests=226 (reads=125, writes=101)
PERF: core14: dram stalls=952 (utilization=19%)
PERF: core14: dram average latency=30 cycles
PERF: core15: instrs=23500, cycles=16251, IPC=1.446065
PERF: core15: ibuffer stalls=2268
PERF: core15: scoreboard stalls=4241
PERF: core15: alu unit stalls=745
PERF: core15: lsu unit stalls=374
PERF: core15: csr unit stalls=0
PERF: core15: fpu unit stalls=1
PERF: core15: gpu unit stalls=0
PERF: core15: icache reads=6157
PERF: core15: icache read misses=73 (hit ratio=98%)
PERF: core15: icache pipeline stalls=2455
PERF: core15: icache reponse stalls=2268
PERF: core15: dcache reads=2862
PERF: core15: dcache writes=101
PERF: core15: dcache read misses=634 (hit ratio=77%)
PERF: core15: dcache write misses=97 (hit ratio=3%)
PERF: core15: dcache bank stalls=2195 (utilization=57%)
PERF: core15: dcache mshr stalls=2567
PERF: core15: dcache pipeline stalls=5084
PERF: core15: dcache reponse stalls=16
PERF: core15: smem reads=538
PERF: core15: smem writes=447
PERF: core15: smem bank stalls=0 (utilization=100%)
PERF: core15: dram requests=226 (reads=125, writes=101)
PERF: core15: dram stalls=1220 (utilization=15%)
PERF: core15: dram average latency=31 cycles
PERF: instrs=375970, cycles=16264, IPC=23.116699
PERF: ibuffer stalls=35805
PERF: scoreboard stalls=66248
PERF: alu unit stalls=11783
PERF: lsu unit stalls=6441
PERF: csr unit stalls=0
PERF: fpu unit stalls=29
PERF: gpu unit stalls=0
PERF: icache reads=98482
PERF: icache read misses=1168 (hit ratio=98%)
PERF: icache pipeline stalls=39194
PERF: icache reponse stalls=35805
PERF: dcache reads=45792
PERF: dcache writes=1616
PERF: dcache read misses=10164 (hit ratio=77%)
PERF: dcache write misses=1552 (hit ratio=3%)
PERF: dcache bank stalls=35059 (utilization=57%)
PERF: dcache mshr stalls=39593
PERF: dcache pipeline stalls=73067
PERF: dcache reponse stalls=252
PERF: smem reads=8608
PERF: smem writes=7152
PERF: smem bank stalls=0 (utilization=100%)
PERF: dram requests=3624 (reads=2008, writes=1616)
PERF: dram stalls=18122 (utilization=16%)
PERF: dram average latency=31 cycles
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'

View file

@ -1,3 +0,0 @@
# Generated by Platform Interface Manager user_clock_config.tcl
afu-image/clock-frequency-low:83.5
afu-image/clock-frequency-high:167

View file

@ -1,459 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./vecadd -n64
[VXDRV] DEVCAPS: version=0, num_cores=16, num_warps=4, num_threads=4
Create context
Allocate device buffers
Create program from kernel source
Upload source buffers
Execute the kernel
Elapsed time: 4 ms
Download destination buffer
Verify result
PASSED!
PERF: core0: instrs=2019, cycles=5194, IPC=0.388718
PERF: core0: ibuffer stalls=89
PERF: core0: scoreboard stalls=493
PERF: core0: alu unit stalls=68
PERF: core0: lsu unit stalls=50
PERF: core0: csr unit stalls=0
PERF: core0: fpu unit stalls=0
PERF: core0: gpu unit stalls=0
PERF: core0: icache reads=804
PERF: core0: icache read misses=65 (hit ratio=91%)
PERF: core0: icache pipeline stalls=444
PERF: core0: icache reponse stalls=89
PERF: core0: dcache reads=114
PERF: core0: dcache writes=65
PERF: core0: dcache read misses=28 (hit ratio=75%)
PERF: core0: dcache write misses=60 (hit ratio=7%)
PERF: core0: dcache bank stalls=72 (utilization=71%)
PERF: core0: dcache mshr stalls=58
PERF: core0: dcache pipeline stalls=596
PERF: core0: dcache reponse stalls=1
PERF: core0: smem reads=70
PERF: core0: smem writes=63
PERF: core0: smem bank stalls=0 (utilization=100%)
PERF: core0: dram requests=109 (reads=44, writes=65)
PERF: core0: dram stalls=780 (utilization=12%)
PERF: core0: dram average latency=31 cycles
PERF: core1: instrs=2019, cycles=5191, IPC=0.388942
PERF: core1: ibuffer stalls=89
PERF: core1: scoreboard stalls=494
PERF: core1: alu unit stalls=68
PERF: core1: lsu unit stalls=48
PERF: core1: csr unit stalls=0
PERF: core1: fpu unit stalls=0
PERF: core1: gpu unit stalls=0
PERF: core1: icache reads=804
PERF: core1: icache read misses=65 (hit ratio=91%)
PERF: core1: icache pipeline stalls=455
PERF: core1: icache reponse stalls=89
PERF: core1: dcache reads=114
PERF: core1: dcache writes=65
PERF: core1: dcache read misses=28 (hit ratio=75%)
PERF: core1: dcache write misses=60 (hit ratio=7%)
PERF: core1: dcache bank stalls=72 (utilization=71%)
PERF: core1: dcache mshr stalls=58
PERF: core1: dcache pipeline stalls=596
PERF: core1: dcache reponse stalls=1
PERF: core1: smem reads=70
PERF: core1: smem writes=63
PERF: core1: smem bank stalls=0 (utilization=100%)
PERF: core1: dram requests=109 (reads=44, writes=65)
PERF: core1: dram stalls=774 (utilization=12%)
PERF: core1: dram average latency=31 cycles
PERF: core2: instrs=2019, cycles=5110, IPC=0.395108
PERF: core2: ibuffer stalls=89
PERF: core2: scoreboard stalls=485
PERF: core2: alu unit stalls=68
PERF: core2: lsu unit stalls=53
PERF: core2: csr unit stalls=0
PERF: core2: fpu unit stalls=0
PERF: core2: gpu unit stalls=0
PERF: core2: icache reads=804
PERF: core2: icache read misses=65 (hit ratio=91%)
PERF: core2: icache pipeline stalls=401
PERF: core2: icache reponse stalls=89
PERF: core2: dcache reads=114
PERF: core2: dcache writes=65
PERF: core2: dcache read misses=28 (hit ratio=75%)
PERF: core2: dcache write misses=60 (hit ratio=7%)
PERF: core2: dcache bank stalls=72 (utilization=71%)
PERF: core2: dcache mshr stalls=60
PERF: core2: dcache pipeline stalls=541
PERF: core2: dcache reponse stalls=1
PERF: core2: smem reads=70
PERF: core2: smem writes=63
PERF: core2: smem bank stalls=0 (utilization=100%)
PERF: core2: dram requests=109 (reads=44, writes=65)
PERF: core2: dram stalls=731 (utilization=12%)
PERF: core2: dram average latency=30 cycles
PERF: core3: instrs=2019, cycles=5101, IPC=0.395805
PERF: core3: ibuffer stalls=89
PERF: core3: scoreboard stalls=486
PERF: core3: alu unit stalls=68
PERF: core3: lsu unit stalls=52
PERF: core3: csr unit stalls=0
PERF: core3: fpu unit stalls=0
PERF: core3: gpu unit stalls=0
PERF: core3: icache reads=804
PERF: core3: icache read misses=65 (hit ratio=91%)
PERF: core3: icache pipeline stalls=401
PERF: core3: icache reponse stalls=89
PERF: core3: dcache reads=114
PERF: core3: dcache writes=65
PERF: core3: dcache read misses=28 (hit ratio=75%)
PERF: core3: dcache write misses=60 (hit ratio=7%)
PERF: core3: dcache bank stalls=72 (utilization=71%)
PERF: core3: dcache mshr stalls=58
PERF: core3: dcache pipeline stalls=532
PERF: core3: dcache reponse stalls=1
PERF: core3: smem reads=70
PERF: core3: smem writes=63
PERF: core3: smem bank stalls=0 (utilization=100%)
PERF: core3: dram requests=109 (reads=44, writes=65)
PERF: core3: dram stalls=731 (utilization=12%)
PERF: core3: dram average latency=29 cycles
PERF: core4: instrs=495, cycles=3605, IPC=0.137309
PERF: core4: ibuffer stalls=0
PERF: core4: scoreboard stalls=267
PERF: core4: alu unit stalls=0
PERF: core4: lsu unit stalls=0
PERF: core4: csr unit stalls=0
PERF: core4: fpu unit stalls=0
PERF: core4: gpu unit stalls=0
PERF: core4: icache reads=348
PERF: core4: icache read misses=31 (hit ratio=91%)
PERF: core4: icache pipeline stalls=63
PERF: core4: icache reponse stalls=0
PERF: core4: dcache reads=18
PERF: core4: dcache writes=48
PERF: core4: dcache read misses=8 (hit ratio=55%)
PERF: core4: dcache write misses=44 (hit ratio=8%)
PERF: core4: dcache bank stalls=0 (utilization=100%)
PERF: core4: dcache mshr stalls=0
PERF: core4: dcache pipeline stalls=525
PERF: core4: dcache reponse stalls=0
PERF: core4: smem reads=23
PERF: core4: smem writes=25
PERF: core4: smem bank stalls=0 (utilization=100%)
PERF: core4: dram requests=79 (reads=31, writes=48)
PERF: core4: dram stalls=765 (utilization=9%)
PERF: core4: dram average latency=31 cycles
PERF: core5: instrs=495, cycles=3603, IPC=0.137386
PERF: core5: ibuffer stalls=0
PERF: core5: scoreboard stalls=269
PERF: core5: alu unit stalls=0
PERF: core5: lsu unit stalls=0
PERF: core5: csr unit stalls=0
PERF: core5: fpu unit stalls=0
PERF: core5: gpu unit stalls=0
PERF: core5: icache reads=348
PERF: core5: icache read misses=31 (hit ratio=91%)
PERF: core5: icache pipeline stalls=63
PERF: core5: icache reponse stalls=0
PERF: core5: dcache reads=18
PERF: core5: dcache writes=48
PERF: core5: dcache read misses=8 (hit ratio=55%)
PERF: core5: dcache write misses=44 (hit ratio=8%)
PERF: core5: dcache bank stalls=0 (utilization=100%)
PERF: core5: dcache mshr stalls=0
PERF: core5: dcache pipeline stalls=514
PERF: core5: dcache reponse stalls=0
PERF: core5: smem reads=23
PERF: core5: smem writes=25
PERF: core5: smem bank stalls=0 (utilization=100%)
PERF: core5: dram requests=79 (reads=31, writes=48)
PERF: core5: dram stalls=758 (utilization=9%)
PERF: core5: dram average latency=31 cycles
PERF: core6: instrs=495, cycles=3587, IPC=0.137998
PERF: core6: ibuffer stalls=0
PERF: core6: scoreboard stalls=260
PERF: core6: alu unit stalls=0
PERF: core6: lsu unit stalls=0
PERF: core6: csr unit stalls=0
PERF: core6: fpu unit stalls=0
PERF: core6: gpu unit stalls=0
PERF: core6: icache reads=348
PERF: core6: icache read misses=31 (hit ratio=91%)
PERF: core6: icache pipeline stalls=63
PERF: core6: icache reponse stalls=0
PERF: core6: dcache reads=18
PERF: core6: dcache writes=48
PERF: core6: dcache read misses=8 (hit ratio=55%)
PERF: core6: dcache write misses=44 (hit ratio=8%)
PERF: core6: dcache bank stalls=0 (utilization=100%)
PERF: core6: dcache mshr stalls=0
PERF: core6: dcache pipeline stalls=472
PERF: core6: dcache reponse stalls=0
PERF: core6: smem reads=23
PERF: core6: smem writes=25
PERF: core6: smem bank stalls=0 (utilization=100%)
PERF: core6: dram requests=79 (reads=31, writes=48)
PERF: core6: dram stalls=727 (utilization=9%)
PERF: core6: dram average latency=31 cycles
PERF: core7: instrs=495, cycles=3573, IPC=0.138539
PERF: core7: ibuffer stalls=0
PERF: core7: scoreboard stalls=260
PERF: core7: alu unit stalls=0
PERF: core7: lsu unit stalls=0
PERF: core7: csr unit stalls=0
PERF: core7: fpu unit stalls=0
PERF: core7: gpu unit stalls=0
PERF: core7: icache reads=348
PERF: core7: icache read misses=31 (hit ratio=91%)
PERF: core7: icache pipeline stalls=63
PERF: core7: icache reponse stalls=0
PERF: core7: dcache reads=18
PERF: core7: dcache writes=48
PERF: core7: dcache read misses=8 (hit ratio=55%)
PERF: core7: dcache write misses=44 (hit ratio=8%)
PERF: core7: dcache bank stalls=0 (utilization=100%)
PERF: core7: dcache mshr stalls=0
PERF: core7: dcache pipeline stalls=474
PERF: core7: dcache reponse stalls=0
PERF: core7: smem reads=23
PERF: core7: smem writes=25
PERF: core7: smem bank stalls=0 (utilization=100%)
PERF: core7: dram requests=79 (reads=31, writes=48)
PERF: core7: dram stalls=728 (utilization=9%)
PERF: core7: dram average latency=31 cycles
PERF: core8: instrs=495, cycles=3604, IPC=0.137347
PERF: core8: ibuffer stalls=0
PERF: core8: scoreboard stalls=268
PERF: core8: alu unit stalls=0
PERF: core8: lsu unit stalls=0
PERF: core8: csr unit stalls=0
PERF: core8: fpu unit stalls=0
PERF: core8: gpu unit stalls=0
PERF: core8: icache reads=348
PERF: core8: icache read misses=31 (hit ratio=91%)
PERF: core8: icache pipeline stalls=63
PERF: core8: icache reponse stalls=0
PERF: core8: dcache reads=18
PERF: core8: dcache writes=48
PERF: core8: dcache read misses=8 (hit ratio=55%)
PERF: core8: dcache write misses=44 (hit ratio=8%)
PERF: core8: dcache bank stalls=0 (utilization=100%)
PERF: core8: dcache mshr stalls=0
PERF: core8: dcache pipeline stalls=525
PERF: core8: dcache reponse stalls=0
PERF: core8: smem reads=23
PERF: core8: smem writes=25
PERF: core8: smem bank stalls=0 (utilization=100%)
PERF: core8: dram requests=79 (reads=31, writes=48)
PERF: core8: dram stalls=764 (utilization=9%)
PERF: core8: dram average latency=31 cycles
PERF: core9: instrs=495, cycles=3600, IPC=0.137500
PERF: core9: ibuffer stalls=0
PERF: core9: scoreboard stalls=268
PERF: core9: alu unit stalls=0
PERF: core9: lsu unit stalls=0
PERF: core9: csr unit stalls=0
PERF: core9: fpu unit stalls=0
PERF: core9: gpu unit stalls=0
PERF: core9: icache reads=348
PERF: core9: icache read misses=31 (hit ratio=91%)
PERF: core9: icache pipeline stalls=63
PERF: core9: icache reponse stalls=0
PERF: core9: dcache reads=18
PERF: core9: dcache writes=48
PERF: core9: dcache read misses=8 (hit ratio=55%)
PERF: core9: dcache write misses=44 (hit ratio=8%)
PERF: core9: dcache bank stalls=0 (utilization=100%)
PERF: core9: dcache mshr stalls=0
PERF: core9: dcache pipeline stalls=514
PERF: core9: dcache reponse stalls=0
PERF: core9: smem reads=23
PERF: core9: smem writes=25
PERF: core9: smem bank stalls=0 (utilization=100%)
PERF: core9: dram requests=79 (reads=31, writes=48)
PERF: core9: dram stalls=756 (utilization=9%)
PERF: core9: dram average latency=31 cycles
PERF: core10: instrs=495, cycles=3585, IPC=0.138075
PERF: core10: ibuffer stalls=0
PERF: core10: scoreboard stalls=261
PERF: core10: alu unit stalls=0
PERF: core10: lsu unit stalls=0
PERF: core10: csr unit stalls=0
PERF: core10: fpu unit stalls=0
PERF: core10: gpu unit stalls=0
PERF: core10: icache reads=348
PERF: core10: icache read misses=31 (hit ratio=91%)
PERF: core10: icache pipeline stalls=63
PERF: core10: icache reponse stalls=0
PERF: core10: dcache reads=18
PERF: core10: dcache writes=48
PERF: core10: dcache read misses=8 (hit ratio=55%)
PERF: core10: dcache write misses=44 (hit ratio=8%)
PERF: core10: dcache bank stalls=0 (utilization=100%)
PERF: core10: dcache mshr stalls=0
PERF: core10: dcache pipeline stalls=472
PERF: core10: dcache reponse stalls=0
PERF: core10: smem reads=23
PERF: core10: smem writes=25
PERF: core10: smem bank stalls=0 (utilization=100%)
PERF: core10: dram requests=79 (reads=31, writes=48)
PERF: core10: dram stalls=728 (utilization=9%)
PERF: core10: dram average latency=31 cycles
PERF: core11: instrs=495, cycles=3572, IPC=0.138578
PERF: core11: ibuffer stalls=0
PERF: core11: scoreboard stalls=259
PERF: core11: alu unit stalls=0
PERF: core11: lsu unit stalls=0
PERF: core11: csr unit stalls=0
PERF: core11: fpu unit stalls=0
PERF: core11: gpu unit stalls=0
PERF: core11: icache reads=348
PERF: core11: icache read misses=31 (hit ratio=91%)
PERF: core11: icache pipeline stalls=63
PERF: core11: icache reponse stalls=0
PERF: core11: dcache reads=18
PERF: core11: dcache writes=48
PERF: core11: dcache read misses=8 (hit ratio=55%)
PERF: core11: dcache write misses=44 (hit ratio=8%)
PERF: core11: dcache bank stalls=0 (utilization=100%)
PERF: core11: dcache mshr stalls=0
PERF: core11: dcache pipeline stalls=474
PERF: core11: dcache reponse stalls=0
PERF: core11: smem reads=23
PERF: core11: smem writes=25
PERF: core11: smem bank stalls=0 (utilization=100%)
PERF: core11: dram requests=79 (reads=31, writes=48)
PERF: core11: dram stalls=728 (utilization=9%)
PERF: core11: dram average latency=31 cycles
PERF: core12: instrs=495, cycles=3599, IPC=0.137538
PERF: core12: ibuffer stalls=0
PERF: core12: scoreboard stalls=261
PERF: core12: alu unit stalls=0
PERF: core12: lsu unit stalls=0
PERF: core12: csr unit stalls=0
PERF: core12: fpu unit stalls=0
PERF: core12: gpu unit stalls=0
PERF: core12: icache reads=348
PERF: core12: icache read misses=31 (hit ratio=91%)
PERF: core12: icache pipeline stalls=63
PERF: core12: icache reponse stalls=0
PERF: core12: dcache reads=18
PERF: core12: dcache writes=48
PERF: core12: dcache read misses=8 (hit ratio=55%)
PERF: core12: dcache write misses=44 (hit ratio=8%)
PERF: core12: dcache bank stalls=0 (utilization=100%)
PERF: core12: dcache mshr stalls=0
PERF: core12: dcache pipeline stalls=533
PERF: core12: dcache reponse stalls=0
PERF: core12: smem reads=23
PERF: core12: smem writes=25
PERF: core12: smem bank stalls=0 (utilization=100%)
PERF: core12: dram requests=79 (reads=31, writes=48)
PERF: core12: dram stalls=762 (utilization=9%)
PERF: core12: dram average latency=31 cycles
PERF: core13: instrs=495, cycles=3589, IPC=0.137921
PERF: core13: ibuffer stalls=0
PERF: core13: scoreboard stalls=257
PERF: core13: alu unit stalls=0
PERF: core13: lsu unit stalls=0
PERF: core13: csr unit stalls=0
PERF: core13: fpu unit stalls=0
PERF: core13: gpu unit stalls=0
PERF: core13: icache reads=348
PERF: core13: icache read misses=31 (hit ratio=91%)
PERF: core13: icache pipeline stalls=63
PERF: core13: icache reponse stalls=0
PERF: core13: dcache reads=18
PERF: core13: dcache writes=48
PERF: core13: dcache read misses=8 (hit ratio=55%)
PERF: core13: dcache write misses=44 (hit ratio=8%)
PERF: core13: dcache bank stalls=0 (utilization=100%)
PERF: core13: dcache mshr stalls=0
PERF: core13: dcache pipeline stalls=478
PERF: core13: dcache reponse stalls=0
PERF: core13: smem reads=23
PERF: core13: smem writes=25
PERF: core13: smem bank stalls=0 (utilization=100%)
PERF: core13: dram requests=79 (reads=31, writes=48)
PERF: core13: dram stalls=736 (utilization=9%)
PERF: core13: dram average latency=31 cycles
PERF: core14: instrs=495, cycles=3584, IPC=0.138114
PERF: core14: ibuffer stalls=0
PERF: core14: scoreboard stalls=255
PERF: core14: alu unit stalls=0
PERF: core14: lsu unit stalls=0
PERF: core14: csr unit stalls=0
PERF: core14: fpu unit stalls=0
PERF: core14: gpu unit stalls=0
PERF: core14: icache reads=348
PERF: core14: icache read misses=31 (hit ratio=91%)
PERF: core14: icache pipeline stalls=63
PERF: core14: icache reponse stalls=0
PERF: core14: dcache reads=18
PERF: core14: dcache writes=48
PERF: core14: dcache read misses=8 (hit ratio=55%)
PERF: core14: dcache write misses=44 (hit ratio=8%)
PERF: core14: dcache bank stalls=0 (utilization=100%)
PERF: core14: dcache mshr stalls=0
PERF: core14: dcache pipeline stalls=480
PERF: core14: dcache reponse stalls=0
PERF: core14: smem reads=23
PERF: core14: smem writes=25
PERF: core14: smem bank stalls=0 (utilization=100%)
PERF: core14: dram requests=79 (reads=31, writes=48)
PERF: core14: dram stalls=734 (utilization=9%)
PERF: core14: dram average latency=31 cycles
PERF: core15: instrs=495, cycles=3570, IPC=0.138655
PERF: core15: ibuffer stalls=0
PERF: core15: scoreboard stalls=241
PERF: core15: alu unit stalls=0
PERF: core15: lsu unit stalls=0
PERF: core15: csr unit stalls=0
PERF: core15: fpu unit stalls=0
PERF: core15: gpu unit stalls=0
PERF: core15: icache reads=348
PERF: core15: icache read misses=31 (hit ratio=91%)
PERF: core15: icache pipeline stalls=62
PERF: core15: icache reponse stalls=0
PERF: core15: dcache reads=18
PERF: core15: dcache writes=48
PERF: core15: dcache read misses=8 (hit ratio=55%)
PERF: core15: dcache write misses=44 (hit ratio=8%)
PERF: core15: dcache bank stalls=0 (utilization=100%)
PERF: core15: dcache mshr stalls=0
PERF: core15: dcache pipeline stalls=419
PERF: core15: dcache reponse stalls=0
PERF: core15: smem reads=23
PERF: core15: smem writes=25
PERF: core15: smem bank stalls=0 (utilization=100%)
PERF: core15: dram requests=79 (reads=31, writes=48)
PERF: core15: dram stalls=667 (utilization=10%)
PERF: core15: dram average latency=31 cycles
PERF: instrs=14016, cycles=5194, IPC=2.698498
PERF: ibuffer stalls=356
PERF: scoreboard stalls=5084
PERF: alu unit stalls=272
PERF: lsu unit stalls=203
PERF: csr unit stalls=0
PERF: fpu unit stalls=0
PERF: gpu unit stalls=0
PERF: icache reads=7392
PERF: icache read misses=632 (hit ratio=91%)
PERF: icache pipeline stalls=2456
PERF: icache reponse stalls=356
PERF: dcache reads=672
PERF: dcache writes=836
PERF: dcache read misses=208 (hit ratio=69%)
PERF: dcache write misses=768 (hit ratio=8%)
PERF: dcache bank stalls=288 (utilization=83%)
PERF: dcache mshr stalls=234
PERF: dcache pipeline stalls=8145
PERF: dcache reponse stalls=4
PERF: smem reads=556
PERF: smem writes=552
PERF: smem bank stalls=0 (utilization=100%)
PERF: dram requests=1384 (reads=548, writes=836)
PERF: dram stalls=11869 (utilization=10%)
PERF: dram average latency=31 cycles
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'

View file

@ -1,17 +0,0 @@
Fitter Status : Successful - Sat Mar 6 19:19:28 2021
Quartus Prime Version : 19.2.0 Build 57 06/24/2019 Patches 0.01rc SJ Pro Edition
Revision Name : afu_default
Top-level Entity Name : dcp_top
Family : Arria 10
Device : 10AX115N2F40E2LG
Timing Models : Final
Logic utilization (in ALMs) : 55,747 / 427,200 ( 13 % )
Total registers : 79974
Total pins : 310 / 826 ( 38 % )
Total virtual pins : 0
Total block memory bits : 2,272,720 / 55,562,240 ( 4 % )
Total RAM Blocks : 320 / 2,713 ( 12 % )
Total DSP Blocks : 28 / 1,518 ( 2 % )
Total HSSI RX channels : 12 / 48 ( 25 % )
Total HSSI TX channels : 12 / 48 ( 25 % )
Total PLLs : 25 / 112 ( 22 % )

File diff suppressed because it is too large Load diff

View file

@ -1,4 +0,0 @@
Synthesis Status : Successful - Sat Mar 6 18:56:26 2021
Revision Name : afu_default
Top-level Entity Name : dcp_top
Family : Arria 10

File diff suppressed because it is too large Load diff

View file

@ -1,29 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./guassian
enter demo main
[VXDRV] DEVCAPS: version=0, num_cores=1, num_warps=4, num_threads=4
OK
The result of matrix m is:
0.00 0.00 0.00 0.00
0.50 0.00 0.00 0.00
0.67 0.26 0.00 0.00
-0.00 0.15 -0.28 0.00
The result of matrix a is:
-0.60 -0.50 0.70 0.30
0.00 -0.65 -0.05 0.55
0.00 0.00 -0.75 -1.14
0.00 0.00 0.00 0.50
The result of array b is:
-0.85 -0.25 0.87 -0.25
The final solution is:
0.70 0.00 -0.40 -0.50
Passed!
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'

View file

@ -1,19 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./nearn
loading db: cane4_0.db
loading db: cane4_1.db
loading db: cane4_2.db
Number of records: 1500
Finding the 5 closest neighbors.
[VXDRV] DEVCAPS: version=0, num_cores=1, num_warps=4, num_threads=4
1974 12 22 18 24 JOYCE 30.6 89.9 80 593 --> Distance=0.608276
1965 5 13 0 17 TONY 27.8 89.0 122 260 --> Distance=2.416610
1991 3 18 12 19 DEBBY 28.5 87.8 107 850 --> Distance=2.662703
1957 4 17 6 12 ALBERTO 32.5 87.8 54 510 --> Distance=3.330163
1964 8 5 6 9 FLORENCE 31.5 86.3 18 242 --> Distance=3.992490
Passed!
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'

View file

@ -1,19 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./saxpy
enter demo main
[VXDRV] DEVCAPS: version=0, num_cores=1, num_warps=4, num_threads=4
Attempting to create program from binary...
Read program from binary.
attempting to create input buffer
attempting to create output buffer
attempting to create kernel
setting up kernel args
attempting to enqueue write buffer
attempting to enqueue kernel
Elapsed time: 4 ms
Download destination buffer
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'

View file

@ -1,19 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sfilter
enter demo main
[VXDRV] DEVCAPS: version=0, num_cores=1, num_warps=4, num_threads=4
Attempting to create program from binary...
Read program from binary.
attempting to create input buffer
attempting to create output buffer
attempting to create kernel
setting up kernel args
attempting to enqueue write buffer
attempting to enqueue kernel
Elapsed time: 4 ms
Download destination buffer
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'

View file

@ -1,42 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sgemm -n32
[VXDRV] DEVCAPS: version=0, num_cores=1, num_warps=4, num_threads=4
Create context
Create program from kernel source
Upload source buffers
Execute the kernel
Elapsed time: 4 ms
Download destination buffer
Verify result
PASSED!
PERF: instrs=360460, cycles=175991, IPC=2.048173
PERF: ibuffer stalls=20439
PERF: scoreboard stalls=50656
PERF: alu unit stalls=7129
PERF: lsu unit stalls=16771
PERF: csr unit stalls=0
PERF: fpu unit stalls=0
PERF: gpu unit stalls=0
PERF: icache reads=90397
PERF: icache read misses=73 (hit ratio=99%)
PERF: icache pipeline stalls=12325
PERF: icache reponse stalls=20439
PERF: dcache reads=45342
PERF: dcache writes=1061
PERF: dcache read misses=1252 (hit ratio=97%)
PERF: dcache write misses=1057 (hit ratio=0%)
PERF: dcache bank stalls=50688 (utilization=47%)
PERF: dcache mshr stalls=2005
PERF: dcache pipeline stalls=2034
PERF: dcache reponse stalls=192
PERF: smem reads=7978
PERF: smem writes=6207
PERF: smem bank stalls=0 (utilization=100%)
PERF: dram requests=1423 (reads=362, writes=1061)
PERF: dram stalls=0 (utilization=100%)
PERF: dram average latency=26 cycles
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'

View file

@ -1,3 +0,0 @@
# Generated by Platform Interface Manager user_clock_config.tcl
afu-image/clock-frequency-low:88.5
afu-image/clock-frequency-high:177

View file

@ -1,43 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./vecadd -n64
[VXDRV] DEVCAPS: version=0, num_cores=1, num_warps=4, num_threads=4
Create context
Allocate device buffers
Create program from kernel source
Upload source buffers
Execute the kernel
Elapsed time: 4 ms
Download destination buffer
Verify result
PASSED!
PERF: instrs=4908, cycles=6173, IPC=0.795075
PERF: ibuffer stalls=247
PERF: scoreboard stalls=629
PERF: alu unit stalls=130
PERF: lsu unit stalls=204
PERF: csr unit stalls=0
PERF: fpu unit stalls=0
PERF: gpu unit stalls=0
PERF: icache reads=1528
PERF: icache read misses=65 (hit ratio=95%)
PERF: icache pipeline stalls=546
PERF: icache reponse stalls=247
PERF: dcache reads=371
PERF: dcache writes=113
PERF: dcache read misses=105 (hit ratio=71%)
PERF: dcache write misses=108 (hit ratio=4%)
PERF: dcache bank stalls=184 (utilization=72%)
PERF: dcache mshr stalls=125
PERF: dcache pipeline stalls=259
PERF: dcache reponse stalls=15
PERF: smem reads=154
PERF: smem writes=63
PERF: smem bank stalls=0 (utilization=100%)
PERF: dram requests=175 (reads=62, writes=113)
PERF: dram stalls=0 (utilization=100%)
PERF: dram average latency=26 cycles
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'

View file

@ -1,17 +0,0 @@
Fitter Status : Successful - Sat Mar 6 01:44:47 2021
Quartus Prime Version : 19.2.0 Build 57 06/24/2019 Patches 0.01rc SJ Pro Edition
Revision Name : afu_default
Top-level Entity Name : dcp_top
Family : Arria 10
Device : 10AX115N2F40E2LG
Timing Models : Final
Logic utilization (in ALMs) : 74,001 / 427,200 ( 17 % )
Total registers : 109164
Total pins : 310 / 826 ( 38 % )
Total virtual pins : 0
Total block memory bits : 2,967,352 / 55,562,240 ( 5 % )
Total RAM Blocks : 451 / 2,713 ( 17 % )
Total DSP Blocks : 56 / 1,518 ( 4 % )
Total HSSI RX channels : 12 / 48 ( 25 % )
Total HSSI TX channels : 12 / 48 ( 25 % )
Total PLLs : 25 / 112 ( 22 % )

File diff suppressed because it is too large Load diff

View file

@ -1,4 +0,0 @@
Synthesis Status : Successful - Sat Mar 6 01:12:13 2021
Revision Name : afu_default
Top-level Entity Name : dcp_top
Family : Arria 10

File diff suppressed because it is too large Load diff

View file

@ -1,29 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./guassian
enter demo main
[VXDRV] DEVCAPS: version=0, num_cores=2, num_warps=4, num_threads=4
OK
The result of matrix m is:
0.00 0.00 0.00 0.00
0.50 0.00 0.00 0.00
0.67 0.26 0.00 0.00
-0.00 0.15 -0.28 0.00
The result of matrix a is:
-0.60 -0.50 0.70 0.30
0.00 -0.65 -0.05 0.55
0.00 0.00 -0.75 -1.14
0.00 0.00 0.00 0.50
The result of array b is:
-0.85 -0.25 0.87 -0.25
The final solution is:
0.70 0.00 -0.40 -0.50
Passed!
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'

View file

@ -1,19 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./nearn
loading db: cane4_0.db
loading db: cane4_1.db
loading db: cane4_2.db
Number of records: 1500
Finding the 5 closest neighbors.
[VXDRV] DEVCAPS: version=0, num_cores=2, num_warps=4, num_threads=4
1974 12 22 18 24 JOYCE 30.6 89.9 80 593 --> Distance=0.608276
1965 5 13 0 17 TONY 27.8 89.0 122 260 --> Distance=2.416610
1991 3 18 12 19 DEBBY 28.5 87.8 107 850 --> Distance=2.662703
1957 4 17 6 12 ALBERTO 32.5 87.8 54 510 --> Distance=3.330163
1964 8 5 6 9 FLORENCE 31.5 86.3 18 242 --> Distance=3.992490
Passed!
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'

View file

@ -1,19 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./saxpy
enter demo main
[VXDRV] DEVCAPS: version=0, num_cores=2, num_warps=4, num_threads=4
Attempting to create program from binary...
Read program from binary.
attempting to create input buffer
attempting to create output buffer
attempting to create kernel
setting up kernel args
attempting to enqueue write buffer
attempting to enqueue kernel
Elapsed time: 4 ms
Download destination buffer
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'

View file

@ -1,19 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sfilter
enter demo main
[VXDRV] DEVCAPS: version=0, num_cores=2, num_warps=4, num_threads=4
Attempting to create program from binary...
Read program from binary.
attempting to create input buffer
attempting to create output buffer
attempting to create kernel
setting up kernel args
attempting to enqueue write buffer
attempting to enqueue kernel
Elapsed time: 4 ms
Download destination buffer
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'

View file

@ -1,94 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sgemm -n32
[VXDRV] DEVCAPS: version=0, num_cores=2, num_warps=4, num_threads=4
Create context
Create program from kernel source
Upload source buffers
Execute the kernel
Elapsed time: 4 ms
Download destination buffer
Verify result
PASSED!
PERF: core0: instrs=180750, cycles=84306, IPC=2.143975
PERF: core0: ibuffer stalls=0
PERF: core0: scoreboard stalls=0
PERF: core0: alu unit stalls=0
PERF: core0: lsu unit stalls=0
PERF: core0: csr unit stalls=0
PERF: core0: fpu unit stalls=0
PERF: core0: gpu unit stalls=0
PERF: core0: icache reads=0
PERF: core0: icache read misses=0 (hit ratio=-2147483648%)
PERF: core0: icache pipeline stalls=0
PERF: core0: icache reponse stalls=0
PERF: core0: dcache reads=0
PERF: core0: dcache writes=0
PERF: core0: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core0: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core0: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core0: dcache mshr stalls=0
PERF: core0: dcache pipeline stalls=0
PERF: core0: dcache reponse stalls=0
PERF: core0: smem reads=0
PERF: core0: smem writes=0
PERF: core0: smem bank stalls=0 (utilization=-2147483648%)
PERF: core0: dram requests=0 (reads=0, writes=0)
PERF: core0: dram stalls=0 (utilization=-2147483648%)
PERF: core0: dram average latency=-2147483648 cycles
PERF: core1: instrs=180752, cycles=84131, IPC=2.148459
PERF: core1: ibuffer stalls=0
PERF: core1: scoreboard stalls=0
PERF: core1: alu unit stalls=0
PERF: core1: lsu unit stalls=0
PERF: core1: csr unit stalls=0
PERF: core1: fpu unit stalls=0
PERF: core1: gpu unit stalls=0
PERF: core1: icache reads=0
PERF: core1: icache read misses=0 (hit ratio=-2147483648%)
PERF: core1: icache pipeline stalls=0
PERF: core1: icache reponse stalls=0
PERF: core1: dcache reads=0
PERF: core1: dcache writes=0
PERF: core1: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core1: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core1: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core1: dcache mshr stalls=0
PERF: core1: dcache pipeline stalls=0
PERF: core1: dcache reponse stalls=0
PERF: core1: smem reads=0
PERF: core1: smem writes=0
PERF: core1: smem bank stalls=0 (utilization=-2147483648%)
PERF: core1: dram requests=0 (reads=0, writes=0)
PERF: core1: dram stalls=0 (utilization=-2147483648%)
PERF: core1: dram average latency=-2147483648 cycles
PERF: instrs=361502, cycles=84306, IPC=4.287975
PERF: ibuffer stalls=0
PERF: scoreboard stalls=0
PERF: alu unit stalls=0
PERF: lsu unit stalls=0
PERF: csr unit stalls=0
PERF: fpu unit stalls=0
PERF: gpu unit stalls=0
PERF: icache reads=0
PERF: icache read misses=0 (hit ratio=-2147483648%)
PERF: icache pipeline stalls=0
PERF: icache reponse stalls=0
PERF: dcache reads=0
PERF: dcache writes=0
PERF: dcache read misses=0 (hit ratio=-2147483648%)
PERF: dcache write misses=0 (hit ratio=-2147483648%)
PERF: dcache bank stalls=0 (utilization=-2147483648%)
PERF: dcache mshr stalls=0
PERF: dcache pipeline stalls=0
PERF: dcache reponse stalls=0
PERF: smem reads=0
PERF: smem writes=0
PERF: smem bank stalls=0 (utilization=-2147483648%)
PERF: dram requests=0 (reads=0, writes=0)
PERF: dram stalls=0 (utilization=-2147483648%)
PERF: dram average latency=-2147483648 cycles
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'

View file

@ -1,3 +0,0 @@
# Generated by Platform Interface Manager user_clock_config.tcl
afu-image/clock-frequency-low:92.0
afu-image/clock-frequency-high:184

View file

@ -1,95 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./vecadd -n64
[VXDRV] DEVCAPS: version=0, num_cores=2, num_warps=4, num_threads=4
Create context
Allocate device buffers
Create program from kernel source
Upload source buffers
Execute the kernel
Elapsed time: 4 ms
Download destination buffer
Verify result
PASSED!
PERF: core0: instrs=2981, cycles=5416, IPC=0.550406
PERF: core0: ibuffer stalls=0
PERF: core0: scoreboard stalls=0
PERF: core0: alu unit stalls=0
PERF: core0: lsu unit stalls=0
PERF: core0: csr unit stalls=0
PERF: core0: fpu unit stalls=0
PERF: core0: gpu unit stalls=0
PERF: core0: icache reads=0
PERF: core0: icache read misses=0 (hit ratio=-2147483648%)
PERF: core0: icache pipeline stalls=0
PERF: core0: icache reponse stalls=0
PERF: core0: dcache reads=0
PERF: core0: dcache writes=0
PERF: core0: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core0: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core0: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core0: dcache mshr stalls=0
PERF: core0: dcache pipeline stalls=0
PERF: core0: dcache reponse stalls=0
PERF: core0: smem reads=0
PERF: core0: smem writes=0
PERF: core0: smem bank stalls=0 (utilization=-2147483648%)
PERF: core0: dram requests=0 (reads=0, writes=0)
PERF: core0: dram stalls=0 (utilization=-2147483648%)
PERF: core0: dram average latency=-2147483648 cycles
PERF: core1: instrs=2983, cycles=5353, IPC=0.557258
PERF: core1: ibuffer stalls=0
PERF: core1: scoreboard stalls=0
PERF: core1: alu unit stalls=0
PERF: core1: lsu unit stalls=0
PERF: core1: csr unit stalls=0
PERF: core1: fpu unit stalls=0
PERF: core1: gpu unit stalls=0
PERF: core1: icache reads=0
PERF: core1: icache read misses=0 (hit ratio=-2147483648%)
PERF: core1: icache pipeline stalls=0
PERF: core1: icache reponse stalls=0
PERF: core1: dcache reads=0
PERF: core1: dcache writes=0
PERF: core1: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core1: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core1: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core1: dcache mshr stalls=0
PERF: core1: dcache pipeline stalls=0
PERF: core1: dcache reponse stalls=0
PERF: core1: smem reads=0
PERF: core1: smem writes=0
PERF: core1: smem bank stalls=0 (utilization=-2147483648%)
PERF: core1: dram requests=0 (reads=0, writes=0)
PERF: core1: dram stalls=0 (utilization=-2147483648%)
PERF: core1: dram average latency=-2147483648 cycles
PERF: instrs=5964, cycles=5416, IPC=1.101182
PERF: ibuffer stalls=0
PERF: scoreboard stalls=0
PERF: alu unit stalls=0
PERF: lsu unit stalls=0
PERF: csr unit stalls=0
PERF: fpu unit stalls=0
PERF: gpu unit stalls=0
PERF: icache reads=0
PERF: icache read misses=0 (hit ratio=-2147483648%)
PERF: icache pipeline stalls=0
PERF: icache reponse stalls=0
PERF: dcache reads=0
PERF: dcache writes=0
PERF: dcache read misses=0 (hit ratio=-2147483648%)
PERF: dcache write misses=0 (hit ratio=-2147483648%)
PERF: dcache bank stalls=0 (utilization=-2147483648%)
PERF: dcache mshr stalls=0
PERF: dcache pipeline stalls=0
PERF: dcache reponse stalls=0
PERF: smem reads=0
PERF: smem writes=0
PERF: smem bank stalls=0 (utilization=-2147483648%)
PERF: dram requests=0 (reads=0, writes=0)
PERF: dram stalls=0 (utilization=-2147483648%)
PERF: dram average latency=-2147483648 cycles
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'

View file

@ -1,17 +0,0 @@
Fitter Status : Successful - Sat Mar 6 02:49:17 2021
Quartus Prime Version : 19.2.0 Build 57 06/24/2019 Patches 0.01rc SJ Pro Edition
Revision Name : afu_default
Top-level Entity Name : dcp_top
Family : Arria 10
Device : 10AX115N2F40E2LG
Timing Models : Final
Logic utilization (in ALMs) : 117,451 / 427,200 ( 27 % )
Total registers : 173797
Total pins : 310 / 826 ( 38 % )
Total virtual pins : 0
Total block memory bits : 4,356,616 / 55,562,240 ( 8 % )
Total RAM Blocks : 713 / 2,713 ( 26 % )
Total DSP Blocks : 112 / 1,518 ( 7 % )
Total HSSI RX channels : 12 / 48 ( 25 % )
Total HSSI TX channels : 12 / 48 ( 25 % )
Total PLLs : 25 / 112 ( 22 % )

File diff suppressed because it is too large Load diff

View file

@ -1,4 +0,0 @@
Synthesis Status : Successful - Sat Mar 6 01:57:55 2021
Revision Name : afu_default
Top-level Entity Name : dcp_top
Family : Arria 10

File diff suppressed because it is too large Load diff

View file

@ -1,29 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./guassian
enter demo main
[VXDRV] DEVCAPS: version=0, num_cores=4, num_warps=4, num_threads=4
OK
The result of matrix m is:
0.00 0.00 0.00 0.00
0.50 0.00 0.00 0.00
0.67 0.26 0.00 0.00
-0.00 0.15 -0.28 0.00
The result of matrix a is:
-0.60 -0.50 0.70 0.30
0.00 -0.65 -0.05 0.55
0.00 0.00 -0.75 -1.14
0.00 0.00 0.00 0.50
The result of array b is:
-0.85 -0.25 0.87 -0.25
The final solution is:
0.70 0.00 -0.40 -0.50
Passed!
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'

View file

@ -1,19 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./nearn
loading db: cane4_0.db
loading db: cane4_1.db
loading db: cane4_2.db
Number of records: 1500
Finding the 5 closest neighbors.
[VXDRV] DEVCAPS: version=0, num_cores=4, num_warps=4, num_threads=4
1974 12 22 18 24 JOYCE 30.6 89.9 80 593 --> Distance=0.608276
1965 5 13 0 17 TONY 27.8 89.0 122 260 --> Distance=2.416610
1991 3 18 12 19 DEBBY 28.5 87.8 107 850 --> Distance=2.662703
1957 4 17 6 12 ALBERTO 32.5 87.8 54 510 --> Distance=3.330163
1964 8 5 6 9 FLORENCE 31.5 86.3 18 242 --> Distance=3.992490
Passed!
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'

View file

@ -1,19 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./saxpy
enter demo main
[VXDRV] DEVCAPS: version=0, num_cores=4, num_warps=4, num_threads=4
Attempting to create program from binary...
Read program from binary.
attempting to create input buffer
attempting to create output buffer
attempting to create kernel
setting up kernel args
attempting to enqueue write buffer
attempting to enqueue kernel
Elapsed time: 4 ms
Download destination buffer
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'

View file

@ -1,19 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sfilter
enter demo main
[VXDRV] DEVCAPS: version=0, num_cores=4, num_warps=4, num_threads=4
Attempting to create program from binary...
Read program from binary.
attempting to create input buffer
attempting to create output buffer
attempting to create kernel
setting up kernel args
attempting to enqueue write buffer
attempting to enqueue kernel
Elapsed time: 4 ms
Download destination buffer
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'

View file

@ -1,146 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sgemm -n32
[VXDRV] DEVCAPS: version=0, num_cores=4, num_warps=4, num_threads=4
Create context
Create program from kernel source
Upload source buffers
Execute the kernel
Elapsed time: 3 ms
Download destination buffer
Verify result
PASSED!
PERF: core0: instrs=90890, cycles=51133, IPC=1.777521
PERF: core0: ibuffer stalls=10132
PERF: core0: scoreboard stalls=15251
PERF: core0: alu unit stalls=2423
PERF: core0: lsu unit stalls=3859
PERF: core0: csr unit stalls=0
PERF: core0: fpu unit stalls=0
PERF: core0: gpu unit stalls=0
PERF: core0: icache reads=23003
PERF: core0: icache read misses=73 (hit ratio=99%)
PERF: core0: icache pipeline stalls=7639
PERF: core0: icache reponse stalls=10132
PERF: core0: dcache reads=17502
PERF: core0: dcache writes=293
PERF: core0: dcache read misses=1041 (hit ratio=94%)
PERF: core0: dcache write misses=289 (hit ratio=1%)
PERF: core0: dcache bank stalls=8464 (utilization=67%)
PERF: core0: dcache mshr stalls=4228
PERF: core0: dcache pipeline stalls=9676
PERF: core0: dcache reponse stalls=76
PERF: core0: smem reads=2026
PERF: core0: smem writes=1599
PERF: core0: smem bank stalls=0 (utilization=100%)
PERF: core0: dram requests=479 (reads=186, writes=293)
PERF: core0: dram stalls=789 (utilization=37%)
PERF: core0: dram average latency=32 cycles
PERF: core1: instrs=90890, cycles=51143, IPC=1.777174
PERF: core1: ibuffer stalls=10158
PERF: core1: scoreboard stalls=15244
PERF: core1: alu unit stalls=2440
PERF: core1: lsu unit stalls=3894
PERF: core1: csr unit stalls=0
PERF: core1: fpu unit stalls=0
PERF: core1: gpu unit stalls=0
PERF: core1: icache reads=23003
PERF: core1: icache read misses=73 (hit ratio=99%)
PERF: core1: icache pipeline stalls=7685
PERF: core1: icache reponse stalls=10158
PERF: core1: dcache reads=17502
PERF: core1: dcache writes=293
PERF: core1: dcache read misses=1101 (hit ratio=93%)
PERF: core1: dcache write misses=289 (hit ratio=1%)
PERF: core1: dcache bank stalls=8464 (utilization=67%)
PERF: core1: dcache mshr stalls=4330
PERF: core1: dcache pipeline stalls=9347
PERF: core1: dcache reponse stalls=67
PERF: core1: smem reads=2026
PERF: core1: smem writes=1599
PERF: core1: smem bank stalls=0 (utilization=100%)
PERF: core1: dram requests=509 (reads=216, writes=293)
PERF: core1: dram stalls=715 (utilization=41%)
PERF: core1: dram average latency=32 cycles
PERF: core2: instrs=90890, cycles=51135, IPC=1.777452
PERF: core2: ibuffer stalls=10120
PERF: core2: scoreboard stalls=15237
PERF: core2: alu unit stalls=2406
PERF: core2: lsu unit stalls=3881
PERF: core2: csr unit stalls=0
PERF: core2: fpu unit stalls=0
PERF: core2: gpu unit stalls=0
PERF: core2: icache reads=23003
PERF: core2: icache read misses=73 (hit ratio=99%)
PERF: core2: icache pipeline stalls=7651
PERF: core2: icache reponse stalls=10120
PERF: core2: dcache reads=17502
PERF: core2: dcache writes=293
PERF: core2: dcache read misses=1040 (hit ratio=94%)
PERF: core2: dcache write misses=289 (hit ratio=1%)
PERF: core2: dcache bank stalls=8464 (utilization=67%)
PERF: core2: dcache mshr stalls=4234
PERF: core2: dcache pipeline stalls=9580
PERF: core2: dcache reponse stalls=75
PERF: core2: smem reads=2026
PERF: core2: smem writes=1599
PERF: core2: smem bank stalls=0 (utilization=100%)
PERF: core2: dram requests=478 (reads=185, writes=293)
PERF: core2: dram stalls=776 (utilization=38%)
PERF: core2: dram average latency=32 cycles
PERF: core3: instrs=90892, cycles=51134, IPC=1.777526
PERF: core3: ibuffer stalls=10116
PERF: core3: scoreboard stalls=15282
PERF: core3: alu unit stalls=2380
PERF: core3: lsu unit stalls=3862
PERF: core3: csr unit stalls=0
PERF: core3: fpu unit stalls=0
PERF: core3: gpu unit stalls=0
PERF: core3: icache reads=23005
PERF: core3: icache read misses=73 (hit ratio=99%)
PERF: core3: icache pipeline stalls=7688
PERF: core3: icache reponse stalls=10116
PERF: core3: dcache reads=17502
PERF: core3: dcache writes=293
PERF: core3: dcache read misses=1040 (hit ratio=94%)
PERF: core3: dcache write misses=289 (hit ratio=1%)
PERF: core3: dcache bank stalls=8464 (utilization=67%)
PERF: core3: dcache mshr stalls=4421
PERF: core3: dcache pipeline stalls=9647
PERF: core3: dcache reponse stalls=76
PERF: core3: smem reads=2026
PERF: core3: smem writes=1599
PERF: core3: smem bank stalls=0 (utilization=100%)
PERF: core3: dram requests=478 (reads=185, writes=293)
PERF: core3: dram stalls=684 (utilization=41%)
PERF: core3: dram average latency=32 cycles
PERF: instrs=363562, cycles=51143, IPC=7.108734
PERF: ibuffer stalls=40526
PERF: scoreboard stalls=61014
PERF: alu unit stalls=9649
PERF: lsu unit stalls=15496
PERF: csr unit stalls=0
PERF: fpu unit stalls=0
PERF: gpu unit stalls=0
PERF: icache reads=92014
PERF: icache read misses=292 (hit ratio=99%)
PERF: icache pipeline stalls=30663
PERF: icache reponse stalls=40526
PERF: dcache reads=70008
PERF: dcache writes=1172
PERF: dcache read misses=4222 (hit ratio=93%)
PERF: dcache write misses=1156 (hit ratio=1%)
PERF: dcache bank stalls=33856 (utilization=67%)
PERF: dcache mshr stalls=17213
PERF: dcache pipeline stalls=38250
PERF: dcache reponse stalls=294
PERF: smem reads=8104
PERF: smem writes=6396
PERF: smem bank stalls=0 (utilization=100%)
PERF: dram requests=1944 (reads=772, writes=1172)
PERF: dram stalls=2964 (utilization=39%)
PERF: dram average latency=32 cycles
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'

View file

@ -1,3 +0,0 @@
# Generated by Platform Interface Manager user_clock_config.tcl
afu-image/clock-frequency-low:93.0
afu-image/clock-frequency-high:186

View file

@ -1,147 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./vecadd -n64
[VXDRV] DEVCAPS: version=0, num_cores=4, num_warps=4, num_threads=4
Create context
Allocate device buffers
Create program from kernel source
Upload source buffers
Execute the kernel
Elapsed time: 4 ms
Download destination buffer
Verify result
PASSED!
PERF: core0: instrs=2019, cycles=5042, IPC=0.400436
PERF: core0: ibuffer stalls=86
PERF: core0: scoreboard stalls=451
PERF: core0: alu unit stalls=68
PERF: core0: lsu unit stalls=53
PERF: core0: csr unit stalls=0
PERF: core0: fpu unit stalls=0
PERF: core0: gpu unit stalls=0
PERF: core0: icache reads=804
PERF: core0: icache read misses=65 (hit ratio=91%)
PERF: core0: icache pipeline stalls=469
PERF: core0: icache reponse stalls=86
PERF: core0: dcache reads=114
PERF: core0: dcache writes=65
PERF: core0: dcache read misses=28 (hit ratio=75%)
PERF: core0: dcache write misses=60 (hit ratio=7%)
PERF: core0: dcache bank stalls=72 (utilization=71%)
PERF: core0: dcache mshr stalls=56
PERF: core0: dcache pipeline stalls=88
PERF: core0: dcache reponse stalls=1
PERF: core0: smem reads=70
PERF: core0: smem writes=63
PERF: core0: smem bank stalls=0 (utilization=100%)
PERF: core0: dram requests=109 (reads=44, writes=65)
PERF: core0: dram stalls=53 (utilization=67%)
PERF: core0: dram average latency=31 cycles
PERF: core1: instrs=2019, cycles=5041, IPC=0.400516
PERF: core1: ibuffer stalls=86
PERF: core1: scoreboard stalls=451
PERF: core1: alu unit stalls=68
PERF: core1: lsu unit stalls=53
PERF: core1: csr unit stalls=0
PERF: core1: fpu unit stalls=0
PERF: core1: gpu unit stalls=0
PERF: core1: icache reads=804
PERF: core1: icache read misses=65 (hit ratio=91%)
PERF: core1: icache pipeline stalls=470
PERF: core1: icache reponse stalls=86
PERF: core1: dcache reads=114
PERF: core1: dcache writes=65
PERF: core1: dcache read misses=28 (hit ratio=75%)
PERF: core1: dcache write misses=60 (hit ratio=7%)
PERF: core1: dcache bank stalls=72 (utilization=71%)
PERF: core1: dcache mshr stalls=56
PERF: core1: dcache pipeline stalls=88
PERF: core1: dcache reponse stalls=1
PERF: core1: smem reads=70
PERF: core1: smem writes=63
PERF: core1: smem bank stalls=0 (utilization=100%)
PERF: core1: dram requests=109 (reads=44, writes=65)
PERF: core1: dram stalls=52 (utilization=67%)
PERF: core1: dram average latency=31 cycles
PERF: core2: instrs=2019, cycles=5040, IPC=0.400595
PERF: core2: ibuffer stalls=86
PERF: core2: scoreboard stalls=451
PERF: core2: alu unit stalls=68
PERF: core2: lsu unit stalls=53
PERF: core2: csr unit stalls=0
PERF: core2: fpu unit stalls=0
PERF: core2: gpu unit stalls=0
PERF: core2: icache reads=804
PERF: core2: icache read misses=65 (hit ratio=91%)
PERF: core2: icache pipeline stalls=470
PERF: core2: icache reponse stalls=86
PERF: core2: dcache reads=114
PERF: core2: dcache writes=65
PERF: core2: dcache read misses=28 (hit ratio=75%)
PERF: core2: dcache write misses=60 (hit ratio=7%)
PERF: core2: dcache bank stalls=72 (utilization=71%)
PERF: core2: dcache mshr stalls=56
PERF: core2: dcache pipeline stalls=88
PERF: core2: dcache reponse stalls=1
PERF: core2: smem reads=70
PERF: core2: smem writes=63
PERF: core2: smem bank stalls=0 (utilization=100%)
PERF: core2: dram requests=109 (reads=44, writes=65)
PERF: core2: dram stalls=51 (utilization=68%)
PERF: core2: dram average latency=31 cycles
PERF: core3: instrs=2021, cycles=5043, IPC=0.400754
PERF: core3: ibuffer stalls=102
PERF: core3: scoreboard stalls=496
PERF: core3: alu unit stalls=73
PERF: core3: lsu unit stalls=53
PERF: core3: csr unit stalls=0
PERF: core3: fpu unit stalls=0
PERF: core3: gpu unit stalls=0
PERF: core3: icache reads=806
PERF: core3: icache read misses=65 (hit ratio=91%)
PERF: core3: icache pipeline stalls=439
PERF: core3: icache reponse stalls=102
PERF: core3: dcache reads=114
PERF: core3: dcache writes=65
PERF: core3: dcache read misses=28 (hit ratio=75%)
PERF: core3: dcache write misses=60 (hit ratio=7%)
PERF: core3: dcache bank stalls=72 (utilization=71%)
PERF: core3: dcache mshr stalls=56
PERF: core3: dcache pipeline stalls=88
PERF: core3: dcache reponse stalls=1
PERF: core3: smem reads=70
PERF: core3: smem writes=63
PERF: core3: smem bank stalls=0 (utilization=100%)
PERF: core3: dram requests=109 (reads=44, writes=65)
PERF: core3: dram stalls=50 (utilization=68%)
PERF: core3: dram average latency=30 cycles
PERF: instrs=8078, cycles=5043, IPC=1.601824
PERF: ibuffer stalls=360
PERF: scoreboard stalls=1849
PERF: alu unit stalls=277
PERF: lsu unit stalls=212
PERF: csr unit stalls=0
PERF: fpu unit stalls=0
PERF: gpu unit stalls=0
PERF: icache reads=3218
PERF: icache read misses=260 (hit ratio=91%)
PERF: icache pipeline stalls=1848
PERF: icache reponse stalls=360
PERF: dcache reads=456
PERF: dcache writes=260
PERF: dcache read misses=112 (hit ratio=75%)
PERF: dcache write misses=240 (hit ratio=7%)
PERF: dcache bank stalls=288 (utilization=71%)
PERF: dcache mshr stalls=224
PERF: dcache pipeline stalls=352
PERF: dcache reponse stalls=4
PERF: smem reads=280
PERF: smem writes=252
PERF: smem bank stalls=0 (utilization=100%)
PERF: dram requests=436 (reads=176, writes=260)
PERF: dram stalls=206 (utilization=67%)
PERF: dram average latency=30 cycles
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'

View file

@ -1,17 +0,0 @@
Fitter Status : Successful - Sat Mar 6 04:32:43 2021
Quartus Prime Version : 19.2.0 Build 57 06/24/2019 Patches 0.01rc SJ Pro Edition
Revision Name : afu_default
Top-level Entity Name : dcp_top
Family : Arria 10
Device : 10AX115N2F40E2LG
Timing Models : Final
Logic utilization (in ALMs) : 190,373 / 427,200 ( 45 % )
Total registers : 288074
Total pins : 310 / 826 ( 38 % )
Total virtual pins : 0
Total block memory bits : 7,135,144 / 55,562,240 ( 13 % )
Total RAM Blocks : 1,237 / 2,713 ( 46 % )
Total DSP Blocks : 224 / 1,518 ( 15 % )
Total HSSI RX channels : 12 / 48 ( 25 % )
Total HSSI TX channels : 12 / 48 ( 25 % )
Total PLLs : 25 / 112 ( 22 % )

File diff suppressed because it is too large Load diff

View file

@ -1,4 +0,0 @@
Synthesis Status : Successful - Sat Mar 6 03:10:30 2021
Revision Name : afu_default
Top-level Entity Name : dcp_top
Family : Arria 10

File diff suppressed because it is too large Load diff

View file

@ -1,29 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./guassian
enter demo main
[VXDRV] DEVCAPS: version=0, num_cores=8, num_warps=4, num_threads=4
OK
The result of matrix m is:
0.00 0.00 0.00 0.00
0.50 0.00 0.00 0.00
0.67 0.26 0.00 0.00
-0.00 0.15 -0.28 0.00
The result of matrix a is:
-0.60 -0.50 0.70 0.30
0.00 -0.65 -0.05 0.55
0.00 0.00 -0.75 -1.14
0.00 0.00 0.00 0.50
The result of array b is:
-0.85 -0.25 0.87 -0.25
The final solution is:
0.70 0.00 -0.40 -0.50
Passed!
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/guassian'

View file

@ -1,19 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./nearn
loading db: cane4_0.db
loading db: cane4_1.db
loading db: cane4_2.db
Number of records: 1500
Finding the 5 closest neighbors.
[VXDRV] DEVCAPS: version=0, num_cores=8, num_warps=4, num_threads=4
1974 12 22 18 24 JOYCE 30.6 89.9 80 593 --> Distance=0.608276
1965 5 13 0 17 TONY 27.8 89.0 122 260 --> Distance=2.416610
1991 3 18 12 19 DEBBY 28.5 87.8 107 850 --> Distance=2.662703
1957 4 17 6 12 ALBERTO 32.5 87.8 54 510 --> Distance=3.330163
1964 8 5 6 9 FLORENCE 31.5 86.3 18 242 --> Distance=3.992490
Passed!
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/nearn'

View file

@ -1,19 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./saxpy
enter demo main
[VXDRV] DEVCAPS: version=0, num_cores=8, num_warps=4, num_threads=4
Attempting to create program from binary...
Read program from binary.
attempting to create input buffer
attempting to create output buffer
attempting to create kernel
setting up kernel args
attempting to enqueue write buffer
attempting to enqueue kernel
Elapsed time: 4 ms
Download destination buffer
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/saxpy'

View file

@ -1,19 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sfilter
enter demo main
[VXDRV] DEVCAPS: version=0, num_cores=8, num_warps=4, num_threads=4
Attempting to create program from binary...
Read program from binary.
attempting to create input buffer
attempting to create output buffer
attempting to create kernel
setting up kernel args
attempting to enqueue write buffer
attempting to enqueue kernel
Elapsed time: 4 ms
Download destination buffer
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sfilter'

View file

@ -1,250 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./sgemm -n32
[VXDRV] DEVCAPS: version=0, num_cores=8, num_warps=4, num_threads=4
Create context
Create program from kernel source
Upload source buffers
Execute the kernel
Elapsed time: 4 ms
Download destination buffer
Verify result
PASSED!
PERF: core0: instrs=45962, cycles=25060, IPC=1.834078
PERF: core0: ibuffer stalls=0
PERF: core0: scoreboard stalls=0
PERF: core0: alu unit stalls=0
PERF: core0: lsu unit stalls=0
PERF: core0: csr unit stalls=0
PERF: core0: fpu unit stalls=0
PERF: core0: gpu unit stalls=0
PERF: core0: icache reads=0
PERF: core0: icache read misses=0 (hit ratio=-2147483648%)
PERF: core0: icache pipeline stalls=0
PERF: core0: icache reponse stalls=0
PERF: core0: dcache reads=0
PERF: core0: dcache writes=0
PERF: core0: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core0: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core0: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core0: dcache mshr stalls=0
PERF: core0: dcache pipeline stalls=0
PERF: core0: dcache reponse stalls=0
PERF: core0: smem reads=0
PERF: core0: smem writes=0
PERF: core0: smem bank stalls=0 (utilization=-2147483648%)
PERF: core0: dram requests=0 (reads=0, writes=0)
PERF: core0: dram stalls=0 (utilization=-2147483648%)
PERF: core0: dram average latency=-2147483648 cycles
PERF: core1: instrs=45962, cycles=25057, IPC=1.834298
PERF: core1: ibuffer stalls=0
PERF: core1: scoreboard stalls=0
PERF: core1: alu unit stalls=0
PERF: core1: lsu unit stalls=0
PERF: core1: csr unit stalls=0
PERF: core1: fpu unit stalls=0
PERF: core1: gpu unit stalls=0
PERF: core1: icache reads=0
PERF: core1: icache read misses=0 (hit ratio=-2147483648%)
PERF: core1: icache pipeline stalls=0
PERF: core1: icache reponse stalls=0
PERF: core1: dcache reads=0
PERF: core1: dcache writes=0
PERF: core1: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core1: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core1: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core1: dcache mshr stalls=0
PERF: core1: dcache pipeline stalls=0
PERF: core1: dcache reponse stalls=0
PERF: core1: smem reads=0
PERF: core1: smem writes=0
PERF: core1: smem bank stalls=0 (utilization=-2147483648%)
PERF: core1: dram requests=0 (reads=0, writes=0)
PERF: core1: dram stalls=0 (utilization=-2147483648%)
PERF: core1: dram average latency=-2147483648 cycles
PERF: core2: instrs=45962, cycles=25062, IPC=1.833932
PERF: core2: ibuffer stalls=0
PERF: core2: scoreboard stalls=0
PERF: core2: alu unit stalls=0
PERF: core2: lsu unit stalls=0
PERF: core2: csr unit stalls=0
PERF: core2: fpu unit stalls=0
PERF: core2: gpu unit stalls=0
PERF: core2: icache reads=0
PERF: core2: icache read misses=0 (hit ratio=-2147483648%)
PERF: core2: icache pipeline stalls=0
PERF: core2: icache reponse stalls=0
PERF: core2: dcache reads=0
PERF: core2: dcache writes=0
PERF: core2: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core2: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core2: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core2: dcache mshr stalls=0
PERF: core2: dcache pipeline stalls=0
PERF: core2: dcache reponse stalls=0
PERF: core2: smem reads=0
PERF: core2: smem writes=0
PERF: core2: smem bank stalls=0 (utilization=-2147483648%)
PERF: core2: dram requests=0 (reads=0, writes=0)
PERF: core2: dram stalls=0 (utilization=-2147483648%)
PERF: core2: dram average latency=-2147483648 cycles
PERF: core3: instrs=45962, cycles=25054, IPC=1.834517
PERF: core3: ibuffer stalls=0
PERF: core3: scoreboard stalls=0
PERF: core3: alu unit stalls=0
PERF: core3: lsu unit stalls=0
PERF: core3: csr unit stalls=0
PERF: core3: fpu unit stalls=0
PERF: core3: gpu unit stalls=0
PERF: core3: icache reads=0
PERF: core3: icache read misses=0 (hit ratio=-2147483648%)
PERF: core3: icache pipeline stalls=0
PERF: core3: icache reponse stalls=0
PERF: core3: dcache reads=0
PERF: core3: dcache writes=0
PERF: core3: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core3: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core3: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core3: dcache mshr stalls=0
PERF: core3: dcache pipeline stalls=0
PERF: core3: dcache reponse stalls=0
PERF: core3: smem reads=0
PERF: core3: smem writes=0
PERF: core3: smem bank stalls=0 (utilization=-2147483648%)
PERF: core3: dram requests=0 (reads=0, writes=0)
PERF: core3: dram stalls=0 (utilization=-2147483648%)
PERF: core3: dram average latency=-2147483648 cycles
PERF: core4: instrs=45962, cycles=25056, IPC=1.834371
PERF: core4: ibuffer stalls=0
PERF: core4: scoreboard stalls=0
PERF: core4: alu unit stalls=0
PERF: core4: lsu unit stalls=0
PERF: core4: csr unit stalls=0
PERF: core4: fpu unit stalls=0
PERF: core4: gpu unit stalls=0
PERF: core4: icache reads=0
PERF: core4: icache read misses=0 (hit ratio=-2147483648%)
PERF: core4: icache pipeline stalls=0
PERF: core4: icache reponse stalls=0
PERF: core4: dcache reads=0
PERF: core4: dcache writes=0
PERF: core4: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core4: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core4: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core4: dcache mshr stalls=0
PERF: core4: dcache pipeline stalls=0
PERF: core4: dcache reponse stalls=0
PERF: core4: smem reads=0
PERF: core4: smem writes=0
PERF: core4: smem bank stalls=0 (utilization=-2147483648%)
PERF: core4: dram requests=0 (reads=0, writes=0)
PERF: core4: dram stalls=0 (utilization=-2147483648%)
PERF: core4: dram average latency=-2147483648 cycles
PERF: core5: instrs=45962, cycles=25066, IPC=1.833639
PERF: core5: ibuffer stalls=0
PERF: core5: scoreboard stalls=0
PERF: core5: alu unit stalls=0
PERF: core5: lsu unit stalls=0
PERF: core5: csr unit stalls=0
PERF: core5: fpu unit stalls=0
PERF: core5: gpu unit stalls=0
PERF: core5: icache reads=0
PERF: core5: icache read misses=0 (hit ratio=-2147483648%)
PERF: core5: icache pipeline stalls=0
PERF: core5: icache reponse stalls=0
PERF: core5: dcache reads=0
PERF: core5: dcache writes=0
PERF: core5: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core5: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core5: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core5: dcache mshr stalls=0
PERF: core5: dcache pipeline stalls=0
PERF: core5: dcache reponse stalls=0
PERF: core5: smem reads=0
PERF: core5: smem writes=0
PERF: core5: smem bank stalls=0 (utilization=-2147483648%)
PERF: core5: dram requests=0 (reads=0, writes=0)
PERF: core5: dram stalls=0 (utilization=-2147483648%)
PERF: core5: dram average latency=-2147483648 cycles
PERF: core6: instrs=45962, cycles=25058, IPC=1.834225
PERF: core6: ibuffer stalls=0
PERF: core6: scoreboard stalls=0
PERF: core6: alu unit stalls=0
PERF: core6: lsu unit stalls=0
PERF: core6: csr unit stalls=0
PERF: core6: fpu unit stalls=0
PERF: core6: gpu unit stalls=0
PERF: core6: icache reads=0
PERF: core6: icache read misses=0 (hit ratio=-2147483648%)
PERF: core6: icache pipeline stalls=0
PERF: core6: icache reponse stalls=0
PERF: core6: dcache reads=0
PERF: core6: dcache writes=0
PERF: core6: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core6: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core6: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core6: dcache mshr stalls=0
PERF: core6: dcache pipeline stalls=0
PERF: core6: dcache reponse stalls=0
PERF: core6: smem reads=0
PERF: core6: smem writes=0
PERF: core6: smem bank stalls=0 (utilization=-2147483648%)
PERF: core6: dram requests=0 (reads=0, writes=0)
PERF: core6: dram stalls=0 (utilization=-2147483648%)
PERF: core6: dram average latency=-2147483648 cycles
PERF: core7: instrs=45964, cycles=25061, IPC=1.834085
PERF: core7: ibuffer stalls=0
PERF: core7: scoreboard stalls=0
PERF: core7: alu unit stalls=0
PERF: core7: lsu unit stalls=0
PERF: core7: csr unit stalls=0
PERF: core7: fpu unit stalls=0
PERF: core7: gpu unit stalls=0
PERF: core7: icache reads=0
PERF: core7: icache read misses=0 (hit ratio=-2147483648%)
PERF: core7: icache pipeline stalls=0
PERF: core7: icache reponse stalls=0
PERF: core7: dcache reads=0
PERF: core7: dcache writes=0
PERF: core7: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core7: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core7: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core7: dcache mshr stalls=0
PERF: core7: dcache pipeline stalls=0
PERF: core7: dcache reponse stalls=0
PERF: core7: smem reads=0
PERF: core7: smem writes=0
PERF: core7: smem bank stalls=0 (utilization=-2147483648%)
PERF: core7: dram requests=0 (reads=0, writes=0)
PERF: core7: dram stalls=0 (utilization=-2147483648%)
PERF: core7: dram average latency=-2147483648 cycles
PERF: instrs=367698, cycles=25066, IPC=14.669193
PERF: ibuffer stalls=0
PERF: scoreboard stalls=0
PERF: alu unit stalls=0
PERF: lsu unit stalls=0
PERF: csr unit stalls=0
PERF: fpu unit stalls=0
PERF: gpu unit stalls=0
PERF: icache reads=0
PERF: icache read misses=0 (hit ratio=-2147483648%)
PERF: icache pipeline stalls=0
PERF: icache reponse stalls=0
PERF: dcache reads=0
PERF: dcache writes=0
PERF: dcache read misses=0 (hit ratio=-2147483648%)
PERF: dcache write misses=0 (hit ratio=-2147483648%)
PERF: dcache bank stalls=0 (utilization=-2147483648%)
PERF: dcache mshr stalls=0
PERF: dcache pipeline stalls=0
PERF: dcache reponse stalls=0
PERF: smem reads=0
PERF: smem writes=0
PERF: smem bank stalls=0 (utilization=-2147483648%)
PERF: dram requests=0 (reads=0, writes=0)
PERF: dram stalls=0 (utilization=-2147483648%)
PERF: dram average latency=-2147483648 cycles
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/sgemm'

View file

@ -1,3 +0,0 @@
# Generated by Platform Interface Manager user_clock_config.tcl
afu-image/clock-frequency-low:90.0
afu-image/clock-frequency-high:180

View file

@ -1,251 +0,0 @@
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DNUM_WARPS=4 -DNUM_THREADS=4 -DL2_ENABLE=0 -DL3_ENABLE=0 -DPERF_ENABLE
make: Entering directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
rm -rf libvortex.so *.o .depend
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/driver/opae'
make: Entering directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'
LD_LIBRARY_PATH=/opt/pocl/runtime/lib:/nethome/lcooper43/vortex-dev-old/driver/opae:/opt/opae/1.1.2/lib:/opt/inteldevstack/a10_gx_pac_ias_1_2_1_pv/opencl/opencl_bsp/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/host/linux64/lib:/opt/intelFPGA_pro/quartus_19.2.0b57/hld/linux64/lib: ./vecadd -n64
[VXDRV] DEVCAPS: version=0, num_cores=8, num_warps=4, num_threads=4
Create context
Allocate device buffers
Create program from kernel source
Upload source buffers
Execute the kernel
Elapsed time: 3 ms
Download destination buffer
Verify result
PASSED!
PERF: core0: instrs=2019, cycles=4958, IPC=0.407221
PERF: core0: ibuffer stalls=0
PERF: core0: scoreboard stalls=0
PERF: core0: alu unit stalls=0
PERF: core0: lsu unit stalls=0
PERF: core0: csr unit stalls=0
PERF: core0: fpu unit stalls=0
PERF: core0: gpu unit stalls=0
PERF: core0: icache reads=0
PERF: core0: icache read misses=0 (hit ratio=-2147483648%)
PERF: core0: icache pipeline stalls=0
PERF: core0: icache reponse stalls=0
PERF: core0: dcache reads=0
PERF: core0: dcache writes=0
PERF: core0: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core0: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core0: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core0: dcache mshr stalls=0
PERF: core0: dcache pipeline stalls=0
PERF: core0: dcache reponse stalls=0
PERF: core0: smem reads=0
PERF: core0: smem writes=0
PERF: core0: smem bank stalls=0 (utilization=-2147483648%)
PERF: core0: dram requests=0 (reads=0, writes=0)
PERF: core0: dram stalls=0 (utilization=-2147483648%)
PERF: core0: dram average latency=-2147483648 cycles
PERF: core1: instrs=2019, cycles=4957, IPC=0.407303
PERF: core1: ibuffer stalls=0
PERF: core1: scoreboard stalls=0
PERF: core1: alu unit stalls=0
PERF: core1: lsu unit stalls=0
PERF: core1: csr unit stalls=0
PERF: core1: fpu unit stalls=0
PERF: core1: gpu unit stalls=0
PERF: core1: icache reads=0
PERF: core1: icache read misses=0 (hit ratio=-2147483648%)
PERF: core1: icache pipeline stalls=0
PERF: core1: icache reponse stalls=0
PERF: core1: dcache reads=0
PERF: core1: dcache writes=0
PERF: core1: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core1: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core1: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core1: dcache mshr stalls=0
PERF: core1: dcache pipeline stalls=0
PERF: core1: dcache reponse stalls=0
PERF: core1: smem reads=0
PERF: core1: smem writes=0
PERF: core1: smem bank stalls=0 (utilization=-2147483648%)
PERF: core1: dram requests=0 (reads=0, writes=0)
PERF: core1: dram stalls=0 (utilization=-2147483648%)
PERF: core1: dram average latency=-2147483648 cycles
PERF: core2: instrs=2019, cycles=4955, IPC=0.407467
PERF: core2: ibuffer stalls=0
PERF: core2: scoreboard stalls=0
PERF: core2: alu unit stalls=0
PERF: core2: lsu unit stalls=0
PERF: core2: csr unit stalls=0
PERF: core2: fpu unit stalls=0
PERF: core2: gpu unit stalls=0
PERF: core2: icache reads=0
PERF: core2: icache read misses=0 (hit ratio=-2147483648%)
PERF: core2: icache pipeline stalls=0
PERF: core2: icache reponse stalls=0
PERF: core2: dcache reads=0
PERF: core2: dcache writes=0
PERF: core2: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core2: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core2: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core2: dcache mshr stalls=0
PERF: core2: dcache pipeline stalls=0
PERF: core2: dcache reponse stalls=0
PERF: core2: smem reads=0
PERF: core2: smem writes=0
PERF: core2: smem bank stalls=0 (utilization=-2147483648%)
PERF: core2: dram requests=0 (reads=0, writes=0)
PERF: core2: dram stalls=0 (utilization=-2147483648%)
PERF: core2: dram average latency=-2147483648 cycles
PERF: core3: instrs=2019, cycles=4953, IPC=0.407632
PERF: core3: ibuffer stalls=0
PERF: core3: scoreboard stalls=0
PERF: core3: alu unit stalls=0
PERF: core3: lsu unit stalls=0
PERF: core3: csr unit stalls=0
PERF: core3: fpu unit stalls=0
PERF: core3: gpu unit stalls=0
PERF: core3: icache reads=0
PERF: core3: icache read misses=0 (hit ratio=-2147483648%)
PERF: core3: icache pipeline stalls=0
PERF: core3: icache reponse stalls=0
PERF: core3: dcache reads=0
PERF: core3: dcache writes=0
PERF: core3: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core3: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core3: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core3: dcache mshr stalls=0
PERF: core3: dcache pipeline stalls=0
PERF: core3: dcache reponse stalls=0
PERF: core3: smem reads=0
PERF: core3: smem writes=0
PERF: core3: smem bank stalls=0 (utilization=-2147483648%)
PERF: core3: dram requests=0 (reads=0, writes=0)
PERF: core3: dram stalls=0 (utilization=-2147483648%)
PERF: core3: dram average latency=-2147483648 cycles
PERF: core4: instrs=495, cycles=3388, IPC=0.146104
PERF: core4: ibuffer stalls=0
PERF: core4: scoreboard stalls=0
PERF: core4: alu unit stalls=0
PERF: core4: lsu unit stalls=0
PERF: core4: csr unit stalls=0
PERF: core4: fpu unit stalls=0
PERF: core4: gpu unit stalls=0
PERF: core4: icache reads=0
PERF: core4: icache read misses=0 (hit ratio=-2147483648%)
PERF: core4: icache pipeline stalls=0
PERF: core4: icache reponse stalls=0
PERF: core4: dcache reads=0
PERF: core4: dcache writes=0
PERF: core4: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core4: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core4: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core4: dcache mshr stalls=0
PERF: core4: dcache pipeline stalls=0
PERF: core4: dcache reponse stalls=0
PERF: core4: smem reads=0
PERF: core4: smem writes=0
PERF: core4: smem bank stalls=0 (utilization=-2147483648%)
PERF: core4: dram requests=0 (reads=0, writes=0)
PERF: core4: dram stalls=0 (utilization=-2147483648%)
PERF: core4: dram average latency=-2147483648 cycles
PERF: core5: instrs=495, cycles=3387, IPC=0.146147
PERF: core5: ibuffer stalls=0
PERF: core5: scoreboard stalls=0
PERF: core5: alu unit stalls=0
PERF: core5: lsu unit stalls=0
PERF: core5: csr unit stalls=0
PERF: core5: fpu unit stalls=0
PERF: core5: gpu unit stalls=0
PERF: core5: icache reads=0
PERF: core5: icache read misses=0 (hit ratio=-2147483648%)
PERF: core5: icache pipeline stalls=0
PERF: core5: icache reponse stalls=0
PERF: core5: dcache reads=0
PERF: core5: dcache writes=0
PERF: core5: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core5: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core5: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core5: dcache mshr stalls=0
PERF: core5: dcache pipeline stalls=0
PERF: core5: dcache reponse stalls=0
PERF: core5: smem reads=0
PERF: core5: smem writes=0
PERF: core5: smem bank stalls=0 (utilization=-2147483648%)
PERF: core5: dram requests=0 (reads=0, writes=0)
PERF: core5: dram stalls=0 (utilization=-2147483648%)
PERF: core5: dram average latency=-2147483648 cycles
PERF: core6: instrs=495, cycles=3386, IPC=0.146190
PERF: core6: ibuffer stalls=0
PERF: core6: scoreboard stalls=0
PERF: core6: alu unit stalls=0
PERF: core6: lsu unit stalls=0
PERF: core6: csr unit stalls=0
PERF: core6: fpu unit stalls=0
PERF: core6: gpu unit stalls=0
PERF: core6: icache reads=0
PERF: core6: icache read misses=0 (hit ratio=-2147483648%)
PERF: core6: icache pipeline stalls=0
PERF: core6: icache reponse stalls=0
PERF: core6: dcache reads=0
PERF: core6: dcache writes=0
PERF: core6: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core6: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core6: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core6: dcache mshr stalls=0
PERF: core6: dcache pipeline stalls=0
PERF: core6: dcache reponse stalls=0
PERF: core6: smem reads=0
PERF: core6: smem writes=0
PERF: core6: smem bank stalls=0 (utilization=-2147483648%)
PERF: core6: dram requests=0 (reads=0, writes=0)
PERF: core6: dram stalls=0 (utilization=-2147483648%)
PERF: core6: dram average latency=-2147483648 cycles
PERF: core7: instrs=495, cycles=3384, IPC=0.146277
PERF: core7: ibuffer stalls=0
PERF: core7: scoreboard stalls=0
PERF: core7: alu unit stalls=0
PERF: core7: lsu unit stalls=0
PERF: core7: csr unit stalls=0
PERF: core7: fpu unit stalls=0
PERF: core7: gpu unit stalls=0
PERF: core7: icache reads=0
PERF: core7: icache read misses=0 (hit ratio=-2147483648%)
PERF: core7: icache pipeline stalls=0
PERF: core7: icache reponse stalls=0
PERF: core7: dcache reads=0
PERF: core7: dcache writes=0
PERF: core7: dcache read misses=0 (hit ratio=-2147483648%)
PERF: core7: dcache write misses=0 (hit ratio=-2147483648%)
PERF: core7: dcache bank stalls=0 (utilization=-2147483648%)
PERF: core7: dcache mshr stalls=0
PERF: core7: dcache pipeline stalls=0
PERF: core7: dcache reponse stalls=0
PERF: core7: smem reads=0
PERF: core7: smem writes=0
PERF: core7: smem bank stalls=0 (utilization=-2147483648%)
PERF: core7: dram requests=0 (reads=0, writes=0)
PERF: core7: dram stalls=0 (utilization=-2147483648%)
PERF: core7: dram average latency=-2147483648 cycles
PERF: instrs=10056, cycles=4958, IPC=2.028237
PERF: ibuffer stalls=0
PERF: scoreboard stalls=0
PERF: alu unit stalls=0
PERF: lsu unit stalls=0
PERF: csr unit stalls=0
PERF: fpu unit stalls=0
PERF: gpu unit stalls=0
PERF: icache reads=0
PERF: icache read misses=0 (hit ratio=-2147483648%)
PERF: icache pipeline stalls=0
PERF: icache reponse stalls=0
PERF: dcache reads=0
PERF: dcache writes=0
PERF: dcache read misses=0 (hit ratio=-2147483648%)
PERF: dcache write misses=0 (hit ratio=-2147483648%)
PERF: dcache bank stalls=0 (utilization=-2147483648%)
PERF: dcache mshr stalls=0
PERF: dcache pipeline stalls=0
PERF: dcache reponse stalls=0
PERF: smem reads=0
PERF: smem writes=0
PERF: smem bank stalls=0 (utilization=-2147483648%)
PERF: dram requests=0 (reads=0, writes=0)
PERF: dram stalls=0 (utilization=-2147483648%)
PERF: dram average latency=-2147483648 cycles
make: Leaving directory '/nethome/lcooper43/vortex-dev-old/benchmarks/opencl/vecadd'

View file

@ -1,51 +0,0 @@
#!/bin/bash
# For every generated configuration script in rtl/configs: source it, rebuild
# hw / runtime / the rtlsim driver, then build and run each benchmark in
# parallel, capturing each run's output in test_outputs/<config>-<benchmark>.log.
set -e

mkdir -p test_outputs
output_dir="$(pwd)/test_outputs"

# Generate the configuration scripts and list what was produced.
(
    cd rtl
    python3 gen_synth_configs.py
    ls -l configs
)
config_location=rtl/configs

declare -a test_names=("sgemm" "saxpy" "bfs" "guassian" "vecadd" "nearn" "sfilter")

# Check up front that every benchmark directory exists.
for test_name in "${test_names[@]}"; do
    if [ -d "benchmarks/new_opencl/$test_name" ]; then
        continue
    fi
    echo "Unknown benchmark $test_name"
    exit 1
done

# Clean, build and run the benchmark named by $test_name for the configuration
# named by $base. Changes directory, so it is only ever called in a subshell.
run_one_benchmark() {
    echo "Running $base-$test_name..."
    cd "benchmarks/new_opencl/$test_name"
    make clean
    make
    make run-rtlsim 2>&1 | tee "$output_dir/$base-$test_name.log"
}

for filename in "$config_location"/*.sh; do
    # <dir>/<base>.sh -> <base>
    name=${filename##*/}
    base=${name%.*}

    # Bring the configuration into the current shell before building.
    . "$filename"
    make -C hw build_config
    make -C runtime build_config
    make -C driver/rtlsim

    # One background subshell per benchmark; join them all before moving on
    # to the next configuration.
    for test_name in "${test_names[@]}"; do
        ( run_one_benchmark ) &
    done
    wait
done

View file

@ -1,116 +0,0 @@
-build.sh-
Description: Makes the build in the opae directory with the specified core
count and optional performance profiling. If a build already
exists, a make clean command is run before the build. Script waits
until the inteldev script or quartus program is finished running.
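Usage: ./build.sh -c [1|2|4|8|16] [-p] [-w] [-h]
Options:
-c
Core count (1, 2, 4, 8, or 16).
-p
Performance profiling enable. This is a flag and takes no value; when
given, the build is run with PERF=1 so that "+define+PERF_ENABLE" is
included.
-w
Wait for the build to complete before the script returns.
-h
Print a usage summary and exit.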
_______________________________________________________________________________
-build_all_perf.sh-
Description: Runs build.sh with performance profiling enabled for all valid
core configurations.
_______________________________________________________________________________
_______________________________________________________________________________
-program_fpga.sh-
Description: Signs and programs the fpga for a specified core count. Prompts
for PACSign are all automatically answered 'yes'.
Usage: ./program_fpga.sh -c [1|2|4|8|16]
Options:
-c
Core count (1, 2, 4, 8, or 16).
_______________________________________________________________________________
_______________________________________________________________________________
-gather_perf_results.sh-
Description: Creates directory named perf_YYYY_MM_DD and core subfolders in
evaluation. Copies relevant build output files to specified core
directory. Runs and redirects outputs of sgemm, vecadd, saxpy,
sfilter, nearn, and gaussian benchmarks to specified core
directory. Build should already be made before running this.
Usage: ./gather_perf_results.sh -c [1|2|4|8|16]
Options:
-c
Core count (1, 2, 4, 8, or 16).
_______________________________________________________________________________
-gather_all_perf_results.sh-
Description: Programs fpga and runs gather_perf_results.sh for all valid core
configurations. All builds should already be made before running
this.
_______________________________________________________________________________
_______________________________________________________________________________
-export_csv.sh-
Description: Creates specified .csv output file from an input directory, file,
and parameter. The .csv file contains two columns: cores, and the input
parameter. The output file is located within the directory specified with -d.
Usage: ./export_csv.sh -c [cores] -d [directory] -i [input filename] -o
[output filename] -p '[parameter]'
Example: ./export_csv.sh -c 16 -d perf_2021_03_07 -i sgemm.result -o output.csv
-p 'PERF: scoreboard stalls'
Options:
-c
Upper limit of cores to be read in. Core directories should exist in
the directory specified by -d e.g. 1c, 2c, 4c for -c 4.
-d
The directory of the form perf_{date} located in the evaluation
directory.
-i
The input filename located in each core directory within the
directory specified by -d.
-o
The output filename to be created within the directory specified
by -d.
-p
The parameter corresponding to the core count in the .csv file. The
full name of the parameter from the start of the line should be
inputted to avoid the parameter name being matched multiple times.
_______________________________________________________________________________
-export_ipc_csv.sh-
Description: Runs export_csv.sh for the parameter IPC.
Usage: ./export_ipc_csv.sh -c [cores] -d [directory] -i [input filename] -o
[output filename]
Example: ./export_ipc_csv.sh -c 16 -d perf_2021_03_07 -i sgemm.result -o output.csv

View file

@ -1,49 +0,0 @@
#!/bin/bash
# Build the OPAE FPGA image for the requested core count.
#
# Usage: ./build.sh -c <cores> [-p] [-w] [-h]
#   -c <cores>  core count; must be 1, 2, 4, 8 or 16
#   -p          run the build with PERF=1 in the environment
#   -w          after starting the build, block until no matching build
#               process is left running
#   -h          print usage and exit
#
# BUILD_DIR is relative to the current working directory.

BUILD_DIR=../../hw/syn/opae

perf=0
wait=0

while getopts c:pwh flag
do
    case "${flag}" in
        c) cores=${OPTARG};; #1, 2, 4, 8, 16
        p) perf=1;; #perf counters enable
        w) wait=1;; # wait for build to complete
        h) echo "Usage: -c <cores> [-p perf] [-w wait] [-h help]"
           exit 0
        ;;
        \?)
           echo "Invalid option: -$OPTARG" 1>&2
           exit 1
        ;;
    esac
done

if [[ ! "$cores" =~ ^(1|2|4|8|16)$ ]]; then
    echo 'Invalid parameter for argument -c (1, 2, 4, 8, or 16 expected)'
    exit 1
fi

# Stop here rather than invoking make from the wrong directory.
cd "${BUILD_DIR}" || exit 1

# Clean a previous build of the same configuration before rebuilding.
# (The original tested "build_fpga_{$cores}c", a literal-brace name that
# never exists, so the clean step was silently skipped.)
if [ -d "./build_fpga_${cores}c" ]; then
    make "clean-fpga-${cores}c"
fi

if [ ${perf} = 1 ]; then
    PERF=1 make "fpga-${cores}c"
else
    make "fpga-${cores}c"
fi

if [ ${wait} = 1 ]; then
    # Give the build a moment to spawn its worker processes.
    sleep 30

    # With OPAE_PLATFORM_ROOT unset the pattern would become "|quartus"; the
    # empty alternative can match every process (including this script) and
    # the wait below would never finish. Only add it when it is non-empty.
    pattern="quartus"
    if [ -n "${OPAE_PLATFORM_ROOT}" ]; then
        pattern="${OPAE_PLATFORM_ROOT}|quartus"
    fi

    # Poll each matching process until it has exited.
    pids=($(pgrep -f "${pattern}"))
    for pid in "${pids[@]}"; do
        while kill -0 "${pid}" 2> /dev/null; do
            sleep 30
        done
    done
fi

View file

@ -1,7 +0,0 @@
#!/bin/bash
# Run build.sh with profiling (-p) and wait (-w) for each supported core
# count, one build after the other.
for i in 1 2 4 8 16; do
    echo "Building ${i} core build..."
    ./build.sh -c ${i} -p -w
    echo "Done ${i} core build."
done

View file

@ -1,33 +0,0 @@
#!/bin/bash
# Export one parameter per core count into a two-column CSV (cores,<param>).
#
# Usage: ./export_csv.sh -c <cores> -d <dir> -i <input file> -o <output file> -p '<param>'
#   -c  upper limit of the core counts to read (1, 2, 4, 8 or 16)
#   -d  directory name under .. holding the <n>c sub-directories
#   -i  input filename inside each ../<dir>/<n>c directory
#   -o  output filename, created as ../<dir>/<output file>
#   -p  text preceding '=' on the line to extract; it is used as a sed
#       pattern, so regex metacharacters and '/' are not escaped

while getopts c:d:i:o:p: flag
do
    case "${flag}" in
        c) cores=${OPTARG};; #1, 2, 4, 8, 16
        d) dir=${OPTARG};; #directory name (e.g. perf_2021_03_07)
        i) ifile=${OPTARG};; #input filename
        o) ofile=${OPTARG};; #output filename
        p) param=${OPTARG};; #parameter to be made into csv
    esac
done

if [[ ! "$cores" =~ ^(1|2|4|8|16)$ ]]; then
    echo 'Invalid parameter for argument -c (1, 2, 4, 8, or 16 expected)'
    exit 1
fi
if [ -z "$ifile" ]; then
    echo 'No input filename given for argument -i'
    exit 1
fi
if [ -z "$dir" ]; then
    echo 'No directory given for argument -d'
    exit 1
fi
# Without -o the redirections below would target the directory itself.
if [ -z "$ofile" ]; then
    echo 'No output filename given for argument -o'
    exit 1
fi
# Without -p the sed expression would match every line containing '='.
if [ -z "$param" ]; then
    echo 'No parameter given for argument -p'
    exit 1
fi

out="../${dir}/${ofile}"

# Pass user text as printf arguments, never as the format string, so a '%'
# in the parameter name cannot corrupt the header.
printf 'cores,%s\n' "${param}" > "${out}"

for ((i=1; i<=cores; i=i*2)); do
    # Capture the value first so every row ends with a newline even when the
    # parameter is not found in the input file.
    value=$(sed -n "s/${param}=\(.*\)/\1/p" < "../${dir}/${i}c/${ifile}")
    printf '%s,%s\n' "${i}" "${value}" >> "${out}"
done

View file

@ -1,32 +0,0 @@
#!/bin/bash
# Export the IPC value per core count into a two-column CSV (cores,IPC).
#
# Usage: ./export_ipc_csv.sh -c <cores> -d <dir> -i <input file> -o <output file>
#   -c  upper limit of the core counts to read (1, 2, 4, 8 or 16)
#   -d  directory name under .. holding the <n>c sub-directories
#   -i  input filename inside each ../<dir>/<n>c directory
#       (-f is accepted as an alias, since older error text referred to it)
#   -o  output filename, created as ../<dir>/<output file>

# The original option string was "c:d:f:o:" while the case arm handled "i",
# so the input filename could never be set and the script always bailed out.
while getopts c:d:i:f:o: flag
do
    case "${flag}" in
        c) cores=${OPTARG};; #1, 2, 4, 8, 16
        d) dir=${OPTARG};; #directory name (e.g. perf_2021_03_07)
        i|f) ifile=${OPTARG};; #input filename
        o) ofile=${OPTARG};; #output filename
    esac
done

if [[ ! "$cores" =~ ^(1|2|4|8|16)$ ]]; then
    echo 'Invalid parameter for argument -c (1, 2, 4, 8, or 16 expected)'
    exit 1
fi
if [ -z "$ifile" ]; then
    echo 'No input filename given for argument -i'
    exit 1
fi
if [ -z "$dir" ]; then
    echo 'No directory given for argument -d'
    exit 1
fi
# Without -o the redirections below would target the directory itself.
if [ -z "$ofile" ]; then
    echo 'No output filename given for argument -o'
    exit 1
fi

out="../${dir}/${ofile}"

# Header row; the trailing newline keeps the first data row on its own line.
printf 'cores,IPC\n' > "${out}"

for ((i=1; i<=cores; i=i*2)); do
    # sed keeps the lines that contain "IPC=" (dropping the "IPC=" text);
    # awk then takes the last field of the last such line.
    ipc=$(sed -n "s/IPC=\(.*\)/\1/p" < "../${dir}/${i}c/${ifile}" | awk 'END {print $NF}')
    printf '%s,%s\n' "${i}" "${ipc}" >> "${out}"
done

View file

@ -1,35 +0,0 @@
#!/bin/bash
# For every core count (1, 2, 4, 8, 16): copy the build reports into
# evaluation/perf_<date>/<n>c, then program the FPGA and store the output of
# each benchmark run next to them.
cd ../../hw/syn/opae/

date=$(date +%Y_%m_%d)
results_dir="../../../evaluation/perf_${date}"
mkdir -p "${results_dir}"

# Create one result directory per core count.
for i in 1 2 4 8 16; do
    mkdir -p "${results_dir}/${i}c"
done

# Copy the build log and the report files of every build.
for i in 1 2 4 8 16; do
    cp "./build_fpga_${i}c/build.log" "${results_dir}/${i}c/build.log"
    for report in afu_default.syn.summary afu_default.fit.summary afu_default.sta.summary user_clock_freq.txt; do
        cp "./build_fpga_${i}c/build/output_files/${report}" "${results_dir}/${i}c/${report}"
    done
done

# Continue from the scripts directory; the results path is re-based to match.
cd ../../../evaluation/scripts
results_dir="../perf_${date}"

for i in 1 2 4 8 16; do
    echo "Programming fpga for ${i} core build..."
    ./program_fpga.sh -c ${i}
    echo "Running tests for ${i} core build..."
    for app in sgemm vecadd saxpy sfilter nearn guassian; do
        ../../ci/blackbox.sh --driver=fpga --app=${app} --perf > "${results_dir}/${i}c/${app}.result"
    done
    echo "Done ${i} core build."
done

View file

@ -1,34 +0,0 @@
#!/bin/bash
# Collect the build reports and benchmark outputs of one core configuration
# into evaluation/perf_<date>/<n>c.
#
# Usage: ./gather_perf_results.sh -c [1|2|4|8|16]
cd ../../hw/syn/opae/

while getopts c: flag
do
    case "${flag}" in
        c) i=${OPTARG};; #cores: 1, 2, 4, 8, 16
    esac
done

if [[ ! "$i" =~ ^(1|2|4|8|16)$ ]]; then
    echo 'Invalid parameter for argument -c (1, 2, 4, 8, or 16 expected)'
    exit 1
fi

date=$(date +%Y_%m_%d)
results_dir="../../../evaluation/perf_${date}"
target_dir="${results_dir}/${i}c"
build_dir="./build_fpga_${i}c"

# -p also creates the dated parent directory.
mkdir -p "${target_dir}"

# Build log plus the report files of this build.
cp "${build_dir}/build.log" "${target_dir}/build.log"
for report in afu_default.syn.summary afu_default.fit.summary afu_default.sta.summary user_clock_freq.txt; do
    cp "${build_dir}/build/output_files/${report}" "${target_dir}/${report}"
done

# Run every benchmark on the FPGA and keep its output.
for app in sgemm vecadd saxpy sfilter nearn guassian; do
    ../../../ci/blackbox.sh --driver=fpga --app=${app} --perf > "${target_dir}/${app}.result"
done

View file

@ -1,19 +0,0 @@
#!/bin/bash
# Sign the bitstream of the given core configuration and program the FPGA.
#
# Usage: ./program_fpga.sh -c [1|2|4|8|16]
#
# The build directory path is relative to the current working directory.

while getopts c: flag
do
    case "${flag}" in
        c) i=${OPTARG};; #cores: 1, 2, 4, 8, 16
    esac
done

if [[ ! "$i" =~ ^(1|2|4|8|16)$ ]]; then
    echo 'Invalid parameter for argument -c (1, 2, 4, 8, or 16 expected)'
    exit 1
fi

# Without the build directory there is nothing to sign; stop instead of
# running the tools in whatever directory we happen to be in.
cd "../../hw/syn/opae/build_fpga_${i}c" || exit 1

# Feed 'y' to each of PACSign's prompts. Abort if signing fails so that a
# leftover vortex_afu_unsigned_ssl.gbs from an earlier run is not flashed.
printf "y\ny\ny\n" | PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs > /dev/null || exit 1

fpgasupdate vortex_afu_unsigned_ssl.gbs

View file

@ -1,8 +0,0 @@
build_name,Fmax_Slow_900mV_100C,m20k,logic_utilization,total_registers,total_power,static_power
1cl-2c-8w-4t-8Kl2-4Kd-1Ki,154.01,2.3590121636564687,16.956694756554306,99408.0,3.64785,1.88908
1cl-2c-8w-8t-16Kl2-8Kd-1Ki,137.78,2.3590121636564687,23.021769662921347,134668.0,4.29923,1.95073
1cl-2c-8w-8t-8Kl2-4Kd-1Ki,131.22,2.3590121636564687,23.179541198501873,134129.0,4.31822,1.94177
1cl-4c-16w-8t-16Kl2-8Kd-1Ki,106.37,4.128271286398821,62.116573033707866,356954.0,7.92994,2.28735
1cl-4c-8w-8t-16Kl2-4Kd-1Ki,118.51,4.128271286398821,43.340823970037455,251029.0,6.34737,2.09685
1cl-4c-8w-8t-16Kl2-8Kd-1Ki,123.49,4.128271286398821,43.34199438202247,250895.0,6.34987,2.11429
2cl-4c-8w-4t-8Kl2-4Kd-1Ki,132.75,6.1924069295982305,56.91058052434457,322475.0,7.63097,2.27641
1 build_name Fmax_Slow_900mV_100C m20k logic_utilization total_registers total_power static_power
2 1cl-2c-8w-4t-8Kl2-4Kd-1Ki 154.01 2.3590121636564687 16.956694756554306 99408.0 3.64785 1.88908
3 1cl-2c-8w-8t-16Kl2-8Kd-1Ki 137.78 2.3590121636564687 23.021769662921347 134668.0 4.29923 1.95073
4 1cl-2c-8w-8t-8Kl2-4Kd-1Ki 131.22 2.3590121636564687 23.179541198501873 134129.0 4.31822 1.94177
5 1cl-4c-16w-8t-16Kl2-8Kd-1Ki 106.37 4.128271286398821 62.116573033707866 356954.0 7.92994 2.28735
6 1cl-4c-8w-8t-16Kl2-4Kd-1Ki 118.51 4.128271286398821 43.340823970037455 251029.0 6.34737 2.09685
7 1cl-4c-8w-8t-16Kl2-8Kd-1Ki 123.49 4.128271286398821 43.34199438202247 250895.0 6.34987 2.11429
8 2cl-4c-8w-4t-8Kl2-4Kd-1Ki 132.75 6.1924069295982305 56.91058052434457 322475.0 7.63097 2.27641

View file

@ -1,71 +0,0 @@
`include "VX_define.vh"
// Arbitrates the CSR pipeline request port between the core CSR request
// interface and the CSR IO request interface, and steers the pipeline's
// response either to the commit interface or, through a skid buffer, to the
// CSR IO response interface depending on select_io_rsp.
module VX_csr_io_arb (
input wire clk,
input wire reset,
// bus select
// when set, the pipeline response is routed to the IO response path
input wire select_io_rsp,
// input requests
VX_csr_req_if csr_core_req_if,
VX_csr_io_req_if csr_io_req_if,
// output request
VX_csr_pipe_req_if csr_pipe_req_if,
// input response
VX_commit_if csr_pipe_rsp_if,
// outputs responses
VX_commit_if csr_commit_if,
VX_csr_io_rsp_if csr_io_rsp_if
);
// NOTE(review): clk and reset are flagged unused here although both are
// connected to csr_io_out_buffer below - confirm these macros are still wanted.
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
// write operand of a core request: the rs1 field cast to 32 bits when
// use_imm is set, otherwise rs1_data
wire [31:0] csr_core_req_data = csr_core_req_if.use_imm ? 32'(csr_core_req_if.rs1) : csr_core_req_if.rs1_data;
// requests
// a request is forwarded when either source is valid
assign csr_pipe_req_if.valid = csr_core_req_if.valid || csr_io_req_if.valid;
// wid, tmask, PC, rd and wb are always taken from the core request,
// including while an IO request is the one being forwarded
assign csr_pipe_req_if.wid = csr_core_req_if.wid;
assign csr_pipe_req_if.tmask = csr_core_req_if.tmask;
assign csr_pipe_req_if.PC = csr_core_req_if.PC;
// op_type, addr and data follow the core request while it is valid;
// otherwise an IO write maps to CSR_RW with the IO data and an IO read
// maps to CSR_RS with zero data
assign csr_pipe_req_if.op_type = csr_core_req_if.valid ? csr_core_req_if.op_type : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
assign csr_pipe_req_if.addr = csr_core_req_if.valid ? csr_core_req_if.addr : csr_io_req_if.addr;
assign csr_pipe_req_if.data = csr_core_req_if.valid ? csr_core_req_data : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
assign csr_pipe_req_if.rd = csr_core_req_if.rd;
assign csr_pipe_req_if.wb = csr_core_req_if.wb;
// asserted whenever the core request is not valid, regardless of whether
// an IO request is actually pending
assign csr_pipe_req_if.is_io = !csr_core_req_if.valid;
// core always takes priority over IO bus
assign csr_core_req_if.ready = csr_pipe_req_if.ready;
assign csr_io_req_if.ready = csr_pipe_req_if.ready && !csr_core_req_if.valid;
// responses
// ready signal of the IO-side buffer, fed back to the pipeline below
wire csr_io_rsp_ready;
// buffers the IO response; only element 0 of the pipeline response data
// is forwarded on this path
VX_skid_buffer #(
.DATAW (32)
) csr_io_out_buffer (
.clk (clk),
.reset (reset),
.valid_in (csr_pipe_rsp_if.valid & select_io_rsp),
.data_in (csr_pipe_rsp_if.data[0]),
.ready_in (csr_io_rsp_ready),
.valid_out (csr_io_rsp_if.valid),
.data_out (csr_io_rsp_if.data),
.ready_out (csr_io_rsp_if.ready)
);
// commit path: valid only when the response is not selected for IO;
// all other fields pass straight through from the pipeline response
assign csr_commit_if.valid = csr_pipe_rsp_if.valid & ~select_io_rsp;
assign csr_commit_if.wid = csr_pipe_rsp_if.wid;
assign csr_commit_if.tmask = csr_pipe_rsp_if.tmask;
assign csr_commit_if.PC = csr_pipe_rsp_if.PC;
assign csr_commit_if.rd = csr_pipe_rsp_if.rd;
assign csr_commit_if.wb = csr_pipe_rsp_if.wb;
assign csr_commit_if.eop = csr_pipe_rsp_if.eop;
assign csr_commit_if.data = csr_pipe_rsp_if.data;
// back-pressure comes from whichever consumer is currently selected
assign csr_pipe_rsp_if.ready = select_io_rsp ? csr_io_rsp_ready : csr_commit_if.ready;
endmodule

View file

@ -39,7 +39,7 @@ module VX_instr_demux (
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
.BUFFERED (1)
.USE_FASTREG (1)
) alu_buffer (
.clk (clk),
.reset (reset),
@ -57,7 +57,7 @@ module VX_instr_demux (
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.BUFFERED (1)
.USE_FASTREG (1)
) lsu_buffer (
.clk (clk),
.reset (reset),
@ -75,7 +75,7 @@ module VX_instr_demux (
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
.BUFFERED (1)
.USE_FASTREG (1)
) csr_buffer (
.clk (clk),
.reset (reset),
@ -94,7 +94,7 @@ module VX_instr_demux (
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
.BUFFERED (1)
.USE_FASTREG (1)
) fpu_buffer (
.clk (clk),
.reset (reset),
@ -116,7 +116,7 @@ module VX_instr_demux (
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
.BUFFERED (1)
.USE_FASTREG (1)
) gpu_buffer (
.clk (clk),
.reset (reset),

View file

@ -256,26 +256,19 @@ module VX_mem_unit # (
);
end else begin
// core to D-cache request
for (genvar i = 0; i < `DNUM_REQS; ++i) begin
VX_skid_buffer #(
.DATAW (`DCORE_ADDR_WIDTH + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH)
) core_req_buf (
.clk (clk),
.reset (reset),
.valid_in (dcache_core_req_if.valid[i]),
.data_in ({dcache_core_req_if.addr[i], dcache_core_req_if.rw[i], dcache_core_req_if.byteen[i], dcache_core_req_if.data[i], dcache_core_req_if.tag[i]}),
.ready_in (dcache_core_req_if.ready[i]),
.valid_out (dcache_req_if.valid[i]),
.data_out ({dcache_req_if.addr[i], dcache_req_if.rw[i], dcache_req_if.byteen[i], dcache_req_if.data[i], dcache_req_if.tag[i]}),
.ready_out (dcache_req_if.ready[i])
);
end
assign dcache_req_if.valid = dcache_core_req_if.valid;
assign dcache_req_if.addr = dcache_core_req_if.addr;
assign dcache_req_if.rw = dcache_core_req_if.rw;
assign dcache_req_if.byteen = dcache_core_req_if.byteen;
assign dcache_req_if.data = dcache_core_req_if.data;
assign dcache_req_if.tag = dcache_core_req_if.tag;
assign dcache_core_req_if.ready = dcache_req_if.ready;
// D-cache to core reponse
assign dcache_core_rsp_if.valid = dcache_rsp_if.valid;
assign dcache_core_rsp_if.tag = dcache_rsp_if.tag;
assign dcache_core_rsp_if.data = dcache_rsp_if.data;
assign dcache_rsp_if.ready = dcache_core_rsp_if.ready;
assign dcache_rsp_if.ready = dcache_core_rsp_if.ready;
end
wire [`DMEM_TAG_WIDTH-1:0] icache_mem_req_tag = `DMEM_TAG_WIDTH'(icache_mem_req_if.tag);

View file

@ -476,7 +476,7 @@ module VX_bank #(
VX_skid_buffer #(
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
.BUFFERED (NUM_BANKS == 1)
.USE_FASTREG (NUM_BANKS == 1)
) core_rsp_req (
.clk (clk),
.reset (reset),

View file

@ -107,7 +107,7 @@ module VX_cache_core_rsp_merge #(
VX_skid_buffer #(
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)),
.BUFFERED (1)
.USE_FASTREG (1)
) pipe_reg (
.clk (clk),
.reset (reset),
@ -156,7 +156,7 @@ module VX_cache_core_rsp_merge #(
for (genvar i = 0; i < NUM_REQS; i++) begin
VX_skid_buffer #(
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH),
.BUFFERED (1)
.USE_FASTREG (1)
) pipe_reg (
.clk (clk),
.reset (reset),

View file

@ -4,7 +4,7 @@ module VX_skid_buffer #(
parameter DATAW = 1,
parameter PASSTHRU = 0,
parameter NOBACKPRESSURE = 0,
parameter BUFFERED = 0
parameter USE_FASTREG = 0
) (
input wire clk,
input wire reset,
@ -50,7 +50,7 @@ module VX_skid_buffer #(
end else begin
if (BUFFERED) begin
if (USE_FASTREG) begin
reg [DATAW-1:0] data_out_r;
reg [DATAW-1:0] buffer;

View file

@ -39,69 +39,69 @@ int main(int argc, char **argv) {
if (argc == 1) {
#ifdef ALL_TESTS
std::string tests[] = {
"../../../benchmarks/riscv_tests/isa/rv32ui-p-add.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-addi.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-and.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-andi.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-auipc.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-beq.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-bge.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-bgeu.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-blt.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-bltu.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-bne.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-jal.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-jalr.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lb.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lbu.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lh.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lhu.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lui.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-lw.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-or.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-ori.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sb.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sh.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-simple.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sll.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-slli.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-slt.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-slti.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sltiu.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sltu.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sra.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-srai.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-srl.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-srli.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sub.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-sw.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-xor.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-xori.hex",
"../../../tests/riscv/isa/rv32ui-p-add.hex",
"../../../tests/riscv/isa/rv32ui-p-addi.hex",
"../../../tests/riscv/isa/rv32ui-p-and.hex",
"../../../tests/riscv/isa/rv32ui-p-andi.hex",
"../../../tests/riscv/isa/rv32ui-p-auipc.hex",
"../../../tests/riscv/isa/rv32ui-p-beq.hex",
"../../../tests/riscv/isa/rv32ui-p-bge.hex",
"../../../tests/riscv/isa/rv32ui-p-bgeu.hex",
"../../../tests/riscv/isa/rv32ui-p-blt.hex",
"../../../tests/riscv/isa/rv32ui-p-bltu.hex",
"../../../tests/riscv/isa/rv32ui-p-bne.hex",
"../../../tests/riscv/isa/rv32ui-p-jal.hex",
"../../../tests/riscv/isa/rv32ui-p-jalr.hex",
"../../../tests/riscv/isa/rv32ui-p-lb.hex",
"../../../tests/riscv/isa/rv32ui-p-lbu.hex",
"../../../tests/riscv/isa/rv32ui-p-lh.hex",
"../../../tests/riscv/isa/rv32ui-p-lhu.hex",
"../../../tests/riscv/isa/rv32ui-p-lui.hex",
"../../../tests/riscv/isa/rv32ui-p-lw.hex",
"../../../tests/riscv/isa/rv32ui-p-or.hex",
"../../../tests/riscv/isa/rv32ui-p-ori.hex",
"../../../tests/riscv/isa/rv32ui-p-sb.hex",
"../../../tests/riscv/isa/rv32ui-p-sh.hex",
"../../../tests/riscv/isa/rv32ui-p-simple.hex",
"../../../tests/riscv/isa/rv32ui-p-sll.hex",
"../../../tests/riscv/isa/rv32ui-p-slli.hex",
"../../../tests/riscv/isa/rv32ui-p-slt.hex",
"../../../tests/riscv/isa/rv32ui-p-slti.hex",
"../../../tests/riscv/isa/rv32ui-p-sltiu.hex",
"../../../tests/riscv/isa/rv32ui-p-sltu.hex",
"../../../tests/riscv/isa/rv32ui-p-sra.hex",
"../../../tests/riscv/isa/rv32ui-p-srai.hex",
"../../../tests/riscv/isa/rv32ui-p-srl.hex",
"../../../tests/riscv/isa/rv32ui-p-srli.hex",
"../../../tests/riscv/isa/rv32ui-p-sub.hex",
"../../../tests/riscv/isa/rv32ui-p-sw.hex",
"../../../tests/riscv/isa/rv32ui-p-xor.hex",
"../../../tests/riscv/isa/rv32ui-p-xori.hex",
#ifdef EXT_M_ENABLE
"../../../benchmarks/riscv_tests/isa/rv32um-p-div.hex",
"../../../benchmarks/riscv_tests/isa/rv32um-p-divu.hex",
"../../../benchmarks/riscv_tests/isa/rv32um-p-mul.hex",
"../../../benchmarks/riscv_tests/isa/rv32um-p-mulh.hex",
"../../../benchmarks/riscv_tests/isa/rv32um-p-mulhsu.hex",
"../../../benchmarks/riscv_tests/isa/rv32um-p-mulhu.hex",
"../../../benchmarks/riscv_tests/isa/rv32um-p-rem.hex",
"../../../benchmarks/riscv_tests/isa/rv32um-p-remu.hex",
"../../../tests/riscv/isa/rv32um-p-div.hex",
"../../../tests/riscv/isa/rv32um-p-divu.hex",
"../../../tests/riscv/isa/rv32um-p-mul.hex",
"../../../tests/riscv/isa/rv32um-p-mulh.hex",
"../../../tests/riscv/isa/rv32um-p-mulhsu.hex",
"../../../tests/riscv/isa/rv32um-p-mulhu.hex",
"../../../tests/riscv/isa/rv32um-p-rem.hex",
"../../../tests/riscv/isa/rv32um-p-remu.hex",
#endif
};
std::string tests_fp[] = {
#ifdef EXT_F_ENABLE
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fadd.hex",
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fmadd.hex",
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fmin.hex",
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fcmp.hex",
"../../../benchmarks/riscv_tests/isa/rv32uf-p-ldst.hex",
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fcvt.hex",
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fcvt_w.hex",
"../../../benchmarks/riscv_tests/isa/rv32uf-p-move.hex",
"../../../benchmarks/riscv_tests/isa/rv32uf-p-recoding.hex",
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fdiv.hex",
"../../../benchmarks/riscv_tests/isa/rv32uf-p-fclass.hex",
"../../../tests/riscv/isa/rv32uf-p-fadd.hex",
"../../../tests/riscv/isa/rv32uf-p-fmadd.hex",
"../../../tests/riscv/isa/rv32uf-p-fmin.hex",
"../../../tests/riscv/isa/rv32uf-p-fcmp.hex",
"../../../tests/riscv/isa/rv32uf-p-ldst.hex",
"../../../tests/riscv/isa/rv32uf-p-fcvt.hex",
"../../../tests/riscv/isa/rv32uf-p-fcvt_w.hex",
"../../../tests/riscv/isa/rv32uf-p-move.hex",
"../../../tests/riscv/isa/rv32uf-p-recoding.hex",
"../../../tests/riscv/isa/rv32uf-p-fdiv.hex",
"../../../tests/riscv/isa/rv32uf-p-fclass.hex",
#endif
};
@ -152,7 +152,7 @@ int main(int argc, char **argv) {
#else
char test[] = "../../../runtime/tests/simple/vx_simple.hex";
char test[] = "../../../tests/runtime/simple/vx_simple.hex";
std::cout << test << std::endl;

View file

@ -76,16 +76,16 @@ $(FPGA_BUILD_DIR)_4c/build/dcp.qpf:
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_4c
$(FPGA_BUILD_DIR)_8c/build/dcp.qpf:
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_8c
afu_synth_setup -s setup8.cfg $(FPGA_BUILD_DIR)_8c
$(FPGA_BUILD_DIR)_16c/build/dcp.qpf:
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_16c
afu_synth_setup -s setup16.cfg $(FPGA_BUILD_DIR)_16c
$(FPGA_BUILD_DIR)_32c/build/dcp.qpf:
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_32c
afu_synth_setup -s setup16.cfg $(FPGA_BUILD_DIR)_32c
$(FPGA_BUILD_DIR)_64c/build/dcp.qpf:
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_64c
afu_synth_setup -s setup16.cfg $(FPGA_BUILD_DIR)_64c
gen-sources-1c:
./gen_sources.sh $(CFLAGS) $(CONFIG1) > sources.txt

7
hw/syn/opae/setup16.cfg Normal file
View file

@ -0,0 +1,7 @@
+define+SYNTHESIS
+define+QUARTUS
vortex_afu16.json
QI:vortex_afu.qsf
C:sources.txt

7
hw/syn/opae/setup8.cfg Normal file
View file

@ -0,0 +1,7 @@
+define+SYNTHESIS
+define+QUARTUS
vortex_afu8.json
QI:vortex_afu.qsf
C:sources.txt

View file

@ -18,10 +18,7 @@
"mmio-status": 18,
"mmio-scope-read": 20,
"mmio-scope-write": 22,
"mmio-csr-core": 24,
"mmio-csr-addr": 26,
"mmio-csr-data": 28,
"mmio-csr-read": 30,
"mmio-dev-caps": 24,
"afu-top-interface":
{

View file

@ -18,10 +18,7 @@
"mmio-status": 18,
"mmio-scope-read": 20,
"mmio-scope-write": 22,
"mmio-csr-core": 24,
"mmio-csr-addr": 26,
"mmio-csr-data": 28,
"mmio-csr-read": 30,
"mmio-dev-caps": 24,
"afu-top-interface":
{

View file

@ -41,29 +41,29 @@ set_global_assignment -name VERILOG_MACRO NDEBUG
set_global_assignment -name MESSAGE_DISABLE 16818
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
#set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
#set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0
#set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
#set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
#set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
#set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
#set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
#set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
#set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
#set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
set_global_assignment -name USE_HIGH_SPEED_ADDER ON
set_global_assignment -name MUX_RESTRUCTURE ON
set_global_assignment -name ADV_NETLIST_OPT_SYNTH_WYSIWYG_REMAP ON
set_global_assignment -name PROGRAMMABLE_POWER_TECHNOLOGY_SETTING "FORCE ALL TILES WITH FAILING TIMING PATHS TO HIGH SPEED"
set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC ON
set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING ON
#set_global_assignment -name USE_HIGH_SPEED_ADDER ON
#set_global_assignment -name MUX_RESTRUCTURE ON
#set_global_assignment -name ADV_NETLIST_OPT_SYNTH_WYSIWYG_REMAP ON
#set_global_assignment -name PROGRAMMABLE_POWER_TECHNOLOGY_SETTING "FORCE ALL TILES WITH FAILING TIMING PATHS TO HIGH SPEED"
#set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC ON
#set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING ON
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
set_global_assignment -name SEED 1
#set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
#set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
#set_global_assignment -name SEED 1
switch $opts(family) {
"Arria 10" {

File diff suppressed because it is too large Load diff

Some files were not shown because too many files have changed in this diff Show more