mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 22:07:41 -04:00
enabling Makefile configuration with build folder support
This commit is contained in:
parent
99c91987fb
commit
6b81b26ffc
130 changed files with 1829 additions and 796 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
/build*
|
||||
/.vscode
|
42
.travis.yml
42
.travis.yml
|
@ -17,8 +17,6 @@ env:
|
|||
cache:
|
||||
directories:
|
||||
- $TOOLDIR
|
||||
- $HOME/build32
|
||||
- $HOME/build64
|
||||
|
||||
before_install:
|
||||
- if [ ! -d "$TOOLDIR" ] || [ -z "$(ls -A $TOOLDIR)" ]; then
|
||||
|
@ -34,50 +32,56 @@ stages:
|
|||
jobs:
|
||||
include:
|
||||
- stage: setup
|
||||
name: build32
|
||||
script:
|
||||
- rm -rf $HOME/build32 && cp -r $PWD $HOME/build32
|
||||
- rm -rf $HOME/build64 && cp -r $PWD $HOME/build64
|
||||
- make -C $HOME/build32 > /dev/null
|
||||
- XLEN=64 make -C $HOME/build64 > /dev/null
|
||||
- mkdir -p $HOME/build32 && cd $HOME/build32
|
||||
- ../configure
|
||||
- make -s > /dev/null
|
||||
- stage: setup
|
||||
name: build64
|
||||
script:
|
||||
- mkdir -p $HOME/build64 && cd $HOME/build64
|
||||
- XLEN=64 ../configure
|
||||
- make -s > /dev/null
|
||||
- stage: test
|
||||
name: unittest
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --unittest
|
||||
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --unittest
|
||||
- stage: test
|
||||
name: isa
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --isa
|
||||
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --isa
|
||||
- stage: test
|
||||
name: isa64
|
||||
script: cp -r $HOME/build64 build && cd build && XLEN=64 ./ci/travis_run.py ./ci/regression.sh --isa
|
||||
script: cd $HOME/build64 && ./ci/travis_run.py ./ci/regression.sh --isa
|
||||
- stage: test
|
||||
name: regression
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --regression
|
||||
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --regression
|
||||
- stage: test
|
||||
name: regression64
|
||||
script: cp -r $HOME/build64 build && cd build && XLEN=64 ./ci/travis_run.py ./ci/regression.sh --regression
|
||||
script: cd $HOME/build64 && ./ci/travis_run.py ./ci/regression.sh --regression
|
||||
- stage: test
|
||||
name: opencl
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --opencl
|
||||
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --opencl
|
||||
- stage: test
|
||||
name: cluster
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --cluster
|
||||
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --cluster
|
||||
- stage: test
|
||||
name: config
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --config
|
||||
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --config
|
||||
- stage: test
|
||||
name: debug
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --debug
|
||||
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --debug
|
||||
- stage: test
|
||||
name: stress0
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --stress0
|
||||
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --stress0
|
||||
- stage: test
|
||||
name: stress1
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --stress1
|
||||
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --stress1
|
||||
- stage: test
|
||||
name: synthesis
|
||||
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --synthesis
|
||||
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --synthesis
|
||||
- stage: test
|
||||
name: synthesis64
|
||||
script: cp -r $HOME/build64 build && cd build && XLEN=64 ./ci/travis_run.py ./ci/regression.sh --synthesis
|
||||
script: cd $HOME/build64 && ./ci/travis_run.py ./ci/regression.sh --synthesis
|
||||
|
||||
after_success:
|
||||
# Gather code coverage
|
||||
|
|
13
Makefile
13
Makefile
|
@ -1,5 +1,7 @@
|
|||
all:
|
||||
$(MAKE) -C third_party
|
||||
include config.mk
|
||||
|
||||
all:
|
||||
$(MAKE) -C $(VORTEX_HOME)/third_party
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C sim
|
||||
$(MAKE) -C kernel
|
||||
|
@ -14,15 +16,8 @@ clean:
|
|||
$(MAKE) -C tests clean
|
||||
|
||||
clean-all:
|
||||
$(MAKE) -C third_party clean
|
||||
$(MAKE) -C hw clean
|
||||
$(MAKE) -C sim clean
|
||||
$(MAKE) -C kernel clean
|
||||
$(MAKE) -C runtime clean
|
||||
$(MAKE) -C tests clean-all
|
||||
|
||||
crtlsim:
|
||||
$(MAKE) -C sim clean
|
||||
|
||||
brtlsim:
|
||||
$(MAKE) -C sim
|
||||
|
|
16
README.md
16
README.md
|
@ -51,17 +51,19 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
|
|||
$ sudo apt-get install build-essential
|
||||
$ sudo apt-get install git
|
||||
### Install Vortex codebase
|
||||
$ git clone --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
$ git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
$ cd Vortex
|
||||
### Install prebuilt toolchain
|
||||
# By default, the toolchain will install to /opt folder which requires sudo access. Alternatively, you could also install the toolchain to a different location of your choice by setting the TOOLDIR environment variable
|
||||
# By default, the toolchain will install to /opt folder which requires sudo access. Alternatively, you could also install the toolchain to a different location of your choice by setting TOOLDIR
|
||||
|
||||
$ export TOOLDIR=$HOME/tools
|
||||
$ ./ci/toolchain_install.sh --all
|
||||
$ TOOLDIR=$HOME/tools ./ci/toolchain_install.sh --all
|
||||
|
||||
### Set up environment variables
|
||||
$ source ./ci/toolchain_env.sh
|
||||
### Build Vortex sources
|
||||
$ make -s
|
||||
### Building Vortex
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
$ TOOLDIR=$HOME/tools ../configure
|
||||
$ make
|
||||
### Quick demo running vecadd OpenCL kernel on 2 cores
|
||||
$ ./ci/blackbox.sh --cores=2 --app=vecadd
|
||||
$ ./ci/blackbox.sh --cores=2 --app=vecadd
|
|
@ -30,7 +30,7 @@ show_help()
|
|||
}
|
||||
|
||||
SCRIPT_DIR=$(dirname "$0")
|
||||
VORTEX_HOME=$SCRIPT_DIR/..
|
||||
ROOT_DIR=$SCRIPT_DIR/..
|
||||
|
||||
DRIVER=simx
|
||||
APP=sgemm
|
||||
|
@ -134,16 +134,16 @@ case $DRIVER in
|
|||
DRIVER_PATH=
|
||||
;;
|
||||
simx)
|
||||
DRIVER_PATH=$VORTEX_HOME/runtime/simx
|
||||
DRIVER_PATH=$ROOT_DIR/runtime/simx
|
||||
;;
|
||||
rtlsim)
|
||||
DRIVER_PATH=$VORTEX_HOME/runtime/rtlsim
|
||||
DRIVER_PATH=$ROOT_DIR/runtime/rtlsim
|
||||
;;
|
||||
opae)
|
||||
DRIVER_PATH=$VORTEX_HOME/runtime/opae
|
||||
DRIVER_PATH=$ROOT_DIR/runtime/opae
|
||||
;;
|
||||
xrt)
|
||||
DRIVER_PATH=$VORTEX_HOME/runtime/xrt
|
||||
DRIVER_PATH=$ROOT_DIR/runtime/xrt
|
||||
;;
|
||||
*)
|
||||
echo "invalid driver: $DRIVER"
|
||||
|
@ -151,12 +151,12 @@ case $DRIVER in
|
|||
;;
|
||||
esac
|
||||
|
||||
if [ -d "$VORTEX_HOME/tests/opencl/$APP" ];
|
||||
if [ -d "$ROOT_DIR/tests/opencl/$APP" ];
|
||||
then
|
||||
APP_PATH=$VORTEX_HOME/tests/opencl/$APP
|
||||
elif [ -d "$VORTEX_HOME/tests/regression/$APP" ];
|
||||
APP_PATH=$ROOT_DIR/tests/opencl/$APP
|
||||
elif [ -d "$ROOT_DIR/tests/regression/$APP" ];
|
||||
then
|
||||
APP_PATH=$VORTEX_HOME/tests/regression/$APP
|
||||
APP_PATH=$ROOT_DIR/tests/regression/$APP
|
||||
else
|
||||
echo "Application folder not found: $APP"
|
||||
exit -1
|
||||
|
@ -204,10 +204,10 @@ export PERF_CLASS=$PERF_CLASS
|
|||
status=0
|
||||
|
||||
# ensure config update
|
||||
make -C $VORTEX_HOME/hw config > /dev/null
|
||||
make -C $ROOT_DIR/hw config > /dev/null
|
||||
|
||||
# ensure the stub driver is present
|
||||
make -C $VORTEX_HOME/runtime/stub > /dev/null
|
||||
make -C $ROOT_DIR/runtime/stub > /dev/null
|
||||
|
||||
if [ $DEBUG -ne 0 ]
|
||||
then
|
||||
|
|
41
ci/datagen.py
Executable file
41
ci/datagen.py
Executable file
|
@ -0,0 +1,41 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import struct
|
||||
import random
|
||||
import sys
|
||||
|
||||
def create_binary_file(n, filename):
|
||||
# Open the file in binary write mode
|
||||
with open(filename, 'wb') as f:
|
||||
# Write the integer N as 4 bytes
|
||||
f.write(struct.pack('i', n))
|
||||
# Generate and write N floating-point numbers
|
||||
for _ in range(n):
|
||||
# Generate a random float between 0 and 1
|
||||
num = random.random()
|
||||
# Write the float in IEEE 754 format (4 bytes)
|
||||
f.write(struct.pack('f', num))
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) != 3:
|
||||
print("Usage: script.py N filename")
|
||||
sys.exit(1)
|
||||
|
||||
n = int(sys.argv[1])
|
||||
filename = sys.argv[2]
|
||||
|
||||
create_binary_file(n, filename)
|
||||
print(f"Created binary file '{filename}' containing {n} floats.")
|
32
config.in
Normal file
32
config.in
Normal file
|
@ -0,0 +1,32 @@
|
|||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
VORTEX_HOME ?= @VORTEX_HOME@
|
||||
|
||||
XLEN ?= @XLEN@
|
||||
|
||||
TOOLDIR ?= @TOOLDIR@
|
||||
|
||||
LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex
|
||||
|
||||
ifeq ($(XLEN),64)
|
||||
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv64-gnu-toolchain
|
||||
else
|
||||
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv-gnu-toolchain
|
||||
endif
|
||||
|
||||
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
|
||||
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
|
||||
|
||||
VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime
|
||||
VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel
|
94
configure
vendored
Executable file
94
configure
vendored
Executable file
|
@ -0,0 +1,94 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright © 2019-2023
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# options
|
||||
XLEN=${XLEN:=32}
|
||||
TOOLDIR=${TOOLDIR:=/opt}
|
||||
|
||||
# project subdirectories to build
|
||||
SUBDIRS=("." "!ci" "!perf" "hw*" "kernel*" "runtime*" "sim*" "tests*")
|
||||
|
||||
# Get the directory of the script
|
||||
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
|
||||
|
||||
# Determine the current working directory
|
||||
CURRENT_DIR=$(pwd)
|
||||
|
||||
THIRD_PARTY_DIR=$SCRIPT_DIR/third_party
|
||||
|
||||
# Path to the template file
|
||||
TEMPLATE="$SCRIPT_DIR/config.in"
|
||||
|
||||
# Output file
|
||||
OUTPUT="config.mk"
|
||||
|
||||
# Function to process config.in and generate config.mk
|
||||
generate_config() {
|
||||
if [ -f "$TEMPLATE" ]; then
|
||||
# Replace tokens
|
||||
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g " "$TEMPLATE" > "$CURRENT_DIR/$OUTPUT"
|
||||
else
|
||||
echo "Template file $TEMPLATE not found."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Function to recursively copy Makefiles, skipping the current directory
|
||||
copy_makefiles() {
|
||||
local source_dir="$1"
|
||||
local target_dir="$2"
|
||||
#echo "source_dir=$source_dir, target_dir=$target_dir"
|
||||
|
||||
for pattern in "${SUBDIRS[@]}"; do
|
||||
if [[ "$pattern" == !* ]]; then
|
||||
local dir_to_copy="${pattern#!}" # Remove the "!" from the start
|
||||
cp -r "$source_dir/$dir_to_copy" "$target_dir/"
|
||||
elif [[ "$pattern" == "." ]]; then
|
||||
# Handle the current script directory explicitly
|
||||
if [ -f "$source_dir/Makefile" ]; then
|
||||
mkdir -p "$target_dir"
|
||||
cp "$source_dir/Makefile" "$target_dir"
|
||||
fi
|
||||
else
|
||||
# Use find to match the directory pattern and process each matched directory
|
||||
find "$source_dir" -type d -path "$source_dir/$pattern" 2>/dev/null | while read dir; do
|
||||
# Compute the relative path of the directory
|
||||
local rel_path="${dir#$source_dir}"
|
||||
rel_path="${rel_path#/}" # Remove leading slash, if present
|
||||
local full_target_dir="$target_dir/$rel_path"
|
||||
|
||||
# Function to copy and update file includes
|
||||
copy_and_update_includes() {
|
||||
local file="$1"
|
||||
local dest="$2/$file"
|
||||
if [ -f "$dir/$file" ]; then
|
||||
mkdir -p "$2"
|
||||
cp "$dir/$file" "$dest"
|
||||
fi
|
||||
}
|
||||
|
||||
# Copy and update Makefile and common.mk if they exist
|
||||
copy_and_update_includes "Makefile" "$full_target_dir"
|
||||
copy_and_update_includes "common.mk" "$full_target_dir"
|
||||
done
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
generate_config
|
||||
|
||||
if [ "$(realpath "$SCRIPT_DIR")" != "$(realpath "$CURRENT_DIR")" ]; then
|
||||
copy_makefiles "$SCRIPT_DIR" "$CURRENT_DIR"
|
||||
fi
|
|
@ -26,18 +26,17 @@
|
|||
3. Download the Vortex codebase:
|
||||
|
||||
```
|
||||
git clone --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
```
|
||||
|
||||
4. Install Vortex's prebuilt toolchain:
|
||||
|
||||
```
|
||||
$ cd vortex
|
||||
|
||||
# By default, the toolchain will install to /opt folder which requires sudo access. Alternatively, you could also install the toolchain to a different location of your choice by setting the TOOLDIR environment variable
|
||||
|
||||
# By default, the toolchain will install to /opt folder which requires sudo access. Alternatively, you could also install the toolchain to a different location of your choice by setting TOOLDIR
|
||||
|
||||
$ export TOOLDIR=$HOME/tools
|
||||
$ ./ci/toolchain_install.sh --all
|
||||
$ TOOLDIR=$HOME/tools ./ci/toolchain_install.sh --all
|
||||
```
|
||||
|
||||
5. Set up environment:
|
||||
|
@ -49,6 +48,9 @@
|
|||
6. Build Vortex
|
||||
|
||||
```
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
$ TOOLDIR=$HOME/tools ../configure
|
||||
$ make
|
||||
```
|
||||
|
||||
|
@ -77,18 +79,17 @@ Note: depending on the system, some of the toolchain may need to be recompiled f
|
|||
4. Download the Vortex codebase:
|
||||
|
||||
```
|
||||
git clone --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
```
|
||||
|
||||
5. Install Vortex's prebuilt toolchain:
|
||||
|
||||
```
|
||||
$ cd vortex
|
||||
|
||||
# By default, the toolchain will install to /opt folder which requires sudo access. Alternatively, you could also install the toolchain to a different location of your choice by setting TOOLDIR
|
||||
|
||||
# By default, the toolchain will install to /opt folder which requires sudo access. Alternatively, you could also install the toolchain to a different location of your choice by setting the TOOLDIR environment variable
|
||||
|
||||
$ export TOOLDIR=$HOME/tools
|
||||
$ ./ci/toolchain_install.sh --all
|
||||
$ TOOLDIR=$HOME/tools ./ci/toolchain_install.sh --all
|
||||
```
|
||||
|
||||
6. Set up environment:
|
||||
|
@ -100,5 +101,8 @@ Note: depending on the system, some of the toolchain may need to be recompiled f
|
|||
7. Build Vortex
|
||||
|
||||
```
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
$ TOOLDIR=$HOME/tools ../configure
|
||||
$ make
|
||||
```
|
||||
|
|
2
hw/.gitignore
vendored
2
hw/.gitignore
vendored
|
@ -1,2 +0,0 @@
|
|||
VX_config.h
|
||||
VX_types.h
|
|
@ -1,5 +1,9 @@
|
|||
RTL_DIR=./rtl
|
||||
SCRIPT_DIR=./scripts
|
||||
ROOT_DIR := $(realpath ..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
HW_DIR := $(VORTEX_HOME)/hw
|
||||
SCRIPT_DIR := $(HW_DIR)/scripts
|
||||
RTL_DIR := $(HW_DIR)/rtl
|
||||
|
||||
all: config
|
||||
|
||||
|
|
|
@ -11,11 +11,13 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
// Modified port of cast module from fpnew Libray
|
||||
// reference: https://github.com/pulp-platform/fpnew
|
||||
|
||||
`ifdef FPU_DSP
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
||||
parameter LATENCY = 1,
|
||||
parameter INT_WIDTH = 32,
|
||||
|
@ -314,3 +316,4 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
|
|||
);
|
||||
|
||||
endmodule
|
||||
`endif
|
||||
|
|
|
@ -11,12 +11,12 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
// Modified port of noncomp module from fpnew Libray
|
||||
// reference: https://github.com/pulp-platform/fpnew
|
||||
|
||||
`ifdef FPU_DSP
|
||||
|
||||
/// Modified port of noncomp module from fpnew Libray
|
||||
/// reference: https://github.com/pulp-platform/fpnew
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fncp_unit import VX_fpu_pkg::*; #(
|
||||
parameter LATENCY = 2,
|
||||
|
|
|
@ -11,11 +11,10 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifdef FPU_DSP
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
`ifdef FPU_DSP
|
||||
|
||||
module VX_fp_classifier import VX_fpu_pkg::*; #(
|
||||
parameter MAN_BITS = 23,
|
||||
parameter EXP_BITS = 8
|
||||
|
|
|
@ -11,12 +11,12 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
// Modified port of rouding module from fpnew Libray
|
||||
// reference: https://github.com/pulp-platform/fpnew
|
||||
|
||||
`ifdef FPU_DSP
|
||||
|
||||
/// Modified port of rouding module from fpnew Libray
|
||||
/// reference: https://github.com/pulp-platform/fpnew
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fp_rounding #(
|
||||
parameter DAT_WIDTH = 2 // Width of the abolute value, without sign bit
|
||||
|
|
|
@ -11,10 +11,10 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
`ifdef FPU_DSP
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
||||
parameter NUM_LANES = 5,
|
||||
parameter NUM_PES = `UP(NUM_LANES / `FCVT_PE_RATIO),
|
||||
|
|
|
@ -11,10 +11,10 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
`ifdef FPU_DSP
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fpu_div import VX_fpu_pkg::*; #(
|
||||
parameter NUM_LANES = 1,
|
||||
parameter NUM_PES = `UP(NUM_LANES / `FDIV_PE_RATIO),
|
||||
|
|
|
@ -11,6 +11,8 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifdef FPU_DPI
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
||||
|
@ -485,3 +487,4 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
|
|||
assign ready_in = per_core_ready_in[core_select];
|
||||
|
||||
endmodule
|
||||
`endif
|
||||
|
|
|
@ -11,10 +11,10 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
`ifdef FPU_DSP
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fpu_dsp import VX_fpu_pkg::*; #(
|
||||
parameter NUM_LANES = 4,
|
||||
parameter TAG_WIDTH = 4,
|
||||
|
|
|
@ -11,10 +11,10 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
`ifdef FPU_DSP
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fpu_fma import VX_fpu_pkg::*; #(
|
||||
parameter NUM_LANES = 1,
|
||||
parameter NUM_PES = `UP(NUM_LANES / `FMA_PE_RATIO),
|
||||
|
|
|
@ -11,10 +11,10 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
`ifdef FPU_FPNEW
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fpu_fpnew
|
||||
import VX_fpu_pkg::*;
|
||||
import fpnew_pkg::*;
|
||||
|
|
|
@ -11,10 +11,10 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
`ifdef FPU_DSP
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
||||
parameter NUM_LANES = 1,
|
||||
parameter NUM_PES = `UP(NUM_LANES / `FNCP_PE_RATIO),
|
||||
|
|
|
@ -11,10 +11,10 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
`ifdef FPU_DSP
|
||||
|
||||
`include "VX_fpu_define.vh"
|
||||
|
||||
module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
||||
parameter NUM_LANES = 1,
|
||||
parameter NUM_PES = `UP(NUM_LANES /`FSQRT_PE_RATIO),
|
||||
|
|
1
hw/syn/altera/.gitignore
vendored
1
hw/syn/altera/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
ip_cache/*
|
1
hw/syn/altera/common.mk
Normal file
1
hw/syn/altera/common.mk
Normal file
|
@ -0,0 +1 @@
|
|||
include ../common.mk
|
1
hw/syn/altera/opae/.gitignore
vendored
1
hw/syn/altera/opae/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
build*/*
|
|
@ -1,5 +1,8 @@
|
|||
ROOT_DIR := $(realpath ../../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
DEVICE_FAMILY ?= arria10
|
||||
XLEN ?= 32
|
||||
|
||||
PREFIX ?= build$(XLEN)
|
||||
TARGET ?= fpga
|
||||
NUM_CORES ?= 1
|
||||
|
|
29
hw/syn/altera/quartus/.gitignore
vendored
29
hw/syn/altera/quartus/.gitignore
vendored
|
@ -1,29 +0,0 @@
|
|||
/unittest/*
|
||||
!/unittest/Makefile
|
||||
|
||||
/smem/*
|
||||
!/smem/Makefile
|
||||
|
||||
/cache/*
|
||||
!/cache/Makefile
|
||||
|
||||
/vortex/*
|
||||
!/vortex/Makefile
|
||||
|
||||
/pipeline/*
|
||||
!/pipeline/Makefile
|
||||
|
||||
/core/*
|
||||
!/core/Makefile
|
||||
|
||||
/top/*
|
||||
!/top/Makefile
|
||||
|
||||
/top-gfx/*
|
||||
!/top-gfx/Makefile
|
||||
|
||||
/test/*
|
||||
!/test/Makefile
|
||||
|
||||
/fpu/*
|
||||
!/fpu/Makefile
|
|
@ -1,3 +1,5 @@
|
|||
include ../common.mk
|
||||
|
||||
PREFIX ?= build
|
||||
|
||||
BUILD_DIR=$(PREFIX)_$(DEVICE_FAMILY)
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
include ../common.mk
|
||||
|
||||
RTL_DIR = ../../../../../rtl
|
||||
AFU_DIR = $(RTL_DIR)/afu/opae
|
||||
THIRD_PARTY_DIR = ../../../../../../third_party
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
|
||||
include ../common.mk
|
||||
|
||||
ALL:sim
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
|
||||
include ../common.mk
|
||||
|
||||
SCRIPT_DIR=./scripts
|
||||
|
||||
|
|
1
hw/syn/xilinx/common.mk
Normal file
1
hw/syn/xilinx/common.mk
Normal file
|
@ -0,0 +1 @@
|
|||
include ../common.mk
|
2
hw/syn/xilinx/test/.gitignore
vendored
2
hw/syn/xilinx/test/.gitignore
vendored
|
@ -1,2 +0,0 @@
|
|||
/project_1/*
|
||||
/.Xil/*
|
|
@ -1,3 +1,5 @@
|
|||
include ../common.mk
|
||||
|
||||
VIVADO = $(XILINX_VIVADO)/bin/vivado
|
||||
|
||||
RTL_DIR = ../../../rtl
|
||||
|
|
|
@ -1,17 +1,12 @@
|
|||
XLEN ?= 32
|
||||
TOOLDIR ?= /opt
|
||||
ROOT_DIR := $(realpath ../../../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
ifeq ($(XLEN),64)
|
||||
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv64-gnu-toolchain
|
||||
CFLAGS += -march=rv64imafd -mabi=lp64d
|
||||
else
|
||||
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv-gnu-toolchain
|
||||
CFLAGS += -march=rv32imaf -mabi=ilp32f
|
||||
endif
|
||||
|
||||
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
|
||||
|
||||
VORTEX_RT_PATH ?= $(realpath ../../../../../kernel)
|
||||
BIN2COE_PATH ?= ../../../../../../bin2coe
|
||||
|
||||
CC = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc
|
||||
|
|
1
hw/syn/xilinx/xrt/.gitignore
vendored
1
hw/syn/xilinx/xrt/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
/build*/*
|
|
@ -1,3 +1,6 @@
|
|||
ROOT_DIR := $(realpath ../../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
ifneq ($(findstring Makefile, $(MAKEFILE_LIST)), Makefile)
|
||||
help:
|
||||
$(ECHO) "Makefile Usage:"
|
||||
|
@ -11,7 +14,7 @@ endif
|
|||
|
||||
TARGET ?= hw
|
||||
PLATFORM ?=
|
||||
XLEN ?= 32
|
||||
|
||||
NUM_CORES ?= 1
|
||||
PREFIX ?= build$(XLEN)
|
||||
MAX_JOBS ?= 8
|
||||
|
|
1
hw/syn/yosys/.gitignore
vendored
1
hw/syn/yosys/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
build_*/*
|
|
@ -1,19 +1,22 @@
|
|||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/hw/syn/yosys
|
||||
|
||||
TOP_LEVEL_ENTITY ?= Vortex
|
||||
PREFIX ?= build
|
||||
NUM_CORES ?= 1
|
||||
XLEN ?= 32
|
||||
|
||||
SCRIPT_DIR = ../../../scripts
|
||||
RTL_DIR = ../../../rtl
|
||||
DPI_DIR = ../../../dpi
|
||||
THIRD_PARTY_DIR = ../../../../third_party
|
||||
SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts
|
||||
RTL_DIR := $(VORTEX_HOME)/hw/rtl
|
||||
THIRD_PARTY_DIR := $(VORTEX_HOME)/third_party
|
||||
|
||||
CP = cp -rf
|
||||
RMDIR = rm -rf
|
||||
ECHO = @echo
|
||||
|
||||
BUILD_DIR = $(PREFIX)_$(TOP_LEVEL_ENTITY)
|
||||
BIN_DIR = $(BUILD_DIR)/bin
|
||||
BUILD_DIR := $(PREFIX)_$(TOP_LEVEL_ENTITY)
|
||||
BIN_DIR := $(BUILD_DIR)/bin
|
||||
|
||||
# control RTL debug tracing states
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE
|
||||
|
@ -81,10 +84,10 @@ $(BUILD_DIR)/project.v: gen-sources
|
|||
cd $(BUILD_DIR); $(SCRIPT_DIR)/sv2v.sh -t$(TOP_LEVEL_ENTITY) -Isrc -oproject.v
|
||||
|
||||
build: $(BUILD_DIR)/project.v
|
||||
cd $(BUILD_DIR); ../synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v
|
||||
cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v
|
||||
|
||||
elaborate: $(BUILD_DIR)/project.v
|
||||
cd $(BUILD_DIR); ../synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="elaborate"
|
||||
cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="elaborate"
|
||||
|
||||
clean:
|
||||
$(RMDIR) $(BUILD_DIR)
|
||||
|
|
1
hw/unittest/.gitignore
vendored
1
hw/unittest/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
*/obj_dir/*
|
|
@ -2,16 +2,19 @@ all:
|
|||
$(MAKE) -C cache
|
||||
$(MAKE) -C generic_queue
|
||||
$(MAKE) -C mem_streamer
|
||||
$(MAKE) -C top_modules
|
||||
$(MAKE) -C cache_top
|
||||
$(MAKE) -C core_top
|
||||
|
||||
run:
|
||||
$(MAKE) -C cache run
|
||||
$(MAKE) -C generic_queue run
|
||||
$(MAKE) -C mem_streamer run
|
||||
$(MAKE) -C top_modules run
|
||||
$(MAKE) -C cache_top run
|
||||
$(MAKE) -C core_top run
|
||||
|
||||
clean:
|
||||
$(MAKE) -C cache clean
|
||||
$(MAKE) -C generic_queue clean
|
||||
$(MAKE) -C mem_streamer clean
|
||||
$(MAKE) -C top_modules clean
|
||||
$(MAKE) -C cache_top clean
|
||||
$(MAKE) -C core_top clean
|
|
@ -1,172 +0,0 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`timescale 1ns/1ps
|
||||
|
||||
module VX_tb_divide();
|
||||
|
||||
`ifdef TRACE
|
||||
initial
|
||||
begin
|
||||
$dumpfile("trace.vcd");
|
||||
$dumpvars(0,test);
|
||||
end
|
||||
`endif
|
||||
|
||||
reg clk;
|
||||
reg rst;
|
||||
|
||||
reg [31:0] numer, denom;
|
||||
|
||||
wire [31:0] o_div[0:7], o_rem[0:7];
|
||||
|
||||
for (genvar i = 0; i < 8; ++i) begin
|
||||
VX_divide#(
|
||||
.WIDTHN(32),
|
||||
.WIDTHD(32),
|
||||
.WIDTHQ(32),
|
||||
.WIDTHR(32),
|
||||
.PIPELINE(i)
|
||||
) div(
|
||||
.clock(clk),
|
||||
.aclr(rst),
|
||||
.clken(1'b1),
|
||||
.numer(numer),
|
||||
.denom(denom),
|
||||
.quotient(o_div[i]),
|
||||
.remainder(o_rem[i])
|
||||
);
|
||||
end
|
||||
|
||||
initial begin
|
||||
clk = 0; rst = 0;
|
||||
|
||||
numer = 56;
|
||||
denom = 11;
|
||||
|
||||
$display("56 / 11 #0");
|
||||
if (o_div[0] != 5 || o_rem[0] != 1) begin
|
||||
$display("PIPE0: div=", o_div[0], " rem=", o_rem[0]);
|
||||
$display("expected 5,1 EXITING");
|
||||
$finish();
|
||||
end
|
||||
|
||||
if (o_div[1] != 1'bx || o_rem[1] != 1'bx) begin
|
||||
$display("PIPE1: div=", o_div[1], " rem=", o_rem[1]);
|
||||
$display("expected x,x EXITING");
|
||||
$finish();
|
||||
end
|
||||
|
||||
if (o_div[2] != 1'bx || o_rem[2] != 1'bx) begin
|
||||
$display("PIPE2: div=", o_div[2], " rem=", o_rem[2]);
|
||||
$display("expected x,x EXITING");
|
||||
$finish();
|
||||
end
|
||||
|
||||
if (o_div[3] != 1'bx || o_rem[3] != 1'bx) begin
|
||||
$display("PIPE3: div=", o_div[3], " rem=", o_rem[3]);
|
||||
$display("expected x,x EXITING");
|
||||
$finish();
|
||||
end
|
||||
|
||||
#2;
|
||||
|
||||
$display("56 / 11 #2");
|
||||
if (o_div[0] != 5 || o_rem[0] != 1) begin
|
||||
$display("PIPE0: div=", o_div[0], " rem=", o_rem[0]);
|
||||
$display("expected 5,1, EXITING");
|
||||
$finish();
|
||||
end
|
||||
|
||||
if (o_div[1] != 5 || o_rem[1] != 1) begin
|
||||
$display("PIPE1: div=", o_div[1], " rem=", o_rem[1]);
|
||||
$display("expected 5,1 EXITING");
|
||||
$finish();
|
||||
end
|
||||
|
||||
if (o_div[2] != 1'bx || o_rem[2] != 1'bx) begin
|
||||
$display("PIPE2: div=", o_div[2], " rem=", o_rem[2]);
|
||||
$display("expected x,x EXITING");
|
||||
$finish();
|
||||
end
|
||||
|
||||
if (o_div[3] != 1'bx || o_rem[3] != 1'bx) begin
|
||||
$display("PIPE3: div=", o_div[3], " rem=", o_rem[3]);
|
||||
$display("expected x,x EXITING");
|
||||
$finish();
|
||||
end
|
||||
|
||||
#2;
|
||||
|
||||
$display("56 / 11 #4");
|
||||
if (o_div[0] != 5 || o_rem[0] != 1) begin
|
||||
$display("PIPE0: div=", o_div[0], " rem=", o_rem[0]);
|
||||
$display("expected 5,1 EXITING");
|
||||
$finish();
|
||||
end
|
||||
|
||||
if (o_div[1] != 5 || o_rem[1] != 1) begin
|
||||
$display("PIPE1: div=", o_div[1], " rem=", o_rem[1]);
|
||||
$display("expected 5,1 EXITING");
|
||||
$finish();
|
||||
end
|
||||
|
||||
if (o_div[2] != 5 || o_rem[2] != 1) begin
|
||||
$display("PIPE2: div=", o_div[2], " rem=", o_rem[2]);
|
||||
$display("expected 5,1 EXITING");
|
||||
$finish();
|
||||
end
|
||||
|
||||
if (o_div[3] != 1'bx || o_rem[3] != 1'bx) begin
|
||||
$display("PIPE3: div=", o_div[3], " rem=", o_rem[3]);
|
||||
$display("expected x,x EXITING");
|
||||
$finish();
|
||||
end
|
||||
|
||||
#2;
|
||||
|
||||
$display("56 / 11 #6");
|
||||
|
||||
if (o_div[0] != 5 || o_rem[0] != 1) begin
|
||||
$display("PIPE0: div=", o_div[0], " rem=", o_rem[0]);
|
||||
$display("expected 5,1 EXITING");
|
||||
$finish();
|
||||
end
|
||||
|
||||
if (o_div[1] != 5 || o_rem[1] != 1) begin
|
||||
$display("PIPE1: div=", o_div[1], " rem=", o_rem[1]);
|
||||
$display("expected 5,1 EXITING");
|
||||
$finish();
|
||||
end
|
||||
|
||||
if (o_div[2] != 5 || o_rem[2] != 1) begin
|
||||
$display("PIPE2: div=", o_div[2], " rem=", o_rem[2]);
|
||||
$display("expected 5,1 EXITING");
|
||||
$finish();
|
||||
end
|
||||
|
||||
if (o_div[3] != 5 || o_rem[3] != 1) begin
|
||||
$display("PIPE3: div=", o_div[3], " rem=", o_rem[3]);
|
||||
$display("expected 5,1 EXITING");
|
||||
$finish();
|
||||
end
|
||||
|
||||
$display("PASS");
|
||||
|
||||
$finish();
|
||||
end
|
||||
|
||||
always #1
|
||||
clk = !clk;
|
||||
|
||||
endmodule
|
80
hw/unittest/cache/Makefile
vendored
80
hw/unittest/cache/Makefile
vendored
|
@ -1,74 +1,26 @@
|
|||
DESTDIR ?= .
|
||||
RTL_DIR = ../../rtl
|
||||
DPI_DIR = ../../dpi
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
CONFIGS +=
|
||||
PARAMS +=
|
||||
PROJECT := cache
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
RTL_DIR := $(VORTEX_HOME)/hw/rtl
|
||||
DPI_DIR := $(VORTEX_HOME)/hw/dpi
|
||||
|
||||
LDFLAGS +=
|
||||
SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT)
|
||||
|
||||
# control RTL debug tracing states
|
||||
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE
|
||||
CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common
|
||||
CXXFLAGS += -I$(ROOT_DIR)/hw
|
||||
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
SRCS := $(DPI_DIR)/util_dpi.cpp
|
||||
SRCS += $(SRC_DIR)/cachesim.cpp $(SRC_DIR)/testbench.cpp
|
||||
|
||||
RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv
|
||||
DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE
|
||||
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
|
||||
RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv
|
||||
|
||||
SRCS = cachesim.cpp testbench.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp
|
||||
RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
|
||||
RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
|
||||
|
||||
TOP = VX_cache_top
|
||||
TOP := VX_cache_top
|
||||
|
||||
VL_FLAGS = --exe
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += -DSIMULATION -DSV_DPI
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
VL_FLAGS += $(PARAMS)
|
||||
VL_FLAGS += $(RTL_INCLUDE)
|
||||
VL_FLAGS += $(RTL_PKGS)
|
||||
VL_FLAGS += --cc $(TOP) --top-module $(TOP)
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
|
||||
VL_FLAGS += -j $(THREADS)
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
ifdef PERF
|
||||
VL_FLAGS += -DPERF_ENABLE
|
||||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
PROJECT = cache
|
||||
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS)
|
||||
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$@
|
||||
|
||||
run: $(DESTDIR)/$(PROJECT)
|
||||
$(DESTDIR)/$(PROJECT)
|
||||
|
||||
waves: trace.vcd
|
||||
gtkwave -o trace.vcd
|
||||
|
||||
clean:
|
||||
rm -rf obj_dir $(DESTDIR)/$(PROJECT)
|
||||
include ../common.mk
|
26
hw/unittest/cache_top/Makefile
Normal file
26
hw/unittest/cache_top/Makefile
Normal file
|
@ -0,0 +1,26 @@
|
|||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
PROJECT := cache_top
|
||||
|
||||
RTL_DIR := $(VORTEX_HOME)/hw/rtl
|
||||
DPI_DIR := $(VORTEX_HOME)/hw/dpi
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT)
|
||||
|
||||
CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common
|
||||
CXXFLAGS += -I$(ROOT_DIR)/hw
|
||||
|
||||
SRCS := $(DPI_DIR)/util_dpi.cpp
|
||||
SRCS += $(SRC_DIR)/main.cpp
|
||||
|
||||
DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE
|
||||
|
||||
RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv
|
||||
|
||||
RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
|
||||
RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
|
||||
|
||||
TOP := VX_cache_top
|
||||
|
||||
include ../common.mk
|
|
@ -1,26 +1,18 @@
|
|||
DESTDIR ?= .
|
||||
RTL_DIR = ../../rtl
|
||||
DPI_DIR = ../../dpi
|
||||
|
||||
CONFIGS +=
|
||||
PARAMS +=
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
LDFLAGS +=
|
||||
LDFLAGS +=
|
||||
RTL_PKGS +=
|
||||
RTL_INCLUDE +=
|
||||
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
|
||||
RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
|
||||
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(RTL_DIR)/fpu -I$(RTL_DIR)/core
|
||||
|
||||
SRCS = main.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp
|
||||
|
||||
VL_FLAGS = --exe
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
|
@ -30,6 +22,7 @@ VL_FLAGS += $(CONFIGS)
|
|||
VL_FLAGS += $(PARAMS)
|
||||
VL_FLAGS += $(RTL_INCLUDE)
|
||||
VL_FLAGS += $(RTL_PKGS)
|
||||
VL_FLAGS += --cc $(TOP) --top-module $(TOP)
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
|
||||
|
@ -51,17 +44,16 @@ ifdef PERF
|
|||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
PROJECT = top_modules
|
||||
|
||||
all: build
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
build: $(SRCS)
|
||||
verilator --build $(VL_FLAGS) --cc VX_cache_top --top-module VX_cache_top $^ -CFLAGS '$(CXXFLAGS)'
|
||||
verilator --build $(VL_FLAGS) --cc VX_core_top --top-module VX_core_top $^ -CFLAGS '$(CXXFLAGS)'
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS)
|
||||
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$@
|
||||
|
||||
run:
|
||||
run: $(DESTDIR)/$(PROJECT)
|
||||
$(DESTDIR)/$(PROJECT)
|
||||
|
||||
waves:
|
||||
waves: trace.vcd
|
||||
gtkwave -o trace.vcd
|
||||
|
||||
clean:
|
||||
rm -rf obj_dir
|
||||
rm -rf *.vcd obj_dir $(DESTDIR)/$(PROJECT)
|
26
hw/unittest/core_top/Makefile
Normal file
26
hw/unittest/core_top/Makefile
Normal file
|
@ -0,0 +1,26 @@
|
|||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
PROJECT := core_top
|
||||
|
||||
RTL_DIR := $(VORTEX_HOME)/hw/rtl
|
||||
DPI_DIR := $(VORTEX_HOME)/hw/dpi
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT)
|
||||
|
||||
CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common
|
||||
CXXFLAGS += -I$(ROOT_DIR)/hw
|
||||
|
||||
SRCS := $(DPI_DIR)/util_dpi.cpp
|
||||
SRCS += $(SRC_DIR)/main.cpp
|
||||
|
||||
DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE
|
||||
|
||||
RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
|
||||
|
||||
RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
|
||||
RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/fpu -I$(RTL_DIR)/core
|
||||
|
||||
TOP := VX_core_top
|
||||
|
||||
include ../common.mk
|
49
hw/unittest/core_top/main.cpp
Normal file
49
hw/unittest/core_top/main.cpp
Normal file
|
@ -0,0 +1,49 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "vl_simulator.h"
|
||||
|
||||
#ifndef TRACE_START_TIME
|
||||
#define TRACE_START_TIME 0ull
|
||||
#endif
|
||||
|
||||
#ifndef TRACE_STOP_TIME
|
||||
#define TRACE_STOP_TIME -1ull
|
||||
#endif
|
||||
|
||||
static uint64_t timestamp = 0;
|
||||
static bool trace_enabled = false;
|
||||
static uint64_t trace_start_time = TRACE_START_TIME;
|
||||
static uint64_t trace_stop_time = TRACE_STOP_TIME;
|
||||
|
||||
double sc_time_stamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
bool sim_trace_enabled() {
|
||||
if (timestamp >= trace_start_time
|
||||
&& timestamp < trace_stop_time)
|
||||
return true;
|
||||
return trace_enabled;
|
||||
}
|
||||
|
||||
void sim_trace_enable(bool enable) {
|
||||
trace_enabled = enable;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
// Initialize Verilators variables
|
||||
Verilated::commandArgs(argc, argv);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -1,65 +1,24 @@
|
|||
DESTDIR ?= .
|
||||
RTL_DIR = ../../rtl
|
||||
DPI_DIR = ../../dpi
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
CONFIGS +=
|
||||
PARAMS +=
|
||||
PROJECT := generic_queue
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
RTL_DIR := $(VORTEX_HOME)/hw/rtl
|
||||
DPI_DIR := $(VORTEX_HOME)/hw/dpi
|
||||
|
||||
LDFLAGS +=
|
||||
SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT)
|
||||
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common
|
||||
|
||||
RTL_PKGS +=
|
||||
SRCS := $(DPI_DIR)/util_dpi.cpp
|
||||
SRCS += $(SRC_DIR)/main.cpp
|
||||
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
|
||||
DBG_TRACE_FLAGS :=
|
||||
|
||||
SRCS = main.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp
|
||||
RTL_PKGS :=
|
||||
|
||||
TOP = VX_fifo_queue
|
||||
RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
|
||||
|
||||
VL_FLAGS = --exe
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += -DSIMULATION -DSV_DPI
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
VL_FLAGS += $(PARAMS)
|
||||
VL_FLAGS += $(RTL_INCLUDE)
|
||||
VL_FLAGS += $(RTL_PKGS)
|
||||
VL_FLAGS += --cc $(TOP) --top-module $(TOP)
|
||||
TOP := VX_fifo_queue
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
|
||||
VL_FLAGS += -j $(THREADS)
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
PROJECT = generic_queue
|
||||
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS)
|
||||
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$@
|
||||
|
||||
run: $(DESTDIR)/$(PROJECT)
|
||||
$(DESTDIR)/$(PROJECT)
|
||||
|
||||
waves: trace.vcd
|
||||
gtkwave -o trace.vcd
|
||||
|
||||
clean:
|
||||
rm -rf obj_dir $(DESTDIR)/$(PROJECT)
|
||||
include ../common.mk
|
|
@ -1,65 +1,24 @@
|
|||
DESTDIR ?= .
|
||||
RTL_DIR = ../../rtl
|
||||
DPI_DIR = ../../dpi
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
CONFIGS +=
|
||||
PARAMS +=
|
||||
PROJECT := mem_streamer
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
RTL_DIR := $(VORTEX_HOME)/hw/rtl
|
||||
DPI_DIR := $(VORTEX_HOME)/hw/dpi
|
||||
|
||||
LDFLAGS +=
|
||||
SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT)
|
||||
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common
|
||||
|
||||
RTL_PKGS +=
|
||||
SRCS := $(DPI_DIR)/util_dpi.cpp
|
||||
SRCS += $(SRC_DIR)/memsim.cpp $(SRC_DIR)/ram.cpp
|
||||
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
|
||||
DBG_TRACE_FLAGS :=
|
||||
|
||||
SRCS = memsim.cpp ram.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp
|
||||
RTL_PKGS :=
|
||||
|
||||
TOP = VX_mem_scheduler
|
||||
RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
|
||||
|
||||
VL_FLAGS = --exe
|
||||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += -DSIMULATION -DSV_DPI
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
VL_FLAGS += $(PARAMS)
|
||||
VL_FLAGS += $(RTL_INCLUDE)
|
||||
VL_FLAGS += $(RTL_PKGS)
|
||||
VL_FLAGS += --cc $(TOP) --top-module $(TOP)
|
||||
TOP := VX_mem_scheduler
|
||||
|
||||
# Enable Verilator multithreaded simulation
|
||||
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
|
||||
VL_FLAGS += -j $(THREADS)
|
||||
#VL_FLAGS += --threads $(THREADS)
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
|
||||
CXXFLAGS += -g -O0 $(DBG_FLAGS)
|
||||
else
|
||||
VL_FLAGS += -DNDEBUG
|
||||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
PROJECT = mem_streamer
|
||||
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS)
|
||||
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$@
|
||||
|
||||
run: $(DESTDIR)/$(PROJECT)
|
||||
$(DESTDIR)/$(PROJECT)
|
||||
|
||||
waves: trace.vcd
|
||||
gtkwave -o trace.vcd
|
||||
|
||||
clean:
|
||||
rm -rf obj_dir $(DESTDIR)/$(PROJECT)
|
||||
include ../common.mk
|
0
kernel/.gitignore
vendored
0
kernel/.gitignore
vendored
|
@ -1,18 +1,14 @@
|
|||
XLEN ?= 32
|
||||
TOOLDIR ?= /opt
|
||||
ROOT_DIR := $(realpath ..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
ifeq ($(XLEN),64)
|
||||
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv64-gnu-toolchain
|
||||
CFLAGS += -march=rv64imafd -mabi=lp64d
|
||||
else
|
||||
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv-gnu-toolchain
|
||||
CFLAGS += -march=rv32imaf -mabi=ilp32f
|
||||
endif
|
||||
|
||||
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
|
||||
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
|
||||
|
||||
LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex
|
||||
INC_DIR := $(VORTEX_HOME)/kernel/include
|
||||
SRC_DIR := $(VORTEX_HOME)/kernel/src
|
||||
|
||||
LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT)
|
||||
LLVM_CFLAGS += --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH)
|
||||
|
@ -34,27 +30,27 @@ DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump
|
|||
CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy
|
||||
|
||||
CFLAGS += -O3 -mcmodel=medany -fno-exceptions -nostartfiles -fdata-sections -ffunction-sections
|
||||
CFLAGS += -I./include -I../hw
|
||||
CFLAGS += -I$(INC_DIR) -I$(ROOT_DIR)/hw
|
||||
CFLAGS += -DXLEN_$(XLEN)
|
||||
|
||||
PROJECT = libvortexrt
|
||||
PROJECT := libvortexrt
|
||||
|
||||
SRCS = ./src/vx_start.S ./src/vx_syscalls.c ./src/vx_print.S ./src/tinyprintf.c ./src/vx_print.c ./src/vx_spawn.c ./src/vx_serial.S ./src/vx_perf.c
|
||||
SRCS = $(SRC_DIR)/vx_start.S $(SRC_DIR)/vx_syscalls.c $(SRC_DIR)/vx_print.S $(SRC_DIR)/tinyprintf.c $(SRC_DIR)/vx_print.c $(SRC_DIR)/vx_spawn.c $(SRC_DIR)/vx_serial.S $(SRC_DIR)/vx_perf.c
|
||||
|
||||
OBJS := $(addsuffix .o, $(notdir $(SRCS)))
|
||||
OBJS = $(addsuffix .o, $(notdir $(SRCS)))
|
||||
|
||||
all: $(PROJECT).a $(PROJECT).dump
|
||||
|
||||
$(PROJECT).dump: $(PROJECT).a
|
||||
$(DP) -D $(PROJECT).a > $(PROJECT).dump
|
||||
|
||||
%.S.o: src/%.S
|
||||
%.S.o: $(SRC_DIR)/%.S
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
%.cpp.o: src/%.cpp
|
||||
%.cpp.o: $(SRC_DIR)/%.cpp
|
||||
$(CXX) $(CFLAGS) -c $< -o $@
|
||||
|
||||
%.c.o: src/%.c
|
||||
%.c.o: $(SRC_DIR)/%.c
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
$(PROJECT).a: $(OBJS)
|
||||
|
|
1
perf/.gitignore
vendored
1
perf/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
**/*.log
|
8
runtime/common.mk
Normal file
8
runtime/common.mk
Normal file
|
@ -0,0 +1,8 @@
|
|||
ROOT_DIR := $(realpath ../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SIM_DIR := $(VORTEX_HOME)/sim
|
||||
HW_DIR := $(VORTEX_HOME)/hw
|
||||
|
||||
INC_DIR := $(VORTEX_HOME)/runtime/include
|
||||
COMMON_DIR := $(VORTEX_HOME)/runtime/common
|
1
runtime/opae/.gitignore
vendored
1
runtime/opae/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
/obj_dir/*
|
|
@ -1,12 +1,15 @@
|
|||
XLEN ?= 32
|
||||
include ../common.mk
|
||||
|
||||
TARGET ?= opaesim
|
||||
|
||||
DESTDIR ?= $(CURDIR)
|
||||
SIM_DIR = $(abspath ../../sim)
|
||||
HW_DIR = $(abspath ../../hw)
|
||||
SYN_DIR = $(HW_DIR)/syn/altera/opae
|
||||
|
||||
SYN_DIR := $(HW_DIR)/syn/altera/opae
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/runtime/opae
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
CXXFLAGS += -I../include -I../common -I$(HW_DIR) -I$(DESTDIR)
|
||||
CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(DESTDIR)
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
|
||||
# Position independent code
|
||||
|
@ -20,7 +23,7 @@ CXXFLAGS += -DDUMP_PERF_STATS
|
|||
|
||||
LDFLAGS += -shared -luuid -ldl -pthread
|
||||
|
||||
SRCS = vortex.cpp driver.cpp ../common/utils.cpp
|
||||
SRCS = $(SRC_DIR)/vortex.cpp $(SRC_DIR)/driver.cpp $(COMMON_DIR)/utils.cpp
|
||||
|
||||
# set up target types
|
||||
ifeq ($(TARGET), opaesim)
|
||||
|
@ -46,7 +49,7 @@ endif
|
|||
# Enable scope logic analyzer
|
||||
ifdef SCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
SRCS += ../common/scope.cpp
|
||||
SRCS += $(COMMON_DIR)/scope.cpp
|
||||
endif
|
||||
|
||||
# Enable perf counters
|
||||
|
@ -54,16 +57,16 @@ ifdef PERF
|
|||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
PROJECT = libvortex.so
|
||||
PROJECT := libvortex.so
|
||||
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
$(DESTDIR)/libopae-c-sim.so:
|
||||
DESTDIR=$(DESTDIR) $(MAKE) -C $(SIM_DIR)/opaesim $(DESTDIR)/libopae-c-sim.so
|
||||
DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/opaesim $(DESTDIR)/libopae-c-sim.so
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) $(OPAESIM)
|
||||
$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $@
|
||||
|
||||
clean:
|
||||
DESTDIR=$(DESTDIR) $(MAKE) -C $(SIM_DIR)/opaesim clean
|
||||
DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/opaesim clean
|
||||
rm -rf $(DESTDIR)/$(PROJECT)
|
||||
|
|
1
runtime/rtlsim/.gitignore
vendored
1
runtime/rtlsim/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
/obj_dir/*
|
|
@ -1,10 +1,11 @@
|
|||
XLEN ?= 32
|
||||
include ../common.mk
|
||||
|
||||
DESTDIR ?= $(CURDIR)
|
||||
SIM_DIR = $(abspath ../../sim)
|
||||
HW_DIR = $(abspath ../../hw)
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/runtime/rtlsim
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
CXXFLAGS += -I../include -I../common -I$(HW_DIR) -I$(SIM_DIR)/rtlsim -I$(SIM_DIR)/common
|
||||
CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(SIM_DIR)/rtlsim -I$(COMMON_DIR) -I$(SIM_DIR)/common
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
|
||||
# Position independent code
|
||||
|
@ -19,7 +20,7 @@ CXXFLAGS += -DDUMP_PERF_STATS
|
|||
LDFLAGS += -shared -pthread
|
||||
LDFLAGS += -L$(DESTDIR) -lrtlsim
|
||||
|
||||
SRCS = vortex.cpp ../common/utils.cpp
|
||||
SRCS := $(SRC_DIR)/vortex.cpp $(COMMON_DIR)/utils.cpp
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
|
@ -33,14 +34,14 @@ ifdef PERF
|
|||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
PROJECT = libvortex.so
|
||||
PROJECT := libvortex.so
|
||||
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS)
|
||||
DESTDIR=$(DESTDIR) $(MAKE) -C $(SIM_DIR)/rtlsim $(DESTDIR)/librtlsim.so
|
||||
DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/rtlsim $(DESTDIR)/librtlsim.so
|
||||
$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $@
|
||||
|
||||
clean:
|
||||
DESTDIR=$(DESTDIR) $(MAKE) -C $(SIM_DIR)/rtlsim clean
|
||||
DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/rtlsim clean
|
||||
rm -rf $(DESTDIR)/$(PROJECT) *.o
|
2
runtime/simx/.gitignore
vendored
2
runtime/simx/.gitignore
vendored
|
@ -1,2 +0,0 @@
|
|||
obj_dir
|
||||
libvortex.so
|
|
@ -1,11 +1,12 @@
|
|||
XLEN ?= 32
|
||||
include ../common.mk
|
||||
|
||||
DESTDIR ?= $(CURDIR)
|
||||
SIM_DIR = $(abspath ../../sim)
|
||||
HW_DIR = $(abspath ../../hw)
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/runtime/simx
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I../include -I../common -I$(HW_DIR) -I$(SIM_DIR)/simx -I$(SIM_DIR)/common
|
||||
CXXFLAGS += -I$(INC_DIR) -I../common -I$(ROOT_DIR)/hw -I$(SIM_DIR)/simx -I$(COMMON_DIR) -I$(SIM_DIR)/common
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
CXXFLAGS += -DDUMP_PERF_STATS
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
|
@ -13,7 +14,7 @@ CXXFLAGS += -DXLEN_$(XLEN)
|
|||
LDFLAGS += -shared -pthread
|
||||
LDFLAGS += -L$(DESTDIR) -lsimx
|
||||
|
||||
SRCS = vortex.cpp ../common/utils.cpp
|
||||
SRCS := $(SRC_DIR)/vortex.cpp $(COMMON_DIR)/utils.cpp
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
|
@ -22,14 +23,14 @@ else
|
|||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
PROJECT = libvortex.so
|
||||
PROJECT := libvortex.so
|
||||
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS)
|
||||
DESTDIR=$(DESTDIR) $(MAKE) -C $(SIM_DIR)/simx $(DESTDIR)/libsimx.so
|
||||
DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/simx $(DESTDIR)/libsimx.so
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
clean:
|
||||
DESTDIR=$(DESTDIR) $(MAKE) -C $(SIM_DIR)/simx clean
|
||||
DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/simx clean
|
||||
rm -rf $(DESTDIR)/$(PROJECT) *.o
|
|
@ -1,19 +1,18 @@
|
|||
XLEN ?= 32
|
||||
include ../common.mk
|
||||
|
||||
DESTDIR ?= $(CURDIR)
|
||||
SIM_DIR = $(abspath ../../sim)
|
||||
HW_DIR = $(abspath ../../hw)
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/runtime/stub
|
||||
|
||||
CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I../include -I../common -I$(HW_DIR) -I$(SIM_DIR)/common
|
||||
|
||||
CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(SIM_DIR)/common
|
||||
CXXFLAGS += -fPIC
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
|
||||
SRCS = vortex.cpp ../common/utils.cpp
|
||||
SRCS := $(SRC_DIR)/vortex.cpp $(COMMON_DIR)/utils.cpp
|
||||
|
||||
PROJECT = libvortex.so
|
||||
PROJECT := libvortex.so
|
||||
|
||||
all: $(PROJECT)
|
||||
|
||||
|
|
|
@ -1,15 +1,17 @@
|
|||
include ../common.mk
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/runtime/xrt
|
||||
|
||||
CXXFLAGS += -std=c++14 -Wall -Wextra -Wfatal-errors
|
||||
|
||||
CXXFLAGS += -I../include -I../common -I../../hw -I$(XILINX_XRT)/include -I../../sim/common
|
||||
|
||||
CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(XILINX_XRT)/include -I$(SIM_DIR)/common
|
||||
CXXFLAGS += -fPIC
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
LDFLAGS += -L$(XILINX_XRT)/lib -luuid -lxrt_coreutil
|
||||
|
||||
SRCS = vortex.cpp ../common/utils.cpp ../../sim/common/util.cpp
|
||||
SRCS := $(SRC_DIR)/vortex.cpp $(COMMON_DIR)/utils.cpp $(SIM_DIR)/common/util.cpp
|
||||
|
||||
PROJECT = libvortex.so
|
||||
PROJECT := libvortex.so
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
|
@ -21,7 +23,7 @@ endif
|
|||
# Enable scope logic analyzer
|
||||
ifdef SCOPE
|
||||
CXXFLAGS += -DSCOPE
|
||||
SRCS += ../common/scope.cpp
|
||||
SRCS += $(COMMON_DIR)/scope.cpp
|
||||
endif
|
||||
|
||||
all: $(PROJECT)
|
||||
|
|
12
sim/common.mk
Normal file
12
sim/common.mk
Normal file
|
@ -0,0 +1,12 @@
|
|||
ROOT_DIR := $(realpath ../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
HW_DIR := $(VORTEX_HOME)/hw
|
||||
RTL_DIR := $(HW_DIR)/rtl
|
||||
DPI_DIR := $(HW_DIR)/dpi
|
||||
AFU_DIR := $(RTL_DIR)/afu/opae
|
||||
SCRIPT_DIR := $(HW_DIR)/scripts
|
||||
|
||||
COMMON_DIR := $(VORTEX_HOME)/sim/common
|
||||
|
||||
THIRD_PARTY_DIR := $(VORTEX_HOME)/third_party
|
1
sim/opaesim/.gitignore
vendored
1
sim/opaesim/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
/obj_dir/*
|
|
@ -1,16 +1,12 @@
|
|||
XLEN ?= 32
|
||||
include ../common.mk
|
||||
|
||||
DESTDIR ?= $(CURDIR)
|
||||
HW_DIR = $(abspath ../../hw)
|
||||
COMMON_DIR = $(abspath ../common)
|
||||
THIRD_PARTY_DIR = $(abspath ../../third_party)
|
||||
RTL_DIR = $(HW_DIR)/rtl
|
||||
DPI_DIR = $(HW_DIR)/dpi
|
||||
AFU_DIR = $(RTL_DIR)/afu/opae
|
||||
SCRIPT_DIR = $(HW_DIR)/scripts
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/sim/opaesim
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I$(CURDIR) -I$(HW_DIR) -I$(COMMON_DIR) -I$(DESTDIR)
|
||||
CXXFLAGS += -I$(SRC_DIR) -I$(ROOT_DIR)/hw -I$(COMMON_DIR) -I$(DESTDIR)
|
||||
CXXFLAGS += -I/$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I/$(THIRD_PARTY_DIR)
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
|
@ -52,7 +48,7 @@ DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
|||
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
SRCS += $(CURDIR)/fpga.cpp $(CURDIR)/opae_sim.cpp
|
||||
SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/opae_sim.cpp
|
||||
|
||||
RTL_PKGS = $(AFU_DIR)/local_mem_cfg_pkg.sv $(AFU_DIR)/ccip/ccip_if_pkg.sv
|
||||
RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
|
||||
|
@ -62,7 +58,7 @@ ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
|
|||
RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
|
||||
FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
|
||||
endif
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
RTL_INCLUDE = -I$(SRC_DIR) -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip
|
||||
|
||||
TOP = vortex_afu_shim
|
||||
|
@ -73,7 +69,7 @@ VL_FLAGS += --x-initial unique --x-assign unique
|
|||
VL_FLAGS += -DSIMULATION -DSV_DPI
|
||||
VL_FLAGS += -DXLEN_$(XLEN)
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
VL_FLAGS += verilator.vlt
|
||||
VL_FLAGS += $(SRC_DIR)/verilator.vlt
|
||||
VL_FLAGS += $(RTL_INCLUDE)
|
||||
VL_FLAGS += $(RTL_PKGS)
|
||||
VL_FLAGS += $(DBG_SCOPE_FLAGS)
|
||||
|
@ -111,7 +107,7 @@ endif
|
|||
VL_FLAGS += -DNOPAE
|
||||
CXXFLAGS += -DNOPAE
|
||||
|
||||
PROJECT = libopae-c-sim.so
|
||||
PROJECT := libopae-c-sim.so
|
||||
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
|
|
1
sim/rtlsim/.gitignore
vendored
1
sim/rtlsim/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
/obj_dir/*
|
|
@ -1,14 +1,12 @@
|
|||
XLEN ?= 32
|
||||
include ../common.mk
|
||||
|
||||
DESTDIR ?= $(CURDIR)
|
||||
HW_DIR = $(abspath ../../hw)
|
||||
COMMON_DIR = $(abspath ../common)
|
||||
THIRD_PARTY_DIR = $(abspath ../../third_party)
|
||||
RTL_DIR = $(HW_DIR)/rtl
|
||||
DPI_DIR = $(HW_DIR)/dpi
|
||||
|
||||
SRC_DIR = $(VORTEX_HOME)/sim/rtlsim
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I$(HW_DIR) -I$(COMMON_DIR)
|
||||
CXXFLAGS += -I$(ROOT_DIR)/hw -I$(COMMON_DIR)
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
|
@ -37,7 +35,7 @@ RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interface
|
|||
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
SRCS += $(CURDIR)/processor.cpp
|
||||
SRCS += $(SRC_DIR)/processor.cpp
|
||||
|
||||
ifdef AXI_BUS
|
||||
TOP = Vortex_axi
|
||||
|
@ -50,7 +48,7 @@ VL_FLAGS = --exe
|
|||
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
|
||||
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
|
||||
VL_FLAGS += --x-initial unique --x-assign unique
|
||||
VL_FLAGS += verilator.vlt
|
||||
VL_FLAGS += $(SRC_DIR)/verilator.vlt
|
||||
VL_FLAGS += -DSIMULATION -DSV_DPI
|
||||
VL_FLAGS += -DXLEN_$(XLEN)
|
||||
VL_FLAGS += $(CONFIGS)
|
||||
|
@ -80,11 +78,11 @@ ifdef PERF
|
|||
CXXFLAGS += -DPERF_ENABLE
|
||||
endif
|
||||
|
||||
PROJECT = rtlsim
|
||||
PROJECT := rtlsim
|
||||
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) $(CURDIR)/main.cpp
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) $(SRC_DIR)/main.cpp
|
||||
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS) -DSTARTUP_ADDR=0x80000000' -LDFLAGS '$(LDFLAGS)' --Mdir $(DESTDIR)/obj_dir -o $@
|
||||
|
||||
$(DESTDIR)/lib$(PROJECT).so: $(SRCS)
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
XLEN ?= 32
|
||||
DESTDIR ?= $(CURDIR)
|
||||
HW_DIR = $(abspath ../../hw)
|
||||
COMMON_DIR = $(abspath ../common)
|
||||
THIRD_PARTY_DIR = $(abspath ../../third_party)
|
||||
include ../common.mk
|
||||
|
||||
CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors
|
||||
DESTDIR ?= $(CURDIR)
|
||||
|
||||
SRC_DIR = $(VORTEX_HOME)/sim/simx
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
|
||||
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
|
||||
CXXFLAGS += -I$(CURDIR) -I$(COMMON_DIR) -I$(HW_DIR)
|
||||
CXXFLAGS += -I$(SRC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)/softfloat/source/include
|
||||
CXXFLAGS += -I$(THIRD_PARTY_DIR)
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
|
@ -16,7 +16,7 @@ LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
|||
LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp
|
||||
SRCS += processor.cpp cluster.cpp socket.cpp core.cpp emulator.cpp decode.cpp execute.cpp func_unit.cpp cache_sim.cpp mem_sim.cpp local_mem.cpp mem_coalescer.cpp dcrs.cpp types.cpp
|
||||
SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
|
@ -26,11 +26,11 @@ else
|
|||
CXXFLAGS += -O2 -DNDEBUG
|
||||
endif
|
||||
|
||||
PROJECT = simx
|
||||
PROJECT := simx
|
||||
|
||||
all: $(DESTDIR)/$(PROJECT)
|
||||
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) main.cpp
|
||||
$(DESTDIR)/$(PROJECT): $(SRCS) $(SRC_DIR)/main.cpp
|
||||
$(CXX) $(CXXFLAGS) -DSTARTUP_ADDR=0x80000000 $^ $(LDFLAGS) -o $@
|
||||
|
||||
$(DESTDIR)/lib$(PROJECT).so: $(SRCS)
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#define __WARP_H
|
||||
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <stack>
|
||||
#include <mem.h>
|
||||
#include "types.h"
|
||||
|
|
1
tests/.gitignore
vendored
1
tests/.gitignore
vendored
|
@ -1 +0,0 @@
|
|||
**/*.log
|
|
@ -1,5 +1,8 @@
|
|||
all: kernel regression opencl riscv unittest
|
||||
all: unittest kernel regression opencl riscv
|
||||
|
||||
unittest:
|
||||
$(MAKE) -C unittest
|
||||
|
||||
kernel:
|
||||
$(MAKE) -C kernel
|
||||
|
||||
|
@ -14,21 +17,18 @@ endif
|
|||
riscv:
|
||||
$(MAKE) -C riscv
|
||||
|
||||
unittest:
|
||||
$(MAKE) -C unittest
|
||||
|
||||
clean:
|
||||
$(MAKE) -C unittest clean
|
||||
$(MAKE) -C kernel clean
|
||||
$(MAKE) -C regression clean
|
||||
$(MAKE) -C opencl clean
|
||||
$(MAKE) -C riscv clean
|
||||
$(MAKE) -C unittest clean
|
||||
|
||||
clean-all:
|
||||
$(MAKE) -C unittest clean
|
||||
$(MAKE) -C kernel clean
|
||||
$(MAKE) -C regression clean-all
|
||||
$(MAKE) -C opencl clean-all
|
||||
$(MAKE) -C riscv clean
|
||||
$(MAKE) -C unittest clean
|
||||
|
||||
.PHONY: all kernel regression opencl riscv unittest
|
||||
.PHONY: all unittest kernel regression opencl riscv
|
|
@ -1,29 +1,20 @@
|
|||
XLEN ?= 32
|
||||
TOOLDIR ?= /opt
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
|
||||
ifeq ($(XLEN),64)
|
||||
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv64-gnu-toolchain
|
||||
CFLAGS += -march=rv64imafd -mabi=lp64d
|
||||
else
|
||||
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv-gnu-toolchain
|
||||
CFLAGS += -march=rv32imaf -mabi=ilp32f
|
||||
endif
|
||||
|
||||
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
|
||||
|
||||
VORTEX_KN_PATH ?= $(realpath ../../../kernel)
|
||||
|
||||
CC = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc
|
||||
AR = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc-ar
|
||||
DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump
|
||||
CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy
|
||||
|
||||
SIM_DIR = ../../../sim
|
||||
|
||||
CFLAGS += -O3 -mcmodel=medany -fno-exceptions -nostartfiles -fdata-sections -ffunction-sections
|
||||
CFLAGS += -I$(VORTEX_KN_PATH)/include -I$(VORTEX_KN_PATH)/../hw
|
||||
CFLAGS += -I$(VORTEX_KN_PATH)/include -I$(ROOT_DIR)/hw
|
||||
|
||||
LDFLAGS += -lm -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/linker/vx_link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 $(VORTEX_KN_PATH)/libvortexrt.a
|
||||
LDFLAGS += -lm -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/linker/vx_link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 $(ROOT_DIR)/kernel/libvortexrt.a
|
||||
|
||||
all: $(PROJECT).elf $(PROJECT).bin $(PROJECT).dump
|
||||
|
||||
|
@ -37,10 +28,10 @@ $(PROJECT).elf: $(SRCS)
|
|||
$(CC) $(CFLAGS) $(SRCS) $(LDFLAGS) -o $(PROJECT).elf
|
||||
|
||||
run-rtlsim: $(PROJECT).bin
|
||||
$(SIM_DIR)/rtlsim/rtlsim $(PROJECT).bin
|
||||
$(ROOT_DIR)/sim/rtlsim/rtlsim $(PROJECT).bin
|
||||
|
||||
run-simx: $(PROJECT).bin
|
||||
$(SIM_DIR)/simx/simx $(PROJECT).bin
|
||||
$(ROOT_DIR)/sim/simx/simx $(PROJECT).bin
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CC) $(CFLAGS) -MM $^ > .depend;
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
PROJECT = conform
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRCS = main.cpp tests.cpp
|
||||
PROJECT := conform
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/kernel/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cpp $(SRC_DIR)/tests.cpp
|
||||
|
||||
include ../common.mk
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
PROJECT = fibonacci
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRCS = main.cpp
|
||||
PROJECT := fibonacci
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/kernel/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cpp
|
||||
|
||||
include ../common.mk
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
PROJECT = hello
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRCS = main.cpp
|
||||
PROJECT := hello
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/kernel/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cpp
|
||||
|
||||
include ../common.mk
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
PROJECT = bfs
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRCS = main.cc
|
||||
PROJECT := bfs
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cc
|
||||
|
||||
OPTS ?=
|
||||
|
||||
|
|
|
@ -1,8 +1,13 @@
|
|||
PROJECT = blackscholes
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRCS = main.cpp oclUtils.cpp shrUtils.cpp cmd_arg_reader.cpp oclBlackScholes_launcher.cpp oclBlackScholes_gold.cpp
|
||||
PROJECT := blackscholes
|
||||
|
||||
CXXFLAGS += -I.
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cc $(SRC_DIR)/oclUtils.cpp $(SRC_DIR)/shrUtils.cpp $(SRC_DIR)/cmd_arg_reader.cpp $(SRC_DIR)/oclBlackScholes_launcher.cpp $(SRC_DIR)/oclBlackScholes_gold.cpp
|
||||
|
||||
CXXFLAGS += -I$(SRC_DIR)
|
||||
|
||||
OPTS ?=
|
||||
|
||||
|
|
|
@ -1,41 +1,29 @@
|
|||
XLEN ?= 32
|
||||
TOOLDIR ?= /opt
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
|
||||
TARGET ?= opaesim
|
||||
|
||||
XRT_SYN_DIR ?= ../../../hw/syn/xilinx/xrt
|
||||
XRT_SYN_DIR ?= $(VORTEX_HOME)/hw/syn/xilinx/xrt
|
||||
XRT_DEVICE_INDEX ?= 0
|
||||
|
||||
ifeq ($(XLEN),64)
|
||||
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv64-gnu-toolchain
|
||||
VX_CFLAGS += -march=rv64imafd -mabi=lp64d
|
||||
K_CFLAGS += -march=rv64imafd -mabi=ilp64d
|
||||
STARTUP_ADDR ?= 0x180000000
|
||||
else
|
||||
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv-gnu-toolchain
|
||||
VX_CFLAGS += -march=rv32imaf -mabi=ilp32f
|
||||
K_CFLAGS += -march=rv32imaf -mabi=ilp32f
|
||||
STARTUP_ADDR ?= 0x80000000
|
||||
endif
|
||||
|
||||
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
|
||||
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
|
||||
|
||||
POCL_CC_PATH ?= $(TOOLDIR)/pocl/compiler
|
||||
POCL_RT_PATH ?= $(TOOLDIR)/pocl/runtime
|
||||
|
||||
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
|
||||
VORTEX_KN_PATH ?= $(realpath ../../../kernel)
|
||||
|
||||
FPGA_BIN_DIR ?= $(VORTEX_RT_PATH)/opae
|
||||
|
||||
LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex
|
||||
LLVM_POCL ?= $(TOOLDIR)/llvm-vortex
|
||||
|
||||
K_CFLAGS += -v -O3 --sysroot=$(RISCV_SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -Xclang -target-feature -Xclang +vortex
|
||||
K_CFLAGS += -fno-rtti -fno-exceptions -nostartfiles -fdata-sections -ffunction-sections
|
||||
K_CFLAGS += -I$(VORTEX_KN_PATH)/include -DNDEBUG
|
||||
K_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T$(VORTEX_KN_PATH)/linker/vx_link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(VORTEX_KN_PATH)/libvortexrt.a -lm
|
||||
K_CFLAGS += -v -O3 --sysroot=$(RISCV_SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -Xclang -target-feature -Xclang +vortex
|
||||
K_CFLAGS += -fno-rtti -fno-exceptions -nostartfiles -fdata-sections -ffunction-sections
|
||||
K_CFLAGS += -I$(VORTEX_KN_PATH)/include -DNDEBUG
|
||||
K_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T$(VORTEX_KN_PATH)/linker/vx_link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(ROOT_DIR)/kernel/libvortexrt.a -lm
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
|
||||
CXXFLAGS += -Wno-deprecated-declarations -Wno-unused-parameter -Wno-narrowing
|
||||
|
@ -61,30 +49,30 @@ endif
|
|||
endif
|
||||
endif
|
||||
|
||||
OBJS := $(addsuffix .o, $(notdir $(filter-out main.cc,$(SRCS))))
|
||||
OBJS := $(addsuffix .o, $(filter-out main.cc,$(notdir $(SRCS))))
|
||||
|
||||
all: $(PROJECT) kernel.pocl
|
||||
|
||||
kernel.pocl: kernel.cl
|
||||
LD_LIBRARY_PATH=$(LLVM_POCL)/lib:$(POCL_CC_PATH)/lib:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) LLVM_PREFIX=$(LLVM_VORTEX) POCL_DEBUG=all POCL_VORTEX_CFLAGS="$(K_CFLAGS)" POCL_VORTEX_LDFLAGS="$(K_LDFLAGS)" $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
|
||||
kernel.pocl: $(SRC_DIR)/kernel.cl
|
||||
LD_LIBRARY_PATH=$(LLVM_POCL)/lib:$(POCL_CC_PATH)/lib:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) LLVM_PREFIX=$(LLVM_VORTEX) POCL_DEBUG=all POCL_VORTEX_CFLAGS="$(K_CFLAGS)" POCL_VORTEX_LDFLAGS="$(K_LDFLAGS)" $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl $^
|
||||
|
||||
%.cc.o: %.cc
|
||||
%.cc.o: $(SRC_DIR)/%.cc
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
%.cpp.o: %.cpp
|
||||
%.cpp.o: $(SRC_DIR)/%.cpp
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
%.c.o: %.c
|
||||
%.c.o: $(SRC_DIR)/%.c
|
||||
$(CC) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
main.cc.o: main.cc
|
||||
main.cc.o: $(SRC_DIR)/main.cc
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
main.cc.host.o: main.cc
|
||||
main.cc.host.o: $(SRC_DIR)/main.cc
|
||||
$(CXX) $(CXXFLAGS) -DHOSTGPU -c $< -o $@
|
||||
|
||||
$(PROJECT): main.cc.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L$(VORTEX_RT_PATH)/stub -lvortex -L$(POCL_RT_PATH)/lib -lOpenCL -o $@
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L$(ROOT_DIR)/runtime/stub -lvortex -L$(POCL_RT_PATH)/lib -lOpenCL -o $@
|
||||
|
||||
$(PROJECT).host: main.cc.host.o $(OBJS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -lOpenCL -o $@
|
||||
|
@ -93,19 +81,19 @@ run-gpu: $(PROJECT).host kernel.pocl
|
|||
./$(PROJECT).host $(OPTS)
|
||||
|
||||
run-simx: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(ROOT_DIR)/runtime/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-rtlsim: $(PROJECT) kernel.pocl
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(ROOT_DIR)/runtime/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-opae: $(PROJECT) kernel.pocl
|
||||
SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
SCOPE_JSON_PATH=$(ROOT_DIR)/runtime/opae/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(ROOT_DIR)/runtime/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
|
||||
run-xrt: $(PROJECT) kernel.pocl
|
||||
ifeq ($(TARGET), hw)
|
||||
SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/xrt:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_RT_PATH)/lib:$(ROOT_DIR)/runtime/xrt:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
else
|
||||
XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/xrt:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_RT_PATH)/lib:$(ROOT_DIR)/runtime/xrt:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
|
||||
endif
|
||||
|
||||
.depend: $(SRCS)
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
PROJECT = conv3
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRCS = main.cc
|
||||
PROJECT := conv3
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cc
|
||||
|
||||
OPTS ?= -n32
|
||||
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
PROJECT = dotproduct
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRCS = main.cc oclUtils.cpp shrUtils.cpp cmd_arg_reader.cpp
|
||||
PROJECT := dotproduct
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cc $(SRC_DIR)/oclUtils.cpp $(SRC_DIR)/shrUtils.cpp $(SRC_DIR)/cmd_arg_reader.cpp
|
||||
|
||||
OPTS ?= -size=4096
|
||||
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
PROJECT = guassian
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRCS = main.cc clutils.cpp utils.cpp
|
||||
PROJECT := guassian
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cc $(SRC_DIR)/clutils.cpp $(SRC_DIR)/utils.cpp
|
||||
|
||||
OPTS ?=
|
||||
|
||||
|
|
2
tests/opencl/kmeans/.gitignore
vendored
2
tests/opencl/kmeans/.gitignore
vendored
|
@ -1,2 +0,0 @@
|
|||
kmeans
|
||||
|
|
@ -1,6 +1,11 @@
|
|||
PROJECT = kmeans
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRCS = main.cc read_input.c rmse.c kmeans_clustering.c cluster.c getopt.c
|
||||
PROJECT := kmeans
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cc $(SRC_DIR)/read_input.c $(SRC_DIR)/rmse.c $(SRC_DIR)/kmeans_clustering.c $(SRC_DIR)/cluster.c $(SRC_DIR)/getopt.c
|
||||
|
||||
OPTS ?=
|
||||
|
||||
|
|
BIN
tests/opencl/kmeans/kmeans
Executable file
BIN
tests/opencl/kmeans/kmeans
Executable file
Binary file not shown.
|
@ -1,10 +1,16 @@
|
|||
PROJECT = lbm
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRCS = main.cc args.c parboil_opencl.c gpu_info.c lbm.c ocl.c
|
||||
PROJECT := lbm
|
||||
|
||||
CXXFLAGS += -I.
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cc $(SRC_DIR)/args.c $(SRC_DIR)/parboil_opencl.c $(SRC_DIR)/gpu_info.c $(SRC_DIR)/lbm.c $(SRC_DIR)/ocl.c
|
||||
|
||||
CXXFLAGS += -I$(SRC_DIR)
|
||||
K_CFLAGS += -I$(SRC_DIR)
|
||||
|
||||
# Usage: #iter [-i input_file] [-o output_file]
|
||||
OPTS ?= 1 -i 32_32_8_ldc.of
|
||||
OPTS ?= 1 -i $(SRC_DIR)/32_32_8_ldc.of
|
||||
|
||||
include ../common.mk
|
||||
|
|
|
@ -1,7 +1,12 @@
|
|||
PROJECT = nearn
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRCS = main.cc clutils.cpp utils.cpp
|
||||
PROJECT := nearn
|
||||
|
||||
OPTS ?= filelist.txt
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cc $(SRC_DIR)/clutils.cpp $(SRC_DIR)/utils.cpp
|
||||
|
||||
OPTS ?= $(SRC_DIR)/filelist.txt
|
||||
|
||||
include ../common.mk
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
PROJECT = oclprintf
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRCS = main.cc
|
||||
PROJECT := oclprintf
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cc
|
||||
|
||||
OPTS ?= -n4
|
||||
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
PROJECT = psort
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRCS = main.cc
|
||||
PROJECT := psort
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cc
|
||||
|
||||
OPTS ?= -f -n16
|
||||
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
PROJECT = saxpy
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRCS = main.cc
|
||||
PROJECT := saxpy
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cc
|
||||
|
||||
OPTS ?= -n1024
|
||||
|
||||
|
|
|
@ -1,6 +1,11 @@
|
|||
PROJECT = sfilter
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRCS = main.cc
|
||||
PROJECT := sfilter
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cc
|
||||
|
||||
OPTS ?= -n16
|
||||
|
||||
|
|
|
@ -1,6 +1,14 @@
|
|||
PROJECT = sgemm
|
||||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
SRCS = main.cc
|
||||
PROJECT := sgemm
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cc
|
||||
|
||||
CXXFLAGS += -I$(SRC_DIR)
|
||||
K_CFLAGS += -I$(SRC_DIR)
|
||||
|
||||
OPTS ?= -n32
|
||||
|
||||
|
|
12
tests/opencl/sgemm2/Makefile
Normal file
12
tests/opencl/sgemm2/Makefile
Normal file
|
@ -0,0 +1,12 @@
|
|||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
PROJECT := sgemm2
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cc
|
||||
|
||||
OPTS ?= -n32
|
||||
|
||||
include ../common.mk
|
15
tests/opencl/sgemm2/common.h
Normal file
15
tests/opencl/sgemm2/common.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
// Copyright 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#define LOCAL_SIZE 4
|
37
tests/opencl/sgemm2/kernel.cl
Normal file
37
tests/opencl/sgemm2/kernel.cl
Normal file
|
@ -0,0 +1,37 @@
|
|||
#include "common.h"
|
||||
|
||||
__kernel void sgemm2(__global float *A,
|
||||
__global float *B,
|
||||
__global float *C,
|
||||
const unsigned int N)
|
||||
{
|
||||
int globalRow = get_global_id(1);
|
||||
int globalCol = get_global_id(0);
|
||||
int localRow = get_local_id(1);
|
||||
int localCol = get_local_id(0);
|
||||
|
||||
// Static local memory declaration
|
||||
__local float localA[LOCAL_SIZE][LOCAL_SIZE];
|
||||
__local float localB[LOCAL_SIZE][LOCAL_SIZE];
|
||||
|
||||
float sum = 0.0f;
|
||||
|
||||
// Iterate over blocks
|
||||
for (int k = 0; k < N; k += LOCAL_SIZE) {
|
||||
// Load a block of matrix A into local memory
|
||||
localA[localRow][localCol] = A[globalRow * N + k + localCol];
|
||||
|
||||
// Load a block of matrix B into local memory
|
||||
localB[localRow][localCol] = B[(k + localRow) * N + globalCol];
|
||||
|
||||
// Ensure the entire block is loaded
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
// Compute multiplication for this block
|
||||
for (int j = 0; j < LOCAL_SIZE; j++) {
|
||||
sum += localA[localRow][j] * localB[j][localCol];
|
||||
}
|
||||
}
|
||||
|
||||
C[globalRow * N + globalCol] = sum;
|
||||
}
|
242
tests/opencl/sgemm2/main.cc
Normal file
242
tests/opencl/sgemm2/main.cc
Normal file
|
@ -0,0 +1,242 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <CL/opencl.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <chrono>
|
||||
#include <vector>
|
||||
#include "common.h"
|
||||
|
||||
#define FLOAT_ULP 6
|
||||
|
||||
#define KERNEL_NAME "sgemm2"
|
||||
|
||||
#define CL_CHECK(_expr) \
|
||||
do { \
|
||||
cl_int _err = _expr; \
|
||||
if (_err == CL_SUCCESS) \
|
||||
break; \
|
||||
printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
||||
cleanup(); \
|
||||
exit(-1); \
|
||||
} while (0)
|
||||
|
||||
#define CL_CHECK2(_expr) \
|
||||
({ \
|
||||
cl_int _err = CL_INVALID_VALUE; \
|
||||
decltype(_expr) _ret = _expr; \
|
||||
if (_err != CL_SUCCESS) { \
|
||||
printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
||||
cleanup(); \
|
||||
exit(-1); \
|
||||
} \
|
||||
_ret; \
|
||||
})
|
||||
|
||||
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
|
||||
if (nullptr == filename || nullptr == data || 0 == size)
|
||||
return -1;
|
||||
|
||||
FILE* fp = fopen(filename, "r");
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Failed to load kernel.");
|
||||
return -1;
|
||||
}
|
||||
|
||||
fseek(fp , 0 , SEEK_END);
|
||||
long fsize = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
*data = (uint8_t*)malloc(fsize);
|
||||
*size = fread(*data, 1, fsize, fp);
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool compare_equal(float a, float b) {
|
||||
union fi_t { float f; int32_t i; };
|
||||
fi_t fa, fb;
|
||||
fa.f = a;
|
||||
fb.f = b;
|
||||
auto d = std::abs(fa.i - fb.i);
|
||||
return d <= FLOAT_ULP;
|
||||
}
|
||||
|
||||
static void matmul_cpu(float *C, float *A, float *B, int N) {
|
||||
for (int i = 0; i < N; i++) {
|
||||
for (int j = 0; j < N; j++) {
|
||||
float sum = 0.0f;
|
||||
for (int k = 0; k < N; k++) {
|
||||
sum += A[i * N + k] * B[k * N + j];
|
||||
}
|
||||
C[i * N + j] = sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cl_device_id device_id = NULL;
|
||||
cl_context context = NULL;
|
||||
cl_command_queue commandQueue = NULL;
|
||||
cl_program program = NULL;
|
||||
cl_kernel kernel = NULL;
|
||||
cl_mem a_memobj = NULL;
|
||||
cl_mem b_memobj = NULL;
|
||||
cl_mem c_memobj = NULL;
|
||||
uint8_t *kernel_bin = NULL;
|
||||
|
||||
static void cleanup() {
|
||||
if (commandQueue) clReleaseCommandQueue(commandQueue);
|
||||
if (kernel) clReleaseKernel(kernel);
|
||||
if (program) clReleaseProgram(program);
|
||||
if (a_memobj) clReleaseMemObject(a_memobj);
|
||||
if (b_memobj) clReleaseMemObject(b_memobj);
|
||||
if (c_memobj) clReleaseMemObject(c_memobj);
|
||||
if (context) clReleaseContext(context);
|
||||
if (device_id) clReleaseDevice(device_id);
|
||||
if (kernel_bin) free(kernel_bin);
|
||||
}
|
||||
|
||||
int size = 32;
|
||||
|
||||
static void show_usage() {
|
||||
printf("Usage: [-n size] [-h: help]\n");
|
||||
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "n:h?")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
size = atoi(optarg);
|
||||
break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
show_usage();
|
||||
exit(0);
|
||||
} break;
|
||||
default:
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
uint32_t num_points = size * size;
|
||||
|
||||
printf("Matrix size=%d\n", size);
|
||||
if ((size / LOCAL_SIZE) * LOCAL_SIZE != size) {
|
||||
printf("Error: matrix size must be a multiple of %d\n", LOCAL_SIZE);
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_platform_id platform_id;
|
||||
size_t kernel_size;
|
||||
|
||||
// Getting platform and device information
|
||||
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
|
||||
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
|
||||
|
||||
printf("Create context\n");
|
||||
context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err));
|
||||
|
||||
char device_string[1024];
|
||||
clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL);
|
||||
printf("Using device: %s\n", device_string);
|
||||
|
||||
printf("Allocate device buffers\n");
|
||||
size_t nbytes = num_points * sizeof(float);
|
||||
a_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
|
||||
b_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
|
||||
c_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, nbytes, NULL, &_err));
|
||||
|
||||
printf("Create program from kernel source\n");
|
||||
#ifdef HOSTGPU
|
||||
if (0 != read_kernel_file("kernel.cl", &kernel_bin, &kernel_size))
|
||||
return -1;
|
||||
program = CL_CHECK2(clCreateProgramWithSource(
|
||||
context, 1, (const char**)&kernel_bin, &kernel_size, &_err));
|
||||
#else
|
||||
if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size))
|
||||
return -1;
|
||||
program = CL_CHECK2(clCreateProgramWithBinary(
|
||||
context, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, NULL, &_err));
|
||||
#endif
|
||||
if (program == NULL) {
|
||||
cleanup();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Build program
|
||||
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
||||
|
||||
// Create kernel
|
||||
kernel = CL_CHECK2(clCreateKernel(program, KERNEL_NAME, &_err));
|
||||
|
||||
size_t global_size[2] = {size, size};
|
||||
size_t local_size[2] = {LOCAL_SIZE, LOCAL_SIZE};
|
||||
|
||||
// Set kernel arguments
|
||||
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_memobj));
|
||||
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_memobj));
|
||||
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_memobj));
|
||||
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(uint32_t), &size));
|
||||
|
||||
// Allocate memories for input arrays and output arrays.
|
||||
std::vector<float> h_a(num_points);
|
||||
std::vector<float> h_b(num_points);
|
||||
std::vector<float> h_c(num_points);
|
||||
|
||||
// Generate input values
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
h_a[i] = static_cast<float>(rand()) / RAND_MAX;
|
||||
h_b[i] = static_cast<float>(rand()) / RAND_MAX;
|
||||
}
|
||||
|
||||
// Creating command queue
|
||||
commandQueue = CL_CHECK2(clCreateCommandQueue(context, device_id, 0, &_err));
|
||||
|
||||
printf("Upload source buffers\n");
|
||||
CL_CHECK(clEnqueueWriteBuffer(commandQueue, a_memobj, CL_TRUE, 0, nbytes, h_a.data(), 0, NULL, NULL));
|
||||
CL_CHECK(clEnqueueWriteBuffer(commandQueue, b_memobj, CL_TRUE, 0, nbytes, h_b.data(), 0, NULL, NULL));
|
||||
|
||||
printf("Execute the kernel\n");
|
||||
auto time_start = std::chrono::high_resolution_clock::now();
|
||||
CL_CHECK(clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, global_size, local_size, 0, NULL, NULL));
|
||||
CL_CHECK(clFinish(commandQueue));
|
||||
auto time_end = std::chrono::high_resolution_clock::now();
|
||||
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
|
||||
printf("Elapsed time: %lg ms\n", elapsed);
|
||||
|
||||
printf("Download destination buffer\n");
|
||||
CL_CHECK(clEnqueueReadBuffer(commandQueue, c_memobj, CL_TRUE, 0, nbytes, h_c.data(), 0, NULL, NULL));
|
||||
|
||||
printf("Verify result\n");
|
||||
std::vector<float> ref_vec(num_points);
|
||||
matmul_cpu(ref_vec.data(), h_a.data(), h_b.data(), size);
|
||||
int errors = 0;
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
if (!compare_equal(h_c[i], ref_vec[i])) {
|
||||
if (errors < 100)
|
||||
printf("*** error: [%d] expected=%f, actual=%f\n", i, ref_vec[i], h_c[i]);
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
if (errors != 0) {
|
||||
printf("FAILED! - %d errors\n", errors);
|
||||
} else {
|
||||
printf("PASSED!\n");
|
||||
}
|
||||
|
||||
// Clean up
|
||||
cleanup();
|
||||
|
||||
return errors;
|
||||
}
|
12
tests/opencl/sgemm3/Makefile
Normal file
12
tests/opencl/sgemm3/Makefile
Normal file
|
@ -0,0 +1,12 @@
|
|||
ROOT_DIR := $(realpath ../../..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
PROJECT := sgemm3
|
||||
|
||||
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
|
||||
|
||||
SRCS := $(SRC_DIR)/main.cc
|
||||
|
||||
OPTS ?= -n32
|
||||
|
||||
include ../common.mk
|
73
tests/opencl/sgemm3/kernel.cl
Normal file
73
tests/opencl/sgemm3/kernel.cl
Normal file
|
@ -0,0 +1,73 @@
|
|||
__kernel void sgemm3(__global float *A,
|
||||
__global float *B,
|
||||
__global float *C,
|
||||
const unsigned int N,
|
||||
__local float *localA,
|
||||
__local float *localB)
|
||||
{
|
||||
int globalRow = get_global_id(1);
|
||||
int globalCol = get_global_id(0);
|
||||
int localRow = get_local_id(1);
|
||||
int localCol = get_local_id(0);
|
||||
int localSize = get_local_size(0); // assuming square local size
|
||||
|
||||
float sum = 0.0f;
|
||||
|
||||
// Loop over all blocks of both matrices
|
||||
for (int k = 0; k < N; k += localSize) {
|
||||
// Load block of matrix A to local memory
|
||||
localA[localRow * localSize + localCol] = A[globalRow * N + k + localCol];
|
||||
|
||||
// Load block of matrix B to local memory, adjusting for column-major access
|
||||
localB[localRow * localSize + localCol] = B[(k + localRow) * N + globalCol];
|
||||
|
||||
// Synchronize to make sure the tiles are loaded
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
// Multiply the two matrix blocks and accumulate result
|
||||
for (int j = 0; j < localSize; j++) {
|
||||
sum += localA[localRow * localSize + j] * localB[j * localSize + localCol];
|
||||
}
|
||||
|
||||
// Ensure computation is done before loading next block
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
C[globalRow * N + globalCol] = sum;
|
||||
}
|
||||
|
||||
/*__kernel void sgemm2(__global float *A,
|
||||
__global float *B,
|
||||
__global float *C,
|
||||
const unsigned int N)
|
||||
{
|
||||
int globalRow = get_global_id(1);
|
||||
int globalCol = get_global_id(0);
|
||||
int localRow = get_local_id(1);
|
||||
int localCol = get_local_id(0);
|
||||
|
||||
// Static local memory declaration
|
||||
__local float localA[16][16];
|
||||
__local float localB[16][16];
|
||||
|
||||
float sum = 0.0f;
|
||||
|
||||
// Iterate over blocks
|
||||
for (int k = 0; k < N; k += 16) {
|
||||
// Load a block of matrix A into local memory
|
||||
localA[localRow][localCol] = A[globalRow * N + k + localCol];
|
||||
|
||||
// Load a block of matrix B into local memory
|
||||
localB[localRow][localCol] = B[(k + localRow) * N + globalCol];
|
||||
|
||||
// Ensure the entire block is loaded
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
// Compute multiplication for this block
|
||||
for (int j = 0; j < 16; j++) {
|
||||
sum += localA[localRow][j] * localB[j][localCol];
|
||||
}
|
||||
}
|
||||
|
||||
C[globalRow * N + globalCol] = sum;
|
||||
}*/
|
245
tests/opencl/sgemm3/main.cc
Normal file
245
tests/opencl/sgemm3/main.cc
Normal file
|
@ -0,0 +1,245 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <CL/opencl.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <chrono>
|
||||
#include <vector>
|
||||
|
||||
#define LOCAL_SIZE 16
|
||||
|
||||
#define FLOAT_ULP 6
|
||||
|
||||
#define KERNEL_NAME "sgemm3"
|
||||
|
||||
#define CL_CHECK(_expr) \
|
||||
do { \
|
||||
cl_int _err = _expr; \
|
||||
if (_err == CL_SUCCESS) \
|
||||
break; \
|
||||
printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
||||
cleanup(); \
|
||||
exit(-1); \
|
||||
} while (0)
|
||||
|
||||
#define CL_CHECK2(_expr) \
|
||||
({ \
|
||||
cl_int _err = CL_INVALID_VALUE; \
|
||||
decltype(_expr) _ret = _expr; \
|
||||
if (_err != CL_SUCCESS) { \
|
||||
printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
|
||||
cleanup(); \
|
||||
exit(-1); \
|
||||
} \
|
||||
_ret; \
|
||||
})
|
||||
|
||||
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
|
||||
if (nullptr == filename || nullptr == data || 0 == size)
|
||||
return -1;
|
||||
|
||||
FILE* fp = fopen(filename, "r");
|
||||
if (NULL == fp) {
|
||||
fprintf(stderr, "Failed to load kernel.");
|
||||
return -1;
|
||||
}
|
||||
|
||||
fseek(fp , 0 , SEEK_END);
|
||||
long fsize = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
*data = (uint8_t*)malloc(fsize);
|
||||
*size = fread(*data, 1, fsize, fp);
|
||||
|
||||
fclose(fp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool compare_equal(float a, float b) {
|
||||
union fi_t { float f; int32_t i; };
|
||||
fi_t fa, fb;
|
||||
fa.f = a;
|
||||
fb.f = b;
|
||||
auto d = std::abs(fa.i - fb.i);
|
||||
return d <= FLOAT_ULP;
|
||||
}
|
||||
|
||||
static void matmul_cpu(float *C, float *A, float *B, int N) {
|
||||
for (int i = 0; i < N; i++) {
|
||||
for (int j = 0; j < N; j++) {
|
||||
float sum = 0.0f;
|
||||
for (int k = 0; k < N; k++) {
|
||||
sum += A[i * N + k] * B[k * N + j];
|
||||
}
|
||||
C[i * N + j] = sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cl_device_id device_id = NULL;
|
||||
cl_context context = NULL;
|
||||
cl_command_queue commandQueue = NULL;
|
||||
cl_program program = NULL;
|
||||
cl_kernel kernel = NULL;
|
||||
cl_mem a_memobj = NULL;
|
||||
cl_mem b_memobj = NULL;
|
||||
cl_mem c_memobj = NULL;
|
||||
uint8_t *kernel_bin = NULL;
|
||||
|
||||
static void cleanup() {
|
||||
if (commandQueue) clReleaseCommandQueue(commandQueue);
|
||||
if (kernel) clReleaseKernel(kernel);
|
||||
if (program) clReleaseProgram(program);
|
||||
if (a_memobj) clReleaseMemObject(a_memobj);
|
||||
if (b_memobj) clReleaseMemObject(b_memobj);
|
||||
if (c_memobj) clReleaseMemObject(c_memobj);
|
||||
if (context) clReleaseContext(context);
|
||||
if (device_id) clReleaseDevice(device_id);
|
||||
if (kernel_bin) free(kernel_bin);
|
||||
}
|
||||
|
||||
int size = 32;
|
||||
|
||||
static void show_usage() {
|
||||
printf("Usage: [-n size] [-h: help]\n");
|
||||
}
|
||||
|
||||
static void parse_args(int argc, char **argv) {
|
||||
int c;
|
||||
while ((c = getopt(argc, argv, "n:h?")) != -1) {
|
||||
switch (c) {
|
||||
case 'n':
|
||||
size = atoi(optarg);
|
||||
break;
|
||||
case 'h':
|
||||
case '?': {
|
||||
show_usage();
|
||||
exit(0);
|
||||
} break;
|
||||
default:
|
||||
show_usage();
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
// parse command arguments
|
||||
parse_args(argc, argv);
|
||||
|
||||
uint32_t num_points = size * size;
|
||||
|
||||
printf("Matrix size=%d\n", size);
|
||||
if ((size / LOCAL_SIZE) * LOCAL_SIZE != size) {
|
||||
printf("Error: matrix size must be a multiple of %d\n", LOCAL_SIZE);
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_platform_id platform_id;
|
||||
size_t kernel_size;
|
||||
|
||||
// Getting platform and device information
|
||||
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
|
||||
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
|
||||
|
||||
printf("Create context\n");
|
||||
context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err));
|
||||
|
||||
char device_string[1024];
|
||||
clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL);
|
||||
printf("Using device: %s\n", device_string);
|
||||
|
||||
printf("Allocate device buffers\n");
|
||||
size_t nbytes = num_points * sizeof(float);
|
||||
a_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
|
||||
b_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
|
||||
c_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, nbytes, NULL, &_err));
|
||||
|
||||
printf("Create program from kernel source\n");
|
||||
#ifdef HOSTGPU
|
||||
if (0 != read_kernel_file("kernel.cl", &kernel_bin, &kernel_size))
|
||||
return -1;
|
||||
program = CL_CHECK2(clCreateProgramWithSource(
|
||||
context, 1, (const char**)&kernel_bin, &kernel_size, &_err));
|
||||
#else
|
||||
if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size))
|
||||
return -1;
|
||||
program = CL_CHECK2(clCreateProgramWithBinary(
|
||||
context, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, NULL, &_err));
|
||||
#endif
|
||||
if (program == NULL) {
|
||||
cleanup();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Build program
|
||||
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
|
||||
|
||||
// Create kernel
|
||||
kernel = CL_CHECK2(clCreateKernel(program, KERNEL_NAME, &_err));
|
||||
|
||||
size_t global_size[2] = {size, size};
|
||||
size_t local_size[2] = {LOCAL_SIZE, LOCAL_SIZE};
|
||||
|
||||
// Set kernel arguments
|
||||
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_memobj));
|
||||
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_memobj));
|
||||
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_memobj));
|
||||
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(uint32_t), &size));
|
||||
CL_CHECK(clSetKernelArg(kernel, 4, local_size[0]*local_size[1]*sizeof(float), NULL));
|
||||
CL_CHECK(clSetKernelArg(kernel, 5, local_size[0]*local_size[1]*sizeof(float), NULL));
|
||||
|
||||
// Allocate memories for input arrays and output arrays.
|
||||
std::vector<float> h_a(num_points);
|
||||
std::vector<float> h_b(num_points);
|
||||
std::vector<float> h_c(num_points);
|
||||
|
||||
// Generate input values
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
h_a[i] = static_cast<float>(rand()) / RAND_MAX;
|
||||
h_b[i] = static_cast<float>(rand()) / RAND_MAX;
|
||||
}
|
||||
|
||||
// Creating command queue
|
||||
commandQueue = CL_CHECK2(clCreateCommandQueue(context, device_id, 0, &_err));
|
||||
|
||||
printf("Upload source buffers\n");
|
||||
CL_CHECK(clEnqueueWriteBuffer(commandQueue, a_memobj, CL_TRUE, 0, nbytes, h_a.data(), 0, NULL, NULL));
|
||||
CL_CHECK(clEnqueueWriteBuffer(commandQueue, b_memobj, CL_TRUE, 0, nbytes, h_b.data(), 0, NULL, NULL));
|
||||
|
||||
printf("Execute the kernel\n");
|
||||
auto time_start = std::chrono::high_resolution_clock::now();
|
||||
CL_CHECK(clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, global_size, local_size, 0, NULL, NULL));
|
||||
CL_CHECK(clFinish(commandQueue));
|
||||
auto time_end = std::chrono::high_resolution_clock::now();
|
||||
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
|
||||
printf("Elapsed time: %lg ms\n", elapsed);
|
||||
|
||||
printf("Download destination buffer\n");
|
||||
CL_CHECK(clEnqueueReadBuffer(commandQueue, c_memobj, CL_TRUE, 0, nbytes, h_c.data(), 0, NULL, NULL));
|
||||
|
||||
printf("Verify result\n");
|
||||
std::vector<float> ref_vec(num_points);
|
||||
matmul_cpu(ref_vec.data(), h_a.data(), h_b.data(), size);
|
||||
int errors = 0;
|
||||
for (uint32_t i = 0; i < num_points; ++i) {
|
||||
if (!compare_equal(h_c[i], ref_vec[i])) {
|
||||
if (errors < 100)
|
||||
printf("*** error: [%d] expected=%f, actual=%f\n", i, ref_vec[i], h_c[i]);
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
if (errors != 0) {
|
||||
printf("FAILED! - %d errors\n", errors);
|
||||
} else {
|
||||
printf("PASSED!\n");
|
||||
}
|
||||
|
||||
// Clean up
|
||||
cleanup();
|
||||
|
||||
return errors;
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue