enabling Makefile configuration with build folder support

This commit is contained in:
Blaise Tine 2024-03-30 02:28:39 -07:00
parent 99c91987fb
commit 6b81b26ffc
130 changed files with 1829 additions and 796 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/build*
/.vscode

View file

@ -17,8 +17,6 @@ env:
cache:
directories:
- $TOOLDIR
- $HOME/build32
- $HOME/build64
before_install:
- if [ ! -d "$TOOLDIR" ] || [ -z "$(ls -A $TOOLDIR)" ]; then
@ -34,50 +32,56 @@ stages:
jobs:
include:
- stage: setup
name: build32
script:
- rm -rf $HOME/build32 && cp -r $PWD $HOME/build32
- rm -rf $HOME/build64 && cp -r $PWD $HOME/build64
- make -C $HOME/build32 > /dev/null
- XLEN=64 make -C $HOME/build64 > /dev/null
- mkdir -p $HOME/build32 && cd $HOME/build32
- ../configure
- make -s > /dev/null
- stage: setup
name: build64
script:
- mkdir -p $HOME/build64 && cd $HOME/build64
- XLEN=64 ../configure
- make -s > /dev/null
- stage: test
name: unittest
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --unittest
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --unittest
- stage: test
name: isa
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --isa
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --isa
- stage: test
name: isa64
script: cp -r $HOME/build64 build && cd build && XLEN=64 ./ci/travis_run.py ./ci/regression.sh --isa
script: cd $HOME/build64 && ./ci/travis_run.py ./ci/regression.sh --isa
- stage: test
name: regression
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --regression
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --regression
- stage: test
name: regression64
script: cp -r $HOME/build64 build && cd build && XLEN=64 ./ci/travis_run.py ./ci/regression.sh --regression
script: cd $HOME/build64 && ./ci/travis_run.py ./ci/regression.sh --regression
- stage: test
name: opencl
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --opencl
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --opencl
- stage: test
name: cluster
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --cluster
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --cluster
- stage: test
name: config
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --config
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --config
- stage: test
name: debug
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --debug
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --debug
- stage: test
name: stress0
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --stress0
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --stress0
- stage: test
name: stress1
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --stress1
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --stress1
- stage: test
name: synthesis
script: cp -r $HOME/build32 build && cd build && ./ci/travis_run.py ./ci/regression.sh --synthesis
script: cd $HOME/build32 && ./ci/travis_run.py ./ci/regression.sh --synthesis
- stage: test
name: synthesis64
script: cp -r $HOME/build64 build && cd build && XLEN=64 ./ci/travis_run.py ./ci/regression.sh --synthesis
script: cd $HOME/build64 && ./ci/travis_run.py ./ci/regression.sh --synthesis
after_success:
# Gather code coverage

View file

@ -1,5 +1,7 @@
include config.mk
all:
$(MAKE) -C third_party
$(MAKE) -C $(VORTEX_HOME)/third_party
$(MAKE) -C hw
$(MAKE) -C sim
$(MAKE) -C kernel
@ -14,15 +16,8 @@ clean:
$(MAKE) -C tests clean
clean-all:
$(MAKE) -C third_party clean
$(MAKE) -C hw clean
$(MAKE) -C sim clean
$(MAKE) -C kernel clean
$(MAKE) -C runtime clean
$(MAKE) -C tests clean-all
crtlsim:
$(MAKE) -C sim clean
brtlsim:
$(MAKE) -C sim

View file

@ -51,17 +51,19 @@ More detailed build instructions can be found [here](docs/install_vortex.md).
$ sudo apt-get install build-essential
$ sudo apt-get install git
### Install Vortex codebase
$ git clone --recursive https://github.com/vortexgpgpu/vortex.git
$ git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
$ cd Vortex
### Install prebuilt toolchain
# By default, the toolchain will install to /opt folder which requires sudo access. Alternatively, you could also install the toolchain to a different location of your choice by setting the TOOLDIR environment variable
# By default, the toolchain will install to /opt folder which requires sudo access. Alternatively, you could also install the toolchain to a different location of your choice by setting TOOLDIR
$ export TOOLDIR=$HOME/tools
$ ./ci/toolchain_install.sh --all
$ TOOLDIR=$HOME/tools ./ci/toolchain_install.sh --all
### Set up environment variables
$ source ./ci/toolchain_env.sh
### Build Vortex sources
$ make -s
### Building Vortex
$ mkdir build
$ cd build
$ TOOLDIR=$HOME/tools ../configure
$ make
### Quick demo running vecadd OpenCL kernel on 2 cores
$ ./ci/blackbox.sh --cores=2 --app=vecadd

View file

@ -30,7 +30,7 @@ show_help()
}
SCRIPT_DIR=$(dirname "$0")
VORTEX_HOME=$SCRIPT_DIR/..
ROOT_DIR=$SCRIPT_DIR/..
DRIVER=simx
APP=sgemm
@ -134,16 +134,16 @@ case $DRIVER in
DRIVER_PATH=
;;
simx)
DRIVER_PATH=$VORTEX_HOME/runtime/simx
DRIVER_PATH=$ROOT_DIR/runtime/simx
;;
rtlsim)
DRIVER_PATH=$VORTEX_HOME/runtime/rtlsim
DRIVER_PATH=$ROOT_DIR/runtime/rtlsim
;;
opae)
DRIVER_PATH=$VORTEX_HOME/runtime/opae
DRIVER_PATH=$ROOT_DIR/runtime/opae
;;
xrt)
DRIVER_PATH=$VORTEX_HOME/runtime/xrt
DRIVER_PATH=$ROOT_DIR/runtime/xrt
;;
*)
echo "invalid driver: $DRIVER"
@ -151,12 +151,12 @@ case $DRIVER in
;;
esac
if [ -d "$VORTEX_HOME/tests/opencl/$APP" ];
if [ -d "$ROOT_DIR/tests/opencl/$APP" ];
then
APP_PATH=$VORTEX_HOME/tests/opencl/$APP
elif [ -d "$VORTEX_HOME/tests/regression/$APP" ];
APP_PATH=$ROOT_DIR/tests/opencl/$APP
elif [ -d "$ROOT_DIR/tests/regression/$APP" ];
then
APP_PATH=$VORTEX_HOME/tests/regression/$APP
APP_PATH=$ROOT_DIR/tests/regression/$APP
else
echo "Application folder not found: $APP"
exit -1
@ -204,10 +204,10 @@ export PERF_CLASS=$PERF_CLASS
status=0
# ensure config update
make -C $VORTEX_HOME/hw config > /dev/null
make -C $ROOT_DIR/hw config > /dev/null
# ensure the stub driver is present
make -C $VORTEX_HOME/runtime/stub > /dev/null
make -C $ROOT_DIR/runtime/stub > /dev/null
if [ $DEBUG -ne 0 ]
then

41
ci/datagen.py Executable file
View file

@ -0,0 +1,41 @@
#!/usr/bin/env python3
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import struct
import random
import sys
def create_binary_file(n, filename):
# Open the file in binary write mode
with open(filename, 'wb') as f:
# Write the integer N as 4 bytes
f.write(struct.pack('i', n))
# Generate and write N floating-point numbers
for _ in range(n):
# Generate a random float between 0 and 1
num = random.random()
# Write the float in IEEE 754 format (4 bytes)
f.write(struct.pack('f', num))
if __name__ == "__main__":
if len(sys.argv) != 3:
print("Usage: script.py N filename")
sys.exit(1)
n = int(sys.argv[1])
filename = sys.argv[2]
create_binary_file(n, filename)
print(f"Created binary file '{filename}' containing {n} floats.")

32
config.in Normal file
View file

@ -0,0 +1,32 @@
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
VORTEX_HOME ?= @VORTEX_HOME@
XLEN ?= @XLEN@
TOOLDIR ?= @TOOLDIR@
LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex
ifeq ($(XLEN),64)
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv64-gnu-toolchain
else
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv-gnu-toolchain
endif
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime
VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel

94
configure vendored Executable file
View file

@ -0,0 +1,94 @@
#!/bin/bash
# Copyright © 2019-2023
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# options
XLEN=${XLEN:=32}
TOOLDIR=${TOOLDIR:=/opt}
# project subdirectories to build
SUBDIRS=("." "!ci" "!perf" "hw*" "kernel*" "runtime*" "sim*" "tests*")
# Get the directory of the script
SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
# Determine the current working directory
CURRENT_DIR=$(pwd)
THIRD_PARTY_DIR=$SCRIPT_DIR/third_party
# Path to the template file
TEMPLATE="$SCRIPT_DIR/config.in"
# Output file
OUTPUT="config.mk"
# Function to process config.in and generate config.mk
generate_config() {
if [ -f "$TEMPLATE" ]; then
# Replace tokens
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g " "$TEMPLATE" > "$CURRENT_DIR/$OUTPUT"
else
echo "Template file $TEMPLATE not found."
exit 1
fi
}
# Function to recursively copy Makefiles, skipping the current directory
copy_makefiles() {
local source_dir="$1"
local target_dir="$2"
#echo "source_dir=$source_dir, target_dir=$target_dir"
for pattern in "${SUBDIRS[@]}"; do
if [[ "$pattern" == !* ]]; then
local dir_to_copy="${pattern#!}" # Remove the "!" from the start
cp -r "$source_dir/$dir_to_copy" "$target_dir/"
elif [[ "$pattern" == "." ]]; then
# Handle the current script directory explicitly
if [ -f "$source_dir/Makefile" ]; then
mkdir -p "$target_dir"
cp "$source_dir/Makefile" "$target_dir"
fi
else
# Use find to match the directory pattern and process each matched directory
find "$source_dir" -type d -path "$source_dir/$pattern" 2>/dev/null | while read dir; do
# Compute the relative path of the directory
local rel_path="${dir#$source_dir}"
rel_path="${rel_path#/}" # Remove leading slash, if present
local full_target_dir="$target_dir/$rel_path"
# Function to copy and update file includes
copy_and_update_includes() {
local file="$1"
local dest="$2/$file"
if [ -f "$dir/$file" ]; then
mkdir -p "$2"
cp "$dir/$file" "$dest"
fi
}
# Copy and update Makefile and common.mk if they exist
copy_and_update_includes "Makefile" "$full_target_dir"
copy_and_update_includes "common.mk" "$full_target_dir"
done
fi
done
}
generate_config
if [ "$(realpath "$SCRIPT_DIR")" != "$(realpath "$CURRENT_DIR")" ]; then
copy_makefiles "$SCRIPT_DIR" "$CURRENT_DIR"
fi

View file

@ -26,7 +26,7 @@
3. Download the Vortex codebase:
```
git clone --recursive https://github.com/vortexgpgpu/vortex.git
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
```
4. Install Vortex's prebuilt toolchain:
@ -34,10 +34,9 @@
```
$ cd vortex
# By default, the toolchain will install to /opt folder which requires sudo access. Alternatively, you could also install the toolchain to a different location of your choice by setting the TOOLDIR environment variable
# By default, the toolchain will install to /opt folder which requires sudo access. Alternatively, you could also install the toolchain to a different location of your choice by setting TOOLDIR
$ export TOOLDIR=$HOME/tools
$ ./ci/toolchain_install.sh --all
$ TOOLDIR=$HOME/tools ./ci/toolchain_install.sh --all
```
5. Set up environment:
@ -49,6 +48,9 @@
6. Build Vortex
```
$ mkdir build
$ cd build
$ TOOLDIR=$HOME/tools ../configure
$ make
```
@ -77,7 +79,7 @@ Note: depending on the system, some of the toolchain may need to be recompiled f
4. Download the Vortex codebase:
```
git clone --recursive https://github.com/vortexgpgpu/vortex.git
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
```
5. Install Vortex's prebuilt toolchain:
@ -85,10 +87,9 @@ Note: depending on the system, some of the toolchain may need to be recompiled f
```
$ cd vortex
# By default, the toolchain will install to /opt folder which requires sudo access. Alternatively, you could also install the toolchain to a different location of your choice by setting the TOOLDIR environment variable
# By default, the toolchain will install to /opt folder which requires sudo access. Alternatively, you could also install the toolchain to a different location of your choice by setting TOOLDIR
$ export TOOLDIR=$HOME/tools
$ ./ci/toolchain_install.sh --all
$ TOOLDIR=$HOME/tools ./ci/toolchain_install.sh --all
```
6. Set up environment:
@ -100,5 +101,8 @@ Note: depending on the system, some of the toolchain may need to be recompiled f
7. Build Vortex
```
$ mkdir build
$ cd build
$ TOOLDIR=$HOME/tools ../configure
$ make
```

2
hw/.gitignore vendored
View file

@ -1,2 +0,0 @@
VX_config.h
VX_types.h

View file

@ -1,5 +1,9 @@
RTL_DIR=./rtl
SCRIPT_DIR=./scripts
ROOT_DIR := $(realpath ..)
include $(ROOT_DIR)/config.mk
HW_DIR := $(VORTEX_HOME)/hw
SCRIPT_DIR := $(HW_DIR)/scripts
RTL_DIR := $(HW_DIR)/rtl
all: config

View file

@ -11,11 +11,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_fpu_define.vh"
// Modified port of cast module from fpnew Libray
// reference: https://github.com/pulp-platform/fpnew
`ifdef FPU_DSP
`include "VX_fpu_define.vh"
module VX_fcvt_unit import VX_fpu_pkg::*; #(
parameter LATENCY = 1,
parameter INT_WIDTH = 32,
@ -314,3 +316,4 @@ module VX_fcvt_unit import VX_fpu_pkg::*; #(
);
endmodule
`endif

View file

@ -11,12 +11,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_fpu_define.vh"
// Modified port of noncomp module from fpnew Libray
// reference: https://github.com/pulp-platform/fpnew
`ifdef FPU_DSP
/// Modified port of noncomp module from fpnew Libray
/// reference: https://github.com/pulp-platform/fpnew
`include "VX_fpu_define.vh"
module VX_fncp_unit import VX_fpu_pkg::*; #(
parameter LATENCY = 2,

View file

@ -11,11 +11,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
`ifdef FPU_DSP
`include "VX_fpu_define.vh"
`ifdef FPU_DSP
module VX_fp_classifier import VX_fpu_pkg::*; #(
parameter MAN_BITS = 23,
parameter EXP_BITS = 8

View file

@ -11,12 +11,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_fpu_define.vh"
// Modified port of rouding module from fpnew Libray
// reference: https://github.com/pulp-platform/fpnew
`ifdef FPU_DSP
/// Modified port of rouding module from fpnew Libray
/// reference: https://github.com/pulp-platform/fpnew
`include "VX_fpu_define.vh"
module VX_fp_rounding #(
parameter DAT_WIDTH = 2 // Width of the abolute value, without sign bit

View file

@ -11,10 +11,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_fpu_define.vh"
`ifdef FPU_DSP
`include "VX_fpu_define.vh"
module VX_fpu_cvt import VX_fpu_pkg::*; #(
parameter NUM_LANES = 5,
parameter NUM_PES = `UP(NUM_LANES / `FCVT_PE_RATIO),

View file

@ -11,10 +11,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_fpu_define.vh"
`ifdef FPU_DSP
`include "VX_fpu_define.vh"
module VX_fpu_div import VX_fpu_pkg::*; #(
parameter NUM_LANES = 1,
parameter NUM_PES = `UP(NUM_LANES / `FDIV_PE_RATIO),

View file

@ -11,6 +11,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
`ifdef FPU_DPI
`include "VX_fpu_define.vh"
module VX_fpu_dpi import VX_fpu_pkg::*; #(
@ -485,3 +487,4 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
assign ready_in = per_core_ready_in[core_select];
endmodule
`endif

View file

@ -11,10 +11,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_fpu_define.vh"
`ifdef FPU_DSP
`include "VX_fpu_define.vh"
module VX_fpu_dsp import VX_fpu_pkg::*; #(
parameter NUM_LANES = 4,
parameter TAG_WIDTH = 4,

View file

@ -11,10 +11,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_fpu_define.vh"
`ifdef FPU_DSP
`include "VX_fpu_define.vh"
module VX_fpu_fma import VX_fpu_pkg::*; #(
parameter NUM_LANES = 1,
parameter NUM_PES = `UP(NUM_LANES / `FMA_PE_RATIO),

View file

@ -11,10 +11,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_fpu_define.vh"
`ifdef FPU_FPNEW
`include "VX_fpu_define.vh"
module VX_fpu_fpnew
import VX_fpu_pkg::*;
import fpnew_pkg::*;

View file

@ -11,10 +11,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_fpu_define.vh"
`ifdef FPU_DSP
`include "VX_fpu_define.vh"
module VX_fpu_ncp import VX_fpu_pkg::*; #(
parameter NUM_LANES = 1,
parameter NUM_PES = `UP(NUM_LANES / `FNCP_PE_RATIO),

View file

@ -11,10 +11,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_fpu_define.vh"
`ifdef FPU_DSP
`include "VX_fpu_define.vh"
module VX_fpu_sqrt import VX_fpu_pkg::*; #(
parameter NUM_LANES = 1,
parameter NUM_PES = `UP(NUM_LANES /`FSQRT_PE_RATIO),

View file

@ -1 +0,0 @@
ip_cache/*

1
hw/syn/altera/common.mk Normal file
View file

@ -0,0 +1 @@
include ../common.mk

View file

@ -1 +0,0 @@
build*/*

View file

@ -1,5 +1,8 @@
ROOT_DIR := $(realpath ../../../..)
include $(ROOT_DIR)/config.mk
DEVICE_FAMILY ?= arria10
XLEN ?= 32
PREFIX ?= build$(XLEN)
TARGET ?= fpga
NUM_CORES ?= 1

View file

@ -1,29 +0,0 @@
/unittest/*
!/unittest/Makefile
/smem/*
!/smem/Makefile
/cache/*
!/cache/Makefile
/vortex/*
!/vortex/Makefile
/pipeline/*
!/pipeline/Makefile
/core/*
!/core/Makefile
/top/*
!/top/Makefile
/top-gfx/*
!/top-gfx/Makefile
/test/*
!/test/Makefile
/fpu/*
!/fpu/Makefile

View file

@ -1,3 +1,5 @@
include ../common.mk
PREFIX ?= build
BUILD_DIR=$(PREFIX)_$(DEVICE_FAMILY)

View file

@ -1,3 +1,5 @@
include ../common.mk
RTL_DIR = ../../../../../rtl
AFU_DIR = $(RTL_DIR)/afu/opae
THIRD_PARTY_DIR = ../../../../../../third_party

View file

@ -1,4 +1,4 @@
include ../common.mk
ALL:sim

View file

@ -1,4 +1,4 @@
include ../common.mk
SCRIPT_DIR=./scripts

1
hw/syn/xilinx/common.mk Normal file
View file

@ -0,0 +1 @@
include ../common.mk

View file

@ -1,2 +0,0 @@
/project_1/*
/.Xil/*

View file

@ -1,3 +1,5 @@
include ../common.mk
VIVADO = $(XILINX_VIVADO)/bin/vivado
RTL_DIR = ../../../rtl

View file

@ -1,17 +1,12 @@
XLEN ?= 32
TOOLDIR ?= /opt
ROOT_DIR := $(realpath ../../../../..)
include $(ROOT_DIR)/config.mk
ifeq ($(XLEN),64)
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv64-gnu-toolchain
CFLAGS += -march=rv64imafd -mabi=lp64d
else
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv-gnu-toolchain
CFLAGS += -march=rv32imaf -mabi=ilp32f
endif
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
VORTEX_RT_PATH ?= $(realpath ../../../../../kernel)
BIN2COE_PATH ?= ../../../../../../bin2coe
CC = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc

View file

@ -1 +0,0 @@
/build*/*

View file

@ -1,3 +1,6 @@
ROOT_DIR := $(realpath ../../../..)
include $(ROOT_DIR)/config.mk
ifneq ($(findstring Makefile, $(MAKEFILE_LIST)), Makefile)
help:
$(ECHO) "Makefile Usage:"
@ -11,7 +14,7 @@ endif
TARGET ?= hw
PLATFORM ?=
XLEN ?= 32
NUM_CORES ?= 1
PREFIX ?= build$(XLEN)
MAX_JOBS ?= 8

View file

@ -1 +0,0 @@
build_*/*

View file

@ -1,19 +1,22 @@
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRC_DIR := $(VORTEX_HOME)/hw/syn/yosys
TOP_LEVEL_ENTITY ?= Vortex
PREFIX ?= build
NUM_CORES ?= 1
XLEN ?= 32
SCRIPT_DIR = ../../../scripts
RTL_DIR = ../../../rtl
DPI_DIR = ../../../dpi
THIRD_PARTY_DIR = ../../../../third_party
SCRIPT_DIR := $(VORTEX_HOME)/hw/scripts
RTL_DIR := $(VORTEX_HOME)/hw/rtl
THIRD_PARTY_DIR := $(VORTEX_HOME)/third_party
CP = cp -rf
RMDIR = rm -rf
ECHO = @echo
BUILD_DIR = $(PREFIX)_$(TOP_LEVEL_ENTITY)
BIN_DIR = $(BUILD_DIR)/bin
BUILD_DIR := $(PREFIX)_$(TOP_LEVEL_ENTITY)
BIN_DIR := $(BUILD_DIR)/bin
# control RTL debug tracing states
DBG_TRACE_FLAGS += -DDBG_TRACE_PIPELINE
@ -81,10 +84,10 @@ $(BUILD_DIR)/project.v: gen-sources
cd $(BUILD_DIR); $(SCRIPT_DIR)/sv2v.sh -t$(TOP_LEVEL_ENTITY) -Isrc -oproject.v
build: $(BUILD_DIR)/project.v
cd $(BUILD_DIR); ../synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v
cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v
elaborate: $(BUILD_DIR)/project.v
cd $(BUILD_DIR); ../synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="elaborate"
cd $(BUILD_DIR); $(SRC_DIR)/synth.sh -t$(TOP_LEVEL_ENTITY) -sproject.v -P="elaborate"
clean:
$(RMDIR) $(BUILD_DIR)

View file

@ -1 +0,0 @@
*/obj_dir/*

View file

@ -2,16 +2,19 @@ all:
$(MAKE) -C cache
$(MAKE) -C generic_queue
$(MAKE) -C mem_streamer
$(MAKE) -C top_modules
$(MAKE) -C cache_top
$(MAKE) -C core_top
run:
$(MAKE) -C cache run
$(MAKE) -C generic_queue run
$(MAKE) -C mem_streamer run
$(MAKE) -C top_modules run
$(MAKE) -C cache_top run
$(MAKE) -C core_top run
clean:
$(MAKE) -C cache clean
$(MAKE) -C generic_queue clean
$(MAKE) -C mem_streamer clean
$(MAKE) -C top_modules clean
$(MAKE) -C cache_top clean
$(MAKE) -C core_top clean

View file

@ -1,172 +0,0 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`timescale 1ns/1ps
module VX_tb_divide();
`ifdef TRACE
initial
begin
$dumpfile("trace.vcd");
$dumpvars(0,test);
end
`endif
reg clk;
reg rst;
reg [31:0] numer, denom;
wire [31:0] o_div[0:7], o_rem[0:7];
for (genvar i = 0; i < 8; ++i) begin
VX_divide#(
.WIDTHN(32),
.WIDTHD(32),
.WIDTHQ(32),
.WIDTHR(32),
.PIPELINE(i)
) div(
.clock(clk),
.aclr(rst),
.clken(1'b1),
.numer(numer),
.denom(denom),
.quotient(o_div[i]),
.remainder(o_rem[i])
);
end
initial begin
clk = 0; rst = 0;
numer = 56;
denom = 11;
$display("56 / 11 #0");
if (o_div[0] != 5 || o_rem[0] != 1) begin
$display("PIPE0: div=", o_div[0], " rem=", o_rem[0]);
$display("expected 5,1 EXITING");
$finish();
end
if (o_div[1] != 1'bx || o_rem[1] != 1'bx) begin
$display("PIPE1: div=", o_div[1], " rem=", o_rem[1]);
$display("expected x,x EXITING");
$finish();
end
if (o_div[2] != 1'bx || o_rem[2] != 1'bx) begin
$display("PIPE2: div=", o_div[2], " rem=", o_rem[2]);
$display("expected x,x EXITING");
$finish();
end
if (o_div[3] != 1'bx || o_rem[3] != 1'bx) begin
$display("PIPE3: div=", o_div[3], " rem=", o_rem[3]);
$display("expected x,x EXITING");
$finish();
end
#2;
$display("56 / 11 #2");
if (o_div[0] != 5 || o_rem[0] != 1) begin
$display("PIPE0: div=", o_div[0], " rem=", o_rem[0]);
$display("expected 5,1, EXITING");
$finish();
end
if (o_div[1] != 5 || o_rem[1] != 1) begin
$display("PIPE1: div=", o_div[1], " rem=", o_rem[1]);
$display("expected 5,1 EXITING");
$finish();
end
if (o_div[2] != 1'bx || o_rem[2] != 1'bx) begin
$display("PIPE2: div=", o_div[2], " rem=", o_rem[2]);
$display("expected x,x EXITING");
$finish();
end
if (o_div[3] != 1'bx || o_rem[3] != 1'bx) begin
$display("PIPE3: div=", o_div[3], " rem=", o_rem[3]);
$display("expected x,x EXITING");
$finish();
end
#2;
$display("56 / 11 #4");
if (o_div[0] != 5 || o_rem[0] != 1) begin
$display("PIPE0: div=", o_div[0], " rem=", o_rem[0]);
$display("expected 5,1 EXITING");
$finish();
end
if (o_div[1] != 5 || o_rem[1] != 1) begin
$display("PIPE1: div=", o_div[1], " rem=", o_rem[1]);
$display("expected 5,1 EXITING");
$finish();
end
if (o_div[2] != 5 || o_rem[2] != 1) begin
$display("PIPE2: div=", o_div[2], " rem=", o_rem[2]);
$display("expected 5,1 EXITING");
$finish();
end
if (o_div[3] != 1'bx || o_rem[3] != 1'bx) begin
$display("PIPE3: div=", o_div[3], " rem=", o_rem[3]);
$display("expected x,x EXITING");
$finish();
end
#2;
$display("56 / 11 #6");
if (o_div[0] != 5 || o_rem[0] != 1) begin
$display("PIPE0: div=", o_div[0], " rem=", o_rem[0]);
$display("expected 5,1 EXITING");
$finish();
end
if (o_div[1] != 5 || o_rem[1] != 1) begin
$display("PIPE1: div=", o_div[1], " rem=", o_rem[1]);
$display("expected 5,1 EXITING");
$finish();
end
if (o_div[2] != 5 || o_rem[2] != 1) begin
$display("PIPE2: div=", o_div[2], " rem=", o_rem[2]);
$display("expected 5,1 EXITING");
$finish();
end
if (o_div[3] != 5 || o_rem[3] != 1) begin
$display("PIPE3: div=", o_div[3], " rem=", o_rem[3]);
$display("expected 5,1 EXITING");
$finish();
end
$display("PASS");
$finish();
end
always #1
clk = !clk;
endmodule

View file

@ -1,74 +1,26 @@
DESTDIR ?= .
RTL_DIR = ../../rtl
DPI_DIR = ../../dpi
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
CONFIGS +=
PARAMS +=
PROJECT := cache
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common
CXXFLAGS += $(CONFIGS)
RTL_DIR := $(VORTEX_HOME)/hw/rtl
DPI_DIR := $(VORTEX_HOME)/hw/dpi
LDFLAGS +=
SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT)
# control RTL debug tracing states
DBG_TRACE_FLAGS += -DDBG_TRACE_CACHE
CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common
CXXFLAGS += -I$(ROOT_DIR)/hw
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
SRCS := $(DPI_DIR)/util_dpi.cpp
SRCS += $(SRC_DIR)/cachesim.cpp $(SRC_DIR)/testbench.cpp
RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv
DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv
SRCS = cachesim.cpp testbench.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp
RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
TOP = VX_cache_top
TOP := VX_cache_top
VL_FLAGS = --exe
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += -DSIMULATION -DSV_DPI
VL_FLAGS += $(CONFIGS)
VL_FLAGS += $(PARAMS)
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(RTL_PKGS)
VL_FLAGS += --cc $(TOP) --top-module $(TOP)
# Enable Verilator multithreaded simulation
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
VL_FLAGS += -j $(THREADS)
#VL_FLAGS += --threads $(THREADS)
# Debugigng
ifdef DEBUG
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG
endif
# Enable perf counters
ifdef PERF
VL_FLAGS += -DPERF_ENABLE
CXXFLAGS += -DPERF_ENABLE
endif
PROJECT = cache
all: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT): $(SRCS)
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$@
run: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT)
waves: trace.vcd
gtkwave -o trace.vcd
clean:
rm -rf obj_dir $(DESTDIR)/$(PROJECT)
include ../common.mk

View file

@ -0,0 +1,26 @@
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
PROJECT := cache_top
RTL_DIR := $(VORTEX_HOME)/hw/rtl
DPI_DIR := $(VORTEX_HOME)/hw/dpi
SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT)
CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common
CXXFLAGS += -I$(ROOT_DIR)/hw
SRCS := $(DPI_DIR)/util_dpi.cpp
SRCS += $(SRC_DIR)/main.cpp
DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE
RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv
RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache
TOP := VX_cache_top
include ../common.mk

View file

@ -1,26 +1,18 @@
DESTDIR ?= .
RTL_DIR = ../../rtl
DPI_DIR = ../../dpi
CONFIGS +=
PARAMS +=
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common
CXXFLAGS += $(CONFIGS)
LDFLAGS +=
RTL_PKGS +=
RTL_INCLUDE +=
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
RTL_PKGS = $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache -I$(RTL_DIR)/fpu -I$(RTL_DIR)/core
SRCS = main.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp
VL_FLAGS = --exe
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
@ -30,6 +22,7 @@ VL_FLAGS += $(CONFIGS)
VL_FLAGS += $(PARAMS)
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(RTL_PKGS)
VL_FLAGS += --cc $(TOP) --top-module $(TOP)
# Enable Verilator multithreaded simulation
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
@ -51,17 +44,16 @@ ifdef PERF
CXXFLAGS += -DPERF_ENABLE
endif
PROJECT = top_modules
all: $(DESTDIR)/$(PROJECT)
all: build
$(DESTDIR)/$(PROJECT): $(SRCS)
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$@
build: $(SRCS)
verilator --build $(VL_FLAGS) --cc VX_cache_top --top-module VX_cache_top $^ -CFLAGS '$(CXXFLAGS)'
verilator --build $(VL_FLAGS) --cc VX_core_top --top-module VX_core_top $^ -CFLAGS '$(CXXFLAGS)'
run: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT)
run:
waves:
waves: trace.vcd
gtkwave -o trace.vcd
clean:
rm -rf obj_dir
rm -rf *.vcd obj_dir $(DESTDIR)/$(PROJECT)

View file

@ -0,0 +1,26 @@
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
PROJECT := core_top
RTL_DIR := $(VORTEX_HOME)/hw/rtl
DPI_DIR := $(VORTEX_HOME)/hw/dpi
SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT)
CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common
CXXFLAGS += -I$(ROOT_DIR)/hw
SRCS := $(DPI_DIR)/util_dpi.cpp
SRCS += $(SRC_DIR)/main.cpp
DBG_TRACE_FLAGS := -DDBG_TRACE_CACHE
RTL_PKGS := $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
RTL_INCLUDE += -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/mem -I$(RTL_DIR)/fpu -I$(RTL_DIR)/core
TOP := VX_core_top
include ../common.mk

View file

@ -0,0 +1,49 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "vl_simulator.h"
#ifndef TRACE_START_TIME
#define TRACE_START_TIME 0ull
#endif
#ifndef TRACE_STOP_TIME
#define TRACE_STOP_TIME -1ull
#endif
static uint64_t timestamp = 0;
static bool trace_enabled = false;
static uint64_t trace_start_time = TRACE_START_TIME;
static uint64_t trace_stop_time = TRACE_STOP_TIME;
double sc_time_stamp() {
return timestamp;
}
bool sim_trace_enabled() {
if (timestamp >= trace_start_time
&& timestamp < trace_stop_time)
return true;
return trace_enabled;
}
void sim_trace_enable(bool enable) {
trace_enabled = enable;
}
int main(int argc, char **argv) {
// Initialize Verilators variables
Verilated::commandArgs(argc, argv);
return 0;
}

View file

@ -1,65 +1,24 @@
DESTDIR ?= .
RTL_DIR = ../../rtl
DPI_DIR = ../../dpi
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
CONFIGS +=
PARAMS +=
PROJECT := generic_queue
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common
CXXFLAGS += $(CONFIGS)
RTL_DIR := $(VORTEX_HOME)/hw/rtl
DPI_DIR := $(VORTEX_HOME)/hw/dpi
LDFLAGS +=
SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT)
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common
RTL_PKGS +=
SRCS := $(DPI_DIR)/util_dpi.cpp
SRCS += $(SRC_DIR)/main.cpp
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
DBG_TRACE_FLAGS :=
SRCS = main.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp
RTL_PKGS :=
TOP = VX_fifo_queue
RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
VL_FLAGS = --exe
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += -DSIMULATION -DSV_DPI
VL_FLAGS += $(CONFIGS)
VL_FLAGS += $(PARAMS)
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(RTL_PKGS)
VL_FLAGS += --cc $(TOP) --top-module $(TOP)
TOP := VX_fifo_queue
# Enable Verilator multithreaded simulation
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
VL_FLAGS += -j $(THREADS)
#VL_FLAGS += --threads $(THREADS)
# Debugigng
ifdef DEBUG
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG
endif
PROJECT = generic_queue
all: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT): $(SRCS)
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$@
run: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT)
waves: trace.vcd
gtkwave -o trace.vcd
clean:
rm -rf obj_dir $(DESTDIR)/$(PROJECT)
include ../common.mk

View file

@ -1,65 +1,24 @@
DESTDIR ?= .
RTL_DIR = ../../rtl
DPI_DIR = ../../dpi
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
CONFIGS +=
PARAMS +=
PROJECT := mem_streamer
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../../.. -I../../common -I../../../../sim/common
CXXFLAGS += $(CONFIGS)
RTL_DIR := $(VORTEX_HOME)/hw/rtl
DPI_DIR := $(VORTEX_HOME)/hw/dpi
LDFLAGS +=
SRC_DIR := $(VORTEX_HOME)/hw/unittest/$(PROJECT)
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
CXXFLAGS := -I$(SRC_DIR) -I$(VORTEX_HOME)/hw/unittest/common -I$(VORTEX_HOME)/sim/common
RTL_PKGS +=
SRCS := $(DPI_DIR)/util_dpi.cpp
SRCS += $(SRC_DIR)/memsim.cpp $(SRC_DIR)/ram.cpp
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
DBG_TRACE_FLAGS :=
SRCS = memsim.cpp ram.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp
RTL_PKGS :=
TOP = VX_mem_scheduler
RTL_INCLUDE := -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs
VL_FLAGS = --exe
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += -DSIMULATION -DSV_DPI
VL_FLAGS += $(CONFIGS)
VL_FLAGS += $(PARAMS)
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(RTL_PKGS)
VL_FLAGS += --cc $(TOP) --top-module $(TOP)
TOP := VX_mem_scheduler
# Enable Verilator multithreaded simulation
THREADS ?= $(shell python -c 'import multiprocessing as mp; print(mp.cpu_count())')
VL_FLAGS += -j $(THREADS)
#VL_FLAGS += --threads $(THREADS)
# Debugigng
ifdef DEBUG
VL_FLAGS += --trace --trace-structs $(DBG_FLAGS)
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG
endif
PROJECT = mem_streamer
all: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT): $(SRCS)
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS)' -o ../$@
run: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT)
waves: trace.vcd
gtkwave -o trace.vcd
clean:
rm -rf obj_dir $(DESTDIR)/$(PROJECT)
include ../common.mk

0
kernel/.gitignore vendored
View file

View file

@ -1,18 +1,14 @@
XLEN ?= 32
TOOLDIR ?= /opt
ROOT_DIR := $(realpath ..)
include $(ROOT_DIR)/config.mk
ifeq ($(XLEN),64)
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv64-gnu-toolchain
CFLAGS += -march=rv64imafd -mabi=lp64d
else
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv-gnu-toolchain
CFLAGS += -march=rv32imaf -mabi=ilp32f
endif
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex
INC_DIR := $(VORTEX_HOME)/kernel/include
SRC_DIR := $(VORTEX_HOME)/kernel/src
LLVM_CFLAGS += --sysroot=$(RISCV_SYSROOT)
LLVM_CFLAGS += --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH)
@ -34,27 +30,27 @@ DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump
CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy
CFLAGS += -O3 -mcmodel=medany -fno-exceptions -nostartfiles -fdata-sections -ffunction-sections
CFLAGS += -I./include -I../hw
CFLAGS += -I$(INC_DIR) -I$(ROOT_DIR)/hw
CFLAGS += -DXLEN_$(XLEN)
PROJECT = libvortexrt
PROJECT := libvortexrt
SRCS = ./src/vx_start.S ./src/vx_syscalls.c ./src/vx_print.S ./src/tinyprintf.c ./src/vx_print.c ./src/vx_spawn.c ./src/vx_serial.S ./src/vx_perf.c
SRCS = $(SRC_DIR)/vx_start.S $(SRC_DIR)/vx_syscalls.c $(SRC_DIR)/vx_print.S $(SRC_DIR)/tinyprintf.c $(SRC_DIR)/vx_print.c $(SRC_DIR)/vx_spawn.c $(SRC_DIR)/vx_serial.S $(SRC_DIR)/vx_perf.c
OBJS := $(addsuffix .o, $(notdir $(SRCS)))
OBJS = $(addsuffix .o, $(notdir $(SRCS)))
all: $(PROJECT).a $(PROJECT).dump
$(PROJECT).dump: $(PROJECT).a
$(DP) -D $(PROJECT).a > $(PROJECT).dump
%.S.o: src/%.S
%.S.o: $(SRC_DIR)/%.S
$(CC) $(CFLAGS) -c $< -o $@
%.cpp.o: src/%.cpp
%.cpp.o: $(SRC_DIR)/%.cpp
$(CXX) $(CFLAGS) -c $< -o $@
%.c.o: src/%.c
%.c.o: $(SRC_DIR)/%.c
$(CC) $(CFLAGS) -c $< -o $@
$(PROJECT).a: $(OBJS)

1
perf/.gitignore vendored
View file

@ -1 +0,0 @@
**/*.log

8
runtime/common.mk Normal file
View file

@ -0,0 +1,8 @@
ROOT_DIR := $(realpath ../..)
include $(ROOT_DIR)/config.mk
SIM_DIR := $(VORTEX_HOME)/sim
HW_DIR := $(VORTEX_HOME)/hw
INC_DIR := $(VORTEX_HOME)/runtime/include
COMMON_DIR := $(VORTEX_HOME)/runtime/common

View file

@ -1 +0,0 @@
/obj_dir/*

View file

@ -1,12 +1,15 @@
XLEN ?= 32
include ../common.mk
TARGET ?= opaesim
DESTDIR ?= $(CURDIR)
SIM_DIR = $(abspath ../../sim)
HW_DIR = $(abspath ../../hw)
SYN_DIR = $(HW_DIR)/syn/altera/opae
SYN_DIR := $(HW_DIR)/syn/altera/opae
SRC_DIR := $(VORTEX_HOME)/runtime/opae
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I../common -I$(HW_DIR) -I$(DESTDIR)
CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(DESTDIR)
CXXFLAGS += -DXLEN_$(XLEN)
# Position independent code
@ -20,7 +23,7 @@ CXXFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared -luuid -ldl -pthread
SRCS = vortex.cpp driver.cpp ../common/utils.cpp
SRCS = $(SRC_DIR)/vortex.cpp $(SRC_DIR)/driver.cpp $(COMMON_DIR)/utils.cpp
# set up target types
ifeq ($(TARGET), opaesim)
@ -46,7 +49,7 @@ endif
# Enable scope logic analyzer
ifdef SCOPE
CXXFLAGS += -DSCOPE
SRCS += ../common/scope.cpp
SRCS += $(COMMON_DIR)/scope.cpp
endif
# Enable perf counters
@ -54,16 +57,16 @@ ifdef PERF
CXXFLAGS += -DPERF_ENABLE
endif
PROJECT = libvortex.so
PROJECT := libvortex.so
all: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/libopae-c-sim.so:
DESTDIR=$(DESTDIR) $(MAKE) -C $(SIM_DIR)/opaesim $(DESTDIR)/libopae-c-sim.so
DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/opaesim $(DESTDIR)/libopae-c-sim.so
$(DESTDIR)/$(PROJECT): $(SRCS) $(OPAESIM)
$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $@
clean:
DESTDIR=$(DESTDIR) $(MAKE) -C $(SIM_DIR)/opaesim clean
DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/opaesim clean
rm -rf $(DESTDIR)/$(PROJECT)

View file

@ -1 +0,0 @@
/obj_dir/*

View file

@ -1,10 +1,11 @@
XLEN ?= 32
include ../common.mk
DESTDIR ?= $(CURDIR)
SIM_DIR = $(abspath ../../sim)
HW_DIR = $(abspath ../../hw)
SRC_DIR := $(VORTEX_HOME)/runtime/rtlsim
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I../common -I$(HW_DIR) -I$(SIM_DIR)/rtlsim -I$(SIM_DIR)/common
CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(SIM_DIR)/rtlsim -I$(COMMON_DIR) -I$(SIM_DIR)/common
CXXFLAGS += -DXLEN_$(XLEN)
# Position independent code
@ -19,7 +20,7 @@ CXXFLAGS += -DDUMP_PERF_STATS
LDFLAGS += -shared -pthread
LDFLAGS += -L$(DESTDIR) -lrtlsim
SRCS = vortex.cpp ../common/utils.cpp
SRCS := $(SRC_DIR)/vortex.cpp $(COMMON_DIR)/utils.cpp
# Debugigng
ifdef DEBUG
@ -33,14 +34,14 @@ ifdef PERF
CXXFLAGS += -DPERF_ENABLE
endif
PROJECT = libvortex.so
PROJECT := libvortex.so
all: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT): $(SRCS)
DESTDIR=$(DESTDIR) $(MAKE) -C $(SIM_DIR)/rtlsim $(DESTDIR)/librtlsim.so
DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/rtlsim $(DESTDIR)/librtlsim.so
$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $@
clean:
DESTDIR=$(DESTDIR) $(MAKE) -C $(SIM_DIR)/rtlsim clean
DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/rtlsim clean
rm -rf $(DESTDIR)/$(PROJECT) *.o

View file

@ -1,2 +0,0 @@
obj_dir
libvortex.so

View file

@ -1,11 +1,12 @@
XLEN ?= 32
include ../common.mk
DESTDIR ?= $(CURDIR)
SIM_DIR = $(abspath ../../sim)
HW_DIR = $(abspath ../../hw)
SRC_DIR := $(VORTEX_HOME)/runtime/simx
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I../include -I../common -I$(HW_DIR) -I$(SIM_DIR)/simx -I$(SIM_DIR)/common
CXXFLAGS += -I$(INC_DIR) -I../common -I$(ROOT_DIR)/hw -I$(SIM_DIR)/simx -I$(COMMON_DIR) -I$(SIM_DIR)/common
CXXFLAGS += $(CONFIGS)
CXXFLAGS += -DDUMP_PERF_STATS
CXXFLAGS += -DXLEN_$(XLEN)
@ -13,7 +14,7 @@ CXXFLAGS += -DXLEN_$(XLEN)
LDFLAGS += -shared -pthread
LDFLAGS += -L$(DESTDIR) -lsimx
SRCS = vortex.cpp ../common/utils.cpp
SRCS := $(SRC_DIR)/vortex.cpp $(COMMON_DIR)/utils.cpp
# Debugigng
ifdef DEBUG
@ -22,14 +23,14 @@ else
CXXFLAGS += -O2 -DNDEBUG
endif
PROJECT = libvortex.so
PROJECT := libvortex.so
all: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT): $(SRCS)
DESTDIR=$(DESTDIR) $(MAKE) -C $(SIM_DIR)/simx $(DESTDIR)/libsimx.so
DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/simx $(DESTDIR)/libsimx.so
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
clean:
DESTDIR=$(DESTDIR) $(MAKE) -C $(SIM_DIR)/simx clean
DESTDIR=$(DESTDIR) $(MAKE) -C $(ROOT_DIR)/sim/simx clean
rm -rf $(DESTDIR)/$(PROJECT) *.o

View file

@ -1,19 +1,18 @@
XLEN ?= 32
include ../common.mk
DESTDIR ?= $(CURDIR)
SIM_DIR = $(abspath ../../sim)
HW_DIR = $(abspath ../../hw)
SRC_DIR := $(VORTEX_HOME)/runtime/stub
CXXFLAGS += -std=c++11 -O2 -Wall -Wextra -pedantic -Wfatal-errors
CXXFLAGS += -I../include -I../common -I$(HW_DIR) -I$(SIM_DIR)/common
CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(SIM_DIR)/common
CXXFLAGS += -fPIC
LDFLAGS += -shared -pthread
SRCS = vortex.cpp ../common/utils.cpp
SRCS := $(SRC_DIR)/vortex.cpp $(COMMON_DIR)/utils.cpp
PROJECT = libvortex.so
PROJECT := libvortex.so
all: $(PROJECT)

View file

@ -1,15 +1,17 @@
include ../common.mk
SRC_DIR := $(VORTEX_HOME)/runtime/xrt
CXXFLAGS += -std=c++14 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -I../include -I../common -I../../hw -I$(XILINX_XRT)/include -I../../sim/common
CXXFLAGS += -I$(INC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw -I$(XILINX_XRT)/include -I$(SIM_DIR)/common
CXXFLAGS += -fPIC
LDFLAGS += -shared -pthread
LDFLAGS += -L$(XILINX_XRT)/lib -luuid -lxrt_coreutil
SRCS = vortex.cpp ../common/utils.cpp ../../sim/common/util.cpp
SRCS := $(SRC_DIR)/vortex.cpp $(COMMON_DIR)/utils.cpp $(SIM_DIR)/common/util.cpp
PROJECT = libvortex.so
PROJECT := libvortex.so
# Debugigng
ifdef DEBUG
@ -21,7 +23,7 @@ endif
# Enable scope logic analyzer
ifdef SCOPE
CXXFLAGS += -DSCOPE
SRCS += ../common/scope.cpp
SRCS += $(COMMON_DIR)/scope.cpp
endif
all: $(PROJECT)

12
sim/common.mk Normal file
View file

@ -0,0 +1,12 @@
ROOT_DIR := $(realpath ../..)
include $(ROOT_DIR)/config.mk
HW_DIR := $(VORTEX_HOME)/hw
RTL_DIR := $(HW_DIR)/rtl
DPI_DIR := $(HW_DIR)/dpi
AFU_DIR := $(RTL_DIR)/afu/opae
SCRIPT_DIR := $(HW_DIR)/scripts
COMMON_DIR := $(VORTEX_HOME)/sim/common
THIRD_PARTY_DIR := $(VORTEX_HOME)/third_party

View file

@ -1 +0,0 @@
/obj_dir/*

View file

@ -1,16 +1,12 @@
XLEN ?= 32
include ../common.mk
DESTDIR ?= $(CURDIR)
HW_DIR = $(abspath ../../hw)
COMMON_DIR = $(abspath ../common)
THIRD_PARTY_DIR = $(abspath ../../third_party)
RTL_DIR = $(HW_DIR)/rtl
DPI_DIR = $(HW_DIR)/dpi
AFU_DIR = $(RTL_DIR)/afu/opae
SCRIPT_DIR = $(HW_DIR)/scripts
SRC_DIR := $(VORTEX_HOME)/sim/opaesim
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I$(CURDIR) -I$(HW_DIR) -I$(COMMON_DIR) -I$(DESTDIR)
CXXFLAGS += -I$(SRC_DIR) -I$(ROOT_DIR)/hw -I$(COMMON_DIR) -I$(DESTDIR)
CXXFLAGS += -I/$(THIRD_PARTY_DIR)/softfloat/source/include
CXXFLAGS += -I/$(THIRD_PARTY_DIR)
CXXFLAGS += -DXLEN_$(XLEN)
@ -52,7 +48,7 @@ DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
SRCS += $(CURDIR)/fpga.cpp $(CURDIR)/opae_sim.cpp
SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/opae_sim.cpp
RTL_PKGS = $(AFU_DIR)/local_mem_cfg_pkg.sv $(AFU_DIR)/ccip/ccip_if_pkg.sv
RTL_PKGS += $(RTL_DIR)/VX_gpu_pkg.sv $(RTL_DIR)/fpu/VX_fpu_pkg.sv
@ -62,7 +58,7 @@ ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
RTL_PKGS += $(THIRD_PARTY_DIR)/fpnew/src/fpnew_pkg.sv $(THIRD_PARTY_DIR)/fpnew/src/common_cells/src/cf_math_pkg $(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
FPU_INCLUDE += -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
endif
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
RTL_INCLUDE = -I$(SRC_DIR) -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip
TOP = vortex_afu_shim
@ -73,7 +69,7 @@ VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += -DSIMULATION -DSV_DPI
VL_FLAGS += -DXLEN_$(XLEN)
VL_FLAGS += $(CONFIGS)
VL_FLAGS += verilator.vlt
VL_FLAGS += $(SRC_DIR)/verilator.vlt
VL_FLAGS += $(RTL_INCLUDE)
VL_FLAGS += $(RTL_PKGS)
VL_FLAGS += $(DBG_SCOPE_FLAGS)
@ -111,7 +107,7 @@ endif
VL_FLAGS += -DNOPAE
CXXFLAGS += -DNOPAE
PROJECT = libopae-c-sim.so
PROJECT := libopae-c-sim.so
all: $(DESTDIR)/$(PROJECT)

View file

@ -1 +0,0 @@
/obj_dir/*

View file

@ -1,14 +1,12 @@
XLEN ?= 32
include ../common.mk
DESTDIR ?= $(CURDIR)
HW_DIR = $(abspath ../../hw)
COMMON_DIR = $(abspath ../common)
THIRD_PARTY_DIR = $(abspath ../../third_party)
RTL_DIR = $(HW_DIR)/rtl
DPI_DIR = $(HW_DIR)/dpi
SRC_DIR = $(VORTEX_HOME)/sim/rtlsim
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I$(HW_DIR) -I$(COMMON_DIR)
CXXFLAGS += -I$(ROOT_DIR)/hw -I$(COMMON_DIR)
CXXFLAGS += -I$(THIRD_PARTY_DIR)/softfloat/source/include
CXXFLAGS += -I$(THIRD_PARTY_DIR)
CXXFLAGS += -DXLEN_$(XLEN)
@ -37,7 +35,7 @@ RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interface
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
SRCS += $(CURDIR)/processor.cpp
SRCS += $(SRC_DIR)/processor.cpp
ifdef AXI_BUS
TOP = Vortex_axi
@ -50,7 +48,7 @@ VL_FLAGS = --exe
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += verilator.vlt
VL_FLAGS += $(SRC_DIR)/verilator.vlt
VL_FLAGS += -DSIMULATION -DSV_DPI
VL_FLAGS += -DXLEN_$(XLEN)
VL_FLAGS += $(CONFIGS)
@ -80,11 +78,11 @@ ifdef PERF
CXXFLAGS += -DPERF_ENABLE
endif
PROJECT = rtlsim
PROJECT := rtlsim
all: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT): $(SRCS) $(CURDIR)/main.cpp
$(DESTDIR)/$(PROJECT): $(SRCS) $(SRC_DIR)/main.cpp
verilator --build $(VL_FLAGS) $^ -CFLAGS '$(CXXFLAGS) -DSTARTUP_ADDR=0x80000000' -LDFLAGS '$(LDFLAGS)' --Mdir $(DESTDIR)/obj_dir -o $@
$(DESTDIR)/lib$(PROJECT).so: $(SRCS)

View file

@ -1,12 +1,12 @@
XLEN ?= 32
DESTDIR ?= $(CURDIR)
HW_DIR = $(abspath ../../hw)
COMMON_DIR = $(abspath ../common)
THIRD_PARTY_DIR = $(abspath ../../third_party)
include ../common.mk
CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors
DESTDIR ?= $(CURDIR)
SRC_DIR = $(VORTEX_HOME)/sim/simx
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -fPIC -Wno-maybe-uninitialized
CXXFLAGS += -I$(CURDIR) -I$(COMMON_DIR) -I$(HW_DIR)
CXXFLAGS += -I$(SRC_DIR) -I$(COMMON_DIR) -I$(ROOT_DIR)/hw
CXXFLAGS += -I$(THIRD_PARTY_DIR)/softfloat/source/include
CXXFLAGS += -I$(THIRD_PARTY_DIR)
CXXFLAGS += -DXLEN_$(XLEN)
@ -16,7 +16,7 @@ LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
LDFLAGS += -L$(THIRD_PARTY_DIR)/ramulator -lramulator
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp
SRCS += processor.cpp cluster.cpp socket.cpp core.cpp emulator.cpp decode.cpp execute.cpp func_unit.cpp cache_sim.cpp mem_sim.cpp local_mem.cpp mem_coalescer.cpp dcrs.cpp types.cpp
SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
# Debugigng
ifdef DEBUG
@ -26,11 +26,11 @@ else
CXXFLAGS += -O2 -DNDEBUG
endif
PROJECT = simx
PROJECT := simx
all: $(DESTDIR)/$(PROJECT)
$(DESTDIR)/$(PROJECT): $(SRCS) main.cpp
$(DESTDIR)/$(PROJECT): $(SRCS) $(SRC_DIR)/main.cpp
$(CXX) $(CXXFLAGS) -DSTARTUP_ADDR=0x80000000 $^ $(LDFLAGS) -o $@
$(DESTDIR)/lib$(PROJECT).so: $(SRCS)

View file

@ -15,6 +15,7 @@
#define __WARP_H
#include <vector>
#include <sstream>
#include <stack>
#include <mem.h>
#include "types.h"

1
tests/.gitignore vendored
View file

@ -1 +0,0 @@
**/*.log

View file

@ -1,4 +1,7 @@
all: kernel regression opencl riscv unittest
all: unittest kernel regression opencl riscv
unittest:
$(MAKE) -C unittest
kernel:
$(MAKE) -C kernel
@ -14,21 +17,18 @@ endif
riscv:
$(MAKE) -C riscv
unittest:
$(MAKE) -C unittest
clean:
$(MAKE) -C unittest clean
$(MAKE) -C kernel clean
$(MAKE) -C regression clean
$(MAKE) -C opencl clean
$(MAKE) -C riscv clean
$(MAKE) -C unittest clean
clean-all:
$(MAKE) -C unittest clean
$(MAKE) -C kernel clean
$(MAKE) -C regression clean-all
$(MAKE) -C opencl clean-all
$(MAKE) -C riscv clean
$(MAKE) -C unittest clean
.PHONY: all kernel regression opencl riscv unittest
.PHONY: all unittest kernel regression opencl riscv

View file

@ -1,29 +1,20 @@
XLEN ?= 32
TOOLDIR ?= /opt
ROOT_DIR := $(realpath ../../..)
ifeq ($(XLEN),64)
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv64-gnu-toolchain
CFLAGS += -march=rv64imafd -mabi=lp64d
else
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv-gnu-toolchain
CFLAGS += -march=rv32imaf -mabi=ilp32f
endif
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
VORTEX_KN_PATH ?= $(realpath ../../../kernel)
CC = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc
AR = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-gcc-ar
DP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objdump
CP = $(RISCV_TOOLCHAIN_PATH)/bin/$(RISCV_PREFIX)-objcopy
SIM_DIR = ../../../sim
CFLAGS += -O3 -mcmodel=medany -fno-exceptions -nostartfiles -fdata-sections -ffunction-sections
CFLAGS += -I$(VORTEX_KN_PATH)/include -I$(VORTEX_KN_PATH)/../hw
CFLAGS += -I$(VORTEX_KN_PATH)/include -I$(ROOT_DIR)/hw
LDFLAGS += -lm -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/linker/vx_link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 $(VORTEX_KN_PATH)/libvortexrt.a
LDFLAGS += -lm -Wl,-Bstatic,--gc-sections,-T,$(VORTEX_KN_PATH)/linker/vx_link$(XLEN).ld,--defsym=STARTUP_ADDR=0x80000000 $(ROOT_DIR)/kernel/libvortexrt.a
all: $(PROJECT).elf $(PROJECT).bin $(PROJECT).dump
@ -37,10 +28,10 @@ $(PROJECT).elf: $(SRCS)
$(CC) $(CFLAGS) $(SRCS) $(LDFLAGS) -o $(PROJECT).elf
run-rtlsim: $(PROJECT).bin
$(SIM_DIR)/rtlsim/rtlsim $(PROJECT).bin
$(ROOT_DIR)/sim/rtlsim/rtlsim $(PROJECT).bin
run-simx: $(PROJECT).bin
$(SIM_DIR)/simx/simx $(PROJECT).bin
$(ROOT_DIR)/sim/simx/simx $(PROJECT).bin
.depend: $(SRCS)
$(CC) $(CFLAGS) -MM $^ > .depend;

View file

@ -1,5 +1,10 @@
PROJECT = conform
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRCS = main.cpp tests.cpp
PROJECT := conform
SRC_DIR := $(VORTEX_HOME)/tests/kernel/$(PROJECT)
SRCS := $(SRC_DIR)/main.cpp $(SRC_DIR)/tests.cpp
include ../common.mk

View file

@ -1,5 +1,10 @@
PROJECT = fibonacci
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRCS = main.cpp
PROJECT := fibonacci
SRC_DIR := $(VORTEX_HOME)/tests/kernel/$(PROJECT)
SRCS := $(SRC_DIR)/main.cpp
include ../common.mk

View file

@ -1,5 +1,10 @@
PROJECT = hello
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRCS = main.cpp
PROJECT := hello
SRC_DIR := $(VORTEX_HOME)/tests/kernel/$(PROJECT)
SRCS := $(SRC_DIR)/main.cpp
include ../common.mk

View file

@ -1,6 +1,11 @@
PROJECT = bfs
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRCS = main.cc
PROJECT := bfs
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
SRCS := $(SRC_DIR)/main.cc
OPTS ?=

View file

@ -1,8 +1,13 @@
PROJECT = blackscholes
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRCS = main.cpp oclUtils.cpp shrUtils.cpp cmd_arg_reader.cpp oclBlackScholes_launcher.cpp oclBlackScholes_gold.cpp
PROJECT := blackscholes
CXXFLAGS += -I.
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
SRCS := $(SRC_DIR)/main.cc $(SRC_DIR)/oclUtils.cpp $(SRC_DIR)/shrUtils.cpp $(SRC_DIR)/cmd_arg_reader.cpp $(SRC_DIR)/oclBlackScholes_launcher.cpp $(SRC_DIR)/oclBlackScholes_gold.cpp
CXXFLAGS += -I$(SRC_DIR)
OPTS ?=

View file

@ -1,41 +1,29 @@
XLEN ?= 32
TOOLDIR ?= /opt
ROOT_DIR := $(realpath ../../..)
TARGET ?= opaesim
XRT_SYN_DIR ?= ../../../hw/syn/xilinx/xrt
XRT_SYN_DIR ?= $(VORTEX_HOME)/hw/syn/xilinx/xrt
XRT_DEVICE_INDEX ?= 0
ifeq ($(XLEN),64)
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv64-gnu-toolchain
VX_CFLAGS += -march=rv64imafd -mabi=lp64d
K_CFLAGS += -march=rv64imafd -mabi=ilp64d
STARTUP_ADDR ?= 0x180000000
else
RISCV_TOOLCHAIN_PATH ?= $(TOOLDIR)/riscv-gnu-toolchain
VX_CFLAGS += -march=rv32imaf -mabi=ilp32f
K_CFLAGS += -march=rv32imaf -mabi=ilp32f
STARTUP_ADDR ?= 0x80000000
endif
RISCV_PREFIX ?= riscv$(XLEN)-unknown-elf
RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
POCL_CC_PATH ?= $(TOOLDIR)/pocl/compiler
POCL_RT_PATH ?= $(TOOLDIR)/pocl/runtime
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
VORTEX_KN_PATH ?= $(realpath ../../../kernel)
FPGA_BIN_DIR ?= $(VORTEX_RT_PATH)/opae
LLVM_VORTEX ?= $(TOOLDIR)/llvm-vortex
LLVM_POCL ?= $(TOOLDIR)/llvm-vortex
K_CFLAGS += -v -O3 --sysroot=$(RISCV_SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -Xclang -target-feature -Xclang +vortex
K_CFLAGS += -fno-rtti -fno-exceptions -nostartfiles -fdata-sections -ffunction-sections
K_CFLAGS += -I$(VORTEX_KN_PATH)/include -DNDEBUG
K_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T$(VORTEX_KN_PATH)/linker/vx_link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(VORTEX_KN_PATH)/libvortexrt.a -lm
K_CFLAGS += -v -O3 --sysroot=$(RISCV_SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -Xclang -target-feature -Xclang +vortex
K_CFLAGS += -fno-rtti -fno-exceptions -nostartfiles -fdata-sections -ffunction-sections
K_CFLAGS += -I$(VORTEX_KN_PATH)/include -DNDEBUG
K_LDFLAGS += -Wl,-Bstatic,--gc-sections,-T$(VORTEX_KN_PATH)/linker/vx_link$(XLEN).ld,--defsym=STARTUP_ADDR=$(STARTUP_ADDR) $(ROOT_DIR)/kernel/libvortexrt.a -lm
CXXFLAGS += -std=c++11 -Wall -Wextra -Wfatal-errors
CXXFLAGS += -Wno-deprecated-declarations -Wno-unused-parameter -Wno-narrowing
@ -61,30 +49,30 @@ endif
endif
endif
OBJS := $(addsuffix .o, $(notdir $(filter-out main.cc,$(SRCS))))
OBJS := $(addsuffix .o, $(filter-out main.cc,$(notdir $(SRCS))))
all: $(PROJECT) kernel.pocl
kernel.pocl: kernel.cl
LD_LIBRARY_PATH=$(LLVM_POCL)/lib:$(POCL_CC_PATH)/lib:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) LLVM_PREFIX=$(LLVM_VORTEX) POCL_DEBUG=all POCL_VORTEX_CFLAGS="$(K_CFLAGS)" POCL_VORTEX_LDFLAGS="$(K_LDFLAGS)" $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl kernel.cl
kernel.pocl: $(SRC_DIR)/kernel.cl
LD_LIBRARY_PATH=$(LLVM_POCL)/lib:$(POCL_CC_PATH)/lib:$(LLVM_VORTEX)/lib:$(LD_LIBRARY_PATH) LLVM_PREFIX=$(LLVM_VORTEX) POCL_DEBUG=all POCL_VORTEX_CFLAGS="$(K_CFLAGS)" POCL_VORTEX_LDFLAGS="$(K_LDFLAGS)" $(POCL_CC_PATH)/bin/poclcc -o kernel.pocl $^
%.cc.o: %.cc
%.cc.o: $(SRC_DIR)/%.cc
$(CXX) $(CXXFLAGS) -c $< -o $@
%.cpp.o: %.cpp
%.cpp.o: $(SRC_DIR)/%.cpp
$(CXX) $(CXXFLAGS) -c $< -o $@
%.c.o: %.c
%.c.o: $(SRC_DIR)/%.c
$(CC) $(CXXFLAGS) -c $< -o $@
main.cc.o: main.cc
main.cc.o: $(SRC_DIR)/main.cc
$(CXX) $(CXXFLAGS) -c $< -o $@
main.cc.host.o: main.cc
main.cc.host.o: $(SRC_DIR)/main.cc
$(CXX) $(CXXFLAGS) -DHOSTGPU -c $< -o $@
$(PROJECT): main.cc.o $(OBJS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L$(VORTEX_RT_PATH)/stub -lvortex -L$(POCL_RT_PATH)/lib -lOpenCL -o $@
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L$(ROOT_DIR)/runtime/stub -lvortex -L$(POCL_RT_PATH)/lib -lOpenCL -o $@
$(PROJECT).host: main.cc.host.o $(OBJS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -lOpenCL -o $@
@ -93,19 +81,19 @@ run-gpu: $(PROJECT).host kernel.pocl
./$(PROJECT).host $(OPTS)
run-simx: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(ROOT_DIR)/runtime/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-rtlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(ROOT_DIR)/runtime/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-opae: $(PROJECT) kernel.pocl
SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
SCOPE_JSON_PATH=$(ROOT_DIR)/runtime/opae/scope.json OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(ROOT_DIR)/runtime/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-xrt: $(PROJECT) kernel.pocl
ifeq ($(TARGET), hw)
SCOPE_JSON_PATH=$(FPGA_BIN_DIR)/scope.json XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/xrt:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_RT_PATH)/lib:$(ROOT_DIR)/runtime/xrt:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
else
XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/xrt:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
XCL_EMULATION_MODE=$(TARGET) XRT_INI_PATH=$(XRT_SYN_DIR)/xrt.ini EMCONFIG_PATH=$(FPGA_BIN_DIR) XRT_DEVICE_INDEX=$(XRT_DEVICE_INDEX) XRT_XCLBIN_PATH=$(FPGA_BIN_DIR)/vortex_afu.xclbin LD_LIBRARY_PATH=$(XILINX_XRT)/lib:$(POCL_RT_PATH)/lib:$(ROOT_DIR)/runtime/xrt:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
endif
.depend: $(SRCS)

View file

@ -1,6 +1,11 @@
PROJECT = conv3
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRCS = main.cc
PROJECT := conv3
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
SRCS := $(SRC_DIR)/main.cc
OPTS ?= -n32

View file

@ -1,6 +1,11 @@
PROJECT = dotproduct
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRCS = main.cc oclUtils.cpp shrUtils.cpp cmd_arg_reader.cpp
PROJECT := dotproduct
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
SRCS := $(SRC_DIR)/main.cc $(SRC_DIR)/oclUtils.cpp $(SRC_DIR)/shrUtils.cpp $(SRC_DIR)/cmd_arg_reader.cpp
OPTS ?= -size=4096

View file

@ -1,6 +1,11 @@
PROJECT = guassian
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRCS = main.cc clutils.cpp utils.cpp
PROJECT := guassian
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
SRCS := $(SRC_DIR)/main.cc $(SRC_DIR)/clutils.cpp $(SRC_DIR)/utils.cpp
OPTS ?=

View file

@ -1,2 +0,0 @@
kmeans

View file

@ -1,6 +1,11 @@
PROJECT = kmeans
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRCS = main.cc read_input.c rmse.c kmeans_clustering.c cluster.c getopt.c
PROJECT := kmeans
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
SRCS := $(SRC_DIR)/main.cc $(SRC_DIR)/read_input.c $(SRC_DIR)/rmse.c $(SRC_DIR)/kmeans_clustering.c $(SRC_DIR)/cluster.c $(SRC_DIR)/getopt.c
OPTS ?=

BIN
tests/opencl/kmeans/kmeans Executable file

Binary file not shown.

View file

@ -1,10 +1,16 @@
PROJECT = lbm
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRCS = main.cc args.c parboil_opencl.c gpu_info.c lbm.c ocl.c
PROJECT := lbm
CXXFLAGS += -I.
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
SRCS := $(SRC_DIR)/main.cc $(SRC_DIR)/args.c $(SRC_DIR)/parboil_opencl.c $(SRC_DIR)/gpu_info.c $(SRC_DIR)/lbm.c $(SRC_DIR)/ocl.c
CXXFLAGS += -I$(SRC_DIR)
K_CFLAGS += -I$(SRC_DIR)
# Usage: #iter [-i input_file] [-o output_file]
OPTS ?= 1 -i 32_32_8_ldc.of
OPTS ?= 1 -i $(SRC_DIR)/32_32_8_ldc.of
include ../common.mk

View file

@ -1,7 +1,12 @@
PROJECT = nearn
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRCS = main.cc clutils.cpp utils.cpp
PROJECT := nearn
OPTS ?= filelist.txt
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
SRCS := $(SRC_DIR)/main.cc $(SRC_DIR)/clutils.cpp $(SRC_DIR)/utils.cpp
OPTS ?= $(SRC_DIR)/filelist.txt
include ../common.mk

View file

@ -1,6 +1,11 @@
PROJECT = oclprintf
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRCS = main.cc
PROJECT := oclprintf
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
SRCS := $(SRC_DIR)/main.cc
OPTS ?= -n4

View file

@ -1,6 +1,11 @@
PROJECT = psort
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRCS = main.cc
PROJECT := psort
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
SRCS := $(SRC_DIR)/main.cc
OPTS ?= -f -n16

View file

@ -1,6 +1,11 @@
PROJECT = saxpy
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRCS = main.cc
PROJECT := saxpy
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
SRCS := $(SRC_DIR)/main.cc
OPTS ?= -n1024

View file

@ -1,6 +1,11 @@
PROJECT = sfilter
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRCS = main.cc
PROJECT := sfilter
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
SRCS := $(SRC_DIR)/main.cc
OPTS ?= -n16

View file

@ -1,6 +1,14 @@
PROJECT = sgemm
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
SRCS = main.cc
PROJECT := sgemm
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
SRCS := $(SRC_DIR)/main.cc
CXXFLAGS += -I$(SRC_DIR)
K_CFLAGS += -I$(SRC_DIR)
OPTS ?= -n32

View file

@ -0,0 +1,12 @@
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
PROJECT := sgemm2
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
SRCS := $(SRC_DIR)/main.cc
OPTS ?= -n32
include ../common.mk

View file

@ -0,0 +1,15 @@
// Copyright 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#define LOCAL_SIZE 4

View file

@ -0,0 +1,37 @@
#include "common.h"
__kernel void sgemm2(__global float *A,
__global float *B,
__global float *C,
const unsigned int N)
{
int globalRow = get_global_id(1);
int globalCol = get_global_id(0);
int localRow = get_local_id(1);
int localCol = get_local_id(0);
// Static local memory declaration
__local float localA[LOCAL_SIZE][LOCAL_SIZE];
__local float localB[LOCAL_SIZE][LOCAL_SIZE];
float sum = 0.0f;
// Iterate over blocks
for (int k = 0; k < N; k += LOCAL_SIZE) {
// Load a block of matrix A into local memory
localA[localRow][localCol] = A[globalRow * N + k + localCol];
// Load a block of matrix B into local memory
localB[localRow][localCol] = B[(k + localRow) * N + globalCol];
// Ensure the entire block is loaded
barrier(CLK_LOCAL_MEM_FENCE);
// Compute multiplication for this block
for (int j = 0; j < LOCAL_SIZE; j++) {
sum += localA[localRow][j] * localB[j][localCol];
}
}
C[globalRow * N + globalCol] = sum;
}

242
tests/opencl/sgemm2/main.cc Normal file
View file

@ -0,0 +1,242 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <CL/opencl.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <chrono>
#include <vector>
#include "common.h"
#define FLOAT_ULP 6
#define KERNEL_NAME "sgemm2"
#define CL_CHECK(_expr) \
do { \
cl_int _err = _expr; \
if (_err == CL_SUCCESS) \
break; \
printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
cleanup(); \
exit(-1); \
} while (0)
#define CL_CHECK2(_expr) \
({ \
cl_int _err = CL_INVALID_VALUE; \
decltype(_expr) _ret = _expr; \
if (_err != CL_SUCCESS) { \
printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
cleanup(); \
exit(-1); \
} \
_ret; \
})
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
if (nullptr == filename || nullptr == data || 0 == size)
return -1;
FILE* fp = fopen(filename, "r");
if (NULL == fp) {
fprintf(stderr, "Failed to load kernel.");
return -1;
}
fseek(fp , 0 , SEEK_END);
long fsize = ftell(fp);
rewind(fp);
*data = (uint8_t*)malloc(fsize);
*size = fread(*data, 1, fsize, fp);
fclose(fp);
return 0;
}
static bool compare_equal(float a, float b) {
union fi_t { float f; int32_t i; };
fi_t fa, fb;
fa.f = a;
fb.f = b;
auto d = std::abs(fa.i - fb.i);
return d <= FLOAT_ULP;
}
static void matmul_cpu(float *C, float *A, float *B, int N) {
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
float sum = 0.0f;
for (int k = 0; k < N; k++) {
sum += A[i * N + k] * B[k * N + j];
}
C[i * N + j] = sum;
}
}
}
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue commandQueue = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_mem a_memobj = NULL;
cl_mem b_memobj = NULL;
cl_mem c_memobj = NULL;
uint8_t *kernel_bin = NULL;
static void cleanup() {
if (commandQueue) clReleaseCommandQueue(commandQueue);
if (kernel) clReleaseKernel(kernel);
if (program) clReleaseProgram(program);
if (a_memobj) clReleaseMemObject(a_memobj);
if (b_memobj) clReleaseMemObject(b_memobj);
if (c_memobj) clReleaseMemObject(c_memobj);
if (context) clReleaseContext(context);
if (device_id) clReleaseDevice(device_id);
if (kernel_bin) free(kernel_bin);
}
int size = 32;
static void show_usage() {
printf("Usage: [-n size] [-h: help]\n");
}
static void parse_args(int argc, char **argv) {
int c;
while ((c = getopt(argc, argv, "n:h?")) != -1) {
switch (c) {
case 'n':
size = atoi(optarg);
break;
case 'h':
case '?': {
show_usage();
exit(0);
} break;
default:
show_usage();
exit(-1);
}
}
}
int main (int argc, char **argv) {
// parse command arguments
parse_args(argc, argv);
uint32_t num_points = size * size;
printf("Matrix size=%d\n", size);
if ((size / LOCAL_SIZE) * LOCAL_SIZE != size) {
printf("Error: matrix size must be a multiple of %d\n", LOCAL_SIZE);
return -1;
}
cl_platform_id platform_id;
size_t kernel_size;
// Getting platform and device information
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
printf("Create context\n");
context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err));
char device_string[1024];
clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL);
printf("Using device: %s\n", device_string);
printf("Allocate device buffers\n");
size_t nbytes = num_points * sizeof(float);
a_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
b_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
c_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, nbytes, NULL, &_err));
printf("Create program from kernel source\n");
#ifdef HOSTGPU
if (0 != read_kernel_file("kernel.cl", &kernel_bin, &kernel_size))
return -1;
program = CL_CHECK2(clCreateProgramWithSource(
context, 1, (const char**)&kernel_bin, &kernel_size, &_err));
#else
if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size))
return -1;
program = CL_CHECK2(clCreateProgramWithBinary(
context, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, NULL, &_err));
#endif
if (program == NULL) {
cleanup();
return -1;
}
// Build program
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
// Create kernel
kernel = CL_CHECK2(clCreateKernel(program, KERNEL_NAME, &_err));
size_t global_size[2] = {size, size};
size_t local_size[2] = {LOCAL_SIZE, LOCAL_SIZE};
// Set kernel arguments
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_memobj));
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_memobj));
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_memobj));
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(uint32_t), &size));
// Allocate memories for input arrays and output arrays.
std::vector<float> h_a(num_points);
std::vector<float> h_b(num_points);
std::vector<float> h_c(num_points);
// Generate input values
for (uint32_t i = 0; i < num_points; ++i) {
h_a[i] = static_cast<float>(rand()) / RAND_MAX;
h_b[i] = static_cast<float>(rand()) / RAND_MAX;
}
// Creating command queue
commandQueue = CL_CHECK2(clCreateCommandQueue(context, device_id, 0, &_err));
printf("Upload source buffers\n");
CL_CHECK(clEnqueueWriteBuffer(commandQueue, a_memobj, CL_TRUE, 0, nbytes, h_a.data(), 0, NULL, NULL));
CL_CHECK(clEnqueueWriteBuffer(commandQueue, b_memobj, CL_TRUE, 0, nbytes, h_b.data(), 0, NULL, NULL));
printf("Execute the kernel\n");
auto time_start = std::chrono::high_resolution_clock::now();
CL_CHECK(clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, global_size, local_size, 0, NULL, NULL));
CL_CHECK(clFinish(commandQueue));
auto time_end = std::chrono::high_resolution_clock::now();
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
printf("Elapsed time: %lg ms\n", elapsed);
printf("Download destination buffer\n");
CL_CHECK(clEnqueueReadBuffer(commandQueue, c_memobj, CL_TRUE, 0, nbytes, h_c.data(), 0, NULL, NULL));
printf("Verify result\n");
std::vector<float> ref_vec(num_points);
matmul_cpu(ref_vec.data(), h_a.data(), h_b.data(), size);
int errors = 0;
for (uint32_t i = 0; i < num_points; ++i) {
if (!compare_equal(h_c[i], ref_vec[i])) {
if (errors < 100)
printf("*** error: [%d] expected=%f, actual=%f\n", i, ref_vec[i], h_c[i]);
++errors;
}
}
if (errors != 0) {
printf("FAILED! - %d errors\n", errors);
} else {
printf("PASSED!\n");
}
// Clean up
cleanup();
return errors;
}

View file

@ -0,0 +1,12 @@
ROOT_DIR := $(realpath ../../..)
include $(ROOT_DIR)/config.mk
PROJECT := sgemm3
SRC_DIR := $(VORTEX_HOME)/tests/opencl/$(PROJECT)
SRCS := $(SRC_DIR)/main.cc
OPTS ?= -n32
include ../common.mk

View file

@ -0,0 +1,73 @@
__kernel void sgemm3(__global float *A,
__global float *B,
__global float *C,
const unsigned int N,
__local float *localA,
__local float *localB)
{
int globalRow = get_global_id(1);
int globalCol = get_global_id(0);
int localRow = get_local_id(1);
int localCol = get_local_id(0);
int localSize = get_local_size(0); // assuming square local size
float sum = 0.0f;
// Loop over all blocks of both matrices
for (int k = 0; k < N; k += localSize) {
// Load block of matrix A to local memory
localA[localRow * localSize + localCol] = A[globalRow * N + k + localCol];
// Load block of matrix B to local memory, adjusting for column-major access
localB[localRow * localSize + localCol] = B[(k + localRow) * N + globalCol];
// Synchronize to make sure the tiles are loaded
barrier(CLK_LOCAL_MEM_FENCE);
// Multiply the two matrix blocks and accumulate result
for (int j = 0; j < localSize; j++) {
sum += localA[localRow * localSize + j] * localB[j * localSize + localCol];
}
// Ensure computation is done before loading next block
barrier(CLK_LOCAL_MEM_FENCE);
}
C[globalRow * N + globalCol] = sum;
}
/*__kernel void sgemm2(__global float *A,
__global float *B,
__global float *C,
const unsigned int N)
{
int globalRow = get_global_id(1);
int globalCol = get_global_id(0);
int localRow = get_local_id(1);
int localCol = get_local_id(0);
// Static local memory declaration
__local float localA[16][16];
__local float localB[16][16];
float sum = 0.0f;
// Iterate over blocks
for (int k = 0; k < N; k += 16) {
// Load a block of matrix A into local memory
localA[localRow][localCol] = A[globalRow * N + k + localCol];
// Load a block of matrix B into local memory
localB[localRow][localCol] = B[(k + localRow) * N + globalCol];
// Ensure the entire block is loaded
barrier(CLK_LOCAL_MEM_FENCE);
// Compute multiplication for this block
for (int j = 0; j < 16; j++) {
sum += localA[localRow][j] * localB[j][localCol];
}
}
C[globalRow * N + globalCol] = sum;
}*/

245
tests/opencl/sgemm3/main.cc Normal file
View file

@ -0,0 +1,245 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <CL/opencl.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <chrono>
#include <vector>
#define LOCAL_SIZE 16
#define FLOAT_ULP 6
#define KERNEL_NAME "sgemm3"
#define CL_CHECK(_expr) \
do { \
cl_int _err = _expr; \
if (_err == CL_SUCCESS) \
break; \
printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
cleanup(); \
exit(-1); \
} while (0)
#define CL_CHECK2(_expr) \
({ \
cl_int _err = CL_INVALID_VALUE; \
decltype(_expr) _ret = _expr; \
if (_err != CL_SUCCESS) { \
printf("OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
cleanup(); \
exit(-1); \
} \
_ret; \
})
static int read_kernel_file(const char* filename, uint8_t** data, size_t* size) {
if (nullptr == filename || nullptr == data || 0 == size)
return -1;
FILE* fp = fopen(filename, "r");
if (NULL == fp) {
fprintf(stderr, "Failed to load kernel.");
return -1;
}
fseek(fp , 0 , SEEK_END);
long fsize = ftell(fp);
rewind(fp);
*data = (uint8_t*)malloc(fsize);
*size = fread(*data, 1, fsize, fp);
fclose(fp);
return 0;
}
static bool compare_equal(float a, float b) {
union fi_t { float f; int32_t i; };
fi_t fa, fb;
fa.f = a;
fb.f = b;
auto d = std::abs(fa.i - fb.i);
return d <= FLOAT_ULP;
}
static void matmul_cpu(float *C, float *A, float *B, int N) {
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
float sum = 0.0f;
for (int k = 0; k < N; k++) {
sum += A[i * N + k] * B[k * N + j];
}
C[i * N + j] = sum;
}
}
}
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue commandQueue = NULL;
cl_program program = NULL;
cl_kernel kernel = NULL;
cl_mem a_memobj = NULL;
cl_mem b_memobj = NULL;
cl_mem c_memobj = NULL;
uint8_t *kernel_bin = NULL;
static void cleanup() {
if (commandQueue) clReleaseCommandQueue(commandQueue);
if (kernel) clReleaseKernel(kernel);
if (program) clReleaseProgram(program);
if (a_memobj) clReleaseMemObject(a_memobj);
if (b_memobj) clReleaseMemObject(b_memobj);
if (c_memobj) clReleaseMemObject(c_memobj);
if (context) clReleaseContext(context);
if (device_id) clReleaseDevice(device_id);
if (kernel_bin) free(kernel_bin);
}
int size = 32;
static void show_usage() {
printf("Usage: [-n size] [-h: help]\n");
}
static void parse_args(int argc, char **argv) {
int c;
while ((c = getopt(argc, argv, "n:h?")) != -1) {
switch (c) {
case 'n':
size = atoi(optarg);
break;
case 'h':
case '?': {
show_usage();
exit(0);
} break;
default:
show_usage();
exit(-1);
}
}
}
int main (int argc, char **argv) {
// parse command arguments
parse_args(argc, argv);
uint32_t num_points = size * size;
printf("Matrix size=%d\n", size);
if ((size / LOCAL_SIZE) * LOCAL_SIZE != size) {
printf("Error: matrix size must be a multiple of %d\n", LOCAL_SIZE);
return -1;
}
cl_platform_id platform_id;
size_t kernel_size;
// Getting platform and device information
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
printf("Create context\n");
context = CL_CHECK2(clCreateContext(NULL, 1, &device_id, NULL, NULL, &_err));
char device_string[1024];
clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL);
printf("Using device: %s\n", device_string);
printf("Allocate device buffers\n");
size_t nbytes = num_points * sizeof(float);
a_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
b_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_READ_ONLY, nbytes, NULL, &_err));
c_memobj = CL_CHECK2(clCreateBuffer(context, CL_MEM_WRITE_ONLY, nbytes, NULL, &_err));
printf("Create program from kernel source\n");
#ifdef HOSTGPU
if (0 != read_kernel_file("kernel.cl", &kernel_bin, &kernel_size))
return -1;
program = CL_CHECK2(clCreateProgramWithSource(
context, 1, (const char**)&kernel_bin, &kernel_size, &_err));
#else
if (0 != read_kernel_file("kernel.pocl", &kernel_bin, &kernel_size))
return -1;
program = CL_CHECK2(clCreateProgramWithBinary(
context, 1, &device_id, &kernel_size, (const uint8_t**)&kernel_bin, NULL, &_err));
#endif
if (program == NULL) {
cleanup();
return -1;
}
// Build program
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
// Create kernel
kernel = CL_CHECK2(clCreateKernel(program, KERNEL_NAME, &_err));
size_t global_size[2] = {size, size};
size_t local_size[2] = {LOCAL_SIZE, LOCAL_SIZE};
// Set kernel arguments
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&a_memobj));
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&b_memobj));
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&c_memobj));
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(uint32_t), &size));
CL_CHECK(clSetKernelArg(kernel, 4, local_size[0]*local_size[1]*sizeof(float), NULL));
CL_CHECK(clSetKernelArg(kernel, 5, local_size[0]*local_size[1]*sizeof(float), NULL));
// Allocate memories for input arrays and output arrays.
std::vector<float> h_a(num_points);
std::vector<float> h_b(num_points);
std::vector<float> h_c(num_points);
// Generate input values
for (uint32_t i = 0; i < num_points; ++i) {
h_a[i] = static_cast<float>(rand()) / RAND_MAX;
h_b[i] = static_cast<float>(rand()) / RAND_MAX;
}
// Creating command queue
commandQueue = CL_CHECK2(clCreateCommandQueue(context, device_id, 0, &_err));
printf("Upload source buffers\n");
CL_CHECK(clEnqueueWriteBuffer(commandQueue, a_memobj, CL_TRUE, 0, nbytes, h_a.data(), 0, NULL, NULL));
CL_CHECK(clEnqueueWriteBuffer(commandQueue, b_memobj, CL_TRUE, 0, nbytes, h_b.data(), 0, NULL, NULL));
printf("Execute the kernel\n");
auto time_start = std::chrono::high_resolution_clock::now();
CL_CHECK(clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, global_size, local_size, 0, NULL, NULL));
CL_CHECK(clFinish(commandQueue));
auto time_end = std::chrono::high_resolution_clock::now();
double elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
printf("Elapsed time: %lg ms\n", elapsed);
printf("Download destination buffer\n");
CL_CHECK(clEnqueueReadBuffer(commandQueue, c_memobj, CL_TRUE, 0, nbytes, h_c.data(), 0, NULL, NULL));
printf("Verify result\n");
std::vector<float> ref_vec(num_points);
matmul_cpu(ref_vec.data(), h_a.data(), h_b.data(), size);
int errors = 0;
for (uint32_t i = 0; i < num_points; ++i) {
if (!compare_equal(h_c[i], ref_vec[i])) {
if (errors < 100)
printf("*** error: [%d] expected=%f, actual=%f\n", i, ref_vec[i], h_c[i]);
++errors;
}
}
if (errors != 0) {
printf("FAILED! - %d errors\n", errors);
} else {
printf("PASSED!\n");
}
// Clean up
cleanup();
return errors;
}

Some files were not shown because too many files have changed in this diff Show more