mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Merge branch 'master' of github.com:vortexgpgpu/vortex
This commit is contained in:
commit
e8ce3878bb
39 changed files with 31156 additions and 128 deletions
164
.github/workflows/ci.yml
vendored
164
.github/workflows/ci.yml
vendored
|
@ -21,13 +21,13 @@ jobs:
|
|||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v2
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
|
@ -36,7 +36,7 @@ jobs:
|
|||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v2
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
|
@ -62,7 +62,111 @@ jobs:
|
|||
run: |
|
||||
make -C third_party > /dev/null
|
||||
|
||||
build:
|
||||
# build:
|
||||
# runs-on: ubuntu-20.04
|
||||
# needs: setup
|
||||
# strategy:
|
||||
# matrix:
|
||||
# xlen: [32, 64]
|
||||
|
||||
# steps:
|
||||
# - name: Checkout code
|
||||
# uses: actions/checkout@v2
|
||||
|
||||
# - name: Install Dependencies
|
||||
# run: |
|
||||
# sudo bash ./ci/system_updates.sh
|
||||
|
||||
# - name: Cache Toolchain Directory
|
||||
# id: cache-toolchain
|
||||
# uses: actions/cache@v2
|
||||
# with:
|
||||
# path: tools
|
||||
# key: ${{ runner.os }}-toolchain-v0.1
|
||||
# restore-keys: |
|
||||
# ${{ runner.os }}-toolchain-
|
||||
|
||||
# - name: Cache Third Party Directory
|
||||
# id: cache-thirdparty
|
||||
# uses: actions/cache@v2
|
||||
# with:
|
||||
# path: third_party
|
||||
# key: ${{ runner.os }}-thirdparty-v0.1
|
||||
# restore-keys: |
|
||||
# ${{ runner.os }}-thirdparty-
|
||||
|
||||
# - name: Run Build
|
||||
# run: |
|
||||
# TOOLDIR=$PWD/tools
|
||||
# mkdir -p build${{ matrix.xlen }}
|
||||
# cd build${{ matrix.xlen }}
|
||||
# ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }}
|
||||
# source ci/toolchain_env.sh
|
||||
# make software -s > /dev/null
|
||||
# make tests -s > /dev/null
|
||||
|
||||
# - name: Upload Build Artifact
|
||||
# uses: actions/upload-artifact@v2
|
||||
# with:
|
||||
# name: build-${{ matrix.xlen }}
|
||||
# path: build${{ matrix.xlen }}
|
||||
|
||||
# tests:
|
||||
# runs-on: ubuntu-20.04
|
||||
# needs: build
|
||||
# strategy:
|
||||
# matrix:
|
||||
# name: [regression, opencl, config1, config2, debug, stress]
|
||||
# xlen: [32, 64]
|
||||
|
||||
# steps:
|
||||
# - name: Checkout code
|
||||
# uses: actions/checkout@v2
|
||||
|
||||
# - name: Install Dependencies
|
||||
# run: |
|
||||
# sudo bash ./ci/system_updates.sh
|
||||
|
||||
# - name: Cache Toolchain Directory
|
||||
# id: cache-toolchain
|
||||
# uses: actions/cache@v2
|
||||
# with:
|
||||
# path: tools
|
||||
# key: ${{ runner.os }}-toolchain-v0.1
|
||||
# restore-keys: |
|
||||
# ${{ runner.os }}-toolchain-
|
||||
|
||||
# - name: Cache Third Party Directory
|
||||
# id: cache-thirdparty
|
||||
# uses: actions/cache@v2
|
||||
# with:
|
||||
# path: third_party
|
||||
# key: ${{ runner.os }}-thirdparty-v0.1
|
||||
# restore-keys: |
|
||||
# ${{ runner.os }}-thirdparty-
|
||||
|
||||
# - name: Download Build Artifact
|
||||
# uses: actions/download-artifact@v2
|
||||
# with:
|
||||
# name: build-${{ matrix.xlen }}
|
||||
# path: build${{ matrix.xlen }}
|
||||
|
||||
# - name: Run tests
|
||||
# run: |
|
||||
# cd build${{ matrix.xlen }}
|
||||
# source ci/toolchain_env.sh
|
||||
# chmod -R +x . # Ensure all files have executable permissions
|
||||
# if [ "${{ matrix.name }}" == "regression" ]; then
|
||||
# ./ci/regression.sh --unittest
|
||||
# ./ci/regression.sh --isa
|
||||
# ./ci/regression.sh --kernel
|
||||
# ./ci/regression.sh --synthesis
|
||||
# ./ci/regression.sh --regression
|
||||
# else
|
||||
# ./ci/regression.sh --${{ matrix.name }}
|
||||
# fi
|
||||
|
||||
build_vm:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: setup
|
||||
strategy:
|
||||
|
@ -71,7 +175,7 @@ jobs:
|
|||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
|
@ -79,7 +183,7 @@ jobs:
|
|||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v2
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
|
@ -88,7 +192,7 @@ jobs:
|
|||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v2
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
|
@ -98,31 +202,31 @@ jobs:
|
|||
- name: Run Build
|
||||
run: |
|
||||
TOOLDIR=$PWD/tools
|
||||
mkdir -p build${{ matrix.xlen }}
|
||||
cd build${{ matrix.xlen }}
|
||||
../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }}
|
||||
mkdir -p build${{ matrix.xlen }}-vm
|
||||
cd build${{ matrix.xlen }}-vm
|
||||
../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} --vm_enable=1
|
||||
source ci/toolchain_env.sh
|
||||
make software -s > /dev/null
|
||||
make tests -s > /dev/null
|
||||
|
||||
- name: Upload Build Artifact
|
||||
uses: actions/upload-artifact@v2
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: build-${{ matrix.xlen }}
|
||||
path: build${{ matrix.xlen }}
|
||||
name: build-${{ matrix.xlen }}-vm
|
||||
path: build${{ matrix.xlen }}-vm
|
||||
|
||||
tests:
|
||||
test_vm:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: build
|
||||
needs: build_vm
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
name: [regression, opencl, cache, config1, config2, debug, stress]
|
||||
name: [regression, opencl, cache, config1, config2, debug, stress, vm]
|
||||
xlen: [32, 64]
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
|
@ -130,7 +234,7 @@ jobs:
|
|||
|
||||
- name: Cache Toolchain Directory
|
||||
id: cache-toolchain
|
||||
uses: actions/cache@v2
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: tools
|
||||
key: ${{ runner.os }}-toolchain-v0.1
|
||||
|
@ -139,7 +243,7 @@ jobs:
|
|||
|
||||
- name: Cache Third Party Directory
|
||||
id: cache-thirdparty
|
||||
uses: actions/cache@v2
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: third_party
|
||||
key: ${{ runner.os }}-thirdparty-v0.1
|
||||
|
@ -147,30 +251,22 @@ jobs:
|
|||
${{ runner.os }}-thirdparty-
|
||||
|
||||
- name: Download Build Artifact
|
||||
uses: actions/download-artifact@v2
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: build-${{ matrix.xlen }}
|
||||
path: build${{ matrix.xlen }}
|
||||
name: build-${{ matrix.xlen }}-vm
|
||||
path: build${{ matrix.xlen }}-vm
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
cd build${{ matrix.xlen }}
|
||||
cd build${{ matrix.xlen }}-vm
|
||||
source ci/toolchain_env.sh
|
||||
chmod -R +x . # Ensure all files have executable permissions
|
||||
if [ "${{ matrix.name }}" == "regression" ]; then
|
||||
./ci/regression.sh --unittest
|
||||
./ci/regression.sh --isa
|
||||
./ci/regression.sh --kernel
|
||||
./ci/regression.sh --synthesis
|
||||
./ci/regression.sh --regression
|
||||
else
|
||||
./ci/regression.sh --${{ matrix.name }}
|
||||
fi
|
||||
./ci/regression.sh --vm
|
||||
|
||||
complete:
|
||||
runs-on: ubuntu-20.04
|
||||
needs: tests
|
||||
needs: test_vm
|
||||
|
||||
steps:
|
||||
- name: Check Completion
|
||||
run: echo "All matrix jobs passed"
|
||||
run: echo "All matrix jobs passed"
|
||||
|
|
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -1,3 +1,4 @@
|
|||
/build*
|
||||
/.vscode
|
||||
*.cache
|
||||
*.cache
|
||||
*.code-workspace
|
||||
|
|
|
@ -2,6 +2,14 @@ include config.mk
|
|||
|
||||
.PHONY: build software tests
|
||||
|
||||
vm:
|
||||
$(MAKE) -C $(VORTEX_HOME)/third_party
|
||||
$(MAKE) -C hw
|
||||
$(MAKE) -C sim simx
|
||||
$(MAKE) -C kernel
|
||||
$(MAKE) -C runtime vm
|
||||
$(MAKE) -C tests
|
||||
|
||||
all:
|
||||
$(MAKE) -C $(VORTEX_HOME)/third_party
|
||||
$(MAKE) -C hw
|
||||
|
|
21
README.md
21
README.md
|
@ -54,23 +54,26 @@ sudo apt-get install git
|
|||
```
|
||||
### Install Vortex codebase
|
||||
```sh
|
||||
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
cd vortex
|
||||
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
|
||||
cd vortex
|
||||
```
|
||||
### Configure your build folder
|
||||
```sh
|
||||
mkdir build
|
||||
cd build
|
||||
../configure --xlen=32 --tooldir=$HOME/tools
|
||||
mkdir build
|
||||
cd build
|
||||
# for 32bit
|
||||
../configure --xlen=32 --tooldir=$HOME/tools
|
||||
# for 64bit
|
||||
../configure --xlen=64 --tooldir=$HOME/tools
|
||||
```
|
||||
### Install prebuilt toolchain
|
||||
```sh
|
||||
./ci/toolchain_install.sh --all
|
||||
./ci/toolchain_install.sh --all
|
||||
```
|
||||
### Set environment variables
|
||||
### Set environment variables
|
||||
```sh
|
||||
# should always run before using the toolchain!
|
||||
source ./ci/toolchain_env.sh
|
||||
# should always run before using the toolchain!
|
||||
source ./ci/toolchain_env.sh
|
||||
```
|
||||
### Building Vortex
|
||||
```sh
|
||||
|
|
|
@ -19,6 +19,8 @@ set -e
|
|||
# clear blackbox cache
|
||||
rm -f blackbox.*.cache
|
||||
|
||||
# HW: add a test "VM Test" to make sure VM feature is enabled
|
||||
|
||||
XLEN=${XLEN:=@XLEN@}
|
||||
|
||||
XSIZE=$((XLEN / 8))
|
||||
|
@ -124,6 +126,30 @@ opencl()
|
|||
echo "opencl tests done!"
|
||||
}
|
||||
|
||||
vm(){
|
||||
echo "begin vm tests..."
|
||||
|
||||
make -C sim/simx
|
||||
make -C runtime/simx
|
||||
|
||||
make -C tests/kernel run-simx
|
||||
|
||||
# Regression tests
|
||||
make -C tests/regression run-simx
|
||||
|
||||
# test global barrier
|
||||
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
|
||||
|
||||
# test local barrier
|
||||
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
|
||||
|
||||
# OpenCL tests
|
||||
make -C tests/opencl run-simx
|
||||
./ci/blackbox.sh --driver=simx --app=lbm --warps=8
|
||||
|
||||
echo "vm tests done!"
|
||||
}
|
||||
|
||||
cache()
|
||||
{
|
||||
echo "begin cache tests..."
|
||||
|
@ -242,7 +268,11 @@ config2()
|
|||
|
||||
# custom program startup address
|
||||
make -C tests/regression/dogfood clean-kernel
|
||||
STARTUP_ADDR=0x40000000 make -C tests/regression/dogfood
|
||||
if [ "$XLEN" == "64" ]; then
|
||||
STARTUP_ADDR=0x180000000 make -C tests/regression/dogfood
|
||||
else
|
||||
STARTUP_ADDR=0x80000000 make -C tests/regression/dogfood
|
||||
fi
|
||||
./ci/blackbox.sh --driver=simx --app=dogfood
|
||||
./ci/blackbox.sh --driver=rtlsim --app=dogfood
|
||||
make -C tests/regression/dogfood clean-kernel
|
||||
|
@ -359,6 +389,9 @@ while [ "$1" != "" ]; do
|
|||
--cache )
|
||||
tests+=("cache")
|
||||
;;
|
||||
--vm )
|
||||
tests+=("vm")
|
||||
;;
|
||||
--config1 )
|
||||
tests+=("config1")
|
||||
;;
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Copyright 2019-2023
|
||||
#
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -16,7 +16,8 @@
|
|||
|
||||
TOOLDIR=${TOOLDIR:=@TOOLDIR@}
|
||||
|
||||
export PATH=$TOOLDIR/verilator/bin:$PATH
|
||||
# export VERILATOR_ROOT=$TOOLDIR/verilator
|
||||
# export PATH=$VERILATOR_ROOT/bin:$PATH
|
||||
|
||||
export SV2V_PATH=$TOOLDIR/sv2v
|
||||
export PATH=$SV2V_PATH/bin:$PATH
|
||||
|
|
|
@ -34,4 +34,6 @@ RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
|
|||
VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime
|
||||
VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel
|
||||
|
||||
THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party
|
||||
THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party
|
||||
|
||||
VM_ENABLE ?= @VM_ENABLE@
|
7
configure
vendored
7
configure
vendored
|
@ -63,7 +63,7 @@ copy_files() {
|
|||
filename_no_ext="${filename%.in}"
|
||||
dest_file="$dest_dir/$filename_no_ext"
|
||||
mkdir -p "$dest_dir"
|
||||
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g" "$file" > "$dest_file"
|
||||
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@VM_ENABLE@|$VM_ENABLE|g" "$file" > "$dest_file"
|
||||
# apply permissions to bash scripts
|
||||
read -r firstline < "$dest_file"
|
||||
if [[ "$firstline" =~ ^#!.*bash ]]; then
|
||||
|
@ -114,6 +114,7 @@ default_xlen=32
|
|||
default_tooldir=$HOME/tools
|
||||
default_osversion=$(detect_osversion)
|
||||
default_prefix=$CURRENT_DIR
|
||||
default_vm=0
|
||||
|
||||
# load default configuration parameters from existing config.mk
|
||||
if [ -f "config.mk" ]; then
|
||||
|
@ -126,6 +127,7 @@ if [ -f "config.mk" ]; then
|
|||
TOOLDIR\ ?*) default_tooldir=${value//\?=/} ;;
|
||||
OSVERSION\ ?*) default_osversion=${value//\?=/} ;;
|
||||
PREFIX\ ?*) default_prefix=${value//\?=/} ;;
|
||||
VM_ENABLE\ ?*) default_vm=${value//\?=/} ;;
|
||||
esac
|
||||
done < config.mk
|
||||
fi
|
||||
|
@ -135,6 +137,7 @@ XLEN=${XLEN:=$default_xlen}
|
|||
TOOLDIR=${TOOLDIR:=$default_tooldir}
|
||||
OSVERSION=${OSVERSION:=$default_osversion}
|
||||
PREFIX=${PREFIX:=$default_prefix}
|
||||
VM_ENABLE=${VM_ENABLE:=$default_vm}
|
||||
|
||||
# parse command line arguments
|
||||
usage() {
|
||||
|
@ -143,6 +146,7 @@ usage() {
|
|||
echo " --tooldir=<path> Set the TOOLDIR path (default: $HOME/tools)"
|
||||
echo " --osversion=<version> Set the OS Version (default: $(detect_osversion))"
|
||||
echo " --prefix=<path> Set installation directory"
|
||||
echo " --vm_enable=<value> Enable Virtual Memory support (default: 0)"
|
||||
exit 1
|
||||
}
|
||||
while [[ "$#" -gt 0 ]]; do
|
||||
|
@ -151,6 +155,7 @@ while [[ "$#" -gt 0 ]]; do
|
|||
--tooldir=*) TOOLDIR="${1#*=}" ;;
|
||||
--osversion=*) OSVERSION="${1#*=}" ;;
|
||||
--prefix=*) PREFIX="${1#*=}" ;;
|
||||
--vm_enable=*) VM_ENABLE="${1#*=}" ;;
|
||||
-h|--help) usage ;;
|
||||
*) echo "Unknown parameter passed: $1"; usage ;;
|
||||
esac
|
||||
|
|
74
docs/fpga_setup.md
Normal file
74
docs/fpga_setup.md
Normal file
|
@ -0,0 +1,74 @@
|
|||
# FPGA Startup and Configuration Guide
|
||||
|
||||
OPAE Environment Setup
|
||||
----------------------
|
||||
|
||||
$ source /opt/inteldevstack/init_env_user.sh
|
||||
$ export OPAE_HOME=/opt/opae/1.1.2
|
||||
$ export PATH=$OPAE_HOME/bin:$PATH
|
||||
$ export C_INCLUDE_PATH=$OPAE_HOME/include:$C_INCLUDE_PATH
|
||||
$ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH
|
||||
$ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH
|
||||
|
||||
OPAE Build
|
||||
------------------
|
||||
|
||||
The FPGA has the following configuration options:
|
||||
- DEVICE_FAMILY=arria10 | stratix10
|
||||
- NUM_CORES=#n
|
||||
|
||||
Command line:
|
||||
|
||||
$ cd hw/syn/altera/opae
|
||||
$ PREFIX=test1 TARGET=fpga NUM_CORES=4 make
|
||||
|
||||
A new folder (ex: `test1_xxx_4c`) will be created and the build will start and take ~30-480 min to complete.
|
||||
Setting TARGET=ase will build the project for simulation using Intel ASE.
|
||||
|
||||
|
||||
OPAE Build Configuration
|
||||
------------------------
|
||||
|
||||
The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware parameters that can be modified when building the processor. For example, the following parameters can be configured:
|
||||
- `NUM_WARPS`: Number of warps per core
|
||||
- `NUM_THREADS`: Number of threads per warp
|
||||
- `PERF_ENABLE`: enable the use of all profile counters
|
||||
|
||||
You configure the synthesis build from the command line:
|
||||
|
||||
$ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make
|
||||
|
||||
OPAE Build Progress
|
||||
-------------------
|
||||
|
||||
You can check the last 10 lines of the build log for possible errors while the build is in progress.
|
||||
|
||||
$ tail -n 10 <build_dir>/build.log
|
||||
|
||||
Check if the build is still running by looking for quartus_sh, quartus_syn, or quartus_fit programs.
|
||||
|
||||
$ ps -u <username>
|
||||
|
||||
If the build fails and you need to restart it, clean up the build folder using the following command:
|
||||
|
||||
$ make clean
|
||||
|
||||
The file `vortex_afu.gbs` should exist when the build is done:
|
||||
|
||||
$ ls -lsa <build_dir>/synth/vortex_afu.gbs
|
||||
|
||||
|
||||
Signing the bitstream and Programming the FPGA
|
||||
----------------------------------------------
|
||||
|
||||
$ cd <build_dir>
|
||||
$ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs
|
||||
$ fpgasupdate vortex_afu_unsigned_ssl.gbs
|
||||
|
||||
FPGA sample test running OpenCL sgemm kernel
|
||||
--------------------------------------------
|
||||
|
||||
Run the following from the Vortex root directory
|
||||
|
||||
$ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128"
|
||||
|
|
@ -14,6 +14,8 @@
|
|||
`ifndef VX_CONFIG_VH
|
||||
`define VX_CONFIG_VH
|
||||
|
||||
|
||||
|
||||
`ifndef MIN
|
||||
`define MIN(x, y) (((x) < (y)) ? (x) : (y))
|
||||
`endif
|
||||
|
@ -31,7 +33,6 @@
|
|||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifndef EXT_M_DISABLE
|
||||
`define EXT_M_ENABLE
|
||||
`endif
|
||||
|
@ -158,7 +159,7 @@
|
|||
`endif
|
||||
|
||||
`ifndef STARTUP_ADDR
|
||||
`define STARTUP_ADDR 64'h080000000
|
||||
`define STARTUP_ADDR 64'h180000000
|
||||
`endif
|
||||
|
||||
`ifndef USER_BASE_ADDR
|
||||
|
@ -169,7 +170,14 @@
|
|||
`define IO_BASE_ADDR 64'h000000040
|
||||
`endif
|
||||
|
||||
`else
|
||||
`ifdef VM_ENABLE
|
||||
`ifndef PAGE_TABLE_BASE_ADDR
|
||||
`define PAGE_TABLE_BASE_ADDR 64'h1F0000000
|
||||
`endif
|
||||
|
||||
`endif
|
||||
|
||||
`else // XLEN_32
|
||||
|
||||
`ifndef STACK_BASE_ADDR
|
||||
`define STACK_BASE_ADDR 32'hFFFF0000
|
||||
|
@ -187,6 +195,13 @@
|
|||
`define IO_BASE_ADDR 32'h00000040
|
||||
`endif
|
||||
|
||||
`ifdef VM_ENABLE
|
||||
`ifndef PAGE_TABLE_BASE_ADDR
|
||||
`define PAGE_TABLE_BASE_ADDR 32'hF0000000
|
||||
`endif
|
||||
|
||||
`endif
|
||||
|
||||
`endif
|
||||
|
||||
`define IO_END_ADDR `USER_BASE_ADDR
|
||||
|
@ -202,7 +217,7 @@
|
|||
`ifndef IO_COUT_ADDR
|
||||
`define IO_COUT_ADDR `IO_BASE_ADDR
|
||||
`endif
|
||||
`define IO_COUT_SIZE 64
|
||||
`define IO_COUT_SIZE `MEM_BLOCK_SIZE
|
||||
|
||||
`ifndef IO_MPM_ADDR
|
||||
`define IO_MPM_ADDR (`IO_COUT_ADDR + `IO_COUT_SIZE)
|
||||
|
@ -251,6 +266,59 @@
|
|||
`define DEBUG_LEVEL 3
|
||||
`endif
|
||||
|
||||
`ifndef MEM_PAGE_SIZE
|
||||
`define MEM_PAGE_SIZE (4096)
|
||||
`endif
|
||||
`ifndef MEM_PAGE_LOG2_SIZE
|
||||
`define MEM_PAGE_LOG2_SIZE (12)
|
||||
`endif
|
||||
|
||||
// Virtual Memory Configuration ///////////////////////////////////////////////////////
|
||||
`ifdef VM_ENABLE
|
||||
`ifdef XLEN_32
|
||||
`ifndef VM_ADDR_MODE
|
||||
`define VM_ADDR_MODE SV32 //or BARE
|
||||
`endif
|
||||
`ifndef PT_LEVEL
|
||||
`define PT_LEVEL (2)
|
||||
`endif
|
||||
`ifndef PTE_SIZE
|
||||
`define PTE_SIZE (4)
|
||||
`endif
|
||||
`ifndef NUM_PTE_ENTRY
|
||||
`define NUM_PTE_ENTRY (1024)
|
||||
`endif
|
||||
`ifndef PT_SIZE_LIMIT
|
||||
`define PT_SIZE_LIMIT (1<<23)
|
||||
`endif
|
||||
`else
|
||||
`ifndef VM_ADDR_MODE
|
||||
`define VM_ADDR_MODE SV39 //or BARE
|
||||
`endif
|
||||
`ifndef PT_LEVEL
|
||||
`define PT_LEVEL (3)
|
||||
`endif
|
||||
`ifndef PTE_SIZE
|
||||
`define PTE_SIZE (8)
|
||||
`endif
|
||||
`ifndef NUM_PTE_ENTRY
|
||||
`define NUM_PTE_ENTRY (512)
|
||||
`endif
|
||||
`ifndef PT_SIZE_LIMIT
|
||||
`define PT_SIZE_LIMIT (1<<25)
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifndef PT_SIZE
|
||||
`define PT_SIZE MEM_PAGE_SIZE
|
||||
`endif
|
||||
|
||||
`ifndef TLB_SIZE
|
||||
`define TLB_SIZE (32)
|
||||
`endif
|
||||
|
||||
`endif
|
||||
|
||||
// Pipeline Configuration /////////////////////////////////////////////////////
|
||||
|
||||
// Issue width
|
||||
|
|
286
hw/rtl/core/VX_gpr_slice.sv
Normal file
286
hw/rtl/core/VX_gpr_slice.sv
Normal file
|
@ -0,0 +1,286 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
module VX_gpr_slice import VX_gpu_pkg::*; #(
|
||||
parameter CORE_ID = 0,
|
||||
parameter CACHE_ENABLE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_writeback_if.slave writeback_if,
|
||||
VX_scoreboard_if.slave scoreboard_if,
|
||||
VX_operands_if.master operands_if
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS;
|
||||
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * ISSUE_RATIO);
|
||||
|
||||
localparam STATE_IDLE = 2'd0;
|
||||
localparam STATE_FETCH1 = 2'd1;
|
||||
localparam STATE_FETCH2 = 2'd2;
|
||||
localparam STATE_FETCH3 = 2'd3;
|
||||
localparam STATE_BITS = 2;
|
||||
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data;
|
||||
reg [`NR_BITS-1:0] gpr_rd_rid, gpr_rd_rid_n;
|
||||
reg [ISSUE_WIS_W-1:0] gpr_rd_wis, gpr_rd_wis_n;
|
||||
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] cache_data [ISSUE_RATIO-1:0];
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] cache_data_n [ISSUE_RATIO-1:0];
|
||||
reg [`NR_BITS-1:0] cache_reg [ISSUE_RATIO-1:0];
|
||||
reg [`NR_BITS-1:0] cache_reg_n [ISSUE_RATIO-1:0];
|
||||
reg [`NUM_THREADS-1:0] cache_tmask [ISSUE_RATIO-1:0];
|
||||
reg [`NUM_THREADS-1:0] cache_tmask_n [ISSUE_RATIO-1:0];
|
||||
reg [ISSUE_RATIO-1:0] cache_eop, cache_eop_n;
|
||||
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data, rs1_data_n;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n;
|
||||
|
||||
reg [STATE_BITS-1:0] state, state_n;
|
||||
reg [`NR_BITS-1:0] rs2, rs2_n;
|
||||
reg [`NR_BITS-1:0] rs3, rs3_n;
|
||||
reg rs2_ready, rs2_ready_n;
|
||||
reg rs3_ready, rs3_ready_n;
|
||||
reg data_ready, data_ready_n;
|
||||
|
||||
wire stg_valid_in, stg_ready_in;
|
||||
|
||||
wire is_rs1_zero = (scoreboard_if.data.rs1 == 0);
|
||||
wire is_rs2_zero = (scoreboard_if.data.rs2 == 0);
|
||||
wire is_rs3_zero = (scoreboard_if.data.rs3 == 0);
|
||||
|
||||
always @(*) begin
|
||||
state_n = state;
|
||||
rs2_n = rs2;
|
||||
rs3_n = rs3;
|
||||
rs2_ready_n = rs2_ready;
|
||||
rs3_ready_n = rs3_ready;
|
||||
rs1_data_n = rs1_data;
|
||||
rs2_data_n = rs2_data;
|
||||
rs3_data_n = rs3_data;
|
||||
cache_data_n = cache_data;
|
||||
cache_reg_n = cache_reg;
|
||||
cache_tmask_n= cache_tmask;
|
||||
cache_eop_n = cache_eop;
|
||||
gpr_rd_rid_n = gpr_rd_rid;
|
||||
gpr_rd_wis_n = gpr_rd_wis;
|
||||
data_ready_n = data_ready;
|
||||
|
||||
case (state)
|
||||
STATE_IDLE: begin
|
||||
if (operands_if.valid && operands_if.ready) begin
|
||||
data_ready_n = 0;
|
||||
end
|
||||
if (scoreboard_if.valid && data_ready_n == 0) begin
|
||||
data_ready_n = 1;
|
||||
if (is_rs3_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if.data.rs3 == cache_reg[scoreboard_if.data.wis] &&
|
||||
(scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin
|
||||
rs3_data_n = (is_rs3_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis];
|
||||
rs3_ready_n = 1;
|
||||
end else begin
|
||||
rs3_ready_n = 0;
|
||||
gpr_rd_rid_n = scoreboard_if.data.rs3;
|
||||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH3;
|
||||
end
|
||||
if (is_rs2_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if.data.rs2 == cache_reg[scoreboard_if.data.wis] &&
|
||||
(scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin
|
||||
rs2_data_n = (is_rs2_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis];
|
||||
rs2_ready_n = 1;
|
||||
end else begin
|
||||
rs2_ready_n = 0;
|
||||
gpr_rd_rid_n = scoreboard_if.data.rs2;
|
||||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH2;
|
||||
end
|
||||
if (is_rs1_zero || (CACHE_ENABLE != 0 &&
|
||||
scoreboard_if.data.rs1 == cache_reg[scoreboard_if.data.wis] &&
|
||||
(scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin
|
||||
rs1_data_n = (is_rs1_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis];
|
||||
end else begin
|
||||
gpr_rd_rid_n = scoreboard_if.data.rs1;
|
||||
data_ready_n = 0;
|
||||
state_n = STATE_FETCH1;
|
||||
end
|
||||
end
|
||||
gpr_rd_wis_n = scoreboard_if.data.wis;
|
||||
rs2_n = scoreboard_if.data.rs2;
|
||||
rs3_n = scoreboard_if.data.rs3;
|
||||
end
|
||||
STATE_FETCH1: begin
|
||||
rs1_data_n = gpr_rd_data;
|
||||
if (~rs2_ready) begin
|
||||
gpr_rd_rid_n = rs2;
|
||||
state_n = STATE_FETCH2;
|
||||
end else if (~rs3_ready) begin
|
||||
gpr_rd_rid_n = rs3;
|
||||
state_n = STATE_FETCH3;
|
||||
end else begin
|
||||
data_ready_n = 1;
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
STATE_FETCH2: begin
|
||||
rs2_data_n = gpr_rd_data;
|
||||
if (~rs3_ready) begin
|
||||
gpr_rd_rid_n = rs3;
|
||||
state_n = STATE_FETCH3;
|
||||
end else begin
|
||||
data_ready_n = 1;
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
end
|
||||
STATE_FETCH3: begin
|
||||
rs3_data_n = gpr_rd_data;
|
||||
data_ready_n = 1;
|
||||
state_n = STATE_IDLE;
|
||||
end
|
||||
endcase
|
||||
|
||||
if (CACHE_ENABLE != 0 && writeback_if.valid) begin
|
||||
if ((cache_reg[writeback_if.data.wis] == writeback_if.data.rd)
|
||||
|| (cache_eop[writeback_if.data.wis] && writeback_if.data.sop)) begin
|
||||
for (integer j = 0; j < `NUM_THREADS; ++j) begin
|
||||
if (writeback_if.data.tmask[j]) begin
|
||||
cache_data_n[writeback_if.data.wis][j] = writeback_if.data.data[j];
|
||||
end
|
||||
end
|
||||
cache_reg_n[writeback_if.data.wis] = writeback_if.data.rd;
|
||||
cache_eop_n[writeback_if.data.wis] = writeback_if.data.eop;
|
||||
cache_tmask_n[writeback_if.data.wis] = writeback_if.data.sop ? writeback_if.data.tmask :
|
||||
(cache_tmask_n[writeback_if.data.wis] | writeback_if.data.tmask);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= STATE_IDLE;
|
||||
cache_eop <= {ISSUE_RATIO{1'b1}};
|
||||
data_ready <= 0;
|
||||
end else begin
|
||||
state <= state_n;
|
||||
cache_eop <= cache_eop_n;
|
||||
data_ready <= data_ready_n;
|
||||
end
|
||||
gpr_rd_rid <= gpr_rd_rid_n;
|
||||
gpr_rd_wis <= gpr_rd_wis_n;
|
||||
rs2_ready <= rs2_ready_n;
|
||||
rs3_ready <= rs3_ready_n;
|
||||
rs2 <= rs2_n;
|
||||
rs3 <= rs3_n;
|
||||
rs1_data <= rs1_data_n;
|
||||
rs2_data <= rs2_data_n;
|
||||
rs3_data <= rs3_data_n;
|
||||
cache_data <= cache_data_n;
|
||||
cache_reg <= cache_reg_n;
|
||||
cache_tmask <= cache_tmask_n;
|
||||
end
|
||||
|
||||
assign stg_valid_in = scoreboard_if.valid && data_ready;
|
||||
assign scoreboard_if.ready = stg_ready_in && data_ready;
|
||||
|
||||
VX_toggle_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) toggle_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (stg_valid_in),
|
||||
.data_in ({
|
||||
scoreboard_if.data.uuid,
|
||||
scoreboard_if.data.wis,
|
||||
scoreboard_if.data.tmask,
|
||||
scoreboard_if.data.PC,
|
||||
scoreboard_if.data.wb,
|
||||
scoreboard_if.data.ex_type,
|
||||
scoreboard_if.data.op_type,
|
||||
scoreboard_if.data.op_args,
|
||||
scoreboard_if.data.rd
|
||||
}),
|
||||
.ready_in (stg_ready_in),
|
||||
.valid_out (operands_if.valid),
|
||||
.data_out ({
|
||||
operands_if.data.uuid,
|
||||
operands_if.data.wis,
|
||||
operands_if.data.tmask,
|
||||
operands_if.data.PC,
|
||||
operands_if.data.wb,
|
||||
operands_if.data.ex_type,
|
||||
operands_if.data.op_type,
|
||||
operands_if.data.op_args,
|
||||
operands_if.data.rd
|
||||
}),
|
||||
.ready_out (operands_if.ready)
|
||||
);
|
||||
|
||||
assign operands_if.data.rs1_data = rs1_data;
|
||||
assign operands_if.data.rs2_data = rs2_data;
|
||||
assign operands_if.data.rs3_data = rs3_data;
|
||||
|
||||
// GPR banks
|
||||
|
||||
reg [RAM_ADDRW-1:0] gpr_rd_addr;
|
||||
wire [RAM_ADDRW-1:0] gpr_wr_addr;
|
||||
if (ISSUE_WIS != 0) begin
|
||||
assign gpr_wr_addr = {writeback_if.data.wis, writeback_if.data.rd};
|
||||
always @(posedge clk) begin
|
||||
gpr_rd_addr <= {gpr_rd_wis_n, gpr_rd_rid_n};
|
||||
end
|
||||
end else begin
|
||||
assign gpr_wr_addr = writeback_if.data.rd;
|
||||
always @(posedge clk) begin
|
||||
gpr_rd_addr <= gpr_rd_rid_n;
|
||||
end
|
||||
end
|
||||
|
||||
`ifdef GPR_RESET
|
||||
reg wr_enabled = 0;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wr_enabled <= 1;
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
for (genvar j = 0; j < `NUM_THREADS; ++j) begin
|
||||
VX_dp_ram #(
|
||||
.DATAW (`XLEN),
|
||||
.SIZE (`NUM_REGS * ISSUE_RATIO),
|
||||
`ifdef GPR_RESET
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0),
|
||||
`endif
|
||||
.NO_RWCHECK (1)
|
||||
) gpr_ram (
|
||||
.clk (clk),
|
||||
.read (1'b1),
|
||||
`UNUSED_PIN (wren),
|
||||
`ifdef GPR_RESET
|
||||
.write (wr_enabled && writeback_if.valid && writeback_if.data.tmask[j]),
|
||||
`else
|
||||
.write (writeback_if.valid && writeback_if.data.tmask[j]),
|
||||
`endif
|
||||
.waddr (gpr_wr_addr),
|
||||
.wdata (writeback_if.data.data[j]),
|
||||
.raddr (gpr_rd_addr),
|
||||
.rdata (gpr_rd_data[j])
|
||||
);
|
||||
end
|
||||
|
||||
endmodule
|
79
hw/rtl/core/VX_pending_instr.sv
Normal file
79
hw/rtl/core/VX_pending_instr.sv
Normal file
|
@ -0,0 +1,79 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Per-warp pending-instruction counter.
// Keeps one CTR_WIDTH-bit counter per warp (`NUM_WARPS of them). A counter goes up by one
// when `incr` is asserted for warp `incr_wid`, and goes down by the number of asserted
// `decr[k]` entries whose `decr_wid[k]` selects that warp.
// Outputs:
//   empty     - AND of the per-warp "counter == 0" flags.
//   alm_empty - "counter == ALM_EMPTY" flag of the warp selected by `alm_empty_wid`.
// Both the increment/decrement amounts and the flags are registered, so a request sampled
// in one cycle updates the counter on the next clock edge after it has been captured in
// incr_cnt/decr_cnt.
module VX_pending_instr #(
|
||||
parameter CTR_WIDTH = 12,
|
||||
parameter ALM_EMPTY = 1,
|
||||
parameter DECR_COUNT = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire incr,
|
||||
input wire [`NW_WIDTH-1:0] incr_wid,
|
||||
input wire [DECR_COUNT-1:0] decr,
|
||||
input wire [DECR_COUNT-1:0][`NW_WIDTH-1:0] decr_wid,
|
||||
input wire [`NW_WIDTH-1:0] alm_empty_wid,
|
||||
output wire empty,
|
||||
output wire alm_empty
|
||||
);
|
||||
// Width needed to hold a decrement amount in the range 0..DECR_COUNT.
localparam COUNTW = `CLOG2(DECR_COUNT+1);
|
||||
|
||||
reg [`NUM_WARPS-1:0][CTR_WIDTH-1:0] pending_instrs;
|
||||
reg [`NUM_WARPS-1:0][COUNTW-1:0] decr_cnt;
|
||||
reg [`NUM_WARPS-1:0][DECR_COUNT-1:0] decr_mask;
|
||||
reg [`NUM_WARPS-1:0] incr_cnt, incr_cnt_n;
|
||||
reg [`NUM_WARPS-1:0] alm_empty_r, empty_r;
|
||||
|
||||
// Combinational decode: one-hot increment per warp, and a per-warp bit mask with
// bit k set when decrement port k is active and targets that warp.
always @(*) begin
|
||||
incr_cnt_n = 0;
|
||||
decr_mask = 0;
|
||||
if (incr) begin
|
||||
incr_cnt_n[incr_wid] = 1;
|
||||
end
|
||||
for (integer i = 0; i < DECR_COUNT; ++i) begin
|
||||
if (decr[i]) begin
|
||||
decr_mask[decr_wid[i]][i] = 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// One counter slice per warp.
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
|
||||
wire [COUNTW-1:0] decr_cnt_n;
|
||||
// NOTE(review): `POP_COUNT is defined elsewhere; presumably it drives decr_cnt_n with
// the number of set bits in decr_mask[i] - confirm against the macro definition.
`POP_COUNT(decr_cnt_n, decr_mask[i]);
|
||||
|
||||
// Next counter value uses the *registered* incr_cnt/decr_cnt from the previous cycle.
wire [CTR_WIDTH-1:0] pending_instrs_n = pending_instrs[i] + CTR_WIDTH'(incr_cnt[i]) - CTR_WIDTH'(decr_cnt[i]);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
incr_cnt[i] <= '0;
|
||||
decr_cnt[i] <= '0;
|
||||
pending_instrs[i] <= '0;
|
||||
alm_empty_r[i] <= 0;
|
||||
// A reset counter is zero, so the warp starts out reported as empty.
empty_r[i] <= 1;
|
||||
end else begin
|
||||
incr_cnt[i] <= incr_cnt_n[i];
|
||||
decr_cnt[i] <= decr_cnt_n;
|
||||
pending_instrs[i] <= pending_instrs_n;
|
||||
// Flags are computed from the value being written, so they stay aligned with pending_instrs.
alm_empty_r[i] <= (pending_instrs_n == ALM_EMPTY);
|
||||
empty_r[i] <= (pending_instrs_n == 0);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign alm_empty = alm_empty_r[alm_empty_wid];
|
||||
assign empty = (& empty_r);
|
||||
|
||||
endmodule
|
387
hw/rtl/core/VX_trace.vh
Normal file
387
hw/rtl/core/VX_trace.vh
Normal file
|
@ -0,0 +1,387 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`ifndef VX_TRACE_VH
|
||||
`define VX_TRACE_VH
|
||||
|
||||
`ifdef SIMULATION
|
||||
|
||||
// Traces the name of the execution unit selected by `ex_type` ("ALU", "LSU", "FPU" or
// "SFU") through `TRACE at the given verbosity `level`; any other value traces "?".
task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type);
|
||||
case (ex_type)
|
||||
`EX_ALU: `TRACE(level, ("ALU"));
|
||||
`EX_LSU: `TRACE(level, ("LSU"));
|
||||
`EX_FPU: `TRACE(level, ("FPU"));
|
||||
`EX_SFU: `TRACE(level, ("SFU"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
|
||||
// Traces the instruction mnemonic for a decoded operation through `TRACE at `level`.
//   ex_type - selects the execution unit (ALU / LSU / FPU / SFU).
//   op_type - unit-specific opcode; it is size-cast to the unit's opcode width before matching.
//   op_args - decoded arguments; individual flags pick the mnemonic variant
//             (alu.xtype / alu.is_w / alu.use_imm, lsu.is_float, fpu.fmt / fpu.frm,
//             wctl.is_neg, csr.use_imm).
// Any combination that is not listed traces "?".
task trace_ex_op(input int level,
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input VX_gpu_pkg::op_args_t op_args
|
||||
);
|
||||
case (ex_type)
|
||||
// ALU: sub-decoded by alu.xtype into arithmetic, branch and mul/div groups.
`EX_ALU: begin
|
||||
case (op_args.alu.xtype)
|
||||
// Arithmetic: is_w selects the "W" forms, use_imm selects the immediate forms.
`ALU_TYPE_ARITH: begin
|
||||
if (op_args.alu.is_w) begin
|
||||
if (op_args.alu.use_imm) begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDIW"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLIW"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLIW"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAIW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDW"));
|
||||
`INST_ALU_SUB: `TRACE(level, ("SUBW"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLW"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLW"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end else begin
|
||||
if (op_args.alu.use_imm) begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADDI"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLLI"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRLI"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRAI"));
|
||||
`INST_ALU_SLT: `TRACE(level, ("SLTI"));
|
||||
`INST_ALU_SLTU: `TRACE(level, ("SLTIU"));
|
||||
`INST_ALU_XOR: `TRACE(level, ("XORI"));
|
||||
`INST_ALU_OR: `TRACE(level, ("ORI"));
|
||||
`INST_ALU_AND: `TRACE(level, ("ANDI"));
|
||||
`INST_ALU_LUI: `TRACE(level, ("LUI"));
|
||||
`INST_ALU_AUIPC: `TRACE(level, ("AUIPC"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_ALU_BITS'(op_type))
|
||||
`INST_ALU_ADD: `TRACE(level, ("ADD"));
|
||||
`INST_ALU_SUB: `TRACE(level, ("SUB"));
|
||||
`INST_ALU_SLL: `TRACE(level, ("SLL"));
|
||||
`INST_ALU_SRL: `TRACE(level, ("SRL"));
|
||||
`INST_ALU_SRA: `TRACE(level, ("SRA"));
|
||||
`INST_ALU_SLT: `TRACE(level, ("SLT"));
|
||||
`INST_ALU_SLTU: `TRACE(level, ("SLTU"));
|
||||
`INST_ALU_XOR: `TRACE(level, ("XOR"));
|
||||
`INST_ALU_OR: `TRACE(level, ("OR"));
|
||||
`INST_ALU_AND: `TRACE(level, ("AND"));
|
||||
`INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ"));
|
||||
`INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
// Branch group: conditional branches, jumps, and the ECALL/EBREAK/xRET opcodes.
`ALU_TYPE_BRANCH: begin
|
||||
case (`INST_BR_BITS'(op_type))
|
||||
`INST_BR_EQ: `TRACE(level, ("BEQ"));
|
||||
`INST_BR_NE: `TRACE(level, ("BNE"));
|
||||
`INST_BR_LT: `TRACE(level, ("BLT"));
|
||||
`INST_BR_GE: `TRACE(level, ("BGE"));
|
||||
`INST_BR_LTU: `TRACE(level, ("BLTU"));
|
||||
`INST_BR_GEU: `TRACE(level, ("BGEU"));
|
||||
`INST_BR_JAL: `TRACE(level, ("JAL"));
|
||||
`INST_BR_JALR: `TRACE(level, ("JALR"));
|
||||
`INST_BR_ECALL: `TRACE(level, ("ECALL"));
|
||||
`INST_BR_EBREAK:`TRACE(level, ("EBREAK"));
|
||||
`INST_BR_URET: `TRACE(level, ("URET"));
|
||||
`INST_BR_SRET: `TRACE(level, ("SRET"));
|
||||
`INST_BR_MRET: `TRACE(level, ("MRET"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
// Multiply/divide group: is_w selects the "W" forms (no MULH* variants are listed there).
`ALU_TYPE_MULDIV: begin
|
||||
if (op_args.alu.is_w) begin
|
||||
case (`INST_M_BITS'(op_type))
|
||||
`INST_M_MUL: `TRACE(level, ("MULW"));
|
||||
`INST_M_DIV: `TRACE(level, ("DIVW"));
|
||||
`INST_M_DIVU: `TRACE(level, ("DIVUW"));
|
||||
`INST_M_REM: `TRACE(level, ("REMW"));
|
||||
`INST_M_REMU: `TRACE(level, ("REMUW"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_M_BITS'(op_type))
|
||||
`INST_M_MUL: `TRACE(level, ("MUL"));
|
||||
`INST_M_MULH: `TRACE(level, ("MULH"));
|
||||
`INST_M_MULHSU:`TRACE(level, ("MULHSU"));
|
||||
`INST_M_MULHU: `TRACE(level, ("MULHU"));
|
||||
`INST_M_DIV: `TRACE(level, ("DIV"));
|
||||
`INST_M_DIVU: `TRACE(level, ("DIVU"));
|
||||
`INST_M_REM: `TRACE(level, ("REM"));
|
||||
`INST_M_REMU: `TRACE(level, ("REMU"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
// LSU: lsu.is_float selects the floating-point load/store mnemonics.
`EX_LSU: begin
|
||||
if (op_args.lsu.is_float) begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LW: `TRACE(level, ("FLW"));
|
||||
`INST_LSU_LD: `TRACE(level, ("FLD"));
|
||||
`INST_LSU_SW: `TRACE(level, ("FSW"));
|
||||
`INST_LSU_SD: `TRACE(level, ("FSD"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (`INST_LSU_BITS'(op_type))
|
||||
`INST_LSU_LB: `TRACE(level, ("LB"));
|
||||
`INST_LSU_LH: `TRACE(level, ("LH"));
|
||||
`INST_LSU_LW: `TRACE(level, ("LW"));
|
||||
`INST_LSU_LD: `TRACE(level, ("LD"));
|
||||
`INST_LSU_LBU:`TRACE(level, ("LBU"));
|
||||
`INST_LSU_LHU:`TRACE(level, ("LHU"));
|
||||
`INST_LSU_LWU:`TRACE(level, ("LWU"));
|
||||
`INST_LSU_SB: `TRACE(level, ("SB"));
|
||||
`INST_LSU_SH: `TRACE(level, ("SH"));
|
||||
`INST_LSU_SW: `TRACE(level, ("SW"));
|
||||
`INST_LSU_SD: `TRACE(level, ("SD"));
|
||||
`INST_LSU_FENCE:`TRACE(level,("FENCE"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
// FPU: fpu.fmt[0] selects the ".D" mnemonic over ".S"; for the integer conversions
// fpu.fmt[1] selects the "L"/"LU" mnemonic over "W"/"WU".
`EX_FPU: begin
|
||||
case (`INST_FPU_BITS'(op_type))
|
||||
`INST_FPU_ADD: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FADD.S"));
|
||||
end
|
||||
`INST_FPU_SUB: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FSUB.S"));
|
||||
end
|
||||
`INST_FPU_MUL: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FMUL.D"));
|
||||
else
|
||||
`TRACE(level, ("FMUL.S"));
|
||||
end
|
||||
`INST_FPU_DIV: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FDIV.D"));
|
||||
else
|
||||
`TRACE(level, ("FDIV.S"));
|
||||
end
|
||||
`INST_FPU_SQRT: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FSQRT.D"));
|
||||
else
|
||||
`TRACE(level, ("FSQRT.S"));
|
||||
end
|
||||
`INST_FPU_MADD: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FMADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FMADD.S"));
|
||||
end
|
||||
`INST_FPU_MSUB: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FMSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FMSUB.S"));
|
||||
end
|
||||
`INST_FPU_NMADD: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FNMADD.D"));
|
||||
else
|
||||
`TRACE(level, ("FNMADD.S"));
|
||||
end
|
||||
`INST_FPU_NMSUB: begin
|
||||
if (op_args.fpu.fmt[0])
|
||||
`TRACE(level, ("FNMSUB.D"));
|
||||
else
|
||||
`TRACE(level, ("FNMSUB.S"));
|
||||
end
|
||||
// Compare: the low two bits of fpu.frm pick FLE / FLT / FEQ.
`INST_FPU_CMP: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
case (op_args.fpu.frm[1:0])
|
||||
0: `TRACE(level, ("FLE.D"));
|
||||
1: `TRACE(level, ("FLT.D"));
|
||||
2: `TRACE(level, ("FEQ.D"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end else begin
|
||||
case (op_args.fpu.frm[1:0])
|
||||
0: `TRACE(level, ("FLE.S"));
|
||||
1: `TRACE(level, ("FLT.S"));
|
||||
2: `TRACE(level, ("FEQ.S"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2F: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
`TRACE(level, ("FCVT.D.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.D"));
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2I: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.L.D"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.W.D"));
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.L.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.W.S"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_F2U: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.LU.D"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.WU.D"));
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.LU.S"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.WU.S"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_I2F: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.D.L"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.D.W"));
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.S.L"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.W"));
|
||||
end
|
||||
end
|
||||
end
|
||||
`INST_FPU_U2F: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.D.LU"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.D.WU"));
|
||||
end
|
||||
end else begin
|
||||
if (op_args.fpu.fmt[1]) begin
|
||||
`TRACE(level, ("FCVT.S.LU"));
|
||||
end else begin
|
||||
`TRACE(level, ("FCVT.S.WU"));
|
||||
end
|
||||
end
|
||||
end
|
||||
// Misc: the full fpu.frm value (0..7) picks the operation; there is no default arm here.
`INST_FPU_MISC: begin
|
||||
if (op_args.fpu.fmt[0]) begin
|
||||
case (op_args.fpu.frm)
|
||||
0: `TRACE(level, ("FSGNJ.D"));
|
||||
1: `TRACE(level, ("FSGNJN.D"));
|
||||
2: `TRACE(level, ("FSGNJX.D"));
|
||||
3: `TRACE(level, ("FCLASS.D"));
|
||||
4: `TRACE(level, ("FMV.X.D"));
|
||||
5: `TRACE(level, ("FMV.D.X"));
|
||||
6: `TRACE(level, ("FMIN.D"));
|
||||
7: `TRACE(level, ("FMAX.D"));
|
||||
endcase
|
||||
end else begin
|
||||
case (op_args.fpu.frm)
|
||||
0: `TRACE(level, ("FSGNJ.S"));
|
||||
1: `TRACE(level, ("FSGNJN.S"));
|
||||
2: `TRACE(level, ("FSGNJX.S"));
|
||||
3: `TRACE(level, ("FCLASS.S"));
|
||||
4: `TRACE(level, ("FMV.X.S"));
|
||||
5: `TRACE(level, ("FMV.S.X"));
|
||||
6: `TRACE(level, ("FMIN.S"));
|
||||
7: `TRACE(level, ("FMAX.S"));
|
||||
endcase
|
||||
end
|
||||
end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
// SFU: warp-control and CSR operations; wctl.is_neg adds the ".N" suffix and
// csr.use_imm selects the immediate CSR mnemonics.
`EX_SFU: begin
|
||||
case (`INST_SFU_BITS'(op_type))
|
||||
`INST_SFU_TMC: `TRACE(level, ("TMC"));
|
||||
`INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN"));
|
||||
`INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end
|
||||
`INST_SFU_JOIN: `TRACE(level, ("JOIN"));
|
||||
`INST_SFU_BAR: `TRACE(level, ("BAR"));
|
||||
`INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end
|
||||
`INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end
|
||||
`INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end
|
||||
`INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
end
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
|
||||
// Traces the unit-specific operand fields of a decoded operation through `TRACE at `level`.
// The text starts with ", " so it can be appended to a trace line already in progress.
//   ex_type - selects which member of op_args is printed.
//   op_type - only consulted for the SFU, to detect CSR operations.
//   op_args - decoded arguments.
// Nothing is traced for non-CSR SFU operations or for an unknown ex_type.
task trace_op_args(input int level,
|
||||
input [`EX_BITS-1:0] ex_type,
|
||||
input [`INST_OP_BITS-1:0] op_type,
|
||||
input VX_gpu_pkg::op_args_t op_args
|
||||
);
|
||||
case (ex_type)
|
||||
`EX_ALU: begin
|
||||
`TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm));
|
||||
end
|
||||
`EX_LSU: begin
|
||||
`TRACE(level, (", offset=0x%0h", op_args.lsu.offset));
|
||||
end
|
||||
`EX_FPU: begin
|
||||
`TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm));
|
||||
end
|
||||
`EX_SFU: begin
|
||||
// Only CSR operations print their arguments here.
if (`INST_SFU_IS_CSR(op_type)) begin
|
||||
`TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm));
|
||||
end
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
endtask
|
||||
|
||||
// Traces the symbolic name of a base DCR address (startup address/argument words and the
// MPM class register) through `TRACE at `level`; any other address traces "?".
task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr);
|
||||
case (addr)
|
||||
`VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0"));
|
||||
`VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1"));
|
||||
`VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0"));
|
||||
`VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1"));
|
||||
`VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS"));
|
||||
default: `TRACE(level, ("?"));
|
||||
endcase
|
||||
endtask
|
||||
|
||||
`endif
|
||||
|
||||
`endif // VX_TRACE_VH
|
|
@ -32,6 +32,10 @@ CFLAGS += -O3 -mcmodel=medany -fno-exceptions -fdata-sections -ffunction-section
|
|||
CFLAGS += -I$(INC_DIR) -I$(ROOT_DIR)/hw
|
||||
CFLAGS += -DXLEN_$(XLEN)
|
||||
|
||||
ifeq ($(VM_ENABLE), 1)
|
||||
CFLAGS += -DVM_ENABLE
|
||||
endif
|
||||
|
||||
PROJECT := libvortex
|
||||
|
||||
SRCS = $(SRC_DIR)/vx_start.S $(SRC_DIR)/vx_syscalls.c $(SRC_DIR)/vx_print.S $(SRC_DIR)/tinyprintf.c $(SRC_DIR)/vx_print.c $(SRC_DIR)/vx_spawn.c $(SRC_DIR)/vx_serial.S $(SRC_DIR)/vx_perf.c
|
||||
|
|
46
miscs/patches/ramulator.patch
Normal file
46
miscs/patches/ramulator.patch
Normal file
|
@ -0,0 +1,46 @@
|
|||
diff --git a/Makefile b/Makefile
|
||||
index ea340c8..d2aac5b 100644
|
||||
--- a/Makefile
|
||||
+++ b/Makefile
|
||||
@@ -7,16 +7,16 @@ OBJS := $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.o, $(SRCS))
|
||||
|
||||
# Ramulator currently supports g++ 5.1+ or clang++ 3.4+. It will NOT work with
|
||||
# g++ 4.x due to an internal compiler error when processing lambda functions.
|
||||
-CXX := clang++
|
||||
+#CXX := clang++
|
||||
# CXX := g++-5
|
||||
-CXXFLAGS := -O3 -std=c++11 -g -Wall
|
||||
+CXXFLAGS := -std=c++11 -O3 -g -Wall -fPIC
|
||||
|
||||
.PHONY: all clean depend
|
||||
|
||||
all: depend ramulator
|
||||
|
||||
clean:
|
||||
- rm -f ramulator
|
||||
+ rm -f ramulator libramulator.a
|
||||
rm -rf $(OBJDIR)
|
||||
|
||||
depend: $(OBJDIR)/.depend
|
||||
@@ -36,7 +36,7 @@ ramulator: $(MAIN) $(OBJS) $(SRCDIR)/*.h | depend
|
||||
$(CXX) $(CXXFLAGS) -DRAMULATOR -o $@ $(MAIN) $(OBJS)
|
||||
|
||||
libramulator.a: $(OBJS) $(OBJDIR)/Gem5Wrapper.o
|
||||
- libtool -static -o $@ $(OBJS) $(OBJDIR)/Gem5Wrapper.o
|
||||
+ $(AR) rcs $@ $^
|
||||
|
||||
$(OBJS): | $(OBJDIR)
|
||||
|
||||
diff --git a/src/Request.h b/src/Request.h
|
||||
index 57abd0d..a5ce061 100644
|
||||
--- a/src/Request.h
|
||||
+++ b/src/Request.h
|
||||
@@ -36,7 +36,7 @@ public:
|
||||
|
||||
Request(long addr, Type type, int coreid = 0)
|
||||
: is_first_command(true), addr(addr), coreid(coreid), type(type),
|
||||
- callback([](Request& req){}) {}
|
||||
+ callback([](Request&){}) {}
|
||||
|
||||
Request(long addr, Type type, function<void(Request&)> callback, int coreid = 0)
|
||||
: is_first_command(true), addr(addr), coreid(coreid), type(type), callback(callback) {}
|
|
@ -3,6 +3,8 @@ include $(ROOT_DIR)/config.mk
|
|||
|
||||
all: stub rtlsim simx opae xrt
|
||||
|
||||
vm: stub simx
|
||||
|
||||
stub:
|
||||
$(MAKE) -C stub
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
#define CACHE_BLOCK_SIZE 64
|
||||
|
||||
#define RAM_PAGE_SIZE 4096
|
||||
#define RAM_PAGE_SIZE 4096 // Please use MEM_PAGE_SIZE in VX_config.h
|
||||
|
||||
#define ALLOC_BASE_ADDR USER_BASE_ADDR
|
||||
|
||||
|
|
|
@ -39,6 +39,15 @@ public:
|
|||
page_t* currPage = pages_;
|
||||
while (currPage) {
|
||||
auto nextPage = currPage->next;
|
||||
#ifdef VM_ENABLE
|
||||
block_t* currblock = currPage->findfirstUsedBlock();
|
||||
block_t* nextblock;
|
||||
while (currblock) {
|
||||
nextblock= currblock->nextUsed;
|
||||
currPage->release(currblock);
|
||||
currblock = nextblock;
|
||||
}
|
||||
#endif
|
||||
delete currPage;
|
||||
currPage = nextPage;
|
||||
}
|
||||
|
@ -70,7 +79,7 @@ public:
|
|||
size = alignSize(size, pageAlign_);
|
||||
|
||||
// Check if the reservation is within memory capacity bounds
|
||||
if (addr + size > capacity_) {
|
||||
if (addr + size > baseAddress_ + capacity_) {
|
||||
printf("error: address range out of bounds\n");
|
||||
return -1;
|
||||
}
|
||||
|
@ -118,12 +127,12 @@ public:
|
|||
auto pageSize = alignSize(size, pageAlign_);
|
||||
uint64_t pageAddr;
|
||||
if (!this->findNextAddress(pageSize, &pageAddr)) {
|
||||
printf("error: out of memory\n");
|
||||
printf("error: out of memory (Can't find next address)\n");
|
||||
return -1;
|
||||
}
|
||||
currPage = this->createPage(pageAddr, pageSize);
|
||||
if (nullptr == currPage) {
|
||||
printf("error: out of memory\n");
|
||||
printf("error: out of memory (Can't create a page)\n");
|
||||
return -1;
|
||||
}
|
||||
freeBlock = currPage->findFreeBlock(size);
|
||||
|
@ -335,6 +344,11 @@ private:
|
|||
}
|
||||
return nullptr;
|
||||
}
|
||||
#ifdef VM_ENABLE
|
||||
// Returns the head of this page's used-block list (usedList_). Callers walk the list
// through block_t::nextUsed; presumably the result is null when nothing is allocated.
block_t* findfirstUsedBlock() {
|
||||
return usedList_;
|
||||
}
|
||||
#endif
|
||||
|
||||
private:
|
||||
|
||||
|
@ -480,7 +494,7 @@ private:
|
|||
|
||||
bool findNextAddress(uint64_t size, uint64_t* addr) {
|
||||
if (pages_ == nullptr) {
|
||||
*addr = baseAddress_;
|
||||
*addr = baseAddress_;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -498,10 +512,10 @@ private:
|
|||
endOfLastPage = current->addr + current->size;
|
||||
current = current->next;
|
||||
}
|
||||
|
||||
|
||||
// If no suitable gap is found, place the new page at the end of the last page
|
||||
// Check if the allocator has enough capacity
|
||||
if ((endOfLastPage + size) <= capacity_) {
|
||||
if ((endOfLastPage + size) <= (baseAddress_ + capacity_)) {
|
||||
*addr = endOfLastPage;
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -66,6 +66,7 @@ typedef void* vx_buffer_h;
|
|||
#define VX_MEM_READ 0x1
|
||||
#define VX_MEM_WRITE 0x2
|
||||
#define VX_MEM_READ_WRITE 0x3
|
||||
#define VX_MEM_PIN_MEMORY 0x4
|
||||
|
||||
// open the device and connect to it
|
||||
int vx_dev_open(vx_device_h* hdevice);
|
||||
|
|
|
@ -10,6 +10,10 @@ CXXFLAGS += -I$(INC_DIR) -I../common -I$(ROOT_DIR)/hw -I$(SIM_DIR)/simx -I$(COMM
|
|||
CXXFLAGS += $(CONFIGS)
|
||||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
|
||||
ifeq ($(VM_ENABLE), 1)
|
||||
CXXFLAGS += -DVM_ENABLE
|
||||
endif
|
||||
|
||||
LDFLAGS += -shared -pthread
|
||||
LDFLAGS += -L$(DESTDIR) -lsimx
|
||||
|
||||
|
|
|
@ -27,24 +27,48 @@
|
|||
#include <future>
|
||||
#include <chrono>
|
||||
|
||||
#include <VX_config.h>
|
||||
#ifdef VM_ENABLE
|
||||
#include <malloc.h>
|
||||
|
||||
#include <VX_types.h>
|
||||
|
||||
#include <util.h>
|
||||
|
||||
#include <processor.h>
|
||||
#include <arch.h>
|
||||
#include <mem.h>
|
||||
#include <constants.h>
|
||||
#include <unordered_map>
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#endif
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
class vx_device {
|
||||
public:
|
||||
vx_device()
|
||||
: arch_(NUM_THREADS, NUM_WARPS, NUM_CORES)
|
||||
, ram_(0, RAM_PAGE_SIZE)
|
||||
, processor_(arch_)
|
||||
, global_mem_(ALLOC_BASE_ADDR,
|
||||
GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR,
|
||||
RAM_PAGE_SIZE,
|
||||
CACHE_BLOCK_SIZE)
|
||||
{
|
||||
// attach memory module
|
||||
processor_.attach_ram(&ram_);
|
||||
}
|
||||
vx_device()
|
||||
: arch_(NUM_THREADS, NUM_WARPS, NUM_CORES)
|
||||
, ram_(0, MEM_PAGE_SIZE)
|
||||
, processor_(arch_)
|
||||
, global_mem_(ALLOC_BASE_ADDR, GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR, MEM_PAGE_SIZE, CACHE_BLOCK_SIZE)
|
||||
{
|
||||
// attach memory module
|
||||
processor_.attach_ram(&ram_);
|
||||
#ifdef VM_ENABLE
|
||||
CHECK_ERR(init_VM(), );
|
||||
#endif
|
||||
}
|
||||
|
||||
~vx_device() {
|
||||
#ifdef VM_ENABLE
|
||||
global_mem_.release(PAGE_TABLE_BASE_ADDR);
|
||||
// for (auto i = addr_mapping.begin(); i != addr_mapping.end(); i++)
|
||||
// page_table_mem_->release(i->second << MEM_PAGE_SIZE);
|
||||
delete virtual_mem_;
|
||||
delete page_table_mem_;
|
||||
#endif
|
||||
if (future_.valid()) {
|
||||
future_.wait();
|
||||
}
|
||||
|
@ -93,35 +117,131 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
int mem_alloc(uint64_t size, int flags, uint64_t* dev_addr) {
|
||||
uint64_t addr;
|
||||
CHECK_ERR(global_mem_.allocate(size, &addr), {
|
||||
#ifdef VM_ENABLE
|
||||
|
||||
// physical (ppn) to virtual (vpn) mapping
|
||||
uint64_t map_p2v(uint64_t ppn, uint32_t flags)
|
||||
{
|
||||
DBGPRINT(" [RT:MAP_P2V] ppn: %lx\n", ppn);
|
||||
if (addr_mapping.find(ppn) != addr_mapping.end()) return addr_mapping[ppn];
|
||||
|
||||
// If ppn to vpn mapping doesnt exist, create mapping
|
||||
DBGPRINT(" [RT:MAP_P2V] Not found. Allocate new page table or update a PTE.\n");
|
||||
uint64_t vpn;
|
||||
virtual_mem_->allocate(MEM_PAGE_SIZE, &vpn);
|
||||
vpn = vpn >> MEM_PAGE_LOG2_SIZE;
|
||||
CHECK_ERR(update_page_table(ppn, vpn, flags),);
|
||||
addr_mapping[ppn] = vpn;
|
||||
return vpn;
|
||||
}
|
||||
|
||||
bool need_trans(uint64_t dev_pAddr)
|
||||
{
|
||||
|
||||
// Check if the satp is set and BARE mode
|
||||
if (processor_.is_satp_unset() || get_mode() == BARE)
|
||||
return 0;
|
||||
|
||||
// Check if the address is reserved for system usage
|
||||
// bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr && dev_pAddr < PAGE_TABLE_BASE_ADDR + PT_SIZE_LIMIT);
|
||||
if (PAGE_TABLE_BASE_ADDR <= dev_pAddr)
|
||||
return 0;
|
||||
|
||||
// Check if the address is reserved for IO usage
|
||||
if (dev_pAddr < USER_BASE_ADDR)
|
||||
return 0;
|
||||
// Check if the address falls within the startup address range
|
||||
if ((STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)))
|
||||
return 0;
|
||||
|
||||
// Now all conditions are not met. Return true because the address needs translation
|
||||
return 1;
|
||||
}
|
||||
|
||||
uint64_t phy_to_virt_map(uint64_t size, uint64_t *dev_pAddr, uint32_t flags)
|
||||
{
|
||||
DBGPRINT(" [RT:PTV_MAP] size = 0x%lx, dev_pAddr= 0x%lx, flags = 0x%x\n", size, *dev_pAddr, flags);
|
||||
DBGPRINT(" [RT:PTV_MAP] bit mode: %d\n", XLEN);
|
||||
|
||||
if (!need_trans(*dev_pAddr))
|
||||
{
|
||||
DBGPRINT(" [RT:PTV_MAP] Translation is not needed.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t init_pAddr = *dev_pAddr;
|
||||
uint64_t init_vAddr = (map_p2v(init_pAddr >> MEM_PAGE_LOG2_SIZE, flags) << MEM_PAGE_LOG2_SIZE) | (init_pAddr & ((1 << MEM_PAGE_LOG2_SIZE) - 1));
|
||||
uint64_t ppn = 0, vpn = 0;
|
||||
|
||||
// dev_pAddr can be of size greater than a page, but we have to map and update
|
||||
// page tables on a page table granularity. So divide the allocation into pages.
|
||||
// FUTURE Work: Super Page
|
||||
for (ppn = (*dev_pAddr >> MEM_PAGE_LOG2_SIZE); ppn < ((*dev_pAddr) >> MEM_PAGE_LOG2_SIZE) + (size >> MEM_PAGE_LOG2_SIZE) ; ppn++)
|
||||
{
|
||||
vpn = map_p2v(ppn, flags) >> MEM_PAGE_LOG2_SIZE;
|
||||
DBGPRINT(" [RT:PTV_MAP] Search vpn in page table:0x%lx\n", vpn);
|
||||
// Currently a 1-1 mapping is used, this can be changed here to support different
|
||||
// mapping schemes
|
||||
}
|
||||
DBGPRINT(" [RT:PTV_MAP] Mapped virtual addr: 0x%lx to physical addr: 0x%lx\n", init_vAddr, init_pAddr);
|
||||
// Sanity check
|
||||
assert(page_table_walk(init_vAddr) == init_pAddr && "ERROR: translated virtual Addresses are not the same with physical Address\n");
|
||||
|
||||
*dev_pAddr = init_vAddr; // commit vpn to be returned to host
|
||||
DBGPRINT(" [RT:PTV_MAP] Translated device virtual addr: 0x%lx\n", *dev_pAddr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
int mem_alloc(uint64_t size, int flags, uint64_t *dev_addr)
|
||||
{
|
||||
uint64_t asize = aligned_size(size, MEM_PAGE_SIZE);
|
||||
uint64_t addr = 0;
|
||||
|
||||
DBGPRINT("[RT:mem_alloc] size: 0x%lx, asize, 0x%lx,flag : 0x%d\n", size, asize, flags);
|
||||
// HW: when vm is supported this global_mem_ should be virtual memory allocator
|
||||
CHECK_ERR(global_mem_.allocate(asize, &addr), {
|
||||
return err;
|
||||
});
|
||||
CHECK_ERR(this->mem_access(addr, size, flags), {
|
||||
CHECK_ERR(this->mem_access(addr, asize, flags), {
|
||||
global_mem_.release(addr);
|
||||
return err;
|
||||
});
|
||||
*dev_addr = addr;
|
||||
#ifdef VM_ENABLE
|
||||
// VM address translation
|
||||
phy_to_virt_map(asize, dev_addr, flags);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mem_reserve(uint64_t dev_addr, uint64_t size, int flags) {
|
||||
CHECK_ERR(global_mem_.reserve(dev_addr, size), {
|
||||
int mem_reserve(uint64_t dev_addr, uint64_t size, int flags)
|
||||
{
|
||||
uint64_t asize = aligned_size(size, MEM_PAGE_SIZE);
|
||||
CHECK_ERR(global_mem_.reserve(dev_addr, asize), {
|
||||
return err;
|
||||
});
|
||||
CHECK_ERR(this->mem_access(dev_addr, size, flags), {
|
||||
DBGPRINT("[RT:mem_reserve] addr: 0x%lx, asize:0x%lx, size: 0x%lx\n", dev_addr, asize, size);
|
||||
CHECK_ERR(this->mem_access(dev_addr, asize, flags), {
|
||||
global_mem_.release(dev_addr);
|
||||
return err;
|
||||
});
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mem_free(uint64_t dev_addr) {
|
||||
int mem_free(uint64_t dev_addr)
|
||||
{
|
||||
#ifdef VM_ENABLE
|
||||
uint64_t paddr = page_table_walk(dev_addr);
|
||||
return global_mem_.release(paddr);
|
||||
#else
|
||||
return global_mem_.release(dev_addr);
|
||||
#endif
|
||||
}
|
||||
|
||||
int mem_access(uint64_t dev_addr, uint64_t size, int flags) {
|
||||
int mem_access(uint64_t dev_addr, uint64_t size, int flags)
|
||||
{
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (dev_addr + asize > GLOBAL_MEM_SIZE)
|
||||
return -1;
|
||||
|
@ -130,7 +250,8 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
int mem_info(uint64_t* mem_free, uint64_t* mem_used) const {
|
||||
int mem_info(uint64_t *mem_free, uint64_t *mem_used) const
|
||||
{
|
||||
if (mem_free)
|
||||
*mem_free = global_mem_.free();
|
||||
if (mem_used)
|
||||
|
@ -138,16 +259,31 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
int upload(uint64_t dest_addr, const void* src, uint64_t size) {
|
||||
int upload(uint64_t dest_addr, const void *src, uint64_t size)
|
||||
{
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (dest_addr + asize > GLOBAL_MEM_SIZE)
|
||||
return -1;
|
||||
#ifdef VM_ENABLE
|
||||
uint64_t pAddr = page_table_walk(dest_addr);
|
||||
// uint64_t pAddr;
|
||||
// try {
|
||||
// pAddr = page_table_walk(dest_addr);
|
||||
// } catch ( Page_Fault_Exception ) {
|
||||
// // HW: place holder
|
||||
// // should be virt_to_phy_map here
|
||||
// phy_to_virt_map(0, dest_addr, 0);
|
||||
// }
|
||||
DBGPRINT(" [RT:upload] Upload data to vAddr = 0x%lx (pAddr=0x%lx)\n", dest_addr, pAddr);
|
||||
dest_addr = pAddr; // Overwrite
|
||||
#endif
|
||||
|
||||
ram_.enable_acl(false);
|
||||
ram_.write((const uint8_t*)src, dest_addr, size);
|
||||
ram_.write((const uint8_t *)src, dest_addr, size);
|
||||
ram_.enable_acl(true);
|
||||
|
||||
/*DBGPRINT("upload %ld bytes to 0x%lx\n", size, dest_addr);
|
||||
/*
|
||||
DBGPRINT("upload %ld bytes to 0x%lx\n", size, dest_addr);
|
||||
for (uint64_t i = 0; i < size && i < 1024; i += 4) {
|
||||
DBGPRINT(" 0x%lx <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + i));
|
||||
}*/
|
||||
|
@ -155,13 +291,19 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
int download(void* dest, uint64_t src_addr, uint64_t size) {
|
||||
int download(void *dest, uint64_t src_addr, uint64_t size)
|
||||
{
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
if (src_addr + asize > GLOBAL_MEM_SIZE)
|
||||
return -1;
|
||||
#ifdef VM_ENABLE
|
||||
uint64_t pAddr = page_table_walk(src_addr);
|
||||
DBGPRINT(" [RT:download] Download data to vAddr = 0x%lx (pAddr=0x%lx)\n", src_addr, pAddr);
|
||||
src_addr = pAddr; // Overwrite
|
||||
#endif
|
||||
|
||||
ram_.enable_acl(false);
|
||||
ram_.read((uint8_t*)dest, src_addr, size);
|
||||
ram_.read((uint8_t *)dest, src_addr, size);
|
||||
ram_.enable_acl(true);
|
||||
|
||||
/*DBGPRINT("download %ld bytes from 0x%lx\n", size, src_addr);
|
||||
|
@ -172,9 +314,11 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
int start(uint64_t krnl_addr, uint64_t args_addr) {
|
||||
int start(uint64_t krnl_addr, uint64_t args_addr)
|
||||
{
|
||||
// ensure prior run completed
|
||||
if (future_.valid()) {
|
||||
if (future_.valid())
|
||||
{
|
||||
future_.wait();
|
||||
}
|
||||
|
||||
|
@ -185,9 +329,8 @@ public:
|
|||
this->dcr_write(VX_DCR_BASE_STARTUP_ARG1, args_addr >> 32);
|
||||
|
||||
// start new run
|
||||
future_ = std::async(std::launch::async, [&]{
|
||||
processor_.run();
|
||||
});
|
||||
future_ = std::async(std::launch::async, [&]
|
||||
{ processor_.run(); });
|
||||
|
||||
// clear mpm cache
|
||||
mpm_cache_.clear();
|
||||
|
@ -195,12 +338,14 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
int ready_wait(uint64_t timeout) {
|
||||
int ready_wait(uint64_t timeout)
|
||||
{
|
||||
if (!future_.valid())
|
||||
return 0;
|
||||
uint64_t timeout_sec = timeout / 1000;
|
||||
std::chrono::seconds wait_time(1);
|
||||
for (;;) {
|
||||
for (;;)
|
||||
{
|
||||
// wait for 1 sec and check status
|
||||
auto status = future_.wait_for(wait_time);
|
||||
if (status == std::future_status::ready)
|
||||
|
@ -211,8 +356,10 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
int dcr_write(uint32_t addr, uint32_t value) {
|
||||
if (future_.valid()) {
|
||||
int dcr_write(uint32_t addr, uint32_t value)
|
||||
{
|
||||
if (future_.valid())
|
||||
{
|
||||
future_.wait(); // ensure prior run completed
|
||||
}
|
||||
processor_.dcr_write(addr, value);
|
||||
|
@ -220,15 +367,18 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
int dcr_read(uint32_t addr, uint32_t* value) const {
|
||||
int dcr_read(uint32_t addr, uint32_t *value) const
|
||||
{
|
||||
return dcrs_.read(addr, value);
|
||||
}
|
||||
|
||||
int mpm_query(uint32_t addr, uint32_t core_id, uint64_t* value) {
|
||||
int mpm_query(uint32_t addr, uint32_t core_id, uint64_t *value)
|
||||
{
|
||||
uint32_t offset = addr - VX_CSR_MPM_BASE;
|
||||
if (offset > 31)
|
||||
return -1;
|
||||
if (mpm_cache_.count(core_id) == 0) {
|
||||
if (mpm_cache_.count(core_id) == 0)
|
||||
{
|
||||
uint64_t mpm_mem_addr = IO_MPM_ADDR + core_id * 32 * sizeof(uint64_t);
|
||||
CHECK_ERR(this->download(mpm_cache_[core_id].data(), mpm_mem_addr, 32 * sizeof(uint64_t)), {
|
||||
return err;
|
||||
|
@ -237,15 +387,275 @@ public:
|
|||
*value = mpm_cache_.at(core_id).at(offset);
|
||||
return 0;
|
||||
}
|
||||
#ifdef VM_ENABLE
|
||||
/* VM Management */
|
||||
|
||||
// Initialize to zero the target page table area. 32bit 4K, 64bit 8K
|
||||
uint16_t init_page_table(uint64_t addr, uint64_t size)
|
||||
{
|
||||
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
|
||||
DBGPRINT(" [RT:init_page_table] (addr=0x%lx, size=0x%lx)\n", addr, asize);
|
||||
uint8_t *src = new uint8_t[asize];
|
||||
if (src == NULL)
|
||||
return 1;
|
||||
|
||||
for (uint64_t i = 0; i < asize; ++i)
|
||||
{
|
||||
src[i] = 0;
|
||||
}
|
||||
ram_.enable_acl(false);
|
||||
ram_.write((const uint8_t *)src, addr, asize);
|
||||
ram_.enable_acl(true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint8_t alloc_page_table (uint64_t * pt_addr)
|
||||
{
|
||||
CHECK_ERR(page_table_mem_->allocate(PT_SIZE, pt_addr), { return err; });
|
||||
CHECK_ERR(init_page_table(*pt_addr, PT_SIZE), { return err; });
|
||||
DBGPRINT(" [RT:alloc_page_table] addr= 0x%lx\n", *pt_addr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// reserve IO space, startup space, and local mem area
|
||||
int virtual_mem_reserve(uint64_t dev_addr, uint64_t size, int flags)
|
||||
{
|
||||
CHECK_ERR(virtual_mem_->reserve(dev_addr, size), {
|
||||
return err;
|
||||
});
|
||||
DBGPRINT("[RT:mem_reserve] addr: 0x%lx, size:0x%lx, size: 0x%lx\n", dev_addr, size, size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int16_t init_VM()
|
||||
{
|
||||
uint64_t pt_addr = 0;
|
||||
// Reserve space for PT
|
||||
DBGPRINT("[RT:init_VM] Initialize VM\n");
|
||||
CHECK_ERR(mem_reserve(PAGE_TABLE_BASE_ADDR, PT_SIZE_LIMIT, VX_MEM_READ_WRITE), {
|
||||
return err;
|
||||
});
|
||||
page_table_mem_ = new MemoryAllocator (PAGE_TABLE_BASE_ADDR, PT_SIZE_LIMIT, MEM_PAGE_SIZE, CACHE_BLOCK_SIZE);
|
||||
if (page_table_mem_ == NULL)
|
||||
{
|
||||
CHECK_ERR(this->mem_free(PAGE_TABLE_BASE_ADDR),);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// HW: virtual mem allocator has the same address range as global_mem. next step is to adjust it
|
||||
virtual_mem_ = new MemoryAllocator(ALLOC_BASE_ADDR, (GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR), MEM_PAGE_SIZE, CACHE_BLOCK_SIZE);
|
||||
CHECK_ERR(virtual_mem_reserve(PAGE_TABLE_BASE_ADDR, (GLOBAL_MEM_SIZE - PAGE_TABLE_BASE_ADDR), VX_MEM_READ_WRITE), {
|
||||
return err;
|
||||
});
|
||||
CHECK_ERR(virtual_mem_reserve(STARTUP_ADDR, 0x40000, VX_MEM_READ_WRITE), {
|
||||
return err;
|
||||
});
|
||||
|
||||
if (virtual_mem_ == nullptr) {
|
||||
// virtual_mem_ does not intefere with physical mem, so no need to free space
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (VM_ADDR_MODE == BARE)
|
||||
DBGPRINT("[RT:init_VM] VA_MODE = BARE MODE(addr= 0x0)");
|
||||
else
|
||||
CHECK_ERR(alloc_page_table(&pt_addr),{return err;});
|
||||
|
||||
CHECK_ERR(processor_.set_satp_by_addr(pt_addr),{return err;});
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Return value in in ptbr
|
||||
uint64_t get_base_ppn()
|
||||
{
|
||||
return processor_.get_base_ppn();
|
||||
}
|
||||
uint64_t get_pte_address(uint64_t base_ppn, uint64_t vpn)
|
||||
{
|
||||
return (base_ppn * PT_SIZE) + (vpn * PTE_SIZE);
|
||||
}
|
||||
|
||||
uint8_t get_mode()
|
||||
{
|
||||
return processor_.get_satp_mode();
|
||||
}
|
||||
|
||||
int16_t update_page_table(uint64_t ppn, uint64_t vpn, uint32_t flag)
|
||||
{
|
||||
DBGPRINT(" [RT:Update PT] Mapping vpn 0x%05lx to ppn 0x%05lx(flags = %u)\n", vpn, ppn, flag);
|
||||
// sanity check
|
||||
#if VM_ADDR_MODE == SV39
|
||||
assert((((ppn >> 44) == 0) && ((vpn >> 27) == 0)) && "Upper bits are not zero!");
|
||||
uint8_t level = 3;
|
||||
#else // Default is SV32, BARE will not reach this point.
|
||||
assert((((ppn >> 20) == 0) && ((vpn >> 20) == 0)) && "Upper 12 bits are not zero!");
|
||||
uint8_t level = 2;
|
||||
#endif
|
||||
int i = level - 1;
|
||||
vAddr_t vaddr(vpn << MEM_PAGE_LOG2_SIZE);
|
||||
uint64_t pte_addr = 0, pte_bytes = 0;
|
||||
uint64_t pt_addr = 0;
|
||||
uint64_t cur_base_ppn = get_base_ppn();
|
||||
|
||||
while (i >= 0)
|
||||
{
|
||||
DBGPRINT(" [RT:Update PT]Start %u-level page table\n", i);
|
||||
pte_addr = get_pte_address(cur_base_ppn, vaddr.vpn[i]);
|
||||
pte_bytes = read_pte(pte_addr);
|
||||
PTE_t pte_chk(pte_bytes);
|
||||
DBGPRINT(" [RT:Update PT] PTE addr 0x%lx, PTE bytes 0x%lx\n", pte_addr, pte_bytes);
|
||||
if (pte_chk.v == 1 && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d))
|
||||
{
|
||||
DBGPRINT(" [RT:Update PT] PTE valid (ppn 0x%lx), continuing the walk...\n", pte_chk.ppn);
|
||||
cur_base_ppn = pte_chk.ppn;
|
||||
}
|
||||
else
|
||||
{
|
||||
// If valid bit not set, allocate a next level page table
|
||||
DBGPRINT(" [RT:Update PT] PTE Invalid (ppn 0x%lx) ...\n", pte_chk.ppn);
|
||||
if (i == 0)
|
||||
{
|
||||
// Reach to leaf
|
||||
DBGPRINT(" [RT:Update PT] Reached to level 0. This should be a leaf node(flag = %x) \n",flag);
|
||||
uint32_t pte_flag = (flag << 1) | 0x3;
|
||||
PTE_t new_pte(ppn <<MEM_PAGE_LOG2_SIZE, pte_flag);
|
||||
write_pte(pte_addr, new_pte.pte_bytes);
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
// in device memory and store ppn in PTE. Set rwx = 000 in PTE
|
||||
// to indicate this is a pointer to the next level of the page table.
|
||||
// flag would READ: 0x1, Write 0x2, RW:0x3, which is matched with PTE flags if it is lsh by one.
|
||||
alloc_page_table(&pt_addr);
|
||||
uint32_t pte_flag = 0x1;
|
||||
PTE_t new_pte(pt_addr, pte_flag);
|
||||
write_pte(pte_addr, new_pte.pte_bytes);
|
||||
cur_base_ppn = new_pte.ppn;
|
||||
}
|
||||
}
|
||||
i--;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint64_t page_table_walk(uint64_t vAddr_bits)
|
||||
{
|
||||
DBGPRINT(" [RT:PTW] start vAddr: 0x%lx\n", vAddr_bits);
|
||||
if (!need_trans(vAddr_bits))
|
||||
{
|
||||
DBGPRINT(" [RT:PTW] Translation is not needed.\n");
|
||||
return vAddr_bits;
|
||||
}
|
||||
uint8_t level = PT_LEVEL;
|
||||
int i = level-1;
|
||||
vAddr_t vaddr(vAddr_bits);
|
||||
uint64_t pte_addr = 0, pte_bytes = 0;
|
||||
uint64_t cur_base_ppn = get_base_ppn();
|
||||
while (true)
|
||||
{
|
||||
DBGPRINT(" [RT:PTW]Start %u-level page table walk\n",i);
|
||||
// Read PTE.
|
||||
pte_addr = get_pte_address(cur_base_ppn, vaddr.vpn[i]);
|
||||
pte_bytes = read_pte(pte_addr);
|
||||
PTE_t pte(pte_bytes);
|
||||
DBGPRINT(" [RT:PTW] PTE addr 0x%lx, PTE bytes 0x%lx\n", pte_addr, pte_bytes);
|
||||
|
||||
assert(((pte.pte_bytes & 0xFFFFFFFF) != 0xbaadf00d) && "ERROR: uninitialzed PTE\n" );
|
||||
// Check if it has invalid flag bits.
|
||||
if ((pte.v == 0) | ((pte.r == 0) & (pte.w == 1)))
|
||||
{
|
||||
std::string msg = " [RT:PTW] Page Fault : Attempted to access invalid entry.";
|
||||
throw Page_Fault_Exception(msg);
|
||||
}
|
||||
|
||||
if ((pte.r == 0) & (pte.w == 0) & (pte.x == 0))
|
||||
{
|
||||
i--;
|
||||
// Not a leaf node as rwx == 000
|
||||
if (i < 0)
|
||||
{
|
||||
throw Page_Fault_Exception(" [RT:PTW] Page Fault : No leaf node found.");
|
||||
}
|
||||
else
|
||||
{
|
||||
// Continue on to next level.
|
||||
cur_base_ppn= pte.ppn ;
|
||||
DBGPRINT(" [RT:PTW] next base_ppn: 0x%lx\n", cur_base_ppn);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Leaf node found.
|
||||
// Check RWX permissions according to access type.
|
||||
if (pte.r == 0)
|
||||
{
|
||||
throw Page_Fault_Exception(" [RT:PTW] Page Fault : TYPE LOAD, Incorrect permissions.");
|
||||
}
|
||||
cur_base_ppn= pte.ppn ;
|
||||
DBGPRINT(" [RT:PTW] Found PT_Base_Address(0x%lx) on Level %d.\n", pte.ppn,i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
uint64_t paddr = (cur_base_ppn << MEM_PAGE_LOG2_SIZE) + vaddr.pgoff;
|
||||
return paddr;
|
||||
}
|
||||
|
||||
// void read_page_table(uint64_t addr) {
|
||||
// uint8_t *dest = new uint8_t[MEM_PAGE_SIZE];
|
||||
// download(dest, addr, MEM_PAGE_SIZE);
|
||||
// DBGPRINT("VXDRV: download %d bytes from 0x%x\n", MEM_PAGE_SIZE, addr);
|
||||
// for (int i = 0; i < MEM_PAGE_SIZE; i += 4) {
|
||||
// DBGPRINT("mem-read: 0x%x -> 0x%x\n", addr + i, *(uint64_t*)((uint8_t*)dest + i));
|
||||
// }
|
||||
// }
|
||||
|
||||
void write_pte(uint64_t addr, uint64_t value = 0xbaadf00d)
|
||||
{
|
||||
DBGPRINT(" [RT:Write_pte] writing pte 0x%lx to pAddr: 0x%lx\n", value, addr);
|
||||
uint8_t *src = new uint8_t[PTE_SIZE];
|
||||
for (uint64_t i = 0; i < PTE_SIZE; ++i)
|
||||
{
|
||||
src[i] = (value >> (i << 3)) & 0xff;
|
||||
}
|
||||
// std::cout << "writing PTE to RAM addr 0x" << std::hex << addr << std::endl;
|
||||
ram_.enable_acl(false);
|
||||
ram_.write((const uint8_t *)src, addr, PTE_SIZE);
|
||||
ram_.enable_acl(true);
|
||||
}
|
||||
|
||||
uint64_t read_pte(uint64_t addr)
|
||||
{
|
||||
uint8_t *dest = new uint8_t[PTE_SIZE];
|
||||
#ifdef XLEN_32
|
||||
uint64_t mask = 0x00000000FFFFFFFF;
|
||||
#else // 64bit
|
||||
uint64_t mask = 0xFFFFFFFFFFFFFFFF;
|
||||
#endif
|
||||
|
||||
ram_.read((uint8_t *)dest, addr, PTE_SIZE);
|
||||
uint64_t ret = (*(uint64_t *)((uint8_t *)dest)) & mask;
|
||||
DBGPRINT(" [RT:read_pte] reading PTE 0x%lx from RAM addr 0x%lx\n", ret, addr);
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif // VM_ENABLE
|
||||
|
||||
private:
|
||||
Arch arch_;
|
||||
RAM ram_;
|
||||
Processor processor_;
|
||||
MemoryAllocator global_mem_;
|
||||
DeviceConfig dcrs_;
|
||||
std::future<void> future_;
|
||||
Arch arch_;
|
||||
RAM ram_;
|
||||
Processor processor_;
|
||||
MemoryAllocator global_mem_;
|
||||
DeviceConfig dcrs_;
|
||||
std::future<void> future_;
|
||||
std::unordered_map<uint32_t, std::array<uint64_t, 32>> mpm_cache_;
|
||||
#ifdef VM_ENABLE
|
||||
std::unordered_map<uint64_t, uint64_t> addr_mapping; // HW: key: ppn; value: vpn
|
||||
MemoryAllocator* page_table_mem_;
|
||||
MemoryAllocator* virtual_mem_;
|
||||
#endif
|
||||
};
|
||||
|
||||
#include <callbacks.inc>
|
||||
#include <callbacks.inc>
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
ROOT_DIR := $(realpath ..)
|
||||
include $(ROOT_DIR)/config.mk
|
||||
|
||||
simx:
|
||||
$(MAKE) -C simx
|
||||
|
||||
all:
|
||||
$(MAKE) -C simx
|
||||
$(MAKE) -C rtlsim
|
||||
|
|
|
@ -17,9 +17,20 @@
|
|||
#include <fstream>
|
||||
#include <assert.h>
|
||||
#include "util.h"
|
||||
#include <VX_config.h>
|
||||
#include <bitset>
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
// #ifndef NDEBUG
|
||||
// #define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0)
|
||||
// #else
|
||||
#define DBGPRINT(format, ...) ((void)0)
|
||||
// #endif
|
||||
#endif
|
||||
|
||||
|
||||
RamMemDevice::RamMemDevice(const char *filename, uint32_t wordSize)
|
||||
: wordSize_(wordSize) {
|
||||
std::ifstream input(filename);
|
||||
|
@ -123,17 +134,95 @@ void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size)
|
|||
|
||||
MemoryUnit::MemoryUnit(uint64_t pageSize)
|
||||
: pageSize_(pageSize)
|
||||
#ifndef VM_ENABLE
|
||||
, enableVM_(pageSize != 0)
|
||||
, amo_reservation_({0x0, false}) {
|
||||
if (pageSize != 0) {
|
||||
tlb_[0] = TLBEntry(0, 077);
|
||||
#endif
|
||||
, amo_reservation_({0x0, false})
|
||||
#ifdef VM_ENABLE
|
||||
, TLB_HIT(0)
|
||||
, TLB_MISS(0)
|
||||
, TLB_EVICT(0)
|
||||
, PTW(0)
|
||||
, satp_(NULL) {};
|
||||
#else
|
||||
{
|
||||
if (pageSize != 0)
|
||||
{
|
||||
tlb_[0] = TLBEntry(0, 077);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Register memory device `m` with the address decoder for the range
// [start, end] as passed through to decoder_.map().
void MemoryUnit::attach(MemDevice &m, uint64_t start, uint64_t end)
{
  decoder_.map(start, end, m);
}
|
||||
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
std::pair<bool, uint64_t> MemoryUnit::tlbLookup(uint64_t vAddr, ACCESS_TYPE type, uint64_t* size_bits) {
|
||||
|
||||
//Find entry while accounting for different sizes.
|
||||
for (auto entry : tlb_)
|
||||
{
|
||||
if(entry.first == vAddr >> entry.second.size_bits)
|
||||
{
|
||||
*size_bits = entry.second.size_bits;
|
||||
vAddr = vAddr >> (*size_bits);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
auto iter = tlb_.find(vAddr);
|
||||
if (iter != tlb_.end()) {
|
||||
TLBEntry e = iter->second;
|
||||
|
||||
//Set mru bit if it is a hit.
|
||||
iter->second.mru_bit = true;
|
||||
|
||||
//If at full capacity and no other unset bits.
|
||||
// Clear all bits except the one we just looked up.
|
||||
if (tlb_.size() == TLB_SIZE)
|
||||
{
|
||||
// bool no_cleared = true;
|
||||
// for (auto& entry : tlb_)
|
||||
// {
|
||||
// no_cleared = no_cleared & entry.second.mru_bit;
|
||||
// }
|
||||
|
||||
// if(no_cleared)
|
||||
// {
|
||||
for (auto& entry : tlb_)
|
||||
{
|
||||
entry.second.mru_bit = false;
|
||||
}
|
||||
iter->second.mru_bit = true;
|
||||
//}
|
||||
|
||||
}
|
||||
//Check access permissions.
|
||||
if ( (type == ACCESS_TYPE::FETCH) & ((e.r == 0) | (e.x == 0)) )
|
||||
{
|
||||
throw Page_Fault_Exception("Page Fault : Incorrect permissions.");
|
||||
}
|
||||
else if ( (type == ACCESS_TYPE::LOAD) & (e.r == 0) )
|
||||
{
|
||||
throw Page_Fault_Exception("Page Fault : Incorrect permissions.");
|
||||
}
|
||||
else if ( (type == ACCESS_TYPE::STORE) & (e.w == 0) )
|
||||
{
|
||||
throw Page_Fault_Exception("Page Fault : Incorrect permissions.");
|
||||
}
|
||||
else
|
||||
{
|
||||
//TLB Hit
|
||||
return std::make_pair(true, iter->second.pfn);
|
||||
}
|
||||
} else {
|
||||
//TLB Miss
|
||||
return std::make_pair(false, 0);
|
||||
}
|
||||
}
|
||||
#else
|
||||
MemoryUnit::TLBEntry MemoryUnit::tlbLookup(uint64_t vAddr, uint32_t flagMask) {
|
||||
auto iter = tlb_.find(vAddr / pageSize_);
|
||||
if (iter != tlb_.end()) {
|
||||
|
@ -157,31 +246,96 @@ uint64_t MemoryUnit::toPhyAddr(uint64_t addr, uint32_t flagMask) {
|
|||
}
|
||||
return pAddr;
|
||||
}
|
||||
#endif
|
||||
|
||||
void MemoryUnit::read(void* data, uint64_t addr, uint64_t size, bool sup) {
|
||||
#ifdef VM_ENABLE
|
||||
void MemoryUnit::read(void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type) {
|
||||
DBGPRINT(" [MMU:read] 0x%lx, 0x%x, %u\n",addr,size,type);
|
||||
uint64_t pAddr;
|
||||
pAddr = vAddr_to_pAddr(addr, type);
|
||||
return decoder_.read(data, pAddr, size);
|
||||
}
|
||||
#else
|
||||
void MemoryUnit::read(void* data, uint64_t addr, uint32_t size, bool sup) {
|
||||
uint64_t pAddr = this->toPhyAddr(addr, sup ? 8 : 1);
|
||||
return decoder_.read(data, pAddr, size);
|
||||
}
|
||||
|
||||
void MemoryUnit::write(const void* data, uint64_t addr, uint64_t size, bool sup) {
|
||||
#endif
|
||||
#ifdef VM_ENABLE
|
||||
// VM build: translate `addr` for the given access type, write `size` bytes
// from `data`, and drop any outstanding AMO reservation.
void MemoryUnit::write(const void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type) {
  DBGPRINT(" [MMU:Write] 0x%lx, 0x%x, %u\n",addr,size,type);
  const uint64_t phys_addr = vAddr_to_pAddr(addr, type);
  decoder_.write(data, phys_addr, size);
  // Any store invalidates the reservation.
  amo_reservation_.valid = false;
}
|
||||
#else
|
||||
void MemoryUnit::write(const void* data, uint64_t addr, uint32_t size, bool sup) {
|
||||
uint64_t pAddr = this->toPhyAddr(addr, sup ? 16 : 1);
|
||||
decoder_.write(data, pAddr, size);
|
||||
amo_reservation_.valid = false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
void MemoryUnit::amo_reserve(uint64_t addr) {
|
||||
DBGPRINT(" [MMU:amo_reserve] 0x%lx\n",addr);
|
||||
uint64_t pAddr = this->vAddr_to_pAddr(addr,ACCESS_TYPE::LOAD);
|
||||
amo_reservation_.addr = pAddr;
|
||||
amo_reservation_.valid = true;
|
||||
}
|
||||
#else
|
||||
void MemoryUnit::amo_reserve(uint64_t addr) {
|
||||
uint64_t pAddr = this->toPhyAddr(addr, 1);
|
||||
amo_reservation_.addr = pAddr;
|
||||
amo_reservation_.valid = true;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
bool MemoryUnit::amo_check(uint64_t addr) {
|
||||
DBGPRINT(" [MMU:amo_check] 0x%lx\n",addr);
|
||||
uint64_t pAddr = this->vAddr_to_pAddr(addr, ACCESS_TYPE::LOAD);
|
||||
return amo_reservation_.valid && (amo_reservation_.addr == pAddr);
|
||||
}
|
||||
#else
|
||||
bool MemoryUnit::amo_check(uint64_t addr) {
|
||||
uint64_t pAddr = this->toPhyAddr(addr, 1);
|
||||
return amo_reservation_.valid && (amo_reservation_.addr == pAddr);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
|
||||
void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits) {
|
||||
// HW: evict TLB by Most Recently Used
|
||||
if (tlb_.size() == TLB_SIZE - 1) {
|
||||
for (auto& entry : tlb_)
|
||||
{
|
||||
entry.second.mru_bit = false;
|
||||
}
|
||||
|
||||
} else if (tlb_.size() == TLB_SIZE) {
|
||||
uint64_t del;
|
||||
for (auto entry : tlb_) {
|
||||
if (!entry.second.mru_bit)
|
||||
{
|
||||
del = entry.first;
|
||||
break;
|
||||
}
|
||||
}
|
||||
tlb_.erase(tlb_.find(del));
|
||||
TLB_EVICT++;
|
||||
}
|
||||
tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags, size_bits);
|
||||
}
|
||||
#else
|
||||
|
||||
void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags) {
|
||||
tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags);
|
||||
}
|
||||
#endif
|
||||
|
||||
void MemoryUnit::tlbRm(uint64_t va) {
|
||||
if (tlb_.find(va / pageSize_) != tlb_.end())
|
||||
|
@ -325,6 +479,7 @@ uint8_t *RAM::get(uint64_t address) const {
|
|||
}
|
||||
|
||||
void RAM::read(void* data, uint64_t addr, uint64_t size) {
|
||||
// printf("====%s (addr= 0x%lx, size= 0x%lx) ====\n", __PRETTY_FUNCTION__,addr,size);
|
||||
if (check_acl_ && acl_mngr_.check(addr, size, 0x1) == false) {
|
||||
throw BadAddress();
|
||||
}
|
||||
|
@ -435,3 +590,171 @@ void RAM::loadHexImage(const char* filename) {
|
|||
--size;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
|
||||
uint64_t MemoryUnit::get_base_ppn()
|
||||
{
|
||||
assert(satp_!= NULL);
|
||||
return satp_->get_base_ppn();
|
||||
}
|
||||
|
||||
uint64_t MemoryUnit::get_satp()
|
||||
{
|
||||
if (is_satp_unset())
|
||||
return 0;
|
||||
else
|
||||
return satp_->get_satp();
|
||||
}
|
||||
|
||||
uint8_t MemoryUnit::is_satp_unset()
|
||||
{
|
||||
return (satp_==NULL);
|
||||
}
|
||||
|
||||
uint8_t MemoryUnit::get_mode()
|
||||
{
|
||||
assert(satp_!= NULL);
|
||||
return satp_->get_mode();
|
||||
}
|
||||
void MemoryUnit::set_satp(uint64_t satp)
|
||||
{
|
||||
// uint16_t asid = 0; // set asid for different process
|
||||
satp_ = new SATP_t (satp );
|
||||
}
|
||||
|
||||
bool MemoryUnit::need_trans(uint64_t dev_pAddr)
|
||||
{
|
||||
// Check if the satp is set and BARE mode
|
||||
if ( is_satp_unset() || (get_mode() == BARE))
|
||||
return 0;
|
||||
|
||||
// Check if the address is reserved for system usage
|
||||
// bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr && dev_pAddr < PAGE_TABLE_BASE_ADDR + PT_SIZE_LIMIT);
|
||||
if (PAGE_TABLE_BASE_ADDR <= dev_pAddr)
|
||||
return 0;
|
||||
|
||||
// Check if the address is reserved for IO usage
|
||||
if (dev_pAddr < USER_BASE_ADDR)
|
||||
return 0;
|
||||
// Check if the address falls within the startup address range
|
||||
if ((STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)))
|
||||
return 0;
|
||||
|
||||
// Now all conditions are not met. Return true because the address needs translation
|
||||
return 1;
|
||||
}
|
||||
|
||||
uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type)
|
||||
{
|
||||
uint64_t pfn;
|
||||
uint64_t size_bits;
|
||||
DBGPRINT(" [MMU: V2P] vaddr = 0x%lx, type = 0x%u\n",vAddr,type);
|
||||
if (!need_trans(vAddr))
|
||||
{
|
||||
DBGPRINT(" [MMU: V2P] Translation is not needed.\n");
|
||||
return vAddr;
|
||||
}
|
||||
|
||||
//First lookup TLB.
|
||||
std::pair<bool, uint64_t> tlb_access = tlbLookup(vAddr, type, &size_bits);
|
||||
if (tlb_access.first)
|
||||
{
|
||||
|
||||
pfn = tlb_access.second;
|
||||
TLB_HIT++;
|
||||
}
|
||||
else //Else walk the PT.
|
||||
{
|
||||
std::pair<uint64_t, uint8_t> ptw_access = page_table_walk(vAddr, type, &size_bits);
|
||||
tlbAdd(vAddr>>size_bits, ptw_access.first, ptw_access.second,size_bits);
|
||||
pfn = ptw_access.first; TLB_MISS++; PTW++;
|
||||
unique_translations.insert(vAddr>>size_bits);
|
||||
PERF_UNIQUE_PTW = unique_translations.size();
|
||||
|
||||
}
|
||||
|
||||
//Construct final address using pfn and offset.
|
||||
DBGPRINT(" [MMU: V2P] translated vAddr: 0x%lx to pAddr 0x%lx\n",vAddr,((pfn << size_bits) + (vAddr & ((1 << size_bits) - 1))));
|
||||
return (pfn << size_bits) + (vAddr & ((1 << size_bits) - 1));
|
||||
}
|
||||
|
||||
uint64_t MemoryUnit::get_pte_address(uint64_t base_ppn, uint64_t vpn)
|
||||
{
|
||||
return (base_ppn * PT_SIZE) + (vpn * PTE_SIZE);
|
||||
}
|
||||
|
||||
std::pair<uint64_t, uint8_t> MemoryUnit::page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t *size_bits)
|
||||
{
|
||||
DBGPRINT(" [MMU:PTW] Start: vaddr = 0x%lx, type = %u.\n", vAddr_bits, type);
|
||||
uint8_t level = PT_LEVEL;
|
||||
int i = level-1;
|
||||
vAddr_t vaddr(vAddr_bits);
|
||||
uint32_t flags =0;
|
||||
uint64_t pte_addr = 0, pte_bytes = 0;
|
||||
uint64_t cur_base_ppn = get_base_ppn();
|
||||
// Need to fix for super page
|
||||
*size_bits = 12;
|
||||
|
||||
while (true)
|
||||
{
|
||||
// Read PTE.
|
||||
pte_addr = get_pte_address(cur_base_ppn, vaddr.vpn[i]);
|
||||
decoder_.read(&pte_bytes, pte_addr, PTE_SIZE);
|
||||
PTE_t pte(pte_bytes);
|
||||
DBGPRINT(" [MMU:PTW] Level[%u] pte_addr=0x%lx, pte_bytes =0x%lx, pte.ppn= 0x%lx, pte.flags = %u)\n", i, pte_addr, pte_bytes, pte.ppn, pte.flags);
|
||||
|
||||
assert(((pte.pte_bytes & 0xFFFFFFFF) != 0xbaadf00d) && "ERROR: uninitialzed PTE\n" );
|
||||
|
||||
// Check if it has invalid flag bits.
|
||||
if ((pte.v == 0) | ((pte.r == 0) & (pte.w == 1)))
|
||||
{
|
||||
assert(0);
|
||||
throw Page_Fault_Exception(" [MMU:PTW] Page Fault : Attempted to access invalid entry.");
|
||||
}
|
||||
|
||||
if ((pte.r == 0) & (pte.w == 0) & (pte.x == 0))
|
||||
{
|
||||
// Not a leaf node as rwx == 000
|
||||
i--;
|
||||
if (i < 0)
|
||||
{
|
||||
assert(0);
|
||||
throw Page_Fault_Exception(" [MMU:PTW] Page Fault : No leaf node found.");
|
||||
}
|
||||
else
|
||||
{
|
||||
// Continue on to next level.
|
||||
cur_base_ppn= pte.ppn;
|
||||
DBGPRINT(" [MMU:PTW] next base_ppn: 0x%lx\n", cur_base_ppn);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Leaf node found, finished walking.
|
||||
// Check RWX permissions according to access type.
|
||||
if ((type == ACCESS_TYPE::FETCH) & ((pte.r == 0) | (pte.x == 0)))
|
||||
{
|
||||
assert(0);
|
||||
throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE FETCH, Incorrect permissions.");
|
||||
}
|
||||
else if ((type == ACCESS_TYPE::LOAD) & (pte.r == 0))
|
||||
{
|
||||
assert(0);
|
||||
throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE LOAD, Incorrect permissions.");
|
||||
}
|
||||
else if ((type == ACCESS_TYPE::STORE) & (pte.w == 0))
|
||||
{
|
||||
assert(0);
|
||||
throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE STORE, Incorrect permissions.");
|
||||
}
|
||||
cur_base_ppn = pte.ppn;
|
||||
flags = pte.flags;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return std::make_pair(cur_base_ppn, flags);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
331
sim/common/mem.h
331
sim/common/mem.h
|
@ -18,8 +18,108 @@
|
|||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <cstdint>
|
||||
#include <unordered_set>
|
||||
#include <stdexcept>
|
||||
#include "VX_config.h"
|
||||
#ifdef VM_ENABLE
|
||||
#include <unordered_set>
|
||||
#include <stdexcept>
|
||||
#include <cassert>
|
||||
#endif
|
||||
|
||||
|
||||
namespace vortex {
|
||||
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
|
||||
// VA MODE
|
||||
#define BARE 0x0
|
||||
#define SV32 0x1
|
||||
#define SV39 0x8
|
||||
|
||||
enum ACCESS_TYPE {
|
||||
LOAD,
|
||||
STORE,
|
||||
FETCH
|
||||
};
|
||||
class SATP_t
|
||||
{
|
||||
private:
|
||||
uint64_t address;
|
||||
uint16_t asid;
|
||||
uint8_t mode;
|
||||
uint64_t ppn;
|
||||
uint64_t satp;
|
||||
|
||||
uint64_t bits(uint64_t input, uint8_t s_idx, uint8_t e_idx)
|
||||
{
|
||||
return (input>> s_idx) & (((uint64_t)1 << (e_idx - s_idx + 1)) - 1);
|
||||
}
|
||||
bool bit(uint64_t input , uint8_t idx)
|
||||
{
|
||||
return (input ) & ((uint64_t)1 << idx);
|
||||
}
|
||||
|
||||
public:
|
||||
SATP_t(uint64_t satp) : satp(satp)
|
||||
{
|
||||
#ifdef XLEN_32
|
||||
mode = bit(satp, 31);
|
||||
asid = bits(satp, 22, 30);
|
||||
ppn = bits(satp, 0,21);
|
||||
#else
|
||||
mode = bits(satp, 60,63);
|
||||
asid = bits(satp, 44, 59);
|
||||
ppn = bits(satp, 0,43);
|
||||
#endif
|
||||
address = ppn << MEM_PAGE_LOG2_SIZE;
|
||||
}
|
||||
|
||||
SATP_t(uint64_t address, uint16_t asid) : address(address), asid(asid)
|
||||
{
|
||||
#ifdef XLEN_32
|
||||
assert((address >> 32) == 0 && "Upper 32 bits are not zero!");
|
||||
#endif
|
||||
mode= VM_ADDR_MODE;
|
||||
// asid = 0 ;
|
||||
ppn = address >> MEM_PAGE_LOG2_SIZE;
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wshift-count-overflow"
|
||||
#ifdef XLEN_32
|
||||
satp = (((uint64_t)mode << 31) | ((uint64_t)asid << 22) | ppn);
|
||||
#else
|
||||
satp = (((uint64_t)mode << 60) | ((uint64_t)asid << 44) | ppn);
|
||||
#endif
|
||||
#pragma GCC diagnostic pop
|
||||
}
|
||||
uint8_t get_mode()
|
||||
{
|
||||
return mode;
|
||||
}
|
||||
uint16_t get_asid()
|
||||
{
|
||||
return asid;
|
||||
}
|
||||
uint64_t get_base_ppn()
|
||||
{
|
||||
return ppn;
|
||||
}
|
||||
uint64_t get_satp()
|
||||
{
|
||||
return satp;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class Page_Fault_Exception : public std::runtime_error /* or logic_error */
|
||||
{
|
||||
public:
|
||||
Page_Fault_Exception(const std::string& what = "") : std::runtime_error(what) {}
|
||||
uint64_t addr;
|
||||
ACCESS_TYPE type;
|
||||
};
|
||||
#endif
|
||||
struct BadAddress {};
|
||||
struct OutOfRange {};
|
||||
|
||||
|
@ -73,26 +173,53 @@ public:
|
|||
class MemoryUnit {
|
||||
public:
|
||||
|
||||
// HW: Expand PageFault struct to contain access_type info for debug purposes
|
||||
struct PageFault {
|
||||
PageFault(uint64_t a, bool nf)
|
||||
: faultAddr(a)
|
||||
, notFound(nf)
|
||||
// , access_type(ACCESS_TYPE::LOAD)
|
||||
{}
|
||||
uint64_t faultAddr;
|
||||
bool notFound;
|
||||
uint64_t faultAddr;
|
||||
bool notFound;
|
||||
// ACCESS_TYPE access_type;
|
||||
};
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
MemoryUnit(uint64_t pageSize = MEM_PAGE_SIZE);
|
||||
~MemoryUnit(){
|
||||
if ( this->satp_ != NULL)
|
||||
delete this->satp_;
|
||||
};
|
||||
#else
|
||||
MemoryUnit(uint64_t pageSize = 0);
|
||||
#endif
|
||||
|
||||
void attach(MemDevice &m, uint64_t start, uint64_t end);
|
||||
|
||||
void read(void* data, uint64_t addr, uint64_t size, bool sup);
|
||||
void write(const void* data, uint64_t addr, uint64_t size, bool sup);
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
void read(void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type = ACCESS_TYPE::LOAD);
|
||||
void write(const void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type = ACCESS_TYPE::STORE);
|
||||
#else
|
||||
void read(void* data, uint64_t addr, uint32_t size, bool sup);
|
||||
void write(const void* data, uint64_t addr, uint32_t size, bool sup);
|
||||
#endif
|
||||
|
||||
void amo_reserve(uint64_t addr);
|
||||
bool amo_check(uint64_t addr);
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits);
|
||||
uint8_t is_satp_unset();
|
||||
uint64_t get_satp();
|
||||
uint8_t get_mode();
|
||||
uint64_t get_base_ppn();
|
||||
void set_satp(uint64_t satp);
|
||||
#else
|
||||
void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags);
|
||||
#endif
|
||||
|
||||
void tlbRm(uint64_t vaddr);
|
||||
void tlbFlush() {
|
||||
tlb_.clear();
|
||||
|
@ -134,24 +261,71 @@ private:
|
|||
|
||||
struct TLBEntry {
|
||||
TLBEntry() {}
|
||||
TLBEntry(uint32_t pfn, uint32_t flags)
|
||||
#ifdef VM_ENABLE
|
||||
TLBEntry(uint32_t pfn, uint32_t flags, uint64_t size_bits)
|
||||
: pfn(pfn)
|
||||
, flags(flags)
|
||||
, mru_bit(true)
|
||||
, size_bits (size_bits)
|
||||
{
|
||||
d = bit(7);
|
||||
a = bit(6);
|
||||
g = bit(5);
|
||||
u = bit(4);
|
||||
x = bit(3);
|
||||
w = bit(2);
|
||||
r = bit(1);
|
||||
v = bit(0);
|
||||
}
|
||||
bool bit(uint8_t idx)
|
||||
{
|
||||
return (flags) & (1 << idx);
|
||||
}
|
||||
|
||||
uint32_t pfn;
|
||||
uint32_t flags;
|
||||
bool mru_bit;
|
||||
uint64_t size_bits;
|
||||
bool d, a, g, u, x, w, r, v;
|
||||
#else
|
||||
TLBEntry(uint32_t pfn, uint32_t flags)
|
||||
: pfn(pfn)
|
||||
, flags(flags)
|
||||
{}
|
||||
uint32_t pfn;
|
||||
uint32_t flags;
|
||||
#endif
|
||||
};
|
||||
|
||||
TLBEntry tlbLookup(uint64_t vAddr, uint32_t flagMask);
|
||||
#ifdef VM_ENABLE
|
||||
std::pair<bool, uint64_t> tlbLookup(uint64_t vAddr, ACCESS_TYPE type, uint64_t* size_bits);
|
||||
|
||||
bool need_trans(uint64_t dev_pAddr);
|
||||
uint64_t vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type);
|
||||
|
||||
uint64_t get_pte_address(uint64_t base_ppn, uint64_t vpn);
|
||||
std::pair<uint64_t, uint8_t> page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits);
|
||||
#else
|
||||
uint64_t toPhyAddr(uint64_t vAddr, uint32_t flagMask);
|
||||
TLBEntry tlbLookup(uint64_t vAddr, uint32_t flagMask);
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
std::unordered_map<uint64_t, TLBEntry> tlb_;
|
||||
uint64_t pageSize_;
|
||||
ADecoder decoder_;
|
||||
#ifndef VM_ENABLE
|
||||
bool enableVM_;
|
||||
#endif
|
||||
|
||||
amo_reservation_t amo_reservation_;
|
||||
#ifdef VM_ENABLE
|
||||
std::unordered_set<uint64_t> unique_translations;
|
||||
uint64_t TLB_HIT, TLB_MISS, TLB_EVICT, PTW, PERF_UNIQUE_PTW;
|
||||
SATP_t *satp_;
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -219,4 +393,149 @@ private:
|
|||
bool check_acl_;
|
||||
};
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
class PTE_t
|
||||
{
|
||||
|
||||
private:
|
||||
uint64_t address;
|
||||
uint64_t bits(uint64_t input, uint8_t s_idx, uint8_t e_idx)
|
||||
{
|
||||
return (input>> s_idx) & (((uint64_t)1 << (e_idx - s_idx + 1)) - 1);
|
||||
}
|
||||
bool bit(uint64_t input, uint8_t idx)
|
||||
{
|
||||
return (input) & ((uint64_t)1 << idx);
|
||||
}
|
||||
|
||||
public:
|
||||
#if VM_ADDR_MODE == SV39
|
||||
bool N;
|
||||
uint8_t PBMT;
|
||||
#endif
|
||||
uint64_t ppn;
|
||||
uint32_t rsw;
|
||||
uint32_t flags;
|
||||
uint8_t level;
|
||||
bool d, a, g, u, x, w, r, v;
|
||||
uint64_t pte_bytes;
|
||||
|
||||
void set_flags (uint32_t flag)
|
||||
{
|
||||
this->flags = flag;
|
||||
d = bit(flags,7);
|
||||
a = bit(flags,6);
|
||||
g = bit(flags,5);
|
||||
u = bit(flags,4);
|
||||
x = bit(flags,3);
|
||||
w = bit(flags,2);
|
||||
r = bit(flags,1);
|
||||
v = bit(flags,0);
|
||||
}
|
||||
|
||||
PTE_t(uint64_t address, uint32_t flags) : address(address)
|
||||
{
|
||||
#if VM_ADDR_MODE == SV39
|
||||
N = 0;
|
||||
PBMT = 0;
|
||||
level = 3;
|
||||
ppn = address >> MEM_PAGE_LOG2_SIZE;
|
||||
// Reserve for Super page support
|
||||
// ppn = new uint32_t [level];
|
||||
// ppn[2]=bits(address,28,53);
|
||||
// ppn[1]=bits(address,19,27);
|
||||
// ppn[0]=bits(address,10,18);
|
||||
set_flags(flags);
|
||||
// pte_bytes = (N << 63) | (PBMT << 61) | (ppn <<10) | flags ;
|
||||
pte_bytes = (ppn <<10) | flags ;
|
||||
#else // if VM_ADDR_MODE == SV32
|
||||
assert((address>> 32) == 0 && "Upper 32 bits are not zero!");
|
||||
level = 2;
|
||||
ppn = address >> MEM_PAGE_LOG2_SIZE;
|
||||
// Reserve for Super page support
|
||||
// ppn = new uint32_t[level];
|
||||
// ppn[1]=bits(address,20,31);
|
||||
// ppn[0]=bits(address,10,19);
|
||||
set_flags(flags);
|
||||
pte_bytes = ppn <<10 | flags ;
|
||||
#endif
|
||||
}
|
||||
|
||||
PTE_t(uint64_t pte_bytes) : pte_bytes(pte_bytes)
|
||||
{
|
||||
#if VM_ADDR_MODE == SV39
|
||||
N = bit(pte_bytes,63);
|
||||
PBMT = bits(pte_bytes,61,62);
|
||||
level = 3;
|
||||
ppn=bits(pte_bytes,10,53);
|
||||
address = ppn << MEM_PAGE_LOG2_SIZE;
|
||||
// Reserve for Super page support
|
||||
// ppn = new uint32_t [level];
|
||||
// ppn[2]=bits(pte_bytes,28,53);
|
||||
// ppn[1]=bits(pte_bytes,19,27);
|
||||
// ppn[0]=bits(pte_bytes,10,18);
|
||||
#else //#if VM_ADDR_MODE == SV32
|
||||
assert((pte_bytes >> 32) == 0 && "Upper 32 bits are not zero!");
|
||||
level = 2;
|
||||
ppn=bits(pte_bytes,10, 31);
|
||||
address = ppn << MEM_PAGE_LOG2_SIZE;
|
||||
// Reserve for Super page support
|
||||
// ppn = new uint32_t[level];
|
||||
// ppn[1]=bits(address, 20,31);
|
||||
// ppn[0]=bits(address, 10,19);
|
||||
#endif
|
||||
rsw = bits(pte_bytes,8,9);
|
||||
set_flags((uint32_t)(bits(pte_bytes,0,7)));
|
||||
}
|
||||
~PTE_t()
|
||||
{
|
||||
// Reserve for Super page support
|
||||
// delete ppn;
|
||||
}
|
||||
};
|
||||
|
||||
// Virtual address split into per-level virtual page numbers and a page offset.
//
// The layout is selected at compile time by VM_ADDR_MODE:
//   * SV39: 3 levels, vpn[2]=bits 30..38, vpn[1]=bits 21..29, vpn[0]=bits 12..20
//   * SV32: 2 levels, vpn[1]=bits 22..31, vpn[0]=bits 12..21
// In both layouts pgoff is bits 0..11.
//
// `vpn` is a heap array of `level` entries owned by this object. Copies are
// deep so that each instance releases only its own array.
class vAddr_t
{
private:
  uint64_t address;

  // Extracts the inclusive bit range [s_idx, e_idx] of the stored address.
  uint64_t bits(uint8_t s_idx, uint8_t e_idx) const
  {
    return (address >> s_idx) & (((uint64_t)1 << (e_idx - s_idx + 1)) - 1);
  }

  // Returns whether bit `idx` of the stored address is set.
  bool bit(uint8_t idx) const
  {
    return (address & ((uint64_t)1 << idx)) != 0;
  }

public:
  uint64_t *vpn;
  uint64_t pgoff;
  uint8_t level;

  // Decodes `address` into vpn[] and pgoff.
  vAddr_t(uint64_t address) : address(address)
  {
#if VM_ADDR_MODE == SV39
    level = 3;
    vpn = new uint64_t [level];
    vpn[2] = bits(30,38);
    vpn[1] = bits(21,29);
    vpn[0] = bits(12,20);
    pgoff = bits(0,11);
#else //#if VM_ADDR_MODE == SV32
    assert((address>> 32) == 0 && "Upper 32 bits are not zero!");
    level = 2;
    vpn = new uint64_t [level];
    vpn[1] = bits(22,31);
    vpn[0] = bits(12,21);
    pgoff = bits(0,11);
#endif
  }

  // Deep copy: the new object gets its own vpn array.
  vAddr_t(const vAddr_t& other)
    : address(other.address)
    , vpn(new uint64_t [other.level])
    , pgoff(other.pgoff)
    , level(other.level)
  {
    for (uint8_t i = 0; i < level; ++i) {
      vpn[i] = other.vpn[i];
    }
  }

  // Deep copy assignment. The replacement array is allocated before the old
  // one is released so a failed allocation leaves this object untouched.
  vAddr_t& operator=(const vAddr_t& other)
  {
    if (this != &other) {
      uint64_t* fresh = new uint64_t [other.level];
      for (uint8_t i = 0; i < other.level; ++i) {
        fresh[i] = other.vpn[i];
      }
      delete[] vpn;
      vpn = fresh;
      address = other.address;
      pgoff = other.pgoff;
      level = other.level;
    }
    return *this;
  }

  ~vAddr_t()
  {
    // vpn comes from new[], so it must be released with delete[].
    delete[] vpn;
  }
};
|
||||
#endif
|
||||
|
||||
} // namespace vortex
|
||||
|
|
|
@ -14,6 +14,10 @@ CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src
|
|||
CXXFLAGS += -DXLEN_$(XLEN)
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
ifeq ($(VM_ENABLE), 1)
|
||||
CXXFLAGS += -DVM_ENABLE
|
||||
endif
|
||||
|
||||
LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
|
||||
|
|
|
@ -106,6 +106,14 @@ void Cluster::attach_ram(RAM* ram) {
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
void Cluster::set_satp(uint64_t satp) {
|
||||
for (auto& socket : sockets_) {
|
||||
socket->set_satp(satp);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
bool Cluster::running() const {
|
||||
for (auto& socket : sockets_) {
|
||||
if (socket->running())
|
||||
|
|
|
@ -57,6 +57,10 @@ public:
|
|||
|
||||
void attach_ram(RAM* ram);
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
void set_satp(uint64_t satp);
|
||||
#endif
|
||||
|
||||
bool running() const;
|
||||
|
||||
int get_exitcode() const;
|
||||
|
|
|
@ -428,3 +428,10 @@ bool Core::wspawn(uint32_t num_warps, Word nextPC) {
|
|||
void Core::attach_ram(RAM* ram) {
|
||||
emulator_.attach_ram(ram);
|
||||
}
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
void Core::set_satp(uint64_t satp) {
|
||||
emulator_.set_satp(satp); //JAEWON wit, tid???
|
||||
// emulator_.set_csr(VX_CSR_SATP,satp,0,0); //JAEWON wit, tid???
|
||||
}
|
||||
#endif
|
|
@ -26,6 +26,7 @@
|
|||
#include "dispatcher.h"
|
||||
#include "func_unit.h"
|
||||
#include "mem_coalescer.h"
|
||||
#include "VX_config.h"
|
||||
|
||||
namespace vortex {
|
||||
|
||||
|
@ -98,6 +99,9 @@ public:
|
|||
void tick();
|
||||
|
||||
void attach_ram(RAM* ram);
|
||||
#ifdef VM_ENABLE
|
||||
void set_satp(uint64_t satp);
|
||||
#endif
|
||||
|
||||
bool running() const;
|
||||
|
||||
|
|
|
@ -127,7 +127,7 @@ void Emulator::clear() {
|
|||
void Emulator::attach_ram(RAM* ram) {
|
||||
// bind RAM to memory unit
|
||||
#if (XLEN == 64)
|
||||
mmu_.attach(*ram, 0, 0xFFFFFFFFFFFFFFFF);
|
||||
mmu_.attach(*ram, 0, 0x7FFFFFFFFF); //39bit SV39
|
||||
#else
|
||||
mmu_.attach(*ram, 0, 0xFFFFFFFF);
|
||||
#endif
|
||||
|
@ -280,10 +280,54 @@ bool Emulator::barrier(uint32_t bar_id, uint32_t count, uint32_t wid) {
|
|||
return false;
|
||||
}
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) {
|
||||
mmu_.read(data, addr, size, 0);
|
||||
}
|
||||
DP(3, "*** icache_read 0x" << std::hex << addr << ", size = 0x " << size);
|
||||
|
||||
try
|
||||
{
|
||||
mmu_.read(data, addr, size, ACCESS_TYPE::FETCH);
|
||||
}
|
||||
catch (Page_Fault_Exception& page_fault)
|
||||
{
|
||||
std::cout<<page_fault.what()<<std::endl;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) {
|
||||
mmu_.read(data, addr, size, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
// Installs a new SATP value by writing the VX_CSR_SATP CSR with tid = 0 and
// wid = 0.
void Emulator::set_satp(uint64_t satp) {
  // std::dec restores the numeric base after the hex trace.
  DPH(3, "set satp 0x" << std::hex << satp << std::dec << " in emulator module\n");
  set_csr(VX_CSR_SATP, satp, 0, 0);
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
void Emulator::dcache_read(void *data, uint64_t addr, uint32_t size) {
|
||||
DP(1, "*** dcache_read 0x" << std::hex << addr << ", size = 0x " << size);
|
||||
auto type = get_addr_type(addr);
|
||||
if (type == AddrType::Shared) {
|
||||
core_->local_mem()->read(data, addr, size);
|
||||
} else {
|
||||
try
|
||||
{
|
||||
mmu_.read(data, addr, size, ACCESS_TYPE::LOAD);
|
||||
}
|
||||
catch (Page_Fault_Exception& page_fault)
|
||||
{
|
||||
std::cout<<page_fault.what()<<std::endl;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
DPH(2, "Mem Read: addr=0x" << std::hex << addr << ", data=0x" << ByteStream(data, size) << " (size=" << size << ", type=" << type << ")" << std::endl);
|
||||
}
|
||||
#else
|
||||
void Emulator::dcache_read(void *data, uint64_t addr, uint32_t size) {
|
||||
auto type = get_addr_type(addr);
|
||||
if (type == AddrType::Shared) {
|
||||
|
@ -294,7 +338,34 @@ void Emulator::dcache_read(void *data, uint64_t addr, uint32_t size) {
|
|||
|
||||
DPH(2, "Mem Read: addr=0x" << std::hex << addr << ", data=0x" << ByteStream(data, size) << std::dec << " (size=" << size << ", type=" << type << ")" << std::endl);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
void Emulator::dcache_write(const void* data, uint64_t addr, uint32_t size) {
|
||||
DP(1, "*** dcache_write 0x" << std::hex << addr << ", size = 0x " << size);
|
||||
auto type = get_addr_type(addr);
|
||||
if (addr >= uint64_t(IO_COUT_ADDR)
|
||||
&& addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
|
||||
this->writeToStdOut(data, addr, size);
|
||||
} else {
|
||||
if (type == AddrType::Shared) {
|
||||
core_->local_mem()->write(data, addr, size);
|
||||
} else {
|
||||
try
|
||||
{
|
||||
// mmu_.write(data, addr, size, 0);
|
||||
mmu_.write(data, addr, size, ACCESS_TYPE::STORE);
|
||||
}
|
||||
catch (Page_Fault_Exception& page_fault)
|
||||
{
|
||||
std::cout<<page_fault.what()<<std::endl;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
DPH(2, "Mem Write: addr=0x" << std::hex << addr << ", data=0x" << ByteStream(data, size) << " (size=" << size << ", type=" << type << ")" << std::endl);
|
||||
}
|
||||
#else
|
||||
void Emulator::dcache_write(const void* data, uint64_t addr, uint32_t size) {
|
||||
auto type = get_addr_type(addr);
|
||||
if (addr >= uint64_t(IO_COUT_ADDR)
|
||||
|
@ -309,6 +380,7 @@ void Emulator::dcache_write(const void* data, uint64_t addr, uint32_t size) {
|
|||
}
|
||||
DPH(2, "Mem Write: addr=0x" << std::hex << addr << ", data=0x" << ByteStream(data, size) << std::dec << " (size=" << size << ", type=" << type << ")" << std::endl);
|
||||
}
|
||||
#endif
|
||||
|
||||
void Emulator::dcache_amo_reserve(uint64_t addr) {
|
||||
auto type = get_addr_type(addr);
|
||||
|
@ -360,6 +432,10 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
|
|||
auto core_perf = core_->perf_stats();
|
||||
switch (addr) {
|
||||
case VX_CSR_SATP:
|
||||
#ifdef VM_ENABLE
|
||||
// return csrs_.at(wid).at(tid)[addr];
|
||||
return mmu_.get_satp();
|
||||
#endif
|
||||
case VX_CSR_PMPCFG0:
|
||||
case VX_CSR_PMPADDR0:
|
||||
case VX_CSR_MSTATUS:
|
||||
|
@ -488,6 +564,12 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) {
|
|||
csr_mscratch_ = value;
|
||||
break;
|
||||
case VX_CSR_SATP:
|
||||
#ifdef VM_ENABLE
|
||||
// warps_.at(wid).fcsr = (warps_.at(wid).fcsr & ~0x1F) | (value & 0x1F);
|
||||
// csrs_.at(wid).at(tid)[addr] = value; //what is wid and tid?
|
||||
mmu_.set_satp(value);
|
||||
break;
|
||||
#endif
|
||||
case VX_CSR_MSTATUS:
|
||||
case VX_CSR_MEDELEG:
|
||||
case VX_CSR_MIDELEG:
|
||||
|
@ -506,6 +588,8 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
uint32_t Emulator::get_fpu_rm(uint32_t func3, uint32_t tid, uint32_t wid) {
|
||||
return (func3 == 0x7) ? this->get_csr(VX_CSR_FRM, tid, wid) : func3;
|
||||
}
|
||||
|
|
|
@ -39,6 +39,9 @@ public:
|
|||
void clear();
|
||||
|
||||
void attach_ram(RAM* ram);
|
||||
#ifdef VM_ENABLE
|
||||
void set_satp(uint64_t satp) ;
|
||||
#endif
|
||||
|
||||
instr_trace_t* step();
|
||||
|
||||
|
|
|
@ -84,7 +84,7 @@ int main(int argc, char **argv) {
|
|||
Arch arch(num_threads, num_warps, num_cores);
|
||||
|
||||
// create memory module
|
||||
RAM ram(0, RAM_PAGE_SIZE);
|
||||
RAM ram(0, MEM_PAGE_SIZE);
|
||||
|
||||
// create processor
|
||||
Processor processor(arch);
|
||||
|
|
|
@ -99,6 +99,13 @@ void ProcessorImpl::attach_ram(RAM* ram) {
|
|||
cluster->attach_ram(ram);
|
||||
}
|
||||
}
|
||||
#ifdef VM_ENABLE
|
||||
void ProcessorImpl::set_satp(uint64_t satp) {
|
||||
for (auto cluster : clusters_) {
|
||||
cluster->set_satp(satp);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void ProcessorImpl::run() {
|
||||
SimPlatform::instance().reset();
|
||||
|
@ -143,10 +150,18 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const {
|
|||
|
||||
// Creates the processor implementation for `arch`.
// In virtual-memory builds the SATP descriptor starts out unset (null) until
// set_satp_by_addr() is called.
Processor::Processor(const Arch& arch)
  : impl_(new ProcessorImpl(arch))
{
#ifdef VM_ENABLE
  satp_ = nullptr;
#endif
}
|
||||
|
||||
// Releases the implementation object and, in virtual-memory builds, the SATP
// descriptor (if one was ever created).
Processor::~Processor() {
  delete impl_;
#ifdef VM_ENABLE
  // Deleting a null pointer is a no-op, so no explicit check is required.
  delete satp_;
#endif
}
|
||||
|
||||
void Processor::attach_ram(RAM* mem) {
|
||||
|
@ -159,4 +174,27 @@ void Processor::run() {
|
|||
|
||||
void Processor::dcr_write(uint32_t addr, uint32_t value) {
|
||||
return impl_->dcr_write(addr, value);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
int16_t Processor::set_satp_by_addr(uint64_t base_addr) {
|
||||
uint16_t asid = 0;
|
||||
satp_ = new SATP_t (base_addr,asid);
|
||||
if (satp_ == NULL)
|
||||
return 1;
|
||||
uint64_t satp = satp_->get_satp();
|
||||
impl_->set_satp(satp);
|
||||
return 0;
|
||||
}
|
||||
bool Processor::is_satp_unset() {
|
||||
return (satp_== NULL);
|
||||
}
|
||||
uint8_t Processor::get_satp_mode() {
|
||||
assert (satp_!=NULL);
|
||||
return satp_->get_mode();
|
||||
}
|
||||
uint64_t Processor::get_base_ppn() {
|
||||
assert (satp_!=NULL);
|
||||
return satp_->get_base_ppn();
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -14,12 +14,17 @@
|
|||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <VX_config.h>
|
||||
#include <mem.h>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
class Arch;
|
||||
class RAM;
|
||||
class ProcessorImpl;
|
||||
#ifdef VM_ENABLE
|
||||
class SATP_t;
|
||||
#endif
|
||||
|
||||
class Processor {
|
||||
public:
|
||||
|
@ -31,9 +36,18 @@ public:
|
|||
void run();
|
||||
|
||||
void dcr_write(uint32_t addr, uint32_t value);
|
||||
#ifdef VM_ENABLE
|
||||
bool is_satp_unset();
|
||||
uint8_t get_satp_mode();
|
||||
uint64_t get_base_ppn();
|
||||
int16_t set_satp_by_addr(uint64_t addr);
|
||||
#endif
|
||||
|
||||
private:
|
||||
ProcessorImpl* impl_;
|
||||
#ifdef VM_ENABLE
|
||||
SATP_t *satp_;
|
||||
#endif
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -40,6 +40,10 @@ public:
|
|||
|
||||
void dcr_write(uint32_t addr, uint32_t value);
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
void set_satp(uint64_t satp);
|
||||
#endif
|
||||
|
||||
PerfStats perf_stats() const;
|
||||
|
||||
private:
|
||||
|
|
|
@ -107,6 +107,14 @@ void Socket::attach_ram(RAM* ram) {
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
void Socket::set_satp(uint64_t satp) {
|
||||
for (auto core : cores_) {
|
||||
core->set_satp(satp);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
bool Socket::running() const {
|
||||
for (auto& core : cores_) {
|
||||
if (core->running())
|
||||
|
|
|
@ -60,6 +60,10 @@ public:
|
|||
|
||||
void attach_ram(RAM* ram);
|
||||
|
||||
#ifdef VM_ENABLE
|
||||
void set_satp(uint64_t satp);
|
||||
#endif
|
||||
|
||||
bool running() const;
|
||||
|
||||
int get_exitcode() const;
|
||||
|
|
28677
tests/opencl/bfs/graph4096.txt
Executable file
28677
tests/opencl/bfs/graph4096.txt
Executable file
File diff suppressed because it is too large
Load diff
|
@ -62,7 +62,7 @@ void kernel_body(kernel_arg_t* __UNIFORM__ arg) {
|
|||
value *= 5;
|
||||
break;
|
||||
default:
|
||||
assert(task_id < arg->num_points);
|
||||
//assert(task_id < arg->num_points);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue