Merge branch 'master' of github.com:vortexgpgpu/vortex

This commit is contained in:
sij814 2024-09-19 13:36:46 -07:00
commit e8ce3878bb
39 changed files with 31156 additions and 128 deletions


@ -21,13 +21,13 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v2
uses: actions/checkout@v4
with:
submodules: recursive
- name: Cache Toolchain Directory
id: cache-toolchain
uses: actions/cache@v2
uses: actions/cache@v4
with:
path: tools
key: ${{ runner.os }}-toolchain-v0.1
@ -36,7 +36,7 @@ jobs:
- name: Cache Third Party Directory
id: cache-thirdparty
uses: actions/cache@v2
uses: actions/cache@v4
with:
path: third_party
key: ${{ runner.os }}-thirdparty-v0.1
@ -62,7 +62,111 @@ jobs:
run: |
make -C third_party > /dev/null
build:
# build:
# runs-on: ubuntu-20.04
# needs: setup
# strategy:
# matrix:
# xlen: [32, 64]
# steps:
# - name: Checkout code
# uses: actions/checkout@v2
# - name: Install Dependencies
# run: |
# sudo bash ./ci/system_updates.sh
# - name: Cache Toolchain Directory
# id: cache-toolchain
# uses: actions/cache@v2
# with:
# path: tools
# key: ${{ runner.os }}-toolchain-v0.1
# restore-keys: |
# ${{ runner.os }}-toolchain-
# - name: Cache Third Party Directory
# id: cache-thirdparty
# uses: actions/cache@v2
# with:
# path: third_party
# key: ${{ runner.os }}-thirdparty-v0.1
# restore-keys: |
# ${{ runner.os }}-thirdparty-
# - name: Run Build
# run: |
# TOOLDIR=$PWD/tools
# mkdir -p build${{ matrix.xlen }}
# cd build${{ matrix.xlen }}
# ../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }}
# source ci/toolchain_env.sh
# make software -s > /dev/null
# make tests -s > /dev/null
# - name: Upload Build Artifact
# uses: actions/upload-artifact@v2
# with:
# name: build-${{ matrix.xlen }}
# path: build${{ matrix.xlen }}
# tests:
# runs-on: ubuntu-20.04
# needs: build
# strategy:
# matrix:
# name: [regression, opencl, config1, config2, debug, stress]
# xlen: [32, 64]
# steps:
# - name: Checkout code
# uses: actions/checkout@v2
# - name: Install Dependencies
# run: |
# sudo bash ./ci/system_updates.sh
# - name: Cache Toolchain Directory
# id: cache-toolchain
# uses: actions/cache@v2
# with:
# path: tools
# key: ${{ runner.os }}-toolchain-v0.1
# restore-keys: |
# ${{ runner.os }}-toolchain-
# - name: Cache Third Party Directory
# id: cache-thirdparty
# uses: actions/cache@v2
# with:
# path: third_party
# key: ${{ runner.os }}-thirdparty-v0.1
# restore-keys: |
# ${{ runner.os }}-thirdparty-
# - name: Download Build Artifact
# uses: actions/download-artifact@v2
# with:
# name: build-${{ matrix.xlen }}
# path: build${{ matrix.xlen }}
# - name: Run tests
# run: |
# cd build${{ matrix.xlen }}
# source ci/toolchain_env.sh
# chmod -R +x . # Ensure all files have executable permissions
# if [ "${{ matrix.name }}" == "regression" ]; then
# ./ci/regression.sh --unittest
# ./ci/regression.sh --isa
# ./ci/regression.sh --kernel
# ./ci/regression.sh --synthesis
# ./ci/regression.sh --regression
# else
# ./ci/regression.sh --${{ matrix.name }}
# fi
build_vm:
runs-on: ubuntu-20.04
needs: setup
strategy:
@ -71,7 +175,7 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v2
uses: actions/checkout@v4
- name: Install Dependencies
run: |
@ -79,7 +183,7 @@ jobs:
- name: Cache Toolchain Directory
id: cache-toolchain
uses: actions/cache@v2
uses: actions/cache@v4
with:
path: tools
key: ${{ runner.os }}-toolchain-v0.1
@ -88,7 +192,7 @@ jobs:
- name: Cache Third Party Directory
id: cache-thirdparty
uses: actions/cache@v2
uses: actions/cache@v4
with:
path: third_party
key: ${{ runner.os }}-thirdparty-v0.1
@ -98,31 +202,31 @@ jobs:
- name: Run Build
run: |
TOOLDIR=$PWD/tools
mkdir -p build${{ matrix.xlen }}
cd build${{ matrix.xlen }}
../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }}
mkdir -p build${{ matrix.xlen }}-vm
cd build${{ matrix.xlen }}-vm
../configure --tooldir=$TOOLDIR --xlen=${{ matrix.xlen }} --vm_enable=1
source ci/toolchain_env.sh
make software -s > /dev/null
make tests -s > /dev/null
- name: Upload Build Artifact
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: build-${{ matrix.xlen }}
path: build${{ matrix.xlen }}
name: build-${{ matrix.xlen }}-vm
path: build${{ matrix.xlen }}-vm
tests:
test_vm:
runs-on: ubuntu-20.04
needs: build
needs: build_vm
strategy:
fail-fast: false
matrix:
name: [regression, opencl, cache, config1, config2, debug, stress]
name: [regression, opencl, cache, config1, config2, debug, stress, vm]
xlen: [32, 64]
steps:
- name: Checkout code
uses: actions/checkout@v2
uses: actions/checkout@v4
- name: Install Dependencies
run: |
@ -130,7 +234,7 @@ jobs:
- name: Cache Toolchain Directory
id: cache-toolchain
uses: actions/cache@v2
uses: actions/cache@v4
with:
path: tools
key: ${{ runner.os }}-toolchain-v0.1
@ -139,7 +243,7 @@ jobs:
- name: Cache Third Party Directory
id: cache-thirdparty
uses: actions/cache@v2
uses: actions/cache@v4
with:
path: third_party
key: ${{ runner.os }}-thirdparty-v0.1
@ -147,30 +251,22 @@ jobs:
${{ runner.os }}-thirdparty-
- name: Download Build Artifact
uses: actions/download-artifact@v2
uses: actions/download-artifact@v4
with:
name: build-${{ matrix.xlen }}
path: build${{ matrix.xlen }}
name: build-${{ matrix.xlen }}-vm
path: build${{ matrix.xlen }}-vm
- name: Run tests
run: |
cd build${{ matrix.xlen }}
cd build${{ matrix.xlen }}-vm
source ci/toolchain_env.sh
chmod -R +x . # Ensure all files have executable permissions
if [ "${{ matrix.name }}" == "regression" ]; then
./ci/regression.sh --unittest
./ci/regression.sh --isa
./ci/regression.sh --kernel
./ci/regression.sh --synthesis
./ci/regression.sh --regression
else
./ci/regression.sh --${{ matrix.name }}
fi
./ci/regression.sh --vm
complete:
runs-on: ubuntu-20.04
needs: tests
needs: test_vm
steps:
- name: Check Completion
run: echo "All matrix jobs passed"
run: echo "All matrix jobs passed"

.gitignore vendored (3 changes)

@ -1,3 +1,4 @@
/build*
/.vscode
*.cache
*.cache
*.code-workspace


@ -2,6 +2,14 @@ include config.mk
.PHONY: build software tests
vm:
$(MAKE) -C $(VORTEX_HOME)/third_party
$(MAKE) -C hw
$(MAKE) -C sim simx
$(MAKE) -C kernel
$(MAKE) -C runtime vm
$(MAKE) -C tests
all:
$(MAKE) -C $(VORTEX_HOME)/third_party
$(MAKE) -C hw
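A minimal usage sketch of the new `vm` target above (assuming a build folder configured with `--vm_enable=1`, as done in the CI workflow, and the standard toolchain setup from the README):

```sh
# from a fresh build folder: configure with VM support, then build the vm flow
../configure --xlen=32 --tooldir=$HOME/tools --vm_enable=1
source ci/toolchain_env.sh
make vm
```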


@ -54,23 +54,26 @@ sudo apt-get install git
```
### Install Vortex codebase
```sh
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
cd vortex
git clone --depth=1 --recursive https://github.com/vortexgpgpu/vortex.git
cd vortex
```
### Configure your build folder
```sh
mkdir build
cd build
../configure --xlen=32 --tooldir=$HOME/tools
mkdir build
cd build
# for 32bit
../configure --xlen=32 --tooldir=$HOME/tools
# for 64bit
../configure --xlen=64 --tooldir=$HOME/tools
```
### Install prebuilt toolchain
```sh
./ci/toolchain_install.sh --all
./ci/toolchain_install.sh --all
```
### Set environment variables
### set environment variables
```sh
# should always run before using the toolchain!
source ./ci/toolchain_env.sh
# should always run before using the toolchain!
source ./ci/toolchain_env.sh
```
### Building Vortex
```sh


@ -19,6 +19,8 @@ set -e
# clear blackbox cache
rm -f blackbox.*.cache
# HW: add a test "VM Test" to make sure VM feature is enabled
XLEN=${XLEN:=@XLEN@}
XSIZE=$((XLEN / 8))
@ -124,6 +126,30 @@ opencl()
echo "opencl tests done!"
}
vm(){
echo "begin vm tests..."
make -C sim/simx
make -C runtime/simx
make -C tests/kernel run-simx
# Regression tests
make -C tests/regression run-simx
# test global barrier
CONFIGS="-DGBAR_ENABLE" ./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tgbar" --cores=2
# test local barrier
./ci/blackbox.sh --driver=simx --app=dogfood --args="-n1 -tbar"
# OpenCL tests
make -C tests/opencl run-simx
./ci/blackbox.sh --driver=simx --app=lbm --warps=8
echo "vm tests done!"
}
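# Usage sketch: once a VM-enabled build is in place, this suite can be invoked on its
# own through the new --vm option registered further below: ./ci/regression.sh --vm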
cache()
{
echo "begin cache tests..."
@ -242,7 +268,11 @@ config2()
# custom program startup address
make -C tests/regression/dogfood clean-kernel
STARTUP_ADDR=0x40000000 make -C tests/regression/dogfood
if [ "$XLEN" == "64" ]; then
STARTUP_ADDR=0x180000000 make -C tests/regression/dogfood
else
STARTUP_ADDR=0x80000000 make -C tests/regression/dogfood
fi
./ci/blackbox.sh --driver=simx --app=dogfood
./ci/blackbox.sh --driver=rtlsim --app=dogfood
make -C tests/regression/dogfood clean-kernel
@ -359,6 +389,9 @@ while [ "$1" != "" ]; do
--cache )
tests+=("cache")
;;
--vm )
tests+=("vm")
;;
--config1 )
tests+=("config1")
;;


@ -1,13 +1,13 @@
#!/bin/sh
# Copyright 2019-2023
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -16,7 +16,8 @@
TOOLDIR=${TOOLDIR:=@TOOLDIR@}
export PATH=$TOOLDIR/verilator/bin:$PATH
# export VERILATOR_ROOT=$TOOLDIR/verilator
# export PATH=$VERILATOR_ROOT/bin:$PATH
export SV2V_PATH=$TOOLDIR/sv2v
export PATH=$SV2V_PATH/bin:$PATH


@ -34,4 +34,6 @@ RISCV_SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/$(RISCV_PREFIX)
VORTEX_RT_PATH ?= $(VORTEX_HOME)/runtime
VORTEX_KN_PATH ?= $(VORTEX_HOME)/kernel
THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party
THIRD_PARTY_DIR ?= $(VORTEX_HOME)/third_party
VM_ENABLE ?= @VM_ENABLE@

configure vendored (7 changes)

@ -63,7 +63,7 @@ copy_files() {
filename_no_ext="${filename%.in}"
dest_file="$dest_dir/$filename_no_ext"
mkdir -p "$dest_dir"
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g" "$file" > "$dest_file"
sed "s|@VORTEX_HOME@|$SCRIPT_DIR|g; s|@XLEN@|$XLEN|g; s|@TOOLDIR@|$TOOLDIR|g; s|@OSVERSION@|$OSVERSION|g; s|@INSTALLDIR@|$PREFIX|g; s|@VM_ENABLE@|$VM_ENABLE|g" "$file" > "$dest_file"
# apply permissions to bash scripts
read -r firstline < "$dest_file"
if [[ "$firstline" =~ ^#!.*bash ]]; then
@ -114,6 +114,7 @@ default_xlen=32
default_tooldir=$HOME/tools
default_osversion=$(detect_osversion)
default_prefix=$CURRENT_DIR
default_vm=0
# load default configuration parameters from existing config.mk
if [ -f "config.mk" ]; then
@ -126,6 +127,7 @@ if [ -f "config.mk" ]; then
TOOLDIR\ ?*) default_tooldir=${value//\?=/} ;;
OSVERSION\ ?*) default_osversion=${value//\?=/} ;;
PREFIX\ ?*) default_prefix=${value//\?=/} ;;
VM_ENABLE\ ?*) default_vm=${value//\?=/} ;;
esac
done < config.mk
fi
@ -135,6 +137,7 @@ XLEN=${XLEN:=$default_xlen}
TOOLDIR=${TOOLDIR:=$default_tooldir}
OSVERSION=${OSVERSION:=$default_osversion}
PREFIX=${PREFIX:=$default_prefix}
VM_ENABLE=${VM_ENABLE:=$default_vm}
# parse command line arguments
usage() {
@ -143,6 +146,7 @@ usage() {
echo " --tooldir=<path> Set the TOOLDIR path (default: $HOME/tools)"
echo " --osversion=<version> Set the OS Version (default: $(detect_osversion))"
echo " --prefix=<path> Set installation directory"
echo " --vm_enable=<value> Enable Virtual Memory support (default: 0)"
exit 1
}
while [[ "$#" -gt 0 ]]; do
@ -151,6 +155,7 @@ while [[ "$#" -gt 0 ]]; do
--tooldir=*) TOOLDIR="${1#*=}" ;;
--osversion=*) OSVERSION="${1#*=}" ;;
--prefix=*) PREFIX="${1#*=}" ;;
--vm_enable=*) VM_ENABLE="${1#*=}" ;;
-h|--help) usage ;;
*) echo "Unknown parameter passed: $1"; usage ;;
esac

docs/fpga_setup.md (new file, 74 lines)

@ -0,0 +1,74 @@
# FPGA Startup and Configuration Guide
OPAE Environment Setup
----------------------
$ source /opt/inteldevstack/init_env_user.sh
$ export OPAE_HOME=/opt/opae/1.1.2
$ export PATH=$OPAE_HOME/bin:$PATH
$ export C_INCLUDE_PATH=$OPAE_HOME/include:$C_INCLUDE_PATH
$ export LIBRARY_PATH=$OPAE_HOME/lib:$LIBRARY_PATH
$ export LD_LIBRARY_PATH=$OPAE_HOME/lib:$LD_LIBRARY_PATH
OPAE Build
------------------
The FPGA build has the following configuration options:
- DEVICE_FAMILY=arria10 | stratix10
- NUM_CORES=#n
Command line:
$ cd hw/syn/altera/opae
$ PREFIX=test1 TARGET=fpga NUM_CORES=4 make
A new folder (e.g. `test1_xxx_4c`) will be created and the build will start, taking roughly 30-480 minutes to complete.
Setting TARGET=ase will build the project for simulation using Intel ASE.
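For example, a sketch mirroring the FPGA command above (the prefix name is arbitrary):
$ PREFIX=test1 TARGET=ase NUM_CORES=4 make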
OPAE Build Configuration
------------------------
The hardware configuration file `/hw/rtl/VX_config.vh` defines all the hardware parameters that can be modified when building the processor. For example, the following parameters can be configured:
- `NUM_WARPS`: Number of warps per core
- `NUM_THREADS`: Number of threads per warp
- `PERF_ENABLE`: enables all profiling counters
You can configure the synthesis build from the command line:
$ CONFIGS="-DPERF_ENABLE -DNUM_THREADS=8" make
OPAE Build Progress
-------------------
You can check the last 10 lines of the build log for possible errors until the build completes.
$ tail -n 10 <build_dir>/build.log
Check whether the build is still running by looking for the quartus_sh, quartus_syn, or quartus_fit processes.
$ ps -u <username>
If the build fails and you need to restart it, clean up the build folder using the following command:
$ make clean
The file `vortex_afu.gbs` should exist when the build is done:
$ ls -lsa <build_dir>/synth/vortex_afu.gbs
Signing the bitstream and Programming the FPGA
----------------------------------------------
$ cd <build_dir>
$ PACSign PR -t UPDATE -H openssl_manager -i vortex_afu.gbs -o vortex_afu_unsigned_ssl.gbs
$ fpgasupdate vortex_afu_unsigned_ssl.gbs
FPGA sample test running OpenCL sgemm kernel
--------------------------------------------
Run the following from the Vortex root directory:
$ TARGET=fpga ./ci/blackbox.sh --driver=opae --app=sgemm --args="-n128"


@ -14,6 +14,8 @@
`ifndef VX_CONFIG_VH
`define VX_CONFIG_VH
`ifndef MIN
`define MIN(x, y) (((x) < (y)) ? (x) : (y))
`endif
@ -31,7 +33,6 @@
`endif
///////////////////////////////////////////////////////////////////////////////
`ifndef EXT_M_DISABLE
`define EXT_M_ENABLE
`endif
@ -158,7 +159,7 @@
`endif
`ifndef STARTUP_ADDR
`define STARTUP_ADDR 64'h080000000
`define STARTUP_ADDR 64'h180000000
`endif
`ifndef USER_BASE_ADDR
@ -169,7 +170,14 @@
`define IO_BASE_ADDR 64'h000000040
`endif
`else
`ifdef VM_ENABLE
`ifndef PAGE_TABLE_BASE_ADDR
`define PAGE_TABLE_BASE_ADDR 64'h1F0000000
`endif
`endif
`else // XLEN_32
`ifndef STACK_BASE_ADDR
`define STACK_BASE_ADDR 32'hFFFF0000
@ -187,6 +195,13 @@
`define IO_BASE_ADDR 32'h00000040
`endif
`ifdef VM_ENABLE
`ifndef PAGE_TABLE_BASE_ADDR
`define PAGE_TABLE_BASE_ADDR 32'hF0000000
`endif
`endif
`endif
`define IO_END_ADDR `USER_BASE_ADDR
@ -202,7 +217,7 @@
`ifndef IO_COUT_ADDR
`define IO_COUT_ADDR `IO_BASE_ADDR
`endif
`define IO_COUT_SIZE 64
`define IO_COUT_SIZE `MEM_BLOCK_SIZE
`ifndef IO_MPM_ADDR
`define IO_MPM_ADDR (`IO_COUT_ADDR + `IO_COUT_SIZE)
@ -251,6 +266,59 @@
`define DEBUG_LEVEL 3
`endif
`ifndef MEM_PAGE_SIZE
`define MEM_PAGE_SIZE (4096)
`endif
`ifndef MEM_PAGE_LOG2_SIZE
`define MEM_PAGE_LOG2_SIZE (12)
`endif
// Virtual Memory Configuration ///////////////////////////////////////////////////////
`ifdef VM_ENABLE
`ifdef XLEN_32
`ifndef VM_ADDR_MODE
`define VM_ADDR_MODE SV32 //or BARE
`endif
`ifndef PT_LEVEL
`define PT_LEVEL (2)
`endif
`ifndef PTE_SIZE
`define PTE_SIZE (4)
`endif
`ifndef NUM_PTE_ENTRY
`define NUM_PTE_ENTRY (1024)
`endif
`ifndef PT_SIZE_LIMIT
`define PT_SIZE_LIMIT (1<<23)
`endif
`else
`ifndef VM_ADDR_MODE
`define VM_ADDR_MODE SV39 //or BARE
`endif
`ifndef PT_LEVEL
`define PT_LEVEL (3)
`endif
`ifndef PTE_SIZE
`define PTE_SIZE (8)
`endif
`ifndef NUM_PTE_ENTRY
`define NUM_PTE_ENTRY (512)
`endif
`ifndef PT_SIZE_LIMIT
`define PT_SIZE_LIMIT (1<<25)
`endif
`endif
`ifndef PT_SIZE
`define PT_SIZE MEM_PAGE_SIZE
`endif
`ifndef TLB_SIZE
`define TLB_SIZE (32)
`endif
`endif
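// Note (sketch): since every VM parameter above is guarded by `ifndef, it can be overridden
// at build time through the CONFIGS variable used elsewhere in this repo, e.g.
// CONFIGS="-DTLB_SIZE=64" ./ci/blackbox.sh --driver=simx --app=dogfood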
// Pipeline Configuration /////////////////////////////////////////////////////
// Issue width

hw/rtl/core/VX_gpr_slice.sv (new file, 286 lines)

@ -0,0 +1,286 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
module VX_gpr_slice import VX_gpu_pkg::*; #(
parameter CORE_ID = 0,
parameter CACHE_ENABLE = 0
) (
input wire clk,
input wire reset,
VX_writeback_if.slave writeback_if,
VX_scoreboard_if.slave scoreboard_if,
VX_operands_if.master operands_if
);
`UNUSED_PARAM (CORE_ID)
localparam DATAW = `UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + 1 + `EX_BITS + `INST_OP_BITS + `INST_ARGS_BITS + `NR_BITS;
localparam RAM_ADDRW = `LOG2UP(`NUM_REGS * ISSUE_RATIO);
localparam STATE_IDLE = 2'd0;
localparam STATE_FETCH1 = 2'd1;
localparam STATE_FETCH2 = 2'd2;
localparam STATE_FETCH3 = 2'd3;
localparam STATE_BITS = 2;
wire [`NUM_THREADS-1:0][`XLEN-1:0] gpr_rd_data;
reg [`NR_BITS-1:0] gpr_rd_rid, gpr_rd_rid_n;
reg [ISSUE_WIS_W-1:0] gpr_rd_wis, gpr_rd_wis_n;
reg [`NUM_THREADS-1:0][`XLEN-1:0] cache_data [ISSUE_RATIO-1:0];
reg [`NUM_THREADS-1:0][`XLEN-1:0] cache_data_n [ISSUE_RATIO-1:0];
reg [`NR_BITS-1:0] cache_reg [ISSUE_RATIO-1:0];
reg [`NR_BITS-1:0] cache_reg_n [ISSUE_RATIO-1:0];
reg [`NUM_THREADS-1:0] cache_tmask [ISSUE_RATIO-1:0];
reg [`NUM_THREADS-1:0] cache_tmask_n [ISSUE_RATIO-1:0];
reg [ISSUE_RATIO-1:0] cache_eop, cache_eop_n;
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data, rs1_data_n;
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data, rs2_data_n;
reg [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data, rs3_data_n;
reg [STATE_BITS-1:0] state, state_n;
reg [`NR_BITS-1:0] rs2, rs2_n;
reg [`NR_BITS-1:0] rs3, rs3_n;
reg rs2_ready, rs2_ready_n;
reg rs3_ready, rs3_ready_n;
reg data_ready, data_ready_n;
wire stg_valid_in, stg_ready_in;
wire is_rs1_zero = (scoreboard_if.data.rs1 == 0);
wire is_rs2_zero = (scoreboard_if.data.rs2 == 0);
wire is_rs3_zero = (scoreboard_if.data.rs3 == 0);
always @(*) begin
state_n = state;
rs2_n = rs2;
rs3_n = rs3;
rs2_ready_n = rs2_ready;
rs3_ready_n = rs3_ready;
rs1_data_n = rs1_data;
rs2_data_n = rs2_data;
rs3_data_n = rs3_data;
cache_data_n = cache_data;
cache_reg_n = cache_reg;
cache_tmask_n= cache_tmask;
cache_eop_n = cache_eop;
gpr_rd_rid_n = gpr_rd_rid;
gpr_rd_wis_n = gpr_rd_wis;
data_ready_n = data_ready;
case (state)
STATE_IDLE: begin
if (operands_if.valid && operands_if.ready) begin
data_ready_n = 0;
end
if (scoreboard_if.valid && data_ready_n == 0) begin
data_ready_n = 1;
if (is_rs3_zero || (CACHE_ENABLE != 0 &&
scoreboard_if.data.rs3 == cache_reg[scoreboard_if.data.wis] &&
(scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin
rs3_data_n = (is_rs3_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis];
rs3_ready_n = 1;
end else begin
rs3_ready_n = 0;
gpr_rd_rid_n = scoreboard_if.data.rs3;
data_ready_n = 0;
state_n = STATE_FETCH3;
end
if (is_rs2_zero || (CACHE_ENABLE != 0 &&
scoreboard_if.data.rs2 == cache_reg[scoreboard_if.data.wis] &&
(scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin
rs2_data_n = (is_rs2_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis];
rs2_ready_n = 1;
end else begin
rs2_ready_n = 0;
gpr_rd_rid_n = scoreboard_if.data.rs2;
data_ready_n = 0;
state_n = STATE_FETCH2;
end
if (is_rs1_zero || (CACHE_ENABLE != 0 &&
scoreboard_if.data.rs1 == cache_reg[scoreboard_if.data.wis] &&
(scoreboard_if.data.tmask & cache_tmask[scoreboard_if.data.wis]) == scoreboard_if.data.tmask)) begin
rs1_data_n = (is_rs1_zero || CACHE_ENABLE == 0) ? '0 : cache_data[scoreboard_if.data.wis];
end else begin
gpr_rd_rid_n = scoreboard_if.data.rs1;
data_ready_n = 0;
state_n = STATE_FETCH1;
end
end
gpr_rd_wis_n = scoreboard_if.data.wis;
rs2_n = scoreboard_if.data.rs2;
rs3_n = scoreboard_if.data.rs3;
end
STATE_FETCH1: begin
rs1_data_n = gpr_rd_data;
if (~rs2_ready) begin
gpr_rd_rid_n = rs2;
state_n = STATE_FETCH2;
end else if (~rs3_ready) begin
gpr_rd_rid_n = rs3;
state_n = STATE_FETCH3;
end else begin
data_ready_n = 1;
state_n = STATE_IDLE;
end
end
STATE_FETCH2: begin
rs2_data_n = gpr_rd_data;
if (~rs3_ready) begin
gpr_rd_rid_n = rs3;
state_n = STATE_FETCH3;
end else begin
data_ready_n = 1;
state_n = STATE_IDLE;
end
end
STATE_FETCH3: begin
rs3_data_n = gpr_rd_data;
data_ready_n = 1;
state_n = STATE_IDLE;
end
endcase
if (CACHE_ENABLE != 0 && writeback_if.valid) begin
if ((cache_reg[writeback_if.data.wis] == writeback_if.data.rd)
|| (cache_eop[writeback_if.data.wis] && writeback_if.data.sop)) begin
for (integer j = 0; j < `NUM_THREADS; ++j) begin
if (writeback_if.data.tmask[j]) begin
cache_data_n[writeback_if.data.wis][j] = writeback_if.data.data[j];
end
end
cache_reg_n[writeback_if.data.wis] = writeback_if.data.rd;
cache_eop_n[writeback_if.data.wis] = writeback_if.data.eop;
cache_tmask_n[writeback_if.data.wis] = writeback_if.data.sop ? writeback_if.data.tmask :
(cache_tmask_n[writeback_if.data.wis] | writeback_if.data.tmask);
end
end
end
always @(posedge clk) begin
if (reset) begin
state <= STATE_IDLE;
cache_eop <= {ISSUE_RATIO{1'b1}};
data_ready <= 0;
end else begin
state <= state_n;
cache_eop <= cache_eop_n;
data_ready <= data_ready_n;
end
gpr_rd_rid <= gpr_rd_rid_n;
gpr_rd_wis <= gpr_rd_wis_n;
rs2_ready <= rs2_ready_n;
rs3_ready <= rs3_ready_n;
rs2 <= rs2_n;
rs3 <= rs3_n;
rs1_data <= rs1_data_n;
rs2_data <= rs2_data_n;
rs3_data <= rs3_data_n;
cache_data <= cache_data_n;
cache_reg <= cache_reg_n;
cache_tmask <= cache_tmask_n;
end
assign stg_valid_in = scoreboard_if.valid && data_ready;
assign scoreboard_if.ready = stg_ready_in && data_ready;
VX_toggle_buffer #(
.DATAW (DATAW)
) toggle_buffer (
.clk (clk),
.reset (reset),
.valid_in (stg_valid_in),
.data_in ({
scoreboard_if.data.uuid,
scoreboard_if.data.wis,
scoreboard_if.data.tmask,
scoreboard_if.data.PC,
scoreboard_if.data.wb,
scoreboard_if.data.ex_type,
scoreboard_if.data.op_type,
scoreboard_if.data.op_args,
scoreboard_if.data.rd
}),
.ready_in (stg_ready_in),
.valid_out (operands_if.valid),
.data_out ({
operands_if.data.uuid,
operands_if.data.wis,
operands_if.data.tmask,
operands_if.data.PC,
operands_if.data.wb,
operands_if.data.ex_type,
operands_if.data.op_type,
operands_if.data.op_args,
operands_if.data.rd
}),
.ready_out (operands_if.ready)
);
assign operands_if.data.rs1_data = rs1_data;
assign operands_if.data.rs2_data = rs2_data;
assign operands_if.data.rs3_data = rs3_data;
// GPR banks
reg [RAM_ADDRW-1:0] gpr_rd_addr;
wire [RAM_ADDRW-1:0] gpr_wr_addr;
if (ISSUE_WIS != 0) begin
assign gpr_wr_addr = {writeback_if.data.wis, writeback_if.data.rd};
always @(posedge clk) begin
gpr_rd_addr <= {gpr_rd_wis_n, gpr_rd_rid_n};
end
end else begin
assign gpr_wr_addr = writeback_if.data.rd;
always @(posedge clk) begin
gpr_rd_addr <= gpr_rd_rid_n;
end
end
`ifdef GPR_RESET
reg wr_enabled = 0;
always @(posedge clk) begin
if (reset) begin
wr_enabled <= 1;
end
end
`endif
for (genvar j = 0; j < `NUM_THREADS; ++j) begin
VX_dp_ram #(
.DATAW (`XLEN),
.SIZE (`NUM_REGS * ISSUE_RATIO),
`ifdef GPR_RESET
.INIT_ENABLE (1),
.INIT_VALUE (0),
`endif
.NO_RWCHECK (1)
) gpr_ram (
.clk (clk),
.read (1'b1),
`UNUSED_PIN (wren),
`ifdef GPR_RESET
.write (wr_enabled && writeback_if.valid && writeback_if.data.tmask[j]),
`else
.write (writeback_if.valid && writeback_if.data.tmask[j]),
`endif
.waddr (gpr_wr_addr),
.wdata (writeback_if.data.data[j]),
.raddr (gpr_rd_addr),
.rdata (gpr_rd_data[j])
);
end
endmodule


@ -0,0 +1,79 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`include "VX_define.vh"
module VX_pending_instr #(
parameter CTR_WIDTH = 12,
parameter ALM_EMPTY = 1,
parameter DECR_COUNT = 1
) (
input wire clk,
input wire reset,
input wire incr,
input wire [`NW_WIDTH-1:0] incr_wid,
input wire [DECR_COUNT-1:0] decr,
input wire [DECR_COUNT-1:0][`NW_WIDTH-1:0] decr_wid,
input wire [`NW_WIDTH-1:0] alm_empty_wid,
output wire empty,
output wire alm_empty
);
localparam COUNTW = `CLOG2(DECR_COUNT+1);
reg [`NUM_WARPS-1:0][CTR_WIDTH-1:0] pending_instrs;
reg [`NUM_WARPS-1:0][COUNTW-1:0] decr_cnt;
reg [`NUM_WARPS-1:0][DECR_COUNT-1:0] decr_mask;
reg [`NUM_WARPS-1:0] incr_cnt, incr_cnt_n;
reg [`NUM_WARPS-1:0] alm_empty_r, empty_r;
always @(*) begin
incr_cnt_n = 0;
decr_mask = 0;
if (incr) begin
incr_cnt_n[incr_wid] = 1;
end
for (integer i = 0; i < DECR_COUNT; ++i) begin
if (decr[i]) begin
decr_mask[decr_wid[i]][i] = 1;
end
end
end
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
wire [COUNTW-1:0] decr_cnt_n;
`POP_COUNT(decr_cnt_n, decr_mask[i]);
wire [CTR_WIDTH-1:0] pending_instrs_n = pending_instrs[i] + CTR_WIDTH'(incr_cnt[i]) - CTR_WIDTH'(decr_cnt[i]);
always @(posedge clk) begin
if (reset) begin
incr_cnt[i] <= '0;
decr_cnt[i] <= '0;
pending_instrs[i] <= '0;
alm_empty_r[i] <= 0;
empty_r[i] <= 1;
end else begin
incr_cnt[i] <= incr_cnt_n[i];
decr_cnt[i] <= decr_cnt_n;
pending_instrs[i] <= pending_instrs_n;
alm_empty_r[i] <= (pending_instrs_n == ALM_EMPTY);
empty_r[i] <= (pending_instrs_n == 0);
end
end
end
assign alm_empty = alm_empty_r[alm_empty_wid];
assign empty = (& empty_r);
endmodule

hw/rtl/core/VX_trace.vh (new file, 387 lines)

@ -0,0 +1,387 @@
// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
`ifndef VX_TRACE_VH
`define VX_TRACE_VH
`ifdef SIMULATION
task trace_ex_type(input int level, input [`EX_BITS-1:0] ex_type);
case (ex_type)
`EX_ALU: `TRACE(level, ("ALU"));
`EX_LSU: `TRACE(level, ("LSU"));
`EX_FPU: `TRACE(level, ("FPU"));
`EX_SFU: `TRACE(level, ("SFU"));
default: `TRACE(level, ("?"));
endcase
endtask
task trace_ex_op(input int level,
input [`EX_BITS-1:0] ex_type,
input [`INST_OP_BITS-1:0] op_type,
input VX_gpu_pkg::op_args_t op_args
);
case (ex_type)
`EX_ALU: begin
case (op_args.alu.xtype)
`ALU_TYPE_ARITH: begin
if (op_args.alu.is_w) begin
if (op_args.alu.use_imm) begin
case (`INST_ALU_BITS'(op_type))
`INST_ALU_ADD: `TRACE(level, ("ADDIW"));
`INST_ALU_SLL: `TRACE(level, ("SLLIW"));
`INST_ALU_SRL: `TRACE(level, ("SRLIW"));
`INST_ALU_SRA: `TRACE(level, ("SRAIW"));
default: `TRACE(level, ("?"));
endcase
end else begin
case (`INST_ALU_BITS'(op_type))
`INST_ALU_ADD: `TRACE(level, ("ADDW"));
`INST_ALU_SUB: `TRACE(level, ("SUBW"));
`INST_ALU_SLL: `TRACE(level, ("SLLW"));
`INST_ALU_SRL: `TRACE(level, ("SRLW"));
`INST_ALU_SRA: `TRACE(level, ("SRAW"));
default: `TRACE(level, ("?"));
endcase
end
end else begin
if (op_args.alu.use_imm) begin
case (`INST_ALU_BITS'(op_type))
`INST_ALU_ADD: `TRACE(level, ("ADDI"));
`INST_ALU_SLL: `TRACE(level, ("SLLI"));
`INST_ALU_SRL: `TRACE(level, ("SRLI"));
`INST_ALU_SRA: `TRACE(level, ("SRAI"));
`INST_ALU_SLT: `TRACE(level, ("SLTI"));
`INST_ALU_SLTU: `TRACE(level, ("SLTIU"));
`INST_ALU_XOR: `TRACE(level, ("XORI"));
`INST_ALU_OR: `TRACE(level, ("ORI"));
`INST_ALU_AND: `TRACE(level, ("ANDI"));
`INST_ALU_LUI: `TRACE(level, ("LUI"));
`INST_ALU_AUIPC: `TRACE(level, ("AUIPC"));
default: `TRACE(level, ("?"));
endcase
end else begin
case (`INST_ALU_BITS'(op_type))
`INST_ALU_ADD: `TRACE(level, ("ADD"));
`INST_ALU_SUB: `TRACE(level, ("SUB"));
`INST_ALU_SLL: `TRACE(level, ("SLL"));
`INST_ALU_SRL: `TRACE(level, ("SRL"));
`INST_ALU_SRA: `TRACE(level, ("SRA"));
`INST_ALU_SLT: `TRACE(level, ("SLT"));
`INST_ALU_SLTU: `TRACE(level, ("SLTU"));
`INST_ALU_XOR: `TRACE(level, ("XOR"));
`INST_ALU_OR: `TRACE(level, ("OR"));
`INST_ALU_AND: `TRACE(level, ("AND"));
`INST_ALU_CZEQ: `TRACE(level, ("CZERO.EQZ"));
`INST_ALU_CZNE: `TRACE(level, ("CZERO.NEZ"));
default: `TRACE(level, ("?"));
endcase
end
end
end
`ALU_TYPE_BRANCH: begin
case (`INST_BR_BITS'(op_type))
`INST_BR_EQ: `TRACE(level, ("BEQ"));
`INST_BR_NE: `TRACE(level, ("BNE"));
`INST_BR_LT: `TRACE(level, ("BLT"));
`INST_BR_GE: `TRACE(level, ("BGE"));
`INST_BR_LTU: `TRACE(level, ("BLTU"));
`INST_BR_GEU: `TRACE(level, ("BGEU"));
`INST_BR_JAL: `TRACE(level, ("JAL"));
`INST_BR_JALR: `TRACE(level, ("JALR"));
`INST_BR_ECALL: `TRACE(level, ("ECALL"));
`INST_BR_EBREAK:`TRACE(level, ("EBREAK"));
`INST_BR_URET: `TRACE(level, ("URET"));
`INST_BR_SRET: `TRACE(level, ("SRET"));
`INST_BR_MRET: `TRACE(level, ("MRET"));
default: `TRACE(level, ("?"));
endcase
end
`ALU_TYPE_MULDIV: begin
if (op_args.alu.is_w) begin
case (`INST_M_BITS'(op_type))
`INST_M_MUL: `TRACE(level, ("MULW"));
`INST_M_DIV: `TRACE(level, ("DIVW"));
`INST_M_DIVU: `TRACE(level, ("DIVUW"));
`INST_M_REM: `TRACE(level, ("REMW"));
`INST_M_REMU: `TRACE(level, ("REMUW"));
default: `TRACE(level, ("?"));
endcase
end else begin
case (`INST_M_BITS'(op_type))
`INST_M_MUL: `TRACE(level, ("MUL"));
`INST_M_MULH: `TRACE(level, ("MULH"));
`INST_M_MULHSU:`TRACE(level, ("MULHSU"));
`INST_M_MULHU: `TRACE(level, ("MULHU"));
`INST_M_DIV: `TRACE(level, ("DIV"));
`INST_M_DIVU: `TRACE(level, ("DIVU"));
`INST_M_REM: `TRACE(level, ("REM"));
`INST_M_REMU: `TRACE(level, ("REMU"));
default: `TRACE(level, ("?"));
endcase
end
end
default: `TRACE(level, ("?"));
endcase
end
`EX_LSU: begin
if (op_args.lsu.is_float) begin
case (`INST_LSU_BITS'(op_type))
`INST_LSU_LW: `TRACE(level, ("FLW"));
`INST_LSU_LD: `TRACE(level, ("FLD"));
`INST_LSU_SW: `TRACE(level, ("FSW"));
`INST_LSU_SD: `TRACE(level, ("FSD"));
default: `TRACE(level, ("?"));
endcase
end else begin
case (`INST_LSU_BITS'(op_type))
`INST_LSU_LB: `TRACE(level, ("LB"));
`INST_LSU_LH: `TRACE(level, ("LH"));
`INST_LSU_LW: `TRACE(level, ("LW"));
`INST_LSU_LD: `TRACE(level, ("LD"));
`INST_LSU_LBU:`TRACE(level, ("LBU"));
`INST_LSU_LHU:`TRACE(level, ("LHU"));
`INST_LSU_LWU:`TRACE(level, ("LWU"));
`INST_LSU_SB: `TRACE(level, ("SB"));
`INST_LSU_SH: `TRACE(level, ("SH"));
`INST_LSU_SW: `TRACE(level, ("SW"));
`INST_LSU_SD: `TRACE(level, ("SD"));
`INST_LSU_FENCE:`TRACE(level,("FENCE"));
default: `TRACE(level, ("?"));
endcase
end
end
`EX_FPU: begin
case (`INST_FPU_BITS'(op_type))
`INST_FPU_ADD: begin
if (op_args.fpu.fmt[0])
`TRACE(level, ("FADD.D"));
else
`TRACE(level, ("FADD.S"));
end
`INST_FPU_SUB: begin
if (op_args.fpu.fmt[0])
`TRACE(level, ("FSUB.D"));
else
`TRACE(level, ("FSUB.S"));
end
`INST_FPU_MUL: begin
if (op_args.fpu.fmt[0])
`TRACE(level, ("FMUL.D"));
else
`TRACE(level, ("FMUL.S"));
end
`INST_FPU_DIV: begin
if (op_args.fpu.fmt[0])
`TRACE(level, ("FDIV.D"));
else
`TRACE(level, ("FDIV.S"));
end
`INST_FPU_SQRT: begin
if (op_args.fpu.fmt[0])
`TRACE(level, ("FSQRT.D"));
else
`TRACE(level, ("FSQRT.S"));
end
`INST_FPU_MADD: begin
if (op_args.fpu.fmt[0])
`TRACE(level, ("FMADD.D"));
else
`TRACE(level, ("FMADD.S"));
end
`INST_FPU_MSUB: begin
if (op_args.fpu.fmt[0])
`TRACE(level, ("FMSUB.D"));
else
`TRACE(level, ("FMSUB.S"));
end
`INST_FPU_NMADD: begin
if (op_args.fpu.fmt[0])
`TRACE(level, ("FNMADD.D"));
else
`TRACE(level, ("FNMADD.S"));
end
`INST_FPU_NMSUB: begin
if (op_args.fpu.fmt[0])
`TRACE(level, ("FNMSUB.D"));
else
`TRACE(level, ("FNMSUB.S"));
end
`INST_FPU_CMP: begin
if (op_args.fpu.fmt[0]) begin
case (op_args.fpu.frm[1:0])
0: `TRACE(level, ("FLE.D"));
1: `TRACE(level, ("FLT.D"));
2: `TRACE(level, ("FEQ.D"));
default: `TRACE(level, ("?"));
endcase
end else begin
case (op_args.fpu.frm[1:0])
0: `TRACE(level, ("FLE.S"));
1: `TRACE(level, ("FLT.S"));
2: `TRACE(level, ("FEQ.S"));
default: `TRACE(level, ("?"));
endcase
end
end
`INST_FPU_F2F: begin
if (op_args.fpu.fmt[0]) begin
`TRACE(level, ("FCVT.D.S"));
end else begin
`TRACE(level, ("FCVT.S.D"));
end
end
`INST_FPU_F2I: begin
if (op_args.fpu.fmt[0]) begin
if (op_args.fpu.fmt[1]) begin
`TRACE(level, ("FCVT.L.D"));
end else begin
`TRACE(level, ("FCVT.W.D"));
end
end else begin
if (op_args.fpu.fmt[1]) begin
`TRACE(level, ("FCVT.L.S"));
end else begin
`TRACE(level, ("FCVT.W.S"));
end
end
end
`INST_FPU_F2U: begin
if (op_args.fpu.fmt[0]) begin
if (op_args.fpu.fmt[1]) begin
`TRACE(level, ("FCVT.LU.D"));
end else begin
`TRACE(level, ("FCVT.WU.D"));
end
end else begin
if (op_args.fpu.fmt[1]) begin
`TRACE(level, ("FCVT.LU.S"));
end else begin
`TRACE(level, ("FCVT.WU.S"));
end
end
end
`INST_FPU_I2F: begin
if (op_args.fpu.fmt[0]) begin
if (op_args.fpu.fmt[1]) begin
`TRACE(level, ("FCVT.D.L"));
end else begin
`TRACE(level, ("FCVT.D.W"));
end
end else begin
if (op_args.fpu.fmt[1]) begin
`TRACE(level, ("FCVT.S.L"));
end else begin
`TRACE(level, ("FCVT.S.W"));
end
end
end
`INST_FPU_U2F: begin
if (op_args.fpu.fmt[0]) begin
if (op_args.fpu.fmt[1]) begin
`TRACE(level, ("FCVT.D.LU"));
end else begin
`TRACE(level, ("FCVT.D.WU"));
end
end else begin
if (op_args.fpu.fmt[1]) begin
`TRACE(level, ("FCVT.S.LU"));
end else begin
`TRACE(level, ("FCVT.S.WU"));
end
end
end
`INST_FPU_MISC: begin
if (op_args.fpu.fmt[0]) begin
case (op_args.fpu.frm)
0: `TRACE(level, ("FSGNJ.D"));
1: `TRACE(level, ("FSGNJN.D"));
2: `TRACE(level, ("FSGNJX.D"));
3: `TRACE(level, ("FCLASS.D"));
4: `TRACE(level, ("FMV.X.D"));
5: `TRACE(level, ("FMV.D.X"));
6: `TRACE(level, ("FMIN.D"));
7: `TRACE(level, ("FMAX.D"));
endcase
end else begin
case (op_args.fpu.frm)
0: `TRACE(level, ("FSGNJ.S"));
1: `TRACE(level, ("FSGNJN.S"));
2: `TRACE(level, ("FSGNJX.S"));
3: `TRACE(level, ("FCLASS.S"));
4: `TRACE(level, ("FMV.X.S"));
5: `TRACE(level, ("FMV.S.X"));
6: `TRACE(level, ("FMIN.S"));
7: `TRACE(level, ("FMAX.S"));
endcase
end
end
default: `TRACE(level, ("?"));
endcase
end
`EX_SFU: begin
case (`INST_SFU_BITS'(op_type))
`INST_SFU_TMC: `TRACE(level, ("TMC"));
`INST_SFU_WSPAWN:`TRACE(level, ("WSPAWN"));
`INST_SFU_SPLIT: begin if (op_args.wctl.is_neg) `TRACE(level, ("SPLIT.N")); else `TRACE(level, ("SPLIT")); end
`INST_SFU_JOIN: `TRACE(level, ("JOIN"));
`INST_SFU_BAR: `TRACE(level, ("BAR"));
`INST_SFU_PRED: begin if (op_args.wctl.is_neg) `TRACE(level, ("PRED.N")); else `TRACE(level, ("PRED")); end
`INST_SFU_CSRRW: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRWI")); else `TRACE(level, ("CSRRW")); end
`INST_SFU_CSRRS: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRSI")); else `TRACE(level, ("CSRRS")); end
`INST_SFU_CSRRC: begin if (op_args.csr.use_imm) `TRACE(level, ("CSRRCI")); else `TRACE(level, ("CSRRC")); end
default: `TRACE(level, ("?"));
endcase
end
default: `TRACE(level, ("?"));
endcase
endtask
task trace_op_args(input int level,
input [`EX_BITS-1:0] ex_type,
input [`INST_OP_BITS-1:0] op_type,
input VX_gpu_pkg::op_args_t op_args
);
case (ex_type)
`EX_ALU: begin
`TRACE(level, (", use_PC=%b, use_imm=%b, imm=0x%0h", op_args.alu.use_PC, op_args.alu.use_imm, op_args.alu.imm));
end
`EX_LSU: begin
`TRACE(level, (", offset=0x%0h", op_args.lsu.offset));
end
`EX_FPU: begin
`TRACE(level, (", fmt=0x%0h, frm=0x%0h", op_args.fpu.fmt, op_args.fpu.frm));
end
`EX_SFU: begin
if (`INST_SFU_IS_CSR(op_type)) begin
`TRACE(level, (", addr=0x%0h, use_imm=%b, imm=0x%0h", op_args.csr.addr, op_args.csr.use_imm, op_args.csr.imm));
end
end
default:;
endcase
endtask
task trace_base_dcr(input int level, input [`VX_DCR_ADDR_WIDTH-1:0] addr);
case (addr)
`VX_DCR_BASE_STARTUP_ADDR0: `TRACE(level, ("STARTUP_ADDR0"));
`VX_DCR_BASE_STARTUP_ADDR1: `TRACE(level, ("STARTUP_ADDR1"));
`VX_DCR_BASE_STARTUP_ARG0: `TRACE(level, ("STARTUP_ARG0"));
`VX_DCR_BASE_STARTUP_ARG1: `TRACE(level, ("STARTUP_ARG1"));
`VX_DCR_BASE_MPM_CLASS: `TRACE(level, ("MPM_CLASS"));
default: `TRACE(level, ("?"));
endcase
endtask
`endif
`endif // VX_TRACE_VH


@ -32,6 +32,10 @@ CFLAGS += -O3 -mcmodel=medany -fno-exceptions -fdata-sections -ffunction-section
CFLAGS += -I$(INC_DIR) -I$(ROOT_DIR)/hw
CFLAGS += -DXLEN_$(XLEN)
ifeq ($(VM_ENABLE), 1)
CFLAGS += -DVM_ENABLE
endif
PROJECT := libvortex
SRCS = $(SRC_DIR)/vx_start.S $(SRC_DIR)/vx_syscalls.c $(SRC_DIR)/vx_print.S $(SRC_DIR)/tinyprintf.c $(SRC_DIR)/vx_print.c $(SRC_DIR)/vx_spawn.c $(SRC_DIR)/vx_serial.S $(SRC_DIR)/vx_perf.c


@ -0,0 +1,46 @@
diff --git a/Makefile b/Makefile
index ea340c8..d2aac5b 100644
--- a/Makefile
+++ b/Makefile
@@ -7,16 +7,16 @@ OBJS := $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.o, $(SRCS))
# Ramulator currently supports g++ 5.1+ or clang++ 3.4+. It will NOT work with
# g++ 4.x due to an internal compiler error when processing lambda functions.
-CXX := clang++
+#CXX := clang++
# CXX := g++-5
-CXXFLAGS := -O3 -std=c++11 -g -Wall
+CXXFLAGS := -std=c++11 -O3 -g -Wall -fPIC
.PHONY: all clean depend
all: depend ramulator
clean:
- rm -f ramulator
+ rm -f ramulator libramulator.a
rm -rf $(OBJDIR)
depend: $(OBJDIR)/.depend
@@ -36,7 +36,7 @@ ramulator: $(MAIN) $(OBJS) $(SRCDIR)/*.h | depend
$(CXX) $(CXXFLAGS) -DRAMULATOR -o $@ $(MAIN) $(OBJS)
libramulator.a: $(OBJS) $(OBJDIR)/Gem5Wrapper.o
- libtool -static -o $@ $(OBJS) $(OBJDIR)/Gem5Wrapper.o
+ $(AR) rcs $@ $^
$(OBJS): | $(OBJDIR)
diff --git a/src/Request.h b/src/Request.h
index 57abd0d..a5ce061 100644
--- a/src/Request.h
+++ b/src/Request.h
@@ -36,7 +36,7 @@ public:
Request(long addr, Type type, int coreid = 0)
: is_first_command(true), addr(addr), coreid(coreid), type(type),
- callback([](Request& req){}) {}
+ callback([](Request&){}) {}
Request(long addr, Type type, function<void(Request&)> callback, int coreid = 0)
: is_first_command(true), addr(addr), coreid(coreid), type(type), callback(callback) {}


@ -3,6 +3,8 @@ include $(ROOT_DIR)/config.mk
all: stub rtlsim simx opae xrt
vm: stub simx
stub:
$(MAKE) -C stub


@ -25,7 +25,7 @@
#define CACHE_BLOCK_SIZE 64
#define RAM_PAGE_SIZE 4096
#define RAM_PAGE_SIZE 4096 // Please use MEM_PAGE_SIZE in VX_config.h
#define ALLOC_BASE_ADDR USER_BASE_ADDR


@ -39,6 +39,15 @@ public:
page_t* currPage = pages_;
while (currPage) {
auto nextPage = currPage->next;
#ifdef VM_ENABLE
block_t* currblock = currPage->findfirstUsedBlock();
block_t* nextblock;
while (currblock) {
nextblock= currblock->nextUsed;
currPage->release(currblock);
currblock = nextblock;
}
#endif
delete currPage;
currPage = nextPage;
}
@ -70,7 +79,7 @@ public:
size = alignSize(size, pageAlign_);
// Check if the reservation is within memory capacity bounds
if (addr + size > capacity_) {
if (addr + size > baseAddress_ + capacity_) {
printf("error: address range out of bounds\n");
return -1;
}
@ -118,12 +127,12 @@ public:
auto pageSize = alignSize(size, pageAlign_);
uint64_t pageAddr;
if (!this->findNextAddress(pageSize, &pageAddr)) {
printf("error: out of memory\n");
printf("error: out of memory (Can't find next address)\n");
return -1;
}
currPage = this->createPage(pageAddr, pageSize);
if (nullptr == currPage) {
printf("error: out of memory\n");
printf("error: out of memory (Can't create a page)\n");
return -1;
}
freeBlock = currPage->findFreeBlock(size);
@ -335,6 +344,11 @@ private:
}
return nullptr;
}
#ifdef VM_ENABLE
block_t* findfirstUsedBlock() {
return usedList_;
}
#endif
private:
@ -480,7 +494,7 @@ private:
bool findNextAddress(uint64_t size, uint64_t* addr) {
if (pages_ == nullptr) {
*addr = baseAddress_;
*addr = baseAddress_;
return true;
}
@ -498,10 +512,10 @@ private:
endOfLastPage = current->addr + current->size;
current = current->next;
}
// If no suitable gap is found, place the new page at the end of the last page
// Check if the allocator has enough capacity
if ((endOfLastPage + size) <= capacity_) {
if ((endOfLastPage + size) <= (baseAddress_ + capacity_)) {
*addr = endOfLastPage;
return true;
}


@ -66,6 +66,7 @@ typedef void* vx_buffer_h;
#define VX_MEM_READ 0x1
#define VX_MEM_WRITE 0x2
#define VX_MEM_READ_WRITE 0x3
#define VX_MEM_PIN_MEMORY 0x4
// open the device and connect to it
int vx_dev_open(vx_device_h* hdevice);


@ -10,6 +10,10 @@ CXXFLAGS += -I$(INC_DIR) -I../common -I$(ROOT_DIR)/hw -I$(SIM_DIR)/simx -I$(COMM
CXXFLAGS += $(CONFIGS)
CXXFLAGS += -DXLEN_$(XLEN)
ifeq ($(VM_ENABLE), 1)
CXXFLAGS += -DVM_ENABLE
endif
LDFLAGS += -shared -pthread
LDFLAGS += -L$(DESTDIR) -lsimx


@ -27,24 +27,48 @@
#include <future>
#include <chrono>
#include <VX_config.h>
#ifdef VM_ENABLE
#include <malloc.h>
#include <VX_types.h>
#include <util.h>
#include <processor.h>
#include <arch.h>
#include <mem.h>
#include <constants.h>
#include <unordered_map>
#include <array>
#include <cmath>
#endif
using namespace vortex;
class vx_device {
public:
vx_device()
: arch_(NUM_THREADS, NUM_WARPS, NUM_CORES)
, ram_(0, RAM_PAGE_SIZE)
, processor_(arch_)
, global_mem_(ALLOC_BASE_ADDR,
GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR,
RAM_PAGE_SIZE,
CACHE_BLOCK_SIZE)
{
// attach memory module
processor_.attach_ram(&ram_);
}
vx_device()
: arch_(NUM_THREADS, NUM_WARPS, NUM_CORES)
, ram_(0, MEM_PAGE_SIZE)
, processor_(arch_)
, global_mem_(ALLOC_BASE_ADDR, GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR, MEM_PAGE_SIZE, CACHE_BLOCK_SIZE)
{
// attach memory module
processor_.attach_ram(&ram_);
#ifdef VM_ENABLE
CHECK_ERR(init_VM(), );
#endif
}
~vx_device() {
#ifdef VM_ENABLE
global_mem_.release(PAGE_TABLE_BASE_ADDR);
// for (auto i = addr_mapping.begin(); i != addr_mapping.end(); i++)
// page_table_mem_->release(i->second << MEM_PAGE_SIZE);
delete virtual_mem_;
delete page_table_mem_;
#endif
if (future_.valid()) {
future_.wait();
}
@ -93,35 +117,131 @@ public:
return 0;
}
int mem_alloc(uint64_t size, int flags, uint64_t* dev_addr) {
uint64_t addr;
CHECK_ERR(global_mem_.allocate(size, &addr), {
#ifdef VM_ENABLE
// physical (ppn) to virtual (vpn) mapping
uint64_t map_p2v(uint64_t ppn, uint32_t flags)
{
DBGPRINT(" [RT:MAP_P2V] ppn: %lx\n", ppn);
if (addr_mapping.find(ppn) != addr_mapping.end()) return addr_mapping[ppn];
// If a ppn-to-vpn mapping doesn't exist, create one
DBGPRINT(" [RT:MAP_P2V] Not found. Allocate new page table or update a PTE.\n");
uint64_t vpn;
virtual_mem_->allocate(MEM_PAGE_SIZE, &vpn);
vpn = vpn >> MEM_PAGE_LOG2_SIZE;
CHECK_ERR(update_page_table(ppn, vpn, flags),);
addr_mapping[ppn] = vpn;
return vpn;
}
bool need_trans(uint64_t dev_pAddr)
{
// Check if the satp is set and BARE mode
if (processor_.is_satp_unset() || get_mode() == BARE)
return 0;
// Check if the address is reserved for system usage
// bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr && dev_pAddr < PAGE_TABLE_BASE_ADDR + PT_SIZE_LIMIT);
if (PAGE_TABLE_BASE_ADDR <= dev_pAddr)
return 0;
// Check if the address is reserved for IO usage
if (dev_pAddr < USER_BASE_ADDR)
return 0;
// Check if the address falls within the startup address range
if ((STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)))
return 0;
// Now all conditions are not met. Return true because the address needs translation
return 1;
}
uint64_t phy_to_virt_map(uint64_t size, uint64_t *dev_pAddr, uint32_t flags)
{
DBGPRINT(" [RT:PTV_MAP] size = 0x%lx, dev_pAddr= 0x%lx, flags = 0x%x\n", size, *dev_pAddr, flags);
DBGPRINT(" [RT:PTV_MAP] bit mode: %d\n", XLEN);
if (!need_trans(*dev_pAddr))
{
DBGPRINT(" [RT:PTV_MAP] Translation is not needed.\n");
return 0;
}
uint64_t init_pAddr = *dev_pAddr;
uint64_t init_vAddr = (map_p2v(init_pAddr >> MEM_PAGE_LOG2_SIZE, flags) << MEM_PAGE_LOG2_SIZE) | (init_pAddr & ((1 << MEM_PAGE_LOG2_SIZE) - 1));
uint64_t ppn = 0, vpn = 0;
// dev_pAddr can be of size greater than a page, but we have to map and update
// page tables on a page table granularity. So divide the allocation into pages.
// FUTURE Work: Super Page
for (ppn = (*dev_pAddr >> MEM_PAGE_LOG2_SIZE); ppn < ((*dev_pAddr) >> MEM_PAGE_LOG2_SIZE) + (size >> MEM_PAGE_LOG2_SIZE) ; ppn++)
{
vpn = map_p2v(ppn, flags) >> MEM_PAGE_LOG2_SIZE;
DBGPRINT(" [RT:PTV_MAP] Search vpn in page table:0x%lx\n", vpn);
// Currently a 1-1 mapping is used, this can be changed here to support different
// mapping schemes
}
DBGPRINT(" [RT:PTV_MAP] Mapped virtual addr: 0x%lx to physical addr: 0x%lx\n", init_vAddr, init_pAddr);
// Sanity check
assert(page_table_walk(init_vAddr) == init_pAddr && "ERROR: translated virtual address does not match the physical address\n");
*dev_pAddr = init_vAddr; // commit vpn to be returned to host
DBGPRINT(" [RT:PTV_MAP] Translated device virtual addr: 0x%lx\n", *dev_pAddr);
return 0;
}
#endif
int mem_alloc(uint64_t size, int flags, uint64_t *dev_addr)
{
uint64_t asize = aligned_size(size, MEM_PAGE_SIZE);
uint64_t addr = 0;
DBGPRINT("[RT:mem_alloc] size: 0x%lx, asize, 0x%lx,flag : 0x%d\n", size, asize, flags);
// HW: when vm is supported this global_mem_ should be virtual memory allocator
CHECK_ERR(global_mem_.allocate(asize, &addr), {
return err;
});
CHECK_ERR(this->mem_access(addr, size, flags), {
CHECK_ERR(this->mem_access(addr, asize, flags), {
global_mem_.release(addr);
return err;
});
*dev_addr = addr;
#ifdef VM_ENABLE
// VM address translation
phy_to_virt_map(asize, dev_addr, flags);
#endif
return 0;
}
int mem_reserve(uint64_t dev_addr, uint64_t size, int flags) {
CHECK_ERR(global_mem_.reserve(dev_addr, size), {
int mem_reserve(uint64_t dev_addr, uint64_t size, int flags)
{
uint64_t asize = aligned_size(size, MEM_PAGE_SIZE);
CHECK_ERR(global_mem_.reserve(dev_addr, asize), {
return err;
});
CHECK_ERR(this->mem_access(dev_addr, size, flags), {
DBGPRINT("[RT:mem_reserve] addr: 0x%lx, asize:0x%lx, size: 0x%lx\n", dev_addr, asize, size);
CHECK_ERR(this->mem_access(dev_addr, asize, flags), {
global_mem_.release(dev_addr);
return err;
});
return 0;
}
int mem_free(uint64_t dev_addr) {
int mem_free(uint64_t dev_addr)
{
#ifdef VM_ENABLE
uint64_t paddr = page_table_walk(dev_addr);
return global_mem_.release(paddr);
#else
return global_mem_.release(dev_addr);
#endif
}
int mem_access(uint64_t dev_addr, uint64_t size, int flags) {
int mem_access(uint64_t dev_addr, uint64_t size, int flags)
{
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
if (dev_addr + asize > GLOBAL_MEM_SIZE)
return -1;
@ -130,7 +250,8 @@ public:
return 0;
}
int mem_info(uint64_t* mem_free, uint64_t* mem_used) const {
int mem_info(uint64_t *mem_free, uint64_t *mem_used) const
{
if (mem_free)
*mem_free = global_mem_.free();
if (mem_used)
@ -138,16 +259,31 @@ public:
return 0;
}
int upload(uint64_t dest_addr, const void* src, uint64_t size) {
int upload(uint64_t dest_addr, const void *src, uint64_t size)
{
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
if (dest_addr + asize > GLOBAL_MEM_SIZE)
return -1;
#ifdef VM_ENABLE
uint64_t pAddr = page_table_walk(dest_addr);
// uint64_t pAddr;
// try {
// pAddr = page_table_walk(dest_addr);
// } catch ( Page_Fault_Exception ) {
// // HW: place holder
// // should be virt_to_phy_map here
// phy_to_virt_map(0, dest_addr, 0);
// }
DBGPRINT(" [RT:upload] Upload data to vAddr = 0x%lx (pAddr=0x%lx)\n", dest_addr, pAddr);
dest_addr = pAddr; // Overwrite with the translated physical address
#endif
ram_.enable_acl(false);
ram_.write((const uint8_t*)src, dest_addr, size);
ram_.write((const uint8_t *)src, dest_addr, size);
ram_.enable_acl(true);
/*DBGPRINT("upload %ld bytes to 0x%lx\n", size, dest_addr);
/*
DBGPRINT("upload %ld bytes to 0x%lx\n", size, dest_addr);
for (uint64_t i = 0; i < size && i < 1024; i += 4) {
DBGPRINT(" 0x%lx <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + i));
}*/
@ -155,13 +291,19 @@ public:
return 0;
}
int download(void* dest, uint64_t src_addr, uint64_t size) {
int download(void *dest, uint64_t src_addr, uint64_t size)
{
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
if (src_addr + asize > GLOBAL_MEM_SIZE)
return -1;
#ifdef VM_ENABLE
uint64_t pAddr = page_table_walk(src_addr);
DBGPRINT(" [RT:download] Download data to vAddr = 0x%lx (pAddr=0x%lx)\n", src_addr, pAddr);
src_addr = pAddr; // Overwrite with the translated physical address
#endif
ram_.enable_acl(false);
ram_.read((uint8_t*)dest, src_addr, size);
ram_.read((uint8_t *)dest, src_addr, size);
ram_.enable_acl(true);
/*DBGPRINT("download %ld bytes from 0x%lx\n", size, src_addr);
@ -172,9 +314,11 @@ public:
return 0;
}
int start(uint64_t krnl_addr, uint64_t args_addr) {
int start(uint64_t krnl_addr, uint64_t args_addr)
{
// ensure prior run completed
if (future_.valid()) {
if (future_.valid())
{
future_.wait();
}
@ -185,9 +329,8 @@ public:
this->dcr_write(VX_DCR_BASE_STARTUP_ARG1, args_addr >> 32);
// start new run
future_ = std::async(std::launch::async, [&]{
processor_.run();
});
future_ = std::async(std::launch::async, [&]
{ processor_.run(); });
// clear mpm cache
mpm_cache_.clear();
@ -195,12 +338,14 @@ public:
return 0;
}
int ready_wait(uint64_t timeout) {
int ready_wait(uint64_t timeout)
{
if (!future_.valid())
return 0;
uint64_t timeout_sec = timeout / 1000;
std::chrono::seconds wait_time(1);
for (;;) {
for (;;)
{
// wait for 1 sec and check status
auto status = future_.wait_for(wait_time);
if (status == std::future_status::ready)
@ -211,8 +356,10 @@ public:
return 0;
}
int dcr_write(uint32_t addr, uint32_t value) {
if (future_.valid()) {
int dcr_write(uint32_t addr, uint32_t value)
{
if (future_.valid())
{
future_.wait(); // ensure prior run completed
}
processor_.dcr_write(addr, value);
@ -220,15 +367,18 @@ public:
return 0;
}
int dcr_read(uint32_t addr, uint32_t* value) const {
int dcr_read(uint32_t addr, uint32_t *value) const
{
return dcrs_.read(addr, value);
}
int mpm_query(uint32_t addr, uint32_t core_id, uint64_t* value) {
int mpm_query(uint32_t addr, uint32_t core_id, uint64_t *value)
{
uint32_t offset = addr - VX_CSR_MPM_BASE;
if (offset > 31)
return -1;
if (mpm_cache_.count(core_id) == 0) {
if (mpm_cache_.count(core_id) == 0)
{
uint64_t mpm_mem_addr = IO_MPM_ADDR + core_id * 32 * sizeof(uint64_t);
CHECK_ERR(this->download(mpm_cache_[core_id].data(), mpm_mem_addr, 32 * sizeof(uint64_t)), {
return err;
@ -237,15 +387,275 @@ public:
*value = mpm_cache_.at(core_id).at(offset);
return 0;
}
#ifdef VM_ENABLE
/* VM Management */
// Zero-initialize the target page table area (4KB for 32-bit, 8KB for 64-bit)
uint16_t init_page_table(uint64_t addr, uint64_t size)
{
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
DBGPRINT(" [RT:init_page_table] (addr=0x%lx, size=0x%lx)\n", addr, asize);
uint8_t *src = new uint8_t[asize];
if (src == NULL)
return 1;
for (uint64_t i = 0; i < asize; ++i)
{
src[i] = 0;
}
ram_.enable_acl(false);
ram_.write((const uint8_t *)src, addr, asize);
ram_.enable_acl(true);
return 0;
}
uint8_t alloc_page_table (uint64_t * pt_addr)
{
CHECK_ERR(page_table_mem_->allocate(PT_SIZE, pt_addr), { return err; });
CHECK_ERR(init_page_table(*pt_addr, PT_SIZE), { return err; });
DBGPRINT(" [RT:alloc_page_table] addr= 0x%lx\n", *pt_addr);
return 0;
}
// reserve IO space, startup space, and local mem area
int virtual_mem_reserve(uint64_t dev_addr, uint64_t size, int flags)
{
CHECK_ERR(virtual_mem_->reserve(dev_addr, size), {
return err;
});
DBGPRINT("[RT:mem_reserve] addr: 0x%lx, size:0x%lx, size: 0x%lx\n", dev_addr, size, size);
return 0;
}
int16_t init_VM()
{
uint64_t pt_addr = 0;
// Reserve space for PT
DBGPRINT("[RT:init_VM] Initialize VM\n");
CHECK_ERR(mem_reserve(PAGE_TABLE_BASE_ADDR, PT_SIZE_LIMIT, VX_MEM_READ_WRITE), {
return err;
});
page_table_mem_ = new MemoryAllocator (PAGE_TABLE_BASE_ADDR, PT_SIZE_LIMIT, MEM_PAGE_SIZE, CACHE_BLOCK_SIZE);
if (page_table_mem_ == NULL)
{
CHECK_ERR(this->mem_free(PAGE_TABLE_BASE_ADDR),);
return 1;
}
// HW: virtual mem allocator has the same address range as global_mem. next step is to adjust it
virtual_mem_ = new MemoryAllocator(ALLOC_BASE_ADDR, (GLOBAL_MEM_SIZE - ALLOC_BASE_ADDR), MEM_PAGE_SIZE, CACHE_BLOCK_SIZE);
CHECK_ERR(virtual_mem_reserve(PAGE_TABLE_BASE_ADDR, (GLOBAL_MEM_SIZE - PAGE_TABLE_BASE_ADDR), VX_MEM_READ_WRITE), {
return err;
});
CHECK_ERR(virtual_mem_reserve(STARTUP_ADDR, 0x40000, VX_MEM_READ_WRITE), {
return err;
});
if (virtual_mem_ == nullptr) {
// virtual_mem_ does not interfere with physical memory, so there is no need to free space
return 1;
}
if (VM_ADDR_MODE == BARE)
DBGPRINT("[RT:init_VM] VA_MODE = BARE MODE(addr= 0x0)");
else
CHECK_ERR(alloc_page_table(&pt_addr),{return err;});
CHECK_ERR(processor_.set_satp_by_addr(pt_addr),{return err;});
return 0;
}
// Returns the base PPN held in ptbr
uint64_t get_base_ppn()
{
return processor_.get_base_ppn();
}
uint64_t get_pte_address(uint64_t base_ppn, uint64_t vpn)
{
return (base_ppn * PT_SIZE) + (vpn * PTE_SIZE);
}
uint8_t get_mode()
{
return processor_.get_satp_mode();
}
int16_t update_page_table(uint64_t ppn, uint64_t vpn, uint32_t flag)
{
DBGPRINT(" [RT:Update PT] Mapping vpn 0x%05lx to ppn 0x%05lx(flags = %u)\n", vpn, ppn, flag);
// sanity check
#if VM_ADDR_MODE == SV39
assert((((ppn >> 44) == 0) && ((vpn >> 27) == 0)) && "Upper bits are not zero!");
uint8_t level = 3;
#else // Default is SV32, BARE will not reach this point.
assert((((ppn >> 20) == 0) && ((vpn >> 20) == 0)) && "Upper 12 bits are not zero!");
uint8_t level = 2;
#endif
int i = level - 1;
vAddr_t vaddr(vpn << MEM_PAGE_LOG2_SIZE);
uint64_t pte_addr = 0, pte_bytes = 0;
uint64_t pt_addr = 0;
uint64_t cur_base_ppn = get_base_ppn();
while (i >= 0)
{
DBGPRINT(" [RT:Update PT]Start %u-level page table\n", i);
pte_addr = get_pte_address(cur_base_ppn, vaddr.vpn[i]);
pte_bytes = read_pte(pte_addr);
PTE_t pte_chk(pte_bytes);
DBGPRINT(" [RT:Update PT] PTE addr 0x%lx, PTE bytes 0x%lx\n", pte_addr, pte_bytes);
if (pte_chk.v == 1 && ((pte_bytes & 0xFFFFFFFF) != 0xbaadf00d))
{
DBGPRINT(" [RT:Update PT] PTE valid (ppn 0x%lx), continuing the walk...\n", pte_chk.ppn);
cur_base_ppn = pte_chk.ppn;
}
else
{
// If valid bit not set, allocate a next level page table
DBGPRINT(" [RT:Update PT] PTE Invalid (ppn 0x%lx) ...\n", pte_chk.ppn);
if (i == 0)
{
// Reached the leaf level
DBGPRINT(" [RT:Update PT] Reached to level 0. This should be a leaf node(flag = %x) \n",flag);
uint32_t pte_flag = (flag << 1) | 0x3;
PTE_t new_pte(ppn <<MEM_PAGE_LOG2_SIZE, pte_flag);
write_pte(pte_addr, new_pte.pte_bytes);
break;
}
else
{
// Allocate a next-level page table in device memory and store its ppn in the PTE.
// Set rwx = 000 in the PTE to indicate that it points to the next level of the page table.
// Flag values (READ = 0x1, WRITE = 0x2, RW = 0x3) match the PTE flag bits when shifted left by one.
alloc_page_table(&pt_addr);
uint32_t pte_flag = 0x1;
PTE_t new_pte(pt_addr, pte_flag);
write_pte(pte_addr, new_pte.pte_bytes);
cur_base_ppn = new_pte.ppn;
}
}
i--;
}
return 0;
}
uint64_t page_table_walk(uint64_t vAddr_bits)
{
DBGPRINT(" [RT:PTW] start vAddr: 0x%lx\n", vAddr_bits);
if (!need_trans(vAddr_bits))
{
DBGPRINT(" [RT:PTW] Translation is not needed.\n");
return vAddr_bits;
}
uint8_t level = PT_LEVEL;
int i = level-1;
vAddr_t vaddr(vAddr_bits);
uint64_t pte_addr = 0, pte_bytes = 0;
uint64_t cur_base_ppn = get_base_ppn();
while (true)
{
DBGPRINT(" [RT:PTW]Start %u-level page table walk\n",i);
// Read PTE.
pte_addr = get_pte_address(cur_base_ppn, vaddr.vpn[i]);
pte_bytes = read_pte(pte_addr);
PTE_t pte(pte_bytes);
DBGPRINT(" [RT:PTW] PTE addr 0x%lx, PTE bytes 0x%lx\n", pte_addr, pte_bytes);
assert(((pte.pte_bytes & 0xFFFFFFFF) != 0xbaadf00d) && "ERROR: uninitialized PTE\n" );
// Check if it has invalid flag bits.
if ((pte.v == 0) | ((pte.r == 0) & (pte.w == 1)))
{
std::string msg = " [RT:PTW] Page Fault : Attempted to access invalid entry.";
throw Page_Fault_Exception(msg);
}
if ((pte.r == 0) & (pte.w == 0) & (pte.x == 0))
{
i--;
// Not a leaf node as rwx == 000
if (i < 0)
{
throw Page_Fault_Exception(" [RT:PTW] Page Fault : No leaf node found.");
}
else
{
// Continue on to next level.
cur_base_ppn= pte.ppn ;
DBGPRINT(" [RT:PTW] next base_ppn: 0x%lx\n", cur_base_ppn);
continue;
}
}
else
{
// Leaf node found.
// Check RWX permissions according to access type.
if (pte.r == 0)
{
throw Page_Fault_Exception(" [RT:PTW] Page Fault : TYPE LOAD, Incorrect permissions.");
}
cur_base_ppn= pte.ppn ;
DBGPRINT(" [RT:PTW] Found PT_Base_Address(0x%lx) on Level %d.\n", pte.ppn,i);
break;
}
}
uint64_t paddr = (cur_base_ppn << MEM_PAGE_LOG2_SIZE) + vaddr.pgoff;
return paddr;
}
// void read_page_table(uint64_t addr) {
// uint8_t *dest = new uint8_t[MEM_PAGE_SIZE];
// download(dest, addr, MEM_PAGE_SIZE);
// DBGPRINT("VXDRV: download %d bytes from 0x%x\n", MEM_PAGE_SIZE, addr);
// for (int i = 0; i < MEM_PAGE_SIZE; i += 4) {
// DBGPRINT("mem-read: 0x%x -> 0x%x\n", addr + i, *(uint64_t*)((uint8_t*)dest + i));
// }
// }
void write_pte(uint64_t addr, uint64_t value = 0xbaadf00d)
{
DBGPRINT(" [RT:Write_pte] writing pte 0x%lx to pAddr: 0x%lx\n", value, addr);
uint8_t *src = new uint8_t[PTE_SIZE];
for (uint64_t i = 0; i < PTE_SIZE; ++i)
{
src[i] = (value >> (i << 3)) & 0xff;
}
// std::cout << "writing PTE to RAM addr 0x" << std::hex << addr << std::endl;
ram_.enable_acl(false);
ram_.write((const uint8_t *)src, addr, PTE_SIZE);
ram_.enable_acl(true);
delete[] src;
}
uint64_t read_pte(uint64_t addr)
{
uint8_t *dest = new uint8_t[PTE_SIZE];
#ifdef XLEN_32
uint64_t mask = 0x00000000FFFFFFFF;
#else // 64bit
uint64_t mask = 0xFFFFFFFFFFFFFFFF;
#endif
ram_.read((uint8_t *)dest, addr, PTE_SIZE);
uint64_t ret = (*(uint64_t *)((uint8_t *)dest)) & mask;
DBGPRINT(" [RT:read_pte] reading PTE 0x%lx from RAM addr 0x%lx\n", ret, addr);
delete[] dest;
return ret;
}
#endif // VM_ENABLE
private:
Arch arch_;
RAM ram_;
Processor processor_;
MemoryAllocator global_mem_;
DeviceConfig dcrs_;
std::future<void> future_;
std::unordered_map<uint32_t, std::array<uint64_t, 32>> mpm_cache_;
#ifdef VM_ENABLE
std::unordered_map<uint64_t, uint64_t> addr_mapping; // HW: key: ppn; value: vpn
MemoryAllocator* page_table_mem_;
MemoryAllocator* virtual_mem_;
#endif
};
#include <callbacks.inc>

View file

@ -1,6 +1,9 @@
ROOT_DIR := $(realpath ..)
include $(ROOT_DIR)/config.mk
simx:
$(MAKE) -C simx
all:
$(MAKE) -C simx
$(MAKE) -C rtlsim

View file

@ -17,9 +17,20 @@
#include <fstream>
#include <assert.h>
#include "util.h"
#include <VX_config.h>
#include <bitset>
using namespace vortex;
#ifdef VM_ENABLE
// #ifndef NDEBUG
// #define DBGPRINT(format, ...) do { printf("[VXDRV] " format "", ##__VA_ARGS__); } while (0)
// #else
#define DBGPRINT(format, ...) ((void)0)
// #endif
#endif
RamMemDevice::RamMemDevice(const char *filename, uint32_t wordSize)
: wordSize_(wordSize) {
std::ifstream input(filename);
@ -123,17 +134,95 @@ void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size)
MemoryUnit::MemoryUnit(uint64_t pageSize)
: pageSize_(pageSize)
#ifndef VM_ENABLE
, enableVM_(pageSize != 0)
#endif
, amo_reservation_({0x0, false})
#ifdef VM_ENABLE
, TLB_HIT(0)
, TLB_MISS(0)
, TLB_EVICT(0)
, PTW(0)
, satp_(NULL) {};
#else
{
if (pageSize != 0)
{
tlb_[0] = TLBEntry(0, 077);
}
}
#endif
void MemoryUnit::attach(MemDevice &m, uint64_t start, uint64_t end) {
decoder_.map(start, end, m);
}
#ifdef VM_ENABLE
std::pair<bool, uint64_t> MemoryUnit::tlbLookup(uint64_t vAddr, ACCESS_TYPE type, uint64_t* size_bits) {
//Find entry while accounting for different sizes.
for (auto entry : tlb_)
{
if(entry.first == vAddr >> entry.second.size_bits)
{
*size_bits = entry.second.size_bits;
vAddr = vAddr >> (*size_bits);
}
}
auto iter = tlb_.find(vAddr);
if (iter != tlb_.end()) {
TLBEntry e = iter->second;
//Set mru bit if it is a hit.
iter->second.mru_bit = true;
// If the TLB is at full capacity, clear every mru bit except the entry we just hit.
if (tlb_.size() == TLB_SIZE)
{
// bool no_cleared = true;
// for (auto& entry : tlb_)
// {
// no_cleared = no_cleared & entry.second.mru_bit;
// }
// if(no_cleared)
// {
for (auto& entry : tlb_)
{
entry.second.mru_bit = false;
}
iter->second.mru_bit = true;
//}
}
//Check access permissions.
if ( (type == ACCESS_TYPE::FETCH) & ((e.r == 0) | (e.x == 0)) )
{
throw Page_Fault_Exception("Page Fault : Incorrect permissions.");
}
else if ( (type == ACCESS_TYPE::LOAD) & (e.r == 0) )
{
throw Page_Fault_Exception("Page Fault : Incorrect permissions.");
}
else if ( (type == ACCESS_TYPE::STORE) & (e.w == 0) )
{
throw Page_Fault_Exception("Page Fault : Incorrect permissions.");
}
else
{
//TLB Hit
return std::make_pair(true, iter->second.pfn);
}
} else {
//TLB Miss
return std::make_pair(false, 0);
}
}
#else
MemoryUnit::TLBEntry MemoryUnit::tlbLookup(uint64_t vAddr, uint32_t flagMask) {
auto iter = tlb_.find(vAddr / pageSize_);
if (iter != tlb_.end()) {
@ -157,31 +246,96 @@ uint64_t MemoryUnit::toPhyAddr(uint64_t addr, uint32_t flagMask) {
}
return pAddr;
}
#endif
#ifdef VM_ENABLE
void MemoryUnit::read(void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type) {
DBGPRINT(" [MMU:read] 0x%lx, 0x%x, %u\n",addr,size,type);
uint64_t pAddr;
pAddr = vAddr_to_pAddr(addr, type);
return decoder_.read(data, pAddr, size);
}
#else
void MemoryUnit::read(void* data, uint64_t addr, uint32_t size, bool sup) {
uint64_t pAddr = this->toPhyAddr(addr, sup ? 8 : 1);
return decoder_.read(data, pAddr, size);
}
#endif
#ifdef VM_ENABLE
void MemoryUnit::write(const void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type) {
DBGPRINT(" [MMU:Write] 0x%lx, 0x%x, %u\n",addr,size,type);
uint64_t pAddr;
pAddr = vAddr_to_pAddr(addr, type);
decoder_.write(data, pAddr, size);
amo_reservation_.valid = false;
}
#else
void MemoryUnit::write(const void* data, uint64_t addr, uint32_t size, bool sup) {
uint64_t pAddr = this->toPhyAddr(addr, sup ? 16 : 1);
decoder_.write(data, pAddr, size);
amo_reservation_.valid = false;
}
#endif
#ifdef VM_ENABLE
void MemoryUnit::amo_reserve(uint64_t addr) {
DBGPRINT(" [MMU:amo_reserve] 0x%lx\n",addr);
uint64_t pAddr = this->vAddr_to_pAddr(addr,ACCESS_TYPE::LOAD);
amo_reservation_.addr = pAddr;
amo_reservation_.valid = true;
}
#else
void MemoryUnit::amo_reserve(uint64_t addr) {
uint64_t pAddr = this->toPhyAddr(addr, 1);
amo_reservation_.addr = pAddr;
amo_reservation_.valid = true;
}
#endif
#ifdef VM_ENABLE
bool MemoryUnit::amo_check(uint64_t addr) {
DBGPRINT(" [MMU:amo_check] 0x%lx\n",addr);
uint64_t pAddr = this->vAddr_to_pAddr(addr, ACCESS_TYPE::LOAD);
return amo_reservation_.valid && (amo_reservation_.addr == pAddr);
}
#else
bool MemoryUnit::amo_check(uint64_t addr) {
uint64_t pAddr = this->toPhyAddr(addr, 1);
return amo_reservation_.valid && (amo_reservation_.addr == pAddr);
}
#endif
#ifdef VM_ENABLE
void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits) {
// HW: evict TLB entries using a pseudo-MRU (mru_bit) policy
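// Sketch of the policy: once the table is one entry away from full, every mru_bit is cleared;
// when it is already full, the first entry whose mru_bit is still false is evicted, so entries
// touched since the last clearing (their bits are re-set on lookup) survive.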
if (tlb_.size() == TLB_SIZE - 1) {
for (auto& entry : tlb_)
{
entry.second.mru_bit = false;
}
} else if (tlb_.size() == TLB_SIZE) {
uint64_t del;
for (auto entry : tlb_) {
if (!entry.second.mru_bit)
{
del = entry.first;
break;
}
}
tlb_.erase(tlb_.find(del));
TLB_EVICT++;
}
tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags, size_bits);
}
#else
void MemoryUnit::tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags) {
tlb_[virt / pageSize_] = TLBEntry(phys / pageSize_, flags);
}
#endif
void MemoryUnit::tlbRm(uint64_t va) {
if (tlb_.find(va / pageSize_) != tlb_.end())
@ -325,6 +479,7 @@ uint8_t *RAM::get(uint64_t address) const {
}
void RAM::read(void* data, uint64_t addr, uint64_t size) {
// printf("====%s (addr= 0x%lx, size= 0x%lx) ====\n", __PRETTY_FUNCTION__,addr,size);
if (check_acl_ && acl_mngr_.check(addr, size, 0x1) == false) {
throw BadAddress();
}
@ -435,3 +590,171 @@ void RAM::loadHexImage(const char* filename) {
--size;
}
}
#ifdef VM_ENABLE
uint64_t MemoryUnit::get_base_ppn()
{
assert(satp_!= NULL);
return satp_->get_base_ppn();
}
uint64_t MemoryUnit::get_satp()
{
if (is_satp_unset())
return 0;
else
return satp_->get_satp();
}
uint8_t MemoryUnit::is_satp_unset()
{
return (satp_==NULL);
}
uint8_t MemoryUnit::get_mode()
{
assert(satp_!= NULL);
return satp_->get_mode();
}
void MemoryUnit::set_satp(uint64_t satp)
{
// uint16_t asid = 0; // set asid for different process
satp_ = new SATP_t (satp );
}
bool MemoryUnit::need_trans(uint64_t dev_pAddr)
{
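// Examples (illustrative): an address inside the page-table region (>= PAGE_TABLE_BASE_ADDR),
// an IO address below USER_BASE_ADDR, or an address in the startup window
// [STARTUP_ADDR, STARTUP_ADDR + 0x40000] is returned untranslated; anything else goes
// through the TLB / page-table-walk path in vAddr_to_pAddr().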
// Skip translation if satp is unset or the mode is BARE
if ( is_satp_unset() || (get_mode() == BARE))
return 0;
// Check if the address is reserved for system usage
// bool isReserved = (PAGE_TABLE_BASE_ADDR <= dev_pAddr && dev_pAddr < PAGE_TABLE_BASE_ADDR + PT_SIZE_LIMIT);
if (PAGE_TABLE_BASE_ADDR <= dev_pAddr)
return 0;
// Check if the address is reserved for IO usage
if (dev_pAddr < USER_BASE_ADDR)
return 0;
// Check if the address falls within the startup address range
if ((STARTUP_ADDR <= dev_pAddr) && (dev_pAddr <= (STARTUP_ADDR + 0x40000)))
return 0;
// None of the above conditions were met, so the address needs translation.
return 1;
}
uint64_t MemoryUnit::vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type)
{
uint64_t pfn;
uint64_t size_bits;
DBGPRINT(" [MMU: V2P] vaddr = 0x%lx, type = 0x%u\n",vAddr,type);
if (!need_trans(vAddr))
{
DBGPRINT(" [MMU: V2P] Translation is not needed.\n");
return vAddr;
}
//First lookup TLB.
std::pair<bool, uint64_t> tlb_access = tlbLookup(vAddr, type, &size_bits);
if (tlb_access.first)
{
pfn = tlb_access.second;
TLB_HIT++;
}
else //Else walk the PT.
{
std::pair<uint64_t, uint8_t> ptw_access = page_table_walk(vAddr, type, &size_bits);
tlbAdd(vAddr>>size_bits, ptw_access.first, ptw_access.second,size_bits);
pfn = ptw_access.first; TLB_MISS++; PTW++;
unique_translations.insert(vAddr>>size_bits);
PERF_UNIQUE_PTW = unique_translations.size();
}
//Construct final address using pfn and offset.
DBGPRINT(" [MMU: V2P] translated vAddr: 0x%lx to pAddr 0x%lx\n",vAddr,((pfn << size_bits) + (vAddr & ((1 << size_bits) - 1))));
return (pfn << size_bits) + (vAddr & ((1 << size_bits) - 1));
}
uint64_t MemoryUnit::get_pte_address(uint64_t base_ppn, uint64_t vpn)
{
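// Sketch (assuming PT_SIZE = 4096 and PTE_SIZE = 8, i.e. 4 KB tables holding 8-byte PTEs):
// base_ppn = 0x1000, vpn = 3  ->  pte_addr = 0x1000 * 4096 + 3 * 8 = 0x1000018.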
return (base_ppn * PT_SIZE) + (vpn * PTE_SIZE);
}
std::pair<uint64_t, uint8_t> MemoryUnit::page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t *size_bits)
{
DBGPRINT(" [MMU:PTW] Start: vaddr = 0x%lx, type = %u.\n", vAddr_bits, type);
uint8_t level = PT_LEVEL;
int i = level-1;
vAddr_t vaddr(vAddr_bits);
uint32_t flags =0;
uint64_t pte_addr = 0, pte_bytes = 0;
uint64_t cur_base_ppn = get_base_ppn();
// TODO: adjust size_bits when superpage support is added
*size_bits = 12;
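// Worked example (illustrative, SV32): for vAddr 0x40201ABC the walk reads the level-1 PTE at
// index vpn[1] = 0x100, then the level-0 PTE at index vpn[0] = 0x201; if that PTE is a valid
// leaf with the right permissions its ppn is returned, and vAddr_to_pAddr() appends the
// 12-bit offset 0xABC to form the physical address.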
while (true)
{
// Read PTE.
pte_addr = get_pte_address(cur_base_ppn, vaddr.vpn[i]);
decoder_.read(&pte_bytes, pte_addr, PTE_SIZE);
PTE_t pte(pte_bytes);
DBGPRINT(" [MMU:PTW] Level[%u] pte_addr=0x%lx, pte_bytes =0x%lx, pte.ppn= 0x%lx, pte.flags = %u)\n", i, pte_addr, pte_bytes, pte.ppn, pte.flags);
assert(((pte.pte_bytes & 0xFFFFFFFF) != 0xbaadf00d) && "ERROR: uninitialized PTE\n");
// Check if it has invalid flag bits.
if ((pte.v == 0) | ((pte.r == 0) & (pte.w == 1)))
{
assert(0);
throw Page_Fault_Exception(" [MMU:PTW] Page Fault : Attempted to access invalid entry.");
}
if ((pte.r == 0) & (pte.w == 0) & (pte.x == 0))
{
// Not a leaf node as rwx == 000
i--;
if (i < 0)
{
assert(0);
throw Page_Fault_Exception(" [MMU:PTW] Page Fault : No leaf node found.");
}
else
{
// Continue on to next level.
cur_base_ppn= pte.ppn;
DBGPRINT(" [MMU:PTW] next base_ppn: 0x%lx\n", cur_base_ppn);
continue;
}
}
else
{
// Leaf node found, finished walking.
// Check RWX permissions according to access type.
if ((type == ACCESS_TYPE::FETCH) & ((pte.r == 0) | (pte.x == 0)))
{
assert(0);
throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE FETCH, Incorrect permissions.");
}
else if ((type == ACCESS_TYPE::LOAD) & (pte.r == 0))
{
assert(0);
throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE LOAD, Incorrect permissions.");
}
else if ((type == ACCESS_TYPE::STORE) & (pte.w == 0))
{
assert(0);
throw Page_Fault_Exception(" [MMU:PTW] Page Fault : TYPE STORE, Incorrect permissions.");
}
cur_base_ppn = pte.ppn;
flags = pte.flags;
break;
}
}
return std::make_pair(cur_base_ppn, flags);
}
#endif

View file

@ -18,8 +18,108 @@
#include <map>
#include <unordered_map>
#include <cstdint>
#include <unordered_set>
#include <stdexcept>
#include "VX_config.h"
#ifdef VM_ENABLE
#include <unordered_set>
#include <stdexcept>
#include <cassert>
#endif
namespace vortex {
#ifdef VM_ENABLE
// VA MODE
#define BARE 0x0
#define SV32 0x1
#define SV39 0x8
enum ACCESS_TYPE {
LOAD,
STORE,
FETCH
};
class SATP_t
{
private:
uint64_t address;
uint16_t asid;
uint8_t mode;
uint64_t ppn;
uint64_t satp;
uint64_t bits(uint64_t input, uint8_t s_idx, uint8_t e_idx)
{
return (input>> s_idx) & (((uint64_t)1 << (e_idx - s_idx + 1)) - 1);
}
bool bit(uint64_t input , uint8_t idx)
{
return (input ) & ((uint64_t)1 << idx);
}
public:
SATP_t(uint64_t satp) : satp(satp)
{
#ifdef XLEN_32
mode = bit(satp, 31);
asid = bits(satp, 22, 30);
ppn = bits(satp, 0,21);
#else
mode = bits(satp, 60,63);
asid = bits(satp, 44, 59);
ppn = bits(satp, 0,43);
#endif
address = ppn << MEM_PAGE_LOG2_SIZE;
}
SATP_t(uint64_t address, uint16_t asid) : address(address), asid(asid)
{
#ifdef XLEN_32
assert((address >> 32) == 0 && "Upper 32 bits are not zero!");
#endif
mode= VM_ADDR_MODE;
// asid = 0 ;
ppn = address >> MEM_PAGE_LOG2_SIZE;
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshift-count-overflow"
#ifdef XLEN_32
satp = (((uint64_t)mode << 31) | ((uint64_t)asid << 22) | ppn);
#else
satp = (((uint64_t)mode << 60) | ((uint64_t)asid << 44) | ppn);
#endif
#pragma GCC diagnostic pop
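// Example (illustrative, SV39 on XLEN 64, assuming 4 KB pages): a root page table at physical
// address 0x80000000 with asid 0 gives ppn = 0x80000 and
// satp = (8 << 60) | (0 << 44) | 0x80000 = 0x8000000000080000.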
}
uint8_t get_mode()
{
return mode;
}
uint16_t get_asid()
{
return asid;
}
uint64_t get_base_ppn()
{
return ppn;
}
uint64_t get_satp()
{
return satp;
}
};
class Page_Fault_Exception : public std::runtime_error /* or logic_error */
{
public:
Page_Fault_Exception(const std::string& what = "") : std::runtime_error(what) {}
uint64_t addr;
ACCESS_TYPE type;
};
#endif
struct BadAddress {};
struct OutOfRange {};
@ -73,26 +173,53 @@ public:
class MemoryUnit {
public:
// HW: Expand PageFault struct to contain access_type info for debug purposes
struct PageFault {
PageFault(uint64_t a, bool nf)
: faultAddr(a)
, notFound(nf)
// , access_type(ACCESS_TYPE::LOAD)
{}
uint64_t faultAddr;
bool notFound;
// ACCESS_TYPE access_type;
};
#ifdef VM_ENABLE
MemoryUnit(uint64_t pageSize = MEM_PAGE_SIZE);
~MemoryUnit(){
if ( this->satp_ != NULL)
delete this->satp_;
};
#else
MemoryUnit(uint64_t pageSize = 0);
#endif
void attach(MemDevice &m, uint64_t start, uint64_t end);
void read(void* data, uint64_t addr, uint64_t size, bool sup);
void write(const void* data, uint64_t addr, uint64_t size, bool sup);
#ifdef VM_ENABLE
void read(void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type = ACCESS_TYPE::LOAD);
void write(const void* data, uint64_t addr, uint32_t size, ACCESS_TYPE type = ACCESS_TYPE::STORE);
#else
void read(void* data, uint64_t addr, uint32_t size, bool sup);
void write(const void* data, uint64_t addr, uint32_t size, bool sup);
#endif
void amo_reserve(uint64_t addr);
bool amo_check(uint64_t addr);
#ifdef VM_ENABLE
void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags, uint64_t size_bits);
uint8_t is_satp_unset();
uint64_t get_satp();
uint8_t get_mode();
uint64_t get_base_ppn();
void set_satp(uint64_t satp);
#else
void tlbAdd(uint64_t virt, uint64_t phys, uint32_t flags);
#endif
void tlbRm(uint64_t vaddr);
void tlbFlush() {
tlb_.clear();
@ -134,24 +261,71 @@ private:
struct TLBEntry {
TLBEntry() {}
#ifdef VM_ENABLE
TLBEntry(uint32_t pfn, uint32_t flags, uint64_t size_bits)
: pfn(pfn)
, flags(flags)
, mru_bit(true)
, size_bits (size_bits)
{
d = bit(7);
a = bit(6);
g = bit(5);
u = bit(4);
x = bit(3);
w = bit(2);
r = bit(1);
v = bit(0);
}
bool bit(uint8_t idx)
{
return (flags) & (1 << idx);
}
uint32_t pfn;
uint32_t flags;
bool mru_bit;
uint64_t size_bits;
bool d, a, g, u, x, w, r, v;
#else
TLBEntry(uint32_t pfn, uint32_t flags)
: pfn(pfn)
, flags(flags)
{}
uint32_t pfn;
uint32_t flags;
#endif
};
#ifdef VM_ENABLE
std::pair<bool, uint64_t> tlbLookup(uint64_t vAddr, ACCESS_TYPE type, uint64_t* size_bits);
bool need_trans(uint64_t dev_pAddr);
uint64_t vAddr_to_pAddr(uint64_t vAddr, ACCESS_TYPE type);
uint64_t get_pte_address(uint64_t base_ppn, uint64_t vpn);
std::pair<uint64_t, uint8_t> page_table_walk(uint64_t vAddr_bits, ACCESS_TYPE type, uint64_t* size_bits);
#else
uint64_t toPhyAddr(uint64_t vAddr, uint32_t flagMask);
TLBEntry tlbLookup(uint64_t vAddr, uint32_t flagMask);
#endif
std::unordered_map<uint64_t, TLBEntry> tlb_;
uint64_t pageSize_;
ADecoder decoder_;
#ifndef VM_ENABLE
bool enableVM_;
#endif
amo_reservation_t amo_reservation_;
#ifdef VM_ENABLE
std::unordered_set<uint64_t> unique_translations;
uint64_t TLB_HIT, TLB_MISS, TLB_EVICT, PTW, PERF_UNIQUE_PTW;
SATP_t *satp_;
#endif
};
///////////////////////////////////////////////////////////////////////////////
@ -219,4 +393,149 @@ private:
bool check_acl_;
};
#ifdef VM_ENABLE
class PTE_t
{
private:
uint64_t address;
uint64_t bits(uint64_t input, uint8_t s_idx, uint8_t e_idx)
{
return (input>> s_idx) & (((uint64_t)1 << (e_idx - s_idx + 1)) - 1);
}
bool bit(uint64_t input, uint8_t idx)
{
return (input) & ((uint64_t)1 << idx);
}
public:
#if VM_ADDR_MODE == SV39
bool N;
uint8_t PBMT;
#endif
uint64_t ppn;
uint32_t rsw;
uint32_t flags;
uint8_t level;
bool d, a, g, u, x, w, r, v;
uint64_t pte_bytes;
void set_flags (uint32_t flag)
{
this->flags = flag;
d = bit(flags,7);
a = bit(flags,6);
g = bit(flags,5);
u = bit(flags,4);
x = bit(flags,3);
w = bit(flags,2);
r = bit(flags,1);
v = bit(flags,0);
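// For example, flags = 0x1 decodes to v = 1 with rwx = 000 (a pointer to the next table level),
// while flags = 0xF decodes to v = r = w = x = 1 (a valid RWX leaf).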
}
PTE_t(uint64_t address, uint32_t flags) : address(address)
{
#if VM_ADDR_MODE == SV39
N = 0;
PBMT = 0;
level = 3;
ppn = address >> MEM_PAGE_LOG2_SIZE;
// Reserve for Super page support
// ppn = new uint32_t [level];
// ppn[2]=bits(address,28,53);
// ppn[1]=bits(address,19,27);
// ppn[0]=bits(address,10,18);
set_flags(flags);
// pte_bytes = (N << 63) | (PBMT << 61) | (ppn <<10) | flags ;
pte_bytes = (ppn <<10) | flags ;
#else // if VM_ADDR_MODE == SV32
assert((address>> 32) == 0 && "Upper 32 bits are not zero!");
level = 2;
ppn = address >> MEM_PAGE_LOG2_SIZE;
// Reserve for Super page support
// ppn = new uint32_t[level];
// ppn[1]=bits(address,20,31);
// ppn[0]=bits(address,10,19);
set_flags(flags);
pte_bytes = ppn <<10 | flags ;
#endif
}
PTE_t(uint64_t pte_bytes) : pte_bytes(pte_bytes)
{
#if VM_ADDR_MODE == SV39
N = bit(pte_bytes,63);
PBMT = bits(pte_bytes,61,62);
level = 3;
ppn=bits(pte_bytes,10,53);
address = ppn << MEM_PAGE_LOG2_SIZE;
// Reserve for Super page support
// ppn = new uint32_t [level];
// ppn[2]=bits(pte_bytes,28,53);
// ppn[1]=bits(pte_bytes,19,27);
// ppn[0]=bits(pte_bytes,10,18);
#else //#if VM_ADDR_MODE == SV32
assert((pte_bytes >> 32) == 0 && "Upper 32 bits are not zero!");
level = 2;
ppn=bits(pte_bytes,10, 31);
address = ppn << MEM_PAGE_LOG2_SIZE;
// Reserve for Super page support
// ppn = new uint32_t[level];
// ppn[1]=bits(address, 20,31);
// ppn[0]=bits(address, 10,19);
#endif
rsw = bits(pte_bytes,8,9);
set_flags((uint32_t)(bits(pte_bytes,0,7)));
}
~PTE_t()
{
// Reserve for Super page support
// delete ppn;
}
};
class vAddr_t
{
private:
uint64_t address;
uint64_t bits(uint8_t s_idx, uint8_t e_idx)
{
return (address>> s_idx) & (((uint64_t)1 << (e_idx - s_idx + 1)) - 1);
}
bool bit( uint8_t idx)
{
return (address) & ((uint64_t)1 << idx);
}
public:
uint64_t *vpn;
uint64_t pgoff;
uint8_t level;
vAddr_t(uint64_t address) : address(address)
{
#if VM_ADDR_MODE == SV39
level = 3;
vpn = new uint64_t [level];
vpn[2] = bits(30,38);
vpn[1] = bits(21,29);
vpn[0] = bits(12,20);
pgoff = bits(0,11);
#else //#if VM_ADDR_MODE == SV32
assert((address>> 32) == 0 && "Upper 32 bits are not zero!");
level = 2;
vpn = new uint64_t [level];
vpn[1] = bits(22,31);
vpn[0] = bits(12,21);
pgoff = bits(0,11);
#endif
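// For example (illustrative): under SV39 the address 0x40201000 splits into vpn[2] = 1,
// vpn[1] = 1, vpn[0] = 1 and pgoff = 0; under SV32 the same address gives vpn[1] = 0x100,
// vpn[0] = 0x201 and pgoff = 0.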
}
~vAddr_t()
{
delete[] vpn;
}
};
#endif
} // namespace vortex

View file

@ -14,6 +14,10 @@ CXXFLAGS += -I$(THIRD_PARTY_DIR)/ramulator/src
CXXFLAGS += -DXLEN_$(XLEN)
CXXFLAGS += $(CONFIGS)
ifeq ($(VM_ENABLE), 1)
CXXFLAGS += -DVM_ENABLE
endif
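# Example (illustrative): a VM-enabled simx build can be produced with "make VM_ENABLE=1",
# assuming VM_ENABLE is not already forced by config.mk.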
LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator

View file

@ -106,6 +106,14 @@ void Cluster::attach_ram(RAM* ram) {
}
}
#ifdef VM_ENABLE
void Cluster::set_satp(uint64_t satp) {
for (auto& socket : sockets_) {
socket->set_satp(satp);
}
}
#endif
bool Cluster::running() const {
for (auto& socket : sockets_) {
if (socket->running())

View file

@ -57,6 +57,10 @@ public:
void attach_ram(RAM* ram);
#ifdef VM_ENABLE
void set_satp(uint64_t satp);
#endif
bool running() const;
int get_exitcode() const;

View file

@ -428,3 +428,10 @@ bool Core::wspawn(uint32_t num_warps, Word nextPC) {
void Core::attach_ram(RAM* ram) {
emulator_.attach_ram(ram);
}
#ifdef VM_ENABLE
void Core::set_satp(uint64_t satp) {
emulator_.set_satp(satp); // TODO(JAEWON): should this take wid/tid?
// emulator_.set_csr(VX_CSR_SATP,satp,0,0); //JAEWON wit, tid???
}
#endif

View file

@ -26,6 +26,7 @@
#include "dispatcher.h"
#include "func_unit.h"
#include "mem_coalescer.h"
#include "VX_config.h"
namespace vortex {
@ -98,6 +99,9 @@ public:
void tick();
void attach_ram(RAM* ram);
#ifdef VM_ENABLE
void set_satp(uint64_t satp);
#endif
bool running() const;

View file

@ -127,7 +127,7 @@ void Emulator::clear() {
void Emulator::attach_ram(RAM* ram) {
// bind RAM to memory unit
#if (XLEN == 64)
mmu_.attach(*ram, 0, 0xFFFFFFFFFFFFFFFF);
mmu_.attach(*ram, 0, 0x7FFFFFFFFF); //39bit SV39
#else
mmu_.attach(*ram, 0, 0xFFFFFFFF);
#endif
@ -280,10 +280,54 @@ bool Emulator::barrier(uint32_t bar_id, uint32_t count, uint32_t wid) {
return false;
}
#ifdef VM_ENABLE
void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) {
DP(3, "*** icache_read 0x" << std::hex << addr << ", size = 0x " << size);
try
{
mmu_.read(data, addr, size, ACCESS_TYPE::FETCH);
}
catch (Page_Fault_Exception& page_fault)
{
std::cout << page_fault.what() << std::endl;
throw;
}
}
#else
void Emulator::icache_read(void *data, uint64_t addr, uint32_t size) {
mmu_.read(data, addr, size, 0);
}
#endif
#ifdef VM_ENABLE
void Emulator::set_satp(uint64_t satp) {
DPH(3, "set satp 0x" << std::hex << satp << " in emulator module\n");
set_csr(VX_CSR_SATP,satp,0,0);
}
#endif
#ifdef VM_ENABLE
void Emulator::dcache_read(void *data, uint64_t addr, uint32_t size) {
DP(1, "*** dcache_read 0x" << std::hex << addr << ", size = 0x " << size);
auto type = get_addr_type(addr);
if (type == AddrType::Shared) {
core_->local_mem()->read(data, addr, size);
} else {
try
{
mmu_.read(data, addr, size, ACCESS_TYPE::LOAD);
}
catch (Page_Fault_Exception& page_fault)
{
std::cout<<page_fault.what()<<std::endl;
throw;
}
}
DPH(2, "Mem Read: addr=0x" << std::hex << addr << ", data=0x" << ByteStream(data, size) << " (size=" << size << ", type=" << type << ")" << std::endl);
}
#else
void Emulator::dcache_read(void *data, uint64_t addr, uint32_t size) {
auto type = get_addr_type(addr);
if (type == AddrType::Shared) {
@ -294,7 +338,34 @@ void Emulator::dcache_read(void *data, uint64_t addr, uint32_t size) {
DPH(2, "Mem Read: addr=0x" << std::hex << addr << ", data=0x" << ByteStream(data, size) << std::dec << " (size=" << size << ", type=" << type << ")" << std::endl);
}
#endif
#ifdef VM_ENABLE
void Emulator::dcache_write(const void* data, uint64_t addr, uint32_t size) {
DP(1, "*** dcache_write 0x" << std::hex << addr << ", size = 0x " << size);
auto type = get_addr_type(addr);
if (addr >= uint64_t(IO_COUT_ADDR)
&& addr < (uint64_t(IO_COUT_ADDR) + IO_COUT_SIZE)) {
this->writeToStdOut(data, addr, size);
} else {
if (type == AddrType::Shared) {
core_->local_mem()->write(data, addr, size);
} else {
try
{
// mmu_.write(data, addr, size, 0);
mmu_.write(data, addr, size, ACCESS_TYPE::STORE);
}
catch (Page_Fault_Exception& page_fault)
{
std::cout<<page_fault.what()<<std::endl;
throw;
}
}
}
DPH(2, "Mem Write: addr=0x" << std::hex << addr << ", data=0x" << ByteStream(data, size) << " (size=" << size << ", type=" << type << ")" << std::endl);
}
#else
void Emulator::dcache_write(const void* data, uint64_t addr, uint32_t size) {
auto type = get_addr_type(addr);
if (addr >= uint64_t(IO_COUT_ADDR)
@ -309,6 +380,7 @@ void Emulator::dcache_write(const void* data, uint64_t addr, uint32_t size) {
}
DPH(2, "Mem Write: addr=0x" << std::hex << addr << ", data=0x" << ByteStream(data, size) << std::dec << " (size=" << size << ", type=" << type << ")" << std::endl);
}
#endif
void Emulator::dcache_amo_reserve(uint64_t addr) {
auto type = get_addr_type(addr);
@ -360,6 +432,10 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
auto core_perf = core_->perf_stats();
switch (addr) {
case VX_CSR_SATP:
#ifdef VM_ENABLE
// return csrs_.at(wid).at(tid)[addr];
return mmu_.get_satp();
#endif
case VX_CSR_PMPCFG0:
case VX_CSR_PMPADDR0:
case VX_CSR_MSTATUS:
@ -488,6 +564,12 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) {
csr_mscratch_ = value;
break;
case VX_CSR_SATP:
#ifdef VM_ENABLE
// warps_.at(wid).fcsr = (warps_.at(wid).fcsr & ~0x1F) | (value & 0x1F);
// csrs_.at(wid).at(tid)[addr] = value; //what is wid and tid?
mmu_.set_satp(value);
break;
#endif
case VX_CSR_MSTATUS:
case VX_CSR_MEDELEG:
case VX_CSR_MIDELEG:
@ -506,6 +588,8 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) {
}
}
uint32_t Emulator::get_fpu_rm(uint32_t func3, uint32_t tid, uint32_t wid) {
return (func3 == 0x7) ? this->get_csr(VX_CSR_FRM, tid, wid) : func3;
}

View file

@ -39,6 +39,9 @@ public:
void clear();
void attach_ram(RAM* ram);
#ifdef VM_ENABLE
void set_satp(uint64_t satp) ;
#endif
instr_trace_t* step();

View file

@ -84,7 +84,7 @@ int main(int argc, char **argv) {
Arch arch(num_threads, num_warps, num_cores);
// create memory module
RAM ram(0, RAM_PAGE_SIZE);
RAM ram(0, MEM_PAGE_SIZE);
// create processor
Processor processor(arch);

View file

@ -99,6 +99,13 @@ void ProcessorImpl::attach_ram(RAM* ram) {
cluster->attach_ram(ram);
}
}
#ifdef VM_ENABLE
void ProcessorImpl::set_satp(uint64_t satp) {
for (auto cluster : clusters_) {
cluster->set_satp(satp);
}
}
#endif
void ProcessorImpl::run() {
SimPlatform::instance().reset();
@ -143,10 +150,18 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const {
Processor::Processor(const Arch& arch)
: impl_(new ProcessorImpl(arch))
{}
{
#ifdef VM_ENABLE
satp_ = NULL;
#endif
}
Processor::~Processor() {
delete impl_;
#ifdef VM_ENABLE
if (satp_ != NULL)
delete satp_;
#endif
}
void Processor::attach_ram(RAM* mem) {
@ -159,4 +174,27 @@ void Processor::run() {
void Processor::dcr_write(uint32_t addr, uint32_t value) {
return impl_->dcr_write(addr, value);
}
#ifdef VM_ENABLE
int16_t Processor::set_satp_by_addr(uint64_t base_addr) {
uint16_t asid = 0;
satp_ = new SATP_t (base_addr,asid);
if (satp_ == NULL)
return 1;
uint64_t satp = satp_->get_satp();
impl_->set_satp(satp);
return 0;
}
bool Processor::is_satp_unset() {
return (satp_== NULL);
}
uint8_t Processor::get_satp_mode() {
assert (satp_!=NULL);
return satp_->get_mode();
}
uint64_t Processor::get_base_ppn() {
assert (satp_!=NULL);
return satp_->get_base_ppn();
}
#endif

View file

@ -14,12 +14,17 @@
#pragma once
#include <stdint.h>
#include <VX_config.h>
#include <mem.h>
namespace vortex {
class Arch;
class RAM;
class ProcessorImpl;
#ifdef VM_ENABLE
class SATP_t;
#endif
class Processor {
public:
@ -31,9 +36,18 @@ public:
void run();
void dcr_write(uint32_t addr, uint32_t value);
#ifdef VM_ENABLE
bool is_satp_unset();
uint8_t get_satp_mode();
uint64_t get_base_ppn();
int16_t set_satp_by_addr(uint64_t addr);
#endif
private:
ProcessorImpl* impl_;
#ifdef VM_ENABLE
SATP_t *satp_;
#endif
};
}

View file

@ -40,6 +40,10 @@ public:
void dcr_write(uint32_t addr, uint32_t value);
#ifdef VM_ENABLE
void set_satp(uint64_t satp);
#endif
PerfStats perf_stats() const;
private:

View file

@ -107,6 +107,14 @@ void Socket::attach_ram(RAM* ram) {
}
}
#ifdef VM_ENABLE
void Socket::set_satp(uint64_t satp) {
for (auto core : cores_) {
core->set_satp(satp);
}
}
#endif
bool Socket::running() const {
for (auto& core : cores_) {
if (core->running())

View file

@ -60,6 +60,10 @@ public:
void attach_ram(RAM* ram);
#ifdef VM_ENABLE
void set_satp(uint64_t satp);
#endif
bool running() const;
int get_exitcode() const;

tests/opencl/bfs/graph4096.txt Executable file

File diff suppressed because it is too large

View file

@ -62,7 +62,7 @@ void kernel_body(kernel_arg_t* __UNIFORM__ arg) {
value *= 5;
break;
default:
assert(task_id < arg->num_points);
//assert(task_id < arg->num_points);
break;
}