mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Adds the riscv vector extension into simx
This commit is contained in:
parent
8230b37411
commit
1e4583ac17
22 changed files with 5717 additions and 105 deletions
|
@ -386,10 +386,20 @@ synthesis()
|
|||
echo "synthesis tests done!"
|
||||
}
|
||||
|
||||
# Run the RISC-V vector regression: build simx, then launch the vector
# test runner with a 256-bit VLEN.
# NOTE(review): @TOOLDIR@ and @XLEN@ look like placeholders substituted when
# this template is configured — confirm against the build system.
vector()
{
    echo "begin vector tests..."

    # The simulator has to be built before the test runner is started.
    make -C sim/simx

    # The variables are set only for this one command.
    TOOLDIR=@TOOLDIR@ \
    XLEN=@XLEN@ \
    VLEN=256 \
    REG_TESTS=1 \
        ./tests/riscv/riscv-vector-tests/run-test.sh

    echo "vector tests done!"
}
|
||||
|
||||
# Print the script banner and the list of accepted command-line switches.
# Only one usage line is printed; it includes --vector, which the option
# parser accepts.
show_usage()
{
    echo "Vortex Regression Test"
    echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--vector] [--all] [--h|--help]"
}
|
||||
|
||||
declare -a tests=()
|
||||
|
@ -439,6 +449,9 @@ while [ "$1" != "" ]; do
|
|||
--synthesis )
|
||||
tests+=("synthesis")
|
||||
;;
|
||||
--vector )
|
||||
tests+=("vector")
|
||||
;;
|
||||
--all )
|
||||
tests=()
|
||||
tests+=("unittest")
|
||||
|
@ -454,6 +467,7 @@ while [ "$1" != "" ]; do
|
|||
tests+=("scope")
|
||||
tests+=("stress")
|
||||
tests+=("synthesis")
|
||||
tests+=("vector")
|
||||
;;
|
||||
-h | --help )
|
||||
show_usage
|
||||
|
|
|
@ -87,6 +87,10 @@
|
|||
`endif
|
||||
`endif
|
||||
|
||||
`ifndef VLEN
|
||||
`define VLEN 256
|
||||
`endif
|
||||
|
||||
`ifndef NUM_CLUSTERS
|
||||
`define NUM_CLUSTERS 1
|
||||
`endif
|
||||
|
|
|
@ -188,6 +188,19 @@
|
|||
`define VX_CSR_MIMPID 12'hF13
|
||||
`define VX_CSR_MHARTID 12'hF14
|
||||
|
||||
// Vector CSRs
|
||||
|
||||
`define VX_CSR_VSTART 12'h008
|
||||
`define VX_CSR_VXSAT 12'h009
|
||||
`define VX_CSR_VXRM 12'h00A
|
||||
`define VX_CSR_VCSR 12'h00F
|
||||
`define VX_CSR_VL 12'hC20
|
||||
`define VX_CSR_VTYPE 12'hC21
|
||||
`define VX_CSR_VLENB 12'hC22
|
||||
`define VX_CSR_VCYCLE 12'hC00
|
||||
`define VX_CSR_VTIME 12'hC01
|
||||
`define VX_CSR_VINSTRET 12'hC02
|
||||
|
||||
// GPGPU CSRs
|
||||
|
||||
`define VX_CSR_THREAD_ID 12'hCC0
|
||||
|
|
2
perf/cache/cache_perf.log
vendored
2
perf/cache/cache_perf.log
vendored
|
@ -1,3 +1,3 @@
|
|||
CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1
|
||||
running: CONFIGS=-DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 make -C ./ci/../driver/rtlsim
|
||||
verilator --build --exe --cc Vortex --top-module Vortex --language 1800-2009 --assert -Wall -Wpedantic -Wno-DECLFILENAME -Wno-REDEFMACRO --x-initial unique --x-assign unique verilator.vlt -I../../hw/rtl -I../../hw/dpi -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/cache -I../../hw/rtl/simulate -I../../hw/rtl/fp_cores -I../../third_party/fpnew/src/common_cells/include -I../../third_party/fpnew/src/common_cells/src -I../../third_party/fpnew/src/fpu_div_sqrt_mvp/hdl -I../../third_party/fpnew/src -I../../hw/rtl/tex_unit -I../../hw/rtl/raster_unit -I../../hw/rtl/rop_unit -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -j 64 -DNDEBUG -DIMUL_DPI -DIDIV_DPI -DFPU_DPI ../common/util.cpp ../common/mem.cpp ../common/rvfloats.cpp ../../hw/dpi/util_dpi.cpp ../../hw/dpi/float_dpi.cpp processor.cpp -CFLAGS '-std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds -fPIC -Wno-maybe-uninitialized -I../../../hw -I../../common -I../../../third_party/softfloat/source/include -I../../../third_party -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -O2 -DNDEBUG' -LDFLAGS '-shared ../../../third_party/softfloat/build/Linux-x86_64-GCC/softfloat.a -L../../../third_party/ramulator -lramulator' -o ../../../driver/rtlsim/librtlsim.so
|
||||
verilator --build --exe --cc Vortex --top-module Vortex --language 1800-2009 --assert -Wall -Wpedantic -Wno-DECLFILENAME -Wno-REDEFMACRO --x-initial unique --x-assign unique verilator.vlt -I../../hw/rtl -I../../hw/dpi -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../../hw/rtl/cache -I../../hw/rtl/simulate -I../../hw/rtl/fp_cores -I../../third_party/fpnew/src/common_cells/include -I../../third_party/fpnew/src/common_cells/src -I../../third_party/fpnew/src/fpu_div_sqrt_mvp/hdl -I../../third_party/fpnew/src -I../../hw/rtl/tex_unit -I../../hw/rtl/raster_unit -I../../hw/rtl/rop_unit -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -j 64 -DNDEBUG -DIMUL_DPI -DIDIV_DPI -DFPU_DPI ../common/util.cpp ../common/mem.cpp ../common/softfloat_ext.cpp ../common/rvfloats.cpp ../../hw/dpi/util_dpi.cpp ../../hw/dpi/float_dpi.cpp processor.cpp -CFLAGS '-std=c++11 -Wall -Wextra -Wfatal-errors -Wno-array-bounds -fPIC -Wno-maybe-uninitialized -I../../../hw -I../../common -I../../../third_party/softfloat/source/include -I../../../third_party -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DNUM_WARPS=2 -DNUM_THREADS=2 -DPERF_ENABLE -DICACHE_NUM_WAYS=1 -O2 -DNDEBUG' -LDFLAGS '-shared ../../../third_party/softfloat/build/Linux-x86_64-GCC/softfloat.a -L../../../third_party/ramulator -lramulator' -o ../../../driver/rtlsim/librtlsim.so
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
// limitations under the License.
|
||||
|
||||
#include "rvfloats.h"
|
||||
#include "softfloat_ext.h"
|
||||
#include <stdio.h>
|
||||
|
||||
extern "C" {
|
||||
|
@ -158,6 +159,34 @@ uint64_t rv_fdiv_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags) {
|
|||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_frecip7_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f32_recip7(to_float32_t(a));
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_frecip7_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f64_recip7(to_float64_t(a));
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_frsqrt7_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f32_rsqrte7(to_float32_t(a));
|
||||
if (fflags) { *fflags =softfloat_exceptionFlags; }
|
||||
return from_float32_t(r);
|
||||
}
|
||||
|
||||
uint64_t rv_frsqrt7_d(uint64_t a, uint32_t frm, uint32_t* fflags) {
|
||||
softfloat_roundingMode = frm;
|
||||
auto r = f64_rsqrte7(to_float64_t(a));
|
||||
if (fflags) { *fflags = softfloat_exceptionFlags; }
|
||||
return from_float64_t(r);
|
||||
}
|
||||
|
||||
uint32_t rv_fsqrt_s(uint32_t a, uint32_t frm, uint32_t* fflags) {
|
||||
rv_init(frm);
|
||||
auto r = f32_sqrt(to_float32_t(a));
|
||||
|
@ -486,6 +515,11 @@ uint64_t rv_fsgnjx_d(uint64_t a, uint64_t b) {
|
|||
return r;
|
||||
}
|
||||
|
||||
uint32_t rv_dtof_r(uint64_t a, uint32_t frm) {
|
||||
rv_init(frm);
|
||||
return rv_dtof(a);
|
||||
}
|
||||
|
||||
uint32_t rv_dtof(uint64_t a) {
|
||||
auto r = f64_to_f32(to_float64_t(a));
|
||||
return from_float32_t(r);
|
||||
|
|
|
@ -28,6 +28,8 @@ uint32_t rv_fnmadd_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t*
|
|||
uint32_t rv_fnmsub_s(uint32_t a, uint32_t b, uint32_t c, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fdiv_s(uint32_t a, uint32_t b, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_fsqrt_s(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_frecip7_s(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_frsqrt7_s(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
|
||||
uint32_t rv_ftoi_s(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint32_t rv_ftou_s(uint32_t a, uint32_t frm, uint32_t* fflags);
|
||||
|
@ -58,6 +60,8 @@ uint64_t rv_fsub_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags);
|
|||
uint64_t rv_fmul_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_fdiv_d(uint64_t a, uint64_t b, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_fsqrt_d(uint64_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_frecip7_d(uint64_t a, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_frsqrt7_d(uint64_t a, uint32_t frm, uint32_t* fflags);
|
||||
|
||||
uint64_t rv_fmadd_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags);
|
||||
uint64_t rv_fmsub_d(uint64_t a, uint64_t b, uint64_t c, uint32_t frm, uint32_t* fflags);
|
||||
|
@ -85,6 +89,7 @@ uint64_t rv_fmin_d(uint64_t a, uint64_t b, uint32_t* fflags);
|
|||
uint64_t rv_fmax_d(uint64_t a, uint64_t b, uint32_t* fflags);
|
||||
|
||||
uint32_t rv_dtof(uint64_t a);
|
||||
uint32_t rv_dtof_r(uint64_t a, uint32_t frm);
|
||||
uint64_t rv_ftod(uint32_t a);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
486
sim/common/softfloat_ext.cpp
Normal file
486
sim/common/softfloat_ext.cpp
Normal file
|
@ -0,0 +1,486 @@
|
|||
/*============================================================================
|
||||
|
||||
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
|
||||
Package, Release 3e, by John R. Hauser.
|
||||
|
||||
Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
|
||||
California. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions, and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions, and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
3. Neither the name of the University nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software without
|
||||
specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
=============================================================================*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <internals.h>
|
||||
#include <../RISCV/specialize.h>
|
||||
#include <softfloat.h>
|
||||
#include "softfloat_ext.h"
|
||||
|
||||
// Classify a half-precision value and return a bit mask:
//   bit 0: -infinity        bit 7: +infinity
//   bit 1: -normal          bit 6: +normal
//   bit 2: -subnormal       bit 5: +subnormal
//   bit 3: -zero            bit 4: +zero
//   bit 8: signaling NaN    bit 9: quiet NaN
uint_fast16_t f16_classify( float16_t a )
{
    union ui16_f16 raw;
    raw.f = a;
    const uint_fast16_t bits = raw.ui;

    const bool negative   = signF16UI( bits );
    const bool expAllOnes = expF16UI( bits ) == 0x1F;
    const bool expZero    = expF16UI( bits ) == 0;
    const bool fracZero   = fracF16UI( bits ) == 0;

    uint_fast16_t cls = 0;

    if ( expAllOnes ) {
        // Infinity only when the fraction is zero; NaNs are handled below.
        if ( fracZero ) {
            cls |= ( negative ? 1 << 0 : 1 << 7 );
        }
    } else if ( expZero ) {
        if ( fracZero ) {
            cls |= ( negative ? 1 << 3 : 1 << 4 );
        } else {
            cls |= ( negative ? 1 << 2 : 1 << 5 );
        }
    } else {
        cls |= ( negative ? 1 << 1 : 1 << 6 );
    }

    if ( isNaNF16UI( bits ) ) {
        cls |= ( softfloat_isSigNaNF16UI( bits ) ? 1 << 8 : 1 << 9 );
    }

    return cls;
}
|
||||
|
||||
// Classify a single-precision value and return a bit mask:
//   bit 0: -infinity        bit 7: +infinity
//   bit 1: -normal          bit 6: +normal
//   bit 2: -subnormal       bit 5: +subnormal
//   bit 3: -zero            bit 4: +zero
//   bit 8: signaling NaN    bit 9: quiet NaN
uint_fast16_t f32_classify( float32_t a )
{
    union ui32_f32 raw;
    raw.f = a;
    const uint_fast32_t bits = raw.ui;

    const bool negative   = signF32UI( bits );
    const bool expAllOnes = expF32UI( bits ) == 0xFF;
    const bool expZero    = expF32UI( bits ) == 0;
    const bool fracZero   = fracF32UI( bits ) == 0;

    uint_fast16_t cls = 0;

    if ( expAllOnes ) {
        // Infinity only when the fraction is zero; NaNs are handled below.
        if ( fracZero ) {
            cls |= ( negative ? 1 << 0 : 1 << 7 );
        }
    } else if ( expZero ) {
        if ( fracZero ) {
            cls |= ( negative ? 1 << 3 : 1 << 4 );
        } else {
            cls |= ( negative ? 1 << 2 : 1 << 5 );
        }
    } else {
        cls |= ( negative ? 1 << 1 : 1 << 6 );
    }

    if ( isNaNF32UI( bits ) ) {
        cls |= ( softfloat_isSigNaNF32UI( bits ) ? 1 << 8 : 1 << 9 );
    }

    return cls;
}
|
||||
|
||||
// Classify a double-precision value and return a bit mask:
//   bit 0: -infinity        bit 7: +infinity
//   bit 1: -normal          bit 6: +normal
//   bit 2: -subnormal       bit 5: +subnormal
//   bit 3: -zero            bit 4: +zero
//   bit 8: signaling NaN    bit 9: quiet NaN
uint_fast16_t f64_classify( float64_t a )
{
    union ui64_f64 raw;
    raw.f = a;
    const uint_fast64_t bits = raw.ui;

    const bool negative   = signF64UI( bits );
    const bool expAllOnes = expF64UI( bits ) == 0x7FF;
    const bool expZero    = expF64UI( bits ) == 0;
    const bool fracZero   = fracF64UI( bits ) == 0;

    uint_fast16_t cls = 0;

    if ( expAllOnes ) {
        // Infinity only when the fraction is zero; NaNs are handled below.
        if ( fracZero ) {
            cls |= ( negative ? 1 << 0 : 1 << 7 );
        }
    } else if ( expZero ) {
        if ( fracZero ) {
            cls |= ( negative ? 1 << 3 : 1 << 4 );
        } else {
            cls |= ( negative ? 1 << 2 : 1 << 5 );
        }
    } else {
        cls |= ( negative ? 1 << 1 : 1 << 6 );
    }

    if ( isNaNF64UI( bits ) ) {
        cls |= ( softfloat_isSigNaNF64UI( bits ) ? 1 << 8 : 1 << 9 );
    }

    return cls;
}
|
||||
|
||||
// Return the `len`-bit field of `val` that starts at bit `pos`,
// right-aligned in the result.
// Preconditions (asserted): pos >= 0, len > 0, pos + len <= 64.
static inline uint64_t extract64(uint64_t val, int pos, int len)
{
    assert(pos >= 0 && len > 0 && len <= 64 - pos);

    const uint64_t shifted  = val >> pos;
    const uint64_t low_bits = ~UINT64_C(0) >> (64 - len);  // `len` ones
    return shifted & low_bits;
}
|
||||
|
||||
// Build a 64-bit mask with `len` consecutive one bits whose lowest
// set bit is at position `pos`.
// Preconditions (asserted): 0 <= pos < 64, 0 < len <= 64.
static inline uint64_t make_mask64(int pos, int len)
{
    assert(pos >= 0 && len > 0 && pos < 64 && len <= 64);

    const uint64_t ones = UINT64_MAX >> (64 - len);  // `len` ones at bit 0
    return ones << pos;
}
|
||||
|
||||
//user needs to truncate output to required length
|
||||
static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) {
|
||||
uint64_t exp = extract64(val, s, e);
|
||||
uint64_t sig = extract64(val, 0, s);
|
||||
uint64_t sign = extract64(val, s + e, 1);
|
||||
const int p = 7;
|
||||
|
||||
static const uint8_t table[] = {
|
||||
52, 51, 50, 48, 47, 46, 44, 43,
|
||||
42, 41, 40, 39, 38, 36, 35, 34,
|
||||
33, 32, 31, 30, 30, 29, 28, 27,
|
||||
26, 25, 24, 23, 23, 22, 21, 20,
|
||||
19, 19, 18, 17, 16, 16, 15, 14,
|
||||
14, 13, 12, 12, 11, 10, 10, 9,
|
||||
9, 8, 7, 7, 6, 6, 5, 4,
|
||||
4, 3, 3, 2, 2, 1, 1, 0,
|
||||
127, 125, 123, 121, 119, 118, 116, 114,
|
||||
113, 111, 109, 108, 106, 105, 103, 102,
|
||||
100, 99, 97, 96, 95, 93, 92, 91,
|
||||
90, 88, 87, 86, 85, 84, 83, 82,
|
||||
80, 79, 78, 77, 76, 75, 74, 73,
|
||||
72, 71, 70, 70, 69, 68, 67, 66,
|
||||
65, 64, 63, 63, 62, 61, 60, 59,
|
||||
59, 58, 57, 56, 56, 55, 54, 53};
|
||||
|
||||
if (sub) {
|
||||
while (extract64(sig, s - 1, 1) == 0)
|
||||
exp--, sig <<= 1;
|
||||
|
||||
sig = (sig << 1) & make_mask64(0 ,s);
|
||||
}
|
||||
|
||||
int idx = ((exp & 1) << (p-1)) | (sig >> (s-p+1));
|
||||
uint64_t out_sig = (uint64_t)(table[idx]) << (s-p);
|
||||
uint64_t out_exp = (3 * make_mask64(0, e - 1) + ~exp) / 2;
|
||||
|
||||
return (sign << (s+e)) | (out_exp << s) | out_sig;
|
||||
}
|
||||
|
||||
// Half-precision reciprocal-square-root estimate.
// Special inputs are resolved from the f16_classify() mask; everything else
// goes through the shared rsqrte7() table lookup.
float16_t f16_rsqrte7(float16_t in)
{
    union ui16_f16 out;
    out.f = in;

    const unsigned int cls = f16_classify(in);

    if (cls == 0x001 || cls == 0x002 || cls == 0x004 || cls == 0x100) {
        // -inf, -normal, -subnormal, sNaN: invalid, result is the default NaN
        softfloat_exceptionFlags |= softfloat_flag_invalid;
        out.ui = defaultNaNF16UI;
    } else if (cls == 0x200) {
        // qNaN: default NaN, no flag
        out.ui = defaultNaNF16UI;
    } else if (cls == 0x008) {
        // -0 -> -inf
        out.ui = 0xfc00;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
    } else if (cls == 0x010) {
        // +0 -> +inf
        out.ui = 0x7c00;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
    } else if (cls == 0x080) {
        // +inf -> +0
        out.ui = 0x0;
    } else {
        // +subnormal (0x020) is normalised inside rsqrte7; any other mask
        // is handled as a positive number.
        const bool sub = (cls == 0x020);
        out.ui = rsqrte7(out.ui, 5, 10, sub);
    }

    return out.f;
}
|
||||
|
||||
// Single-precision reciprocal-square-root estimate.
// Special inputs are resolved from the f32_classify() mask; everything else
// goes through the shared rsqrte7() table lookup.
float32_t f32_rsqrte7(float32_t in)
{
    union ui32_f32 out;
    out.f = in;

    const unsigned int cls = f32_classify(in);

    if (cls == 0x001 || cls == 0x002 || cls == 0x004 || cls == 0x100) {
        // -inf, -normal, -subnormal, sNaN: invalid, result is the default NaN
        softfloat_exceptionFlags |= softfloat_flag_invalid;
        out.ui = defaultNaNF32UI;
    } else if (cls == 0x200) {
        // qNaN: default NaN, no flag
        out.ui = defaultNaNF32UI;
    } else if (cls == 0x008) {
        // -0 -> -inf
        out.ui = 0xff800000;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
    } else if (cls == 0x010) {
        // +0 -> +inf
        out.ui = 0x7f800000;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
    } else if (cls == 0x080) {
        // +inf -> +0
        out.ui = 0x0;
    } else {
        // +subnormal (0x020) is normalised inside rsqrte7; any other mask
        // is handled as a positive number.
        const bool sub = (cls == 0x020);
        out.ui = rsqrte7(out.ui, 8, 23, sub);
    }

    return out.f;
}
|
||||
|
||||
// Double-precision reciprocal-square-root estimate.
// Special inputs are resolved from the f64_classify() mask; everything else
// goes through the shared rsqrte7() table lookup.
float64_t f64_rsqrte7(float64_t in)
{
    union ui64_f64 out;
    out.f = in;

    const unsigned int cls = f64_classify(in);

    if (cls == 0x001 || cls == 0x002 || cls == 0x004 || cls == 0x100) {
        // -inf, -normal, -subnormal, sNaN: invalid, result is the default NaN
        softfloat_exceptionFlags |= softfloat_flag_invalid;
        out.ui = defaultNaNF64UI;
    } else if (cls == 0x200) {
        // qNaN: default NaN, no flag
        out.ui = defaultNaNF64UI;
    } else if (cls == 0x008) {
        // -0 -> -inf
        out.ui = 0xfff0000000000000ul;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
    } else if (cls == 0x010) {
        // +0 -> +inf
        out.ui = 0x7ff0000000000000ul;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
    } else if (cls == 0x080) {
        // +inf -> +0
        out.ui = 0x0;
    } else {
        // +subnormal (0x020) is normalised inside rsqrte7; any other mask
        // is handled as a positive number.
        const bool sub = (cls == 0x020);
        out.ui = rsqrte7(out.ui, 11, 52, sub);
    }

    return out.f;
}
|
||||
|
||||
//user needs to truncate output to required length
|
||||
static inline uint64_t recip7(uint64_t val, int e, int s, int rm, bool sub,
|
||||
bool *round_abnormal)
|
||||
{
|
||||
uint64_t exp = extract64(val, s, e);
|
||||
uint64_t sig = extract64(val, 0, s);
|
||||
uint64_t sign = extract64(val, s + e, 1);
|
||||
const int p = 7;
|
||||
|
||||
static const uint8_t table[] = {
|
||||
127, 125, 123, 121, 119, 117, 116, 114,
|
||||
112, 110, 109, 107, 105, 104, 102, 100,
|
||||
99, 97, 96, 94, 93, 91, 90, 88,
|
||||
87, 85, 84, 83, 81, 80, 79, 77,
|
||||
76, 75, 74, 72, 71, 70, 69, 68,
|
||||
66, 65, 64, 63, 62, 61, 60, 59,
|
||||
58, 57, 56, 55, 54, 53, 52, 51,
|
||||
50, 49, 48, 47, 46, 45, 44, 43,
|
||||
42, 41, 40, 40, 39, 38, 37, 36,
|
||||
35, 35, 34, 33, 32, 31, 31, 30,
|
||||
29, 28, 28, 27, 26, 25, 25, 24,
|
||||
23, 23, 22, 21, 21, 20, 19, 19,
|
||||
18, 17, 17, 16, 15, 15, 14, 14,
|
||||
13, 12, 12, 11, 11, 10, 9, 9,
|
||||
8, 8, 7, 7, 6, 5, 5, 4,
|
||||
4, 3, 3, 2, 2, 1, 1, 0};
|
||||
|
||||
if (sub) {
|
||||
while (extract64(sig, s - 1, 1) == 0)
|
||||
exp--, sig <<= 1;
|
||||
|
||||
sig = (sig << 1) & make_mask64(0 ,s);
|
||||
|
||||
if (exp != 0 && exp != UINT64_MAX) {
|
||||
*round_abnormal = true;
|
||||
if (rm == 1 ||
|
||||
(rm == 2 && !sign) ||
|
||||
(rm == 3 && sign))
|
||||
return ((sign << (s+e)) | make_mask64(s, e)) - 1;
|
||||
else
|
||||
return (sign << (s+e)) | make_mask64(s, e);
|
||||
}
|
||||
}
|
||||
|
||||
int idx = sig >> (s-p);
|
||||
uint64_t out_sig = (uint64_t)(table[idx]) << (s-p);
|
||||
uint64_t out_exp = 2 * make_mask64(0, e - 1) + ~exp;
|
||||
if (out_exp == 0 || out_exp == UINT64_MAX) {
|
||||
out_sig = (out_sig >> 1) | make_mask64(s - 1, 1);
|
||||
if (out_exp == UINT64_MAX) {
|
||||
out_sig >>= 1;
|
||||
out_exp = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return (sign << (s+e)) | (out_exp << s) | out_sig;
|
||||
}
|
||||
|
||||
// Half-precision reciprocal estimate.
// Special inputs are resolved from the f16_classify() mask; everything else
// goes through the shared recip7() table lookup using the current
// softfloat_roundingMode.
float16_t f16_recip7(float16_t in)
{
    union ui16_f16 out;
    out.f = in;

    const unsigned int cls = f16_classify(in);

    if (cls == 0x001) {
        // -inf -> -0
        out.ui = 0x8000;
    } else if (cls == 0x080) {
        // +inf -> +0
        out.ui = 0x0;
    } else if (cls == 0x008) {
        // -0 -> -inf
        out.ui = 0xfc00;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
    } else if (cls == 0x010) {
        // +0 -> +inf
        out.ui = 0x7c00;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
    } else if (cls == 0x100) {
        // sNaN: invalid, result is the default NaN
        softfloat_exceptionFlags |= softfloat_flag_invalid;
        out.ui = defaultNaNF16UI;
    } else if (cls == 0x200) {
        // qNaN: default NaN, no flag
        out.ui = defaultNaNF16UI;
    } else {
        // Subnormals of either sign are normalised inside recip7.
        const bool sub = (cls == 0x004) || (cls == 0x020);
        bool round_abnormal = false;
        out.ui = recip7(out.ui, 5, 10,
                        softfloat_roundingMode, sub, &round_abnormal);
        if (round_abnormal) {
            softfloat_exceptionFlags |= softfloat_flag_inexact |
                                        softfloat_flag_overflow;
        }
    }

    return out.f;
}
|
||||
|
||||
// Single-precision reciprocal estimate.
// Special inputs are resolved from the f32_classify() mask; everything else
// goes through the shared recip7() table lookup using the current
// softfloat_roundingMode.
float32_t f32_recip7(float32_t in)
{
    union ui32_f32 out;
    out.f = in;

    const unsigned int cls = f32_classify(in);

    if (cls == 0x001) {
        // -inf -> -0
        out.ui = 0x80000000;
    } else if (cls == 0x080) {
        // +inf -> +0
        out.ui = 0x0;
    } else if (cls == 0x008) {
        // -0 -> -inf
        out.ui = 0xff800000;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
    } else if (cls == 0x010) {
        // +0 -> +inf
        out.ui = 0x7f800000;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
    } else if (cls == 0x100) {
        // sNaN: invalid, result is the default NaN
        softfloat_exceptionFlags |= softfloat_flag_invalid;
        out.ui = defaultNaNF32UI;
    } else if (cls == 0x200) {
        // qNaN: default NaN, no flag
        out.ui = defaultNaNF32UI;
    } else {
        // Subnormals of either sign are normalised inside recip7.
        const bool sub = (cls == 0x004) || (cls == 0x020);
        bool round_abnormal = false;
        out.ui = recip7(out.ui, 8, 23,
                        softfloat_roundingMode, sub, &round_abnormal);
        if (round_abnormal) {
            softfloat_exceptionFlags |= softfloat_flag_inexact |
                                        softfloat_flag_overflow;
        }
    }

    return out.f;
}
|
||||
|
||||
// Double-precision reciprocal estimate.
// Special inputs are resolved from the f64_classify() mask; everything else
// goes through the shared recip7() table lookup using the current
// softfloat_roundingMode.
float64_t f64_recip7(float64_t in)
{
    union ui64_f64 out;
    out.f = in;

    const unsigned int cls = f64_classify(in);

    if (cls == 0x001) {
        // -inf -> -0
        out.ui = 0x8000000000000000;
    } else if (cls == 0x080) {
        // +inf -> +0
        out.ui = 0x0;
    } else if (cls == 0x008) {
        // -0 -> -inf
        out.ui = 0xfff0000000000000;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
    } else if (cls == 0x010) {
        // +0 -> +inf
        out.ui = 0x7ff0000000000000;
        softfloat_exceptionFlags |= softfloat_flag_infinite;
    } else if (cls == 0x100) {
        // sNaN: invalid, result is the default NaN
        softfloat_exceptionFlags |= softfloat_flag_invalid;
        out.ui = defaultNaNF64UI;
    } else if (cls == 0x200) {
        // qNaN: default NaN, no flag
        out.ui = defaultNaNF64UI;
    } else {
        // Subnormals of either sign are normalised inside recip7.
        const bool sub = (cls == 0x004) || (cls == 0x020);
        bool round_abnormal = false;
        out.ui = recip7(out.ui, 11, 52,
                        softfloat_roundingMode, sub, &round_abnormal);
        if (round_abnormal) {
            softfloat_exceptionFlags |= softfloat_flag_inexact |
                                        softfloat_flag_overflow;
        }
    }

    return out.f;
}
|
14
sim/common/softfloat_ext.h
Normal file
14
sim/common/softfloat_ext.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
// Declarations for the SoftFloat extension helpers implemented in
// softfloat_ext.cpp: value classification and the 7-bit reciprocal /
// reciprocal-square-root estimates, for half, single and double precision.
//
// The include guard makes the header safe to pull in more than once from
// the same translation unit.
#ifndef SOFTFLOAT_EXT_H
#define SOFTFLOAT_EXT_H

#include <stdint.h>
#include <softfloat_types.h>

// Half precision.
uint_fast16_t f16_classify( float16_t );  // class bit mask (bits 0..9)
float16_t f16_rsqrte7( float16_t );       // reciprocal square-root estimate
float16_t f16_recip7( float16_t );        // reciprocal estimate

// Single precision.
uint_fast16_t f32_classify( float32_t );  // class bit mask (bits 0..9)
float32_t f32_rsqrte7( float32_t );       // reciprocal square-root estimate
float32_t f32_recip7( float32_t );        // reciprocal estimate

// Double precision.
uint_fast16_t f64_classify( float64_t );  // class bit mask (bits 0..9)
float64_t f64_rsqrte7( float64_t );       // reciprocal square-root estimate
float64_t f64_recip7( float64_t );        // reciprocal estimate

#endif // SOFTFLOAT_EXT_H
|
|
@ -51,7 +51,7 @@ endif
|
|||
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
SRCS += $(SRC_DIR)/fpga.cpp $(SRC_DIR)/opae_sim.cpp
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ ifneq (,$(findstring FPU_FPNEW,$(CONFIGS)))
|
|||
endif
|
||||
RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/core -I$(RTL_DIR)/mem -I$(RTL_DIR)/cache $(FPU_INCLUDE)
|
||||
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
SRCS += $(SRC_DIR)/processor.cpp
|
||||
|
||||
|
|
|
@ -17,8 +17,8 @@ CXXFLAGS += $(CONFIGS)
|
|||
LDFLAGS += $(THIRD_PARTY_DIR)/softfloat/build/Linux-x86_64-GCC/softfloat.a
|
||||
LDFLAGS += -Wl,-rpath,$(THIRD_PARTY_DIR)/ramulator -L$(THIRD_PARTY_DIR)/ramulator -lramulator
|
||||
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
|
||||
# Common simulator sources; softfloat_ext.cpp is listed once.
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(SRC_DIR)/core.cpp $(SRC_DIR)/emulator.cpp $(SRC_DIR)/decode.cpp $(SRC_DIR)/execute.cpp $(SRC_DIR)/execute_vector.cpp $(SRC_DIR)/func_unit.cpp $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
|
|
|
@ -29,6 +29,7 @@ private:
|
|||
uint16_t num_cores_;
|
||||
uint16_t num_clusters_;
|
||||
uint16_t socket_size_;
|
||||
uint16_t vsize_;
|
||||
uint16_t num_barriers_;
|
||||
uint64_t local_mem_base_;
|
||||
|
||||
|
@ -39,6 +40,7 @@ public:
|
|||
, num_cores_(num_cores)
|
||||
, num_clusters_(NUM_CLUSTERS)
|
||||
, socket_size_(SOCKET_SIZE)
|
||||
, vsize_(VLEN / 8)
|
||||
, num_barriers_(NUM_BARRIERS)
|
||||
, local_mem_base_(LMEM_BASE_ADDR)
|
||||
{}
|
||||
|
@ -71,6 +73,10 @@ public:
|
|||
return socket_size_;
|
||||
}
|
||||
|
||||
uint16_t vsize() const {
|
||||
return vsize_;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
|
@ -47,6 +47,7 @@ static const std::unordered_map<Opcode, InstType> sc_instTable = {
|
|||
{Opcode::FMSUB, InstType::R4},
|
||||
{Opcode::FMNMADD, InstType::R4},
|
||||
{Opcode::FMNMSUB, InstType::R4},
|
||||
{Opcode::VSET, InstType::V},
|
||||
{Opcode::EXT1, InstType::R},
|
||||
{Opcode::EXT2, InstType::R4},
|
||||
{Opcode::R_W, InstType::R},
|
||||
|
@ -54,33 +55,6 @@ static const std::unordered_map<Opcode, InstType> sc_instTable = {
|
|||
{Opcode::TCU, InstType::I},
|
||||
};
|
||||
|
||||
// Bit-field layout constants used when decoding a 32-bit instruction word.
// For each field: width_* is its size in bits, shift_* the position of its
// least-significant bit, and mask_* a right-aligned mask of width_* ones.
enum Constants {
  // opcode
  width_opcode = 7,
  shift_opcode = 0,
  mask_opcode  = (1 << width_opcode) - 1,

  // register specifiers (rd / rs1 / rs2 / rs3 share one width and mask)
  width_reg    = 5,
  mask_reg     = (1 << width_reg) - 1,
  shift_rd     = width_opcode,

  // func3
  width_func3  = 3,
  shift_func3  = shift_rd + width_reg,
  mask_func3   = (1 << width_func3) - 1,

  shift_rs1    = shift_func3 + width_func3,
  shift_rs2    = shift_rs1 + width_reg,

  // func2 and func7 start at the same bit, directly above rs2
  width_func2  = 2,
  shift_func2  = shift_rs2 + width_reg,
  mask_func2   = (1 << width_func2) - 1,

  width_func7  = 7,
  shift_func7  = shift_rs2 + width_reg,
  mask_func7   = (1 << width_func7) - 1,

  // rs3 sits directly above func2
  shift_rs3    = shift_func7 + width_func2,

  // immediates
  width_i_imm  = 12,
  mask_i_imm   = (1 << width_i_imm) - 1,
  width_j_imm  = 20,
  mask_j_imm   = (1 << width_j_imm) - 1,
};
|
||||
|
||||
static const char* op_string(const Instr &instr) {
|
||||
auto opcode = instr.getOpcode();
|
||||
auto func2 = instr.getFunc2();
|
||||
|
@ -230,10 +204,14 @@ static const char* op_string(const Instr &instr) {
|
|||
case Opcode::FENCE: return "FENCE";
|
||||
case Opcode::FL:
|
||||
switch (func3) {
|
||||
case 0x1: return "VL";
|
||||
case 0x2: return "FLW";
|
||||
case 0x3: return "FLD";
|
||||
case 0x0: return "VL8";
|
||||
case 0x5: return "VL16";
|
||||
case 0x6: return "VL32";
|
||||
case 0x7: return "VL64";
|
||||
default:
|
||||
std::cout << "Could not decode float/vector load with func3: " << func3 << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::FS:
|
||||
|
@ -241,7 +219,12 @@ static const char* op_string(const Instr &instr) {
|
|||
case 0x1: return "VS";
|
||||
case 0x2: return "FSW";
|
||||
case 0x3: return "FSD";
|
||||
case 0x0: return "VS8";
|
||||
case 0x5: return "VS16";
|
||||
case 0x6: return "VS32";
|
||||
case 0x7: return "VS64";
|
||||
default:
|
||||
std::cout << "Could not decode float/vector store with func3: " << func3 << std::endl;
|
||||
std::abort();
|
||||
}
|
||||
case Opcode::AMO: {
|
||||
|
@ -390,6 +373,7 @@ static const char* op_string(const Instr &instr) {
|
|||
case Opcode::FMSUB: return func2 ? "FMSUB.D" : "FMSUB.S";
|
||||
case Opcode::FMNMADD: return func2 ? "FNMADD.D" : "FNMADD.S";
|
||||
case Opcode::FMNMSUB: return func2 ? "FNMSUB.D" : "FNMSUB.S";
|
||||
case Opcode::VSET: return "VSET";
|
||||
case Opcode::EXT1:
|
||||
switch (func7) {
|
||||
case 0:
|
||||
|
@ -421,6 +405,39 @@ static const char* op_string(const Instr &instr) {
|
|||
}
|
||||
}
|
||||
|
||||
inline void vec_log(std::ostream &os, const Instr &instr) {
|
||||
if (instr.getVUseMask() & set_func3)
|
||||
os << ", func3:" << instr.getFunc3();
|
||||
if (instr.getVUseMask() & set_func6)
|
||||
os << ", func6:" << instr.getFunc6();
|
||||
if (instr.getVUseMask() & set_imm)
|
||||
os << ", imm:" << instr.getImm();
|
||||
if (instr.getVUseMask() & set_vlswidth)
|
||||
os << ", width:" << instr.getVlsWidth();
|
||||
if (instr.getVUseMask() & set_vmop)
|
||||
os << ", mop:" << instr.getVmop();
|
||||
if (instr.getVUseMask() & set_vumop)
|
||||
os << ", umop:" << instr.getVumop();
|
||||
if (instr.getVUseMask() & set_vnf)
|
||||
os << ", nf:" << instr.getVnf();
|
||||
if (instr.getVUseMask() & set_vmask)
|
||||
os << ", vmask:" << instr.getVmask();
|
||||
if (instr.getVUseMask() & set_vs3)
|
||||
os << ", vs3:" << instr.getVs3();
|
||||
if (instr.getVUseMask() & set_zimm)
|
||||
os << ", zimm:" << ((instr.hasZimm()) ? "true" : "false");
|
||||
if (instr.getVUseMask() & set_vlmul)
|
||||
os << ", lmul:" << instr.getVlmul();
|
||||
if (instr.getVUseMask() & set_vsew)
|
||||
os << ", sew:" << instr.getVsew();
|
||||
if (instr.getVUseMask() & set_vta)
|
||||
os << ", ta:" << instr.getVta();
|
||||
if (instr.getVUseMask() & set_vma)
|
||||
os << ", ma:" << instr.getVma();
|
||||
if (instr.getVUseMask() & set_vediv)
|
||||
os << ", ediv:" << instr.getVediv();
|
||||
}
|
||||
|
||||
namespace vortex {
|
||||
std::ostream &operator<<(std::ostream &os, const Instr &instr) {
|
||||
os << op_string(instr);
|
||||
|
@ -441,6 +458,13 @@ std::ostream &operator<<(std::ostream &os, const Instr &instr) {
|
|||
if (sep++ != 0) { os << ", "; } else { os << " "; }
|
||||
os << "0x" << std::hex << instr.getImm() << std::dec;
|
||||
}
|
||||
if (instr.getOpcode() == Opcode::SYS && instr.getFunc3() >= 5) {
|
||||
// CSRs with immediate values
|
||||
if (sep++ != 0) { os << ", "; } else { os << " "; }
|
||||
os << "0x" << std::hex << instr.getRSrc(0);
|
||||
}
|
||||
// Log vector-specific vtype and vreg info
|
||||
if (instr.isVec()) vec_log(os, instr);
|
||||
return os;
|
||||
}
|
||||
}
|
||||
|
@ -452,6 +476,7 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
|
|||
|
||||
auto func2 = (code >> shift_func2) & mask_func2;
|
||||
auto func3 = (code >> shift_func3) & mask_func3;
|
||||
auto func6 = (code >> shift_func6) & mask_func6;
|
||||
auto func7 = (code >> shift_func7) & mask_func7;
|
||||
|
||||
auto rd = (code >> shift_rd) & mask_reg;
|
||||
|
@ -466,6 +491,12 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
|
|||
}
|
||||
|
||||
auto iType = op_it->second;
|
||||
if (op == Opcode::FL || op == Opcode::FS) {
|
||||
if (func3 != 0x2 && func3 != 0x3) {
|
||||
iType = InstType::V;
|
||||
}
|
||||
}
|
||||
|
||||
switch (iType) {
|
||||
case InstType::R:
|
||||
switch (op) {
|
||||
|
@ -659,7 +690,104 @@ std::shared_ptr<Instr> Emulator::decode(uint32_t code) const {
|
|||
auto imm = (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20);
|
||||
instr->setImm(sext(imm, width_j_imm+1));
|
||||
} break;
|
||||
|
||||
case InstType::V:
|
||||
instr->setVec(true);
|
||||
switch (op) {
|
||||
case Opcode::VSET: {
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
instr->setFunc3(func3);
|
||||
switch (func3) {
|
||||
case 7: {
|
||||
if (code >> (shift_vset - 1) == 0b10) { // vsetvl
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->addSrcReg(rs2, RegType::Integer);
|
||||
} else {
|
||||
auto zimm = (code >> shift_rs2) & mask_v_zimm;
|
||||
instr->setZimm(true);
|
||||
instr->setVlmul(zimm & mask_v_lmul);
|
||||
instr->setVsew((zimm >> shift_v_sew) & mask_v_sew);
|
||||
instr->setVta((zimm >> shift_v_ta) & mask_v_ta);
|
||||
instr->setVma((zimm >> shift_v_ma) & mask_v_ma);
|
||||
if ((code >> shift_vset)) { // vsetivli
|
||||
instr->setImm(rs1);
|
||||
} else { // vsetvli
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
}
|
||||
}
|
||||
} break;
|
||||
case 3: { // Vector - immediate arithmetic instructions
|
||||
instr->setDestReg(rd, RegType::Vector);
|
||||
instr->addSrcReg(rs2, RegType::Vector);
|
||||
instr->setImm(rs1);
|
||||
instr->setVmask((code >> shift_func7) & 0x1);
|
||||
instr->setFunc6(func6);
|
||||
} break;
|
||||
default: { // Vector - vector/scalar arithmetic instructions
|
||||
if (func3 == 1 && func6 == 16) {
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
} else if (func3 == 2 && func6 == 16) {
|
||||
instr->setDestReg(rd, RegType::Integer);
|
||||
} else {
|
||||
instr->setDestReg(rd, RegType::Vector);
|
||||
}
|
||||
instr->addSrcReg(rs1, RegType::Vector);
|
||||
instr->addSrcReg(rs2, RegType::Vector);
|
||||
instr->setVmask((code >> shift_func7) & 0x1);
|
||||
instr->setFunc6(func6);
|
||||
}
|
||||
}
|
||||
} break;
|
||||
|
||||
case Opcode::FL:
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->setVmop((code >> shift_vmop) & 0b11);
|
||||
switch (instr->getVmop()) {
|
||||
case 0b00:
|
||||
instr->setVumop(rs2);
|
||||
break;
|
||||
case 0b10:
|
||||
instr->addSrcReg(rs2, RegType::Integer);
|
||||
break;
|
||||
case 0b01:
|
||||
case 0b11:
|
||||
instr->addSrcReg(rs2, RegType::Vector);
|
||||
break;
|
||||
}
|
||||
instr->setVsew(func3 & 0x3);
|
||||
instr->setDestReg(rd, RegType::Vector);
|
||||
instr->setVlsWidth(func3);
|
||||
instr->setVmask((code >> shift_func7) & 0x1);
|
||||
instr->setVnf((code >> shift_vnf) & mask_func3);
|
||||
break;
|
||||
|
||||
case Opcode::FS:
|
||||
instr->addSrcReg(rs1, RegType::Integer);
|
||||
instr->setVmop((code >> shift_vmop) & 0b11);
|
||||
switch (instr->getVmop()) {
|
||||
case 0b00:
|
||||
instr->setVumop(rs2);
|
||||
break;
|
||||
case 0b10:
|
||||
instr->addSrcReg(rs2, RegType::Integer);
|
||||
break;
|
||||
case 0b01:
|
||||
case 0b11:
|
||||
instr->addSrcReg(rs2, RegType::Vector);
|
||||
break;
|
||||
}
|
||||
instr->setVsew(func3 & 0x3);
|
||||
instr->addSrcReg(rd, RegType::Vector);
|
||||
instr->setVlsWidth(func3);
|
||||
instr->setVmask((code >> shift_func7) & 0x1);
|
||||
instr->setVmop((code >> shift_vmop) & 0b11);
|
||||
instr->setVnf((code >> shift_vnf) & mask_func3);
|
||||
break;
|
||||
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
case InstType::R4:
|
||||
instr->setDestReg(rd, RegType::Float);
|
||||
instr->addSrcReg(rs1, RegType::Float);
|
||||
|
|
|
@ -33,6 +33,7 @@ using namespace vortex;
|
|||
Emulator::warp_t::warp_t(const Arch& arch)
|
||||
: ireg_file(arch.num_threads(), std::vector<Word>(MAX_NUM_REGS))
|
||||
, freg_file(arch.num_threads(), std::vector<uint64_t>(MAX_NUM_REGS))
|
||||
, vreg_file(MAX_NUM_REGS, std::vector<Byte>(arch.vsize()))
|
||||
, uuid(0)
|
||||
{}
|
||||
|
||||
|
@ -64,6 +65,26 @@ void Emulator::warp_t::clear(uint64_t startup_addr) {
|
|||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& reg_file : this->vreg_file) {
|
||||
for (auto& reg : reg_file) {
|
||||
#ifndef NDEBUG
|
||||
reg = 0;
|
||||
#else
|
||||
reg = std::rand();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& reg_file : this->vreg_file) {
|
||||
for (auto& reg : reg_file) {
|
||||
#ifndef NDEBUG
|
||||
reg = 0;
|
||||
#else
|
||||
reg = std::rand();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -79,7 +100,12 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core)
|
|||
// considered to be big enough to hold input tiles for one output tile.
|
||||
// In future versions, scratchpad size should be fixed to an appropriate value.
|
||||
, scratchpad(std::vector<Word>(32 * 32 * 32768))
|
||||
, csrs_(arch.num_warps())
|
||||
{
|
||||
for (uint32_t i = 0; i < arch_.num_warps(); ++i) {
|
||||
csrs_.at(i).resize(arch.num_threads());
|
||||
}
|
||||
|
||||
this->clear();
|
||||
}
|
||||
|
||||
|
@ -463,6 +489,32 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
|
|||
case VX_CSR_FFLAGS: return warps_.at(wid).fcsr & 0x1F;
|
||||
case VX_CSR_FRM: return (warps_.at(wid).fcsr >> 5);
|
||||
case VX_CSR_FCSR: return warps_.at(wid).fcsr;
|
||||
|
||||
// Vector CSRs
|
||||
case VX_CSR_VSTART:
|
||||
return csrs_.at(wid).at(tid)[VX_CSR_VSTART];
|
||||
case VX_CSR_VXSAT:
|
||||
return csrs_.at(wid).at(tid)[VX_CSR_VXSAT];
|
||||
case VX_CSR_VXRM:
|
||||
return csrs_.at(wid).at(tid)[VX_CSR_VXRM];
|
||||
case VX_CSR_VCSR: {
|
||||
Word vxsat = csrs_.at(wid).at(tid)[VX_CSR_VXSAT];
|
||||
Word vxrm = csrs_.at(wid).at(tid)[VX_CSR_VXRM];
|
||||
return (vxrm << 1) | vxsat;
|
||||
}
|
||||
case VX_CSR_VL:
|
||||
return csrs_.at(wid).at(tid)[VX_CSR_VL];
|
||||
case VX_CSR_VTYPE:
|
||||
return csrs_.at(wid).at(tid)[VX_CSR_VTYPE];
|
||||
case VX_CSR_VLENB:
|
||||
return VLEN / 8;
|
||||
case VX_CSR_VCYCLE:
|
||||
return csrs_.at(wid).at(tid)[VX_CSR_VCYCLE];
|
||||
case VX_CSR_VTIME:
|
||||
return csrs_.at(wid).at(tid)[VX_CSR_VTIME];
|
||||
case VX_CSR_VINSTRET:
|
||||
return csrs_.at(wid).at(tid)[VX_CSR_VINSTRET];
|
||||
|
||||
case VX_CSR_MHARTID: return (core_->id() * arch_.num_warps() + wid) * arch_.num_threads() + tid;
|
||||
case VX_CSR_THREAD_ID: return tid;
|
||||
case VX_CSR_WARP_ID: return wid;
|
||||
|
@ -578,6 +630,29 @@ void Emulator::set_csr(uint32_t addr, Word value, uint32_t tid, uint32_t wid) {
|
|||
case VX_CSR_MSCRATCH:
|
||||
csr_mscratch_ = value;
|
||||
break;
|
||||
|
||||
// Vector CSRs
|
||||
case VX_CSR_VSTART:
|
||||
csrs_.at(wid).at(tid)[VX_CSR_VSTART] = value;
|
||||
break;
|
||||
case VX_CSR_VXSAT:
|
||||
csrs_.at(wid).at(tid)[VX_CSR_VXSAT] = value & 0b1;
|
||||
break;
|
||||
case VX_CSR_VXRM:
|
||||
csrs_.at(wid).at(tid)[VX_CSR_VXRM] = value & 0b11;
|
||||
break;
|
||||
case VX_CSR_VCSR:
|
||||
csrs_.at(wid).at(tid)[VX_CSR_VXSAT] = value & 0b1;
|
||||
csrs_.at(wid).at(tid)[VX_CSR_VXRM] = (value >> 1) & 0b11;
|
||||
break;
|
||||
case VX_CSR_VL: // read only, written by vset(i)vl(i)
|
||||
csrs_.at(wid).at(tid)[VX_CSR_VL] = value;
|
||||
break;
|
||||
case VX_CSR_VTYPE: // read only, written by vset(i)vl(i)
|
||||
csrs_.at(wid).at(tid)[VX_CSR_VTYPE] = value;
|
||||
break;
|
||||
case VX_CSR_VLENB: // read only, set to VLEN / 8
|
||||
|
||||
case VX_CSR_SATP:
|
||||
#ifdef VM_ENABLE
|
||||
// warps_.at(wid).fcsr = (warps_.at(wid).fcsr & ~0x1F) | (value & 0x1F);
|
||||
|
|
|
@ -28,6 +28,76 @@ class Core;
|
|||
class Instr;
|
||||
class instr_trace_t;
|
||||
|
||||
// Bit-field geometry used by the instruction decoder: the width of each
// field, the bit position (shift) at which it starts, and the right-aligned
// mask to apply after shifting. Shifts are derived from the widths so the
// layout stays consistent; the resolved value is noted next to each one.
enum Constants {
  // ---- field widths, in bits ----
  width_opcode = 7,
  width_reg    = 5,
  width_func2  = 2,
  width_func3  = 3,
  width_func6  = 6,
  width_func7  = 7,
  width_mop    = 3,
  width_vmask  = 1,
  width_i_imm  = 12,
  width_j_imm  = 20,
  width_v_zimm = 11,
  width_v_ma   = 1,
  width_v_ta   = 1,
  width_v_sew  = 3,
  width_v_lmul = 3,
  width_aq     = 1,
  width_rl     = 1,

  // ---- field positions inside the instruction word ----
  shift_opcode = 0,
  shift_rd     = width_opcode,               // 7
  shift_func3  = shift_rd + width_reg,       // 12
  shift_rs1    = shift_func3 + width_func3,  // 15
  shift_rs2    = shift_rs1 + width_reg,      // 20
  shift_func2  = shift_rs2 + width_reg,      // 25
  shift_func7  = shift_rs2 + width_reg,      // 25 (same start bit as func2)
  shift_rs3    = shift_func7 + width_func2,  // 27
  shift_vmop   = shift_func7 + width_vmask,  // 26
  shift_vnf    = shift_vmop + width_mop,     // 29
  shift_func6  = shift_func7 + width_vmask,  // 26
  shift_vset   = shift_func7 + width_func6,  // 31

  // ---- field positions inside the vtype immediate (zimm) ----
  shift_v_sew  = width_v_lmul,               // 3
  shift_v_ta   = shift_v_sew + width_v_sew,  // 6
  shift_v_ma   = shift_v_ta + width_v_ta,    // 7

  // ---- right-aligned masks: (1 << width) - 1 ----
  mask_opcode  = (1 << width_opcode) - 1,
  mask_reg     = (1 << width_reg) - 1,
  mask_func2   = (1 << width_func2) - 1,
  mask_func3   = (1 << width_func3) - 1,
  mask_func6   = (1 << width_func6) - 1,
  mask_func7   = (1 << width_func7) - 1,
  mask_i_imm   = (1 << width_i_imm) - 1,
  mask_j_imm   = (1 << width_j_imm) - 1,
  mask_v_zimm  = (1 << width_v_zimm) - 1,
  mask_v_ma    = (1 << width_v_ma) - 1,
  mask_v_ta    = (1 << width_v_ta) - 1,
  mask_v_sew   = (1 << width_v_sew) - 1,
  mask_v_lmul  = (1 << width_v_lmul) - 1,
};
|
||||
|
||||
// Unpacked vector-type state, one 32-bit slot per field.
// NOTE(review): the field names match the vtype CSR fields of the RISC-V
// vector extension (vill, vma, vta, vsew, vlmul); whether each slot holds the
// raw encoding or a decoded value is not visible here — confirm against
// execute_vector.cpp.
struct vtype {
  uint32_t vill;   // first member
  uint32_t vma;
  uint32_t vta;
  uint32_t vsew;
  uint32_t vlmul;  // last member
};
|
||||
|
||||
union reg_data_t {
|
||||
Word u;
|
||||
WordI i;
|
||||
WordF f;
|
||||
float f32;
|
||||
double f64;
|
||||
uint32_t u32;
|
||||
uint64_t u64;
|
||||
int32_t i32;
|
||||
int64_t i64;
|
||||
};
|
||||
|
||||
class Emulator {
|
||||
public:
|
||||
Emulator(const Arch &arch,
|
||||
|
@ -61,6 +131,10 @@ public:
|
|||
Word get_tc_size();
|
||||
Word get_tc_num();
|
||||
|
||||
void dcache_read(void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
void dcache_write(const void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
private:
|
||||
|
||||
struct ipdom_entry_t {
|
||||
|
@ -85,9 +159,14 @@ private:
|
|||
ThreadMask tmask;
|
||||
std::vector<std::vector<Word>> ireg_file;
|
||||
std::vector<std::vector<uint64_t>>freg_file;
|
||||
std::vector<std::vector<Byte>> vreg_file;
|
||||
std::stack<ipdom_entry_t> ipdom_stack;
|
||||
Byte fcsr;
|
||||
uint32_t uuid;
|
||||
|
||||
struct vtype vtype;
|
||||
uint32_t vl;
|
||||
Word VLMAX;
|
||||
};
|
||||
|
||||
struct wspawn_t {
|
||||
|
@ -100,12 +179,14 @@ private:
|
|||
|
||||
void execute(const Instr &instr, uint32_t wid, instr_trace_t *trace);
|
||||
|
||||
void executeVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata, std::vector<reg_data_t> &rddata);
|
||||
|
||||
void loadVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata);
|
||||
|
||||
void storeVector(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata);
|
||||
|
||||
void icache_read(void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
void dcache_read(void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
void dcache_write(const void* data, uint64_t addr, uint32_t size);
|
||||
|
||||
void dcache_amo_reserve(uint64_t addr);
|
||||
|
||||
bool dcache_amo_check(uint64_t addr);
|
||||
|
@ -142,6 +223,7 @@ private:
|
|||
uint32_t mat_size;
|
||||
uint32_t tc_size;
|
||||
uint32_t tc_num;
|
||||
std::vector<std::vector<std::unordered_map<uint32_t, uint32_t>>> csrs_;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -25,22 +25,11 @@
|
|||
#include "emulator.h"
|
||||
#include "instr.h"
|
||||
#include "core.h"
|
||||
#include "processor_impl.h"
|
||||
#include "VX_types.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
union reg_data_t {
|
||||
Word u;
|
||||
WordI i;
|
||||
WordF f;
|
||||
float f32;
|
||||
double f64;
|
||||
uint32_t u32;
|
||||
uint64_t u64;
|
||||
int32_t i32;
|
||||
int64_t i64;
|
||||
};
|
||||
|
||||
inline uint64_t nan_box(uint32_t value) {
|
||||
return value | 0xffffffff00000000;
|
||||
}
|
||||
|
@ -128,6 +117,8 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
}
|
||||
DPN(2, "}" << std::endl);
|
||||
break;
|
||||
case RegType::Vector:
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -678,41 +669,47 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
trace->src_regs[0] = {RegType::Integer, rsrc0};
|
||||
auto trace_data = std::make_shared<LsuTraceData>(num_threads);
|
||||
trace->data = trace_data;
|
||||
uint32_t data_bytes = 1 << (func3 & 0x3);
|
||||
uint32_t data_width = 8 * data_bytes;
|
||||
for (uint32_t t = thread_start; t < num_threads; ++t) {
|
||||
if (!warp.tmask.test(t))
|
||||
continue;
|
||||
uint64_t mem_addr = rsdata[t][0].i + immsrc;
|
||||
uint64_t read_data = 0;
|
||||
this->dcache_read(&read_data, mem_addr, data_bytes);
|
||||
trace_data->mem_addrs.at(t) = {mem_addr, data_bytes};
|
||||
switch (func3) {
|
||||
case 0: // RV32I: LB
|
||||
case 1: // RV32I: LH
|
||||
rddata[t].i = sext((Word)read_data, data_width);
|
||||
break;
|
||||
case 2:
|
||||
if (opcode == Opcode::L) {
|
||||
// RV32I: LW
|
||||
if ((opcode == Opcode::L )
|
||||
|| (opcode == Opcode::FL && func3 == 2)
|
||||
|| (opcode == Opcode::FL && func3 == 3)) {
|
||||
uint32_t data_bytes = 1 << (func3 & 0x3);
|
||||
uint32_t data_width = 8 * data_bytes;
|
||||
for (uint32_t t = thread_start; t < num_threads; ++t) {
|
||||
if (!warp.tmask.test(t))
|
||||
continue;
|
||||
uint64_t mem_addr = rsdata[t][0].i + immsrc;
|
||||
uint64_t read_data = 0;
|
||||
this->dcache_read(&read_data, mem_addr, data_bytes);
|
||||
trace_data->mem_addrs.at(t) = {mem_addr, data_bytes};
|
||||
switch (func3) {
|
||||
case 0: // RV32I: LB
|
||||
case 1: // RV32I: LH
|
||||
rddata[t].i = sext((Word)read_data, data_width);
|
||||
} else {
|
||||
// RV32F: FLW
|
||||
rddata[t].u64 = nan_box((uint32_t)read_data);
|
||||
break;
|
||||
case 2:
|
||||
if (opcode == Opcode::L) {
|
||||
// RV32I: LW
|
||||
rddata[t].i = sext((Word)read_data, data_width);
|
||||
} else {
|
||||
// RV32F: FLW
|
||||
rddata[t].u64 = nan_box((uint32_t)read_data);
|
||||
}
|
||||
break;
|
||||
case 3: // RV64I: LD
|
||||
// RV32D: FLD
|
||||
case 4: // RV32I: LBU
|
||||
case 5: // RV32I: LHU
|
||||
case 6: // RV64I: LWU
|
||||
rddata[t].u64 = read_data;
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
case 3: // RV64I: LD
|
||||
// RV32D: FLD
|
||||
case 4: // RV32I: LBU
|
||||
case 5: // RV32I: LHU
|
||||
case 6: // RV64I: LWU
|
||||
rddata[t].u64 = read_data;
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
rd_write = true;
|
||||
} else {
|
||||
loadVector(instr, wid, rsdata);
|
||||
}
|
||||
rd_write = true;
|
||||
break;
|
||||
}
|
||||
case Opcode::S:
|
||||
|
@ -724,23 +721,29 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
trace->src_regs[1] = {data_type, rsrc1};
|
||||
auto trace_data = std::make_shared<LsuTraceData>(num_threads);
|
||||
trace->data = trace_data;
|
||||
uint32_t data_bytes = 1 << (func3 & 0x3);
|
||||
for (uint32_t t = thread_start; t < num_threads; ++t) {
|
||||
if (!warp.tmask.test(t))
|
||||
continue;
|
||||
uint64_t mem_addr = rsdata[t][0].i + immsrc;
|
||||
uint64_t write_data = rsdata[t][1].u64;
|
||||
trace_data->mem_addrs.at(t) = {mem_addr, data_bytes};
|
||||
switch (func3) {
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
this->dcache_write(&write_data, mem_addr, data_bytes);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
if ((opcode == Opcode::S)
|
||||
|| (opcode == Opcode::FS && func3 == 2)
|
||||
|| (opcode == Opcode::FS && func3 == 3)) {
|
||||
uint32_t data_bytes = 1 << (func3 & 0x3);
|
||||
for (uint32_t t = thread_start; t < num_threads; ++t) {
|
||||
if (!warp.tmask.test(t))
|
||||
continue;
|
||||
uint64_t mem_addr = rsdata[t][0].i + immsrc;
|
||||
uint64_t write_data = rsdata[t][1].u64;
|
||||
trace_data->mem_addrs.at(t) = {mem_addr, data_bytes};
|
||||
switch (func3) {
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
this->dcache_write(&write_data, mem_addr, data_bytes);
|
||||
break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
storeVector(instr, wid, rsdata);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -925,7 +928,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
for (uint32_t t = thread_start; t < num_threads; ++t) {
|
||||
if (!warp.tmask.test(t))
|
||||
continue;
|
||||
uint32_t frm = this->get_fpu_rm(func3, t, wid);
|
||||
uint32_t frm = (func3 == 0x7) ? this->get_csr(VX_CSR_FRM, t, wid) : func3;
|
||||
uint32_t fflags = 0;
|
||||
switch (func7) {
|
||||
case 0x00: { // RV32F: FADD.S
|
||||
|
@ -1240,7 +1243,10 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
this->update_fcrs(fflags, t, wid);
|
||||
if (fflags) {
|
||||
this->set_csr(VX_CSR_FCSR, this->get_csr(VX_CSR_FCSR, t, wid) | fflags, t, wid);
|
||||
this->set_csr(VX_CSR_FFLAGS, this->get_csr(VX_CSR_FFLAGS, t, wid) | fflags, t, wid);
|
||||
}
|
||||
}
|
||||
rd_write = true;
|
||||
break;
|
||||
|
@ -1294,7 +1300,10 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
default:
|
||||
break;
|
||||
}
|
||||
this->update_fcrs(fflags, t, wid);
|
||||
if (fflags) {
|
||||
this->set_csr(VX_CSR_FCSR, this->get_csr(VX_CSR_FCSR, t, wid) | fflags, t, wid);
|
||||
this->set_csr(VX_CSR_FFLAGS, this->get_csr(VX_CSR_FFLAGS, t, wid) | fflags, t, wid);
|
||||
}
|
||||
}
|
||||
rd_write = true;
|
||||
break;
|
||||
|
@ -1586,6 +1595,13 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
std::abort();
|
||||
}
|
||||
} break;
|
||||
case Opcode::VSET: {
|
||||
auto func6 = instr.getFunc6();
|
||||
if ((func3 == 0x7) || (func3 == 0x2 && func6 == 16) || (func3 == 0x1 && func6 == 16)) {
|
||||
rd_write = true;
|
||||
}
|
||||
executeVector(instr, wid, rsdata, rddata);
|
||||
} break;
|
||||
default:
|
||||
std::abort();
|
||||
}
|
||||
|
@ -1629,6 +1645,7 @@ void Emulator::execute(const Instr &instr, uint32_t wid, instr_trace_t *trace) {
|
|||
trace->dst_reg = {type, rdest};
|
||||
break;
|
||||
default:
|
||||
std::cout << "Unrecognized register write back type: " << type << std::endl;
|
||||
std::abort();
|
||||
break;
|
||||
}
|
||||
|
|
4493
sim/simx/execute_vector.cpp
Normal file
4493
sim/simx/execute_vector.cpp
Normal file
File diff suppressed because it is too large
Load diff
|
@ -42,6 +42,8 @@ enum class Opcode {
|
|||
// RV64 Standard Extension
|
||||
R_W = 0x3b,
|
||||
I_W = 0x1b,
|
||||
// Vector Extension
|
||||
VSET = 0x57,
|
||||
// Custom Extensions
|
||||
EXT1 = 0x0b,
|
||||
EXT2 = 0x2b,
|
||||
|
@ -56,9 +58,28 @@ enum class InstType {
|
|||
B,
|
||||
U,
|
||||
J,
|
||||
V,
|
||||
R4
|
||||
};
|
||||
|
||||
// One-hot flags recording which vector-related fields of an Instr were
// explicitly set. The Instr setters OR the matching flag into the
// instruction's use mask, and the logger prints only flagged fields.
enum set_vuse_mask {
  set_func3    = 0x0001,  // bit 0
  set_func6    = 0x0002,  // bit 1
  set_imm      = 0x0004,  // bit 2
  set_vlswidth = 0x0008,  // bit 3
  set_vmop     = 0x0010,  // bit 4
  set_vumop    = 0x0020,  // bit 5
  set_vnf      = 0x0040,  // bit 6
  set_vmask    = 0x0080,  // bit 7
  set_vs3      = 0x0100,  // bit 8
  set_zimm     = 0x0200,  // bit 9
  set_vlmul    = 0x0400,  // bit 10
  set_vsew     = 0x0800,  // bit 11
  set_vta      = 0x1000,  // bit 12
  set_vma      = 0x2000,  // bit 13
  set_vediv    = 0x4000   // bit 14
};
|
||||
|
||||
class Instr {
|
||||
public:
|
||||
Instr()
|
||||
|
@ -70,7 +91,22 @@ public:
|
|||
, rdest_(0)
|
||||
, func2_(0)
|
||||
, func3_(0)
|
||||
, func7_(0) {
|
||||
, func6_(0)
|
||||
, func7_(0)
|
||||
, vmask_(0)
|
||||
, vlsWidth_(0)
|
||||
, vMop_(0)
|
||||
, vUmop_(0)
|
||||
, vNf_(0)
|
||||
, vs3_(0)
|
||||
, has_zimm_(false)
|
||||
, vlmul_(0)
|
||||
, vsew_(0)
|
||||
, vta_(0)
|
||||
, vma_(0)
|
||||
, vediv_(0)
|
||||
, _vusemask(0)
|
||||
, _is_vec(false) {
|
||||
for (uint32_t i = 0; i < MAX_REG_SOURCES; ++i) {
|
||||
rsrc_type_[i] = RegType::None;
|
||||
rsrc_[i] = 0;
|
||||
|
@ -93,13 +129,28 @@ public:
|
|||
num_rsrcs_ = std::max<uint32_t>(num_rsrcs_, index+1);
|
||||
}
|
||||
void setFunc2(uint32_t func2) { func2_ = func2; }
|
||||
void setFunc3(uint32_t func3) { func3_ = func3; }
|
||||
void setFunc3(uint32_t func3) { func3_ = func3; _vusemask |= set_func3; }
|
||||
void setFunc6(uint32_t func6) { func6_ = func6; _vusemask |= set_func6; }
|
||||
void setFunc7(uint32_t func7) { func7_ = func7; }
|
||||
void setImm(uint32_t imm) { has_imm_ = true; imm_ = imm; }
|
||||
void setImm(uint32_t imm) { has_imm_ = true; imm_ = imm; _vusemask |= set_imm; }
|
||||
void setVlsWidth(uint32_t width) { vlsWidth_ = width; _vusemask |= set_vlswidth; }
|
||||
void setVmop(uint32_t mop) { vMop_ = mop; _vusemask |= set_vmop; }
|
||||
void setVumop(uint32_t umop) { vUmop_ = umop; _vusemask |= set_vumop; }
|
||||
void setVnf(uint32_t nf) { vNf_ = nf; _vusemask |= set_vnf; }
|
||||
void setVmask(uint32_t mask) { vmask_ = mask; _vusemask |= set_vmask; }
|
||||
void setVs3(uint32_t vs) { vs3_ = vs; _vusemask |= set_vs3; }
|
||||
void setZimm(bool has_zimm) { has_zimm_ = has_zimm; _vusemask |= set_zimm; }
|
||||
void setVlmul(uint32_t lmul) { vlmul_ = lmul; _vusemask |= set_vlmul; }
|
||||
void setVsew(uint32_t sew) { vsew_ = sew; _vusemask |= set_vsew; }
|
||||
void setVta(uint32_t vta) { vta_ = vta; _vusemask |= set_vta; }
|
||||
void setVma(uint32_t vma) { vma_ = vma; _vusemask |= set_vma; }
|
||||
void setVediv(uint32_t ediv) { vediv_ = 1 << ediv; _vusemask |= set_vediv; }
|
||||
void setVec(bool is_vec) { _is_vec = is_vec; }
|
||||
|
||||
Opcode getOpcode() const { return opcode_; }
|
||||
uint32_t getFunc2() const { return func2_; }
|
||||
uint32_t getFunc3() const { return func3_; }
|
||||
uint32_t getFunc6() const { return func6_; }
|
||||
uint32_t getFunc7() const { return func7_; }
|
||||
uint32_t getNRSrc() const { return num_rsrcs_; }
|
||||
uint32_t getRSrc(uint32_t i) const { return rsrc_[i]; }
|
||||
|
@ -108,6 +159,21 @@ public:
|
|||
RegType getRDType() const { return rdest_type_; }
|
||||
bool hasImm() const { return has_imm_; }
|
||||
uint32_t getImm() const { return imm_; }
|
||||
uint32_t getVlsWidth() const { return vlsWidth_; }
|
||||
uint32_t getVmop() const { return vMop_; }
|
||||
uint32_t getVumop() const { return vUmop_; }
|
||||
uint32_t getVnf() const { return vNf_; }
|
||||
uint32_t getVmask() const { return vmask_; }
|
||||
uint32_t getVs3() const { return vs3_; }
|
||||
bool hasZimm() const { return has_zimm_; }
|
||||
uint32_t getVlmul() const { return vlmul_; }
|
||||
uint32_t getVsew() const { return 1 << (3 + vsew_); }
|
||||
uint32_t getVsewO() const { return vsew_; }
|
||||
uint32_t getVta() const { return vta_; }
|
||||
uint32_t getVma() const { return vma_; }
|
||||
uint32_t getVediv() const { return vediv_; }
|
||||
uint32_t getVUseMask() const { return _vusemask; }
|
||||
bool isVec() const { return _is_vec; }
|
||||
|
||||
private:
|
||||
|
||||
|
@ -125,8 +191,25 @@ private:
|
|||
uint32_t rdest_;
|
||||
uint32_t func2_;
|
||||
uint32_t func3_;
|
||||
uint32_t func6_;
|
||||
uint32_t func7_;
|
||||
|
||||
// Vector
|
||||
uint32_t vmask_;
|
||||
uint32_t vlsWidth_;
|
||||
uint32_t vMop_;
|
||||
uint32_t vUmop_;
|
||||
uint32_t vNf_;
|
||||
uint32_t vs3_;
|
||||
bool has_zimm_;
|
||||
uint32_t vlmul_;
|
||||
uint32_t vsew_;
|
||||
uint32_t vta_;
|
||||
uint32_t vma_;
|
||||
uint32_t vediv_;
|
||||
uint32_t _vusemask;
|
||||
bool _is_vec;
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &, const Instr&);
|
||||
};
|
||||
|
||||
|
|
|
@ -84,7 +84,8 @@ enum class RegType {
|
|||
None,
|
||||
Integer,
|
||||
Float,
|
||||
Count
|
||||
Count,
|
||||
Vector
|
||||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &os, const RegType& type) {
|
||||
|
@ -92,6 +93,7 @@ inline std::ostream &operator<<(std::ostream &os, const RegType& type) {
|
|||
case RegType::None: break;
|
||||
case RegType::Integer: os << "x"; break;
|
||||
case RegType::Float: os << "f"; break;
|
||||
case RegType::Vector: os << "v"; break;
|
||||
default: assert(false);
|
||||
}
|
||||
return os;
|
||||
|
|
|
@ -51,7 +51,7 @@ endif
|
|||
|
||||
DBG_FLAGS += -DDEBUG_LEVEL=$(DEBUG) -DVCD_OUTPUT $(DBG_TRACE_FLAGS)
|
||||
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
SRCS = $(COMMON_DIR)/util.cpp $(COMMON_DIR)/mem.cpp $(COMMON_DIR)/softfloat_ext.cpp $(COMMON_DIR)/rvfloats.cpp $(COMMON_DIR)/dram_sim.cpp
|
||||
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
|
||||
SRCS += $(SRC_DIR)/xrt.cpp $(SRC_DIR)/xrt_sim.cpp
|
||||
|
||||
|
|
39
tests/riscv/riscv-vector-tests/README
Normal file
39
tests/riscv/riscv-vector-tests/README
Normal file
|
@ -0,0 +1,39 @@
|
|||
## Running the testcases
|
||||
|
||||
```
|
||||
XLEN=32 ./run-test.sh testcase1 testcase2
|
||||
XLEN=64 ./run-test.sh testcase1 testcase2
|
||||
|
||||
# or to run all default testcases
|
||||
XLEN=32 ./run-test.sh
|
||||
XLEN=64 ./run-test.sh
|
||||
```
|
||||
|
||||
## Adding a new testcase
|
||||
|
||||
The source code for the vector extension can be found in `sim/simx/execute_vector.cpp`.
|
||||
If you add support for a new vector instruction, please go to `run-test.sh` and add it to the default testcases.
|
||||
This will ensure your instruction is included in the regression test suite.
|
||||
|
||||
## Updating the testcase binaries
|
||||
|
||||
As `riscv-vector-tests` is still under development,
|
||||
we should periodically recompile the testcases and update the binaries.
|
||||
|
||||
To update the test case binaries run:
|
||||
|
||||
```
|
||||
XLEN=32 make -C ../../../third_party/ riscv-vector-tests
|
||||
XLEN=64 make -C ../../../third_party/ riscv-vector-tests
|
||||
```
|
||||
This requires Spike and Go to be installed on your machine.
|
||||
|
||||
Then run the testcases that you want to update - this will automatically copy them e.g.:
|
||||
```
|
||||
XLEN=64 ./run-test.sh testcase1 testcase2
|
||||
```
|
||||
|
||||
Finally use git to add the updated testcases to your commit (-f required due to .gitignore):
|
||||
```
|
||||
git add -f testcase1 testcase2
|
||||
```
|
117
tests/riscv/riscv-vector-tests/run-test.sh.in
Executable file
117
tests/riscv/riscv-vector-tests/run-test.sh.in
Executable file
|
@ -0,0 +1,117 @@
|
|||
#!/bin/bash
|
||||
VLEN=${VLEN:-256}
|
||||
XLEN=${XLEN:-32}
|
||||
|
||||
RISCV_TOOLCHAIN_PATH=${RISCV_TOOLCHAIN_PATH:-$TOOLDIR"/riscv"$XLEN"-gnu-toolchain"}
|
||||
|
||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||
RESTORE_PREV_DIR=$(pwd)
|
||||
|
||||
VECTOR_TESTS_REPOSITORY=https://github.com/MichaelJSr/testcases/raw/main
|
||||
VECTOR_TESTS_BASE_NAME=vector-tests.tar.bz2
|
||||
|
||||
# Download the pre-built vector test archive into the current directory.
#
# The archive is published as twelve pieces with the suffixes "partaa" to
# "partal". Each piece is fetched with wget, the pieces are concatenated back
# into $VECTOR_TESTS_BASE_NAME, and the result is unpacked with tar.
# The archive and its pieces are always removed afterwards.
#
# Returns 0 on success and 1 if any download, the reassembly or the
# extraction failed (the archive is not unpacked after a failed download).
vector_tests()
{
    local suffix
    local status=0

    # Bash brace expansion yields a..l directly; no eval needed.
    for suffix in {a..l}
    do
        wget "$VECTOR_TESTS_REPOSITORY/$VECTOR_TESTS_BASE_NAME.parta$suffix" || status=1
    done

    if [ "$status" -eq 0 ]; then
        # The glob sorts the pieces alphabetically, which is the order they
        # must be concatenated in.
        if ! cat "$VECTOR_TESTS_BASE_NAME".part* > "$VECTOR_TESTS_BASE_NAME"; then
            status=1
        elif ! tar -xvf "$VECTOR_TESTS_BASE_NAME"; then
            status=1
        fi
    fi

    # Clean up the archive and every downloaded piece, even after a failure.
    rm -f "$VECTOR_TESTS_BASE_NAME"*
    return "$status"
}
|
||||
|
||||
# get selected testcases from command line or run default testcases
|
||||
if [ "$#" == "0" ];
|
||||
then
|
||||
# write out test case name explicitly if there are collisions with other test names
|
||||
testcases=(vset vmv vslide vmerge vrgather \
|
||||
vlm.v vsm.v \
|
||||
vle8 vle16 vle32 \
|
||||
vse8 vse16 vse32 \
|
||||
vlseg vlsseg vluxseg vloxseg \
|
||||
vsseg vssseg vsuxseg vsoxseg \
|
||||
vlse8 vlse16 vlse32 \
|
||||
vsse8 vsse16 vsse32 \
|
||||
vloxei vluxei vsoxei vsuxei \
|
||||
vl1r vl2r vl4r vl8r \
|
||||
vs1r vs2r vs4r vs8r \
|
||||
vadd vsub vmin vmax vand vor vxor \
|
||||
vmseq vmsne vmslt vmsle vmsgt \
|
||||
vsll vsrl vsra vssr \
|
||||
vaadd vasub \
|
||||
vfmin vfmax vfcvt vfsqrt vfrsqrt7 vfrec7 vfclass vfmv vfslide vfmerge \
|
||||
vfadd vfredusum vfsub vfredosum vfredmin vfredmax vfsgnj vmf vfdiv vfrdiv vfmul vfrsub \
|
||||
vfmacc vfnmacc vfmsac vfnmsac vfmadd vfnmadd vfmsub vfnmsub \
|
||||
vredsum vredand vredor vredxor vredmin vredmax \
|
||||
vwred \
|
||||
vmand vmor vmxor vmnand vmnor vmxnor \
|
||||
vdiv vrem vmul vsmul \
|
||||
vmadd vnmsub vmacc vnmsac \
|
||||
vwadd vwsub vwmul vwmacc \
|
||||
vrsub vcompress vnclip vssub vsadd vnsra vnsrl \
|
||||
vadc vmadc vsbc vmsbc \
|
||||
vsext vzext \
|
||||
vid)
|
||||
if [ $XLEN -eq 64 ]; then
|
||||
testcases+=(vle64 vse64 vlse64 vsse64 vfwcvt vfncvt \
|
||||
vfwadd vfwsub vfwmul vfwred vfwmacc vfwnmacc vfwmsac vfwnmsac )
|
||||
fi
|
||||
else
|
||||
testcases="${@}"
|
||||
fi
|
||||
|
||||
cd $SCRIPT_DIR
|
||||
|
||||
# Fallback #2: If testcases directory exists, we will use existing testcases
|
||||
if [ ! -d "$SCRIPT_DIR/testcases" ]; then
|
||||
mkdir testcases
|
||||
cd testcases
|
||||
# Fallback #3: Otherwise, download testcases
|
||||
vector_tests
|
||||
fi
|
||||
|
||||
cd $SCRIPT_DIR/testcases/v$VLEN"x"$XLEN
|
||||
|
||||
# Fallback #1: Copy locally generated testcases (assuming they exist)
|
||||
rm *".ddr4.log"
|
||||
for testcase in ${testcases[@]}; do
|
||||
rm "$testcase"*.elf "$testcase"*.bin "$testcase"*.dump "$testcase"*.log
|
||||
cp -f $SCRIPT_DIR/../../../third_party/riscv-vector-tests/out/v"$VLEN"x"$XLEN"machine/bin/stage2/"$testcase"* .
|
||||
done
|
||||
|
||||
passed=0
|
||||
failed=0
|
||||
selected=0
|
||||
|
||||
# count all available testcases, exclude *.elf, *.bin, *.dump, *.log to prevent double counting
|
||||
all=$(($(ls | wc -l) - $(ls -d *.elf | wc -l) - $(ls -d *.bin | wc -l) - $(ls -d *.dump | wc -l) - $(ls -d *.log | wc -l)))
|
||||
|
||||
# Run every selected test case through simx and tally the results.
#
# For each file matching a selected test-case prefix:
#   1. expose it under an .elf name, disassemble it to .dump and convert it
#      to a raw .bin image with the RISC-V toolchain,
#   2. run the .bin on simx with one core, capturing all output in .log,
#   3. count it as passed when simx exits with status 13, failed otherwise.
#
# ${testcases[@]} is left unquoted on purpose: when test names come from the
# command line they are stored as one space-separated string and rely on
# word splitting here.
for testcase in ${testcases[@]}; do
    for f in "$testcase"* ; do
        ln -s "$f" "$f.elf"
        "$RISCV_TOOLCHAIN_PATH"/bin/riscv"$XLEN"-unknown-elf-objdump -D "$f.elf" > "$f.dump"
        "$RISCV_TOOLCHAIN_PATH"/bin/riscv"$XLEN"-unknown-elf-objcopy -O binary "$f.elf" "$f.bin"
        "$SCRIPT_DIR"/../../../sim/simx/simx -c 1 "$f.bin" &> "$f.log"
        if [ $? -eq 13 ]; then
            echo "$f PASSED"
            passed=$((passed + 1))
        else
            echo "$f FAILED"
            failed=$((failed + 1))
        fi
        # REG_TESTS=1 informs the script to delete the previous binary after each vector test to save disk space
        # Otherwise, the vector regression tests would run out of disk space eventually
        # REG_TESTS defaults to 0 so that a plain "./run-test.sh" run does not
        # trip a "unary operator expected" error on every iteration.
        if [ "${REG_TESTS:-0}" -eq 1 ]; then
            cat "$f.log"
            rm -f -- "$f".* "$f"
        fi
        selected=$((selected + 1))
    done
done
|
||||
cd $RESTORE_PREV_DIR
|
||||
echo "Passed $passed out of $selected selected vector tests."
|
||||
echo "Total available vector tests: $all"
|
||||
exit $failed
|
Loading…
Add table
Add a link
Reference in a new issue