FPU SVDPI support complete

This commit is contained in:
Blaise Tine 2020-09-01 00:59:37 -04:00
parent c1df08843c
commit 4e8b9fb296
16 changed files with 17598 additions and 28978 deletions

View file

@ -8,7 +8,7 @@ VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++
VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump
VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
VX_CFLAGS += -march=rv32imf -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections
VX_CFLAGS += -march=rv32imf -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -nostartfiles -Wl,--gc-sections
VX_CFLAGS += -I$(VORTEX_RT_PATH)/include
VX_LDFLAGS += $(VORTEX_RT_PATH)/libvortexrt.a

View file

@ -40,7 +40,7 @@ public:
this->add_test("fsqrt", new Test_FSQRT());
this->add_test("ftoi", new Test_FTOI());
this->add_test("ftou", new Test_FTOU());
this->add_test("tof", new Test_ITOF());
this->add_test("itof", new Test_ITOF());
this->add_test("utof", new Test_UTOF());
#endif
}
@ -257,14 +257,14 @@ int main(int argc, char *argv[]) {
(void*)vx_host_ptr(src2_buf));
if (errors != 0) {
std::cout << "found " << errors << " errors!" << std::endl;
std::cout << "FAILED!" << std::endl << std::flush;
std::cout << "Test" << t << "-" << name << " FAILED!" << std::endl << std::flush;
if (stop_on_error) {
cleanup();
exit(1);
}
exitcode = 1;
} else {
std::cout << "PASSED!" << std::endl << std::flush;
std::cout << "Test" << t << "-" << name << " PASSED!" << std::endl << std::flush;
}
}

Binary file not shown.

View file

@ -247,7 +247,7 @@ void kernel_fsqrt(void* arg) {
for (uint32_t i = 0; i < count; ++i) {
float a = src0_ptr[offset+i];
float b = src1_ptr[offset+i];
float c = sqrt(a * b);
float c = sqrtf(a * b);
dst_ptr[offset+i] = c;
}
}

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -59,7 +59,9 @@
`define EXT_F_ENABLE
`endif
`ifndef FPNEW_DISABLE
`define FPNEW_ENABLE
`endif
// Device identification
`define VENDOR_ID 0

View file

@ -64,13 +64,13 @@ module VX_scoreboard #(
assign ibuf_deq_if.ready = ~(delay || exe_delay || gpr_delay);
`ifdef DBG_PRINT_PIPELINE
/*always @(posedge clk) begin
always @(posedge clk) begin
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.curr_PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
inuse_regs[ibuf_deq_if.rd], inuse_regs[ibuf_deq_if.rs1], inuse_regs[ibuf_deq_if.rs2], inuse_regs[ibuf_deq_if.rs3], exe_delay, gpr_delay);
end
end*/
end
`endif
endmodule

View file

@ -39,7 +39,7 @@ module VX_fp_div #(
);
`else
always @(posedge clk) begin
dpi_fdiv(clk, ~stall, dataa[i], datab[i], result[i]);
dpi_fdiv(8*LANES+i, ~stall, valid_in, dataa[i], datab[i], result[i]);
end
`endif
end

View file

@ -53,8 +53,8 @@ module VX_fp_ftoi #(
);
`else
always @(posedge clk) begin
dpi_ftoi(clk, ~stall, dataa[i], result_s);
dpi_ftou(clk, ~stall, dataa[i], result_u);
dpi_ftoi(10*LANES+i, ~stall, valid_in, dataa[i], result_s);
dpi_ftou(11*LANES+i, ~stall, valid_in, dataa[i], result_u);
end
`endif

View file

@ -53,8 +53,8 @@ module VX_fp_itof #(
);
`else
always @(posedge clk) begin
dpi_itof(clk, ~stall, dataa[i], result_s);
dpi_utof(clk, ~stall, dataa[i], result_u);
dpi_itof(12*LANES+i, ~stall, valid_in, dataa[i], result_s);
dpi_utof(13*LANES+i, ~stall, valid_in, dataa[i], result_u);
end
`endif
@ -63,7 +63,7 @@ module VX_fp_itof #(
VX_shift_register #(
.DATAW(TAGW + 1 + 1),
.DEPTH(`LATENCY_FTOI)
.DEPTH(`LATENCY_ITOF)
) shift_reg (
.clk(clk),
.reset(reset),

View file

@ -245,11 +245,11 @@ module VX_fp_madd #(
defparam mac_fp_msub.accum_adder_clock = "none";
`else
always @(posedge clk) begin
dpi_fadd(clk, ~stall, dataa[i], datab[i], result_add);
dpi_fsub(clk, ~stall, dataa[i], datab[i], result_sub);
dpi_fmul(clk, ~stall, dataa[i], datab[i], result_mul);
dpi_fmadd(clk, ~stall, dataa[i], datab[i], datac[i], result_madd);
dpi_fmsub(clk, ~stall, dataa[i], datab[i], datac[i], result_msub);
dpi_fadd(0*LANES+i, ~stall, valid_in, dataa[i], datab[i], result_add);
dpi_fsub(1*LANES+i, ~stall, valid_in, dataa[i], datab[i], result_sub);
dpi_fmul(2*LANES+i, ~stall, valid_in, dataa[i], datab[i], result_mul);
dpi_fmadd(3*LANES+i, ~stall, valid_in, dataa[i], datab[i], datac[i], result_madd);
dpi_fmsub(4*LANES+i, ~stall, valid_in, dataa[i], datab[i], datac[i], result_msub);
end
`endif

View file

@ -161,10 +161,12 @@ module VX_fp_nmadd #(
defparam mac_fp_neg.adder_input_clock = "0";
defparam mac_fp_neg.accum_adder_clock = "none";
`else
reg valid_in_st0;
always @(posedge clk) begin
dpi_fmadd(clk, ~stall, dataa[i], datab[i], datac[i], result_madd);
dpi_fmsub(clk, ~stall, dataa[i], datab[i], datac[i], result_msub);
dpi_fsub(clk, ~stall, 32'b0, result_st0, result[i]);
valid_in_st0 <= reset ? 0 : valid_in;
dpi_fmadd(5*LANES+i, ~stall, valid_in, dataa[i], datab[i], datac[i], result_madd);
dpi_fmsub(6*LANES+i, ~stall, valid_in, dataa[i], datab[i], datac[i], result_msub);
dpi_fsub(7*LANES+i, ~stall, valid_in_st0, 32'b0, result_st0, result[i]);
end
`endif
end

View file

@ -37,7 +37,7 @@ module VX_fp_sqrt #(
);
`else
always @(posedge clk) begin
dpi_fsqrt(clk, ~stall, dataa[i], result[i]);
dpi_fsqrt(9*LANES+i, ~stall, valid_in, dataa[i], result[i]);
end
`endif
end

View file

@ -4,22 +4,25 @@
#include <vector>
#include <mutex>
#include "svdpi.h"
#include "verilated_vpi.h"
#include "VX_config.h"
extern "C" {
void dpi_fadd(bool clk, bool enable, int a, int b, int* result);
void dpi_fsub(bool clk, bool enable, int a, int b, int* result);
void dpi_fmul(bool clk, bool enable, int a, int b, int* result);
void dpi_fmadd(bool clk, bool enable, int a, int b, int c, int* result);
void dpi_fmsub(bool clk, bool enable, int a, int b, int c, int* result);
void dpi_fdiv(bool clk, bool enable, int a, int b, int* result);
void dpi_fsqrt(bool clk, bool enable, int a, int* result);
void dpi_ftoi(bool clk, bool enable, int a, int* result);
void dpi_ftou(bool clk, bool enable, int a, int* result);
void dpi_itof(bool clk, bool enable, int a, int* result);
void dpi_utof(bool clk, bool enable, int a, int* result);
void dpi_fadd(int inst, bool enable, bool valid, int a, int b, int* result);
void dpi_fsub(int inst, bool enable, bool valid, int a, int b, int* result);
void dpi_fmul(int inst, bool enable, bool valid, int a, int b, int* result);
void dpi_fmadd(int inst, bool enable, bool valid, int a, int b, int c, int* result);
void dpi_fmsub(int inst, bool enable, bool valid, int a, int b, int c, int* result);
void dpi_fdiv(int inst, bool enable, bool valid, int a, int b, int* result);
void dpi_fsqrt(int inst, bool enable, bool valid, int a, int* result);
void dpi_ftoi(int inst, bool enable, bool valid, int a, int* result);
void dpi_ftou(int inst, bool enable, bool valid, int a, int* result);
void dpi_itof(int inst, bool enable, bool valid, int a, int* result);
void dpi_utof(int inst, bool enable, bool valid, int a, int* result);
}
extern double sc_time_stamp();
class ShiftRegister {
public:
ShiftRegister() : init_(false), depth_(0) {}
@ -32,179 +35,177 @@ public:
}
}
void push(int value, bool clk, bool enable) {
if (clk || !enable)
return;
void push(int value, bool enable, bool valid) {
if (!enable)
return;
for (unsigned i = 0; i < depth_-1; ++i) {
buffer_[i] = buffer_[i+1];
}
buffer_[depth_-1] = value;
buffer_[depth_-1].value = value;
buffer_[depth_-1].valid = valid;
}
int top() const {
return buffer_[0];
return buffer_[0].value;
}
bool valid() const {
return buffer_[0].valid;
}
private:
std::vector<int> buffer_;
struct entry_t {
int value;
bool valid;
};
std::vector<entry_t> buffer_;
int top_;
unsigned depth_;
bool init_;
};
class Instances {
public:
ShiftRegister& get(svScope scope) {
ShiftRegister& get(int inst) {
mutex_.lock();
ShiftRegister& reg = instances_[scope];
ShiftRegister& sr = instances_[inst];
mutex_.unlock();
return reg;
return sr;
}
private:
std::unordered_map<svScope, ShiftRegister> instances_;
std::unordered_map<int, ShiftRegister> instances_;
std::mutex mutex_;
};
Instances instances;
void dpi_fadd(bool clk, bool enable, int a, int b, int* result) {
auto scope = svGetScope();
ShiftRegister& inst = instances.get(scope);
void dpi_fadd(int inst, bool enable, bool valid, int a, int b, int* result) {
ShiftRegister& sr = instances.get(inst);
float fa = *(float*)&a;
float fb = *(float*)&b;
float fr = fa + fb;
inst.ensure_init(LATENCY_FMADD);
inst.push(*(int*)&fr, clk, enable);
*result = inst.top();
sr.ensure_init(LATENCY_FMADD);
sr.push(*(int*)&fr, enable, valid);
*result = sr.top();
}
void dpi_fsub(bool clk, bool enable, int a, int b, int* result) {
auto scope = svGetScope();
ShiftRegister& inst = instances.get(scope);
void dpi_fsub(int inst, bool enable, bool valid, int a, int b, int* result) {
ShiftRegister& sr = instances.get(inst);
float fa = *(float*)&a;
float fb = *(float*)&b;
float fr = fa - fb;
inst.ensure_init(LATENCY_FMADD);
inst.push(*(int*)&fr, clk, enable);
*result = inst.top();
sr.ensure_init(LATENCY_FMADD);
sr.push(*(int*)&fr, enable, valid);
*result = sr.top();
}
void dpi_fmul(bool clk, bool enable, int a, int b, int* result) {
auto scope = svGetScope();
ShiftRegister& inst = instances.get(scope);
void dpi_fmul(int inst, bool enable, bool valid, int a, int b, int* result) {
ShiftRegister& sr = instances.get(inst);
float fa = *(float*)&a;
float fb = *(float*)&b;
float fr = fa * fb;
inst.ensure_init(LATENCY_FMADD);
inst.push(*(int*)&fr, clk, enable);
*result = inst.top();
sr.ensure_init(LATENCY_FMADD);
sr.push(*(int*)&fr, enable, valid);
*result = sr.top();
}
void dpi_fmadd(bool clk, bool enable, int a, int b, int c, int* result) {
auto scope = svGetScope();
ShiftRegister& inst = instances.get(scope);
void dpi_fmadd(int inst, bool enable, bool valid, int a, int b, int c, int* result) {
ShiftRegister& sr = instances.get(inst);
float fa = *(float*)&a;
float fb = *(float*)&b;
float fc = *(float*)&c;
float fr = fa * fb + fc;
inst.ensure_init(LATENCY_FMADD);
inst.push(*(int*)&fr, clk, enable);
*result = inst.top();
sr.ensure_init(LATENCY_FMADD);
sr.push(*(int*)&fr, enable, valid);
*result = sr.top();
}
void dpi_fmsub(bool clk, bool enable, int a, int b, int c, int* result) {
auto scope = svGetScope();
ShiftRegister& inst = instances.get(scope);
void dpi_fmsub(int inst, bool enable, bool valid, int a, int b, int c, int* result) {
ShiftRegister& sr = instances.get(inst);
float fa = *(float*)&a;
float fb = *(float*)&b;
float fc = *(float*)&c;
float fr = fa * fb - fc;
inst.ensure_init(LATENCY_FMADD);
inst.push(*(int*)&fr, clk, enable);
*result = inst.top();
sr.ensure_init(LATENCY_FMADD);
sr.push(*(int*)&fr, enable, valid);
*result = sr.top();
}
void dpi_fdiv(bool clk, bool enable, int a, int b, int* result) {
auto scope = svGetScope();
ShiftRegister& inst = instances.get(scope);
void dpi_fdiv(int inst, bool enable, bool valid, int a, int b, int* result) {
ShiftRegister& sr = instances.get(inst);
float fa = *(float*)&a;
float fb = *(float*)&b;
float fr = fa / fb;
inst.ensure_init(LATENCY_FDIV);
inst.push(*(int*)&fr, clk, enable);
*result = inst.
top();
sr.ensure_init(LATENCY_FDIV);
sr.push(*(int*)&fr, enable, valid);
*result = sr.top();
}
void dpi_fsqrt(bool clk, bool enable, int a, int* result) {
auto scope = svGetScope();
ShiftRegister& inst = instances.get(scope);
void dpi_fsqrt(int inst, bool enable, bool valid, int a, int* result) {
ShiftRegister& sr = instances.get(inst);
float fa = *(float*)&a;
float fr = sqrt(fa);
float fr = sqrtf(fa);
inst.ensure_init(LATENCY_FSQRT);
inst.push(*(int*)&fr, clk, enable);
*result = inst.top();
sr.ensure_init(LATENCY_FSQRT);
sr.push(*(int*)&fr, enable, valid);
*result = sr.top();
}
void dpi_ftoi(bool clk, bool enable, int a, int* result) {
auto scope = svGetScope();
ShiftRegister& inst = instances.get(scope);
void dpi_ftoi(int inst, bool enable, bool valid, int a, int* result) {
ShiftRegister& sr = instances.get(inst);
float fa = *(float*)&a;
int ir = int(fa);
inst.ensure_init(LATENCY_FTOI);
inst.push(ir, clk, enable);
*result = inst.top();
sr.ensure_init(LATENCY_FTOI);
sr.push(ir, enable, valid);
*result = sr.top();
}
void dpi_ftou(bool clk, bool enable, int a, int* result) {
auto scope = svGetScope();
ShiftRegister& inst = instances.get(scope);
void dpi_ftou(int inst, bool enable, bool valid, int a, int* result) {
ShiftRegister& sr = instances.get(inst);
float fa = *(float*)&a;
unsigned ir = unsigned(fa);
inst.ensure_init(LATENCY_FTOI);
inst.push(ir, clk, enable);
*result = inst.top();
sr.ensure_init(LATENCY_FTOI);
sr.push(ir, enable, valid);
*result = sr.top();
}
void dpi_itof(bool clk, bool enable, int a, int* result) {
auto scope = svGetScope();
ShiftRegister& inst = instances.get(scope);
void dpi_itof(int inst, bool enable, bool valid, int a, int* result) {
ShiftRegister& sr = instances.get(inst);
float fr = float(a);
float fr = (float)a;
inst.ensure_init(LATENCY_ITOF);
inst.push(*(int*)&fr, clk, enable);
*result = inst.top();
sr.ensure_init(LATENCY_ITOF);
sr.push(*(int*)&fr, enable, valid);
*result = sr.top();
}
void dpi_utof(bool clk, bool enable, int a, int* result) {
auto scope = svGetScope();
ShiftRegister& inst = instances.get(scope);
void dpi_utof(int inst, bool enable, bool valid, int a, int* result) {
ShiftRegister& sr = instances.get(inst);
unsigned ua = *(unsigned*)&a;
float fr = float(ua);
float fr = (float)ua;
inst.ensure_init(LATENCY_ITOF);
inst.push(*(int*)&fr, clk, enable);
*result = inst.top();
sr.ensure_init(LATENCY_ITOF);
sr.push(*(int*)&fr, enable, valid);
*result = sr.top();
}

View file

@ -1,16 +1,16 @@
`ifndef FLOAT_DPI
`define FLOAT_DPI
import "DPI-C" context function void dpi_fadd(input logic clk, input logic enable, input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsub(input logic clk, input logic enable, input int a, input int b, output int result);
import "DPI-C" context function void dpi_fmul(input logic clk, input logic enable, input int a, input int b, output int result);
import "DPI-C" context function void dpi_fmadd(input logic clk, input logic enable, input int a, input int b, input int c, output int result);
import "DPI-C" context function void dpi_fmsub(input logic clk, input logic enable, input int a, input int b, input int c, output int result);
import "DPI-C" context function void dpi_fdiv(input logic clk, input logic enable, input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsqrt(input logic clk, input logic enable, input int a, output int result);
import "DPI-C" context function void dpi_ftoi(input logic clk, input logic enable, input int a, output int result);
import "DPI-C" context function void dpi_ftou(input logic clk, input logic enable, input int a, output int result);
import "DPI-C" context function void dpi_itof(input logic clk, input logic enable, input int a, output int result);
import "DPI-C" context function void dpi_utof(input logic clk, input logic enable, input int a, output int result);
import "DPI-C" context function void dpi_fadd(int inst, input logic enable, input logic valid, input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsub(int inst, input logic enable, input logic valid, input int a, input int b, output int result);
import "DPI-C" context function void dpi_fmul(int inst, input logic enable, input logic valid, input int a, input int b, output int result);
import "DPI-C" context function void dpi_fmadd(int inst, input logic enable, input logic valid, input int a, input int b, input int c, output int result);
import "DPI-C" context function void dpi_fmsub(int inst, input logic enable, input logic valid, input int a, input int b, input int c, output int result);
import "DPI-C" context function void dpi_fdiv(int inst, input logic enable, input logic valid, input int a, input int b, output int result);
import "DPI-C" context function void dpi_fsqrt(int inst, input logic enable, input logic valid, input int a, output int result);
import "DPI-C" context function void dpi_ftoi(int inst, input logic enable, input logic valid, input int a, output int result);
import "DPI-C" context function void dpi_ftou(int inst, input logic enable, input logic valid, input int a, output int result);
import "DPI-C" context function void dpi_itof(int inst, input logic enable, input logic valid, input int a, output int result);
import "DPI-C" context function void dpi_utof(int inst, input logic enable, input logic valid, input int a, output int result);
`endif