Fix ALU changes, enable 64-bit in FPU

- Update FPU DPI functions to support 64-bit values
This commit is contained in:
Shashank Holla 2023-03-12 21:17:32 -04:00 committed by Blaise Tine
parent 8da207c5bf
commit 5bdff46810
13 changed files with 147 additions and 147 deletions

View file

@ -10,161 +10,161 @@
#include "VX_config.h"
extern "C" {
void dpi_fadd(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fsub(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fmul(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fnmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fnmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fadd(bool enable, long int a, long int b, const svBitVecVal* frm, long int* result, svBitVecVal* fflags);
void dpi_fsub(bool enable, long int a, long int b, const svBitVecVal* frm, long int* result, svBitVecVal* fflags);
void dpi_fmul(bool enable, long int a, long int b, const svBitVecVal* frm, long int* result, svBitVecVal* fflags);
void dpi_fmadd(bool enable, long int a, long int b, int c, const svBitVecVal* frm, long int* result, svBitVecVal* fflags);
void dpi_fmsub(bool enable, long int a, long int b, int c, const svBitVecVal* frm, long int* result, svBitVecVal* fflags);
void dpi_fnmadd(bool enable, long int a, long int b, int c, const svBitVecVal* frm, long int* result, svBitVecVal* fflags);
void dpi_fnmsub(bool enable, long int a, long int b, int c, const svBitVecVal* frm, long int* result, svBitVecVal* fflags);
void dpi_fdiv(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fsqrt(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_fdiv(bool enable, long int a, long int b, const svBitVecVal* frm, long int* result, svBitVecVal* fflags);
void dpi_fsqrt(bool enable, int a, const svBitVecVal* frm, long int* result, svBitVecVal* fflags);
void dpi_ftoi(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_ftou(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_itof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_utof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags);
void dpi_ftoi(bool enable, int a, const svBitVecVal* frm, long int* result, svBitVecVal* fflags);
void dpi_ftou(bool enable, int a, const svBitVecVal* frm, long int* result, svBitVecVal* fflags);
void dpi_itof(bool enable, int a, const svBitVecVal* frm, long int* result, svBitVecVal* fflags);
void dpi_utof(bool enable, int a, const svBitVecVal* frm, long int* result, svBitVecVal* fflags);
void dpi_fclss(bool enable, int a, int* result);
void dpi_fsgnj(bool enable, int a, int b, int* result);
void dpi_fsgnjn(bool enable, int a, int b, int* result);
void dpi_fsgnjx(bool enable, int a, int b, int* result);
void dpi_fclss(bool enable, long int a, long long int* result);
void dpi_fsgnj(bool enable, long int a, long int b, long int* result);
void dpi_fsgnjn(bool enable, long int a, long int b, long int* result);
void dpi_fsgnjx(bool enable, long int a, long int b, long int* result);
void dpi_flt(bool enable, int a, int b, int* result, svBitVecVal* fflags);
void dpi_fle(bool enable, int a, int b, int* result, svBitVecVal* fflags);
void dpi_feq(bool enable, int a, int b, int* result, svBitVecVal* fflags);
void dpi_fmin(bool enable, int a, int b, int* result, svBitVecVal* fflags);
void dpi_fmax(bool enable, int a, int b, int* result, svBitVecVal* fflags);
void dpi_flt(bool enable, long int a, long int b, long int* result, svBitVecVal* fflags);
void dpi_fle(bool enable, long int a, long int b, long int* result, svBitVecVal* fflags);
void dpi_feq(bool enable, long int a, long int b, long int* result, svBitVecVal* fflags);
void dpi_fmin(bool enable, long int a, long int b, long int* result, svBitVecVal* fflags);
void dpi_fmax(bool enable, long int a, long int b, long int* result, svBitVecVal* fflags);
}
void dpi_fadd(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
void dpi_fadd(bool enable, long int a, long int b, const svBitVecVal* frm, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fadd_s(a, b, (*frm & 0x7), fflags);
}
void dpi_fsub(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
void dpi_fsub(bool enable, long int a, long int b, const svBitVecVal* frm, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fsub_s(a, b, (*frm & 0x7), fflags);
}
void dpi_fmul(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
void dpi_fmul(bool enable, long int a, long int b, const svBitVecVal* frm, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fmul_s(a, b, (*frm & 0x7), fflags);
}
void dpi_fmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
void dpi_fmadd(bool enable, long int a, long int b, int c, const svBitVecVal* frm, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fmadd_s(a, b, c, (*frm & 0x7), fflags);
}
void dpi_fmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
void dpi_fmsub(bool enable, long int a, long int b, int c, const svBitVecVal* frm, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fmsub_s(a, b, c, (*frm & 0x7), fflags);
}
void dpi_fnmadd(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
void dpi_fnmadd(bool enable, long int a, long int b, int c, const svBitVecVal* frm, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fnmadd_s(a, b, c, (*frm & 0x7), fflags);
}
void dpi_fnmsub(bool enable, int a, int b, int c, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
void dpi_fnmsub(bool enable, long int a, long int b, int c, const svBitVecVal* frm, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fnmsub_s(a, b, c, (*frm & 0x7), fflags);
}
void dpi_fdiv(bool enable, int a, int b, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
void dpi_fdiv(bool enable, long int a, long int b, const svBitVecVal* frm, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fdiv_s(a, b, (*frm & 0x7), fflags);
}
void dpi_fsqrt(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
void dpi_fsqrt(bool enable, int a, const svBitVecVal* frm, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fsqrt_s(a, (*frm & 0x7), fflags);
}
void dpi_ftoi(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
void dpi_ftoi(bool enable, int a, const svBitVecVal* frm, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_ftoi_s(a, (*frm & 0x7), fflags);
}
void dpi_ftou(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
void dpi_ftou(bool enable, int a, const svBitVecVal* frm, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_ftou_s(a, (*frm & 0x7), fflags);
}
void dpi_itof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
void dpi_itof(bool enable, int a, const svBitVecVal* frm, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_itof_s(a, (*frm & 0x7), fflags);
}
void dpi_utof(bool enable, int a, const svBitVecVal* frm, int* result, svBitVecVal* fflags) {
void dpi_utof(bool enable, int a, const svBitVecVal* frm, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_utof_s(a, (*frm & 0x7), fflags);
}
void dpi_flt(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
void dpi_flt(bool enable, long int a, long int b, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_flt_s(a, b, fflags);
}
void dpi_fle(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
void dpi_fle(bool enable, long int a, long int b, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fle_s(a, b, fflags);
}
void dpi_feq(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
void dpi_feq(bool enable, long int a, long int b, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_feq_s(a, b, fflags);
}
void dpi_fmin(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
void dpi_fmin(bool enable, long int a, long int b, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fmin_s(a, b, fflags);
}
void dpi_fmax(bool enable, int a, int b, int* result, svBitVecVal* fflags) {
void dpi_fmax(bool enable, long int a, long int b, long int* result, svBitVecVal* fflags) {
if (!enable)
return;
*result = rv_fmax_s(a, b, fflags);
}
void dpi_fclss(bool enable, int a, int* result) {
void dpi_fclss(bool enable, long int a, long long int* result) {
if (!enable)
return;
*result = rv_fclss_s(a);
}
void dpi_fsgnj(bool enable, int a, int b, int* result) {
void dpi_fsgnj(bool enable, long int a, long int b, long int* result) {
if (!enable)
return;
*result = rv_fsgnj_s(a, b);
}
void dpi_fsgnjn(bool enable, int a, int b, int* result) {
void dpi_fsgnjn(bool enable, long int a, long int b, long int* result) {
if (!enable)
return;
*result = rv_fsgnjn_s(a, b);
}
void dpi_fsgnjx(bool enable, int a, int b, int* result) {
void dpi_fsgnjx(bool enable, long int a, long int b, long int* result) {
if (!enable)
return;
*result = rv_fsgnjx_s(a, b);

View file

@ -1,31 +1,31 @@
`ifndef FLOAT_DPI
`define FLOAT_DPI
import "DPI-C" function void dpi_fadd(input logic enable, input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fsub(input logic enable, input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmul(input logic enable, input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmadd(input logic enable, input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmsub(input logic enable, input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fnmadd(input logic enable, input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fnmsub(input logic enable, input int a, input int b, input int c, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fadd(input logic enable, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fsub(input logic enable, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmul(input logic enable, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmadd(input logic enable, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmsub(input logic enable, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fnmadd(input logic enable, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fnmsub(input logic enable, input longint a, input longint b, input longint c, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fdiv(input logic enable, input int a, input int b, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fsqrt(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fdiv(input logic enable, input longint a, input longint b, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fsqrt(input logic enable, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_ftoi(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_ftou(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_itof(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_utof(input logic enable, input int a, input bit[2:0] frm, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_ftoi(input logic enable, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_ftou(input logic enable, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_itof(input logic enable, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_utof(input logic enable, input longint a, input bit[2:0] frm, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fclss(input logic enable, input int a, output int result);
import "DPI-C" function void dpi_fsgnj(input logic enable, input int a, input int b, output int result);
import "DPI-C" function void dpi_fsgnjn(input logic enable, input int a, input int b, output int result);
import "DPI-C" function void dpi_fsgnjx(input logic enable, input int a, input int b, output int result);
import "DPI-C" function void dpi_fclss(input logic enable, input longint a, output longint result);
import "DPI-C" function void dpi_fsgnj(input logic enable, input longint a, input longint b, output longint result);
import "DPI-C" function void dpi_fsgnjn(input logic enable, input longint a, input longint b, output longint result);
import "DPI-C" function void dpi_fsgnjx(input logic enable, input longint a, input longint b, output longint result);
import "DPI-C" function void dpi_flt(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fle(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_feq(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmin(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmax(input logic enable, input int a, input int b, output int result, output bit[4:0] fflags);
import "DPI-C" function void dpi_flt(input logic enable, input longint a, input longint b, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fle(input logic enable, input longint a, input longint b, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_feq(input logic enable, input longint a, input longint b, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmin(input logic enable, input longint a, input longint b, output longint result, output bit[4:0] fflags);
import "DPI-C" function void dpi_fmax(input logic enable, input longint a, input longint b, output longint result, output bit[4:0] fflags);
`endif

View file

@ -13,8 +13,8 @@
#endif
extern "C" {
void dpi_imul(bool enable, int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth);
void dpi_idiv(bool enable, int a, int b, bool is_signed, int* quotient, int* remainder);
void dpi_imul(bool enable, long int a, long int b, bool is_signed_a, bool is_signed_b, long int* resultl, long int* resulth);
void dpi_idiv(bool enable, long int a, long int b, bool is_signed, long int* quotient, long int* remainder);
int dpi_register();
void dpi_assert(int inst, bool cond, int delay);
@ -99,12 +99,12 @@ void dpi_assert(int inst, bool cond, int delay) {
///////////////////////////////////////////////////////////////////////////////
void dpi_imul(bool enable, int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth) {
void dpi_imul(bool enable, long int a, long int b, bool is_signed_a, bool is_signed_b, long int* resultl, long int* resulth) {
if (!enable)
return;
uint64_t first = *(uint32_t*)&a;
uint64_t second = *(uint32_t*)&b;
uint64_t first = *(long int*)&a;
uint64_t second = *(long int*)&b;
if (is_signed_a && (first & 0x80000000)) {
first |= 0xFFFFFFFF00000000;
@ -125,12 +125,12 @@ void dpi_imul(bool enable, int a, int b, bool is_signed_a, bool is_signed_b, int
*resulth = (result >> 32) & 0xFFFFFFFF;
}
void dpi_idiv(bool enable, int a, int b, bool is_signed, int* quotient, int* remainder) {
void dpi_idiv(bool enable, long int a, long int b, bool is_signed, long int* quotient, long int* remainder) {
if (!enable)
return;
uint32_t dividen = *(uint32_t*)&a;
uint32_t divisor = *(uint32_t*)&b;
uint32_t dividen = *(long int*)&a;
uint32_t divisor = *(long int*)&b;
if (is_signed) {
if (b == 0) {

View file

@ -1,8 +1,8 @@
`ifndef UTIL_DPI
`define UTIL_DPI
import "DPI-C" function void dpi_imul(input logic enable, input int a, input int b, input logic is_signed_a, input logic is_signed_b, output int resultl, output int resulth);
import "DPI-C" function void dpi_idiv(input logic enable, input int a, input int b, input logic is_signed, output int quotient, output int remainder);
import "DPI-C" function void dpi_imul(input logic enable, input longint a, input longint b, input logic is_signed_a, input logic is_signed_b, output longint resultl, output longint resulth);
import "DPI-C" function void dpi_idiv(input logic enable, input longint a, input longint b, input logic is_signed, output longint quotient, output longint remainder);
import "DPI-C" function int dpi_register();
import "DPI-C" function void dpi_assert(int inst, input logic cond, input int delay);

View file

@ -28,8 +28,8 @@
`endif
// Disable MULDIV, FPU, and TEX units since irrelevant to RV64I instructions
`define EXT_M_DISABLE 1
`define EXT_F_DISABLE 1
// `define EXT_M_DISABLE 1
//`define EXT_F_DISABLE 1
`define EXT_TEX_DISABLE 1
`ifndef NUM_CLUSTERS

View file

@ -43,26 +43,26 @@ module VX_alu_unit #(
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_in1 = alu_req_if.rs1_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_in2 = alu_req_if.rs2_data;
wire [`NUM_THREADS-1:0][31:0] trunc_alu_in1, trunc_alu_result;
wire [`NUM_THREADS-1:0][31:0] trunc_alu_in1;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
// PC operations should only be for 32 bits
assign trunc_alu_in1[i] = alu_in1[i][31:0];
assign trunc_alu_result[i] = alu_result[i][31:0];
// assign trunc_alu_result[i] = alu_result[i][`XLEN-1:0];
end
// PC operations should only be for 32 bits
wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.use_PC ? {`NUM_THREADS{alu_req_if.PC}} : trunc_alu_in1;
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_in2_imm = alu_req_if.use_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_A = alu_req_if.use_PC ? {`NUM_THREADS{`XLEN'(alu_req_if.PC)}} : alu_in1;
wire [`NUM_THREADS-1:0][31:0] alu_A_trunc = alu_req_if.use_PC ? {`NUM_THREADS{alu_req_if.PC}} : trunc_alu_in1;
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_B = alu_req_if.use_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_in2_less = (alu_req_if.use_imm && ~is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [`XLEN-1:0] temp_add_result = {{`XLEN-32{1'b0}}, alu_in1_PC[i]} + alu_in2_imm[i];
always @(*) begin
case(alu_op)
`INST_ALU_ADD: add_result[i] = temp_add_result;
`INST_ALU_LUI, `INST_ALU_AUIPC, `INST_ALU_ADD_W: add_result[i] = `XLEN'($signed(temp_add_result[31:0])); //{{`XLEN-32{add_result[31]}}, temp_add_result[31:0]};
default: add_result[i] = temp_add_result;
`INST_ALU_ADD, `INST_ALU_AUIPC, `INST_ALU_LUI: add_result[i] = alu_A[i] + alu_B[i];
`INST_ALU_ADD_W: add_result[i] = `XLEN'($signed(alu_A_trunc[i] + alu_B[i][31:0]));
default: add_result[i] = alu_A[i] + alu_B[i];
endcase
end
end
@ -83,8 +83,8 @@ module VX_alu_unit #(
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [`XLEN:0] shr_in1 = {alu_signed & alu_in1[i][`XLEN-1], alu_in1[i]};
wire [`XLEN-1:0] temp_shr_result = `XLEN'($signed(shr_in1) >>> alu_in2_imm[i][SHIFT_IMM_BITS:0]);
wire [31:0] temp_shr_result_w = 32'($signed(shr_in1) >>> alu_in2_imm[i][4:0]);
wire [`XLEN-1:0] temp_shr_result = `XLEN'($signed(shr_in1) >>> alu_B[i][SHIFT_IMM_BITS:0]);
wire [31:0] temp_shr_result_w = 32'($signed(shr_in1) >>> alu_B[i][4:0]);
always @(*) begin
case(alu_op)
@ -97,14 +97,14 @@ module VX_alu_unit #(
end
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [31:0] temp_shift_result = alu_in1[i][31:0] << alu_in2_imm[i][4:0]; // only used for SLLW
wire [31:0] temp_shift_result = alu_in1[i][31:0] << alu_B[i][4:0]; // only used for SLLW
always @(*) begin
case (alu_op)
`INST_ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
`INST_ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
`INST_ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
// `INST_ALU_SLL: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
`INST_ALU_SLL: msc_result[i] = alu_in1[i] << alu_in2_imm[i][SHIFT_IMM_BITS:0]; // TODO: CHANGED: adjust this to shift using 6 bits for 64 bit
`INST_ALU_AND: msc_result[i] = alu_in1[i] & alu_B[i];
`INST_ALU_OR: msc_result[i] = alu_in1[i] | alu_B[i];
`INST_ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_B[i];
// `INST_ALU_SLL: msc_result[i] = alu_in1[i] << alu_B[i][4:0];
`INST_ALU_SLL: msc_result[i] = alu_in1[i] << alu_B[i][SHIFT_IMM_BITS:0]; // TODO: CHANGED: adjust this to shift using 6 bits for 64 bit
`INST_ALU_SLL_W: msc_result[i] = `XLEN'($signed(temp_shift_result[31:0])); // TODO: CHANGED: adjust this to shift using 6 bits for 32 signed bit
default: msc_result[i] = 'x;
endcase
@ -127,7 +127,7 @@ module VX_alu_unit #(
// branch
wire is_jal = is_br_op && (br_op == `INST_BR_JAL || br_op == `INST_BR_JALR);
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : trunc_alu_result;
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_jal_result = is_jal ? {`NUM_THREADS{`XLEN'(alu_req_if.next_PC)}} : alu_result;
wire [`XLEN-1:0] br_dest = add_result[alu_req_if.tid][`XLEN-1:0];
wire [32:0] cmp_result = sub_result[alu_req_if.tid][32:0];
@ -147,11 +147,11 @@ module VX_alu_unit #(
wire [31:0] alu_PC;
wire [`NR_BITS-1:0] alu_rd;
wire alu_wb;
wire [`NUM_THREADS-1:0][31:0] alu_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] full_alu_data;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign full_alu_data[i] = {{`XLEN-31{alu_data[i][31]}},alu_data[i][30:0]};
assign full_alu_data[i] =alu_data[i];
end
wire [`INST_BR_BITS-1:0] br_op_r;
@ -163,7 +163,7 @@ module VX_alu_unit #(
assign alu_ready_in = alu_ready_out || ~alu_valid_out;
VX_pipe_register #(
.DATAW (1 + UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `INST_BR_BITS + 1 + 1 + `XLEN),
.DATAW (1 + UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * `XLEN) + 1 + `INST_BR_BITS + 1 + 1 + `XLEN),
.RESETW (1)
) pipe_reg (
.clk (clk),

View file

@ -121,7 +121,7 @@ module VX_dispatch (
wire [`INST_FPU_BITS-1:0] fpu_op_type = `INST_FPU_BITS'(dispatch_if.op_type);
VX_skid_buffer #(
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + (3 * `NUM_THREADS * 32)),
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `INST_FPU_BITS + `INST_MOD_BITS + `NR_BITS + (3 * `NUM_THREADS * `XLEN)),
.OUT_REG (1)
) fpu_buffer (
.clk (clk),

View file

@ -73,7 +73,7 @@ module VX_fpu_agent #(
assign fpu_agent_if.ready = ready_in && mdata_and_csr_ready;
VX_skid_buffer #(
.DATAW (`INST_FPU_BITS + `INST_FRM_BITS + `NUM_THREADS * 3 * 32 + `FPU_REQ_TAG_WIDTH),
.DATAW (`INST_FPU_BITS + `INST_FRM_BITS + `NUM_THREADS * 3 * `XLEN + `FPU_REQ_TAG_WIDTH),
.OUT_REG (1)
) req_sbuf (
.clk (clk),
@ -109,7 +109,7 @@ module VX_fpu_agent #(
// commit
VX_skid_buffer #(
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + (`NUM_THREADS * 32))
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + (`NUM_THREADS * `XLEN))
) rsp_sbuf (
.clk (clk),
.reset (reset),

View file

@ -9,9 +9,9 @@ interface VX_fpu_agent_if ();
wire [31:0] PC;
wire [`INST_FPU_BITS-1:0] op_type;
wire [`INST_MOD_BITS-1:0] op_mod;
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data;
wire [`NR_BITS-1:0] rd;
wire ready;

View file

@ -29,8 +29,8 @@ module VX_fpu_arb #(
localparam LOG_NUM_REQS = `ARB_SEL_BITS(NUM_INPUTS, NUM_OUTPUTS);
localparam NUM_REQS = 1 << LOG_NUM_REQS;
localparam TAG_OUT_WIDTH = TAG_WIDTH + LOG_NUM_REQS;
localparam REQ_DATAW = TAG_OUT_WIDTH + `INST_FPU_BITS + `INST_FRM_BITS + NUM_LANES * 3 * 32;
localparam RSP_DATAW = TAG_WIDTH + NUM_LANES * (32 + `FP_FLAGS_BITS) + 1;
localparam REQ_DATAW = TAG_OUT_WIDTH + `INST_FPU_BITS + `INST_FRM_BITS + NUM_LANES * 3 * `XLEN;
localparam RSP_DATAW = TAG_WIDTH + NUM_LANES * (`XLEN + `FFLAGS_BITS) + 1;
///////////////////////////////////////////////////////////////////////

View file

@ -16,10 +16,10 @@ module VX_fpu_dpi #(
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_MOD_BITS-1:0] frm,
input wire [NUM_LANES-1:0][31:0] dataa,
input wire [NUM_LANES-1:0][31:0] datab,
input wire [NUM_LANES-1:0][31:0] datac,
output wire [NUM_LANES-1:0][31:0] result,
input wire [NUM_LANES-1:0][`XLEN-1:0] dataa,
input wire [NUM_LANES-1:0][`XLEN-1:0] datab,
input wire [NUM_LANES-1:0][`XLEN-1:0] datac,
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
output wire has_fflags,
output wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags,
@ -37,10 +37,10 @@ module VX_fpu_dpi #(
localparam NUM_FPC = 5;
localparam FPC_BITS = `LOG2UP(NUM_FPC);
localparam RSP_ARB_DATAW = (NUM_LANES * 32) + 1 + (NUM_LANES * $bits(fflags_t)) + TAGW;
localparam RSP_ARB_DATAW = (NUM_LANES * `XLEN) + 1 + (NUM_LANES * $bits(fflags_t)) + TAGW;
wire [NUM_FPC-1:0] per_core_ready_in;
wire [NUM_FPC-1:0][NUM_LANES-1:0][31:0] per_core_result;
wire [NUM_FPC-1:0][NUM_LANES-1:0][`XLEN-1:0] per_core_result;
wire [NUM_FPC-1:0][TAGW-1:0] per_core_tag_out;
reg [NUM_FPC-1:0] per_core_ready_out;
wire [NUM_FPC-1:0] per_core_valid_out;
@ -109,14 +109,14 @@ module VX_fpu_dpi #(
generate
begin : fma
wire [NUM_LANES-1:0][31:0] result_fma;
wire [NUM_LANES-1:0][31:0] result_fadd;
wire [NUM_LANES-1:0][31:0] result_fsub;
wire [NUM_LANES-1:0][31:0] result_fmul;
wire [NUM_LANES-1:0][31:0] result_fmadd;
wire [NUM_LANES-1:0][31:0] result_fmsub;
wire [NUM_LANES-1:0][31:0] result_fnmadd;
wire [NUM_LANES-1:0][31:0] result_fnmsub;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fma;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fadd;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fsub;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fmul;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fmadd;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fmsub;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fnmadd;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fnmsub;
fflags_t [NUM_LANES-1:0] fflags_fma;
fflags_t [NUM_LANES-1:0] fflags_fadd;
@ -163,7 +163,7 @@ module VX_fpu_dpi #(
0;
VX_shift_register #(
.DATAW (1 + TAGW + NUM_LANES * (32 + $bits(fflags_t))),
.DATAW (1 + TAGW + NUM_LANES * (`XLEN + $bits(fflags_t))),
.DEPTH (`LATENCY_FMA),
.RESETW (1)
) shift_reg (
@ -183,7 +183,7 @@ module VX_fpu_dpi #(
generate
begin : fdiv
wire [NUM_LANES-1:0][31:0] result_fdiv;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fdiv;
fflags_t [NUM_LANES-1:0] fflags_fdiv;
wire fdiv_valid = (valid_in && core_select == FPU_DIV);
@ -198,7 +198,7 @@ module VX_fpu_dpi #(
end
VX_shift_register #(
.DATAW (1 + TAGW + NUM_LANES * (32 + $bits(fflags_t))),
.DATAW (1 + TAGW + NUM_LANES * (`XLEN + $bits(fflags_t))),
.DEPTH (`LATENCY_FDIV),
.RESETW (1)
) shift_reg (
@ -218,7 +218,7 @@ module VX_fpu_dpi #(
generate
begin : fsqrt
wire [NUM_LANES-1:0][31:0] result_fsqrt;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fsqrt;
fflags_t [NUM_LANES-1:0] fflags_fsqrt;
wire fsqrt_valid = (valid_in && core_select == FPU_SQRT);
@ -233,7 +233,7 @@ module VX_fpu_dpi #(
end
VX_shift_register #(
.DATAW (1 + TAGW + NUM_LANES * (32 + $bits(fflags_t))),
.DATAW (1 + TAGW + NUM_LANES * (`XLEN + $bits(fflags_t))),
.DEPTH (`LATENCY_FSQRT),
.RESETW (1)
) shift_reg (
@ -253,11 +253,11 @@ module VX_fpu_dpi #(
generate
begin : fcvt
wire [NUM_LANES-1:0][31:0] result_fcvt;
wire [NUM_LANES-1:0][31:0] result_itof;
wire [NUM_LANES-1:0][31:0] result_utof;
wire [NUM_LANES-1:0][31:0] result_ftoi;
wire [NUM_LANES-1:0][31:0] result_ftou;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fcvt;
wire [NUM_LANES-1:0][`XLEN-1:0] result_itof;
wire [NUM_LANES-1:0][`XLEN-1:0] result_utof;
wire [NUM_LANES-1:0][`XLEN-1:0] result_ftoi;
wire [NUM_LANES-1:0][`XLEN-1:0] result_ftou;
fflags_t [NUM_LANES-1:0] fflags_fcvt;
fflags_t [NUM_LANES-1:0] fflags_itof;
@ -292,7 +292,7 @@ module VX_fpu_dpi #(
0;
VX_shift_register #(
.DATAW (1 + TAGW + NUM_LANES * (32 + $bits(fflags_t))),
.DATAW (1 + TAGW + NUM_LANES * (`XLEN + $bits(fflags_t))),
.DEPTH (`LATENCY_FCVT),
.RESETW (1)
) shift_reg (
@ -312,17 +312,17 @@ module VX_fpu_dpi #(
generate
begin : fncp
wire [NUM_LANES-1:0][31:0] result_fncp;
wire [NUM_LANES-1:0][31:0] result_fclss;
wire [NUM_LANES-1:0][31:0] result_flt;
wire [NUM_LANES-1:0][31:0] result_fle;
wire [NUM_LANES-1:0][31:0] result_feq;
wire [NUM_LANES-1:0][31:0] result_fmin;
wire [NUM_LANES-1:0][31:0] result_fmax;
wire [NUM_LANES-1:0][31:0] result_fsgnj;
wire [NUM_LANES-1:0][31:0] result_fsgnjn;
wire [NUM_LANES-1:0][31:0] result_fsgnjx;
reg [NUM_LANES-1:0][31:0] result_fmv;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fncp;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fclss;
wire [NUM_LANES-1:0][`XLEN-1:0] result_flt;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fle;
wire [NUM_LANES-1:0][`XLEN-1:0] result_feq;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fmin;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fmax;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fsgnj;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fsgnjn;
wire [NUM_LANES-1:0][`XLEN-1:0] result_fsgnjx;
reg [NUM_LANES-1:0][`XLEN-1:0] result_fmv;
fflags_t [NUM_LANES-1:0] fflags_fncp;
fflags_t [NUM_LANES-1:0] fflags_flt;
@ -372,7 +372,7 @@ module VX_fpu_dpi #(
0;
VX_shift_register #(
.DATAW (1 + TAGW + 1 + NUM_LANES * (32 + $bits(fflags_t))),
.DATAW (1 + TAGW + 1 + NUM_LANES * (`XLEN + $bits(fflags_t))),
.DEPTH (`LATENCY_FNCP),
.RESETW (1)
) shift_reg (

View file

@ -9,9 +9,9 @@ interface VX_fpu_req_if #(
wire valid;
wire [`INST_FPU_BITS-1:0] op_type;
wire [`INST_FRM_BITS-1:0] frm;
wire [NUM_LANES-1:0][31:0] dataa;
wire [NUM_LANES-1:0][31:0] datab;
wire [NUM_LANES-1:0][31:0] datac;
wire [NUM_LANES-1:0][`XLEN-1:0] dataa;
wire [NUM_LANES-1:0][`XLEN-1:0] datab;
wire [NUM_LANES-1:0][`XLEN-1:0] datac;
wire [TAG_WIDTH-1:0] tag;
wire ready;

View file

@ -11,7 +11,7 @@ interface VX_fpu_rsp_if #(
) ();
wire valid;
wire [NUM_LANES-1:0][31:0] result;
wire [NUM_LANES-1:0][`XLEN-1:0] result;
fflags_t [NUM_LANES-1:0] fflags;
wire has_fflags;
wire [TAG_WIDTH-1:0] tag;