mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
fixed fpu serialization
This commit is contained in:
parent
c28449f515
commit
19d6142023
7 changed files with 62 additions and 60 deletions
|
@ -41,31 +41,23 @@ isa()
|
|||
make -C tests/riscv/isa run-simx
|
||||
make -C tests/riscv/isa run-rtlsim
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32f
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32f
|
||||
|
||||
if [ "$XLEN" == "64" ]
|
||||
then
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64d
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64d
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64d
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64d
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DPI -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64f
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_FPNEW -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64f
|
||||
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-64fx
|
||||
make -C sim/rtlsim clean && CONFIGS="-DFPU_DSP -DEXT_D_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-64fx
|
||||
fi
|
||||
|
||||
# clean build
|
||||
|
@ -257,8 +249,7 @@ config2()
|
|||
make -C tests/regression/dogfood clean-kernel
|
||||
|
||||
# disabling M & F extensions
|
||||
make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null
|
||||
make -C tests/riscv/isa run-rtlsim-32i
|
||||
make -C sim/rtlsim clean && CONFIGS="-DEXT_M_DISABLE -DEXT_F_DISABLE" make -C sim/rtlsim > /dev/null && make -C tests/riscv/isa run-rtlsim-32i
|
||||
make -C sim/rtlsim clean
|
||||
|
||||
# disabling ZICOND extension
|
||||
|
|
|
@ -46,21 +46,29 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
`UNUSED_VAR (frm)
|
||||
localparam DATAW = 32 + `INST_FRM_BITS + 1 + 1;
|
||||
|
||||
wire [NUM_LANES-1:0][DATAW-1:0] data_in;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
fflags_t [NUM_LANES-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][31:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][DATAW-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign data_in[i][0 +: 32] = dataa[i];
|
||||
assign data_in[i][32 +: `INST_FRM_BITS] = frm;
|
||||
assign data_in[i][32 + `INST_FRM_BITS +: 1] = is_itof;
|
||||
assign data_in[i][32 + `INST_FRM_BITS + 1 +: 1] = is_signed;
|
||||
end
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FCVT),
|
||||
.DATA_IN_WIDTH(32),
|
||||
.DATA_IN_WIDTH(DATAW),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0),
|
||||
|
@ -69,7 +77,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.data_in (dataa),
|
||||
.data_in (data_in),
|
||||
.tag_in ({mask_in, tag_in}),
|
||||
.ready_in (ready_in),
|
||||
.pe_enable (pe_enable),
|
||||
|
@ -81,6 +89,8 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
`UNUSED_VAR (pe_data_in)
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign result[i] = data_out[i][0 +: 32];
|
||||
assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS];
|
||||
|
@ -94,9 +104,9 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (pe_enable),
|
||||
.frm (frm),
|
||||
.is_itof (is_itof),
|
||||
.is_signed (is_signed),
|
||||
.frm (pe_data_in[0][32 +: `INST_FRM_BITS]),
|
||||
.is_itof (pe_data_in[0][32 + `INST_FRM_BITS +: 1]),
|
||||
.is_signed (pe_data_in[0][32 + `INST_FRM_BITS + 1 +: 1]),
|
||||
.dataa (pe_data_in[i][0 +: 32]),
|
||||
.result (pe_data_out[i][0 +: 32]),
|
||||
.fflags (pe_data_out[i][32 +: `FP_FLAGS_BITS])
|
||||
|
|
|
@ -46,13 +46,15 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
);
|
||||
`UNUSED_VAR (frm)
|
||||
|
||||
wire [NUM_LANES-1:0][2*32-1:0] data_in;
|
||||
localparam DATAW = 2 * 32;
|
||||
|
||||
wire [NUM_LANES-1:0][DATAW-1:0] data_in;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][2*32-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][DATAW-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
|
@ -64,7 +66,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
|
|||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FDIV),
|
||||
.DATA_IN_WIDTH(2*32),
|
||||
.DATA_IN_WIDTH(DATAW),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0),
|
||||
|
|
|
@ -49,15 +49,15 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
`UNUSED_VAR (frm)
|
||||
localparam DATAW = 3 * 32 + `INST_FRM_BITS;
|
||||
|
||||
wire [NUM_LANES-1:0][3*32-1:0] data_in;
|
||||
wire [NUM_LANES-1:0][DATAW-1:0] data_in;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][3*32-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][DATAW-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
reg [NUM_LANES-1:0][31:0] a, b, c;
|
||||
|
@ -66,9 +66,9 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
always @(*) begin
|
||||
if (is_madd) begin
|
||||
// MADD / MSUB / NMADD / NMSUB
|
||||
a[i] = is_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i];
|
||||
a[i] = {is_neg ^ dataa[i][31], dataa[i][30:0]};
|
||||
b[i] = datab[i];
|
||||
c[i] = (is_neg ^ is_sub) ? {~datac[i][31], datac[i][30:0]} : datac[i];
|
||||
c[i] = {is_neg ^ is_sub ^ datac[i][31], datac[i][30:0]};
|
||||
end else begin
|
||||
if (is_neg) begin
|
||||
// MUL
|
||||
|
@ -77,9 +77,9 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
c[i] = '0;
|
||||
end else begin
|
||||
// ADD / SUB
|
||||
a[i] = 32'h3f800000; // 1.0f
|
||||
b[i] = dataa[i];
|
||||
c[i] = is_sub ? {~datab[i][31], datab[i][30:0]} : datab[i];
|
||||
a[i] = dataa[i];
|
||||
b[i] = 32'h3f800000; // 1.0f
|
||||
c[i] = {is_sub ^ datab[i][31], datab[i][30:0]};
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -89,13 +89,14 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
assign data_in[i][0 +: 32] = a[i];
|
||||
assign data_in[i][32 +: 32] = b[i];
|
||||
assign data_in[i][64 +: 32] = c[i];
|
||||
assign data_in[i][96 +: `INST_FRM_BITS] = frm;
|
||||
end
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FMA),
|
||||
.DATA_IN_WIDTH(3*32),
|
||||
.DATA_IN_WIDTH(DATAW),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (1), // must be registered for DSPs
|
||||
|
@ -116,6 +117,8 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
`UNUSED_VAR (pe_data_in)
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign result[i] = data_out[i][0 +: 32];
|
||||
assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS];
|
||||
|
@ -177,10 +180,10 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
|
|||
dpi_fmadd (
|
||||
pe_enable,
|
||||
int'(0),
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]},
|
||||
{32'hffffffff, pe_data_in[i][32 +: 32]},
|
||||
{32'hffffffff, pe_data_in[i][64 +: 32]},
|
||||
frm,
|
||||
{32'hffffffff, pe_data_in[i][0 +: 32]}, // a
|
||||
{32'hffffffff, pe_data_in[i][32 +: 32]}, // b
|
||||
{32'hffffffff, pe_data_in[i][64 +: 32]}, // c
|
||||
pe_data_in[0][96 +: `INST_FRM_BITS], // frm
|
||||
r,
|
||||
f
|
||||
);
|
||||
|
|
|
@ -45,27 +45,29 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
`UNUSED_VAR (frm)
|
||||
localparam DATAW = 2 * 32 + `INST_FRM_BITS + `INST_FPU_BITS;
|
||||
|
||||
wire [NUM_LANES-1:0][2*32-1:0] data_in;
|
||||
wire [NUM_LANES-1:0][DATAW-1:0] data_in;
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
fflags_t [NUM_LANES-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][2*32-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][DATAW-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign data_in[i][0 +: 32] = dataa[i];
|
||||
assign data_in[i][32 +: 32] = datab[i];
|
||||
assign data_in[i][64 +: `INST_FRM_BITS] = frm;
|
||||
assign data_in[i][64 + `INST_FRM_BITS +: `INST_FPU_BITS] = op_type;
|
||||
end
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FNCP),
|
||||
.DATA_IN_WIDTH(2*32),
|
||||
.DATA_IN_WIDTH(DATAW),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0),
|
||||
|
@ -86,6 +88,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
`UNUSED_VAR (pe_data_in)
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign result[i] = data_out[i][0 +: 32];
|
||||
assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS];
|
||||
|
@ -99,8 +103,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (pe_enable),
|
||||
.frm (frm),
|
||||
.op_type (op_type),
|
||||
.frm (pe_data_in[0][64 +: `INST_FRM_BITS]),
|
||||
.op_type (pe_data_in[0][64 + `INST_FRM_BITS +: `INST_FPU_BITS]),
|
||||
.dataa (pe_data_in[i][0 +: 32]),
|
||||
.datab (pe_data_in[i][32 +: 32]),
|
||||
.result (pe_data_out[i][0 +: 32]),
|
||||
|
|
|
@ -46,19 +46,21 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
|
|||
|
||||
`UNUSED_VAR (frm)
|
||||
|
||||
localparam DATAW = 32;
|
||||
|
||||
wire [NUM_LANES-1:0] mask_out;
|
||||
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
|
||||
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
|
||||
|
||||
wire pe_enable;
|
||||
wire [NUM_PES-1:0][31:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][DATAW-1:0] pe_data_in;
|
||||
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
|
||||
|
||||
VX_pe_serializer #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.NUM_PES (NUM_PES),
|
||||
.LATENCY (`LATENCY_FSQRT),
|
||||
.DATA_IN_WIDTH(32),
|
||||
.DATA_IN_WIDTH(DATAW),
|
||||
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
|
||||
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
|
||||
.PE_REG (0),
|
||||
|
|
|
@ -77,17 +77,7 @@ module VX_pe_serializer #(
|
|||
.data_out (pe_data_out)
|
||||
);
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1),
|
||||
.RESETW (1),
|
||||
.DEPTH (PE_REG)
|
||||
) pe_en_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (1'b1),
|
||||
.data_in (enable),
|
||||
.data_out (pe_enable)
|
||||
);
|
||||
assign pe_enable = enable;
|
||||
|
||||
if (NUM_LANES != NUM_PES) begin
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue