synthesis fixes

This commit is contained in:
Blaise Tine 2022-07-25 01:41:34 -07:00
parent 8c9ac27716
commit 6c1b08f45d
26 changed files with 1248 additions and 641 deletions

View file

@ -266,20 +266,12 @@
`endif
`ifndef LATENCY_FDIV
`ifdef ALTERA_S10
`define LATENCY_FDIV 34
`else
`define LATENCY_FDIV 15
`endif
`endif
`ifndef LATENCY_FSQRT
`ifdef ALTERA_S10
`define LATENCY_FSQRT 25
`else
`define LATENCY_FSQRT 10
`endif
`endif
`ifndef LATENCY_FDIVSQRT
`define LATENCY_FDIVSQRT 32

View file

@ -25,12 +25,58 @@ module VX_fpu_div #(
input wire ready_out,
output wire valid_out
);
wire stall = ~ready_out && valid_out;
);
`ifdef QUARTUS
VX_acl_fdiv #(
.NUM_LANES (NUM_LANES),
.TAGW (TAGW)
) fp_div (
.clk (clk),
.reset (reset),
.valid_in (valid_in),
.ready_in (ready_in),
.tag_in (tag_in),
.frm (frm),
.dataa (dataa),
.datab (datab),
.has_fflags (has_fflags),
.fflags (fflags),
.result (result),
.tag_out (tag_out),
.valid_out (valid_out),
.ready_out (ready_out)
);
`elsif VIVADO
VX_xil_fdiv #(
.NUM_LANES (NUM_LANES),
.TAGW (TAGW)
) fp_div (
.clk (clk),
.reset (reset),
.valid_in (valid_in),
.ready_in (ready_in),
.tag_in (tag_in),
.frm (frm),
.dataa (dataa),
.datab (datab),
.has_fflags (has_fflags),
.fflags (fflags),
.result (result),
.tag_out (tag_out),
.valid_out (valid_out),
.ready_out (ready_out)
);
`else
wire stall = ~ready_out && valid_out;
wire enable = ~stall;
for (genvar i = 0; i < NUM_LANES; ++i) begin
`ifdef VERILATOR
for (genvar i = 0; i < NUM_LANES; ++i) begin
reg [31:0] r;
fflags_t f;
@ -50,18 +96,6 @@ module VX_fpu_div #(
.data_in (r),
.data_out (result[i])
);
`else
`RESET_RELAY (fdiv_reset, reset);
acl_fdiv fdiv (
.clk (clk),
.areset (fdiv_reset),
.en (enable),
.a (dataa[i]),
.b (datab[i]),
.q (result[i])
);
`endif
end
VX_shift_register #(
@ -82,4 +116,6 @@ module VX_fpu_div #(
assign has_fflags = 0;
assign fflags = 0;
`endif
endmodule

View file

@ -32,11 +32,67 @@ module VX_fpu_fma #(
output wire valid_out
);
`ifdef QUARTUS
VX_acl_fma #(
.NUM_LANES (NUM_LANES),
.TAGW (TAGW)
) fp_fma (
.clk (clk),
.reset (reset),
.valid_in (valid_in),
.ready_in (ready_in),
.tag_in (tag_in),
.frm (frm),
.do_madd (do_madd),
.do_sub (do_sub),
.do_neg (do_neg),
.dataa (dataa),
.datab (datab),
.datac (datac),
.has_fflags (has_fflags),
.fflags (fflags),
.result (result),
.tag_out (tag_out),
.valid_out (valid_out),
.ready_out (ready_out)
);
`elsif VIVADO
VX_xil_fma #(
.NUM_LANES (NUM_LANES),
.TAGW (TAGW)
) fp_fma (
.clk (clk),
.reset (reset),
.valid_in (valid_in),
.ready_in (ready_in),
.tag_in (tag_in),
.frm (frm),
.do_madd (do_madd),
.do_sub (do_sub),
.do_neg (do_neg),
.dataa (dataa),
.datab (datab),
.datac (datac),
.has_fflags (has_fflags),
.fflags (fflags),
.result (result),
.tag_out (tag_out),
.valid_out (valid_out),
.ready_out (ready_out)
);
`else
wire stall = ~ready_out && valid_out;
wire enable = ~stall;
for (genvar i = 0; i < NUM_LANES; ++i) begin
reg [31:0] a, b, c;
reg [31:0] r;
fflags_t f;
always @(*) begin
if (do_madd) begin
@ -59,10 +115,6 @@ module VX_fpu_fma #(
end
end
`ifdef VERILATOR
reg [31:0] r;
fflags_t f;
always @(*) begin
dpi_fmadd (enable && valid_in, a, b, c, frm, r, f);
end
@ -79,19 +131,6 @@ module VX_fpu_fma #(
.data_in (r),
.data_out (result[i])
);
`else
`RESET_RELAY (fma_reset, reset);
acl_fmadd fmadd (
.clk (clk),
.areset (fma_reset),
.en (enable),
.a (a),
.b (b),
.c (c),
.q (result[i])
);
`endif
end
VX_shift_register #(
@ -112,4 +151,6 @@ module VX_fpu_fma #(
assign has_fflags = 0;
assign fflags = 0;
`endif
endmodule

View file

@ -25,11 +25,55 @@ module VX_fpu_sqrt #(
input wire ready_out,
output wire valid_out
);
`ifdef QUARTUS
VX_acl_fsqrt #(
.NUM_LANES (NUM_LANES),
.TAGW (TAGW)
) fp_sqrt (
.clk (clk),
.reset (reset),
.valid_in (valid_in),
.ready_in (ready_in),
.tag_in (tag_in),
.frm (frm),
.dataa (dataa),
.has_fflags (has_fflags),
.fflags (fflags),
.result (result),
.tag_out (tag_out),
.valid_out (valid_out),
.ready_out (ready_out)
);
`elsif VIVADO
VX_xil_fsqrt #(
.NUM_LANES (NUM_LANES),
.TAGW (TAGW)
) fp_sqrt (
.clk (clk),
.reset (reset),
.valid_in (valid_in),
.ready_in (ready_in),
.tag_in (tag_in),
.frm (frm),
.dataa (dataa),
.has_fflags (has_fflags),
.fflags (fflags),
.result (result),
.tag_out (tag_out),
.valid_out (valid_out),
.ready_out (ready_out)
);
`else
wire stall = ~ready_out && valid_out;
wire enable = ~stall;
for (genvar i = 0; i < NUM_LANES; ++i) begin
`ifdef VERILATOR
reg [31:0] r;
fflags_t f;
@ -49,17 +93,6 @@ module VX_fpu_sqrt #(
.data_in (r),
.data_out (result[i])
);
`else
`RESET_RELAY (fsqrt_reset, reset);
acl_fsqrt fsqrt (
.clk (clk),
.areset (fsqrt_reset),
.en (enable),
.a (dataa[i]),
.q (result[i])
);
`endif
end
VX_shift_register #(
@ -80,4 +113,6 @@ module VX_fpu_sqrt #(
assign has_fflags = 0;
assign fflags = 0;
`endif
endmodule

View file

@ -0,0 +1,64 @@
`include "VX_fpu_define.vh"
// Multi-lane single-precision divider built on the generated `acl_fdiv` core.
//
// One `acl_fdiv` instance is created per lane; {valid_in, tag_in} travel
// through a VX_shift_register of depth LATENCY and come out as
// {valid_out, tag_out}.
// NOTE(review): LATENCY presumably has to equal the pipeline depth of the
// generated acl_fdiv core so tags line up with results - confirm when the
// core is regenerated.
module VX_acl_fdiv #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 15
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    input wire [NUM_LANES-1:0][31:0]  datab,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    // The pipeline advances every cycle except when a valid result is
    // waiting on a consumer that is not ready.
    wire pipe_en = ready_out || ~valid_out;

    // New operands are accepted whenever the pipeline advances.
    assign ready_in = pipe_en;

    // The rounding mode is not consumed and no exception flags are produced.
    `UNUSED_VAR (frm)
    assign has_fflags = 0;
    assign fflags     = 0;

    // Side-band path carrying the valid bit and the request tag.
    VX_shift_register #(
        .DATAW  (1 + TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (pipe_en),
        .data_in  ({valid_in, tag_in}),
        .data_out ({valid_out, tag_out})
    );

    // One divider core per lane, each with its own relayed reset.
    for (genvar lane = 0; lane < NUM_LANES; ++lane) begin
        `RESET_RELAY (fdiv_reset, reset);

        acl_fdiv fdiv (
            .clk    (clk),
            .areset (fdiv_reset),
            .en     (pipe_en),
            .a      (dataa[lane]),
            .b      (datab[lane]),
            .q      (result[lane])
        );
    end
endmodule

View file

@ -0,0 +1,94 @@
`include "VX_fpu_define.vh"
// Multi-lane fused multiply-add built on the generated `acl_fmadd` core.
//
// ADD/SUB, MUL and the four MADD variants are all mapped onto a single
// a*b+c operation by selecting/negating the operands per lane.
// {valid_in, tag_in} travel through a VX_shift_register of depth LATENCY.
// NOTE(review): LATENCY presumably has to equal the pipeline depth of the
// generated acl_fmadd core - confirm when the core is regenerated.
module VX_acl_fma #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 4
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire do_madd,
    input wire do_sub,
    input wire do_neg,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    input wire [NUM_LANES-1:0][31:0]  datab,
    input wire [NUM_LANES-1:0][31:0]  datac,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    // The pipeline advances every cycle except when a valid result is
    // waiting on a consumer that is not ready.
    wire pipe_en = ready_out || ~valid_out;

    // New operands are accepted whenever the pipeline advances.
    assign ready_in = pipe_en;

    // The rounding mode is not consumed and no exception flags are produced.
    `UNUSED_VAR (frm)
    assign has_fflags = 0;
    assign fflags     = 0;

    // Side-band path carrying the valid bit and the request tag.
    VX_shift_register #(
        .DATAW  (1 + TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (pipe_en),
        .data_in  ({valid_in, tag_in}),
        .data_out ({valid_out, tag_out})
    );

    for (genvar lane = 0; lane < NUM_LANES; ++lane) begin
        // Operands actually fed to the core: result = op_a * op_b + op_c.
        reg [31:0] op_a, op_b, op_c;

        always @(*) begin
            if (do_madd) begin
                // MADD/MSUB/NMADD/NMSUB: negation is done by flipping sign bits
                op_a = do_neg ? {~dataa[lane][31], dataa[lane][30:0]} : dataa[lane];
                op_b = datab[lane];
                op_c = (do_neg ^ do_sub) ? {~datac[lane][31], datac[lane][30:0]} : datac[lane];
            end else if (do_neg) begin
                // MUL: a * b + 0
                op_a = dataa[lane];
                op_b = datab[lane];
                op_c = 0;
            end else begin
                // ADD/SUB: 1.0 * a +/- b
                op_a = 32'h3f800000; // 1.0f
                op_b = dataa[lane];
                op_c = do_sub ? {~datab[lane][31], datab[lane][30:0]} : datab[lane];
            end
        end

        `RESET_RELAY (fma_reset, reset);

        acl_fmadd fmadd (
            .clk    (clk),
            .areset (fma_reset),
            .en     (pipe_en),
            .a      (op_a),
            .b      (op_b),
            .c      (op_c),
            .q      (result[lane])
        );
    end
endmodule

View file

@ -0,0 +1,62 @@
`include "VX_fpu_define.vh"
// Multi-lane single-precision square root built on the generated
// `acl_fsqrt` core.
//
// One `acl_fsqrt` instance is created per lane; {valid_in, tag_in} travel
// through a VX_shift_register of depth LATENCY and come out as
// {valid_out, tag_out}.
// NOTE(review): LATENCY presumably has to equal the pipeline depth of the
// generated acl_fsqrt core - confirm when the core is regenerated.
module VX_acl_fsqrt #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 10
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    // The pipeline advances every cycle except when a valid result is
    // waiting on a consumer that is not ready.
    wire pipe_en = ready_out || ~valid_out;

    // New operands are accepted whenever the pipeline advances.
    assign ready_in = pipe_en;

    // The rounding mode is not consumed and no exception flags are produced.
    `UNUSED_VAR (frm)
    assign has_fflags = 0;
    assign fflags     = 0;

    // Side-band path carrying the valid bit and the request tag.
    VX_shift_register #(
        .DATAW  (1 + TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (pipe_en),
        .data_in  ({valid_in, tag_in}),
        .data_out ({valid_out, tag_out})
    );

    // One square-root core per lane, each with its own relayed reset.
    for (genvar lane = 0; lane < NUM_LANES; ++lane) begin
        `RESET_RELAY (fsqrt_reset, reset);

        acl_fsqrt fsqrt (
            .clk    (clk),
            .areset (fsqrt_reset),
            .en     (pipe_en),
            .a      (dataa[lane]),
            .q      (result[lane])
        );
    end
endmodule

View file

@ -0,0 +1,64 @@
`include "VX_fpu_define.vh"
// Multi-lane single-precision divider built on the generated `acl_fdiv`
// core (variant whose default LATENCY is 34).
//
// Each lane owns one `acl_fdiv` instance. The valid bit and the request
// tag are delayed by a VX_shift_register of depth LATENCY.
// NOTE(review): LATENCY presumably mirrors the pipeline depth of the
// generated core for this device family - confirm against the generator log.
module VX_acl_fdiv #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 34
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    input wire [NUM_LANES-1:0][31:0]  datab,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    // Hold everything only while a valid result is refused downstream.
    wire advance = ready_out || ~valid_out;

    // Back-pressure is passed straight to the producer.
    assign ready_in = advance;

    // frm is ignored by this implementation; flags are tied off.
    `UNUSED_VAR (frm)
    assign has_fflags = 0;
    assign fflags     = 0;

    // valid/tag delay line.
    VX_shift_register #(
        .DATAW  (1 + TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (advance),
        .data_in  ({valid_in, tag_in}),
        .data_out ({valid_out, tag_out})
    );

    // Per-lane divider cores.
    for (genvar lane = 0; lane < NUM_LANES; ++lane) begin
        `RESET_RELAY (fdiv_reset, reset);

        acl_fdiv fdiv (
            .clk    (clk),
            .areset (fdiv_reset),
            .en     (advance),
            .a      (dataa[lane]),
            .b      (datab[lane]),
            .q      (result[lane])
        );
    end
endmodule

View file

@ -0,0 +1,94 @@
`include "VX_fpu_define.vh"
// Multi-lane fused multiply-add built on the generated `acl_fmadd` core.
//
// Every supported operation is expressed as a*b+c:
//   MADD family : (+/-a) * b + (+/-c)
//   MUL         : a * b + 0
//   ADD/SUB     : 1.0 * a + (+/-b)
// The valid bit and the request tag are delayed by a VX_shift_register of
// depth LATENCY.
// NOTE(review): LATENCY presumably mirrors the pipeline depth of the
// generated core - confirm against the generator log.
module VX_acl_fma #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 4
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire do_madd,
    input wire do_sub,
    input wire do_neg,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    input wire [NUM_LANES-1:0][31:0]  datab,
    input wire [NUM_LANES-1:0][31:0]  datac,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    // Hold everything only while a valid result is refused downstream.
    wire advance = ready_out || ~valid_out;

    // Back-pressure is passed straight to the producer.
    assign ready_in = advance;

    // frm is ignored by this implementation; flags are tied off.
    `UNUSED_VAR (frm)
    assign has_fflags = 0;
    assign fflags     = 0;

    // valid/tag delay line.
    VX_shift_register #(
        .DATAW  (1 + TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (advance),
        .data_in  ({valid_in, tag_in}),
        .data_out ({valid_out, tag_out})
    );

    for (genvar lane = 0; lane < NUM_LANES; ++lane) begin
        // Operands handed to the core for this lane.
        reg [31:0] mul_a, mul_b, add_c;

        always @(*) begin
            if (do_madd) begin
                // MADD/MSUB/NMADD/NMSUB
                mul_a = do_neg ? {~dataa[lane][31], dataa[lane][30:0]} : dataa[lane];
                mul_b = datab[lane];
                add_c = (do_neg ^ do_sub) ? {~datac[lane][31], datac[lane][30:0]} : datac[lane];
            end else if (do_neg) begin
                // MUL
                mul_a = dataa[lane];
                mul_b = datab[lane];
                add_c = 0;
            end else begin
                // ADD/SUB
                mul_a = 32'h3f800000; // 1.0f
                mul_b = dataa[lane];
                add_c = do_sub ? {~datab[lane][31], datab[lane][30:0]} : datab[lane];
            end
        end

        `RESET_RELAY (fma_reset, reset);

        acl_fmadd fmadd (
            .clk    (clk),
            .areset (fma_reset),
            .en     (advance),
            .a      (mul_a),
            .b      (mul_b),
            .c      (add_c),
            .q      (result[lane])
        );
    end
endmodule

View file

@ -0,0 +1,62 @@
`include "VX_fpu_define.vh"
// Multi-lane single-precision square root built on the generated
// `acl_fsqrt` core (variant whose default LATENCY is 25).
//
// Each lane owns one `acl_fsqrt` instance. The valid bit and the request
// tag are delayed by a VX_shift_register of depth LATENCY.
// NOTE(review): LATENCY presumably mirrors the pipeline depth of the
// generated core for this device family - confirm against the generator log.
module VX_acl_fsqrt #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 25
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    // Hold everything only while a valid result is refused downstream.
    wire advance = ready_out || ~valid_out;

    // Back-pressure is passed straight to the producer.
    assign ready_in = advance;

    // frm is ignored by this implementation; flags are tied off.
    `UNUSED_VAR (frm)
    assign has_fflags = 0;
    assign fflags     = 0;

    // valid/tag delay line.
    VX_shift_register #(
        .DATAW  (1 + TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (advance),
        .data_in  ({valid_in, tag_in}),
        .data_out ({valid_out, tag_out})
    );

    // Per-lane square-root cores.
    for (genvar lane = 0; lane < NUM_LANES; ++lane) begin
        `RESET_RELAY (fsqrt_reset, reset);

        acl_fsqrt fsqrt (
            .clk    (clk),
            .areset (fsqrt_reset),
            .en     (advance),
            .a      (dataa[lane]),
            .q      (result[lane])
        );
    end
endmodule

View file

@ -16,7 +16,7 @@
// ---------------------------------------------------------------------------
// SystemVerilog created from acl_fdiv
// SystemVerilog created on Sun Dec 27 09:48:58 2020
// SystemVerilog created on Sun Jul 24 13:44:12 2022
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)

View file

@ -16,7 +16,7 @@
// ---------------------------------------------------------------------------
// SystemVerilog created from acl_fmadd
// SystemVerilog created on Sun Dec 27 09:48:58 2020
// SystemVerilog created on Sun Jul 24 13:44:12 2022
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)

View file

@ -16,7 +16,7 @@
// ---------------------------------------------------------------------------
// SystemVerilog created from acl_fsqrt
// SystemVerilog created on Sun Dec 27 09:48:58 2020
// SystemVerilog created on Sun Jul 24 13:44:12 2022
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)

View file

@ -1,96 +1,6 @@
starting execution ...
build model options ...
argc=22
Generation context:
Will not generate valid and channel signals
HardFP is enabled enabling set to true
Correct rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_fadd
Frequency 250MHz
Deployment FPGA Stratix10
Estimated resources LUTs 0, DSPs 2, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 3 cycle(s)
@@start
@name FPAdd@
@latency 3@
@LUT 0@
@DSP 2@
@RAMBits 0@
@RAMBlockUsage 0@
@enable 1@
@subnormals 0@
@error 0.50@
@rounding RNE@
@method single path@
@inPort 0 fpieee 8 23@
@inPort 1 fpieee 8 23@
@outPort 0 fpieee 8 23@
@nochanvalid 1@
@@end
starting execution ...
build model options ...
argc=22
Generation context:
Will not generate valid and channel signals
HardFP is enabled enabling set to true
Correct rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_fsub
Frequency 250MHz
Deployment FPGA Stratix10
Estimated resources LUTs 0, DSPs 2, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 3 cycle(s)
@@start
@name FPSub@
@latency 3@
@LUT 0@
@DSP 2@
@RAMBits 0@
@RAMBlockUsage 0@
@enable 1@
@subnormals 0@
@error 0.50@
@rounding RNE@
@method single path@
@inPort 0 fpieee 8 23@
@inPort 1 fpieee 8 23@
@outPort 0 fpieee 8 23@
@nochanvalid 1@
@@end
starting execution ...
build model options ...
argc=22
Generation context:
Will not generate valid and channel signals
HardFP is enabled enabling set to true
Correct rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_fmul
Frequency 250MHz
Deployment FPGA Stratix10
Estimated resources LUTs 0, DSPs 2, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 3 cycle(s)
@@start
@name FPMul@
@latency 3@
@LUT 0@
@DSP 2@
@RAMBits 0@
@RAMBlockUsage 0@
@enable 1@
@subnormals 0@
@error 0.50@
@rounding RNE@
@method default@
@inPort 0 fpieee 8 23@
@inPort 1 fpieee 8 23@
@outPort 0 fpieee 8 23@
@nochanvalid 1@
@@end
starting execution ...
build model options ...
argc=22
Generation context:
Will not generate valid and channel signals
HardFP is enabled enabling set to true
@ -178,119 +88,3 @@ The pipeline depth of the block is 25 cycle(s)
@outPort 0 fpieee 8 23@
@nochanvalid 1@
@@end
starting execution ...
build model options ...
argc=25
Generation context:
Will not generate valid and channel signals
HardFP is enabled enabling set to true
Correct rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_ftoi
Frequency 250MHz
Deployment FPGA Stratix10
Estimated resources LUTs 344, DSPs 0, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 3 cycle(s)
@@start
@name FPToFXP@
@latency 3@
@LUT 344@
@DSP 0@
@RAMBits 0@
@RAMBlockUsage 0@
@enable 1@
@subnormals 0@
@error 0.50@
@rounding RNE@
@method default@
@inPort 0 fpieee 8 23@
@outPort 0 fxp 32 0 1@
@nochanvalid 1@
@@end
starting execution ...
build model options ...
argc=25
Generation context:
Will not generate valid and channel signals
HardFP is enabled enabling set to true
Correct rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_ftou
Frequency 250MHz
Deployment FPGA Stratix10
Estimated resources LUTs 272, DSPs 0, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 3 cycle(s)
@@start
@name FPToFXP@
@latency 3@
@LUT 272@
@DSP 0@
@RAMBits 0@
@RAMBlockUsage 0@
@enable 1@
@subnormals 0@
@error 0.50@
@rounding RNE@
@method default@
@inPort 0 fpieee 8 23@
@outPort 0 fxp 32 0 0@
@nochanvalid 1@
@@end
starting execution ...
build model options ...
argc=25
Generation context:
Will not generate valid and channel signals
HardFP is enabled enabling set to true
Correct rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_itof
Frequency 250MHz
Deployment FPGA Stratix10
Estimated resources LUTs 362, DSPs 0, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 7 cycle(s)
@@start
@name FXPToFP@
@latency 7@
@LUT 362@
@DSP 0@
@RAMBits 0@
@RAMBlockUsage 0@
@enable 1@
@subnormals 0@
@error 0.50@
@rounding RNE@
@method default@
@inPort 0 fxp 32 0 1@
@outPort 0 fpieee 8 23@
@nochanvalid 1@
@@end
starting execution ...
build model options ...
argc=25
Generation context:
Will not generate valid and channel signals
HardFP is enabled enabling set to true
Correct rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_utof
Frequency 300MHz
Deployment FPGA Stratix10
Estimated resources LUTs 310, DSPs 0, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 7 cycle(s)
@@start
@name FXPToFP@
@latency 7@
@LUT 310@
@DSP 0@
@RAMBits 0@
@RAMBlockUsage 0@
@enable 1@
@subnormals 0@
@error 0.50@
@rounding RNE@
@method default@
@inPort 0 fxp 32 0 0@
@outPort 0 fpieee 8 23@
@nochanvalid 1@
@@end

View file

@ -33,6 +33,13 @@ DBG_FLAGS += $(DBG_TRACE_FLAGS)
#CONFIGS += -DSM_DISABLE
#CONFIGS += -DRCACHE_DISABLE -DOCACHE_DISABLE -DTCACHE_DISABLE
ifeq ($(DEVICE_FAMILY), stratix10)
CONFIGS += -DALTERA_S10
endif
ifeq ($(DEVICE_FAMILY), arria10)
CONFIGS += -DALTERA_A10
endif
CONFIG1 := -DNUM_CLUSTERS=1 -DNUM_CORES=1 $(CONFIGS)
CONFIG2 := -DNUM_CLUSTERS=1 -DNUM_CORES=2 $(CONFIGS)
CONFIG4 := -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE $(CONFIGS)

View file

@ -0,0 +1,77 @@
`include "VX_fpu_define.vh"
// Multi-lane single-precision divider built on the Xilinx `xil_fdiv`
// floating-point IP (AXI4-Stream style interface).
//
// One core is instantiated per lane. The request tag is delayed by a
// VX_shift_register of depth LATENCY; the valid bit comes from the core's
// own m_axis_result_tvalid output. Exception flags are taken from the
// core's TUSER output, so has_fflags is 1.
//
// Fixes relative to the previous revision:
//  * operands were wired to undeclared identifiers `a` / `b` (implicit
//    1-bit nets, never driven); they are now dataa[i] / datab[i].
//  * the 4-bit TUSER port was connected to the single bit tuser[i]; it is
//    now connected to the whole per-lane vector.
//  * every lane drove valid_out directly (multiple drivers when
//    NUM_LANES > 1); each lane now drives its own bit and valid_out is
//    taken from lane 0 (all lanes receive the same tvalid_in).
//
// NOTE(review): no clock-enable is connected to the core, so it keeps
// advancing while `enable` is low, whereas the tag shift register stalls -
// confirm tag/result alignment under back-pressure.
// NOTE(review): confirm the generated xil_fdiv wrapper actually exposes an
// `aresetn` port (and that LATENCY matches its configured latency).
module VX_xil_fdiv #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 28
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    input wire [NUM_LANES-1:0][31:0]  datab,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    // Stall only while a valid result is refused by the consumer.
    wire stall  = ~ready_out && valid_out;
    wire enable = ~stall;

    // Per-lane result-valid strobes; keeps valid_out single-driven.
    wire [NUM_LANES-1:0] lane_valid_out;

    for (genvar i = 0; i < NUM_LANES; ++i) begin
        wire [3:0] tuser;   // {DZ, NV, OF, UF} as mapped below
        wire tvalid_in;

        `RESET_RELAY (fdiv_reset, reset);

        // Only present operands to the core when the pipeline may advance.
        assign tvalid_in = enable && valid_in;

        xil_fdiv fdiv (
            .aclk                 (clk),
            .aresetn              (~fdiv_reset),
            .s_axis_a_tvalid      (tvalid_in),
            .s_axis_a_tdata       (dataa[i]),
            .s_axis_b_tvalid      (tvalid_in),
            .s_axis_b_tdata       (datab[i]),
            .m_axis_result_tvalid (lane_valid_out[i]),
            .m_axis_result_tdata  (result[i]),
            .m_axis_result_tuser  (tuser)
        );

        // The inexact flag is not provided by the core.
        assign fflags[i].NX = 1'b0;
        assign fflags[i].UF = tuser[0];
        assign fflags[i].OF = tuser[1];
        assign fflags[i].DZ = tuser[3];
        assign fflags[i].NV = tuser[2];
    end

    // All lanes are fed the same tvalid_in, so lane 0 stands for all of them.
    assign valid_out = lane_valid_out[0];

    // Tag delay line (the valid bit is produced by the cores themselves).
    VX_shift_register #(
        .DATAW  (TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (enable),
        .data_in  (tag_in),
        .data_out (tag_out)
    );

    assign ready_in = enable;

    // Rounding mode is not consumed by this implementation.
    `UNUSED_VAR (frm)

    assign has_fflags = 1;
endmodule

View file

@ -0,0 +1,108 @@
`include "VX_fpu_define.vh"
// Multi-lane fused multiply-add built on the Xilinx `xil_fma`
// floating-point IP (AXI4-Stream style interface).
//
// ADD/SUB, MUL and the four MADD variants are all mapped onto a single
// a*b+c operation by selecting/negating the operands per lane. The request
// tag is delayed by a VX_shift_register of depth LATENCY; the valid bit
// comes from the core's own m_axis_result_tvalid output. Exception flags
// are taken from the core's TUSER output, so has_fflags is 1.
//
// Fixes relative to the previous revision:
//  * the 3-bit TUSER port was connected to the single bit tuser[i]; it is
//    now connected to the whole per-lane vector.
//  * every lane drove valid_out directly (multiple drivers when
//    NUM_LANES > 1); each lane now drives its own bit and valid_out is
//    taken from lane 0 (all lanes receive the same tvalid_in).
//
// NOTE(review): no clock-enable is connected to the core, so it keeps
// advancing while `enable` is low, whereas the tag shift register stalls -
// confirm tag/result alignment under back-pressure.
// NOTE(review): confirm the generated xil_fma wrapper actually exposes an
// `aresetn` port (and that LATENCY matches its configured latency).
module VX_xil_fma #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 16
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire do_madd,
    input wire do_sub,
    input wire do_neg,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    input wire [NUM_LANES-1:0][31:0]  datab,
    input wire [NUM_LANES-1:0][31:0]  datac,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    // Stall only while a valid result is refused by the consumer.
    wire stall  = ~ready_out && valid_out;
    wire enable = ~stall;

    // Per-lane result-valid strobes; keeps valid_out single-driven.
    wire [NUM_LANES-1:0] lane_valid_out;

    for (genvar i = 0; i < NUM_LANES; ++i) begin
        reg [31:0] a, b, c;  // operands fed to the core: a * b + c
        wire [2:0] tuser;    // {NV, OF, UF} as mapped below
        wire tvalid_in;

        always @(*) begin
            if (do_madd) begin
                // MADD/MSUB/NMADD/NMSUB: negate by flipping sign bits
                a = do_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i];
                b = datab[i];
                c = (do_neg ^ do_sub) ? {~datac[i][31], datac[i][30:0]} : datac[i];
            end else begin
                if (do_neg) begin
                    // MUL
                    a = dataa[i];
                    b = datab[i];
                    c = 0;
                end else begin
                    // ADD/SUB
                    a = 32'h3f800000; // 1.0f
                    b = dataa[i];
                    c = do_sub ? {~datab[i][31], datab[i][30:0]} : datab[i];
                end
            end
        end

        `RESET_RELAY (fma_reset, reset);

        // Only present operands to the core when the pipeline may advance.
        assign tvalid_in = enable && valid_in;

        xil_fma fma (
            .aclk                    (clk),
            .aresetn                 (~fma_reset),
            .s_axis_a_tvalid         (tvalid_in),
            .s_axis_a_tdata          (a),
            .s_axis_b_tvalid         (tvalid_in),
            .s_axis_b_tdata          (b),
            .s_axis_c_tvalid         (tvalid_in),
            .s_axis_c_tdata          (c),
            .s_axis_operation_tvalid (tvalid_in),
            // NOTE(review): operation code 0 presumably selects a*b+c - confirm
            .s_axis_operation_tdata  ('0),
            .m_axis_result_tvalid    (lane_valid_out[i]),
            .m_axis_result_tdata     (result[i]),
            .m_axis_result_tuser     (tuser)
        );

        // Inexact and divide-by-zero are not reported by this wrapper.
        assign fflags[i].NX = 1'b0;
        assign fflags[i].UF = tuser[0];
        assign fflags[i].OF = tuser[1];
        assign fflags[i].DZ = 1'b0;
        assign fflags[i].NV = tuser[2];
    end

    // All lanes are fed the same tvalid_in, so lane 0 stands for all of them.
    assign valid_out = lane_valid_out[0];

    // Tag delay line (the valid bit is produced by the cores themselves).
    VX_shift_register #(
        .DATAW  (TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (enable),
        .data_in  (tag_in),
        .data_out (tag_out)
    );

    assign ready_in = enable;

    // Rounding mode is not consumed by this implementation.
    `UNUSED_VAR (frm)

    assign has_fflags = 1;
endmodule

View file

@ -0,0 +1,74 @@
`include "VX_fpu_define.vh"
// Multi-lane single-precision square root built on the Xilinx `xil_fsqrt`
// floating-point IP (AXI4-Stream style interface).
//
// One core is instantiated per lane. The request tag is delayed by a
// VX_shift_register of depth LATENCY; the valid bit comes from the core's
// own m_axis_result_tvalid output. The invalid-operation flag is taken
// from the core's TUSER output.
//
// Fixes relative to the previous revision:
//  * the operand was wired to an undeclared identifier `a` (implicit
//    1-bit net, never driven); it is now dataa[i].
//  * TUSER was connected to tuser[i], which is out of range for i > 0 on a
//    1-bit wire; it is now connected to the per-lane wire itself.
//  * every lane drove valid_out directly (multiple drivers when
//    NUM_LANES > 1); each lane now drives its own bit and valid_out is
//    taken from lane 0 (all lanes receive the same tvalid_in).
//  * has_fflags was 0 although fflags[i].NV is driven from the core,
//    unlike the sibling Xilinx wrappers; it is now 1 so the flag is not
//    discarded.
//
// NOTE(review): no clock-enable is connected to the core, so it keeps
// advancing while `enable` is low, whereas the tag shift register stalls -
// confirm tag/result alignment under back-pressure.
// NOTE(review): confirm the generated xil_fsqrt wrapper actually exposes an
// `aresetn` port (and that LATENCY matches its configured latency).
module VX_xil_fsqrt #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 28
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    // Stall only while a valid result is refused by the consumer.
    wire stall  = ~ready_out && valid_out;
    wire enable = ~stall;

    // Per-lane result-valid strobes; keeps valid_out single-driven.
    wire [NUM_LANES-1:0] lane_valid_out;

    for (genvar i = 0; i < NUM_LANES; ++i) begin
        wire [0:0] tuser;   // invalid-operation flag, as mapped below
        wire tvalid_in;

        `RESET_RELAY (fsqrt_reset, reset);

        // Only present operands to the core when the pipeline may advance.
        assign tvalid_in = enable && valid_in;

        xil_fsqrt fsqrt (
            .aclk                 (clk),
            .aresetn              (~fsqrt_reset),
            .s_axis_a_tvalid      (tvalid_in),
            .s_axis_a_tdata       (dataa[i]),
            .m_axis_result_tvalid (lane_valid_out[i]),
            .m_axis_result_tdata  (result[i]),
            .m_axis_result_tuser  (tuser)
        );

        // Only the invalid-operation flag is reported by this wrapper.
        assign fflags[i].NX = 1'b0;
        assign fflags[i].UF = 1'b0;
        assign fflags[i].OF = 1'b0;
        assign fflags[i].DZ = 1'b0;
        assign fflags[i].NV = tuser[0];
    end

    // All lanes are fed the same tvalid_in, so lane 0 stands for all of them.
    assign valid_out = lane_valid_out[0];

    // Tag delay line (the valid bit is produced by the cores themselves).
    VX_shift_register #(
        .DATAW  (TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (enable),
        .data_in  (tag_in),
        .data_out (tag_out)
    );

    assign ready_in = enable;

    // Rounding mode is not consumed by this implementation.
    `UNUSED_VAR (frm)

    assign has_fflags = 1;
endmodule

View file

@ -54,17 +54,18 @@
(* CHECK_LICENSE_TYPE = "fdiv,floating_point_v7_1_11,{}" *)
(* CORE_GENERATION_INFO = "fdiv,floating_point_v7_1_11,{x_ipProduct=Vivado 2020.2.2,x_ipVendor=xilinx.com,x_ipLibrary=ip,x_ipName=floating_point,x_ipVersion=7.1,x_ipCoreRevision=11,x_ipLanguage=VERILOG,x_ipSimLanguage=VERILOG,C_XDEVICEFAMILY=virtexuplusHBM,C_PART=xcu280-fsvh2892-2L-e,C_HAS_ADD=0,C_HAS_SUBTRACT=0,C_HAS_MULTIPLY=0,C_HAS_DIVIDE=1,C_HAS_SQRT=0,C_HAS_COMPARE=0,C_HAS_FIX_TO_FLT=0,C_HAS_FLT_TO_FIX=0,C_HAS_FLT_TO_FLT=0,C_HAS_RECIP=0,C_HAS_RECIP_SQRT=0,C_HAS_ABSOLUTE=0,C_HAS_LOGARITHM=0,C_HAS_EXPONENTIAL=0,C_HAS_F\
MA=0,C_HAS_FMS=0,C_HAS_UNFUSED_MULTIPLY_ADD=0,C_HAS_UNFUSED_MULTIPLY_SUB=0,C_HAS_UNFUSED_MULTIPLY_ACCUMULATOR_A=0,C_HAS_UNFUSED_MULTIPLY_ACCUMULATOR_S=0,C_HAS_ACCUMULATOR_A=0,C_HAS_ACCUMULATOR_S=0,C_HAS_ACCUMULATOR_PRIMITIVE_A=0,C_HAS_ACCUMULATOR_PRIMITIVE_S=0,C_A_WIDTH=32,C_A_FRACTION_WIDTH=24,C_B_WIDTH=32,C_B_FRACTION_WIDTH=24,C_C_WIDTH=32,C_C_FRACTION_WIDTH=24,C_RESULT_WIDTH=32,C_RESULT_FRACTION_WIDTH=24,C_COMPARE_OPERATION=8,C_LATENCY=28,C_OPTIMIZATION=1,C_MULT_USAGE=0,C_BRAM_USAGE=0,C_RATE=\
1,C_ACCUM_INPUT_MSB=32,C_ACCUM_MSB=32,C_ACCUM_LSB=-31,C_HAS_UNDERFLOW=0,C_HAS_OVERFLOW=0,C_HAS_INVALID_OP=0,C_HAS_DIVIDE_BY_ZERO=0,C_HAS_ACCUM_OVERFLOW=0,C_HAS_ACCUM_INPUT_OVERFLOW=0,C_HAS_ACLKEN=0,C_HAS_ARESETN=0,C_THROTTLE_SCHEME=3,C_HAS_A_TUSER=0,C_HAS_A_TLAST=0,C_HAS_B=1,C_HAS_B_TUSER=0,C_HAS_B_TLAST=0,C_HAS_C=0,C_HAS_C_TUSER=0,C_HAS_C_TLAST=0,C_HAS_OPERATION=0,C_HAS_OPERATION_TUSER=0,C_HAS_OPERATION_TLAST=0,C_HAS_RESULT_TUSER=0,C_HAS_RESULT_TLAST=0,C_TLAST_RESOLUTION=0,C_A_TDATA_WIDTH=32,C_\
A_TUSER_WIDTH=1,C_B_TDATA_WIDTH=32,C_B_TUSER_WIDTH=1,C_C_TDATA_WIDTH=32,C_C_TUSER_WIDTH=1,C_OPERATION_TDATA_WIDTH=8,C_OPERATION_TUSER_WIDTH=1,C_RESULT_TDATA_WIDTH=32,C_RESULT_TUSER_WIDTH=1,C_FIXED_DATA_UNSIGNED=0}" *)
1,C_ACCUM_INPUT_MSB=32,C_ACCUM_MSB=32,C_ACCUM_LSB=-31,C_HAS_UNDERFLOW=1,C_HAS_OVERFLOW=1,C_HAS_INVALID_OP=1,C_HAS_DIVIDE_BY_ZERO=1,C_HAS_ACCUM_OVERFLOW=0,C_HAS_ACCUM_INPUT_OVERFLOW=0,C_HAS_ACLKEN=0,C_HAS_ARESETN=0,C_THROTTLE_SCHEME=3,C_HAS_A_TUSER=0,C_HAS_A_TLAST=0,C_HAS_B=1,C_HAS_B_TUSER=0,C_HAS_B_TLAST=0,C_HAS_C=0,C_HAS_C_TUSER=0,C_HAS_C_TLAST=0,C_HAS_OPERATION=0,C_HAS_OPERATION_TUSER=0,C_HAS_OPERATION_TLAST=0,C_HAS_RESULT_TUSER=1,C_HAS_RESULT_TLAST=0,C_TLAST_RESOLUTION=0,C_A_TDATA_WIDTH=32,C_\
A_TUSER_WIDTH=1,C_B_TDATA_WIDTH=32,C_B_TUSER_WIDTH=1,C_C_TDATA_WIDTH=32,C_C_TUSER_WIDTH=1,C_OPERATION_TDATA_WIDTH=8,C_OPERATION_TUSER_WIDTH=1,C_RESULT_TDATA_WIDTH=32,C_RESULT_TUSER_WIDTH=4,C_FIXED_DATA_UNSIGNED=0}" *)
(* DowngradeIPIdentifiedWarnings = "yes" *)
module fdiv (
module xil_fdiv (
aclk,
s_axis_a_tvalid,
s_axis_a_tdata,
s_axis_b_tvalid,
s_axis_b_tdata,
m_axis_result_tvalid,
m_axis_result_tdata
m_axis_result_tdata,
m_axis_result_tuser
);
(* X_INTERFACE_PARAMETER = "XIL_INTERFACENAME aclk_intf, ASSOCIATED_BUSIF S_AXIS_OPERATION:M_AXIS_RESULT:S_AXIS_C:S_AXIS_B:S_AXIS_A, ASSOCIATED_RESET aresetn, ASSOCIATED_CLKEN aclken, FREQ_HZ 10000000, FREQ_TOLERANCE_HZ 0, PHASE 0.000, INSERT_VIP 0" *)
@ -82,9 +83,11 @@ input wire s_axis_b_tvalid;
input wire [31 : 0] s_axis_b_tdata;
(* X_INTERFACE_INFO = "xilinx.com:interface:axis:1.0 M_AXIS_RESULT TVALID" *)
output wire m_axis_result_tvalid;
(* X_INTERFACE_PARAMETER = "XIL_INTERFACENAME M_AXIS_RESULT, TDATA_NUM_BYTES 4, TDEST_WIDTH 0, TID_WIDTH 0, TUSER_WIDTH 0, HAS_TREADY 0, HAS_TSTRB 0, HAS_TKEEP 0, HAS_TLAST 0, FREQ_HZ 100000000, PHASE 0.000, LAYERED_METADATA undef, INSERT_VIP 0" *)
(* X_INTERFACE_INFO = "xilinx.com:interface:axis:1.0 M_AXIS_RESULT TDATA" *)
output wire [31 : 0] m_axis_result_tdata;
(* X_INTERFACE_PARAMETER = "XIL_INTERFACENAME M_AXIS_RESULT, TDATA_NUM_BYTES 4, TDEST_WIDTH 0, TID_WIDTH 0, TUSER_WIDTH 4, HAS_TREADY 0, HAS_TSTRB 0, HAS_TKEEP 0, HAS_TLAST 0, FREQ_HZ 100000000, PHASE 0.000, LAYERED_METADATA undef, INSERT_VIP 0" *)
(* X_INTERFACE_INFO = "xilinx.com:interface:axis:1.0 M_AXIS_RESULT TUSER" *)
output wire [3 : 0] m_axis_result_tuser;
floating_point_v7_1_11 #(
.C_XDEVICEFAMILY("virtexuplusHBM"),
@ -130,10 +133,10 @@ output wire [31 : 0] m_axis_result_tdata;
.C_ACCUM_INPUT_MSB(32),
.C_ACCUM_MSB(32),
.C_ACCUM_LSB(-31),
.C_HAS_UNDERFLOW(0),
.C_HAS_OVERFLOW(0),
.C_HAS_INVALID_OP(0),
.C_HAS_DIVIDE_BY_ZERO(0),
.C_HAS_UNDERFLOW(1),
.C_HAS_OVERFLOW(1),
.C_HAS_INVALID_OP(1),
.C_HAS_DIVIDE_BY_ZERO(1),
.C_HAS_ACCUM_OVERFLOW(0),
.C_HAS_ACCUM_INPUT_OVERFLOW(0),
.C_HAS_ACLKEN(0),
@ -150,7 +153,7 @@ output wire [31 : 0] m_axis_result_tdata;
.C_HAS_OPERATION(0),
.C_HAS_OPERATION_TUSER(0),
.C_HAS_OPERATION_TLAST(0),
.C_HAS_RESULT_TUSER(0),
.C_HAS_RESULT_TUSER(1),
.C_HAS_RESULT_TLAST(0),
.C_TLAST_RESOLUTION(0),
.C_A_TDATA_WIDTH(32),
@ -162,7 +165,7 @@ output wire [31 : 0] m_axis_result_tdata;
.C_OPERATION_TDATA_WIDTH(8),
.C_OPERATION_TUSER_WIDTH(1),
.C_RESULT_TDATA_WIDTH(32),
.C_RESULT_TUSER_WIDTH(1),
.C_RESULT_TUSER_WIDTH(4),
.C_FIXED_DATA_UNSIGNED(0)
) inst (
.aclk(aclk),
@ -191,7 +194,7 @@ output wire [31 : 0] m_axis_result_tdata;
.m_axis_result_tvalid(m_axis_result_tvalid),
.m_axis_result_tready(1'H0),
.m_axis_result_tdata(m_axis_result_tdata),
.m_axis_result_tuser(),
.m_axis_result_tuser(m_axis_result_tuser),
.m_axis_result_tlast()
);
endmodule

View file

@ -57,7 +57,7 @@ FMA=1,C_HAS_FMS=1,C_HAS_UNFUSED_MULTIPLY_ADD=0,C_HAS_UNFUSED_MULTIPLY_SUB=0,C_HA
=1,C_ACCUM_INPUT_MSB=32,C_ACCUM_MSB=32,C_ACCUM_LSB=-31,C_HAS_UNDERFLOW=1,C_HAS_OVERFLOW=1,C_HAS_INVALID_OP=1,C_HAS_DIVIDE_BY_ZERO=0,C_HAS_ACCUM_OVERFLOW=0,C_HAS_ACCUM_INPUT_OVERFLOW=0,C_HAS_ACLKEN=0,C_HAS_ARESETN=0,C_THROTTLE_SCHEME=3,C_HAS_A_TUSER=0,C_HAS_A_TLAST=0,C_HAS_B=1,C_HAS_B_TUSER=0,C_HAS_B_TLAST=0,C_HAS_C=1,C_HAS_C_TUSER=0,C_HAS_C_TLAST=0,C_HAS_OPERATION=1,C_HAS_OPERATION_TUSER=0,C_HAS_OPERATION_TLAST=0,C_HAS_RESULT_TUSER=1,C_HAS_RESULT_TLAST=0,C_TLAST_RESOLUTION=0,C_A_TDATA_WIDTH=32,C\
_A_TUSER_WIDTH=1,C_B_TDATA_WIDTH=32,C_B_TUSER_WIDTH=1,C_C_TDATA_WIDTH=32,C_C_TUSER_WIDTH=1,C_OPERATION_TDATA_WIDTH=8,C_OPERATION_TUSER_WIDTH=1,C_RESULT_TDATA_WIDTH=32,C_RESULT_TUSER_WIDTH=3,C_FIXED_DATA_UNSIGNED=0}" *)
(* DowngradeIPIdentifiedWarnings = "yes" *)
module fmadd (
module xil_fma (
aclk,
s_axis_a_tvalid,
s_axis_a_tdata,

View file

@ -57,7 +57,7 @@ FMA=0,C_HAS_FMS=0,C_HAS_UNFUSED_MULTIPLY_ADD=0,C_HAS_UNFUSED_MULTIPLY_SUB=0,C_HA
=1,C_ACCUM_INPUT_MSB=32,C_ACCUM_MSB=32,C_ACCUM_LSB=-31,C_HAS_UNDERFLOW=0,C_HAS_OVERFLOW=0,C_HAS_INVALID_OP=1,C_HAS_DIVIDE_BY_ZERO=0,C_HAS_ACCUM_OVERFLOW=0,C_HAS_ACCUM_INPUT_OVERFLOW=0,C_HAS_ACLKEN=0,C_HAS_ARESETN=0,C_THROTTLE_SCHEME=3,C_HAS_A_TUSER=0,C_HAS_A_TLAST=0,C_HAS_B=0,C_HAS_B_TUSER=0,C_HAS_B_TLAST=0,C_HAS_C=0,C_HAS_C_TUSER=0,C_HAS_C_TLAST=0,C_HAS_OPERATION=0,C_HAS_OPERATION_TUSER=0,C_HAS_OPERATION_TLAST=0,C_HAS_RESULT_TUSER=1,C_HAS_RESULT_TLAST=0,C_TLAST_RESOLUTION=0,C_A_TDATA_WIDTH=32,C\
_A_TUSER_WIDTH=1,C_B_TDATA_WIDTH=32,C_B_TUSER_WIDTH=1,C_C_TDATA_WIDTH=32,C_C_TUSER_WIDTH=1,C_OPERATION_TDATA_WIDTH=8,C_OPERATION_TUSER_WIDTH=1,C_RESULT_TDATA_WIDTH=32,C_RESULT_TUSER_WIDTH=1,C_FIXED_DATA_UNSIGNED=0}" *)
(* DowngradeIPIdentifiedWarnings = "yes" *)
module fsqrt (
module xil_fsqrt (
aclk,
s_axis_a_tvalid,
s_axis_a_tdata,

View file

@ -18,9 +18,9 @@ draw3d(){
echo -e "draw3d $TRACE benchmark\n" >> $LOG
if [ $ALL = true ]
then
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-t$TRACE.cgltrace -w8 -h8" >> $LOG
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-t$TRACE.cgltrace -w512 -h512" >> $LOG
else
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-t$TRACE.cgltrace -w8 -h8" | grep 'PERF' >> $LOG
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-t$TRACE.cgltrace -w512 -h512" | grep 'PERF' >> $LOG
fi
done
echo "draw3d tests done!"

View file

@ -4,7 +4,7 @@ RTL_DIR=../../hw/rtl
SCRIPT_DIR=../../hw/scripts
OPAE_SYN_DIR=../../hw/syn/opae
OPAE_SYN_DIR=../../hw/syn/altera/opae
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors

File diff suppressed because it is too large Load diff

0
tests/regression/raster/kernel.bin Executable file → Normal file
View file

0
tests/regression/raster/kernel.elf Executable file → Normal file
View file