mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 13:27:29 -04:00
synthesis fixes
This commit is contained in:
parent
8c9ac27716
commit
6c1b08f45d
26 changed files with 1248 additions and 641 deletions
|
@ -266,20 +266,12 @@
|
|||
`endif
|
||||
|
||||
`ifndef LATENCY_FDIV
|
||||
`ifdef ALTERA_S10
|
||||
`define LATENCY_FDIV 34
|
||||
`else
|
||||
`define LATENCY_FDIV 15
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifndef LATENCY_FSQRT
|
||||
`ifdef ALTERA_S10
|
||||
`define LATENCY_FSQRT 25
|
||||
`else
|
||||
`define LATENCY_FSQRT 10
|
||||
`endif
|
||||
`endif
|
||||
|
||||
`ifndef LATENCY_FDIVSQRT
|
||||
`define LATENCY_FDIVSQRT 32
|
||||
|
|
|
@ -25,12 +25,58 @@ module VX_fpu_div #(
|
|||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
wire stall = ~ready_out && valid_out;
|
||||
);
|
||||
|
||||
`ifdef QUARTUS
|
||||
|
||||
VX_acl_fdiv #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAGW (TAGW)
|
||||
) fp_div (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.has_fflags (has_fflags),
|
||||
.fflags (fflags),
|
||||
.result (result),
|
||||
.tag_out (tag_out),
|
||||
.valid_out (valid_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
`elsif VIVADO
|
||||
|
||||
VX_xil_fdiv #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAGW (TAGW)
|
||||
) fp_div (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.has_fflags (has_fflags),
|
||||
.fflags (fflags),
|
||||
.result (result),
|
||||
.tag_out (tag_out),
|
||||
.valid_out (valid_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
`else
|
||||
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`ifdef VERILATOR
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
reg [31:0] r;
|
||||
fflags_t f;
|
||||
|
||||
|
@ -50,18 +96,6 @@ module VX_fpu_div #(
|
|||
.data_in (r),
|
||||
.data_out (result[i])
|
||||
);
|
||||
`else
|
||||
`RESET_RELAY (fdiv_reset, reset);
|
||||
|
||||
acl_fdiv fdiv (
|
||||
.clk (clk),
|
||||
.areset (fdiv_reset),
|
||||
.en (enable),
|
||||
.a (dataa[i]),
|
||||
.b (datab[i]),
|
||||
.q (result[i])
|
||||
);
|
||||
`endif
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
|
@ -82,4 +116,6 @@ module VX_fpu_div #(
|
|||
assign has_fflags = 0;
|
||||
assign fflags = 0;
|
||||
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -32,11 +32,67 @@ module VX_fpu_fma #(
|
|||
output wire valid_out
|
||||
);
|
||||
|
||||
`ifdef QUARTUS
|
||||
|
||||
VX_acl_fma #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAGW (TAGW)
|
||||
) fp_fma (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.do_madd (do_madd),
|
||||
.do_sub (do_sub),
|
||||
.do_neg (do_neg),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.datac (datac),
|
||||
.has_fflags (has_fflags),
|
||||
.fflags (fflags),
|
||||
.result (result),
|
||||
.tag_out (tag_out),
|
||||
.valid_out (valid_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
`elsif VIVADO
|
||||
|
||||
VX_xil_fma #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAGW (TAGW)
|
||||
) fp_fma (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.do_madd (do_madd),
|
||||
.do_sub (do_sub),
|
||||
.do_neg (do_neg),
|
||||
.dataa (dataa),
|
||||
.datab (datab),
|
||||
.datac (datac),
|
||||
.has_fflags (has_fflags),
|
||||
.fflags (fflags),
|
||||
.result (result),
|
||||
.tag_out (tag_out),
|
||||
.valid_out (valid_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
`else
|
||||
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
reg [31:0] a, b, c;
|
||||
reg [31:0] r;
|
||||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
if (do_madd) begin
|
||||
|
@ -59,10 +115,6 @@ module VX_fpu_fma #(
|
|||
end
|
||||
end
|
||||
|
||||
`ifdef VERILATOR
|
||||
reg [31:0] r;
|
||||
fflags_t f;
|
||||
|
||||
always @(*) begin
|
||||
dpi_fmadd (enable && valid_in, a, b, c, frm, r, f);
|
||||
end
|
||||
|
@ -79,19 +131,6 @@ module VX_fpu_fma #(
|
|||
.data_in (r),
|
||||
.data_out (result[i])
|
||||
);
|
||||
`else
|
||||
`RESET_RELAY (fma_reset, reset);
|
||||
|
||||
acl_fmadd fmadd (
|
||||
.clk (clk),
|
||||
.areset (fma_reset),
|
||||
.en (enable),
|
||||
.a (a),
|
||||
.b (b),
|
||||
.c (c),
|
||||
.q (result[i])
|
||||
);
|
||||
`endif
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
|
@ -112,4 +151,6 @@ module VX_fpu_fma #(
|
|||
assign has_fflags = 0;
|
||||
assign fflags = 0;
|
||||
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -25,11 +25,55 @@ module VX_fpu_sqrt #(
|
|||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
|
||||
`ifdef QUARTUS
|
||||
|
||||
VX_acl_fsqrt #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAGW (TAGW)
|
||||
) fp_sqrt (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.dataa (dataa),
|
||||
.has_fflags (has_fflags),
|
||||
.fflags (fflags),
|
||||
.result (result),
|
||||
.tag_out (tag_out),
|
||||
.valid_out (valid_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
`elsif VIVADO
|
||||
|
||||
VX_xil_fsqrt #(
|
||||
.NUM_LANES (NUM_LANES),
|
||||
.TAGW (TAGW)
|
||||
) fp_sqrt (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
.tag_in (tag_in),
|
||||
.frm (frm),
|
||||
.dataa (dataa),
|
||||
.has_fflags (has_fflags),
|
||||
.fflags (fflags),
|
||||
.result (result),
|
||||
.tag_out (tag_out),
|
||||
.valid_out (valid_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
`else
|
||||
|
||||
wire stall = ~ready_out && valid_out;
|
||||
wire enable = ~stall;
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
`ifdef VERILATOR
|
||||
reg [31:0] r;
|
||||
fflags_t f;
|
||||
|
||||
|
@ -49,17 +93,6 @@ module VX_fpu_sqrt #(
|
|||
.data_in (r),
|
||||
.data_out (result[i])
|
||||
);
|
||||
`else
|
||||
`RESET_RELAY (fsqrt_reset, reset);
|
||||
|
||||
acl_fsqrt fsqrt (
|
||||
.clk (clk),
|
||||
.areset (fsqrt_reset),
|
||||
.en (enable),
|
||||
.a (dataa[i]),
|
||||
.q (result[i])
|
||||
);
|
||||
`endif
|
||||
end
|
||||
|
||||
VX_shift_register #(
|
||||
|
@ -80,4 +113,6 @@ module VX_fpu_sqrt #(
|
|||
assign has_fflags = 0;
|
||||
assign fflags = 0;
|
||||
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
64
hw/syn/altera/ip/arria10/VX_acl_fdiv.sv
Normal file
64
hw/syn/altera/ip/arria10/VX_acl_fdiv.sv
Normal file
|
@ -0,0 +1,64 @@
|
|||
`include "VX_fpu_define.vh"
|
||||
|
||||
// Multi-lane single-precision divide wrapper around the `acl_fdiv` core.
// One core is instantiated per lane. A LATENCY-deep shift register carries
// {valid, tag} next to the data, and everything is frozen while a valid
// result is waiting to be accepted downstream.
module VX_acl_fdiv #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 15
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    input wire [NUM_LANES-1:0][31:0]  datab,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    // The pipeline advances unless a valid result is being held back,
    // i.e. ~(~ready_out && valid_out).
    wire pipe_en = ready_out || ~valid_out;

    // New requests are accepted exactly when the pipeline advances.
    assign ready_in = pipe_en;

    // The rounding mode is not forwarded to the core and no exception
    // flags are produced by this wrapper.
    `UNUSED_VAR (frm)
    assign has_fflags = 0;
    assign fflags     = 0;

    // Side-band pipeline for the valid bit and the request tag.
    VX_shift_register #(
        .DATAW  (1 + TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (pipe_en),
        .data_in  ({valid_in, tag_in}),
        .data_out ({valid_out, tag_out})
    );

    // One divider core per lane, each behind its own reset relay.
    for (genvar lane = 0; lane < NUM_LANES; ++lane) begin
        `RESET_RELAY (fdiv_reset, reset);

        acl_fdiv fdiv (
            .clk    (clk),
            .areset (fdiv_reset),
            .en     (pipe_en),
            .a      (dataa[lane]),
            .b      (datab[lane]),
            .q      (result[lane])
        );
    end

endmodule
|
94
hw/syn/altera/ip/arria10/VX_acl_fma.sv
Normal file
94
hw/syn/altera/ip/arria10/VX_acl_fma.sv
Normal file
|
@ -0,0 +1,94 @@
|
|||
`include "VX_fpu_define.vh"
|
||||
|
||||
// Multi-lane single-precision fused multiply-add wrapper around the
// `acl_fmadd` core. ADD/SUB/MUL and the four MADD variants are all mapped
// onto a*b+c by selecting and sign-flipping the operands per lane.
// A LATENCY-deep shift register carries {valid, tag} next to the data.
module VX_acl_fma #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 4
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire do_madd,
    input wire do_sub,
    input wire do_neg,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    input wire [NUM_LANES-1:0][31:0]  datab,
    input wire [NUM_LANES-1:0][31:0]  datac,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);

    // The pipeline advances unless a valid result is being held back,
    // i.e. ~(~ready_out && valid_out).
    wire pipe_en = ready_out || ~valid_out;

    // New requests are accepted exactly when the pipeline advances.
    assign ready_in = pipe_en;

    // The rounding mode is not forwarded to the core and no exception
    // flags are produced by this wrapper.
    `UNUSED_VAR (frm)
    assign has_fflags = 0;
    assign fflags     = 0;

    // Side-band pipeline for the valid bit and the request tag.
    VX_shift_register #(
        .DATAW  (1 + TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (pipe_en),
        .data_in  ({valid_in, tag_in}),
        .data_out ({valid_out, tag_out})
    );

    for (genvar lane = 0; lane < NUM_LANES; ++lane) begin
        reg [31:0] a, b, c;

        // Copies of the operands with bit 31 (the sign) inverted.
        wire [31:0] dataa_neg = {~dataa[lane][31], dataa[lane][30:0]};
        wire [31:0] datab_neg = {~datab[lane][31], datab[lane][30:0]};
        wire [31:0] datac_neg = {~datac[lane][31], datac[lane][30:0]};

        // Operand selection for the a*b+c core.
        always @(*) begin
            if (do_madd) begin
                // MADD/MSUB/NMADD/NMSUB
                a = do_neg ? dataa_neg : dataa[lane];
                b = datab[lane];
                c = (do_neg ^ do_sub) ? datac_neg : datac[lane];
            end else if (do_neg) begin
                // MUL
                a = dataa[lane];
                b = datab[lane];
                c = 0;
            end else begin
                // ADD/SUB
                a = 32'h3f800000; // 1.0f
                b = dataa[lane];
                c = do_sub ? datab_neg : datab[lane];
            end
        end

        `RESET_RELAY (fma_reset, reset);

        acl_fmadd fmadd (
            .clk    (clk),
            .areset (fma_reset),
            .en     (pipe_en),
            .a      (a),
            .b      (b),
            .c      (c),
            .q      (result[lane])
        );
    end

endmodule
|
62
hw/syn/altera/ip/arria10/VX_acl_fsqrt.sv
Normal file
62
hw/syn/altera/ip/arria10/VX_acl_fsqrt.sv
Normal file
|
@ -0,0 +1,62 @@
|
|||
`include "VX_fpu_define.vh"
|
||||
|
||||
// Multi-lane single-precision square-root wrapper around the `acl_fsqrt`
// core. One core is instantiated per lane. A LATENCY-deep shift register
// carries {valid, tag} next to the data, and everything is frozen while a
// valid result is waiting to be accepted downstream.
module VX_acl_fsqrt #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 10
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    // The pipeline advances unless a valid result is being held back,
    // i.e. ~(~ready_out && valid_out).
    wire pipe_en = ready_out || ~valid_out;

    // New requests are accepted exactly when the pipeline advances.
    assign ready_in = pipe_en;

    // The rounding mode is not forwarded to the core and no exception
    // flags are produced by this wrapper.
    `UNUSED_VAR (frm)
    assign has_fflags = 0;
    assign fflags     = 0;

    // Side-band pipeline for the valid bit and the request tag.
    VX_shift_register #(
        .DATAW  (1 + TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (pipe_en),
        .data_in  ({valid_in, tag_in}),
        .data_out ({valid_out, tag_out})
    );

    // One square-root core per lane, each behind its own reset relay.
    for (genvar lane = 0; lane < NUM_LANES; ++lane) begin
        `RESET_RELAY (fsqrt_reset, reset);

        acl_fsqrt fsqrt (
            .clk    (clk),
            .areset (fsqrt_reset),
            .en     (pipe_en),
            .a      (dataa[lane]),
            .q      (result[lane])
        );
    end

endmodule
|
64
hw/syn/altera/ip/stratix10/VX_acl_fdiv.sv
Normal file
64
hw/syn/altera/ip/stratix10/VX_acl_fdiv.sv
Normal file
|
@ -0,0 +1,64 @@
|
|||
`include "VX_fpu_define.vh"
|
||||
|
||||
// Multi-lane single-precision divide wrapper around the `acl_fdiv` core.
// One core is instantiated per lane. A LATENCY-deep shift register carries
// {valid, tag} next to the data, and everything is frozen while a valid
// result is waiting to be accepted downstream.
module VX_acl_fdiv #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 34
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    input wire [NUM_LANES-1:0][31:0]  datab,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    // The pipeline advances unless a valid result is being held back,
    // i.e. ~(~ready_out && valid_out).
    wire pipe_en = ready_out || ~valid_out;

    // New requests are accepted exactly when the pipeline advances.
    assign ready_in = pipe_en;

    // The rounding mode is not forwarded to the core and no exception
    // flags are produced by this wrapper.
    `UNUSED_VAR (frm)
    assign has_fflags = 0;
    assign fflags     = 0;

    // Side-band pipeline for the valid bit and the request tag.
    VX_shift_register #(
        .DATAW  (1 + TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (pipe_en),
        .data_in  ({valid_in, tag_in}),
        .data_out ({valid_out, tag_out})
    );

    // One divider core per lane, each behind its own reset relay.
    for (genvar lane = 0; lane < NUM_LANES; ++lane) begin
        `RESET_RELAY (fdiv_reset, reset);

        acl_fdiv fdiv (
            .clk    (clk),
            .areset (fdiv_reset),
            .en     (pipe_en),
            .a      (dataa[lane]),
            .b      (datab[lane]),
            .q      (result[lane])
        );
    end

endmodule
|
94
hw/syn/altera/ip/stratix10/VX_acl_fma.sv
Normal file
94
hw/syn/altera/ip/stratix10/VX_acl_fma.sv
Normal file
|
@ -0,0 +1,94 @@
|
|||
`include "VX_fpu_define.vh"
|
||||
|
||||
// Multi-lane single-precision fused multiply-add wrapper around the
// `acl_fmadd` core. ADD/SUB/MUL and the four MADD variants are all mapped
// onto a*b+c by selecting and sign-flipping the operands per lane.
// A LATENCY-deep shift register carries {valid, tag} next to the data.
module VX_acl_fma #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 4
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire do_madd,
    input wire do_sub,
    input wire do_neg,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    input wire [NUM_LANES-1:0][31:0]  datab,
    input wire [NUM_LANES-1:0][31:0]  datac,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);

    // The pipeline advances unless a valid result is being held back,
    // i.e. ~(~ready_out && valid_out).
    wire pipe_en = ready_out || ~valid_out;

    // New requests are accepted exactly when the pipeline advances.
    assign ready_in = pipe_en;

    // The rounding mode is not forwarded to the core and no exception
    // flags are produced by this wrapper.
    `UNUSED_VAR (frm)
    assign has_fflags = 0;
    assign fflags     = 0;

    // Side-band pipeline for the valid bit and the request tag.
    VX_shift_register #(
        .DATAW  (1 + TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (pipe_en),
        .data_in  ({valid_in, tag_in}),
        .data_out ({valid_out, tag_out})
    );

    for (genvar lane = 0; lane < NUM_LANES; ++lane) begin
        reg [31:0] a, b, c;

        // Copies of the operands with bit 31 (the sign) inverted.
        wire [31:0] dataa_neg = {~dataa[lane][31], dataa[lane][30:0]};
        wire [31:0] datab_neg = {~datab[lane][31], datab[lane][30:0]};
        wire [31:0] datac_neg = {~datac[lane][31], datac[lane][30:0]};

        // Operand selection for the a*b+c core.
        always @(*) begin
            if (do_madd) begin
                // MADD/MSUB/NMADD/NMSUB
                a = do_neg ? dataa_neg : dataa[lane];
                b = datab[lane];
                c = (do_neg ^ do_sub) ? datac_neg : datac[lane];
            end else if (do_neg) begin
                // MUL
                a = dataa[lane];
                b = datab[lane];
                c = 0;
            end else begin
                // ADD/SUB
                a = 32'h3f800000; // 1.0f
                b = dataa[lane];
                c = do_sub ? datab_neg : datab[lane];
            end
        end

        `RESET_RELAY (fma_reset, reset);

        acl_fmadd fmadd (
            .clk    (clk),
            .areset (fma_reset),
            .en     (pipe_en),
            .a      (a),
            .b      (b),
            .c      (c),
            .q      (result[lane])
        );
    end

endmodule
|
62
hw/syn/altera/ip/stratix10/VX_acl_fsqrt.sv
Normal file
62
hw/syn/altera/ip/stratix10/VX_acl_fsqrt.sv
Normal file
|
@ -0,0 +1,62 @@
|
|||
`include "VX_fpu_define.vh"
|
||||
|
||||
// Multi-lane single-precision square-root wrapper around the `acl_fsqrt`
// core. One core is instantiated per lane. A LATENCY-deep shift register
// carries {valid, tag} next to the data, and everything is frozen while a
// valid result is waiting to be accepted downstream.
module VX_acl_fsqrt #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 25
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    // The pipeline advances unless a valid result is being held back,
    // i.e. ~(~ready_out && valid_out).
    wire pipe_en = ready_out || ~valid_out;

    // New requests are accepted exactly when the pipeline advances.
    assign ready_in = pipe_en;

    // The rounding mode is not forwarded to the core and no exception
    // flags are produced by this wrapper.
    `UNUSED_VAR (frm)
    assign has_fflags = 0;
    assign fflags     = 0;

    // Side-band pipeline for the valid bit and the request tag.
    VX_shift_register #(
        .DATAW  (1 + TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (pipe_en),
        .data_in  ({valid_in, tag_in}),
        .data_out ({valid_out, tag_out})
    );

    // One square-root core per lane, each behind its own reset relay.
    for (genvar lane = 0; lane < NUM_LANES; ++lane) begin
        `RESET_RELAY (fsqrt_reset, reset);

        acl_fsqrt fsqrt (
            .clk    (clk),
            .areset (fsqrt_reset),
            .en     (pipe_en),
            .a      (dataa[lane]),
            .q      (result[lane])
        );
    end

endmodule
|
|
@ -16,7 +16,7 @@
|
|||
// ---------------------------------------------------------------------------
|
||||
|
||||
// SystemVerilog created from acl_fdiv
|
||||
// SystemVerilog created on Sun Dec 27 09:48:58 2020
|
||||
// SystemVerilog created on Sun Jul 24 13:44:12 2022
|
||||
|
||||
|
||||
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
// ---------------------------------------------------------------------------
|
||||
|
||||
// SystemVerilog created from acl_fmadd
|
||||
// SystemVerilog created on Sun Dec 27 09:48:58 2020
|
||||
// SystemVerilog created on Sun Jul 24 13:44:12 2022
|
||||
|
||||
|
||||
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
// ---------------------------------------------------------------------------
|
||||
|
||||
// SystemVerilog created from acl_fsqrt
|
||||
// SystemVerilog created on Sun Dec 27 09:48:58 2020
|
||||
// SystemVerilog created on Sun Jul 24 13:44:12 2022
|
||||
|
||||
|
||||
(* altera_attribute = "-name AUTO_SHIFT_REGISTER_RECOGNITION OFF; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 10037; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 15400; -name MESSAGE_DISABLE 14130; -name MESSAGE_DISABLE 10036; -name MESSAGE_DISABLE 12020; -name MESSAGE_DISABLE 12030; -name MESSAGE_DISABLE 12010; -name MESSAGE_DISABLE 12110; -name MESSAGE_DISABLE 14320; -name MESSAGE_DISABLE 13410; -name MESSAGE_DISABLE 113007; -name MESSAGE_DISABLE 10958" *)
|
||||
|
|
|
@ -1,96 +1,6 @@
|
|||
starting execution ...
|
||||
build model options ...
|
||||
argc=22
|
||||
Generation context:
|
||||
Will not generate valid and channel signals
|
||||
HardFP is enabled enabling set to true
|
||||
Correct rounding constraint detected
|
||||
Will not generate valid and channel signals
|
||||
The new component name is acl_fadd
|
||||
Frequency 250MHz
|
||||
Deployment FPGA Stratix10
|
||||
Estimated resources LUTs 0, DSPs 2, RAMBits 0, RAMBlocks 0
|
||||
The pipeline depth of the block is 3 cycle(s)
|
||||
@@start
|
||||
@name FPAdd@
|
||||
@latency 3@
|
||||
@LUT 0@
|
||||
@DSP 2@
|
||||
@RAMBits 0@
|
||||
@RAMBlockUsage 0@
|
||||
@enable 1@
|
||||
@subnormals 0@
|
||||
@error 0.50@
|
||||
@rounding RNE@
|
||||
@method single path@
|
||||
@inPort 0 fpieee 8 23@
|
||||
@inPort 1 fpieee 8 23@
|
||||
@outPort 0 fpieee 8 23@
|
||||
@nochanvalid 1@
|
||||
@@end
|
||||
starting execution ...
|
||||
build model options ...
|
||||
argc=22
|
||||
Generation context:
|
||||
Will not generate valid and channel signals
|
||||
HardFP is enabled enabling set to true
|
||||
Correct rounding constraint detected
|
||||
Will not generate valid and channel signals
|
||||
The new component name is acl_fsub
|
||||
Frequency 250MHz
|
||||
Deployment FPGA Stratix10
|
||||
Estimated resources LUTs 0, DSPs 2, RAMBits 0, RAMBlocks 0
|
||||
The pipeline depth of the block is 3 cycle(s)
|
||||
@@start
|
||||
@name FPSub@
|
||||
@latency 3@
|
||||
@LUT 0@
|
||||
@DSP 2@
|
||||
@RAMBits 0@
|
||||
@RAMBlockUsage 0@
|
||||
@enable 1@
|
||||
@subnormals 0@
|
||||
@error 0.50@
|
||||
@rounding RNE@
|
||||
@method single path@
|
||||
@inPort 0 fpieee 8 23@
|
||||
@inPort 1 fpieee 8 23@
|
||||
@outPort 0 fpieee 8 23@
|
||||
@nochanvalid 1@
|
||||
@@end
|
||||
starting execution ...
|
||||
build model options ...
|
||||
argc=22
|
||||
Generation context:
|
||||
Will not generate valid and channel signals
|
||||
HardFP is enabled enabling set to true
|
||||
Correct rounding constraint detected
|
||||
Will not generate valid and channel signals
|
||||
The new component name is acl_fmul
|
||||
Frequency 250MHz
|
||||
Deployment FPGA Stratix10
|
||||
Estimated resources LUTs 0, DSPs 2, RAMBits 0, RAMBlocks 0
|
||||
The pipeline depth of the block is 3 cycle(s)
|
||||
@@start
|
||||
@name FPMul@
|
||||
@latency 3@
|
||||
@LUT 0@
|
||||
@DSP 2@
|
||||
@RAMBits 0@
|
||||
@RAMBlockUsage 0@
|
||||
@enable 1@
|
||||
@subnormals 0@
|
||||
@error 0.50@
|
||||
@rounding RNE@
|
||||
@method default@
|
||||
@inPort 0 fpieee 8 23@
|
||||
@inPort 1 fpieee 8 23@
|
||||
@outPort 0 fpieee 8 23@
|
||||
@nochanvalid 1@
|
||||
@@end
|
||||
starting execution ...
|
||||
build model options ...
|
||||
argc=22
|
||||
Generation context:
|
||||
Will not generate valid and channel signals
|
||||
HardFP is enabled enabling set to true
|
||||
|
@ -178,119 +88,3 @@ The pipeline depth of the block is 25 cycle(s)
|
|||
@outPort 0 fpieee 8 23@
|
||||
@nochanvalid 1@
|
||||
@@end
|
||||
starting execution ...
|
||||
build model options ...
|
||||
argc=25
|
||||
Generation context:
|
||||
Will not generate valid and channel signals
|
||||
HardFP is enabled enabling set to true
|
||||
Correct rounding constraint detected
|
||||
Will not generate valid and channel signals
|
||||
The new component name is acl_ftoi
|
||||
Frequency 250MHz
|
||||
Deployment FPGA Stratix10
|
||||
Estimated resources LUTs 344, DSPs 0, RAMBits 0, RAMBlocks 0
|
||||
The pipeline depth of the block is 3 cycle(s)
|
||||
@@start
|
||||
@name FPToFXP@
|
||||
@latency 3@
|
||||
@LUT 344@
|
||||
@DSP 0@
|
||||
@RAMBits 0@
|
||||
@RAMBlockUsage 0@
|
||||
@enable 1@
|
||||
@subnormals 0@
|
||||
@error 0.50@
|
||||
@rounding RNE@
|
||||
@method default@
|
||||
@inPort 0 fpieee 8 23@
|
||||
@outPort 0 fxp 32 0 1@
|
||||
@nochanvalid 1@
|
||||
@@end
|
||||
starting execution ...
|
||||
build model options ...
|
||||
argc=25
|
||||
Generation context:
|
||||
Will not generate valid and channel signals
|
||||
HardFP is enabled enabling set to true
|
||||
Correct rounding constraint detected
|
||||
Will not generate valid and channel signals
|
||||
The new component name is acl_ftou
|
||||
Frequency 250MHz
|
||||
Deployment FPGA Stratix10
|
||||
Estimated resources LUTs 272, DSPs 0, RAMBits 0, RAMBlocks 0
|
||||
The pipeline depth of the block is 3 cycle(s)
|
||||
@@start
|
||||
@name FPToFXP@
|
||||
@latency 3@
|
||||
@LUT 272@
|
||||
@DSP 0@
|
||||
@RAMBits 0@
|
||||
@RAMBlockUsage 0@
|
||||
@enable 1@
|
||||
@subnormals 0@
|
||||
@error 0.50@
|
||||
@rounding RNE@
|
||||
@method default@
|
||||
@inPort 0 fpieee 8 23@
|
||||
@outPort 0 fxp 32 0 0@
|
||||
@nochanvalid 1@
|
||||
@@end
|
||||
starting execution ...
|
||||
build model options ...
|
||||
argc=25
|
||||
Generation context:
|
||||
Will not generate valid and channel signals
|
||||
HardFP is enabled enabling set to true
|
||||
Correct rounding constraint detected
|
||||
Will not generate valid and channel signals
|
||||
The new component name is acl_itof
|
||||
Frequency 250MHz
|
||||
Deployment FPGA Stratix10
|
||||
Estimated resources LUTs 362, DSPs 0, RAMBits 0, RAMBlocks 0
|
||||
The pipeline depth of the block is 7 cycle(s)
|
||||
@@start
|
||||
@name FXPToFP@
|
||||
@latency 7@
|
||||
@LUT 362@
|
||||
@DSP 0@
|
||||
@RAMBits 0@
|
||||
@RAMBlockUsage 0@
|
||||
@enable 1@
|
||||
@subnormals 0@
|
||||
@error 0.50@
|
||||
@rounding RNE@
|
||||
@method default@
|
||||
@inPort 0 fxp 32 0 1@
|
||||
@outPort 0 fpieee 8 23@
|
||||
@nochanvalid 1@
|
||||
@@end
|
||||
starting execution ...
|
||||
build model options ...
|
||||
argc=25
|
||||
Generation context:
|
||||
Will not generate valid and channel signals
|
||||
HardFP is enabled enabling set to true
|
||||
Correct rounding constraint detected
|
||||
Will not generate valid and channel signals
|
||||
The new component name is acl_utof
|
||||
Frequency 300MHz
|
||||
Deployment FPGA Stratix10
|
||||
Estimated resources LUTs 310, DSPs 0, RAMBits 0, RAMBlocks 0
|
||||
The pipeline depth of the block is 7 cycle(s)
|
||||
@@start
|
||||
@name FXPToFP@
|
||||
@latency 7@
|
||||
@LUT 310@
|
||||
@DSP 0@
|
||||
@RAMBits 0@
|
||||
@RAMBlockUsage 0@
|
||||
@enable 1@
|
||||
@subnormals 0@
|
||||
@error 0.50@
|
||||
@rounding RNE@
|
||||
@method default@
|
||||
@inPort 0 fxp 32 0 0@
|
||||
@outPort 0 fpieee 8 23@
|
||||
@nochanvalid 1@
|
||||
@@end
|
||||
|
|
|
@ -33,6 +33,13 @@ DBG_FLAGS += $(DBG_TRACE_FLAGS)
|
|||
#CONFIGS += -DSM_DISABLE
|
||||
#CONFIGS += -DRCACHE_DISABLE -DOCACHE_DISABLE -DTCACHE_DISABLE
|
||||
|
||||
ifeq ($(DEVICE_FAMILY), stratix10)
|
||||
CONFIGS += -DALTERA_S10
|
||||
endif
|
||||
ifeq ($(DEVICE_FAMILY), arria10)
|
||||
CONFIGS += -DALTERA_A10
|
||||
endif
|
||||
|
||||
CONFIG1 := -DNUM_CLUSTERS=1 -DNUM_CORES=1 $(CONFIGS)
|
||||
CONFIG2 := -DNUM_CLUSTERS=1 -DNUM_CORES=2 $(CONFIGS)
|
||||
CONFIG4 := -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE $(CONFIGS)
|
||||
|
|
77
hw/syn/xilinx/ip/ultrascale/VX_xil_fdiv.sv
Normal file
77
hw/syn/xilinx/ip/ultrascale/VX_xil_fdiv.sv
Normal file
|
@ -0,0 +1,77 @@
|
|||
`include "VX_fpu_define.vh"
|
||||
|
||||
// Multi-lane single-precision divide wrapper around the `xil_fdiv` core.
//
// One core is instantiated per lane. The request tag travels through a
// LATENCY-deep shift register; the valid strobe is taken from the cores'
// result-valid outputs. Per-lane exception flags are decoded from the
// core's result TUSER bus.
//
// Parameters:
//   NUM_LANES - number of parallel divide lanes
//   TAGW      - width of the request tag
//   LATENCY   - depth of the tag shift register
//
// NOTE(review): the cores are not given a clock-enable here while the tag
// shift register is gated by `enable`; confirm that results and tags stay
// aligned when the consumer stalls.
module VX_xil_fdiv #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 28
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    input wire [NUM_LANES-1:0][31:0]  datab,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    wire stall  = ~ready_out && valid_out;
    wire enable = ~stall;

    // Result-valid strobe of every lane. Each lane needs its own net:
    // wiring all cores straight to `valid_out` creates multiple drivers
    // as soon as NUM_LANES > 1.
    wire [NUM_LANES-1:0] lane_valid;

    for (genvar i = 0; i < NUM_LANES; ++i) begin
        wire [3:0] tuser;
        wire tvalid_in;

        `RESET_RELAY (fdiv_reset, reset);

        // Only present operands to the core when the request is accepted.
        assign tvalid_in = enable && valid_in;

        xil_fdiv fdiv (
            .aclk                 (clk),
            .aresetn              (~fdiv_reset),
            .s_axis_a_tvalid      (tvalid_in),
            .s_axis_a_tdata       (dataa[i]),      // was undeclared `a`
            .s_axis_b_tvalid      (tvalid_in),
            .s_axis_b_tdata       (datab[i]),      // was undeclared `b`
            .m_axis_result_tvalid (lane_valid[i]),
            .m_axis_result_tdata  (result[i]),
            .m_axis_result_tuser  (tuser)          // whole flag bus, not bit i
        );

        // TUSER decode as wired by this wrapper:
        // [0]=underflow, [1]=overflow, [2]=invalid, [3]=divide-by-zero.
        // The core reports no inexact flag, so NX is tied low.
        assign fflags[i].NX = 1'b0;
        assign fflags[i].UF = tuser[0];
        assign fflags[i].OF = tuser[1];
        assign fflags[i].DZ = tuser[3];
        assign fflags[i].NV = tuser[2];
    end

    // All lanes receive the same tvalid_in, so their valid strobes are
    // expected to coincide; for NUM_LANES == 1 this is just lane 0.
    assign valid_out = &lane_valid;

    VX_shift_register #(
        .DATAW  (TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (enable),
        .data_in  (tag_in),
        .data_out (tag_out)
    );

    assign ready_in = enable;

    // The rounding mode is not forwarded to the core.
    `UNUSED_VAR (frm)
    assign has_fflags = 1;

endmodule
|
108
hw/syn/xilinx/ip/ultrascale/VX_xil_fma.sv
Normal file
108
hw/syn/xilinx/ip/ultrascale/VX_xil_fma.sv
Normal file
|
@ -0,0 +1,108 @@
|
|||
`include "VX_fpu_define.vh"
|
||||
|
||||
// Multi-lane single-precision fused multiply-add wrapper around the
// `xil_fma` core.
//
// ADD/SUB/MUL and the four MADD variants are all mapped onto a*b+c by
// selecting and sign-flipping the operands per lane. The request tag
// travels through a LATENCY-deep shift register; the valid strobe is taken
// from the cores' result-valid outputs. Per-lane exception flags are
// decoded from the core's result TUSER bus.
//
// Parameters:
//   NUM_LANES - number of parallel FMA lanes
//   TAGW      - width of the request tag
//   LATENCY   - depth of the tag shift register
//
// NOTE(review): the cores are not given a clock-enable here while the tag
// shift register is gated by `enable`; confirm that results and tags stay
// aligned when the consumer stalls.
module VX_xil_fma #(
    parameter NUM_LANES = 1,
    parameter TAGW      = 1,
    parameter LATENCY   = 16
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire  valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire do_madd,
    input wire do_sub,
    input wire do_neg,

    input wire [NUM_LANES-1:0][31:0]  dataa,
    input wire [NUM_LANES-1:0][31:0]  datab,
    input wire [NUM_LANES-1:0][31:0]  datac,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire  ready_out,
    output wire valid_out
);
    wire stall  = ~ready_out && valid_out;
    wire enable = ~stall;

    // Result-valid strobe of every lane. Each lane needs its own net:
    // wiring all cores straight to `valid_out` creates multiple drivers
    // as soon as NUM_LANES > 1.
    wire [NUM_LANES-1:0] lane_valid;

    for (genvar i = 0; i < NUM_LANES; ++i) begin
        reg [31:0] a, b, c;
        wire [2:0] tuser;
        wire tvalid_in;

        // Operand selection for the a*b+c core.
        always @(*) begin
            if (do_madd) begin
                // MADD/MSUB/NMADD/NMSUB
                a = do_neg ? {~dataa[i][31], dataa[i][30:0]} : dataa[i];
                b = datab[i];
                c = (do_neg ^ do_sub) ? {~datac[i][31], datac[i][30:0]} : datac[i];
            end else begin
                if (do_neg) begin
                    // MUL
                    a = dataa[i];
                    b = datab[i];
                    c = 0;
                end else begin
                    // ADD/SUB
                    a = 32'h3f800000; // 1.0f
                    b = dataa[i];
                    c = do_sub ? {~datab[i][31], datab[i][30:0]} : datab[i];
                end
            end
        end

        `RESET_RELAY (fma_reset, reset);

        // Only present operands to the core when the request is accepted.
        assign tvalid_in = enable && valid_in;

        xil_fma fma (
            .aclk                    (clk),
            .aresetn                 (~fma_reset),
            .s_axis_a_tvalid         (tvalid_in),
            .s_axis_a_tdata          (a),
            .s_axis_b_tvalid         (tvalid_in),
            .s_axis_b_tdata          (b),
            .s_axis_c_tvalid         (tvalid_in),
            .s_axis_c_tdata          (c),
            .s_axis_operation_tvalid (tvalid_in),
            .s_axis_operation_tdata  ('0),
            .m_axis_result_tvalid    (lane_valid[i]),
            .m_axis_result_tdata     (result[i]),
            .m_axis_result_tuser     (tuser)       // whole flag bus, not bit i
        );

        // TUSER decode as wired by this wrapper:
        // [0]=underflow, [1]=overflow, [2]=invalid.
        // NX and DZ are not reported by the core and are tied low.
        assign fflags[i].NX = 1'b0;
        assign fflags[i].UF = tuser[0];
        assign fflags[i].OF = tuser[1];
        assign fflags[i].DZ = 1'b0;
        assign fflags[i].NV = tuser[2];
    end

    // All lanes receive the same tvalid_in, so their valid strobes are
    // expected to coincide; for NUM_LANES == 1 this is just lane 0.
    assign valid_out = &lane_valid;

    VX_shift_register #(
        .DATAW  (TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (enable),
        .data_in  (tag_in),
        .data_out (tag_out)
    );

    assign ready_in = enable;

    // The rounding mode is not forwarded to the core.
    `UNUSED_VAR (frm)
    assign has_fflags = 1;

endmodule
|
74
hw/syn/xilinx/ip/ultrascale/VX_xil_fsqrt.sv
Normal file
74
hw/syn/xilinx/ip/ultrascale/VX_xil_fsqrt.sv
Normal file
|
@ -0,0 +1,74 @@
|
|||
`include "VX_fpu_define.vh"
|
||||
|
||||
// Single-precision square-root wrapper around the Xilinx `xil_fsqrt` core.
//
// One core is instantiated per lane. The request tag travels through a
// LATENCY-deep shift register in parallel with the cores.
// NOTE(review): LATENCY presumably has to equal the latency the xil_fsqrt
// core was generated with - confirm against the IP configuration.
//
// Parameters:
//   NUM_LANES - number of parallel lanes (one core each)
//   TAGW      - width of the tag carried with each request
//   LATENCY   - depth of the tag shift register
//
// Handshake: ready_in is deasserted while a valid result is waiting on a
// not-ready consumer (valid_out && !ready_out).
module VX_xil_fsqrt #(
    parameter NUM_LANES = 1,
    parameter TAGW = 1,
    parameter LATENCY = 28
) (
    input wire clk,
    input wire reset,

    output wire ready_in,
    input wire valid_in,

    input wire [TAGW-1:0] tag_in,

    input wire [`INST_FRM_BITS-1:0] frm,

    input wire [NUM_LANES-1:0][31:0] dataa,
    output wire [NUM_LANES-1:0][31:0] result,

    output wire has_fflags,
    output fflags_t [NUM_LANES-1:0] fflags,

    output wire [TAGW-1:0] tag_out,

    input wire ready_out,
    output wire valid_out
);
    wire stall  = ~ready_out && valid_out;
    wire enable = ~stall;

    for (genvar i = 0; i < NUM_LANES; ++i) begin
        wire [0:0] tuser;       // this lane's invalid-operation bit from the core
        wire       tvalid_in;
        wire       tvalid_out;

        `RESET_RELAY (fsqrt_reset, reset);

        // Only push a new operand into the core when not stalled.
        assign tvalid_in = enable && valid_in;

        // NOTE(review): the core is only fed through tvalid here; whether it
        // actually freezes during a stall (as the tag shift register does)
        // depends on the IP configuration - confirm.
        xil_fsqrt fsqrt (
            .aclk                 (clk),
            .aresetn              (~fsqrt_reset),
            .s_axis_a_tvalid      (tvalid_in),
            // Feed this lane's operand; the previous `a` was never declared
            // in this module.
            .s_axis_a_tdata       (dataa[i]),
            .m_axis_result_tvalid (tvalid_out),
            .m_axis_result_tdata  (result[i]),
            // tuser is a 1-bit per-lane wire; indexing it by lane number was
            // out of range for every lane except 0.
            .m_axis_result_tuser  (tuser)
        );

        // Every lane receives the same tvalid_in, so lane 0's output valid is
        // used for the group. Driving valid_out from each lane would create
        // multiple drivers on one net when NUM_LANES > 1.
        if (i == 0) begin
            assign valid_out = tvalid_out;
        end else begin
            `UNUSED_VAR (tvalid_out)
        end

        // Only the invalid-operation flag comes from the core.
        assign fflags[i].NX = 1'b0;
        assign fflags[i].UF = 1'b0;
        assign fflags[i].OF = 1'b0;
        assign fflags[i].DZ = 1'b0;
        assign fflags[i].NV = tuser[0];
    end

    // Tag pipeline running alongside the cores.
    VX_shift_register #(
        .DATAW  (TAGW),
        .DEPTH  (LATENCY),
        .RESETW (1)
    ) shift_reg (
        .clk      (clk),
        .reset    (reset),
        .enable   (enable),
        .data_in  (tag_in),
        .data_out (tag_out)
    );

    assign ready_in = enable;

    // The rounding mode is not forwarded to the core.
    `UNUSED_VAR (frm)

    // fflags carries a live NV bit, so advertise it; reporting 0 here made
    // the invalid-operation flag (e.g. sqrt of a negative) get dropped.
    assign has_fflags = 1;

endmodule
|
|
@ -54,17 +54,18 @@
|
|||
(* CHECK_LICENSE_TYPE = "fdiv,floating_point_v7_1_11,{}" *)
|
||||
(* CORE_GENERATION_INFO = "fdiv,floating_point_v7_1_11,{x_ipProduct=Vivado 2020.2.2,x_ipVendor=xilinx.com,x_ipLibrary=ip,x_ipName=floating_point,x_ipVersion=7.1,x_ipCoreRevision=11,x_ipLanguage=VERILOG,x_ipSimLanguage=VERILOG,C_XDEVICEFAMILY=virtexuplusHBM,C_PART=xcu280-fsvh2892-2L-e,C_HAS_ADD=0,C_HAS_SUBTRACT=0,C_HAS_MULTIPLY=0,C_HAS_DIVIDE=1,C_HAS_SQRT=0,C_HAS_COMPARE=0,C_HAS_FIX_TO_FLT=0,C_HAS_FLT_TO_FIX=0,C_HAS_FLT_TO_FLT=0,C_HAS_RECIP=0,C_HAS_RECIP_SQRT=0,C_HAS_ABSOLUTE=0,C_HAS_LOGARITHM=0,C_HAS_EXPONENTIAL=0,C_HAS_F\
|
||||
MA=0,C_HAS_FMS=0,C_HAS_UNFUSED_MULTIPLY_ADD=0,C_HAS_UNFUSED_MULTIPLY_SUB=0,C_HAS_UNFUSED_MULTIPLY_ACCUMULATOR_A=0,C_HAS_UNFUSED_MULTIPLY_ACCUMULATOR_S=0,C_HAS_ACCUMULATOR_A=0,C_HAS_ACCUMULATOR_S=0,C_HAS_ACCUMULATOR_PRIMITIVE_A=0,C_HAS_ACCUMULATOR_PRIMITIVE_S=0,C_A_WIDTH=32,C_A_FRACTION_WIDTH=24,C_B_WIDTH=32,C_B_FRACTION_WIDTH=24,C_C_WIDTH=32,C_C_FRACTION_WIDTH=24,C_RESULT_WIDTH=32,C_RESULT_FRACTION_WIDTH=24,C_COMPARE_OPERATION=8,C_LATENCY=28,C_OPTIMIZATION=1,C_MULT_USAGE=0,C_BRAM_USAGE=0,C_RATE=\
|
||||
1,C_ACCUM_INPUT_MSB=32,C_ACCUM_MSB=32,C_ACCUM_LSB=-31,C_HAS_UNDERFLOW=0,C_HAS_OVERFLOW=0,C_HAS_INVALID_OP=0,C_HAS_DIVIDE_BY_ZERO=0,C_HAS_ACCUM_OVERFLOW=0,C_HAS_ACCUM_INPUT_OVERFLOW=0,C_HAS_ACLKEN=0,C_HAS_ARESETN=0,C_THROTTLE_SCHEME=3,C_HAS_A_TUSER=0,C_HAS_A_TLAST=0,C_HAS_B=1,C_HAS_B_TUSER=0,C_HAS_B_TLAST=0,C_HAS_C=0,C_HAS_C_TUSER=0,C_HAS_C_TLAST=0,C_HAS_OPERATION=0,C_HAS_OPERATION_TUSER=0,C_HAS_OPERATION_TLAST=0,C_HAS_RESULT_TUSER=0,C_HAS_RESULT_TLAST=0,C_TLAST_RESOLUTION=0,C_A_TDATA_WIDTH=32,C_\
|
||||
A_TUSER_WIDTH=1,C_B_TDATA_WIDTH=32,C_B_TUSER_WIDTH=1,C_C_TDATA_WIDTH=32,C_C_TUSER_WIDTH=1,C_OPERATION_TDATA_WIDTH=8,C_OPERATION_TUSER_WIDTH=1,C_RESULT_TDATA_WIDTH=32,C_RESULT_TUSER_WIDTH=1,C_FIXED_DATA_UNSIGNED=0}" *)
|
||||
1,C_ACCUM_INPUT_MSB=32,C_ACCUM_MSB=32,C_ACCUM_LSB=-31,C_HAS_UNDERFLOW=1,C_HAS_OVERFLOW=1,C_HAS_INVALID_OP=1,C_HAS_DIVIDE_BY_ZERO=1,C_HAS_ACCUM_OVERFLOW=0,C_HAS_ACCUM_INPUT_OVERFLOW=0,C_HAS_ACLKEN=0,C_HAS_ARESETN=0,C_THROTTLE_SCHEME=3,C_HAS_A_TUSER=0,C_HAS_A_TLAST=0,C_HAS_B=1,C_HAS_B_TUSER=0,C_HAS_B_TLAST=0,C_HAS_C=0,C_HAS_C_TUSER=0,C_HAS_C_TLAST=0,C_HAS_OPERATION=0,C_HAS_OPERATION_TUSER=0,C_HAS_OPERATION_TLAST=0,C_HAS_RESULT_TUSER=1,C_HAS_RESULT_TLAST=0,C_TLAST_RESOLUTION=0,C_A_TDATA_WIDTH=32,C_\
|
||||
A_TUSER_WIDTH=1,C_B_TDATA_WIDTH=32,C_B_TUSER_WIDTH=1,C_C_TDATA_WIDTH=32,C_C_TUSER_WIDTH=1,C_OPERATION_TDATA_WIDTH=8,C_OPERATION_TUSER_WIDTH=1,C_RESULT_TDATA_WIDTH=32,C_RESULT_TUSER_WIDTH=4,C_FIXED_DATA_UNSIGNED=0}" *)
|
||||
(* DowngradeIPIdentifiedWarnings = "yes" *)
|
||||
module fdiv (
|
||||
module xil_fdiv (
|
||||
aclk,
|
||||
s_axis_a_tvalid,
|
||||
s_axis_a_tdata,
|
||||
s_axis_b_tvalid,
|
||||
s_axis_b_tdata,
|
||||
m_axis_result_tvalid,
|
||||
m_axis_result_tdata
|
||||
m_axis_result_tdata,
|
||||
m_axis_result_tuser
|
||||
);
|
||||
|
||||
(* X_INTERFACE_PARAMETER = "XIL_INTERFACENAME aclk_intf, ASSOCIATED_BUSIF S_AXIS_OPERATION:M_AXIS_RESULT:S_AXIS_C:S_AXIS_B:S_AXIS_A, ASSOCIATED_RESET aresetn, ASSOCIATED_CLKEN aclken, FREQ_HZ 10000000, FREQ_TOLERANCE_HZ 0, PHASE 0.000, INSERT_VIP 0" *)
|
||||
|
@ -82,9 +83,11 @@ input wire s_axis_b_tvalid;
|
|||
input wire [31 : 0] s_axis_b_tdata;
|
||||
(* X_INTERFACE_INFO = "xilinx.com:interface:axis:1.0 M_AXIS_RESULT TVALID" *)
|
||||
output wire m_axis_result_tvalid;
|
||||
(* X_INTERFACE_PARAMETER = "XIL_INTERFACENAME M_AXIS_RESULT, TDATA_NUM_BYTES 4, TDEST_WIDTH 0, TID_WIDTH 0, TUSER_WIDTH 0, HAS_TREADY 0, HAS_TSTRB 0, HAS_TKEEP 0, HAS_TLAST 0, FREQ_HZ 100000000, PHASE 0.000, LAYERED_METADATA undef, INSERT_VIP 0" *)
|
||||
(* X_INTERFACE_INFO = "xilinx.com:interface:axis:1.0 M_AXIS_RESULT TDATA" *)
|
||||
output wire [31 : 0] m_axis_result_tdata;
|
||||
(* X_INTERFACE_PARAMETER = "XIL_INTERFACENAME M_AXIS_RESULT, TDATA_NUM_BYTES 4, TDEST_WIDTH 0, TID_WIDTH 0, TUSER_WIDTH 4, HAS_TREADY 0, HAS_TSTRB 0, HAS_TKEEP 0, HAS_TLAST 0, FREQ_HZ 100000000, PHASE 0.000, LAYERED_METADATA undef, INSERT_VIP 0" *)
|
||||
(* X_INTERFACE_INFO = "xilinx.com:interface:axis:1.0 M_AXIS_RESULT TUSER" *)
|
||||
output wire [3 : 0] m_axis_result_tuser;
|
||||
|
||||
floating_point_v7_1_11 #(
|
||||
.C_XDEVICEFAMILY("virtexuplusHBM"),
|
||||
|
@ -130,10 +133,10 @@ output wire [31 : 0] m_axis_result_tdata;
|
|||
.C_ACCUM_INPUT_MSB(32),
|
||||
.C_ACCUM_MSB(32),
|
||||
.C_ACCUM_LSB(-31),
|
||||
.C_HAS_UNDERFLOW(0),
|
||||
.C_HAS_OVERFLOW(0),
|
||||
.C_HAS_INVALID_OP(0),
|
||||
.C_HAS_DIVIDE_BY_ZERO(0),
|
||||
.C_HAS_UNDERFLOW(1),
|
||||
.C_HAS_OVERFLOW(1),
|
||||
.C_HAS_INVALID_OP(1),
|
||||
.C_HAS_DIVIDE_BY_ZERO(1),
|
||||
.C_HAS_ACCUM_OVERFLOW(0),
|
||||
.C_HAS_ACCUM_INPUT_OVERFLOW(0),
|
||||
.C_HAS_ACLKEN(0),
|
||||
|
@ -150,7 +153,7 @@ output wire [31 : 0] m_axis_result_tdata;
|
|||
.C_HAS_OPERATION(0),
|
||||
.C_HAS_OPERATION_TUSER(0),
|
||||
.C_HAS_OPERATION_TLAST(0),
|
||||
.C_HAS_RESULT_TUSER(0),
|
||||
.C_HAS_RESULT_TUSER(1),
|
||||
.C_HAS_RESULT_TLAST(0),
|
||||
.C_TLAST_RESOLUTION(0),
|
||||
.C_A_TDATA_WIDTH(32),
|
||||
|
@ -162,7 +165,7 @@ output wire [31 : 0] m_axis_result_tdata;
|
|||
.C_OPERATION_TDATA_WIDTH(8),
|
||||
.C_OPERATION_TUSER_WIDTH(1),
|
||||
.C_RESULT_TDATA_WIDTH(32),
|
||||
.C_RESULT_TUSER_WIDTH(1),
|
||||
.C_RESULT_TUSER_WIDTH(4),
|
||||
.C_FIXED_DATA_UNSIGNED(0)
|
||||
) inst (
|
||||
.aclk(aclk),
|
||||
|
@ -191,7 +194,7 @@ output wire [31 : 0] m_axis_result_tdata;
|
|||
.m_axis_result_tvalid(m_axis_result_tvalid),
|
||||
.m_axis_result_tready(1'H0),
|
||||
.m_axis_result_tdata(m_axis_result_tdata),
|
||||
.m_axis_result_tuser(),
|
||||
.m_axis_result_tuser(m_axis_result_tuser),
|
||||
.m_axis_result_tlast()
|
||||
);
|
||||
endmodule
|
|
@ -57,7 +57,7 @@ FMA=1,C_HAS_FMS=1,C_HAS_UNFUSED_MULTIPLY_ADD=0,C_HAS_UNFUSED_MULTIPLY_SUB=0,C_HA
|
|||
=1,C_ACCUM_INPUT_MSB=32,C_ACCUM_MSB=32,C_ACCUM_LSB=-31,C_HAS_UNDERFLOW=1,C_HAS_OVERFLOW=1,C_HAS_INVALID_OP=1,C_HAS_DIVIDE_BY_ZERO=0,C_HAS_ACCUM_OVERFLOW=0,C_HAS_ACCUM_INPUT_OVERFLOW=0,C_HAS_ACLKEN=0,C_HAS_ARESETN=0,C_THROTTLE_SCHEME=3,C_HAS_A_TUSER=0,C_HAS_A_TLAST=0,C_HAS_B=1,C_HAS_B_TUSER=0,C_HAS_B_TLAST=0,C_HAS_C=1,C_HAS_C_TUSER=0,C_HAS_C_TLAST=0,C_HAS_OPERATION=1,C_HAS_OPERATION_TUSER=0,C_HAS_OPERATION_TLAST=0,C_HAS_RESULT_TUSER=1,C_HAS_RESULT_TLAST=0,C_TLAST_RESOLUTION=0,C_A_TDATA_WIDTH=32,C\
|
||||
_A_TUSER_WIDTH=1,C_B_TDATA_WIDTH=32,C_B_TUSER_WIDTH=1,C_C_TDATA_WIDTH=32,C_C_TUSER_WIDTH=1,C_OPERATION_TDATA_WIDTH=8,C_OPERATION_TUSER_WIDTH=1,C_RESULT_TDATA_WIDTH=32,C_RESULT_TUSER_WIDTH=3,C_FIXED_DATA_UNSIGNED=0}" *)
|
||||
(* DowngradeIPIdentifiedWarnings = "yes" *)
|
||||
module fmadd (
|
||||
module xil_fma (
|
||||
aclk,
|
||||
s_axis_a_tvalid,
|
||||
s_axis_a_tdata,
|
|
@ -57,7 +57,7 @@ FMA=0,C_HAS_FMS=0,C_HAS_UNFUSED_MULTIPLY_ADD=0,C_HAS_UNFUSED_MULTIPLY_SUB=0,C_HA
|
|||
=1,C_ACCUM_INPUT_MSB=32,C_ACCUM_MSB=32,C_ACCUM_LSB=-31,C_HAS_UNDERFLOW=0,C_HAS_OVERFLOW=0,C_HAS_INVALID_OP=1,C_HAS_DIVIDE_BY_ZERO=0,C_HAS_ACCUM_OVERFLOW=0,C_HAS_ACCUM_INPUT_OVERFLOW=0,C_HAS_ACLKEN=0,C_HAS_ARESETN=0,C_THROTTLE_SCHEME=3,C_HAS_A_TUSER=0,C_HAS_A_TLAST=0,C_HAS_B=0,C_HAS_B_TUSER=0,C_HAS_B_TLAST=0,C_HAS_C=0,C_HAS_C_TUSER=0,C_HAS_C_TLAST=0,C_HAS_OPERATION=0,C_HAS_OPERATION_TUSER=0,C_HAS_OPERATION_TLAST=0,C_HAS_RESULT_TUSER=1,C_HAS_RESULT_TLAST=0,C_TLAST_RESOLUTION=0,C_A_TDATA_WIDTH=32,C\
|
||||
_A_TUSER_WIDTH=1,C_B_TDATA_WIDTH=32,C_B_TUSER_WIDTH=1,C_C_TDATA_WIDTH=32,C_C_TUSER_WIDTH=1,C_OPERATION_TDATA_WIDTH=8,C_OPERATION_TUSER_WIDTH=1,C_RESULT_TDATA_WIDTH=32,C_RESULT_TUSER_WIDTH=1,C_FIXED_DATA_UNSIGNED=0}" *)
|
||||
(* DowngradeIPIdentifiedWarnings = "yes" *)
|
||||
module fsqrt (
|
||||
module xil_fsqrt (
|
||||
aclk,
|
||||
s_axis_a_tvalid,
|
||||
s_axis_a_tdata,
|
|
@ -18,9 +18,9 @@ draw3d(){
|
|||
echo -e "draw3d $TRACE benchmark\n" >> $LOG
|
||||
if [ $ALL = true ]
|
||||
then
|
||||
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-t$TRACE.cgltrace -w8 -h8" >> $LOG
|
||||
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-t$TRACE.cgltrace -w512 -h512" >> $LOG
|
||||
else
|
||||
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-t$TRACE.cgltrace -w8 -h8" | grep 'PERF' >> $LOG
|
||||
CONFIGS="-DEXT_GFX_ENABLE" ./ci/blackbox.sh --driver=simx --app=draw3d --args="-t$TRACE.cgltrace -w512 -h512" | grep 'PERF' >> $LOG
|
||||
fi
|
||||
done
|
||||
echo "draw3d tests done!"
|
||||
|
|
|
@ -4,7 +4,7 @@ RTL_DIR=../../hw/rtl
|
|||
|
||||
SCRIPT_DIR=../../hw/scripts
|
||||
|
||||
OPAE_SYN_DIR=../../hw/syn/opae
|
||||
OPAE_SYN_DIR=../../hw/syn/altera/opae
|
||||
|
||||
CXXFLAGS += -std=c++11 -Wall -Wextra -pedantic -Wfatal-errors
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
0
tests/regression/raster/kernel.bin
Executable file → Normal file
0
tests/regression/raster/kernel.bin
Executable file → Normal file
0
tests/regression/raster/kernel.elf
Executable file → Normal file
0
tests/regression/raster/kernel.elf
Executable file → Normal file
Loading…
Add table
Add a link
Reference in a new issue