adding out_buf to VX_pe_serializer + testing

This commit is contained in:
Blaise Tine 2024-08-02 18:16:50 -07:00
parent f723e7baf5
commit 410c47e2ae
7 changed files with 120 additions and 87 deletions

View file

@ -210,6 +210,13 @@ config1()
CONFIGS="-DISSUE_WIDTH=2 -DNUM_FPU_BLOCK=1 -DNUM_FPU_LANES=2" ./ci/blackbox.sh --driver=simx --app=vecaddx
CONFIGS="-DISSUE_WIDTH=4 -DNUM_FPU_BLOCK=4 -DNUM_FPU_LANES=4" ./ci/blackbox.sh --driver=simx --app=vecaddx
# FPU's PE scaling
CONFIGS="-DFMA_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfmadd"
CONFIGS="-DFCVT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tftoi"
CONFIGS="-DFDIV_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfdiv"
CONFIGS="-DFSQRT_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfsqrt"
CONFIGS="-DFNCP_PE_RATIO=2" ./ci/blackbox.sh --driver=rtlsim --app=dogfood --args="-tfclamp"
# LSU scaling
CONFIGS="-DISSUE_WIDTH=2 -DNUM_LSU_BLOCK=1 -DNUM_LSU_LANES=2" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx
CONFIGS="-DISSUE_WIDTH=4 -DNUM_LSU_BLOCK=4 -DNUM_LSU_LANES=4" ./ci/blackbox.sh --driver=rtlsim --app=vecaddx

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -21,7 +21,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
parameter TAG_WIDTH = 1
) (
input wire clk,
input wire reset,
input wire reset,
output wire ready_in,
input wire valid_in,
@ -36,7 +36,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
input wire is_signed,
input wire [NUM_LANES-1:0][31:0] dataa,
output wire [NUM_LANES-1:0][31:0] result,
output wire [NUM_LANES-1:0][31:0] result,
output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags,
@ -45,25 +45,26 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
input wire ready_out,
output wire valid_out
);
);
`UNUSED_VAR (frm)
wire [NUM_LANES-1:0] mask_out;
wire [NUM_LANES-1:0] mask_out;
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
fflags_t [NUM_LANES-1:0] fflags_out;
wire pe_enable;
wire pe_enable;
wire [NUM_PES-1:0][31:0] pe_data_in;
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
VX_pe_serializer #(
.NUM_LANES (NUM_LANES),
.NUM_PES (NUM_PES),
.NUM_LANES (NUM_LANES),
.NUM_PES (NUM_PES),
.LATENCY (`LATENCY_FCVT),
.DATA_IN_WIDTH(32),
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
.PE_REG (0)
.PE_REG (0),
.OUT_BUF ((`FCVT_PE_RATIO > 2) ? 1 : 0)
) pe_serializer (
.clk (clk),
.reset (reset),
@ -94,7 +95,7 @@ module VX_fpu_cvt import VX_fpu_pkg::*; #(
.enable (pe_enable),
.frm (frm),
.is_itof (is_itof),
.is_signed (is_signed),
.is_signed (is_signed),
.dataa (pe_data_in[i][0 +: 32]),
.result (pe_data_out[i][0 +: 32]),
.fflags (pe_data_out[i][32 +: `FP_FLAGS_BITS])

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -21,7 +21,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
parameter TAG_WIDTH = 1
) (
input wire clk,
input wire reset,
input wire reset,
input wire valid_in,
output wire ready_in,
@ -31,10 +31,10 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
input wire [TAG_WIDTH-1:0] tag_in,
input wire [`INST_FRM_BITS-1:0] frm,
input wire [NUM_LANES-1:0][31:0] dataa,
input wire [NUM_LANES-1:0][31:0] datab,
output wire [NUM_LANES-1:0][31:0] result,
output wire [NUM_LANES-1:0][31:0] result,
output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags,
@ -47,27 +47,28 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
`UNUSED_VAR (frm)
wire [NUM_LANES-1:0][2*32-1:0] data_in;
wire [NUM_LANES-1:0] mask_out;
wire [NUM_LANES-1:0] mask_out;
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
wire pe_enable;
wire pe_enable;
wire [NUM_PES-1:0][2*32-1:0] pe_data_in;
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
for (genvar i = 0; i < NUM_LANES; ++i) begin
assign data_in[i][0 +: 32] = dataa[i];
assign data_in[i][32 +: 32] = datab[i];
end
VX_pe_serializer #(
.NUM_LANES (NUM_LANES),
.NUM_PES (NUM_PES),
.NUM_LANES (NUM_LANES),
.NUM_PES (NUM_PES),
.LATENCY (`LATENCY_FDIV),
.DATA_IN_WIDTH(2*32),
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
.PE_REG (0)
.PE_REG (0),
.OUT_BUF ((`FDIV_PE_RATIO > 2) ? 1 : 0)
) pe_serializer (
.clk (clk),
.reset (reset),
@ -92,7 +93,7 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
fflags_t [NUM_LANES-1:0] per_lane_fflags;
`ifdef QUARTUS
for (genvar i = 0; i < NUM_PES; ++i) begin
acl_fdiv fdiv (
.clk (clk),
@ -103,8 +104,8 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
.q (pe_data_out[i][0 +: 32])
);
assign pe_data_out[i][32 +: `FP_FLAGS_BITS] = 'x;
end
end
assign has_fflags = 0;
assign per_lane_fflags = 'x;
`UNUSED_VAR (fflags_out)
@ -131,21 +132,21 @@ module VX_fpu_div import VX_fpu_pkg::*; #(
assign has_fflags = 1;
assign per_lane_fflags = fflags_out;
`else
`else
for (genvar i = 0; i < NUM_PES; ++i) begin
reg [63:0] r;
`UNUSED_VAR (r)
`UNUSED_VAR (r)
fflags_t f;
always @(*) begin
always @(*) begin
dpi_fdiv (
pe_enable,
int'(0),
{32'hffffffff, pe_data_in[i][0 +: 32]},
{32'hffffffff, pe_data_in[i][32 +: 32]},
frm,
r,
pe_enable,
int'(0),
{32'hffffffff, pe_data_in[i][0 +: 32]},
{32'hffffffff, pe_data_in[i][32 +: 32]},
frm,
r,
f
);
end

View file

@ -98,7 +98,8 @@ module VX_fpu_fma import VX_fpu_pkg::*; #(
.DATA_IN_WIDTH(3*32),
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
.PE_REG ((NUM_LANES != NUM_PES) ? 1 : 0)
.PE_REG ((`FMA_PE_RATIO != 1) ? 1 : 0),
.OUT_BUF ((`FMA_PE_RATIO > 2) ? 1 : 0)
) pe_serializer (
.clk (clk),
.reset (reset),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -35,7 +35,7 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0][31:0] dataa,
input wire [NUM_LANES-1:0][31:0] datab,
output wire [NUM_LANES-1:0][31:0] result,
output wire [NUM_LANES-1:0][31:0] result,
output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags,
@ -44,15 +44,15 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
input wire ready_out,
output wire valid_out
);
);
`UNUSED_VAR (frm)
wire [NUM_LANES-1:0][2*32-1:0] data_in;
wire [NUM_LANES-1:0] mask_out;
wire [NUM_LANES-1:0] mask_out;
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
fflags_t [NUM_LANES-1:0] fflags_out;
wire pe_enable;
wire pe_enable;
wire [NUM_PES-1:0][2*32-1:0] pe_data_in;
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
@ -60,15 +60,16 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
assign data_in[i][0 +: 32] = dataa[i];
assign data_in[i][32 +: 32] = datab[i];
end
VX_pe_serializer #(
.NUM_LANES (NUM_LANES),
.NUM_PES (NUM_PES),
.NUM_LANES (NUM_LANES),
.NUM_PES (NUM_PES),
.LATENCY (`LATENCY_FNCP),
.DATA_IN_WIDTH(2*32),
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
.PE_REG (0)
.PE_REG (0),
.OUT_BUF ((`FNCP_PE_RATIO > 2) ? 1 : 0)
) pe_serializer (
.clk (clk),
.reset (reset),
@ -97,8 +98,8 @@ module VX_fpu_ncp import VX_fpu_pkg::*; #(
.clk (clk),
.reset (reset),
.enable (pe_enable),
.frm (frm),
.op_type (op_type),
.frm (frm),
.op_type (op_type),
.dataa (pe_data_in[i][0 +: 32]),
.datab (pe_data_in[i][32 +: 32]),
.result (pe_data_out[i][0 +: 32]),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -18,10 +18,10 @@
module VX_fpu_sqrt import VX_fpu_pkg::*; #(
parameter NUM_LANES = 1,
parameter NUM_PES = `UP(NUM_LANES /`FSQRT_PE_RATIO),
parameter TAG_WIDTH = 1
parameter TAG_WIDTH = 1
) (
input wire clk,
input wire reset,
input wire reset,
output wire ready_in,
input wire valid_in,
@ -29,11 +29,11 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0] mask_in,
input wire [TAG_WIDTH-1:0] tag_in,
input wire [`INST_FRM_BITS-1:0] frm,
input wire [NUM_LANES-1:0][31:0] dataa,
output wire [NUM_LANES-1:0][31:0] result,
output wire [NUM_LANES-1:0][31:0] result,
output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags,
@ -46,22 +46,23 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
`UNUSED_VAR (frm)
wire [NUM_LANES-1:0] mask_out;
wire [NUM_LANES-1:0] mask_out;
wire [NUM_LANES-1:0][(`FP_FLAGS_BITS+32)-1:0] data_out;
wire [NUM_LANES-1:0][`FP_FLAGS_BITS-1:0] fflags_out;
wire pe_enable;
wire pe_enable;
wire [NUM_PES-1:0][31:0] pe_data_in;
wire [NUM_PES-1:0][(`FP_FLAGS_BITS+32)-1:0] pe_data_out;
VX_pe_serializer #(
.NUM_LANES (NUM_LANES),
.NUM_PES (NUM_PES),
.NUM_LANES (NUM_LANES),
.NUM_PES (NUM_PES),
.LATENCY (`LATENCY_FSQRT),
.DATA_IN_WIDTH(32),
.DATA_OUT_WIDTH(`FP_FLAGS_BITS + 32),
.TAG_WIDTH (NUM_LANES + TAG_WIDTH),
.PE_REG (0)
.PE_REG (0),
.OUT_BUF ((`FSQRT_PE_RATIO > 2) ? 1 : 0)
) pe_serializer (
.clk (clk),
.reset (reset),
@ -83,10 +84,10 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
assign fflags_out[i] = data_out[i][32 +: `FP_FLAGS_BITS];
end
fflags_t [NUM_LANES-1:0] per_lane_fflags;
fflags_t [NUM_LANES-1:0] per_lane_fflags;
`ifdef QUARTUS
for (genvar i = 0; i < NUM_PES; ++i) begin
acl_fsqrt fsqrt (
.clk (clk),
@ -105,7 +106,7 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
`elsif VIVADO
for (genvar i = 0; i < NUM_PES; ++i) begin
wire tuser;
wire tuser;
xil_fsqrt fsqrt (
.aclk (clk),
@ -130,17 +131,17 @@ module VX_fpu_sqrt import VX_fpu_pkg::*; #(
`UNUSED_VAR (r)
fflags_t f;
always @(*) begin
always @(*) begin
dpi_fsqrt (
pe_enable,
int'(0),
{32'hffffffff, pe_data_in[i]},
frm,
r,
pe_enable,
int'(0),
{32'hffffffff, pe_data_in[i]},
frm,
r,
f
);
end
VX_shift_register #(
.DATAW (32 + $bits(fflags_t)),
.DEPTH (`LATENCY_FSQRT)

View file

@ -21,7 +21,8 @@ module VX_pe_serializer #(
parameter DATA_IN_WIDTH = 1,
parameter DATA_OUT_WIDTH = 1,
parameter TAG_WIDTH = 0,
parameter PE_REG = 0
parameter PE_REG = 0,
parameter OUT_BUF = 0
) (
input wire clk,
input wire reset,
@ -43,6 +44,11 @@ module VX_pe_serializer #(
output wire [TAG_WIDTH-1:0] tag_out,
input wire ready_out
);
wire valid_out_u;
wire [NUM_LANES-1:0][DATA_OUT_WIDTH-1:0] data_out_u;
wire [TAG_WIDTH-1:0] tag_out_u;
wire ready_out_u;
wire [NUM_PES-1:0][DATA_IN_WIDTH-1:0] pe_data_in_s;
wire valid_out_s;
wire [TAG_WIDTH-1:0] tag_out_s;
@ -105,7 +111,7 @@ module VX_pe_serializer #(
reg [TAG_WIDTH-1:0] tag_out_r;
wire valid_out_b = valid_out_s && batch_out_done;
wire ready_out_b = ready_out || ~valid_out;
wire ready_out_b = ready_out_u || ~valid_out_u;
always @(posedge clk) begin
if (reset) begin
@ -119,29 +125,44 @@ module VX_pe_serializer #(
end
end
assign enable = ready_out_b || ~valid_out_b;
assign ready_in = enable && batch_in_done;
assign enable = ready_out_b || ~valid_out_b;
assign ready_in = enable && batch_in_done;
assign pe_enable = enable;
assign pe_enable = enable;
assign valid_out = valid_out_r;
assign data_out = data_out_r;
assign tag_out = tag_out_r;
assign valid_out_u = valid_out_r;
assign data_out_u = data_out_r;
assign tag_out_u = tag_out_r;
end else begin
assign pe_data_in_s = data_in;
assign enable = ready_out || ~valid_out;
assign ready_in = enable;
assign enable = ready_out_u || ~valid_out_u;
assign ready_in = enable;
assign pe_enable = enable;
assign pe_enable = enable;
assign valid_out = valid_out_s;
assign data_out = pe_data_out;
assign tag_out = tag_out_s;
assign valid_out_u = valid_out_s;
assign data_out_u = pe_data_out;
assign tag_out_u = tag_out_s;
end
`RESET_RELAY (out_buf_reset, reset);
VX_elastic_buffer #(
.DATAW (DATA_OUT_WIDTH + TAG_WIDTH),
.SIZE (`TO_OUT_BUF_SIZE(OUT_BUF)),
.OUT_REG (`TO_OUT_BUF_REG(OUT_BUF))
) out_buf (
.clk (clk),
.reset (out_buf_reset),
.valid_in (valid_out_u),
.ready_in (ready_out_u),
.data_in ({data_out_u, tag_out_u}),
.data_out ({data_out, tag_out}),
.valid_out (valid_out),
.ready_out (ready_out)
);
endmodule
`TRACING_ON