minor update

This commit is contained in:
Blaise Tine 2024-07-09 14:19:19 -07:00
parent 7b9d2bdff2
commit 2651632884
5 changed files with 123 additions and 127 deletions

View file

@ -587,7 +587,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.DATA_SIZE (LMEM_DATA_SIZE),
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
.TAG_WIDTH (AVS_REQ_TAGW),
.ARBITER ("P"),
.ARBITER ("P"), // prioritize VX requests
.REQ_OUT_BUF (0),
.RSP_OUT_BUF (0)
) mem_arb (
@ -1010,7 +1010,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
// SCOPE //////////////////////////////////////////////////////////////////////
`ifdef DBG_SCOPE_AFU
`ifdef SCOPE
wire mem_req_fire = mem_bus_if[0].req_valid && mem_bus_if[0].req_ready;
wire mem_rsp_fire = mem_bus_if[0].rsp_valid && mem_bus_if[0].rsp_ready;
wire avs_write_fire = avs_write[0] && ~avs_waitrequest[0];
@ -1080,7 +1079,6 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_
.bus_in(scope_bus_in_w[0]),
.bus_out(scope_bus_out_w[0])
);
`endif
`else
`SCOPE_IO_UNUSED_W(0)
`endif

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,15 +14,15 @@
`include "VX_cache_define.vh"
module VX_cache import VX_gpu_pkg::*; #(
parameter `STRING INSTANCE_ID = "",
parameter `STRING INSTANCE_ID = "",
// Number of Word requests per cycle
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 4096,
parameter CACHE_SIZE = 4096,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 64,
parameter LINE_SIZE = 64,
// Number of banks
parameter NUM_BANKS = 1,
// Number of associative ways
@ -33,7 +33,7 @@ module VX_cache import VX_gpu_pkg::*; #(
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 8,
parameter MSHR_SIZE = 8,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
// Memory Request Queue Size
@ -53,12 +53,12 @@ module VX_cache import VX_gpu_pkg::*; #(
// Memory request output register
parameter MEM_OUT_BUF = 0
) (
) (
// PERF
`ifdef PERF_ENABLE
output cache_perf_t cache_perf,
`endif
input wire clk,
input wire reset,
@ -92,7 +92,7 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [NUM_REQS-1:0] core_req_valid;
wire [NUM_REQS-1:0][`CS_WORD_ADDR_WIDTH-1:0] core_req_addr;
wire [NUM_REQS-1:0] core_req_rw;
wire [NUM_REQS-1:0] core_req_rw;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen;
wire [NUM_REQS-1:0][`CS_WORD_WIDTH-1:0] core_req_data;
wire [NUM_REQS-1:0][TAG_WIDTH-1:0] core_req_tag;
@ -107,7 +107,7 @@ module VX_cache import VX_gpu_pkg::*; #(
assign core_req_tag[i] = core_bus_if[i].req_data.tag;
assign core_bus_if[i].req_ready = core_req_ready[i];
`UNUSED_VAR (core_bus_if[i].req_data.atype)
end
end
///////////////////////////////////////////////////////////////////////////
@ -120,7 +120,6 @@ module VX_cache import VX_gpu_pkg::*; #(
`RESET_RELAY (core_rsp_reset, reset);
for (genvar i = 0; i < NUM_REQS; ++i) begin
VX_elastic_buffer #(
.DATAW (`CS_WORD_WIDTH + TAG_WIDTH),
.SIZE (CORE_REQ_BUF_ENABLE ? `TO_OUT_BUF_SIZE(CORE_OUT_BUF) : 0),
@ -131,7 +130,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.valid_in (core_rsp_valid_s[i]),
.ready_in (core_rsp_ready_s[i]),
.data_in ({core_rsp_data_s[i], core_rsp_tag_s[i]}),
.data_out ({core_bus_if[i].rsp_data.data, core_bus_if[i].rsp_data.tag}),
.data_out ({core_bus_if[i].rsp_data.data, core_bus_if[i].rsp_data.tag}),
.valid_out (core_bus_if[i].rsp_valid),
.ready_out (core_bus_if[i].rsp_ready)
);
@ -155,14 +154,14 @@ module VX_cache import VX_gpu_pkg::*; #(
) mem_req_buf (
.clk (clk),
.reset (reset),
.valid_in (mem_req_valid_s),
.ready_in (mem_req_ready_s),
.valid_in (mem_req_valid_s),
.ready_in (mem_req_ready_s),
.data_in ({mem_req_rw_s, mem_req_byteen_s, mem_req_addr_s, mem_req_data_s, mem_req_tag_s}),
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag}),
.valid_out (mem_bus_if.req_valid),
.data_out ({mem_bus_if.req_data.rw, mem_bus_if.req_data.byteen, mem_bus_if.req_data.addr, mem_bus_if.req_data.data, mem_bus_if.req_data.tag}),
.valid_out (mem_bus_if.req_valid),
.ready_out (mem_bus_if.req_ready)
);
assign mem_bus_if.req_data.atype = '0;
///////////////////////////////////////////////////////////////////////////
@ -172,9 +171,9 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [`CS_LINE_WIDTH-1:0] mem_rsp_data_s;
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
wire mem_rsp_ready_s;
VX_elastic_buffer #(
.DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
.DATAW (MEM_TAG_WIDTH + `CS_LINE_WIDTH),
.SIZE (MRSQ_SIZE),
.OUT_REG (MRSQ_SIZE > 2)
) mem_rsp_queue (
@ -182,8 +181,8 @@ module VX_cache import VX_gpu_pkg::*; #(
.reset (reset),
.valid_in (mem_bus_if.rsp_valid),
.ready_in (mem_bus_if.rsp_ready),
.data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}),
.data_out ({mem_rsp_tag_s, mem_rsp_data_s}),
.data_in ({mem_bus_if.rsp_data.tag, mem_bus_if.rsp_data.data}),
.data_out ({mem_rsp_tag_s, mem_rsp_data_s}),
.valid_out (mem_rsp_valid_s),
.ready_out (mem_rsp_ready_s)
);
@ -196,9 +195,9 @@ module VX_cache import VX_gpu_pkg::*; #(
// this reset relay is required to sync with bank initialization
`RESET_RELAY (init_reset, reset);
VX_cache_init #(
VX_cache_init #(
.CACHE_SIZE (CACHE_SIZE),
.LINE_SIZE (LINE_SIZE),
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_WAYS (NUM_WAYS)
) cache_init (
@ -209,7 +208,7 @@ module VX_cache import VX_gpu_pkg::*; #(
);
///////////////////////////////////////////////////////////////////////
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
wire [NUM_BANKS-1:0][`CS_LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr;
wire [NUM_BANKS-1:0] per_bank_core_req_rw;
@ -219,14 +218,14 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_req_tag;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_req_idx;
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
wire [NUM_BANKS-1:0][`CS_WORD_WIDTH-1:0] per_bank_core_rsp_data;
wire [NUM_BANKS-1:0][TAG_WIDTH-1:0] per_bank_core_rsp_tag;
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_core_rsp_idx;
wire [NUM_BANKS-1:0] per_bank_core_rsp_ready;
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
wire [NUM_BANKS-1:0][`CS_MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr;
wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
wire [NUM_BANKS-1:0][WORD_SEL_WIDTH-1:0] per_bank_mem_req_wsel;
@ -236,7 +235,7 @@ module VX_cache import VX_gpu_pkg::*; #(
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
if (NUM_BANKS == 1) begin
assign mem_rsp_ready_s = per_bank_mem_rsp_ready;
end else begin
@ -245,7 +244,7 @@ module VX_cache import VX_gpu_pkg::*; #(
// Bank requests dispatch
wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in;
wire [NUM_REQS-1:0][CORE_REQ_DATAW-1:0] core_req_data_in;
wire [NUM_BANKS-1:0][CORE_REQ_DATAW-1:0] core_req_data_out;
wire [NUM_REQS-1:0][LINE_ADDR_WIDTH-1:0] core_req_line_addr;
wire [NUM_REQS-1:0][BANK_SEL_WIDTH-1:0] core_req_bid;
@ -273,7 +272,7 @@ module VX_cache import VX_gpu_pkg::*; #(
core_req_line_addr[i],
core_req_rw[i],
core_req_wsel[i],
core_req_byteen[i],
core_req_byteen[i],
core_req_data[i],
core_req_tag[i]};
end
@ -313,13 +312,13 @@ module VX_cache import VX_gpu_pkg::*; #(
per_bank_core_req_addr[i],
per_bank_core_req_rw[i],
per_bank_core_req_wsel[i],
per_bank_core_req_byteen[i],
per_bank_core_req_byteen[i],
per_bank_core_req_data[i],
per_bank_core_req_tag[i]} = core_req_data_out[i];
end
// Banks access
for (genvar i = 0; i < NUM_BANKS; ++i) begin
for (genvar i = 0; i < NUM_BANKS; ++i) begin : banks
wire [`CS_LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
wire curr_bank_mem_rsp_valid;
@ -330,8 +329,8 @@ module VX_cache import VX_gpu_pkg::*; #(
end
`RESET_RELAY (bank_reset, reset);
VX_cache_bank #(
VX_cache_bank #(
.BANK_ID (i),
.INSTANCE_ID (INSTANCE_ID),
.CACHE_SIZE (CACHE_SIZE),
@ -348,7 +347,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.TAG_WIDTH (TAG_WIDTH),
.CORE_OUT_BUF (CORE_REQ_BUF_ENABLE ? 0 : CORE_OUT_BUF),
.MEM_OUT_BUF (MEM_REQ_BUF_ENABLE ? 0 : MEM_OUT_BUF)
) bank (
) bank (
.clk (clk),
.reset (bank_reset),
@ -357,7 +356,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.perf_write_misses (perf_write_miss_per_bank[i]),
.perf_mshr_stalls (perf_mshr_stall_per_bank[i]),
`endif
// Core request
.core_req_valid (per_bank_core_req_valid[i]),
.core_req_addr (per_bank_core_req_addr[i]),
@ -369,7 +368,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.core_req_idx (per_bank_core_req_idx[i]),
.core_req_ready (per_bank_core_req_ready[i]),
// Core response
// Core response
.core_rsp_valid (per_bank_core_rsp_valid[i]),
.core_rsp_data (per_bank_core_rsp_data[i]),
.core_rsp_tag (per_bank_core_rsp_tag[i]),
@ -392,7 +391,7 @@ module VX_cache import VX_gpu_pkg::*; #(
.mem_rsp_id (`CS_MEM_TAG_TO_REQ_ID(mem_rsp_tag_s)),
.mem_rsp_ready (per_bank_mem_rsp_ready[i]),
// initialization
// initialization
.init_enable (init_enable),
.init_line_sel (init_line_sel)
);
@ -402,7 +401,7 @@ module VX_cache import VX_gpu_pkg::*; #(
end else begin
assign per_bank_mem_req_addr[i] = `CS_LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, i);
end
end
end
// Bank responses gather
@ -456,8 +455,8 @@ module VX_cache import VX_gpu_pkg::*; #(
for (genvar i = 0; i < NUM_BANKS; ++i) begin
assign data_in[i] = {per_bank_mem_req_addr[i],
per_bank_mem_req_rw[i],
per_bank_mem_req_wsel[i],
per_bank_mem_req_byteen[i],
per_bank_mem_req_wsel[i],
per_bank_mem_req_byteen[i],
per_bank_mem_req_data[i],
per_bank_mem_req_id[i]};
end
@ -480,10 +479,10 @@ module VX_cache import VX_gpu_pkg::*; #(
if (NUM_BANKS > 1) begin
wire [`CS_BANK_SEL_BITS-1:0] mem_req_bank_id = `CS_MEM_ADDR_TO_BANK_ID(mem_req_addr_p);
assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p});
assign mem_req_tag_p = MEM_TAG_WIDTH'({mem_req_bank_id, mem_req_id_p});
end else begin
assign mem_req_tag_p = MEM_TAG_WIDTH'(mem_req_id_p);
end
end
// Memory request multi-port handling
@ -502,22 +501,22 @@ module VX_cache import VX_gpu_pkg::*; #(
mem_req_data_r = 'x;
mem_req_byteen_r[mem_req_wsel_p * WORD_SIZE +: WORD_SIZE] = mem_req_byteen_p;
mem_req_data_r[mem_req_wsel_p * `CS_WORD_WIDTH +: `CS_WORD_WIDTH] = mem_req_data_p;
end
end
assign mem_req_rw_s = mem_req_rw_p;
assign mem_req_byteen_s = mem_req_byteen_r;
assign mem_req_data_s = mem_req_data_r;
end else begin
`UNUSED_VAR (mem_req_wsel_p)
assign mem_req_rw_s = mem_req_rw_p;
assign mem_req_byteen_s = mem_req_byteen_p;
assign mem_req_data_s = mem_req_data_p;
assign mem_req_byteen_s = mem_req_byteen_p;
assign mem_req_data_s = mem_req_data_p;
end
end else begin
`UNUSED_VAR (mem_req_byteen_p)
`UNUSED_VAR (mem_req_wsel_p)
`UNUSED_VAR (mem_req_data_p)
`UNUSED_VAR (mem_req_rw_p)
assign mem_req_rw_s = 0;
assign mem_req_byteen_s = {LINE_SIZE{1'b1}};
assign mem_req_data_s = '0;
@ -527,10 +526,10 @@ module VX_cache import VX_gpu_pkg::*; #(
// per cycle: core_reads, core_writes
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_reads_per_cycle;
wire [`CLOG2(NUM_REQS+1)-1:0] perf_core_writes_per_cycle;
wire [NUM_REQS-1:0] perf_core_reads_per_req;
wire [NUM_REQS-1:0] perf_core_writes_per_req;
// per cycle: read misses, write misses, msrq stalls, pipeline stalls
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_read_miss_per_cycle;
wire [`CLOG2(NUM_BANKS+1)-1:0] perf_write_miss_per_cycle;
@ -539,13 +538,13 @@ module VX_cache import VX_gpu_pkg::*; #(
`BUFFER(perf_core_reads_per_req, core_req_valid & core_req_ready & ~core_req_rw);
`BUFFER(perf_core_writes_per_req, core_req_valid & core_req_ready & core_req_rw);
`POP_COUNT(perf_core_reads_per_cycle, perf_core_reads_per_req);
`POP_COUNT(perf_core_writes_per_cycle, perf_core_writes_per_req);
`POP_COUNT(perf_read_miss_per_cycle, perf_read_miss_per_bank);
`POP_COUNT(perf_write_miss_per_cycle, perf_write_miss_per_bank);
`POP_COUNT(perf_mshr_stall_per_cycle, perf_mshr_stall_per_bank);
wire [NUM_REQS-1:0] perf_crsp_stall_per_req;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign perf_crsp_stall_per_req[i] = core_bus_if[i].rsp_valid && ~core_bus_if[i].rsp_ready;
@ -561,7 +560,7 @@ module VX_cache import VX_gpu_pkg::*; #(
reg [`PERF_CTR_BITS-1:0] perf_write_misses;
reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls;
reg [`PERF_CTR_BITS-1:0] perf_mem_stalls;
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
always @(posedge clk) begin
if (reset) begin

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -24,20 +24,20 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
parameter NUM_REQS = 4,
// Size of cache in bytes
parameter CACHE_SIZE = 16384,
parameter CACHE_SIZE = 16384,
// Size of line inside a bank in bytes
parameter LINE_SIZE = 64,
parameter LINE_SIZE = 64,
// Number of banks
parameter NUM_BANKS = 1,
// Number of associative ways
parameter NUM_WAYS = 4,
// Size of a word in bytes
parameter WORD_SIZE = 4,
parameter WORD_SIZE = 4,
// Core Response Queue Size
parameter CRSQ_SIZE = 2,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 8,
parameter MSHR_SIZE = 8,
// Memory Response Queue Size
parameter MRSQ_SIZE = 0,
// Memory Request Queue Size
@ -60,7 +60,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
// Memory request output buffer
parameter MEM_OUT_BUF = 0
) (
) (
input wire clk,
input wire reset,
@ -74,17 +74,16 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
);
localparam NUM_CACHES = `UP(NUM_UNITS);
localparam PASSTHRU = (NUM_UNITS == 0);
localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES);
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
localparam ARB_TAG_WIDTH = TAG_WIDTH + `ARB_SEL_BITS(NUM_INPUTS, NUM_CACHES);
localparam MEM_TAG_WIDTH = PASSTHRU ? `CACHE_BYPASS_TAG_WIDTH(NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
(NC_ENABLE ? `CACHE_NC_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS, NUM_REQS, LINE_SIZE, WORD_SIZE, ARB_TAG_WIDTH) :
`CACHE_MEM_TAG_WIDTH(MSHR_SIZE, NUM_BANKS));
`STATIC_ASSERT(NUM_INPUTS >= NUM_CACHES, ("invalid parameter"))
`ifdef PERF_ENABLE
cache_perf_t perf_cache_tmp[1], perf_cache_unit[NUM_CACHES];
`PERF_CACHE_ADD (perf_cache_tmp, perf_cache_unit, 1, NUM_CACHES)
assign cache_perf = perf_cache_tmp[0];
cache_perf_t perf_cache_unit[NUM_CACHES];
`PERF_CACHE_ADD (cache_perf, perf_cache_unit, NUM_CACHES)
`endif
VX_mem_bus_if #(
@ -137,7 +136,7 @@ module VX_cache_cluster import VX_gpu_pkg::*; #(
`RESET_RELAY (cache_reset, reset);
for (genvar i = 0; i < NUM_CACHES; ++i) begin
for (genvar i = 0; i < NUM_CACHES; ++i) begin : caches
VX_cache_wrap #(
.INSTANCE_ID ($sformatf("%s%0d", INSTANCE_ID, i)),

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -15,7 +15,7 @@
`ifdef FPU_DPI
module VX_fpu_dpi import VX_fpu_pkg::*; #(
module VX_fpu_dpi import VX_fpu_pkg::*; #(
parameter NUM_LANES = 1,
parameter TAG_WIDTH = 1,
parameter OUT_BUF = 0
@ -29,7 +29,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0] mask_in,
input wire [TAG_WIDTH-1:0] tag_in,
input wire [`INST_FPU_BITS-1:0] op_type,
input wire [`INST_FMT_BITS-1:0] fmt,
input wire [`INST_FRM_BITS-1:0] frm,
@ -37,7 +37,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
input wire [NUM_LANES-1:0][`XLEN-1:0] dataa,
input wire [NUM_LANES-1:0][`XLEN-1:0] datab,
input wire [NUM_LANES-1:0][`XLEN-1:0] datac,
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
output wire [NUM_LANES-1:0][`XLEN-1:0] result,
output wire has_fflags,
output wire [`FP_FLAGS_BITS-1:0] fflags,
@ -55,31 +55,31 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
localparam FPC_BITS = `LOG2UP(NUM_FPC);
localparam RSP_DATAW = (NUM_LANES * `XLEN) + 1 + $bits(fflags_t) + TAG_WIDTH;
wire [NUM_FPC-1:0] per_core_ready_in;
wire [NUM_FPC-1:0][NUM_LANES-1:0][`XLEN-1:0] per_core_result;
wire [NUM_FPC-1:0][TAG_WIDTH-1:0] per_core_tag_out;
reg [NUM_FPC-1:0] per_core_ready_out;
wire [NUM_FPC-1:0] per_core_valid_out;
wire [NUM_FPC-1:0] per_core_has_fflags;
fflags_t [NUM_FPC-1:0] per_core_fflags;
wire [NUM_FPC-1:0] per_core_valid_out;
wire [NUM_FPC-1:0] per_core_has_fflags;
fflags_t [NUM_FPC-1:0] per_core_fflags;
wire div_ready_in, sqrt_ready_in;
wire [NUM_LANES-1:0][`XLEN-1:0] div_result, sqrt_result;
wire [TAG_WIDTH-1:0] div_tag_out, sqrt_tag_out;
wire div_ready_out, sqrt_ready_out;
wire div_valid_out, sqrt_valid_out;
wire div_has_fflags, sqrt_has_fflags;
wire div_valid_out, sqrt_valid_out;
wire div_has_fflags, sqrt_has_fflags;
fflags_t div_fflags, sqrt_fflags;
reg [FPC_BITS-1:0] core_select;
reg is_fadd, is_fsub, is_fmul, is_fmadd, is_fmsub, is_fnmadd, is_fnmsub;
reg is_div, is_fcmp, is_itof, is_utof, is_ftoi, is_ftou, is_f2f;
reg is_div, is_fcmp, is_itof, is_utof, is_ftoi, is_ftou, is_f2f;
reg dst_fmt, int_fmt;
reg [NUM_LANES-1:0][63:0] operands [3];
always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin
operands[0][i] = 64'(dataa[i]);
@ -92,23 +92,23 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
always @(*) begin
is_fadd = 0;
is_fsub = 0;
is_fmul = 0;
is_fsub = 0;
is_fmul = 0;
is_fmadd = 0;
is_fmsub = 0;
is_fnmadd = 0;
is_fnmsub = 0;
is_div = 0;
is_fnmadd = 0;
is_fnmsub = 0;
is_div = 0;
is_fcmp = 0;
is_itof = 0;
is_utof = 0;
is_ftoi = 0;
is_ftou = 0;
is_f2f = 0;
dst_fmt = 0;
int_fmt = 0;
`ifdef FLEN_64
dst_fmt = fmt[0];
`endif
@ -132,14 +132,14 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
`INST_FPU_F2U: begin core_select = FPU_CVT; is_ftou = 1; end
`INST_FPU_I2F: begin core_select = FPU_CVT; is_itof = 1; end
`INST_FPU_U2F: begin core_select = FPU_CVT; is_utof = 1; end
`INST_FPU_F2F: begin core_select = FPU_CVT; is_f2f = 1; end
`INST_FPU_F2F: begin core_select = FPU_CVT; is_f2f = 1; end
default: begin core_select = FPU_NCP; end
endcase
end
generate
generate
begin : fma
reg [NUM_LANES-1:0][`XLEN-1:0] result_fma;
wire [NUM_LANES-1:0][63:0] result_fadd;
wire [NUM_LANES-1:0][63:0] result_fsub;
@ -148,7 +148,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
wire [NUM_LANES-1:0][63:0] result_fmsub;
wire [NUM_LANES-1:0][63:0] result_fnmadd;
wire [NUM_LANES-1:0][63:0] result_fnmsub;
fflags_t [NUM_LANES-1:0] fflags_fma;
fflags_t [NUM_LANES-1:0] fflags_fadd;
fflags_t [NUM_LANES-1:0] fflags_fsub;
@ -162,7 +162,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
wire fma_ready = per_core_ready_out[FPU_FMA] || ~per_core_valid_out[FPU_FMA];
wire fma_fire = fma_valid && fma_ready;
always @(*) begin
always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin
dpi_fadd (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fadd[i], fflags_fadd[i]);
dpi_fsub (fma_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fsub[i], fflags_fsub[i]);
@ -175,20 +175,20 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
result_fma[i] = is_fadd ? result_fadd[i][`XLEN-1:0] :
is_fsub ? result_fsub[i][`XLEN-1:0] :
is_fmul ? result_fmul[i][`XLEN-1:0] :
is_fmadd ? result_fmadd[i][`XLEN-1:0] :
is_fmadd ? result_fmadd[i][`XLEN-1:0] :
is_fmsub ? result_fmsub[i][`XLEN-1:0] :
is_fnmadd ? result_fnmadd[i][`XLEN-1:0] :
is_fnmadd ? result_fnmadd[i][`XLEN-1:0] :
is_fnmsub ? result_fnmsub[i][`XLEN-1:0] :
'0;
fflags_fma[i] = is_fadd ? fflags_fadd[i] :
is_fsub ? fflags_fsub[i] :
is_fmul ? fflags_fmul[i] :
is_fmadd ? fflags_fmadd[i] :
is_fmadd ? fflags_fmadd[i] :
is_fmsub ? fflags_fmsub[i] :
is_fnmadd ? fflags_fnmadd[i] :
is_fnmsub ? fflags_fnmsub[i] :
'0;
is_fnmadd ? fflags_fnmadd[i] :
is_fnmsub ? fflags_fnmsub[i] :
'0;
end
end
@ -213,7 +213,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
end
endgenerate
generate
generate
begin : fdiv
reg [NUM_LANES-1:0][`XLEN-1:0] result_fdiv_r;
@ -223,9 +223,9 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
wire fdiv_valid = (valid_in && core_select == FPU_DIVSQRT) && is_div;
wire fdiv_ready = div_ready_out || ~div_valid_out;
wire fdiv_fire = fdiv_valid && fdiv_ready;
always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin
always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin
dpi_fdiv (fdiv_fire, int'(dst_fmt), operands[0][i], operands[1][i], frm, result_fdiv[i], fflags_fdiv[i]);
result_fdiv_r[i] = result_fdiv[i][`XLEN-1:0];
end
@ -252,7 +252,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
end
endgenerate
generate
generate
begin : fsqrt
reg [NUM_LANES-1:0][`XLEN-1:0] result_fsqrt_r;
@ -260,10 +260,10 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
fflags_t [NUM_LANES-1:0] fflags_fsqrt;
wire fsqrt_valid = (valid_in && core_select == FPU_DIVSQRT) && ~is_div;
wire fsqrt_ready = sqrt_ready_out || ~sqrt_valid_out;
wire fsqrt_ready = sqrt_ready_out || ~sqrt_valid_out;
wire fsqrt_fire = fsqrt_valid && fsqrt_ready;
always @(*) begin
always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin
dpi_fsqrt (fsqrt_fire, int'(dst_fmt), operands[0][i], frm, result_fsqrt[i], fflags_fsqrt[i]);
result_fsqrt_r[i] = result_fsqrt[i][`XLEN-1:0];
@ -300,7 +300,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
wire [NUM_LANES-1:0][63:0] result_ftoi;
wire [NUM_LANES-1:0][63:0] result_ftou;
wire [NUM_LANES-1:0][63:0] result_f2f;
fflags_t [NUM_LANES-1:0] fflags_fcvt;
fflags_t [NUM_LANES-1:0] fflags_itof;
fflags_t [NUM_LANES-1:0] fflags_utof;
@ -310,20 +310,20 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
wire fcvt_valid = (valid_in && core_select == FPU_CVT);
wire fcvt_ready = per_core_ready_out[FPU_CVT] || ~per_core_valid_out[FPU_CVT];
wire fcvt_fire = fcvt_valid && fcvt_ready;
always @(*) begin
always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin
dpi_itof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_itof[i], fflags_itof[i]);
dpi_utof (fcvt_fire, int'(dst_fmt), int'(int_fmt), operands[0][i], frm, result_utof[i], fflags_utof[i]);
dpi_ftoi (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftoi[i], fflags_ftoi[i]);
dpi_ftou (fcvt_fire, int'(int_fmt), int'(dst_fmt), operands[0][i], frm, result_ftou[i], fflags_ftou[i]);
dpi_f2f (fcvt_fire, int'(dst_fmt), operands[0][i], result_f2f[i]);
dpi_f2f (fcvt_fire, int'(dst_fmt), operands[0][i], result_f2f[i]);
result_fcvt[i] = is_itof ? result_itof[i][`XLEN-1:0] :
is_utof ? result_utof[i][`XLEN-1:0] :
is_ftoi ? result_ftoi[i][`XLEN-1:0] :
is_ftou ? result_ftou[i][`XLEN-1:0] :
is_f2f ? result_f2f[i][`XLEN-1:0] :
is_ftou ? result_ftou[i][`XLEN-1:0] :
is_f2f ? result_f2f[i][`XLEN-1:0] :
'0;
fflags_fcvt[i] = is_itof ? fflags_itof[i] :
@ -355,7 +355,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
end
endgenerate
generate
generate
begin : fncp
reg [NUM_LANES-1:0][`XLEN-1:0] result_fncp;
@ -381,15 +381,15 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
wire fncp_valid = (valid_in && core_select == FPU_NCP);
wire fncp_ready = per_core_ready_out[FPU_NCP] || ~per_core_valid_out[FPU_NCP];
wire fncp_fire = fncp_valid && fncp_ready;
always @(*) begin
always @(*) begin
for (integer i = 0; i < NUM_LANES; ++i) begin
dpi_fclss (fncp_fire, int'(dst_fmt), operands[0][i], result_fclss[i]);
dpi_fle (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fle[i], fflags_fle[i]);
dpi_flt (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]);
dpi_flt (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_flt[i], fflags_flt[i]);
dpi_feq (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_feq[i], fflags_feq[i]);
dpi_fmin (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmin[i], fflags_fmin[i]);
dpi_fmax (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]);
dpi_fmax (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fmax[i], fflags_fmax[i]);
dpi_fsgnj (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnj[i]);
dpi_fsgnjn (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjn[i]);
dpi_fsgnjx (fncp_fire, int'(dst_fmt), operands[0][i], operands[1][i], result_fsgnjx[i]);
@ -431,7 +431,7 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
.data_in ({fncp_valid, tag_in, has_fflags_fncp, result_fncp, fflags_merged}),
.data_out ({per_core_valid_out[FPU_NCP], per_core_tag_out[FPU_NCP], per_core_has_fflags[FPU_NCP], per_core_result[FPU_NCP], per_core_fflags[FPU_NCP]})
);
assign per_core_ready_in[FPU_NCP] = fncp_ready;
end
@ -443,15 +443,15 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
VX_stream_arb #(
.NUM_INPUTS (2),
.DATAW (RSP_DATAW),
.DATAW (RSP_DATAW),
.ARBITER ("R"),
.OUT_BUF (0)
) div_sqrt_arb (
.clk (clk),
.reset (reset),
.valid_in ({sqrt_valid_out, div_valid_out}),
.valid_in ({sqrt_valid_out, div_valid_out}),
.ready_in ({sqrt_ready_out, div_ready_out}),
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
.data_in ({{sqrt_result, sqrt_has_fflags, sqrt_fflags, sqrt_tag_out},
{div_result, div_has_fflags, div_fflags, div_tag_out}}),
.data_out ({per_core_result[FPU_DIVSQRT], per_core_has_fflags[FPU_DIVSQRT], per_core_fflags[FPU_DIVSQRT], per_core_tag_out[FPU_DIVSQRT]}),
.valid_out (per_core_valid_out[FPU_DIVSQRT]),
@ -469,13 +469,13 @@ module VX_fpu_dpi import VX_fpu_pkg::*; #(
VX_stream_arb #(
.NUM_INPUTS (NUM_FPC),
.DATAW (RSP_DATAW),
.ARBITER ("R"),
.DATAW (RSP_DATAW),
.ARBITER ("F"),
.OUT_BUF (OUT_BUF)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (per_core_valid_out),
.valid_in (per_core_valid_out),
.ready_in (per_core_ready_out),
.data_in (per_core_data_out),
.data_out ({result, has_fflags, fflags, tag_out}),

View file

@ -289,14 +289,14 @@ module VX_fpu_dsp import VX_fpu_pkg::*; #(
end
wire [NUM_LANES-1:0][31:0] result_s;
wire [1:0] op_ret_int_out;
`UNUSED_VAR (op_ret_int_out)
VX_stream_arb #(
.NUM_INPUTS (NUM_FPC),
.DATAW (RSP_DATAW + 2),
.ARBITER ("R"),
.ARBITER ("F"),
.OUT_BUF (OUT_BUF)
) rsp_arb (
.clk (clk),