minor updates

This commit is contained in:
Blaise Tine 2024-07-16 10:52:07 -07:00
parent 578c3d33d2
commit 6a03882bd2
6 changed files with 136 additions and 129 deletions

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,10 +14,10 @@
`include "VX_define.vh"
module VX_lsu_adapter import VX_gpu_pkg::*; #(
parameter NUM_LANES = 1,
parameter DATA_SIZE = 1,
parameter TAG_WIDTH = 1,
parameter TAG_SEL_BITS = 0,
parameter NUM_LANES = 1,
parameter DATA_SIZE = 1,
parameter TAG_WIDTH = 1,
parameter TAG_SEL_BITS = 0,
parameter `STRING ARBITER = "P",
parameter REQ_OUT_BUF = 0,
parameter RSP_OUT_BUF = 0
@ -63,12 +63,12 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #(
assign mem_bus_if[i].req_data.tag = req_tag_out[i];
assign req_ready_out[i] = mem_bus_if[i].req_ready;
end
VX_stream_unpack #(
.NUM_REQS (NUM_LANES),
.DATA_WIDTH (REQ_DATA_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.OUT_BUF (REQ_OUT_BUF)
.NUM_REQS (NUM_LANES),
.DATA_WIDTH (REQ_DATA_WIDTH),
.TAG_WIDTH (TAG_WIDTH),
.OUT_BUF (REQ_OUT_BUF)
) stream_unpack (
.clk (clk),
.reset (reset),
@ -77,7 +77,7 @@ module VX_lsu_adapter import VX_gpu_pkg::*; #(
.data_in (req_data_in),
.tag_in (lsu_mem_if.req_data.tag),
.ready_in (lsu_mem_if.req_ready),
.valid_out (req_valid_out),
.valid_out (req_valid_out),
.data_out (req_data_out),
.tag_out (req_tag_out),
.ready_out (req_ready_out)

View file

@ -1,10 +1,10 @@
// Copyright © 2019-2023
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -24,7 +24,7 @@ module VX_mem_coalescer #(
parameter TAG_WIDTH = 8,
parameter UUID_WIDTH = 0, // upper section of the request tag contains the UUID
parameter QUEUE_SIZE = 8,
parameter DATA_IN_WIDTH = DATA_IN_SIZE * 8,
parameter DATA_OUT_WIDTH= DATA_OUT_SIZE * 8,
parameter OUT_REQS = (NUM_REQS * DATA_IN_WIDTH) / DATA_OUT_WIDTH,
@ -45,7 +45,7 @@ module VX_mem_coalescer #(
input wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] in_req_addr,
input wire [NUM_REQS-1:0][ATYPE_WIDTH-1:0] in_req_atype,
input wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_req_data,
input wire [TAG_WIDTH-1:0] in_req_tag,
input wire [TAG_WIDTH-1:0] in_req_tag,
output wire in_req_ready,
// Input response
@ -58,7 +58,7 @@ module VX_mem_coalescer #(
// Output request
output wire out_req_valid,
output wire out_req_rw,
output wire [OUT_REQS-1:0] out_req_mask,
output wire [OUT_REQS-1:0] out_req_mask,
output wire [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen,
output wire [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr,
output wire [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype,
@ -78,7 +78,7 @@ module VX_mem_coalescer #(
`STATIC_ASSERT ((NUM_REQS * DATA_IN_WIDTH >= DATA_OUT_WIDTH), ("invalid parameter"))
`RUNTIME_ASSERT ((~in_req_valid || in_req_mask != 0), ("invalid request mask"));
`RUNTIME_ASSERT ((~out_rsp_valid || out_rsp_mask != 0), ("invalid request mask"));
localparam TAG_ID_WIDTH = TAG_WIDTH - UUID_WIDTH;
localparam NUM_REQS_W = `LOG2UP(NUM_REQS);
// tag + mask + offset
@ -86,19 +86,19 @@ module VX_mem_coalescer #(
localparam STATE_SETUP = 0;
localparam STATE_SEND = 1;
logic state_r, state_n;
logic out_req_valid_r, out_req_valid_n;
logic out_req_rw_r, out_req_rw_n;
logic [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
logic [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
logic [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data_r, out_req_data_n;
logic [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
logic in_req_ready_n;
logic state_r, state_n;
reg out_req_valid_r, out_req_valid_n;
reg out_req_rw_r, out_req_rw_n;
reg [OUT_REQS-1:0] out_req_mask_r, out_req_mask_n;
reg [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] out_req_addr_r, out_req_addr_n;
reg [OUT_REQS-1:0][ATYPE_WIDTH-1:0] out_req_atype_r, out_req_atype_n;
reg [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] out_req_byteen_r, out_req_byteen_n;
reg [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] out_req_data_r, out_req_data_n;
reg [OUT_TAG_WIDTH-1:0] out_req_tag_r, out_req_tag_n;
logic in_req_ready_n;
wire ibuf_push;
wire ibuf_pop;
@ -108,11 +108,11 @@ module VX_mem_coalescer #(
wire ibuf_empty;
wire [IBUF_DATA_WIDTH-1:0] ibuf_din;
wire [IBUF_DATA_WIDTH-1:0] ibuf_dout;
logic [OUT_REQS-1:0] batch_valid_r, batch_valid_n;
logic [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n;
logic [OUT_REQS-1:0][ATYPE_WIDTH-1:0] seed_atype_r, seed_atype_n;
logic [NUM_REQS-1:0] processed_mask_r, processed_mask_n;
reg [OUT_REQS-1:0] batch_valid_r, batch_valid_n;
reg [OUT_REQS-1:0][OUT_ADDR_WIDTH-1:0] seed_addr_r, seed_addr_n;
reg [OUT_REQS-1:0][ATYPE_WIDTH-1:0] seed_atype_r, seed_atype_n;
reg [NUM_REQS-1:0] processed_mask_r, processed_mask_n;
wire [OUT_REQS-1:0][NUM_REQS_W-1:0] seed_idx;
@ -147,9 +147,9 @@ module VX_mem_coalescer #(
out_req_valid_r <= out_req_valid_n;
batch_valid_r <= batch_valid_n;
seed_addr_r <= seed_addr_n;
seed_atype_r <= seed_atype_n;
out_req_rw_r <= out_req_rw_n;
out_req_mask_r <= out_req_mask_n;
seed_atype_r <= seed_atype_n;
out_req_rw_r <= out_req_rw_n;
out_req_mask_r <= out_req_mask_n;
out_req_addr_r <= out_req_addr_n;
out_req_atype_r <= out_req_atype_n;
out_req_byteen_r <= out_req_byteen_n;
@ -159,38 +159,58 @@ module VX_mem_coalescer #(
end
end
logic [NUM_REQS-1:0] addr_matches;
wire [NUM_REQS-1:0] addr_matches;
always @(*) begin
addr_matches = '0;
for (integer i = 0; i < OUT_REQS; ++i) begin
for (integer j = 0; j < BATCH_SIZE; j++) begin
if (in_addr_base[BATCH_SIZE * i + j] == seed_addr_r[i]) begin
addr_matches[BATCH_SIZE * i + j] = 1;
end
end
for (genvar i = 0; i < OUT_REQS; ++i) begin
for (genvar j = 0; j < BATCH_SIZE; ++j) begin
assign addr_matches[BATCH_SIZE * i + j] = (in_addr_base[BATCH_SIZE * i + j] == seed_addr_r[i]);
end
end
wire [NUM_REQS-1:0] current_pmask = in_req_mask & addr_matches;
reg [OUT_REQS-1:0][DATA_OUT_SIZE-1:0] req_byteen_merged;
reg [OUT_REQS-1:0][DATA_OUT_WIDTH-1:0] req_data_merged;
always @(*) begin
req_byteen_merged = '0;
req_data_merged = 'x;
for (integer i = 0; i < OUT_REQS; ++i) begin
for (integer j = 0; j < BATCH_SIZE; ++j) begin
if (current_pmask[BATCH_SIZE * i + j]) begin
req_byteen_merged[i][in_addr_offset[BATCH_SIZE * i + j] * DATA_IN_SIZE +: DATA_IN_SIZE] = in_req_byteen[BATCH_SIZE * i + j];
req_data_merged[i][in_addr_offset[BATCH_SIZE * i + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH] = in_req_data[BATCH_SIZE * i + j];
end
end
end
end
wire [OUT_REQS * BATCH_SIZE - 1:0] pending_mask;
for (genvar i = 0; i < OUT_REQS * BATCH_SIZE; ++i) begin
assign pending_mask[i] = in_req_mask[i] && ~addr_matches[i] && ~processed_mask_r[i];
end
wire batch_completed = ~(| pending_mask);
always @(*) begin
state_n = state_r;
out_req_valid_n = out_req_valid_r;
seed_addr_n = seed_addr_r;
seed_atype_n = seed_atype_r;
out_req_rw_n = out_req_rw_r;
out_req_mask_n = out_req_mask_r;
out_req_valid_n = out_req_valid_r;
out_req_mask_n = out_req_mask_r;
out_req_rw_n = out_req_rw_r;
out_req_addr_n = out_req_addr_r;
out_req_atype_n = out_req_atype_r;
out_req_byteen_n = out_req_byteen_r;
out_req_data_n = out_req_data_r;
out_req_tag_n = out_req_tag_r;
processed_mask_n = processed_mask_r;
in_req_ready_n = 0;
case (state_r)
STATE_SETUP: begin
STATE_SETUP: begin
// find the next seed address
for (integer i = 0; i < OUT_REQS; ++i) begin
seed_addr_n[i] = in_addr_base[seed_idx[i]];
@ -200,43 +220,28 @@ module VX_mem_coalescer #(
if (out_req_valid && out_req_ready) begin
out_req_valid_n = 0;
end
if (in_req_valid && ~out_req_valid_n && ~ibuf_full) begin
if (in_req_valid && ~out_req_valid_n && ~ibuf_full) begin
state_n = STATE_SEND;
end
end
default/*STATE_SEND*/: begin
out_req_valid_n = 1;
out_req_rw_n = in_req_rw;
out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr};
in_req_ready_n = 1;
out_req_byteen_n = '0;
out_req_data_n = 'x;
for (integer i = 0; i < OUT_REQS; ++i) begin
for (integer j = 0; j < BATCH_SIZE; j++) begin
if (in_req_mask[BATCH_SIZE * i + j]) begin
if (addr_matches[BATCH_SIZE * i + j]) begin
for (integer k = 0; k < DATA_IN_SIZE; ++k) begin
if (in_req_byteen[BATCH_SIZE * i + j][k]) begin
out_req_byteen_n[i][in_addr_offset[BATCH_SIZE * i + j] * DATA_IN_SIZE + k +: 1] = 1'b1;
out_req_data_n[i][in_addr_offset[BATCH_SIZE * i + j] * DATA_IN_WIDTH + k * 8 +: 8] = in_req_data[BATCH_SIZE * i + j][k * 8 +: 8];
end
end
end else begin
if (!processed_mask_r[BATCH_SIZE * i + j]) begin
in_req_ready_n = 0;
end
end
end
end
out_req_mask_n[i] = batch_valid_r[i];
out_req_addr_n[i] = seed_addr_r[i];
out_req_atype_n[i]= seed_atype_r[i];
end
if (in_req_ready_n) begin
out_req_mask_n = batch_valid_r;
out_req_rw_n = in_req_rw;
out_req_addr_n = seed_addr_r;
out_req_atype_n = seed_atype_r;
out_req_byteen_n= req_byteen_merged;
out_req_data_n = req_data_merged;
out_req_tag_n = {in_req_tag[TAG_WIDTH-1 -: UUID_WIDTH], ibuf_waddr};
in_req_ready_n = batch_completed;
if (batch_completed) begin
processed_mask_n = '0;
end else begin
processed_mask_n = processed_mask_r | current_pmask;
end
state_n = STATE_SETUP;
end
endcase
@ -248,11 +253,11 @@ module VX_mem_coalescer #(
assign ibuf_push = (state_r == STATE_SEND) && ~in_req_rw;
assign ibuf_pop = out_rsp_fire && out_rsp_eop;
assign ibuf_raddr = out_rsp_tag[QUEUE_ADDRW-1:0];
assign ibuf_raddr = out_rsp_tag[QUEUE_ADDRW-1:0];
wire [TAG_ID_WIDTH-1:0] ibuf_din_tag = in_req_tag[TAG_ID_WIDTH-1:0];
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] ibuf_din_offset = in_addr_offset;
wire [NUM_REQS-1:0] ibuf_din_pmask = current_pmask;
wire [NUM_REQS-1:0] ibuf_din_pmask = current_pmask;
assign ibuf_din = {ibuf_din_tag, ibuf_din_pmask, ibuf_din_offset};
@ -286,7 +291,7 @@ module VX_mem_coalescer #(
// unmerge responses
reg [QUEUE_SIZE-1:0][OUT_REQS-1:0] rsp_rem_mask;
reg [QUEUE_SIZE-1:0][OUT_REQS-1:0] rsp_rem_mask;
wire [OUT_REQS-1:0] rsp_rem_mask_n = rsp_rem_mask[ibuf_raddr] & ~out_rsp_mask;
assign out_rsp_eop = ~(| rsp_rem_mask_n);
@ -300,20 +305,18 @@ module VX_mem_coalescer #(
end
wire [NUM_REQS-1:0][BATCH_SIZE_W-1:0] ibuf_dout_offset;
reg [NUM_REQS-1:0] ibuf_dout_pmask;
wire [NUM_REQS-1:0] ibuf_dout_pmask;
wire [TAG_ID_WIDTH-1:0] ibuf_dout_tag;
assign {ibuf_dout_tag, ibuf_dout_pmask, ibuf_dout_offset} = ibuf_dout;
logic [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_rsp_data_n;
logic [NUM_REQS-1:0] in_rsp_mask_n;
always @(*) begin
for (integer i = 0; i < OUT_REQS; ++i) begin
for (integer j = 0; j < BATCH_SIZE; j++) begin
in_rsp_mask_n[BATCH_SIZE * i + j] = out_rsp_mask[i] && ibuf_dout_pmask[BATCH_SIZE * i + j];
in_rsp_data_n[BATCH_SIZE * i + j] = out_rsp_data[i][ibuf_dout_offset[BATCH_SIZE * i + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH];
end
wire [NUM_REQS-1:0][DATA_IN_WIDTH-1:0] in_rsp_data_n;
wire [NUM_REQS-1:0] in_rsp_mask_n;
for (genvar i = 0; i < OUT_REQS; ++i) begin
for (genvar j = 0; j < BATCH_SIZE; ++j) begin
assign in_rsp_mask_n[BATCH_SIZE * i + j] = out_rsp_mask[i] && ibuf_dout_pmask[BATCH_SIZE * i + j];
assign in_rsp_data_n[BATCH_SIZE * i + j] = out_rsp_data[i][ibuf_dout_offset[BATCH_SIZE * i + j] * DATA_IN_WIDTH +: DATA_IN_WIDTH];
end
end
@ -339,7 +342,7 @@ module VX_mem_coalescer #(
reg [NUM_REQS-1:0] out_req_pmask;
always @(posedge clk) begin
if (ibuf_push) begin
if (ibuf_push) begin
out_req_offset <= ibuf_din_offset;
out_req_pmask <= ibuf_din_pmask;
end
@ -351,30 +354,30 @@ module VX_mem_coalescer #(
if (out_req_fire) begin
if (out_req_rw) begin
`TRACE(1, ("%d: %s-out-req-wr: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask));
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
`TRACE(1, (", atype="));
`TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS);
`TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS);
`TRACE(1, (", byteen="));
`TRACE_ARRAY1D(1, "0x%h", out_req_byteen, OUT_REQS);
`TRACE(1, (", data="));
`TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS);
`TRACE_ARRAY1D(1, "0x%0h", out_req_data, OUT_REQS);
end else begin
`TRACE(1, ("%d: %s-out-req-rd: valid=%b, addr=", $time, INSTANCE_ID, out_req_mask));
`TRACE_ARRAY1D(1, "0x%h", out_req_addr, OUT_REQS);
`TRACE(1, (", atype="));
`TRACE_ARRAY1D(1, "%b", out_req_atype, OUT_REQS);
end
`TRACE(1, (", offset="));
`TRACE(1, (", offset="));
`TRACE_ARRAY1D(1, "%0d", out_req_offset, NUM_REQS);
`TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid));
`TRACE(1, (", pmask=%b, tag=0x%0h (#%0d)\n", out_req_pmask, out_req_tag, out_req_uuid));
if ($countones(out_req_pmask) > 1) begin
`TRACE(1, ("%t: *** %s: coalescing=%b (#%0d)\n", $time, INSTANCE_ID, out_req_pmask, out_req_uuid));
end
`TRACE(1, ("%t: *** %s: coalescing=%b (#%0d)\n", $time, INSTANCE_ID, out_req_pmask, out_req_uuid));
end
end
if (out_rsp_fire) begin
`TRACE(1, ("%d: %s-out-rsp: valid=%b, data=", $time, INSTANCE_ID, out_rsp_mask));
`TRACE_ARRAY1D(1, "0x%0h", out_rsp_data, OUT_REQS);
`TRACE(1, (", offset="));
`TRACE(1, (", offset="));
`TRACE_ARRAY1D(1, "%0d", ibuf_dout_offset, NUM_REQS);
`TRACE(1, (", eop=%b, pmask=%b, tag=0x%0h (#%0d)\n", out_rsp_eop, ibuf_dout_pmask, out_rsp_tag, out_rsp_uuid));
end

View file

@ -139,9 +139,9 @@ module VX_mem_scheduler #(
wire mem_req_ready_s;
wire mem_rsp_valid_s;
wire [CORE_REQS-1:0] mem_rsp_mask_s;
wire [CORE_REQS-1:0][WORD_WIDTH-1:0] mem_rsp_data_s;
wire [REQQ_TAG_WIDTH-1:0] mem_rsp_tag_s;
wire [CORE_CHANNELS-1:0] mem_rsp_mask_s;
wire [CORE_CHANNELS-1:0][WORD_WIDTH-1:0] mem_rsp_data_s;
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_s;
wire mem_rsp_ready_s;
wire crsp_valid;
@ -494,7 +494,7 @@ module VX_mem_scheduler #(
end
if (UUID_WIDTH != 0) begin
assign crsp_tag = {mem_rsp_tag_s[REQQ_TAG_WIDTH-1 -: UUID_WIDTH], ibuf_dout};
assign crsp_tag = {mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH], ibuf_dout};
end else begin
assign crsp_tag = ibuf_dout;
end
@ -564,8 +564,8 @@ module VX_mem_scheduler #(
wire [`UP(UUID_WIDTH)-1:0] rsp_dbg_uuid;
if (UUID_WIDTH != 0) begin
assign mem_req_dbg_uuid = mem_req_tag_s[REQQ_TAG_WIDTH-1 -: UUID_WIDTH];
assign mem_rsp_dbg_uuid = mem_rsp_tag_s[REQQ_TAG_WIDTH-1 -: UUID_WIDTH];
assign mem_req_dbg_uuid = mem_req_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
assign mem_rsp_dbg_uuid = mem_rsp_tag_s[MEM_TAG_WIDTH-1 -: UUID_WIDTH];
assign rsp_dbg_uuid = core_rsp_tag[TAG_WIDTH-1 -: UUID_WIDTH];
end else begin
assign mem_req_dbg_uuid = '0;

View file

@ -110,7 +110,7 @@ module VX_pe_serializer #(
always @(posedge clk) begin
if (reset) begin
valid_out_r <= 1'b0;
end else if (enablready_out_be_r) begin
end else if (ready_out_b) begin
valid_out_r <= valid_out_b;
end
if (ready_out_b) begin

View file

@ -36,23 +36,27 @@ typedef void* vx_buffer_h;
#define VX_CAPS_ISA_FLAGS 0x7
// device isa flags
#define VX_ISA_STD_A (1ull << 0)
#define VX_ISA_STD_C (1ull << 2)
#define VX_ISA_STD_D (1ull << 3)
#define VX_ISA_STD_E (1ull << 4)
#define VX_ISA_STD_F (1ull << 5)
#define VX_ISA_STD_H (1ull << 7)
#define VX_ISA_STD_I (1ull << 8)
#define VX_ISA_STD_N (1ull << 13)
#define VX_ISA_STD_Q (1ull << 16)
#define VX_ISA_STD_S (1ull << 18)
#define VX_ISA_STD_U (1ull << 20)
#define VX_ISA_ARCH(flags) (1 << (((flags >> 30) & 0x3) + 4))
#define VX_ISA_EXT_ICACHE (1ull << 32)
#define VX_ISA_EXT_DCACHE (1ull << 33)
#define VX_ISA_EXT_L2CACHE (1ull << 34)
#define VX_ISA_EXT_L3CACHE (1ull << 35)
#define VX_ISA_EXT_LMEM (1ull << 36)
#define VX_ISA_STD_A (1ull << ISA_STD_A)
#define VX_ISA_STD_C (1ull << ISA_STD_C)
#define VX_ISA_STD_D (1ull << ISA_STD_D)
#define VX_ISA_STD_E (1ull << ISA_STD_E)
#define VX_ISA_STD_F (1ull << ISA_STD_F)
#define VX_ISA_STD_H (1ull << ISA_STD_H)
#define VX_ISA_STD_I (1ull << ISA_STD_I)
#define VX_ISA_STD_N (1ull << ISA_STD_N)
#define VX_ISA_STD_Q (1ull << ISA_STD_Q)
#define VX_ISA_STD_S (1ull << ISA_STD_S)
#define VX_ISA_STD_U (1ull << ISA_STD_U)
#define VX_ISA_ARCH(flags) (1ull << (((flags >> 30) & 0x3) + 4))
#define VX_ISA_EXT_ICACHE (1ull << (32+ISA_EXT_ICACHE))
#define VX_ISA_EXT_DCACHE (1ull << (32+ISA_EXT_DCACHE))
#define VX_ISA_EXT_L2CACHE (1ull << (32+ISA_EXT_L2CACHE))
#define VX_ISA_EXT_L3CACHE (1ull << (32+ISA_EXT_L3CACHE))
#define VX_ISA_EXT_LMEM (1ull << (32+ISA_EXT_LMEM))
#define VX_ISA_EXT_ZICOND (1ull << (32+ISA_EXT_ZICOND))
#define VX_ISA_EXT_TEX (1ull << (32+ISA_EXT_TEX))
#define VX_ISA_EXT_RASTER (1ull << (32+ISA_EXT_RASTER))
#define VX_ISA_EXT_OM (1ull << (32+ISA_EXT_OM))
// ready wait timeout
#define VX_MAX_TIMEOUT (24*60*60*1000) // 24 Hr

View file

@ -539,7 +539,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
break;
}
float IPC = (float)(double(instrs_per_core) / double(cycles_per_core));
float IPC = caclAverage(instrs_per_core, cycles_per_core);
if (num_cores > 1) fprintf(stream, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs_per_core, cycles_per_core, IPC);
total_instrs += instrs_per_core;
total_cycles += cycles_per_core;
@ -553,8 +553,8 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
int ibuffer_percent = calcAvgPercent(ibuffer_stalls, total_cycles);
int scrb_percent = calcAvgPercent(scrb_stalls, total_cycles);
int opds_percent = calcAvgPercent(opds_stalls, total_cycles);
int ifetch_avg_lat = (int)(double(ifetch_lat) / double(ifetches));
int load_avg_lat = (int)(double(load_lat) / double(loads));
int ifetch_avg_lat = caclAverage(ifetch_lat, ifetches);
int load_avg_lat = caclAverage(load_lat, loads);
uint64_t scrb_total = scrb_alu + scrb_fpu + scrb_lsu + scrb_csrs + scrb_wctl;
fprintf(stream, "PERF: scheduler idle=%ld (%d%%)\n", sched_idles, sched_idles_percent);
fprintf(stream, "PERF: scheduler stalls=%ld (%d%%)\n", sched_stalls, sched_stalls_percent);
@ -616,7 +616,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
break;
}
float IPC = (float)(double(total_instrs) / double(max_cycles));
float IPC = caclAverage(total_instrs, max_cycles);
fprintf(stream, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", total_instrs, max_cycles, IPC);
fflush(stream);