scope logic analyzer redesign using Verilator's AST

This commit is contained in:
Blaise Tine 2023-04-29 21:18:33 -04:00
parent 0515ebed06
commit 7fb77deefa
30 changed files with 2349 additions and 3181 deletions

View file

@ -156,6 +156,7 @@ make -C $VORTEX_HOME/runtime/stub
if [ $DEBUG -ne 0 ]
then
# driver initialization
if [ $SCOPE -eq 1 ]
then
echo "running: DEBUG=$DEBUG_LEVEL SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH"
@ -165,6 +166,7 @@ then
DEBUG=$DEBUG_LEVEL CONFIGS="$CONFIGS" make -C $DRIVER_PATH
fi
# running application
if [ $HAS_ARGS -eq 1 ]
then
echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER > run.log 2>&1"
@ -181,7 +183,7 @@ then
mv -f $APP_PATH/trace.vcd .
fi
else
echo "driver initialization..."
# driver initialization
if [ $SCOPE -eq 1 ]
then
echo "running: SCOPE=1 CONFIGS="$CONFIGS" make -C $DRIVER_PATH"
@ -191,7 +193,7 @@ else
CONFIGS="$CONFIGS" make -C $DRIVER_PATH
fi
echo "running application..."
# running application
if [ $HAS_ARGS -eq 1 ]
then
echo "running: OPTS=$ARGS make -C $APP_PATH run-$DRIVER"

View file

@ -20,7 +20,7 @@ import VX_gpu_types::*;
module VX_cluster #(
parameter CLUSTER_ID = 0
) (
`SCOPE_IO_VX_cluster
`SCOPE_IO_DECL
// Clock
input wire clk,
@ -459,6 +459,8 @@ module VX_cluster #(
`BUFFER_DCR_WRITE_IF (socket_dcr_write_if, socket_dcr_write_tmp_if, (`NUM_SOCKETS > 1));
`SCOPE_IO_SWITCH (`NUM_SOCKETS);
// Generate all sockets
for (genvar i = 0; i < `NUM_SOCKETS; ++i) begin
@ -467,7 +469,7 @@ module VX_cluster #(
VX_socket #(
.SOCKET_ID ((CLUSTER_ID * `NUM_SOCKETS) + i)
) socket (
`SCOPE_BIND_VX_cluster_socket(i)
`SCOPE_IO_BIND (i)
.clk (clk),
.reset (socket_reset),

View file

@ -119,13 +119,13 @@ module VX_mem_unit # (
.NUM_REQS (DCACHE_NUM_REQS),
.WORD_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
) dcache_nosm_req_if [`NUM_SOCKETS]();
) dcache_switch_req_if [`NUM_SOCKETS]();
VX_cache_rsp_if #(
.NUM_REQS (DCACHE_NUM_REQS),
.WORD_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
) dcache_nosm_rsp_if [`NUM_SOCKETS]();
) dcache_switch_rsp_if [`NUM_SOCKETS]();
`RESET_RELAY (dcache_reset, reset);
@ -159,8 +159,8 @@ module VX_mem_unit # (
.clk (clk),
.reset (dcache_reset),
.core_req_if (dcache_nosm_req_if),
.core_rsp_if (dcache_nosm_rsp_if),
.core_req_if (dcache_switch_req_if),
.core_rsp_if (dcache_switch_rsp_if),
.mem_req_if (dcache_mem_req_if),
.mem_rsp_if (dcache_mem_rsp_if)
);
@ -174,13 +174,13 @@ module VX_mem_unit # (
.NUM_REQS (DCACHE_NUM_REQS),
.WORD_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
) dcache_smem_switch_req_if[2]();
) smem_switch_out_req_if[2]();
VX_cache_rsp_if #(
.NUM_REQS (DCACHE_NUM_REQS),
.WORD_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_NOSM_TAG_WIDTH)
) dcache_smem_switch_rsp_if[2]();
) smem_switch_out_rsp_if[2]();
`RESET_RELAY (dcache_smem_switch_reset, reset);
@ -198,12 +198,12 @@ module VX_mem_unit # (
.reset (dcache_smem_switch_reset),
.req_in_if (dcache_req_if[i]),
.rsp_in_if (dcache_rsp_if[i]),
.req_out_if (dcache_smem_switch_req_if),
.rsp_out_if (dcache_smem_switch_rsp_if)
.req_out_if (smem_switch_out_req_if),
.rsp_out_if (smem_switch_out_rsp_if)
);
`ASSIGN_VX_CACHE_REQ_IF (dcache_nosm_req_if[i], dcache_smem_switch_req_if[0]);
`ASSIGN_VX_CACHE_RSP_IF (dcache_smem_switch_rsp_if[0], dcache_nosm_rsp_if[i]);
`ASSIGN_VX_CACHE_REQ_IF (dcache_switch_req_if[i], smem_switch_out_req_if[0]);
`ASSIGN_VX_CACHE_RSP_IF (smem_switch_out_rsp_if[0], dcache_switch_rsp_if[i]);
// shared memory address mapping:
// [core_idx][warp_idx][word_idx][thread_idx] <= [core_idx][warp_idx][thread_idx][bank_offset..word_idx]
@ -217,14 +217,14 @@ module VX_mem_unit # (
for (genvar j = 0; j < DCACHE_NUM_REQS; ++j) begin
if (`NT_BITS != 0) begin
assign smem_req_addr[j][0 +: `NT_BITS] = dcache_smem_switch_req_if[1].addr[j][BANK_ADDR_OFFSET +: `NT_BITS];
assign smem_req_addr[j][0 +: `NT_BITS] = smem_switch_out_req_if[1].addr[j][BANK_ADDR_OFFSET +: `NT_BITS];
end
assign smem_req_addr[j][`NT_BITS +: WORD_SEL_BITS] = dcache_smem_switch_req_if[1].addr[j][0 +: WORD_SEL_BITS];
assign smem_req_addr[j][`NT_BITS +: WORD_SEL_BITS] = smem_switch_out_req_if[1].addr[j][0 +: WORD_SEL_BITS];
if (`NW_BITS != 0) begin
assign smem_req_addr[j][(`NT_BITS + WORD_SEL_BITS) +: `NW_BITS] = dcache_smem_switch_req_if[1].addr[j][(BANK_ADDR_OFFSET + `NT_BITS) +: `NW_BITS];
assign smem_req_addr[j][(`NT_BITS + WORD_SEL_BITS) +: `NW_BITS] = smem_switch_out_req_if[1].addr[j][(BANK_ADDR_OFFSET + `NT_BITS) +: `NW_BITS];
end
if (SOCKET_BITS != 0) begin
assign smem_req_addr[j][(`NT_BITS + WORD_SEL_BITS + `NW_BITS) +: SOCKET_BITS] = dcache_smem_switch_req_if[1].addr[j][(BANK_ADDR_OFFSET + `NT_BITS + `NW_BITS) +: SOCKET_BITS];
assign smem_req_addr[j][(`NT_BITS + WORD_SEL_BITS + `NW_BITS) +: SOCKET_BITS] = smem_switch_out_req_if[1].addr[j][(BANK_ADDR_OFFSET + `NT_BITS + `NW_BITS) +: SOCKET_BITS];
end
end
@ -249,27 +249,27 @@ module VX_mem_unit # (
`endif
// Core request
.req_valid (dcache_smem_switch_req_if[1].valid),
.req_rw (dcache_smem_switch_req_if[1].rw),
.req_byteen (dcache_smem_switch_req_if[1].byteen),
.req_valid (smem_switch_out_req_if[1].valid),
.req_rw (smem_switch_out_req_if[1].rw),
.req_byteen (smem_switch_out_req_if[1].byteen),
.req_addr (smem_req_addr),
.req_data (dcache_smem_switch_req_if[1].data),
.req_tag (dcache_smem_switch_req_if[1].tag),
.req_ready (dcache_smem_switch_req_if[1].ready),
.req_data (smem_switch_out_req_if[1].data),
.req_tag (smem_switch_out_req_if[1].tag),
.req_ready (smem_switch_out_req_if[1].ready),
// Core response
.rsp_valid (dcache_smem_switch_rsp_if[1].valid),
.rsp_data (dcache_smem_switch_rsp_if[1].data),
.rsp_tag (dcache_smem_switch_rsp_if[1].tag),
.rsp_ready (dcache_smem_switch_rsp_if[1].ready)
.rsp_valid (smem_switch_out_rsp_if[1].valid),
.rsp_data (smem_switch_out_rsp_if[1].data),
.rsp_tag (smem_switch_out_rsp_if[1].tag),
.rsp_ready (smem_switch_out_rsp_if[1].ready)
);
end
`else
for (genvar i = 0; i < `NUM_SOCKETS; ++i) begin
`ASSIGN_VX_CACHE_REQ_IF (dcache_nosm_req_if[i], dcache_req_if[i]);
`ASSIGN_VX_CACHE_RSP_IF (dcache_rsp_if[i], dcache_nosm_rsp_if[i]);
`ASSIGN_VX_CACHE_REQ_IF (dcache_switch_req_if[i], dcache_req_if[i]);
`ASSIGN_VX_CACHE_RSP_IF (dcache_rsp_if[i], dcache_switch_rsp_if[i]);
end
`endif

View file

@ -56,7 +56,8 @@
/* verilator lint_off DECLFILENAME */ \
/* verilator lint_off IMPLICIT */ \
/* verilator lint_off PINMISSING */ \
/* verilator lint_off IMPORTSTAR */
/* verilator lint_off IMPORTSTAR */ \
/* verilator lint_off UNSIGNED */
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
/* verilator lint_on PINCONNECTEMPTY */ \
@ -66,7 +67,8 @@
/* verilator lint_on DECLFILENAME */ \
/* verilator lint_on IMPLICIT */ \
/* verilator lint_off PINMISSING */ \
/* verilator lint_on IMPORTSTAR */
/* verilator lint_on IMPORTSTAR */ \
/* verilator lint_on UNSIGNED */
`define UNUSED_PARAM(x) /* verilator lint_off UNUSED */ \
localparam __``x = x; \

View file

@ -3,97 +3,38 @@
`ifdef SCOPE
`include "scope-defs.vh"
`define SCOPE_IO_DECL \
input wire scope_reset, \
input wire scope_bus_in, \
output wire scope_bus_out,
`define SCOPE_ASSIGN(d,s) assign scope_``d = s
`define SCOPE_IO_SWITCH(count) \
wire scope_bus_in_w [count]; \
wire scope_bus_out_w [count]; \
`RESET_RELAY_EX(scope_reset_w, scope_reset, count, 4); \
VX_scope_switch #( \
.N (count) \
) scope_switch ( \
.clk (clk), \
.reset (scope_reset), \
.req_in (scope_bus_in), \
.rsp_out (scope_bus_out), \
.req_out (scope_bus_in_w), \
.rsp_in (scope_bus_out_w) \
);
`define SCOPE_SIZE 256
`define SCOPE_IO_BIND(i) \
.scope_reset (scope_reset_w[i]), \
.scope_bus_in (scope_bus_in_w[i]), \
.scope_bus_out (scope_bus_out_w[i]),
`else
`define SCOPE_IO_VX_icache_stage
`define SCOPE_IO_DECL
`define SCOPE_IO_VX_fetch
`define SCOPE_IO_SWITCH(n)
`define SCOPE_BIND_VX_fetch_icache_stage
`define SCOPE_BIND_VX_fetch_warp_sched
`define SCOPE_IO_VX_warp_sched
`define SCOPE_BIND_VX_core_fetch
`define SCOPE_IO_VX_core
`define SCOPE_IO_VX_socket
`define SCOPE_IO_VX_cluster
`define SCOPE_BIND_VX_cluster_socket(__i__)
`define SCOPE_BIND_VX_socket_core(__i__)
`define SCOPE_IO_Vortex
`define SCOPE_BIND_Vortex_cluster(__i__)
`define SCOPE_BIND_afu_vortex
`define SCOPE_IO_VX_lsu_unit
`define SCOPE_IO_VX_gpu_unit
`define SCOPE_IO_VX_execute
`define SCOPE_BIND_VX_execute_lsu_unit
`define SCOPE_BIND_VX_execute_gpu_unit
`define SCOPE_BIND_VX_core_execute
`define SCOPE_IO_VX_issue
`define SCOPE_BIND_VX_core_issue
`define SCOPE_IO_VX_cache_bank
`define SCOPE_IO_VX_cache
`define SCOPE_IO_VX_cache_wrap
`define SCOPE_BIND_VX_cache_wrap_cache
`define SCOPE_BIND_VX_cache_bank(__i__)
`define SCOPE_BIND_Vortex_l3cache
`define SCOPE_BIND_VX_cluster_l2cache
`define SCOPE_BIND_VX_cluster_rcache
`define SCOPE_BIND_VX_cluster_ocache
`define SCOPE_IO_VX_mem_unit
`define SCOPE_BIND_VX_core_mem_unit
`define SCOPE_BIND_VX_mem_unit_dcache
`define SCOPE_BIND_VX_mem_unit_icache
`define SCOPE_BIND_VX_mem_unit_tcache
`define SCOPE_BIND_VX_mem_unit_smem
`define SCOPE_DECL_SIGNALS
`define SCOPE_DATA_LIST
`define SCOPE_UPDATE_LIST
`define SCOPE_TRIGGER
`define SCOPE_ASSIGN(d,s)
`define SCOPE_IO_BIND(i)
`endif

View file

@ -8,7 +8,7 @@ import VX_gpu_types::*;
module VX_socket #(
parameter SOCKET_ID = 0
) (
`SCOPE_IO_VX_socket
`SCOPE_IO_DECL
// Clock
input wire clk,
@ -317,6 +317,8 @@ module VX_socket #(
`BUFFER_DCR_WRITE_IF (core_dcr_write_if, dcr_write_if, (`SOCKET_SIZE > 1));
`SCOPE_IO_SWITCH (`SOCKET_SIZE)
// Generate all cores
for (genvar i = 0; i < `SOCKET_SIZE; ++i) begin
@ -325,7 +327,7 @@ module VX_socket #(
VX_core #(
.CORE_ID ((SOCKET_ID * `SOCKET_SIZE) + i)
) core (
`SCOPE_BIND_VX_socket_core(i)
`SCOPE_IO_BIND (i)
.clk (clk),
.reset (core_reset),

View file

@ -18,7 +18,7 @@ import VX_gpu_types::*;
`IGNORE_WARNINGS_END
module Vortex (
`SCOPE_IO_Vortex
`SCOPE_IO_DECL
// Clock
input wire clk,
@ -141,6 +141,8 @@ module Vortex (
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
`SCOPE_IO_SWITCH (`NUM_CLUSTERS+1)
// Generate all clusters
for (genvar i = 0; i < `NUM_CLUSTERS; ++i) begin
@ -151,7 +153,7 @@ module Vortex (
VX_cluster #(
.CLUSTER_ID (i)
) cluster (
`SCOPE_BIND_Vortex_cluster(i)
`SCOPE_IO_BIND (i)
.clk (clk),
.reset (cluster_reset),
@ -312,12 +314,22 @@ module Vortex (
`endif
`SCOPE_ASSIGN (reset, reset);
`SCOPE_ASSIGN (mem_req_fire, mem_req_fire);
`SCOPE_ASSIGN (mem_req_addr, `TO_FULL_ADDR(mem_req_addr));
`SCOPE_ASSIGN (mem_req_rw, mem_req_rw);
`SCOPE_ASSIGN (mem_rsp_fire, mem_rsp_fire);
`SCOPE_ASSIGN (busy, busy);
`ifdef SCOPE
VX_scope_tap #(
.SCOPE_ID (1),
.TRIGGERW (2),
.PROBEW (`VX_MEM_ADDR_WIDTH+1+1)
) scope_tap (
.clk(clk),
.reset(scope_reset_w[`NUM_CLUSTERS]),
.start(1'b0),
.stop(1'b0),
.triggers({mem_req_fire, mem_rsp_fire}),
.probes({mem_req_addr, mem_req_rw, busy}),
.bus_in(scope_bus_in_w[`NUM_CLUSTERS]),
.bus_out(scope_bus_out_w[`NUM_CLUSTERS])
);
`endif
`ifdef DBG_TRACE_CORE_MEM
always @(posedge clk) begin

File diff suppressed because it is too large Load diff

View file

@ -27,7 +27,7 @@ import VX_fpu_types::*;
module VX_core #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_core
`SCOPE_IO_DECL
// Clock
input wire clk,
@ -128,10 +128,12 @@ module VX_core #(
.base_dcrs (base_dcrs)
);
`SCOPE_IO_SWITCH (3)
VX_fetch #(
.CORE_ID(CORE_ID)
) fetch (
`SCOPE_BIND_VX_core_fetch
`SCOPE_IO_BIND (0)
.clk (clk),
.reset (fetch_reset),
.base_dcrs (base_dcrs),
@ -161,7 +163,7 @@ module VX_core #(
VX_issue #(
.CORE_ID(CORE_ID)
) issue (
`SCOPE_BIND_VX_core_issue
`SCOPE_IO_BIND (1)
.clk (clk),
.reset (issue_reset),
@ -185,7 +187,7 @@ module VX_core #(
VX_execute #(
.CORE_ID(CORE_ID)
) execute (
`SCOPE_BIND_VX_core_execute
`SCOPE_IO_BIND (2)
.clk (clk),
.reset (execute_reset),

View file

@ -8,7 +8,7 @@ import VX_gpu_types::*;
module VX_execute #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_execute
`SCOPE_IO_DECL
input wire clk,
input wire reset,
@ -120,10 +120,12 @@ module VX_execute #(
.alu_commit_if (alu_commit_if)
);
`SCOPE_IO_SWITCH (2)
VX_lsu_unit #(
.CORE_ID(CORE_ID)
) lsu_unit (
`SCOPE_BIND_VX_execute_lsu_unit
`SCOPE_IO_BIND (0)
.clk (clk),
.reset (lsu_reset),
.cache_req_if (dcache_req_if),
@ -207,7 +209,7 @@ module VX_execute #(
VX_gpu_unit #(
.CORE_ID(CORE_ID)
) gpu_unit (
`SCOPE_BIND_VX_execute_gpu_unit
`SCOPE_IO_BIND (1)
.clk (clk),
.reset (gpu_reset),
.gpu_req_if (gpu_req_if),

View file

@ -8,7 +8,7 @@ import VX_gpu_types::*;
module VX_fetch #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_fetch
`SCOPE_IO_DECL
input wire clk,
input wire reset,
@ -40,34 +40,36 @@ module VX_fetch #(
VX_ifetch_req_if ifetch_req_if();
`SCOPE_IO_SWITCH (2)
VX_warp_sched #(
.CORE_ID(CORE_ID)
) warp_sched (
`SCOPE_BIND_VX_fetch_warp_sched
`SCOPE_IO_BIND (0)
.clk (clk),
.reset (reset),
.clk (clk),
.reset (reset),
.base_dcrs (base_dcrs),
.base_dcrs (base_dcrs),
.warp_ctl_if (warp_ctl_if),
.wrelease_if (wrelease_if),
.join_if (join_if),
.branch_ctl_if (branch_ctl_if),
.warp_ctl_if (warp_ctl_if),
.wrelease_if (wrelease_if),
.join_if (join_if),
.branch_ctl_if (branch_ctl_if),
.ifetch_req_if (ifetch_req_if),
.ifetch_req_if (ifetch_req_if),
.fetch_to_csr_if (fetch_to_csr_if),
.fetch_to_csr_if(fetch_to_csr_if),
.cmt_to_fetch_if (cmt_to_fetch_if),
.cmt_to_fetch_if(cmt_to_fetch_if),
.busy (busy)
.busy (busy)
);
VX_icache_stage #(
.CORE_ID(CORE_ID)
) icache_stage (
`SCOPE_BIND_VX_fetch_icache_stage
`SCOPE_IO_BIND (1)
.clk (clk),
.reset (reset),

View file

@ -8,7 +8,7 @@ import VX_gpu_types::*;
module VX_gpu_unit #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_gpu_unit
`SCOPE_IO_DECL
input wire clk,
input wire reset,
@ -354,12 +354,28 @@ module VX_gpu_unit #(
assign warp_ctl_if.wid = gpu_commit_if.wid;
assign {warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier} = rsp_data[WCTL_DATAW-1:0];
`SCOPE_ASSIGN (gpu_rsp_valid, warp_ctl_if.valid);
`SCOPE_ASSIGN (gpu_rsp_uuid, gpu_commit_if.uuid);
`SCOPE_ASSIGN (gpu_rsp_tmc, warp_ctl_if.tmc.valid);
`SCOPE_ASSIGN (gpu_rsp_wspawn, warp_ctl_if.wspawn.valid);
`SCOPE_ASSIGN (gpu_rsp_split, warp_ctl_if.split.valid);
`SCOPE_ASSIGN (gpu_rsp_barrier, warp_ctl_if.barrier.valid);
`ifdef SCOPE
VX_scope_tap #(
.SCOPE_ID (6),
.TRIGGERW (5),
.PROBEW (UUID_WIDTH)
) scope_tap (
.clk(clk),
.reset(scope_reset),
.start(1'b0),
.stop(1'b0),
.triggers({
warp_ctl_if.valid,
warp_ctl_if.tmc.valid,
warp_ctl_if.wspawn.valid,
warp_ctl_if.split.valid,
warp_ctl_if.barrier.valid
}),
.probes({gpu_commit_if.uuid}),
.bus_in(scope_bus_in),
.bus_out(scope_bus_out)
);
`endif
// pending request

View file

@ -8,7 +8,7 @@ import VX_gpu_types::*;
module VX_icache_stage #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_icache_stage
`SCOPE_IO_DECL
input wire clk,
input wire reset,
@ -120,13 +120,23 @@ module VX_icache_stage #(
// Can accept new response?
assign icache_rsp_if.ready = ifetch_rsp_if.ready;
`SCOPE_ASSIGN (icache_req_fire, icache_req_fire);
`SCOPE_ASSIGN (icache_req_uuid, ifetch_req_if.uuid);
`SCOPE_ASSIGN (icache_req_addr, ifetch_req_if.PC);
`SCOPE_ASSIGN (icache_rsp_fire, icache_rsp_if.valid && icache_rsp_if.ready);
`SCOPE_ASSIGN (icache_rsp_uuid, rsp_uuid);
`SCOPE_ASSIGN (icache_rsp_data, icache_rsp_if.data);
`ifdef SCOPE
wire icache_rsp_fire = icache_rsp_if.valid && icache_rsp_if.ready;
VX_scope_tap #(
.SCOPE_ID (3),
.TRIGGERW (2),
.PROBEW (UUID_WIDTH+32+UUID_WIDTH+32)
) scope_tap (
.clk(clk),
.reset(scope_reset),
.start(1'b0),
.stop(1'b0),
.triggers({icache_rsp_fire, icache_req_fire}),
.probes({ifetch_req_if.uuid, ifetch_req_if.PC, rsp_uuid, icache_rsp_if.data}),
.bus_in(scope_bus_in),
.bus_out(scope_bus_out)
);
`endif
`ifdef DBG_TRACE_CORE_ICACHE
wire ifetch_req_fire = ifetch_req_if.valid && ifetch_req_if.ready;

View file

@ -6,7 +6,7 @@
module VX_issue #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_issue
`SCOPE_IO_DECL
input wire clk,
input wire reset,
@ -26,7 +26,6 @@ module VX_issue #(
`endif
VX_gpu_req_if.master gpu_req_if
);
VX_ibuffer_if ibuffer_if();
VX_gpr_req_if gpr_req_if();
VX_gpr_rsp_if gpr_rsp_if();
@ -132,6 +131,8 @@ module VX_issue #(
`endif
.gpu_req_if (gpu_req_if)
);
wire ibuffer_if_fire = ibuffer_if.valid && ibuffer_if.ready;
reg [31:0] timeout_ctr;
always @(posedge clk) begin
@ -145,7 +146,7 @@ module VX_issue #(
in_use_regs[0], in_use_regs[1], in_use_regs[2], in_use_regs[3], ~dispatch_if.ready, ibuffer_if.uuid));
`endif
timeout_ctr <= timeout_ctr + 1;
end else if (ibuffer_if.valid && ibuffer_if.ready) begin
end else if (ibuffer_if_fire) begin
timeout_ctr <= '0;
end
end
@ -166,31 +167,55 @@ module VX_issue #(
);
`endif
`SCOPE_ASSIGN (issue_fire, ibuffer_if.valid && ibuffer_if.ready);
`SCOPE_ASSIGN (issue_uuid, ibuffer_if.uuid);
`SCOPE_ASSIGN (issue_tmask, ibuffer_if.tmask);
`SCOPE_ASSIGN (issue_ex_type, ibuffer_if.ex_type);
`SCOPE_ASSIGN (issue_op_type, ibuffer_if.op_type);
`SCOPE_ASSIGN (issue_op_mod, ibuffer_if.op_mod);
`SCOPE_ASSIGN (issue_wb, ibuffer_if.wb);
`SCOPE_ASSIGN (issue_rd, ibuffer_if.rd);
`SCOPE_ASSIGN (issue_rs1, ibuffer_if.rs1);
`SCOPE_ASSIGN (issue_rs2, ibuffer_if.rs2);
`SCOPE_ASSIGN (issue_rs3, ibuffer_if.rs3);
`SCOPE_ASSIGN (issue_imm, ibuffer_if.imm);
`SCOPE_ASSIGN (issue_use_pc, ibuffer_if.use_PC);
`SCOPE_ASSIGN (issue_use_imm, ibuffer_if.use_imm);
`SCOPE_ASSIGN (scoreboard_delay, !scoreboard_if.ready);
`SCOPE_ASSIGN (dispatch_delay, !dispatch_if.ready);
`SCOPE_ASSIGN (gpr_rs1, gpr_rsp_if.rs1_data);
`SCOPE_ASSIGN (gpr_rs2, gpr_rsp_if.rs2_data);
`SCOPE_ASSIGN (gpr_rs3, gpr_rsp_if.rs3_data);
`SCOPE_ASSIGN (writeback_valid, writeback_if.valid);
`SCOPE_ASSIGN (writeback_uuid, writeback_if.uuid);
`SCOPE_ASSIGN (writeback_tmask, writeback_if.tmask);
`SCOPE_ASSIGN (writeback_rd, writeback_if.rd);
`SCOPE_ASSIGN (writeback_data, writeback_if.data);
`SCOPE_ASSIGN (writeback_eop, writeback_if.eop);
`ifdef SCOPE
localparam UUID_WIDTH = `UP(`UUID_BITS);
wire scoreboard_if_not_ready = ~scoreboard_if.ready;
wire dispatch_if_not_ready = ~dispatch_if.ready;
wire writeback_if_valid = writeback_if.valid;
VX_scope_tap #(
.SCOPE_ID (4),
.TRIGGERW (4),
.PROBEW (UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS + `INST_MOD_BITS +
1 + (`NR_BITS * 4) + 32 + 1 + 1 + (`NUM_THREADS * 3 * 32) +
UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*32) + 1)
) scope_tap (
.clk(clk),
.reset(scope_reset),
.start(1'b0),
.stop(1'b0),
.triggers({
ibuffer_if_fire,
scoreboard_if_not_ready,
dispatch_if_not_ready,
writeback_if_valid
}),
.probes({
ibuffer_if.uuid,
ibuffer_if.tmask,
ibuffer_if.ex_type,
ibuffer_if.op_type,
ibuffer_if.op_mod,
ibuffer_if.wb,
ibuffer_if.rd,
ibuffer_if.rs1,
ibuffer_if.rs2,
ibuffer_if.rs3,
ibuffer_if.imm,
ibuffer_if.use_PC,
ibuffer_if.use_imm,
gpr_rsp_if.rs1_data,
gpr_rsp_if.rs2_data,
gpr_rsp_if.rs3_data,
writeback_if.uuid,
writeback_if.tmask,
writeback_if.rd,
writeback_if.data,
writeback_if.eop
}),
.bus_in(scope_bus_in),
.bus_out(scope_bus_out)
);
`endif
`ifdef PERF_ENABLE
reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;

View file

@ -8,7 +8,7 @@ import VX_gpu_types::*;
module VX_lsu_unit #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_lsu_unit
`SCOPE_IO_DECL
input wire clk,
input wire reset,
@ -368,16 +368,22 @@ module VX_lsu_unit #(
);
`endif
// scope registration
`SCOPE_ASSIGN (dcache_req_fire, mem_req_fire);
`SCOPE_ASSIGN (dcache_req_uuid, lsu_req_if.uuid);
`SCOPE_ASSIGN (dcache_req_addr, full_addr);
`SCOPE_ASSIGN (dcache_req_rw, mem_req_rw);
`SCOPE_ASSIGN (dcache_req_byteen, mem_req_byteen);
`SCOPE_ASSIGN (dcache_req_data, mem_req_data);
`SCOPE_ASSIGN (dcache_rsp_fire, mem_rsp_fire);
`SCOPE_ASSIGN (dcache_rsp_uuid, rsp_uuid);
`SCOPE_ASSIGN (dcache_rsp_data, rsp_data);
`ifdef SCOPE
VX_scope_tap #(
.SCOPE_ID (5),
.TRIGGERW (2),
.PROBEW (UUID_WIDTH+`NUM_THREADS*(32+4+32)+1+UUID_WIDTH+`NUM_THREADS*32)
) scope_tap (
.clk(clk),
.reset(scope_reset),
.start(1'b0),
.stop(1'b0),
.triggers({mem_req_fire, mem_rsp_fire}),
.probes({lsu_req_if.uuid, full_addr, mem_req_rw, mem_req_byteen, mem_req_data, rsp_uuid, rsp_data}),
.bus_in(scope_bus_in),
.bus_out(scope_bus_out)
);
`endif
`ifdef DBG_TRACE_CORE_DCACHE
always @(posedge clk) begin

View file

@ -8,7 +8,7 @@ import VX_gpu_types::*;
module VX_warp_sched #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_warp_sched
`SCOPE_IO_DECL
input wire clk,
input wire reset,
@ -306,12 +306,21 @@ module VX_warp_sched #(
end
`RUNTIME_ASSERT(timeout_ctr < `STALL_TIMEOUT, ("%t: *** core%0d-scheduler-timeout: stalled_warps=%b", $time, CORE_ID, stalled_warps));
`SCOPE_ASSIGN (wsched_scheduled, schedule_fire);
`SCOPE_ASSIGN (wsched_schedule_uuid, instr_uuid);
`SCOPE_ASSIGN (wsched_active_warps, active_warps);
`SCOPE_ASSIGN (wsched_stalled_warps, stalled_warps);
`SCOPE_ASSIGN (wsched_schedule_wid, schedule_wid);
`SCOPE_ASSIGN (wsched_schedule_tmask, schedule_tmask);
`SCOPE_ASSIGN (wsched_schedule_pc, schedule_pc);
`ifdef SCOPE
VX_scope_tap #(
.SCOPE_ID (2),
.TRIGGERW (1),
.PROBEW (UUID_WIDTH+`NUM_WARPS+`NUM_WARPS+NW_WIDTH+`NUM_THREADS+32)
) scope_tap (
.clk(clk),
.reset(scope_reset),
.start(1'b0),
.stop(1'b0),
.triggers({schedule_fire}),
.probes({instr_uuid, active_warps, stalled_warps, schedule_wid, schedule_tmask, schedule_pc}),
.bus_in(scope_bus_in),
.bus_out(scope_bus_out)
);
`endif
endmodule

View file

@ -1,244 +0,0 @@
`include "VX_platform.vh"
`TRACING_OFF
// VX_scope: on-chip capture buffer.
// Records samples of data_in (plus the number of cycles elapsed since the
// previous stored sample) into two SIZE-entry memories, and exposes a small
// command/read protocol over bus_in / bus_out:
//   - bus_write strobes a command word on bus_in (low 3 bits = command type,
//     remaining bits = command argument);
//   - bus_read advances the read pointer while the selected command is GET_DATA.
// Parameters:
//   DATAW  - width of data_in and of each data_store entry
//   BUSW   - width of bus_in / bus_out
//   SIZE   - number of entries in data_store / delta_store
//   UPDW   - number of low data_in bits treated as the trigger id; when 0,
//            change detection is disabled and every cycle is stored
//   DELTAW - width of the delta (elapsed-cycles) counter
module VX_scope #(
parameter DATAW = 64,
parameter BUSW = 64,
parameter SIZE = 16,
parameter UPDW = 1,
parameter DELTAW = 16
) (
input wire clk,
input wire reset,
input wire start,
input wire stop,
input wire changed,
input wire [DATAW-1:0] data_in,
input wire [BUSW-1:0] bus_in,
output wire [BUSW-1:0] bus_out,
input wire bus_write,
input wire bus_read
);
// Change-based capture is only used when a trigger id field exists.
localparam UPDW_ENABLE = (UPDW != 0);
// Largest value the delta counter can hold.
localparam MAX_DELTA = (2 ** DELTAW) - 1;
// Command encodings decoded from bus_in[2:0] on bus_write.
localparam CMD_GET_VALID = 3'd0;
localparam CMD_GET_DATA = 3'd1;
localparam CMD_GET_WIDTH = 3'd2;
localparam CMD_GET_COUNT = 3'd3;
localparam CMD_SET_START = 3'd4;
localparam CMD_SET_STOP = 3'd5;
localparam CMD_GET_OFFSET= 3'd6;
// Read-selector encodings held in get_cmd (same values as the CMD_GET_* codes).
localparam GET_VALID = 3'd0;
localparam GET_DATA = 3'd1;
localparam GET_WIDTH = 3'd2;
localparam GET_COUNT = 3'd3;
localparam GET_OFFSET = 3'd6;
// Capture memories: sampled data and the delta count stored alongside it.
`NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [SIZE-1:0];
`NO_RW_RAM_CHECK reg [DELTAW-1:0] delta_store [SIZE-1:0];
reg [UPDW-1:0] prev_trigger_id;
reg [DELTAW-1:0] delta;
reg [BUSW-1:0] bus_out_r;
// timestamp is a free-running cycle counter; start_time latches it when recording begins.
reg [63:0] timestamp, start_time;
reg [`CLOG2(SIZE)-1:0] raddr, waddr, waddr_end;
// Bit offset into the current data_store entry when DATAW > BUSW.
reg [`LOG2UP(DATAW)-1:0] read_offset;
reg cmd_start, started, start_wait, recording, data_valid, read_delta, delta_flush;
reg [BUSW-3:0] delay_val, delay_cntr;
reg [2:0] get_cmd;
wire [2:0] cmd_type;
wire [BUSW-4:0] cmd_data;
// Split the incoming bus word: low 3 bits are the command, the rest its argument.
assign {cmd_data, cmd_type} = bus_in;
// The trigger id is the low UPDW bits of the sampled data.
wire [UPDW-1:0] trigger_id = data_in[UPDW-1:0];
always @(posedge clk) begin
if (reset) begin
get_cmd <= $bits(get_cmd)'(CMD_GET_VALID);
raddr <= '0;
waddr <= '0;
// By default recording stops at the last entry of the buffer.
waddr_end <= $bits(waddr)'(SIZE-1);
cmd_start <= 0;
started <= 0;
start_wait <= 0;
recording <= 0;
delay_val <= '0;
delay_cntr <= '0;
delta <= '0;
delta_flush <= 0;
prev_trigger_id <= '0;
read_offset <= '0;
read_delta <= '0;
data_valid <= 0;
timestamp <= '0;
start_time <= '0;
end else begin
timestamp <= timestamp + 1;
// ---- command decode ----
if (bus_write) begin
case (cmd_type)
// GET_* commands only select what bus_out presents.
CMD_GET_VALID,
CMD_GET_DATA,
CMD_GET_WIDTH,
CMD_GET_OFFSET,
CMD_GET_COUNT: get_cmd <= $bits(get_cmd)'(cmd_type);
// SET_START: latch the start delay and request a start.
CMD_SET_START: begin
delay_val <= $bits(delay_val)'(cmd_data);
cmd_start <= 1;
`ifdef DBG_TRACE_SCOPE
`TRACE(2, ("%d: *** scope: CMD_SET_START: delay_val=%0d\n", $time, $bits(delay_val)'(cmd_data)));
`endif
end
// SET_STOP: set the write address at which recording ends.
CMD_SET_STOP: begin
waddr_end <= $bits(waddr)'(cmd_data);
`ifdef DBG_TRACE_SCOPE
`TRACE(2, ("%d: *** scope: CMD_SET_STOP: waddr_end=%0d\n", $time, $bits(waddr)'(cmd_data)));
`endif
end
default:;
endcase
end
// ---- start handling (fires once: `started` is never cleared except by reset) ----
if (!started && (start || cmd_start)) begin
started <= 1;
// Force the first recorded cycle to be stored.
delta_flush <= 1;
if (0 == delay_val) begin
// No delay configured: begin recording right away.
start_wait <= 0;
recording <= 1;
delta <= '0;
delay_cntr <= '0;
start_time <= timestamp;
`ifdef DBG_TRACE_SCOPE
`TRACE(2, ("%d: *** scope: recording start - start_time=%0d\n", $time, timestamp));
`endif
end else begin
// Otherwise count down delay_val cycles first.
start_wait <= 1;
delay_cntr <= delay_val;
end
end
// ---- delayed start countdown ----
if (start_wait) begin
delay_cntr <= delay_cntr - 1;
if (1 == delay_cntr) begin
start_wait <= 0;
recording <= 1;
delta <= '0;
start_time <= timestamp;
`ifdef DBG_TRACE_SCOPE
`TRACE(2, ("%d: *** scope: recording start - start_time=%0d\n", $time, timestamp));
`endif
end
end
// ---- recording ----
if (recording) begin
if (UPDW_ENABLE != 0) begin
// Store a sample when a flush is pending, `changed` is asserted,
// or the trigger id differs from the previous cycle.
if (delta_flush
|| changed
|| (trigger_id != prev_trigger_id)) begin
delta_store[waddr] <= delta;
data_store[waddr] <= data_in;
waddr <= waddr + $bits(waddr)'(1);
delta <= '0;
delta_flush <= 0;
end else begin
// Nothing to store: count the idle cycle and schedule a flush
// once the counter reaches MAX_DELTA-1.
delta <= delta + DELTAW'(1);
delta_flush <= (delta == (MAX_DELTA-1));
end
prev_trigger_id <= trigger_id;
end else begin
// Change detection disabled: store every cycle with a zero delta.
delta_store[waddr] <= '0;
data_store[waddr] <= data_in;
waddr <= waddr + 1;
end
// Stop on the external stop input or when the end address is reached.
if (stop
|| (waddr >= waddr_end)) begin
`ifdef DBG_TRACE_SCOPE
`TRACE(2, ("%d: *** scope: recording stop - waddr=(%0d, %0d)\n", $time, waddr, waddr_end));
`endif
// This later nonblocking assignment overrides the increment above.
waddr <= waddr; // keep last address
recording <= 0;
data_valid <= 1;
// The first word read back for an entry is its delta.
read_delta <= 1;
end
end
// ---- read-out sequencing (only for GET_DATA while data is valid) ----
if (bus_read
&& (get_cmd == GET_DATA)
&& data_valid) begin
if (read_delta) begin
// Delta word consumed; subsequent reads return the data word(s).
read_delta <= '0;
end else begin
if (DATAW > BUSW) begin
// Wide entries are read in BUSW-bit chunks via read_offset.
if (read_offset < $bits(read_offset)'(DATAW-BUSW)) begin
read_offset <= read_offset + $bits(read_offset)'(BUSW);
end else begin
// Last chunk consumed: move to the next entry.
raddr <= raddr + $bits(raddr)'(1);
read_offset <= '0;
read_delta <= 1;
// The entry at waddr is the last one; nothing left after it.
if (raddr == waddr) begin
data_valid <= 0;
end
end
end else begin
// Entry fits in one bus word: advance to the next entry.
raddr <= raddr + 1;
read_delta <= 1;
if (raddr == waddr) begin
data_valid <= 0;
end
end
end
end
end
// Memory writes repeated outside the reset if/else, under the same store
// conditions as the recording logic above.
// NOTE(review): presumably kept separate so the RAM write is not qualified
// by reset; intent is not stated in this file -- confirm.
if (recording) begin
if (UPDW_ENABLE != 0) begin
if (delta_flush
|| changed
|| (trigger_id != prev_trigger_id)) begin
delta_store[waddr] <= delta;
data_store[waddr] <= data_in;
end
end else begin
delta_store[waddr] <= '0;
data_store[waddr] <= data_in;
end
end
end
// Combinational read mux: the value on bus_out depends on the last GET_* command.
always @(*) begin
case (get_cmd)
GET_VALID : bus_out_r = BUSW'(data_valid);
GET_WIDTH : bus_out_r = BUSW'(DATAW);
GET_COUNT : bus_out_r = BUSW'(waddr) + BUSW'(1);
GET_OFFSET: bus_out_r = BUSW'(start_time);
/* verilator lint_off WIDTH */
// Either the delta word or the data entry shifted right by read_offset bits.
GET_DATA : bus_out_r = read_delta ? BUSW'(delta_store[raddr]) : BUSW'(data_store[raddr] >> read_offset);
/* verilator lint_on WIDTH */
default : bus_out_r = '0;
endcase
end
assign bus_out = bus_out_r;
`ifdef DBG_TRACE_SCOPE
// Debug-only tracing of bus reads and writes.
always @(posedge clk) begin
if (bus_read) begin
`TRACE(2, ("%d: scope-read: cmd=%0d, addr=%0d, value=0x%0h\n", $time, get_cmd, raddr, bus_out));
end
if (bus_write) begin
`TRACE(2, ("%d: scope-write: cmd=%0d, value=%0d\n", $time, cmd_type, cmd_data));
end
end
`endif
endmodule
`TRACING_ON

View file

@ -0,0 +1,50 @@
`include "VX_platform.vh"
//`TRACING_OFF
// VX_scope_switch: 1-to-N fan-out of the single-bit scope request line and
// N-to-1 OR-merge of the single-bit scope response lines.
//   N > 1 : both directions are registered (one clk cycle each way);
//           reset clears the registers.
//   else  : pure pass-through of port 0; clk and reset are unused.
module VX_scope_switch #(
    parameter N = 0
) (
    input wire  clk,
    input wire  reset,
    input wire  req_in,
    output wire req_out [N],
    input wire  rsp_in [N],
    output wire rsp_out
);
    if (N > 1) begin
        reg req_fanout_q [N];   // registered copy of req_in, one per output
        reg rsp_merge_q;        // registered OR of all rsp_in bits

        always @(posedge clk) begin
            if (reset) begin
                rsp_merge_q <= 1'b0;
                for (integer k = 0; k < N; ++k) begin
                    req_fanout_q[k] <= 1'b0;
                end
            end else begin
                // Default to 0; any asserted response below overrides it,
                // since the last nonblocking assignment wins.
                rsp_merge_q <= 1'b0;
                for (integer k = 0; k < N; ++k) begin
                    req_fanout_q[k] <= req_in;
                    if (rsp_in[k]) begin
                        rsp_merge_q <= 1'b1;
                    end
                end
            end
        end

        assign rsp_out = rsp_merge_q;
        assign req_out = req_fanout_q;
    end else begin
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)
        assign rsp_out    = rsp_in[0];
        assign req_out[0] = req_in;
    end
endmodule
//`TRACING_ON

View file

@ -38,8 +38,7 @@ translation_rules = [
(re.compile(r"\d+'d(\d+)"), r'\1'),
(re.compile(r"\d+'b([01]+)"), r'0b\1'),
(re.compile(r"128'h([\da-fA-F_]+)"), r'"\1"'),
(re.compile(r"\d+'h([\da-fA-F]+)"), r'0x\1')
(re.compile(r"\d+'h([\da-fA-F]+)"), r'0x\1')
]
with open(args.output, 'w') as f:

View file

@ -1,209 +0,0 @@
{
"version": 1,
"include_paths":[
"../dpi",
"../rtl",
"../rtl/afu/opae",
"../rtl/core",
"../rtl/cache",
"../rtl/fpu_unit",
"../rtl/interfaces",
"../rtl/libs"
],
"includes":[
"../rtl/VX_platform.vh",
"../rtl/VX_config.vh",
"../rtl/VX_define.vh",
"../rtl/VX_gpu_types.vh",
"../rtl/fpu_unit/VX_fpu_types.vh",
"../rtl/fpu_unit/VX_fpu_define.vh",
"../rtl/cache/VX_cache_define.vh"
],
"modules": {
"afu": {
"submodules": {
"vortex": {"type":"Vortex", "enabled":true}
}
},
"Vortex": {
"submodules": {
"cluster": {"type":"VX_cluster", "count":"`NUM_CLUSTERS"},
"l3cache": {"type":"VX_cache_wrap", "enabled":"`L3_ENABLED", "params":{"NUM_BANKS":"`L3_NUM_BANKS"}}
}
},
"VX_cluster": {
"submodules": {
"socket": {"type":"VX_socket", "count":"`NUM_CORES", "enabled":true},
"rcache": {"type":"VX_cache_wrap", "enabled":"`EXT_RASTER_ENABLED", "params":{"NUM_BANKS":"`RCACHE_NUM_BANKS"}},
"ocache": {"type":"VX_cache_wrap", "enabled":"`EXT_ROP_ENABLED", "params":{"NUM_BANKS":"`OCACHE_NUM_BANKS"}},
"l2cache": {"type":"VX_cache_wrap", "enabled":"`L2_ENABLED", "params":{"NUM_BANKS":"`L2_NUM_BANKS"}}
}
},
"VX_socket": {
"submodules": {
"core": {"type":"VX_core", "count":"1", "enabled":true}
}
},
"VX_core": {
"submodules": {
"fetch": {"type":"VX_fetch", "enabled":true},
"decode": {"type":"VX_decode", "enabled":true},
"issue": {"type":"VX_issue", "enabled":true},
"execute": {"type":"VX_execute", "enabled":true},
"commit": {"type":"VX_commit", "enabled":true}
}
},
"VX_fetch": {
"submodules": {
"warp_sched": {"type":"VX_warp_sched"},
"icache_stage": {"type":"VX_icache_stage"}
}
},
"VX_warp_sched": {},
"VX_icache_stage": {},
"VX_decode": {},
"VX_issue": {},
"VX_execute": {
"submodules": {
"lsu_unit": {"type":"VX_lsu_unit"},
"gpu_unit": {"type":"VX_gpu_unit"}
}
},
"VX_commit": {},
"VX_lsu_unit": {},
"VX_gpu_unit": {},
"VX_mem_unit": {
"submodules": {
"dcache": {"type":"VX_cache_wrap", "params":{"NUM_BANKS":"`DCACHE_NUM_BANKS"}},
"icache": {"type":"VX_cache_wrap", "params":{"NUM_BANKS":"1"}},
"tcache": {"type":"VX_cache_wrap", "enabled":"`EXT_TEX_ENABLED", "params":{"NUM_BANKS":"`TCACHE_NUM_BANKS"}}
}
},
"VX_cache_wrap": {
"submodules": {
"cache": {"type":"VX_cache"}
}
},
"VX_cache": {
"submodules": {
"bank": {"type":"VX_cache_bank", "count":"NUM_BANKS"}
}
},
"VX_cache_bank": {}
},
"taps": {
"afu": {
"!cmd_type":3,
"!state":3,
"?cci_sRxPort_c0_mmioRdValid":1,
"?cci_sRxPort_c0_mmioWrValid":1,
"mmio_hdr_address":16,
"mmio_hdr_length":2,
"cci_sRxPort_c0_hdr_mdata":16,
"?cci_sRxPort_c0_rspValid":1,
"?cci_sRxPort_c1_rspValid":1,
"?cci_sTxPort_c0_valid":1,
"cci_sTxPort_c0_hdr_address":42,
"cci_sTxPort_c0_hdr_mdata":16,
"?cci_sTxPort_c1_valid":1,
"cci_sTxPort_c1_hdr_address":42,
"cci_sTxPort_c2_mmioRdValid":1,
"!cci_sRxPort_c0TxAlmFull":1,
"!cci_sRxPort_c1TxAlmFull":1,
"avs_address":26,
"!avs_waitrequest":1,
"?avs_write_fire":1,
"?avs_read_fire":1,
"avs_byteenable":64,
"avs_burstcount":4,
"avs_readdatavalid":1,
"cci_mem_rd_req_ctr":26,
"cci_mem_wr_req_ctr":26,
"cci_rd_req_ctr":26,
"cci_rd_rsp_ctr":3,
"cci_wr_req_ctr":26,
"?cci_wr_req_fire":1,
"?cci_wr_rsp_fire":1,
"?cci_rd_req_fire":1,
"?cci_rd_rsp_fire":1,
"!cci_pending_reads_full":1,
"!cci_pending_writes_empty":1,
"!cci_pending_writes_full": 1,
"?afu_mem_req_fire": 1,
"afu_mem_req_addr": 26,
"?afu_mem_rsp_fire": 1
},
"afu/vortex": {
"!reset": 1,
"?mem_req_fire": 1,
"mem_req_addr": 32,
"mem_req_rw": 1,
"?mem_rsp_fire": 1,
"busy": 1
},
"afu/vortex/cluster/socket/core/fetch/warp_sched": {
"?wsched_scheduled": 1,
"wsched_schedule_uuid": "`UP(`UUID_BITS)",
"wsched_active_warps": "`NUM_WARPS",
"wsched_stalled_warps": "`NUM_WARPS",
"wsched_schedule_tmask": "`NUM_THREADS",
"wsched_schedule_wid": "`NW_BITS",
"wsched_schedule_pc": 32
},
"afu/vortex/cluster/socket/core/fetch/icache_stage": {
"?icache_req_fire": 1,
"icache_req_uuid": "`UP(`UUID_BITS)",
"icache_req_addr": 32,
"?icache_rsp_fire": 1,
"icache_rsp_uuid": "`UP(`UUID_BITS)",
"icache_rsp_data": 32
},
"afu/vortex/cluster/socket/core/issue": {
"?issue_fire": 1,
"issue_uuid": "`UP(`UUID_BITS)",
"issue_tmask":"`NUM_THREADS",
"issue_ex_type":"`EX_BITS",
"issue_op_type":"`INST_OP_BITS",
"issue_op_mod":"`INST_MOD_BITS",
"issue_wb": 1,
"issue_rd":"`NR_BITS",
"issue_rs1":"`NR_BITS",
"issue_rs2":"`NR_BITS",
"issue_rs3":"`NR_BITS",
"issue_imm": 32,
"issue_use_pc": 1,
"issue_use_imm": 1,
"gpr_rs1":"`NUM_THREADS * 32",
"gpr_rs2":"`NUM_THREADS * 32",
"gpr_rs3":"`NUM_THREADS * 32",
"?writeback_valid": 1,
"writeback_uuid": "`UP(`UUID_BITS)",
"writeback_tmask":"`NUM_THREADS",
"writeback_rd":"`NR_BITS",
"writeback_data":"`NUM_THREADS * 32",
"writeback_eop": 1,
"!scoreboard_delay": 1,
"!dispatch_delay": 1
},
"afu/vortex/cluster/socket/core/execute/lsu_unit": {
"?dcache_req_fire":1,
"dcache_req_uuid": "`UP(`UUID_BITS)",
"dcache_req_addr":"`NUM_THREADS * 32",
"dcache_req_rw": 1,
"dcache_req_byteen":"`NUM_THREADS * 4",
"dcache_req_data":"`NUM_THREADS * 32",
"?dcache_rsp_fire":1,
"dcache_rsp_uuid": "`UP(`UUID_BITS)",
"dcache_rsp_data":"`NUM_THREADS * 32"
},
"afu/vortex/cluster/socket/core/execute/gpu_unit": {
"?gpu_rsp_valid": 1,
"gpu_rsp_uuid": "`UP(`UUID_BITS)",
"gpu_rsp_tmc": 1,
"gpu_rsp_wspawn": 1,
"gpu_rsp_split": 1,
"gpu_rsp_barrier": 1
}
}
}

View file

@ -1,827 +1,163 @@
#!/usr/bin/env python3
import os
import sys
import argparse
import xml.etree.ElementTree as ET
import re
import json
import argparse
import math
vl_include_re = re.compile(r"^\s*`include\s+\"(.+)\"")
vl_define_re = re.compile(r"^\s*`define\s+(\w+)(\([\w\s,]*\))?(.*)")
vl_ifdef_re = re.compile(r"^\s*`(ifdef|ifndef|elsif)\s+(\w+)\s*$")
vl_endif_re = re.compile(r"^\s*`(endif|else)\s*$")
vl_expand_re = re.compile(r"`([0-9a-zA-Z_]+)")
vl_int_re = re.compile(r"\d+'s*h([\da-fA-F]+)")
exclude_files = []
include_dirs = []
macros = []
br_stack = []
def parse_vl_int(text):
    """Convert a sized Verilog hex literal into a Python int.

    The module-level ``vl_int_re`` pattern matches ``<digits>'[s]h<hex>``;
    the match is replaced by its hex-digit group and the resulting string
    is parsed as base 16.
    """
    hex_digits = vl_int_re.sub(r'\1', text)
    return int(hex_digits, 16)
def translate_ternary(text):
def source_loc(xml_doc, xml_loc):
    """Render a ``loc`` attribute as ``<filename> (<l0>:<c0>-<l1>:<c1>)``.

    ``xml_loc`` is a comma-separated string whose first five fields are the
    file id, start line, start column, end line and end column. The file id
    is resolved to a filename through the ``<file id=...>`` element found in
    ``xml_doc``.
    """
    fields = xml_loc.split(",")
    file_id, first_line, first_col, last_line, last_col = (
        fields[0], fields[1], fields[2], fields[3], fields[4])
    file_node = xml_doc.find(".//file/[@id='" + file_id + "']")
    span = first_line + ":" + first_col + "-" + last_line + ":" + last_col
    return file_node.get("filename") + " (" + span + ")"
def skip_space(text, i, ln, step):
    """Advance index ``i`` by ``step`` while ``text[i]`` is whitespace.

    Scanning stops at the first non-whitespace character or as soon as
    ``i`` leaves the range ``[0, ln)``; the resulting index is returned
    (it may be ``-1`` or ``ln`` when the scan ran off either end).
    """
    while 0 <= i < ln and text[i].isspace():
        i += step
    return i
def skip_expr(text, i, ln, step):
paren = 0
checkparen = True
while (i >= 0) and (i < ln):
c = text[i]
if checkparen and (((step < 0) and (c == ')')) or ((step > 0) and (c == '('))):
paren += 1
elif checkparen and (((step < 0) and (c == '(')) or ((step > 0) and (c == ')'))):
if (0 == paren):
break
paren -= 1
if (0 == paren):
i = skip_space(text, i + step, ln, step)
checkparen = False
continue
elif (0 == paren) and not (c.isalnum() or (c == '_')):
break
i += step
return (i - step)
def parse_ternary(text):
ternary = None
ln = len(text)
for i in range(1, ln):
c = text[i]
if not (c == '?'):
continue
# parse condition expression
i0 = skip_space(text, i - 1, ln, -1)
if (i < 0):
raise Exception("invalid condition expression")
i1 = skip_expr(text, i0, ln, -1)
if (i1 > i0):
raise Exception("invalid condition expression")
# parse true expression
i2 = skip_space(text, i + 1, ln, 1)
if (i2 >= ln):
raise Exception("invalid true expression")
i3 = skip_expr(text, i2, ln, 1)
if (i3 < i2):
raise Exception("invalid true expression")
# parse colon
i4 = skip_space(text, i3 + 1, ln, 1)
if (i4 >= ln):
raise Exception("invalid colon")
if not (text[i4] == ':'):
raise Exception("missing colon")
# parse false expression
i5 = skip_space(text, i4 + 1, ln, 1)
if (i5 >= ln):
raise Exception("invalid false expression")
i6 = skip_expr(text, i5, ln, 1)
if (i6 < i5):
raise Exception("invalid false expression")
ternary = (i0, i1, i2, i3, i5, i6)
break
return ternary
while True:
pos = parse_ternary(text)
if pos is None:
break
# convert to python ternary
newText = text[:pos[1]] + text[pos[2]:pos[3]+1] + " if " + text[pos[1]:pos[0]+1] + " else " + text[pos[4]:pos[5]+1] + text[pos[5]+1:]
text = newText
return text
def parse_func_args(text):
    """Split a parenthesised macro argument list into its arguments.

    ``text`` must start with ``(``. Commas nested inside inner parentheses
    do not split arguments. Arguments are returned verbatim (surrounding
    whitespace is kept); arguments that are empty or whitespace-only are
    dropped.

    Returns:
        ``(args, end)`` where ``end`` is the index in ``text`` of the
        parenthesis that closes the argument list.

    Raises:
        Exception: if ``text`` does not start with ``(`` (including empty
            input) or the opening parenthesis is never closed.
    """
    # text[:1] instead of text[0] so empty input reports the intended error
    # rather than an IndexError.
    if text[:1] != '(':
        raise Exception("missing leading parenthesis: " + text)

    args = []
    arg = ''
    end = len(text)
    paren = 1
    for i in range(1, len(text)):
        c = text[i]
        if c == '(':
            paren += 1
        elif c == ')':
            # The depth can never be 0 here: the loop exits as soon as the
            # outermost parenthesis is closed, so no separate "mismatched"
            # check is needed.
            paren -= 1
            if paren == 0:
                end = i
                break
        if c == ',' and paren == 1:
            # top-level comma: finish the current argument
            if arg.strip():
                args.append(arg)
            arg = ''
        else:
            arg += c

    if paren != 0:
        raise Exception("missing closing parenthesis: " + text)

    if arg.strip():
        args.append(arg)

    return (args, end)
def load_include_path(dir):
    """Register ``dir`` in the global include search list, once.

    A directory that is already present is ignored; a new one is announced
    on stdout and appended to ``include_dirs``.
    """
    if dir in include_dirs:
        return
    print("*** include path: " + dir)
    include_dirs.append(dir)
def resolve_include_path(filename, parent_dir):
    """Resolve an included file name to an absolute path.

    Lookup order: the name as given (relative to the current directory),
    then every directory in ``include_dirs``, then ``parent_dir`` (the
    directory of the including file) when one is provided.

    Returns:
        The absolute path of the first match, or ``None`` when the file's
        base name is listed in ``exclude_files``.

    Raises:
        Exception: if the file cannot be found anywhere.
    """
    if os.path.basename(filename) in exclude_files:
        return None
    if os.path.isfile(filename):
        return os.path.abspath(filename)
    # Work on a copy: appending parent_dir to the global list would make it
    # a permanent (and repeatedly duplicated) search directory.
    search_dirs = list(include_dirs)
    if parent_dir:
        search_dirs.append(parent_dir)
    for search_dir in search_dirs:
        candidate = os.path.join(search_dir, filename)
        if os.path.isfile(candidate):
            return os.path.abspath(candidate)
    # str() keeps the message usable when parent_dir is None.
    raise Exception("couldn't find include file: " + filename + " in " + str(parent_dir))
def remove_comments(text):
    """Strip ``/* ... */`` and ``// ...`` comments from Verilog source text.

    Block comments are removed entirely (including any newlines inside
    them). Line comments are removed up to, but not including, the end of
    the line, so line structure is preserved; this also covers a comment
    on the last line when the text has no trailing newline.

    NOTE(review): the match is purely textual, so comment markers inside
    string literals are stripped as well.
    """
    # Raw strings avoid the invalid "\*" escape sequences of the non-raw form.
    text = re.sub(r"/\*.*?\*/", "", text, flags=re.DOTALL)  # multiline
    text = re.sub(r"//[^\n]*", "", text)                    # singleline
    return text
def add_macro(name, args, value):
macro = (name, args, value)
macros.append(macro)
'''
if not args is None:
print("*** token: " + name + "(", end='')
for i in range(len(args)):
if i > 0:
print(', ', end='')
print(args[i], end='')
print(")=" + value)
def parse_dtype_width(xml_doc, dtype_id):
xml_type = xml_doc.find(".//typetable/*[@id='" + dtype_id + "']")
if xml_type.tag == "packarraydtype" or xml_type.tag == "unpackarraydtype":
sub_dtype_id = xml_type.get("sub_dtype_id")
base_width = parse_dtype_width(xml_doc, sub_dtype_id)
const = xml_type.iter("const")
left = parse_vl_int(next(const).get("name"))
right = parse_vl_int(next(const).get("name"))
return base_width * (left - right + 1)
elif xml_type.tag == "structdtype":
width = 0
for member in xml_type.iter("memberdtype"):
sub_dtype_id = member.get("sub_dtype_id")
width = width + parse_dtype_width(xml_doc, sub_dtype_id)
return width
elif xml_type.tag == "uniondtype":
width = 0
for member in xml_type.iter("memberdtype"):
sub_dtype_id = member.get("sub_dtype_id")
width = max(width, parse_dtype_width(xml_doc, sub_dtype_id))
return width
else:
print("*** token: " + name + "=" + value)
'''
def find_macro(name):
    """Return the first ``(name, args, value)`` entry of ``macros`` whose
    name equals ``name``, or ``None`` when no such macro is registered."""
    return next((entry for entry in macros if entry[0] == name), None)
def expand_text(text, params):
def re_pattern_args(args):
p = "(?<![0-9a-zA-Z_])("
i = 0
for arg in args:
if i > 0:
p += "|"
p += arg
i += 1
p += ")(?![0-9a-zA-Z_])"
return p
class DoReplParam(object):
def __init__(self, params):
self.params = params
self.expanded = False
def __call__(self, match):
name = match.group(1)
self.expanded = True
return self.params[name]
class DoReplMacro(object):
def __init__(self):
self.expanded = False
self.has_func = False
def __call__(self, match):
name = match.group(1)
macro = find_macro(name)
if macro:
if not macro[1] is None:
self.has_func = True
else:
self.expanded = True
return macro[2]
return "`" + name
def repl_func_macro(text):
expanded = False
match = re.search(vl_expand_re, text)
if match:
name = match.group(1)
macro = find_macro(name)
if macro:
args = macro[1]
value = macro[2]
if not args is None:
str_args = text[match.end():].strip()
f_args = parse_func_args(str_args)
if len(args) == 0:
if len(f_args[0]) != 0:
raise Exception("invalid argments for macro '" + name + "': value=" + text)
else:
if len(args) != len(f_args[0]):
raise Exception("mismatch number of argments for macro '" + name + "': actual=" + len(f_args[0]) + ", expected=" + len(args))
pattern = re_pattern_args(args)
params = {}
for i in range(len(args)):
params[args[i]] = f_args[0][i]
dorepl = DoReplParam(params)
value = re.sub(pattern, dorepl, value)
str_head = text[0:match.start()]
str_tail = text[match.end() + f_args[1]+1:]
text = str_head + value + str_tail
expanded = True
if expanded:
return text
return None
changed = False
iter = 0
while True:
if iter > 65536:
raise Exception("Macro recursion!")
has_func = False
while True:
params_updated = False
if not params is None:
do_repl = DoReplParam(params)
pattern = re_pattern_args(params)
new_text = re.sub(pattern, do_repl, text)
if do_repl.expanded:
text = new_text
params_updated = True
do_repl = DoReplMacro()
new_text = re.sub(vl_expand_re, do_repl, text)
has_func = do_repl.has_func
if not (params_updated or do_repl.expanded):
break
text = new_text
changed = True
if not has_func:
break
expanded = repl_func_macro(text)
if not expanded:
break
text = expanded
changed = True
iter += 1
if changed:
return text
return None
def parse_include(filename, nesting):
print("*** parsing: " + filename + "...")
if nesting > 99:
raise Exception("include recursion!")
#print("*** parsing '" + filename + "'...")
content = None
with open(filename, "r") as f:
content = f.read()
# remove comments
content = remove_comments(content)
# parse content
prev_line = None
for line in content.splitlines(False):
# skip empty lines
if re.match(re.compile(r'^\s*$'), line):
continue
# merge multi-line lines
if line.endswith('\\'):
if prev_line:
prev_line += line[:len(line) - 1]
else:
prev_line = line[:len(line) - 1]
continue
if prev_line:
line = prev_line + line
prev_line = None
# parse ifdef
m = re.match(vl_ifdef_re, line)
if m:
key = m.group(1)
cond = m.group(2)
taken = find_macro(cond) is not None
if key == 'ifndef':
taken = not taken
elif key == 'elsif':
br_stack.pop()
br_stack.append(taken)
#print("*** " + key + "(" + cond + ") => " + str(taken))
continue
# parse endif
m = re.match(vl_endif_re, line)
if m:
key = m.group(1)
top = br_stack.pop()
if key == 'else':
br_stack.append(not top)
#print("*** " + key)
continue
# skip disabled blocks
if not all(br_stack):
continue
sub_dtype_id = xml_type.get("sub_dtype_id")
if sub_dtype_id != None:
return parse_dtype_width(xml_doc, sub_dtype_id)
left = xml_type.get("left")
right = xml_type.get("right")
if left != None and right != None:
return int(left) - int(right) + 1
return 1
# parse include
m = re.match(vl_include_re, line)
if m:
include = m.group(1)
include = resolve_include_path(include, os.path.dirname(filename))
if include:
parse_include(include, nesting + 1)
def parse_var_name(xml_doc, xml_node):
    """Return the signal name referenced by a ``varref``/``varxref`` node.

    A ``varref`` yields its ``name`` attribute; a ``varxref`` yields
    ``<dotted>.<name>`` built from its ``dotted`` and ``name`` attributes.

    Raises:
        ET.ParseError: for any other node tag; the message includes the
            source location derived from the node's ``loc`` attribute.
    """
    if xml_node.tag == "varref":
        return xml_node.get("name")
    if xml_node.tag == "varxref":
        return xml_node.get("dotted") + '.' + xml_node.get("name")
    raise ET.ParseError("invalid probe entry: " + source_loc(xml_doc, xml_node.get("loc")))
def parse_sel_name(xml_doc, xml_node):
    """Build the name of a bit-select probe as ``<variable>_<offset>``.

    The variable name comes from the node's first child; the offset is the
    value of the first ``const`` element found under the node.
    """
    base_name = parse_var_name(xml_doc, xml_node.find("*"))
    first_const = next(xml_node.iter("const"))
    bit_offset = parse_vl_int(first_const.get("name"))
    return base_name + '_' + str(bit_offset)
def parse_array_name(xml_doc, xml_node):
    """Build the name of an array-select probe.

    Nested ``arraysel`` nodes are unwrapped down to the underlying variable
    reference; the value of each level's ``const`` child is then appended
    as ``_<value>``, innermost level first.
    """
    selects = []
    node = xml_node
    while node.tag == "arraysel":
        selects.append(node)
        node = node.find("*")

    name = parse_var_name(xml_doc, node)
    # Append indices from the innermost select outwards.
    for select in reversed(selects):
        index = parse_vl_int(select.find("const").get("name"))
        name = name + '_' + str(index)
    return name
def parse_vl_port(xml_doc, xml_node, signals):
    """Collect the signals wired to a port expression and return their width.

    ``concat`` nodes are walked recursively, child by child. Every leaf
    (``varref``/``varxref``, ``sel`` or ``arraysel``) is appended to
    ``signals`` as ``[name, width]``, with the width taken from the node's
    ``dtype_id``.

    Returns:
        The total bit width of everything found under ``xml_node``.

    Raises:
        ET.ParseError: if a node of any other kind is encountered.
    """
    tag = xml_node.tag
    if tag == "concat":
        return sum(parse_vl_port(xml_doc, child, signals)
                   for child in xml_node.findall("*"))

    if tag in ("varref", "varxref"):
        name = parse_var_name(xml_doc, xml_node)
    elif tag == "sel":
        name = parse_sel_name(xml_doc, xml_node)
    elif tag == "arraysel":
        name = parse_array_name(xml_doc, xml_node)
    else:
        raise ET.ParseError("invalid probe entry: " + source_loc(xml_doc, xml_node.get("loc")))

    width = parse_dtype_width(xml_doc, xml_node.get("dtype_id"))
    signals.append([name, width])
    return width
def parse_xml(filename, max_taps):
xml_doc = ET.parse(filename)
modules = {}
xml_modules = xml_doc.findall(".//module/[@origName='VX_scope_tap']")
for xml_module in xml_modules:
scope_id = parse_vl_int(xml_module.find(".//var/[@name='SCOPE_ID']/const").get("name"))
triggerw = parse_vl_int(xml_module.find(".//var/[@name='TRIGGERW']/const").get("name"))
probew = parse_vl_int(xml_module.find(".//var/[@name='PROBEW']/const").get("name"))
module_name = xml_module.get("name")
modules[module_name] = [scope_id, triggerw, probew]
taps = []
xml_instances = xml_doc.iter("instance")
for xml_instance in xml_instances:
if (max_taps != -1 and len(taps) >= max_taps):
break
defName = xml_instance.get("defName")
module = modules.get(defName)
if module is None:
continue
# parse define
m = re.match(vl_define_re, line)
if m:
name = m.group(1)
args = m.group(2)
if args:
args = args[1:len(args)-1].strip()
if args != '':
args = args.split(',')
for i in range(len(args)):
args[i] = args[i].strip()
else:
args = []
value = m.group(3)
add_macro(name, args, value.strip())
continue
print("*** exiting: " + filename + "...")
triggers = []
probes = []
w = parse_vl_port(xml_doc, xml_instance.find(".//port/[@name='triggers']/*"), triggers)
if w != module[1]:
raise ET.ParseError("invalid triggers width: actual=" + str(w) + ", expected=" + str(module[1]))
w = parse_vl_port(xml_doc, xml_instance.find(".//port/[@name='probes']/*"), probes)
if w != module[2]:
raise ET.ParseError("invalid probes width: actual=" + str(w) + ", expected=" + str(module[2]))
signals = probes
for trigger in triggers:
signals.append(trigger)
loc = xml_instance.get("loc")
hier = xml_doc.find(".//cell/[@loc='" + loc + "']").get("hier")
path = hier.rsplit(".", 1)[0]
taps.append({"id":module[0],
"width":module[1] + module[2],
"signals":signals,
"path":path})
def parse_includes(includes):
    """Parse every file in ``includes`` and register its directory.

    Paths are interpreted relative to the directory containing this script:
    the working directory is switched there for the duration of the call
    and restored afterwards, even when parsing raises.
    """
    old_dir = os.getcwd()
    script_dir = os.path.dirname(os.path.realpath(__file__))
    os.chdir(script_dir)
    try:
        for include in includes:
            parse_include(include, 0)
            load_include_path(os.path.dirname(include))
    finally:
        # restore current directory
        os.chdir(old_dir)
def load_defines(defines):
    """Register command-line ``name[=value]`` definitions as macros.

    Each entry is split at its first ``=`` only, so a value may itself
    contain ``=``. An entry without ``=`` defines the macro with an empty
    value. Macros are registered without an argument list.
    """
    for define in defines:
        name, _, value = define.partition('=')
        add_macro(name, None, value)
def load_config(filename):
    """Load the JSON configuration file at ``filename``.

    The parsed object is echoed to stdout and returned.
    """
    with open(filename, "r") as f:
        config = json.load(f)
    print("config=", config)
    return config
def eval_node(text, params):
    """Evaluate a configuration value that may be a Verilog expression.

    Non-string values are returned unchanged. Strings are macro-expanded
    with ``expand_text`` (using ``params``), translated to Python syntax
    (``$clog2``, ternaries, ``||``, ``&&``) and evaluated.

    Returns:
        The evaluated result, or the (expanded) text itself when it cannot
        be evaluated because of a ``NameError`` or ``SyntaxError``.
    """
    def clog2(x):
        # Verilog $clog2 is the ceiling of log2(x); $clog2(0) and $clog2(1)
        # are both 0.
        x = int(x)
        return (x - 1).bit_length() if x > 1 else 0

    if not type(text) == str:
        return text

    expanded = expand_text(text, params)
    if expanded:
        text = expanded

    try:
        expr = text.replace('$clog2', '__clog2')
        expr = translate_ternary(expr)
        # Pad the keywords so "a||b" becomes "a or b" rather than "aorb".
        expr = expr.replace('||', ' or ')
        expr = expr.replace('&&', ' and ')
        # NOTE(review): eval() executes arbitrary Python; this is only
        # acceptable as long as the evaluated text comes from trusted
        # configuration and header files.
        return eval(expr, {'__clog2': clog2})
    except (NameError, SyntaxError):
        return text
def gen_vl_header(file, modules, taps):
header = '''
`ifndef VX_SCOPE_DEFS
`define VX_SCOPE_DEFS
'''
footer = '`endif'
def signal_size(size, mn):
if type(size) == int:
if (size != mn):
return "[" + str(size-1) + ":0]"
else:
return ""
else:
return "[" + size + "-1:0]"
def create_signal(key, ports):
if not key in ports:
ports[key] = []
return ports[key]
def dic_insert(gdic, ldic, key, value, enabled):
if enabled:
ldic[key] = value
if key in gdic:
return False
if enabled:
gdic[key] = None
return True
def trigger_name(name, size):
if type(size) == int:
if size != 1:
return "(| " + name + ")"
else:
return name
else:
return "(| " + name + ")"
def trigger_subscripts(asize):
def Q(arr, ss, asize, idx, N):
a = asize[idx]
if (a != 0):
for i in range(a):
tmp = ss + '[' + str(i) + ']'
if (idx + 1) < N:
Q(arr, tmp, asize, idx + 1, N)
else:
arr.append(tmp)
else:
if (idx + 1) < N:
Q(arr, ss, asize, idx + 1, N)
else:
arr.append(ss)
if asize is None:
return [""]
ln = len(asize)
if (0 == ln):
return [""]
arr = []
Q(arr, "", asize, 0, ln)
return arr
def visit_path(alltaps, ports, ntype, paths, modules, taps):
curtaps = {}
if (len(paths) != 0):
spath = paths.pop(0)
snodes = modules[ntype]["submodules"]
if not spath in snodes:
raise Exception("invalid path: " + spath + " in " + ntype)
snode = snodes[spath]
stype = snode["type"]
enabled = True
if "enabled" in snode:
enabled = eval_node(snode["enabled"], None)
subtaps = visit_path(alltaps, ports, stype, paths, modules, taps)
scount = 0
if "count" in snode:
scount = eval_node(snode["count"], None)
params = None
if "params" in snode:
params = snode["params"]
new_staps = []
nn = "SCOPE_IO_" + ntype
pp = create_signal(nn, ports)
for key in subtaps:
subtap = subtaps[key]
s = subtap[0]
a = subtap[1]
t = subtap[2]
aa = [scount]
sa = signal_size(scount, 0)
if a:
for i in a:
x = eval_node(i, params)
aa.append(x)
sa += signal_size(x, 0)
if dic_insert(alltaps, curtaps, spath + '/' + key, (s, aa, t), enabled):
skey = key.replace('/', '_')
if enabled:
pp.append("\toutput wire" + sa + signal_size(s, 1) + " scope_" + spath + '_' + skey + ',')
new_staps.append(skey)
ports[nn] = pp
if (0 == scount):
nn = "SCOPE_BIND_" + ntype + '_' + spath
pp = create_signal(nn, ports)
for st in new_staps:
if enabled:
pp.append("\t.scope_" + st + "(scope_" + spath + '_' + st + "),")
else:
pp.append("\t`UNUSED_PIN (scope_" + st + "),")
ports[nn] = pp
else:
nn = "SCOPE_BIND_" + ntype + '_' + spath + "(__i__)"
pp = create_signal(nn, ports)
for st in new_staps:
if enabled:
pp.append("\t.scope_" + st + "(scope_" + spath + '_' + st + "[__i__]),")
else:
pp.append("\t`UNUSED_PIN (scope_" + st + "),")
ports[nn] = pp
else:
nn = "SCOPE_IO_" + ntype
pp = create_signal(nn, ports)
for tk in taps:
trigger = 0
name = tk
size = eval_node(taps[tk], None)
if name[0] == '!':
name = name[1:]
trigger = 1
elif name[0] == '?':
name = name[1:]
trigger = 2
if dic_insert(alltaps, curtaps, name, (size, None, trigger), True):
pp.append("\toutput wire" + signal_size(size, 1) + " scope_" + name + ',')
ports[nn] = pp
return curtaps
toptaps = {}
with open(file, 'w') as f:
ports = {}
alltaps = {}
for key in taps:
skey_list = key.split(',')
_taps = taps[key]
for skey in skey_list:
#print('*** processing node: ' + skey + ' ...')
paths = skey.strip().split('/')
ntype = paths.pop(0)
curtaps = visit_path(alltaps, ports, ntype, paths, modules, _taps)
for tk in curtaps:
toptaps[tk] = curtaps[tk]
print(header, file=f)
for key in ports:
print("`define " + key + ' \\', file=f)
for port in ports[key]:
print(port + ' \\', file=f)
print("", file=f)
print("`define SCOPE_DECL_SIGNALS \\", file=f)
i = 0
for key in toptaps:
tap = toptaps[key]
name = key.replace('/', '_')
size = tap[0]
asize = tap[1]
sa = ""
if asize:
for a in asize:
sa += signal_size(a, 0)
if i > 0:
print(" \\", file=f)
print('\t wire' + sa + signal_size(size, 1) + " scope_" + name + ';', file=f, end='')
i += 1
print("", file=f)
print("", file=f)
print("`define SCOPE_DATA_LIST \\", file=f)
i = 0
for key in toptaps:
tap = toptaps[key]
trigger = tap[2]
if trigger != 0:
continue
name = key.replace('/', '_')
if i > 0:
print(", \\", file=f)
print("\t scope_" + name, file=f, end='')
i += 1
print("", file=f)
print("", file=f)
print("`define SCOPE_UPDATE_LIST \\", file=f)
i = 0
for key in toptaps:
tap = toptaps[key]
trigger = tap[2]
if trigger == 0:
continue
name = key.replace('/', '_')
if i > 0:
print(", \\", file=f)
print("\t scope_" + name, file=f, end='')
i += 1
print("", file=f)
print("", file=f)
print("`define SCOPE_TRIGGER \\", file=f)
i = 0
for key in toptaps:
tap = toptaps[key]
if tap[2] != 2:
continue
size = tap[0]
asize = tap[1]
sus = trigger_subscripts(asize)
for su in sus:
if i > 0:
print(" | \\", file=f)
print("\t(", file=f, end='')
name = trigger_name("scope_" + key.replace('/', '_') + su, size)
print(name, file=f, end='')
print(")", file=f, end='')
i += 1
print("", file=f)
print("", file=f)
print(footer, file=f)
return toptaps
def gen_cc_header(file, taps):
header = '''#pragma once
struct scope_module_t {
const char* name;
int index;
int parent;
};
struct scope_tap_t {
int width;
const char* name;
int module;
};
'''
def flatten_path(paths, sizes):
def Q(arr, ss, idx, N, paths, sizes):
size = sizes[idx]
if size != 0:
for i in range(sizes[idx]):
tmp = ss + ('/' if (ss != '') else '')
tmp += paths[idx] + '_' + str(i)
if (idx + 1) < N:
Q(arr, tmp, idx + 1, N, paths, sizes)
else:
arr.append(tmp)
else:
tmp = ss + ('/' if (ss != '') else '')
tmp += paths[idx]
if (idx + 1) < N:
Q(arr, tmp, idx + 1, N, paths, sizes)
else:
arr.append(tmp)
arr = []
Q(arr, "", 0, len(asize), paths, asize)
return arr
# flatten the taps
fdic = {}
for key in taps:
tap = taps[key]
size = str(tap[0])
trigger = tap[2]
if (trigger != 0):
continue
paths = key.split('/')
if (len(paths) > 1):
name = paths.pop(-1)
asize = tap[1]
for ss in flatten_path(paths, asize):
fdic[ss + '/' + name ] = [size, 0]
else:
fdic[key] = [size, 0]
for key in taps:
tap = taps[key]
size = str(tap[0])
trigger = tap[2]
if (trigger == 0):
continue
paths = key.split('/')
if (len(paths) > 1):
name = paths.pop(-1)
asize = tap[1]
for ss in flatten_path(paths, asize):
fdic[ss + '/' + name ] = [size, 0]
else:
fdic[key] = [size, 0]
# generate module dic
mdic = {}
mdic["*"] = ("*", 0, -1)
for key in fdic:
paths = key.split('/')
if len(paths) == 1:
continue
paths.pop(-1)
parent = 0
mk = ""
for path in paths:
mk += '/' + path
if not mk in mdic:
index = len(mdic)
mdic[mk] = (path, index, parent)
parent = index
else:
parent = mdic[mk][1]
fdic[key][1] = parent
with open(file, 'w') as f:
print(header, file=f)
print("static constexpr scope_module_t scope_modules[] = {", file=f)
i = 0
for key in mdic:
m = mdic[key]
if i > 0:
print(',', file=f)
print("\t{\"" + m[0] + "\", " + str(m[1]) + ", " + str(m[2]) + "}", file=f, end='')
i += 1
print("", file=f)
print("};", file=f)
print("", file=f)
print("static constexpr scope_tap_t scope_taps[] = {", file=f)
i = 0
for key in fdic:
size = fdic[key][0]
parent = fdic[key][1]
paths = key.split('/')
if len(paths) > 1:
name = paths.pop(-1)
else:
name = key
if i > 0:
print(',', file=f)
print("\t{" + size + ", \"" + name + "\", " + str(parent) + "}", file=f, end='')
i += 1
print("", file=f)
print("};", file=f)
return {"version":"0.1.0", "taps":taps}
def main():
parser = argparse.ArgumentParser(description='Scope headers generator.')
parser.add_argument('-vl', nargs='?', default='scope-defs.vh', metavar='file', help='Output Verilog header')
parser.add_argument('-cc', nargs='?', default='scope-defs.h', metavar='file', help='Output C++ header')
parser.add_argument('-D', nargs='?', action='append', metavar='macro[=value]', help='define macro')
parser.add_argument('-I', nargs='?', action='append', metavar='<includedir>', help='include directory')
parser.add_argument('config', help='Json config file')
parser.add_argument('-o', nargs='?', default='scope.json', metavar='o', help='Output JSON manifest')
parser.add_argument('-n', nargs='?', default=-1, metavar='n', type=int, help='Maximum number of taps to read')
parser.add_argument('xml', help='Design XML descriptor file')
args = parser.parse_args()
print("args=", args)
global exclude_files
global include_dirs
global macros
global br_stack
if args.D:
load_defines(args.D)
if args.I:
for dir in args.I:
load_include_path(dir)
config = load_config(args.config)
exclude_files.append(os.path.basename(args.vl))
if "include_paths" in config:
for path in config["include_paths"]:
load_include_path(path)
if "includes" in config:
parse_includes(config["includes"])
taps = gen_vl_header(args.vl, config["modules"], config["taps"])
gen_cc_header(args.cc, taps)
#print("args=", args)
scope_taps = parse_xml(args.xml, args.n)
with open(args.o, "w") as f:
json.dump(scope_taps, f, ensure_ascii=False, indent=4)
if __name__ == '__main__':
main()
main()

File diff suppressed because it is too large Load diff

View file

@ -5,37 +5,47 @@
#include <thread>
#include <chrono>
#include <vector>
#include <list>
#include <assert.h>
#include <chrono>
#include <thread>
#include <condition_variable>
#include <mutex>
#include <unordered_set>
#include <sstream>
#include <VX_config.h>
#include <vortex_afu.h>
#include "scope-defs.h"
#include <nlohmann_json.hpp>
#define FRAME_FLUSH_SIZE 100
#define MMIO_SCOPE_READ (AFU_IMAGE_MMIO_SCOPE_READ * 4)
#define MMIO_SCOPE_WRITE (AFU_IMAGE_MMIO_SCOPE_WRITE * 4)
#define CMD_GET_VALID 0
#define CMD_GET_DATA 1
#define CMD_GET_WIDTH 2
#define CMD_GET_COUNT 3
#define CMD_GET_START 4
#define CMD_SET_START 5
#define CMD_SET_STOP 6
#define CMD_GET_WIDTH 0
#define CMD_GET_COUNT 1
#define CMD_GET_START 2
#define CMD_GET_DATA 3
#define CMD_SET_START 4
#define CMD_SET_STOP 5
static constexpr int num_modules = sizeof(scope_modules) / sizeof(scope_module_t);
// One signal captured by a scope tap, as listed under "signals" in the
// scope JSON manifest.
struct tap_signal_t {
// identifier written as the id field of the signal's VCD "$var" line
uint32_t id;
// signal name written to the VCD "$var" line
std::string name;
// signal width in bits
uint32_t width;
};
static constexpr int num_taps = sizeof(scope_taps) / sizeof(scope_tap_t);
// Host-side state of one scope tap while its trace is being dumped.
struct tap_t {
// tap identifier from the manifest; encoded into every MMIO scope command
uint32_t id;
// frame width in bits (the manifest's "width" entry)
uint32_t width;
// total number of frames to dump; a tap is finished once cur_frame == frames
// NOTE(review): presumably filled from the device's CMD_GET_COUNT reply — confirm
uint32_t frames;
// number of frames dumped so far
uint32_t cur_frame;
// running timestamp of the tap's next frame; deltas read from the device are added to it
uint64_t ticks;
// dot-separated module hierarchy path of the tap
std::string path;
// signals making up one frame
std::vector<tap_signal_t> signals;
};
constexpr int calcFrameWidth(int index = 0) {
return (index < num_taps) ? (scope_taps[index].width + calcFrameWidth(index + 1)) : 0;
}
static constexpr int fwidth = calcFrameWidth();
using json = nlohmann::json;
#ifdef HANG_TIMEOUT
static std::thread g_timeout_thread;
@ -50,7 +60,7 @@ static void timeout_callback(vx_device_h hdevice) {
std::cerr << "Scope timed out!" << std::endl;
g_timeout_enabled = false;
auto device = ((vx_device*)hdevice);
auto api = device->api;
auto& api = device->api;
vx_scope_stop(hdevice);
api.fpgaClose(device->fpga);
exit(-1);
@ -60,7 +70,7 @@ static void timeout_callback(vx_device_h hdevice) {
}
#endif
uint64_t print_clock(std::ofstream& ofs, uint64_t delta, uint64_t timestamp) {
static uint64_t dump_clock(std::ofstream& ofs, uint64_t delta, uint64_t timestamp) {
while (delta != 0) {
ofs << '#' << timestamp++ << std::endl;
ofs << "b0 0" << std::endl;
@ -71,54 +81,200 @@ uint64_t print_clock(std::ofstream& ofs, uint64_t delta, uint64_t timestamp) {
return timestamp;
}
void dump_taps(std::ofstream& ofs, int module) {
for (int i = 0; i < num_taps; ++i) {
auto& tap = scope_taps[i];
if (tap.module != module)
continue;
ofs << "$var reg " << tap.width << " " << (i + 1) << " " << tap.name << " $end" << std::endl;
// Splits `s` at every occurrence of `delimiter`.
// Empty tokens between consecutive delimiters are kept, but a trailing
// delimiter does not produce a final empty token, and an empty input
// yields an empty vector.
static std::vector<std::string> split(const std::string &s, char delimiter) {
  std::vector<std::string> tokens;
  std::string::size_type begin = 0;
  while (begin < s.size()) {
    const std::string::size_type end = s.find(delimiter, begin);
    if (end == std::string::npos) {
      // last token: everything up to the end of the string
      tokens.push_back(s.substr(begin));
      break;
    }
    tokens.push_back(s.substr(begin, end - begin));
    begin = end + 1;
  }
  return tokens;
}
void dump_module(std::ofstream& ofs, int parent) {
for (auto& module : scope_modules) {
if (module.parent != parent)
continue;
if (module.name[0] == '*') {
ofs << "$var reg 1 0 clk $end" << std::endl;
} else {
ofs << "$scope module " << module.name << " $end" << std::endl;
}
dump_module(ofs, module.index);
dump_taps(ofs, module.index);
if (module.name[0] != '*') {
ofs << "$upscope $end" << std::endl;
static void dump_module(std::ofstream& ofs,
const std::string& name,
std::unordered_map<std::string, std::unordered_set<std::string>>& hierarchy,
std::unordered_map<std::string, tap_t*>& tails,
int indentation) {
std::string indent(indentation, ' ');
ofs << indent << "$scope module " << name << " $end" << std::endl;
auto itt = tails.find(name);
if (itt != tails.end()) {
for (auto& signal : itt->second->signals) {
ofs << indent << " $var reg " << signal.width << " " << signal.id << " " << signal.name << " $end" << std::endl;
}
}
auto ith = hierarchy.find(name);
if (ith != hierarchy.end()) {
for (auto& child : ith->second) {
dump_module(ofs, child, hierarchy, tails, indentation + 1);
}
}
ofs << indent << "$upscope $end" << std::endl;
}
static void dump_header(std::ofstream& ofs, std::vector<tap_t>& taps) {
ofs << "$version Generated by Vortex Scope Analyzer $end" << std::endl;
ofs << "$timescale 1 ns $end" << std::endl;
ofs << "$scope module TOP $end" << std::endl;
ofs << " $var reg 1 0 clk $end" << std::endl;
std::unordered_map<std::string, std::unordered_set<std::string>> hierarchy;
std::unordered_set<std::string> heads;
std::unordered_map<std::string, tap_t*> tails;
// Build hierarchy
for (auto& tap : taps) {
std::vector<std::string> tokens = split(tap.path, '.');
for (size_t i = 1; i < tokens.size(); ++i) {
hierarchy[tokens[i-1]].insert(tokens[i]);
}
auto h = tokens[0];
auto t = tokens[tokens.size()-1];
heads.insert(h);
tails[t] = &tap;
}
// Dump module huierarchy
for (auto& head : heads) {
dump_module(ofs, head, hierarchy, tails, 1);
}
ofs << "$upscope $end" << std::endl;
ofs << "enddefinitions $end" << std::endl;
}
// Returns the tap with the smallest `ticks` value among the taps that still
// have frames left to dump (cur_frame != frames), or nullptr when every tap
// is finished. On equal ticks the earliest tap in `taps` wins.
static tap_t* find_nearest_tap(std::vector<tap_t>& taps) {
  tap_t* best = nullptr;
  for (auto it = taps.begin(); it != taps.end(); ++it) {
    const bool drained = (it->cur_frame == it->frames);
    if (drained)
      continue;
    if (best == nullptr || it->ticks < best->ticks)
      best = &*it;
  }
  return best;
}
static void dump_tap(std::ofstream& ofs, tap_t* tap, vx_device* device) {
auto& api = device->api;
uint32_t signal_offset = 0;
uint32_t frame_offset = 0;
uint64_t word;
std::vector<char> signal_data(tap->width);
auto signal_it = tap->signals.rbegin();
uint32_t signal_width = signal_it->width;
do {
// read data
uint64_t cmd_data = (tap->id << 3) | CMD_GET_DATA;
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, cmd_data), {
return;
});
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &word), {
return;
});
do {
uint32_t word_offset = frame_offset % 64;
signal_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? '1' : '0';
++signal_offset;
++frame_offset;
if (signal_offset == signal_width) {
signal_data[signal_width] = 0; // string null termination
ofs << 'b' << signal_data.data() << ' ' << signal_it->id << std::endl;
if (frame_offset == tap->width) {
// end-of-frame
++tap->cur_frame;
if (tap->cur_frame != tap->frames) {
// read next delta
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, cmd_data), {
return;
});
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &word), {
return;
});
tap->ticks += word;
if (0 == (tap->cur_frame % FRAME_FLUSH_SIZE)) {
ofs << std::flush;
std::cout << std::dec << "*** scope #" << tap->id << ": "<< tap->cur_frame << "/" << tap->frames << " frames" << std::endl;
}
}
break;
}
signal_offset = 0;
++signal_it;
signal_width = signal_it->width;
}
} while ((frame_offset % 64) != 0);
} while (frame_offset != tap->width);
}
int vx_scope_start(vx_device_h hdevice, uint64_t start_time, uint64_t stop_time) {
if (nullptr == hdevice)
return -1;
const char* json_path = getenv("SCOPE_JSON_PATH");
std::ifstream ifs(json_path);
if (!ifs)
return -1;
auto json_obj = json::parse(ifs);
if (json_obj.is_null())
return -1;
auto device = ((vx_device*)hdevice);
auto api = device->api;
if (stop_time != uint64_t(-1)) {
// set stop time
uint64_t cmd_stop = ((stop_time << 3) | CMD_SET_STOP);
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, cmd_stop), {
auto& api = device->api;
// validate scope manifest
for (auto& tap : json_obj["taps"]) {
auto id = tap["id"].get<uint32_t>();
auto width = tap["width"].get<uint32_t>();
uint64_t cmd_width = (id << 3) | CMD_GET_WIDTH;
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, cmd_width), {
return -1;
});
std::cout << "scope stop time: " << std::dec << stop_time << "s" << std::endl;
uint64_t dev_width;
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &dev_width), {
return -1;
});
if (width != dev_width) {
std::cerr << "Invalid scope with! id=" << id << ", actual=" << dev_width << ", expected=" << width << std::endl;
return 1;
}
}
// set stop time
if (stop_time != uint64_t(-1)) {
std::cout << "scope stop time: " << std::dec << stop_time << "s" << std::endl;
for (auto& tap : json_obj["taps"]) {
auto id = tap["id"].get<uint32_t>();
uint64_t cmd_stop = (stop_time << 11) | (id << 3) | CMD_SET_STOP;
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, cmd_stop), {
return -1;
});
}
}
// start recording
uint64_t cmd_delay = ((start_time << 3) | CMD_SET_START);
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, cmd_delay), {
return -1;
});
std::cout << "scope start time: " << std::dec << start_time << "s" << std::endl;
if (start_time != uint64_t(-1)) {
std::cout << "scope start time: " << std::dec << start_time << "s" << std::endl;
for (auto& tap : json_obj["taps"]) {
auto id = tap["id"].get<uint32_t>();
uint64_t cmd_start = (start_time << 11) | (id << 3) | CMD_SET_START;
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, cmd_start), {
return -1;
});
}
}
#ifdef HANG_TIMEOUT
// starting timeout thread
@ -129,7 +285,7 @@ int vx_scope_start(vx_device_h hdevice, uint64_t start_time, uint64_t stop_time)
return 0;
}
int vx_scope_stop(vx_device_h hdevice) {
int vx_scope_stop(vx_device_h hdevice) {
#ifdef HANG_TIMEOUT
if (g_timeout_enabled) {
// shutting down timeout thread
@ -142,163 +298,105 @@ int vx_scope_stop(vx_device_h hdevice) {
if (nullptr == hdevice)
return -1;
auto device = ((vx_device*)hdevice);
auto api = device->api;
auto device = (vx_device*)hdevice;
auto& api = device->api;
// forced stop
uint64_t cmd_stop = ((0 << 3) | CMD_SET_STOP);
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, cmd_stop), {
return -1;
});
std::vector<tap_t> taps;
{
    // Build the list of taps from the JSON file named by the SCOPE_JSON_PATH
    // environment variable.
    const char* json_path = getenv("SCOPE_JSON_PATH");
    if (json_path == nullptr) {
        // getenv() returns a null pointer when the variable is unset; passing
        // that to the std::ifstream constructor is undefined behaviour.
        std::cerr << "scope: SCOPE_JSON_PATH is not set" << std::endl;
        return -1;
    }
    std::ifstream ifs(json_path);
    if (!ifs.is_open()) {
        // Report the missing/unreadable file here instead of handing a failed
        // stream to the parser.
        std::cerr << "scope: cannot open '" << json_path << "'" << std::endl;
        return -1;
    }
    auto json_obj = json::parse(ifs);
    if (json_obj.is_null())
        return 0; // nothing described, nothing to dump

    // Signal ids are numbered consecutively across all taps, starting at 1.
    uint32_t signal_id = 1;
    for (auto& tap : json_obj["taps"]) {
        tap_t _tap;
        _tap.id = tap["id"].get<uint32_t>();
        _tap.width = tap["width"].get<uint32_t>();
        _tap.path = tap["path"].get<std::string>();
        // Counters start at zero here; frames and ticks are assigned later
        // from values read back from the device.
        _tap.ticks = 0;
        _tap.frames = 0;
        _tap.cur_frame = 0;
        // Each signal entry is indexed as [name, width].
        for (auto& signal : tap["signals"]) {
            auto name = signal[0].get<std::string>();
            auto width = signal[1].get<uint32_t>();
            _tap.signals.push_back({signal_id, name, width});
            ++signal_id;
        }
        taps.emplace_back(std::move(_tap));
    }
}
// stop recording
for (auto& tap : taps) {
uint64_t cmd_stop = (0 << 11) | (tap.id << 3) | CMD_SET_STOP;
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, cmd_stop), {
return -1;
});
}
std::cout << "scope trace dump begin..." << std::endl;
std::ofstream ofs("trace.vcd");
std::ofstream ofs("scope.vcd");
ofs << "$version Generated by Vortex Scope $end" << std::endl;
ofs << "$timescale 1 ns $end" << std::endl;
ofs << "$scope module TOP $end" << std::endl;
dump_module(ofs, -1);
dump_taps(ofs, -1);
ofs << "$upscope $end" << std::endl;
ofs << "enddefinitions $end" << std::endl;
dump_header(ofs, taps);
// load trace info
for (auto& tap : taps) {
uint64_t count, start, delta;
// get count
uint64_t cmd_count = (tap.id << 3) | CMD_GET_COUNT;
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, cmd_count), {
return -1;
});
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &count), {
return -1;
});
// get start
uint64_t cmd_start = (tap.id << 3) | CMD_GET_START;
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, cmd_start), {
return -1;
});
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &start), {
return -1;
});
// get data
uint64_t cmd_data = (tap.id << 3) | CMD_GET_DATA;
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, cmd_data), {
return -1;
});
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &delta), {
return -1;
});
tap.frames = count;
tap.ticks = start + delta;
std::cout << std::dec << "scope #" << tap.id << ": width=" << tap.width << ", num_frames=" << tap.frames << ", start_time=" << tap.ticks << ", path=" << tap.path << std::endl;
}
uint64_t frame_width, max_frames, data_valid, offset, delta;
uint64_t timestamp = 0;
uint64_t frame_offset = 0;
uint64_t frame_no = 0;
int signal_id = 0;
int signal_offset = 0;
// wait for recording to terminate
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID), {
return -1;
});
do {
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &data_valid), {
return -1;
});
if (data_valid)
while (true) {
// find the nearest tap
auto tap = find_nearest_tap(taps);
if (tap == nullptr)
break;
std::this_thread::sleep_for(std::chrono::seconds(1));
} while (true);
// get frame width
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, CMD_GET_WIDTH), {
return -1;
});
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &frame_width), {
return -1;
});
std::cout << "scope::frame_width=" << std::dec << frame_width << std::endl;
if (fwidth != (int)frame_width) {
std::cerr << "invalid frame_width: expecting " << std::dec << fwidth << "!" << std::endl;
std::abort();
}
// get max frames
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, CMD_GET_COUNT), {
return -1;
});
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &max_frames), {
return -1;
});
std::cout << "scope::max_frames=" << std::dec << max_frames << std::endl;
// get offset
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, CMD_GET_START), {
return -1;
});
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &offset), {
return -1;
});
// get data
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, CMD_GET_DATA), {
return -1;
});
// print clock header
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &delta), {
return -1;
});
timestamp = print_clock(ofs, offset + delta + 2, timestamp);
signal_id = num_taps;
std::vector<char> signal_data(frame_width+1);
do {
if (frame_no == (max_frames-1)) {
// verify last frame is valid
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID), {
return -1;
});
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &data_valid), {
return -1;
});
assert(data_valid == 1);
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, CMD_GET_DATA), {
return -1;
});
}
// read next data words
uint64_t word;
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &word), {
return -1;
});
do {
int signal_width = scope_taps[signal_id-1].width;
int word_offset = frame_offset % 64;
signal_data[signal_width - signal_offset - 1] = ((word >> word_offset) & 0x1) ? '1' : '0';
++signal_offset;
++frame_offset;
if (signal_offset == signal_width) {
signal_data[signal_width] = 0; // string null termination
ofs << 'b' << signal_data.data() << ' ' << signal_id << std::endl;
signal_offset = 0;
--signal_id;
}
if (frame_offset == frame_width) {
assert(0 == signal_offset);
frame_offset = 0;
++frame_no;
if (frame_no != max_frames) {
// print clock header
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &delta), {
return -1;
});
timestamp = print_clock(ofs, delta + 1, timestamp);
signal_id = num_taps;
if (0 == (frame_no % FRAME_FLUSH_SIZE)) {
ofs << std::flush;
std::cout << "*** " << frame_no << "/" << max_frames << " frames" << std::endl;
}
}
}
} while ((frame_offset % 64) != 0);
} while (frame_no != max_frames);
// advance clock
timestamp = dump_clock(ofs, tap->ticks + 1, timestamp);
// dump tap
dump_tap(ofs, tap, device);
};
std::cout << "scope trace dump done! - " << (timestamp/2) << " cycles" << std::endl;
// verify data not valid
CHECK_ERR(api.fpgaWriteMMIO64(device->fpga, 0, MMIO_SCOPE_WRITE, CMD_GET_VALID), {
return -1;
});
CHECK_ERR(api.fpgaReadMMIO64(device->fpga, 0, MMIO_SCOPE_READ, &data_valid), {
return -1;
});
assert(data_valid == 0);
return 0;
}
}

View file

@ -10,4 +10,4 @@
int vx_scope_start(vx_device_h hdevice, uint64_t start_time = 0, uint64_t stop_time = -1);
int vx_scope_stop(vx_device_h hdevice);
int vx_scope_stop(vx_device_h hdevice);

View file

@ -34,6 +34,7 @@ SRCS = vortex.cpp driver.cpp ../common/utils.cpp
# set up target types
ifeq ($(TARGET), opaesim)
CXXFLAGS += -DOPAESIM
OPAESIM = libopae-c-sim.so
else
ifeq ($(TARGET), asesim)
CXXFLAGS += -DASESIM
@ -47,15 +48,13 @@ ifdef DEBUG
CXXFLAGS += -g -O0
else
CXXFLAGS += -O2 -DNDEBUG
SCOPE_FLAGS += -D NDEBUG
endif
# Enable scope analyzer
ifdef SCOPE
CXXFLAGS += -DSCOPE
SRCS += ../common/scope.cpp
SCOPE_H = scope-defs.h
SCOPE_FLAGS += -D SCOPE
SCOPE_JSON = scope.json
endif
# Enable perf counters
@ -67,20 +66,18 @@ PROJECT = libvortex.so
all: $(PROJECT)
scope-defs.h: $(SCRIPT_DIR)/scope.json
$(SCRIPT_DIR)/scope.py $(CONFIGS) $(SCOPE_FLAGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json
vortex.xml:
DESTDIR=../../runtime/opae $(MAKE) -C $(OPAESIM_DIR) ../../runtime/opae/vortex.xml
# generate scope data
scope: scope-defs.h
scope.json: vortex.xml
$(SCRIPT_DIR)/scope.py vortex.xml -o scope.json
$(PROJECT): $(SRCS) $(SCOPE_H)
ifeq ($(TARGET), opaesim)
$(MAKE) -C $(OPAESIM_DIR)
endif
libopae-c-sim.so:
DESTDIR=../../runtime/opae $(MAKE) -C $(OPAESIM_DIR) ../../runtime/opae/libopae-c-sim.so
$(PROJECT): $(SRCS) $(OPAESIM) $(SCOPE_JSON)
$(CXX) $(CXXFLAGS) $(SRCS) $(LDFLAGS) -o $(PROJECT)
clean:
rm -rf $(PROJECT) *.o scope-defs.h $(RTL_DIR)/scope-defs.vh
ifeq ($(TARGET), opaesim)
$(MAKE) -C $(OPAESIM_DIR) clean
endif
DESTDIR=../../runtime/opae $(MAKE) -C $(OPAESIM_DIR) clean
rm -rf $(PROJECT) vortex.xml scope.json

View file

@ -212,7 +212,7 @@ extern int vx_dev_close(vx_device_h hdevice) {
return -1;
auto device = ((vx_device*)hdevice);
auto api = device->api;
auto& api = device->api;
#ifdef SCOPE
vx_scope_stop(hdevice);
@ -275,7 +275,7 @@ extern int vx_buf_alloc(vx_device_h hdevice, uint64_t size, vx_buffer_h* hbuffer
return -1;
auto device = ((vx_device*)hdevice);
auto api = device->api;
auto& api = device->api;
size_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
@ -321,7 +321,7 @@ extern int vx_buf_free(vx_buffer_h hbuffer) {
auto buffer = ((vx_buffer_t*)hbuffer);
auto device = ((vx_device*)buffer->hdevice);
auto api = device->api;
auto& api = device->api;
api.fpgaReleaseBuffer(device->fpga, buffer->wsid);
@ -337,7 +337,7 @@ extern int vx_ready_wait(vx_device_h hdevice, uint64_t timeout) {
std::unordered_map<uint32_t, std::stringstream> print_bufs;
auto device = ((vx_device*)hdevice);
auto api = device->api;
auto& api = device->api;
struct timespec sleep_time;
@ -402,7 +402,7 @@ extern int vx_copy_to_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t size
auto buffer = ((vx_buffer_t*)hbuffer);
auto device = ((vx_device*)buffer->hdevice);
auto api = device->api;
auto& api = device->api;
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
@ -452,7 +452,7 @@ extern int vx_copy_from_dev(vx_buffer_h hbuffer, uint64_t dev_maddr, uint64_t si
auto buffer = ((vx_buffer_t*)hbuffer);
auto device = ((vx_device*)buffer->hdevice);
auto api = device->api;
auto& api = device->api;
uint64_t dev_mem_size = LOCAL_MEM_SIZE;
uint64_t asize = aligned_size(size, CACHE_BLOCK_SIZE);
@ -500,7 +500,7 @@ extern int vx_start(vx_device_h hdevice) {
return -1;
auto device = ((vx_device*)hdevice);
auto api = device->api;
auto& api = device->api;
// Ensure ready for new command
if (vx_ready_wait(hdevice, MAX_TIMEOUT) != 0)
@ -519,7 +519,7 @@ extern int vx_dcr_write(vx_device_h hdevice, uint32_t addr, uint64_t value) {
return -1;
auto device = ((vx_device*)hdevice);
auto api = device->api;
auto& api = device->api;
// Ensure ready for new command
if (vx_ready_wait(hdevice, -1) != 0)

View file

@ -45,8 +45,8 @@ RTL_INCLUDE += -I$(AFU_DIR) -I$(AFU_DIR)/ccip
TOP = vortex_afu_shim
VL_FLAGS = --exe --cc $(TOP) --top-module $(TOP)
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS = --cc $(TOP) --top-module $(TOP)
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += -DSIMULATION
@ -67,7 +67,7 @@ ifdef DEBUG
CXXFLAGS += -g -O0 $(DBG_FLAGS)
else
VL_FLAGS += -DNDEBUG
CXXFLAGS += -O2 -DNDEBUG
CXXFLAGS += -O3 -DNDEBUG
endif
# Enable scope analyzer
@ -102,7 +102,10 @@ vortex_afu.h : $(AFU_DIR)/vortex_afu.vh
$(SCRIPT_DIR)/gen_config.py -i $(AFU_DIR)/vortex_afu.vh -o vortex_afu.h
$(DESTDIR)/$(PROJECT): $(SRCS) vortex_afu.h
verilator --build $(VL_FLAGS) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(DESTDIR)/$(PROJECT)
verilator --build --exe -O3 $(VL_FLAGS) $(SRCS) -CFLAGS '$(CXXFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(DESTDIR)/$(PROJECT)
$(DESTDIR)/vortex.xml: $(DESTDIR)/$(PROJECT)
verilator --xml-only -O0 $(VL_FLAGS) --xml-output $(DESTDIR)/vortex.xml
clean:
rm -rf obj_dir $(DESTDIR)/$(PROJECT)

View file

@ -63,6 +63,8 @@
#define RAM_PAGE_SIZE 4096
#define CPU_GPU_LATENCY 200
using namespace vortex;
static uint64_t timestamp = 0;
@ -193,6 +195,11 @@ public:
void read_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t *value) {
std::lock_guard<std::mutex> guard(mutex_);
// simulate CPU-GPU latency
for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i)
this->tick();
// simulate mmio request
device_->vcp2af_sRxPort_c0_mmioRdValid = 1;
device_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
device_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;
@ -205,7 +212,12 @@ public:
void write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value) {
std::lock_guard<std::mutex> guard(mutex_);
// simulate CPU-GPU latency
for (uint32_t i = 0; i < CPU_GPU_LATENCY; ++i)
this->tick();
// simulate mmio request
device_->vcp2af_sRxPort_c0_mmioWrValid = 1;
device_->vcp2af_sRxPort_c0_ReqMmioHdr_address = offset / 4;
device_->vcp2af_sRxPort_c0_ReqMmioHdr_length = 1;

View file

@ -35,12 +35,12 @@ else
endif
ifeq ($(TARGET), fpga)
OPAE_DRV_PATHS ?= $(OPAE_SDK_ROOT)/lib64/libopae-c.so
OPAE_DRV_PATHS ?= libopae-c.so
else
ifeq ($(TARGET), asesim)
OPAE_DRV_PATHS ?= $(OPAE_SDK_ROOT)/lib64/libopae-c-ase.so
OPAE_DRV_PATHS ?= libopae-c-ase.so
else
OPAE_DRV_PATHS ?= ../../../sim/opaesim/libopae-c-sim.so
OPAE_DRV_PATHS ?= libopae-c-sim.so
endif
endif
@ -59,7 +59,7 @@ run-rtlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-opae: $(PROJECT) kernel.pocl
OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/opae/scope.json LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_RT_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-xrt: $(PROJECT) kernel.pocl
ifeq ($(TARGET), hw)

View file

@ -54,12 +54,12 @@ else
endif
ifeq ($(TARGET), fpga)
OPAE_DRV_PATHS ?= $(OPAE_SDK_ROOT)/lib64/libopae-c.so
OPAE_DRV_PATHS ?= libopae-c.so
else
ifeq ($(TARGET), asesim)
OPAE_DRV_PATHS ?= $(OPAE_SDK_ROOT)/lib64/libopae-c-ase.so
OPAE_DRV_PATHS ?= libopae-c-ase.so
else
OPAE_DRV_PATHS ?= ../../../sim/opaesim/libopae-c-sim.so
OPAE_DRV_PATHS ?= libopae-c-sim.so
endif
endif
@ -81,7 +81,7 @@ run-simx: $(PROJECT) kernel.bin
LD_LIBRARY_PATH=$(VORTEX_RT_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-opae: $(PROJECT) kernel.bin
OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) LD_LIBRARY_PATH=$(VORTEX_RT_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
# SCOPE_JSON_PATH is passed in the environment of the test program; the library search path only lists the opae runtime directory (no developer-local directories).
OPAE_DRV_PATHS=$(OPAE_DRV_PATHS) SCOPE_JSON_PATH=$(VORTEX_RT_PATH)/opae/scope.json LD_LIBRARY_PATH=$(VORTEX_RT_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-rtlsim: $(PROJECT) kernel.bin
LD_LIBRARY_PATH=$(VORTEX_RT_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)