mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 22:07:41 -04:00
added debug print states for rtl
This commit is contained in:
parent
65c2da76cf
commit
d6c87dbb0a
24 changed files with 7100 additions and 5980 deletions
|
@ -3,11 +3,18 @@ CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
|
||||||
|
|
||||||
CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
|
CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
|
||||||
|
|
||||||
|
# control RTL debug print states
|
||||||
|
DBG_PRINT = -DDBG_PRINT_CORE_ICACHE \
|
||||||
|
-DDBG_PRINT_CORE_DCACHE \
|
||||||
|
-DDBG_PRINT_BANK \
|
||||||
|
-DDBG_PRINT_DRAM \
|
||||||
|
-DDBG_PRINT_SNP_FWD
|
||||||
|
|
||||||
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=2
|
#MULTICORE += -DNUM_CLUSTERS=2 -DNUM_CORES=2
|
||||||
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
|
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
|
||||||
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||||
|
|
||||||
#DEBUG = 1
|
DEBUG = 1
|
||||||
|
|
||||||
CFLAGS += -fPIC
|
CFLAGS += -fPIC
|
||||||
|
|
||||||
|
@ -31,7 +38,7 @@ VL_FLAGS += -DGLOBAL_BLOCK_SIZE=64
|
||||||
|
|
||||||
# Debugging
|
# Debugging
|
||||||
ifdef DEBUG
|
ifdef DEBUG
|
||||||
VL_FLAGS += --trace -DVL_DEBUG=1
|
VL_FLAGS += --trace -DVL_DEBUG=1 $(DBG_PRINT)
|
||||||
CFLAGS += -DVCD_OUTPUT
|
CFLAGS += -DVCD_OUTPUT
|
||||||
else
|
else
|
||||||
CFLAGS += -DNDEBUG
|
CFLAGS += -DNDEBUG
|
||||||
|
|
|
@ -46,7 +46,7 @@ run-ase: $(PROJECT)
|
||||||
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||||
|
|
||||||
run-rtlsim: $(PROJECT)
|
run-rtlsim: $(PROJECT)
|
||||||
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 4
|
||||||
|
|
||||||
run-simx: $(PROJECT)
|
run-simx: $(PROJECT)
|
||||||
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
|
||||||
|
|
|
@ -4,8 +4,6 @@
|
||||||
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
|
#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000
|
||||||
|
|
||||||
struct kernel_arg_t {
|
struct kernel_arg_t {
|
||||||
uint32_t num_warps;
|
|
||||||
uint32_t num_threads;
|
|
||||||
uint32_t stride;
|
uint32_t stride;
|
||||||
uint32_t src0_ptr;
|
uint32_t src0_ptr;
|
||||||
uint32_t src1_ptr;
|
uint32_t src1_ptr;
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
#define RT_CHECK(_expr) \
|
#define RT_CHECK(_expr) \
|
||||||
do { \
|
do { \
|
||||||
int _ret = _expr; \
|
int _ret = _expr; \
|
||||||
if (0 == _ret) \
|
if (0 == _ret) \
|
||||||
break; \
|
break; \
|
||||||
printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \
|
printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \
|
||||||
|
@ -15,7 +15,7 @@
|
||||||
} while (false)
|
} while (false)
|
||||||
|
|
||||||
const char* program_file = "kernel.bin";
|
const char* program_file = "kernel.bin";
|
||||||
uint32_t data_stride = 0xffffffff;
|
uint32_t data_stride = 0;
|
||||||
|
|
||||||
static void show_usage() {
|
static void show_usage() {
|
||||||
std::cout << "Vortex Driver Test." << std::endl;
|
std::cout << "Vortex Driver Test." << std::endl;
|
||||||
|
@ -111,19 +111,22 @@ int main(int argc, char *argv[]) {
|
||||||
// parse command arguments
|
// parse command arguments
|
||||||
parse_args(argc, argv);
|
parse_args(argc, argv);
|
||||||
|
|
||||||
uint32_t block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE);
|
|
||||||
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
|
uint32_t max_cores = vx_dev_caps(VX_CAPS_MAX_CORES);
|
||||||
uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS);
|
uint32_t max_warps = vx_dev_caps(VX_CAPS_MAX_WARPS);
|
||||||
uint32_t max_threads = vx_dev_caps(VX_CAPS_MAX_THREADS);
|
uint32_t max_threads = vx_dev_caps(VX_CAPS_MAX_THREADS);
|
||||||
|
|
||||||
if (data_stride == 0xffffffff) {
|
if (data_stride == 0) {
|
||||||
data_stride = block_size / sizeof(uint32_t);
|
data_stride = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t num_points = max_cores * max_warps * max_threads * data_stride;
|
kernel_arg.stride = data_stride;
|
||||||
uint32_t buf_size = num_points * sizeof(uint32_t);
|
|
||||||
|
uint32_t num_points = max_cores * max_warps * max_threads;
|
||||||
|
uint32_t buf_size = num_points * data_stride * sizeof(uint32_t);
|
||||||
|
|
||||||
std::cout << "number of workitems: " << num_points << std::endl;
|
std::cout << "number of workitems: " << num_points << std::endl;
|
||||||
|
std::cout << "workitem size: " << data_stride * sizeof(uint32_t) << " bytes" << std::endl;
|
||||||
|
std::cout << "buffer size: " << buf_size << " bytes" << std::endl;
|
||||||
|
|
||||||
// open device connection
|
// open device connection
|
||||||
std::cout << "open device connection" << std::endl;
|
std::cout << "open device connection" << std::endl;
|
||||||
|
@ -167,10 +170,6 @@ int main(int argc, char *argv[]) {
|
||||||
// upload kernel argument
|
// upload kernel argument
|
||||||
std::cout << "upload kernel argument" << std::endl;
|
std::cout << "upload kernel argument" << std::endl;
|
||||||
{
|
{
|
||||||
kernel_arg.num_warps = max_warps;
|
|
||||||
kernel_arg.num_threads = max_threads;
|
|
||||||
kernel_arg.stride = data_stride;
|
|
||||||
|
|
||||||
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
auto buf_ptr = (int*)vx_host_ptr(buffer);
|
||||||
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
|
memcpy(buf_ptr, &kernel_arg, sizeof(kernel_arg_t));
|
||||||
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
|
RT_CHECK(vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0));
|
||||||
|
|
Binary file not shown.
|
@ -6,27 +6,24 @@
|
||||||
|
|
||||||
void kernel_body(void* arg) {
|
void kernel_body(void* arg) {
|
||||||
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg);
|
||||||
int* x = (int*)_arg->src0_ptr;
|
int* src0_ptr = (int*)_arg->src0_ptr;
|
||||||
int* y = (int*)_arg->src1_ptr;
|
int* src1_ptr = (int*)_arg->src1_ptr;
|
||||||
int* z = (int*)_arg->dst_ptr;
|
int* dst_ptr = (int*)_arg->dst_ptr;
|
||||||
|
|
||||||
unsigned wid = vx_warp_gid();
|
unsigned offset = vx_thread_gid() * _arg->stride;
|
||||||
unsigned tid = vx_thread_id();
|
|
||||||
|
|
||||||
unsigned i = ((wid * _arg->num_threads) + tid) * _arg->stride;
|
for (unsigned i = 0; i < _arg->stride; ++i) {
|
||||||
|
dst_ptr[offset+i] = src0_ptr[offset+i] + src1_ptr[offset+i];
|
||||||
for (unsigned j = 0; j < _arg->stride; ++j) {
|
|
||||||
z[i+j] = x[i+j] + y[i+j];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR;
|
||||||
/*printf("num_warps=%d\n", arg->num_warps);
|
/*printf("stride=%d\n", arg->stride);
|
||||||
printf("num_threads=%d\n", arg->num_threads);
|
printf("src0_ptr=0x%x\n", arg->src0_ptr);
|
||||||
printf("stride=%d\n", arg->stride);
|
printf("src1_ptr=0x%x\n", arg->src1_ptr);
|
||||||
printf("src0_ptr=0x%x\n", arg->src0_ptr);
|
printf("dst_ptr=0x%x\n", arg->dst_ptr);*/
|
||||||
printf("src1_ptr=0x%x\n", arg->src1_ptr);
|
int num_warps = vx_num_warps();
|
||||||
printf("dst_ptr=0x%x\n", arg->dst_ptr);*/
|
int num_threads = vx_num_threads();
|
||||||
vx_spawn_warps(arg->num_warps, arg->num_threads, kernel_body, arg);
|
vx_spawn_warps(num_warps, num_threads, kernel_body, arg);
|
||||||
}
|
}
|
Binary file not shown.
File diff suppressed because it is too large
Load diff
|
@ -62,13 +62,15 @@ module VX_icache_stage #(
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
/*always_comb begin
|
`ifdef DBG_PRINT_CORE_ICACHE
|
||||||
|
always_comb begin
|
||||||
if (1'($time & 1) && icache_req_if.core_req_ready && icache_req_if.core_req_valid) begin
|
if (1'($time & 1) && icache_req_if.core_req_ready && icache_req_if.core_req_valid) begin
|
||||||
$display("*** %t: I%01d$ req: tag=%0h, pc=%0h, warp=%0d", $time, CORE_ID, icache_req_if.core_req_tag, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num);
|
$display("*** %t: I%01d$ req: tag=%0h, pc=%0h, warp=%0d", $time, CORE_ID, icache_req_if.core_req_tag, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num);
|
||||||
end
|
end
|
||||||
if (1'($time & 1) && icache_rsp_if.core_rsp_ready && icache_rsp_if.core_rsp_valid) begin
|
if (1'($time & 1) && icache_rsp_if.core_rsp_ready && icache_rsp_if.core_rsp_valid) begin
|
||||||
$display("*** %t: I%01d$ rsp: tag=%0h, pc=%0h, warp=%0d, instr=%0h", $time, CORE_ID, icache_rsp_if.core_rsp_tag, fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num, fe_inst_meta_id.instruction);
|
$display("*** %t: I%01d$ rsp: tag=%0h, pc=%0h, warp=%0d, instr=%0h", $time, CORE_ID, icache_rsp_if.core_rsp_tag, fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num, fe_inst_meta_id.instruction);
|
||||||
end
|
end
|
||||||
end*/
|
end
|
||||||
|
`endif
|
||||||
|
|
||||||
endmodule
|
endmodule
|
|
@ -62,14 +62,16 @@ module VX_lsu_unit #(
|
||||||
assign dcache_rsp_if.core_rsp_ready = ~no_slot_mem;
|
assign dcache_rsp_if.core_rsp_ready = ~no_slot_mem;
|
||||||
assign {mem_wb_if.pc, mem_wb_if.wb, mem_wb_if.rd, mem_wb_if.warp_num} = dcache_rsp_if.core_rsp_tag;
|
assign {mem_wb_if.pc, mem_wb_if.wb, mem_wb_if.rd, mem_wb_if.warp_num} = dcache_rsp_if.core_rsp_tag;
|
||||||
|
|
||||||
/*always_comb begin
|
`ifdef DBG_PRINT_CORE_DCACHE
|
||||||
|
always_comb begin
|
||||||
if (1'($time & 1) && dcache_req_if.core_req_ready && (| dcache_req_if.core_req_valid)) begin
|
if (1'($time & 1) && dcache_req_if.core_req_ready && (| dcache_req_if.core_req_valid)) begin
|
||||||
$display("*** %t: D%01d$ req: valid=%b, addr=%0h, tag=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, use_valid, use_address, dcache_req_if.core_req_tag, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, use_store_data);
|
$display("*** %t: D%01d$ req: valid=%b, addr=%0h, tag=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, use_valid, use_address, dcache_req_if.core_req_tag, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, use_store_data);
|
||||||
end
|
end
|
||||||
if (1'($time & 1) && dcache_rsp_if.core_rsp_ready && (| dcache_rsp_if.core_rsp_valid)) begin
|
if (1'($time & 1) && dcache_rsp_if.core_rsp_ready && (| dcache_rsp_if.core_rsp_valid)) begin
|
||||||
$display("*** %t: D%01d$ rsp: valid=%b, tag=%0h, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, mem_wb_if.valid, dcache_rsp_if.core_rsp_tag, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
|
$display("*** %t: D%01d$ rsp: valid=%b, tag=%0h, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, mem_wb_if.valid, dcache_rsp_if.core_rsp_tag, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
|
||||||
end
|
end
|
||||||
end*/
|
end
|
||||||
|
`endif
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|
||||||
|
|
|
@ -328,13 +328,15 @@ module Vortex_Socket (
|
||||||
);
|
);
|
||||||
end
|
end
|
||||||
|
|
||||||
/*always_comb begin
|
`ifdef DBG_PRINT_DRAM
|
||||||
|
always_comb begin
|
||||||
if (1'($time & 1) && (dram_req_read || dram_req_write) && dram_req_ready) begin
|
if (1'($time & 1) && (dram_req_read || dram_req_write) && dram_req_ready) begin
|
||||||
$display("*** %t: DRAM req: w=%b addr=%0h, tag=%0h, data=%0h", $time, dram_req_write, {dram_req_addr, `CLOG2(`GLOBAL_BLOCK_SIZE)'(0)}, dram_req_tag, dram_req_data);
|
$display("*** %t: DRAM req: w=%b addr=%0h, tag=%0h, data=%0h", $time, dram_req_write, {dram_req_addr, `CLOG2(`GLOBAL_BLOCK_SIZE)'(0)}, dram_req_tag, dram_req_data);
|
||||||
end
|
end
|
||||||
if (1'($time & 1) && dram_rsp_valid && dram_rsp_ready) begin
|
if (1'($time & 1) && dram_rsp_valid && dram_rsp_ready) begin
|
||||||
$display("*** %t: DRAM rsp: tag=%0h, data=%0h", $time, dram_rsp_tag, dram_rsp_data);
|
$display("*** %t: DRAM rsp: tag=%0h, data=%0h", $time, dram_rsp_tag, dram_rsp_data);
|
||||||
end
|
end
|
||||||
end*/
|
end
|
||||||
|
`endif
|
||||||
|
|
||||||
endmodule
|
endmodule
|
14
hw/rtl/cache/VX_bank.v
vendored
14
hw/rtl/cache/VX_bank.v
vendored
|
@ -627,4 +627,18 @@ module VX_bank #(
|
||||||
|| msrq_push_stall
|
|| msrq_push_stall
|
||||||
|| dram_fill_req_stall;
|
|| dram_fill_req_stall;
|
||||||
|
|
||||||
|
`ifdef DBG_PRINT_BANK
|
||||||
|
always_comb begin
|
||||||
|
if (1'($time & 1) && dram_fill_req_valid && dram_fill_req_ready) begin
|
||||||
|
$display("*** %t: bank%02d:%01d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID));
|
||||||
|
end
|
||||||
|
if (1'($time & 1) && dram_wb_req_valid && dram_wb_req_ready) begin
|
||||||
|
$display("*** %t: bank%02d:%01d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data);
|
||||||
|
end
|
||||||
|
if (1'($time & 1) && dram_fill_rsp_valid && dram_fill_rsp_ready) begin
|
||||||
|
$display("*** %t: bank%02d:%01d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data);
|
||||||
|
end
|
||||||
|
end
|
||||||
|
`endif
|
||||||
|
|
||||||
endmodule : VX_bank
|
endmodule : VX_bank
|
4
hw/rtl/cache/VX_cache_config.vh
vendored
4
hw/rtl/cache/VX_cache_config.vh
vendored
|
@ -70,6 +70,8 @@
|
||||||
|
|
||||||
`define DRAM_TO_LINE_ADDR(x) x[`DRAM_ADDR_WIDTH-1:`BANK_SELECT_BITS]
|
`define DRAM_TO_LINE_ADDR(x) x[`DRAM_ADDR_WIDTH-1:`BANK_SELECT_BITS]
|
||||||
|
|
||||||
`define LINE_TO_DRAM_ADDR(x, i) {x, (`BANK_SELECT_BITS)'(i)};
|
`define LINE_TO_DRAM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)}
|
||||||
|
|
||||||
|
`define LINE_TO_BYTE_ADDR(x, i) {x, `BANK_SELECT_BITS'(i), `BASE_ADDR_BITS'(0)}
|
||||||
|
|
||||||
`endif
|
`endif
|
||||||
|
|
6
hw/rtl/cache/VX_snp_forwarder.v
vendored
6
hw/rtl/cache/VX_snp_forwarder.v
vendored
|
@ -112,7 +112,8 @@ module VX_snp_forwarder #(
|
||||||
assign snp_fwdin_ready[i] = fwdin_ready && (fwdin_sel == `REQS_BITS'(i));
|
assign snp_fwdin_ready[i] = fwdin_ready && (fwdin_sel == `REQS_BITS'(i));
|
||||||
end
|
end
|
||||||
|
|
||||||
/*always_comb begin
|
`ifdef DBG_PRINT_SNP_FWD
|
||||||
|
always_comb begin
|
||||||
if (1'($time & 1) && snp_req_valid && snp_req_ready) begin
|
if (1'($time & 1) && snp_req_valid && snp_req_ready) begin
|
||||||
$display("*** %t: snp req: addr=%0h, tag=%0h", $time, snp_req_addr, snp_req_tag);
|
$display("*** %t: snp req: addr=%0h, tag=%0h", $time, snp_req_addr, snp_req_tag);
|
||||||
end
|
end
|
||||||
|
@ -125,6 +126,7 @@ module VX_snp_forwarder #(
|
||||||
if (1'($time & 1) && snp_rsp_valid && snp_rsp_ready) begin
|
if (1'($time & 1) && snp_rsp_valid && snp_rsp_ready) begin
|
||||||
$display("*** %t: snp rsp: addr=%0h, tag=%0h", $time, snp_rsp_addr, snp_rsp_tag);
|
$display("*** %t: snp rsp: addr=%0h, tag=%0h", $time, snp_rsp_addr, snp_rsp_tag);
|
||||||
end
|
end
|
||||||
end*/
|
end
|
||||||
|
`endif
|
||||||
|
|
||||||
endmodule
|
endmodule
|
25
hw/rtl/libs/VX_encoder_onehot.v
Normal file
25
hw/rtl/libs/VX_encoder_onehot.v
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
module VX_encoder_onehot #(
|
||||||
|
parameter N = 6
|
||||||
|
) (
|
||||||
|
input wire [N-1:0] onehot,
|
||||||
|
output reg valid,
|
||||||
|
output reg [`LOG2UP(N)-1:0] value
|
||||||
|
);
|
||||||
|
integer i;
|
||||||
|
|
||||||
|
always @(*) begin
|
||||||
|
valid = 1'b0;
|
||||||
|
value = {`LOG2UP(N){1'bx}};
|
||||||
|
for (i = 0; i < N; i++) begin
|
||||||
|
if (onehot[i]) begin
|
||||||
|
valid = 1'b1;
|
||||||
|
value = `LOG2UP(N)'(i);
|
||||||
|
break;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
endmodule
|
||||||
|
|
|
@ -3,54 +3,51 @@
|
||||||
module VX_matrix_arbiter #(
|
module VX_matrix_arbiter #(
|
||||||
parameter N = 0
|
parameter N = 0
|
||||||
) (
|
) (
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
input wire [N-1:0] inputs,
|
input wire [N-1:0] requests,
|
||||||
output wire [N-1:0] grant
|
output wire grant_valid,
|
||||||
|
output wire [N-1:0] grant_onehot,
|
||||||
|
output wire [`LOG2UP(N)-1:0] grant_index
|
||||||
);
|
);
|
||||||
|
|
||||||
reg [N-1:1][N-1:0] pri;
|
reg [N-1:0] state [0:N-1];
|
||||||
|
wire [N-1:0] dis [0:N-1];
|
||||||
|
|
||||||
always @(posedge clk) begin
|
genvar i, j;
|
||||||
if (reset) begin
|
|
||||||
integer i, j;
|
for (i = 0; i < N; ++i) begin
|
||||||
for (i = 0; i < N; ++i) begin
|
for (j = i + 1; j < N; ++j) begin
|
||||||
for (j = 0; j < N; ++j) begin
|
always @(posedge clk) begin
|
||||||
pri[i][j] <= 1;
|
if (reset) begin
|
||||||
|
state[i][j] <= 0;
|
||||||
|
end else begin
|
||||||
|
state[i][j] <= (state[i][j] || grant_onehot[j]) && ~grant_onehot[i];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end else begin
|
|
||||||
integer i, j;
|
|
||||||
for (i = 0; i < N; ++i) begin
|
|
||||||
if (grant[i]) begin
|
|
||||||
for (j = 0; j < N; ++j) begin
|
|
||||||
if (j > i)
|
|
||||||
pri[j][i] <= 1;
|
|
||||||
else if (j < i)
|
|
||||||
pri[i][j] <= 0;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
genvar i, j;
|
|
||||||
|
|
||||||
for (i = 0; i < N; ++i) begin
|
for (i = 0; i < N; ++i) begin
|
||||||
|
|
||||||
wire [N-1:0] dis;
|
|
||||||
|
|
||||||
for (j = 0; j < N; ++j) begin
|
for (j = 0; j < N; ++j) begin
|
||||||
if (j > i) begin
|
if (j > i) begin
|
||||||
assign dis[j] = inputs[j] & pri[j][i];
|
assign dis[j][i] = requests[i] & state[i][j];
|
||||||
end else if (j < i) begin
|
end else if (j < i) begin
|
||||||
assign dis[j] = inputs[j] & ~pri[i][j];
|
assign dis[j][i] = requests[i] & ~state[j][i];
|
||||||
end else begin
|
end else begin
|
||||||
assign dis[j] = 0;
|
assign dis[j][i] = 0;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
assign grant[i] = inputs[i] & ~(| dis);
|
assign grant_onehot[i] = requests[i] & ~(| dis[i]);
|
||||||
end
|
end
|
||||||
|
|
||||||
|
VX_encoder_onehot #(
|
||||||
|
.N(N)
|
||||||
|
) encoder (
|
||||||
|
.onehot(grant_onehot),
|
||||||
|
.valid(grant_valid),
|
||||||
|
.value(grant_index)
|
||||||
|
);
|
||||||
|
|
||||||
endmodule
|
endmodule
|
|
@ -5,7 +5,7 @@
|
||||||
.type vx_wspawn, @function
|
.type vx_wspawn, @function
|
||||||
.global vx_wspawn
|
.global vx_wspawn
|
||||||
vx_wspawn:
|
vx_wspawn:
|
||||||
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
|
.word 0x00b5106b # wspawn a0(num_warps), a1(func_ptr)
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.type vx_tmc, @function
|
.type vx_tmc, @function
|
||||||
|
@ -17,7 +17,7 @@ vx_tmc:
|
||||||
.type vx_barrier, @function
|
.type vx_barrier, @function
|
||||||
.global vx_barrier
|
.global vx_barrier
|
||||||
vx_barrier:
|
vx_barrier:
|
||||||
.word 0x00b5406b # barrier a0(barrier id), a1(numWarps)
|
.word 0x00b5406b # barrier a0(barrier_id), a1(num_warps)
|
||||||
ret
|
ret
|
||||||
|
|
||||||
.type vx_split, @function
|
.type vx_split, @function
|
||||||
|
|
|
@ -1,20 +1,18 @@
|
||||||
|
#ifndef VX_INTRINSICS_H
|
||||||
#ifndef VX_INTRINSICS
|
#define VX_INTRINSICS_H
|
||||||
|
|
||||||
#define VX_INTRINSICS
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Spawn warps
|
// Spawn warps
|
||||||
void vx_wspawn(int numWarps, int PC_spawn);
|
void vx_wspawn(int num_warps, unsigned func_ptr);
|
||||||
|
|
||||||
// Set thread mask
|
// Set thread mask
|
||||||
void vx_tmc(int numThreads);
|
void vx_tmc(int num_threads);
|
||||||
|
|
||||||
// Warp Barrier
|
// Warp Barrier
|
||||||
void vx_barrier(int barriedID, int numWarps);
|
// barrier_id is passed in a0 and num_warps in a1 to the barrier instruction.
void vx_barrier(int barrier_id, int num_warps);
|
||||||
|
|
||||||
// Split on a predicate
|
// Split on a predicate
|
||||||
void vx_split(int predicate);
|
void vx_split(int predicate);
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
#ifndef VX_IO_H
|
||||||
#pragma once
|
#define VX_IO_H
|
||||||
|
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
|
||||||
|
@ -15,7 +15,8 @@ void vx_printf(const char *, unsigned);
|
||||||
void vx_print_str(const char *);
|
void vx_print_str(const char *);
|
||||||
void vx_printc(unsigned, char c);
|
void vx_printc(unsigned, char c);
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
|
@ -625,7 +625,7 @@ Disassembly of section .text:
|
||||||
8000083c: 28d1a023 sw a3,640(gp) # 80016a88 <global_argument_struct>
|
8000083c: 28d1a023 sw a3,640(gp) # 80016a88 <global_argument_struct>
|
||||||
80000840: 26b1ae23 sw a1,636(gp) # 80016a84 <global_num_threads>
|
80000840: 26b1ae23 sw a1,636(gp) # 80016a84 <global_num_threads>
|
||||||
80000844: 00100793 li a5,1
|
80000844: 00100793 li a5,1
|
||||||
80000848: 00a7fa63 bgeu a5,a0,8000085c <vx_spawn_warps+0x38>
|
80000848: 00a7da63 bge a5,a0,8000085c <vx_spawn_warps+0x38>
|
||||||
8000084c: 800005b7 lui a1,0x80000
|
8000084c: 800005b7 lui a1,0x80000
|
||||||
80000850: 7a058593 addi a1,a1,1952 # 800007a0 <__BSS_END__+0xfffe9c78>
|
80000850: 7a058593 addi a1,a1,1952 # 800007a0 <__BSS_END__+0xfffe9c78>
|
||||||
80000854: d55ff0ef jal ra,800005a8 <vx_wspawn>
|
80000854: d55ff0ef jal ra,800005a8 <vx_wspawn>
|
||||||
|
|
Binary file not shown.
|
@ -132,7 +132,7 @@
|
||||||
:1008180013351500130101016FF01FD9130101FFF2
|
:1008180013351500130101016FF01FD9130101FFF2
|
||||||
:100828002324810023229100232021012326110063
|
:100828002324810023229100232021012326110063
|
||||||
:1008380023A2C12823A0D12823AEB12693071000F4
|
:1008380023A2C12823A0D12823AEB12693071000F4
|
||||||
:1008480063FAA700B70500809385057AEFF05FD5B6
|
:1008480063DAA700B70500809385057AEFF05FD5D6
|
||||||
:1008580083A5C12713850500EFF01FD503A501283F
|
:1008580083A5C12713850500EFF01FD503A501283F
|
||||||
:1008680083A74128E7800700EFF01FD60324810003
|
:1008680083A74128E7800700EFF01FD60324810003
|
||||||
:100878008320C1008324410003290100133515009A
|
:100878008320C1008324410003290100133515009A
|
||||||
|
|
|
@ -9,7 +9,7 @@ extern "C" {
|
||||||
|
|
||||||
func_t global_function_pointer;
|
func_t global_function_pointer;
|
||||||
void * global_argument_struct;
|
void * global_argument_struct;
|
||||||
unsigned global_num_threads;
|
int global_num_threads;
|
||||||
|
|
||||||
void spawn_warp_runonce() {
|
void spawn_warp_runonce() {
|
||||||
// active all threads
|
// active all threads
|
||||||
|
@ -19,12 +19,12 @@ void spawn_warp_runonce() {
|
||||||
global_function_pointer(global_argument_struct);
|
global_function_pointer(global_argument_struct);
|
||||||
|
|
||||||
// resume single-thread execution on exit
|
// resume single-thread execution on exit
|
||||||
unsigned wid = vx_warp_id();
|
int wid = vx_warp_id();
|
||||||
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
|
unsigned tmask = (0 == wid) ? 0x1 : 0x0;
|
||||||
vx_tmc(tmask);
|
vx_tmc(tmask);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vx_spawn_warps(unsigned numWarps, unsigned numThreads, func_t func_ptr, void * args) {
|
void vx_spawn_warps(int numWarps, int numThreads, func_t func_ptr, void * args) {
|
||||||
global_function_pointer = func_ptr;
|
global_function_pointer = func_ptr;
|
||||||
global_argument_struct = args;
|
global_argument_struct = args;
|
||||||
global_num_threads = numThreads;
|
global_num_threads = numThreads;
|
||||||
|
@ -34,7 +34,7 @@ void vx_spawn_warps(unsigned numWarps, unsigned numThreads, func_t func_ptr, voi
|
||||||
spawn_warp_runonce();
|
spawn_warp_runonce();
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned pocl_threads;
|
int pocl_threads;
|
||||||
struct context_t * pocl_ctx;
|
struct context_t * pocl_ctx;
|
||||||
vx_pocl_workgroup_func pocl_pfn;
|
vx_pocl_workgroup_func pocl_pfn;
|
||||||
const void * pocl_args;
|
const void * pocl_args;
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
|
#ifndef VX_API_H
|
||||||
#ifndef VX_API_
|
#define VX_API_H
|
||||||
#define VX_API_
|
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
@ -11,7 +10,7 @@ extern "C" {
|
||||||
|
|
||||||
typedef void (*func_t)(void *);
|
typedef void (*func_t)(void *);
|
||||||
|
|
||||||
void vx_spawn_warps(unsigned numWarps, unsigned numThreads, func_t func_ptr , void * args);
|
void vx_spawn_warps(int num_warps, int num_threads, func_t func_ptr , void * args);
|
||||||
|
|
||||||
struct context_t {
|
struct context_t {
|
||||||
uint32_t num_groups[3];
|
uint32_t num_groups[3];
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue