checking workgroup occupancy

This commit is contained in:
Blaise Tine 2024-05-07 23:38:51 -07:00
parent 98ead77405
commit 0003926d01
2 changed files with 44 additions and 0 deletions

View file

@ -729,5 +729,45 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
fflush(stream);
return 0;
}
int vx_check_occupancy(vx_device_h hdevice, uint32_t group_size, uint32_t* max_barriers, uint32_t* max_localmem) {
// check group size
uint64_t warps_per_core, threads_per_warp;
RT_CHECK(vx_dev_caps(hdevice, VX_CAPS_NUM_WARPS, &warps_per_core), {
return _ret;
});
RT_CHECK(vx_dev_caps(hdevice, VX_CAPS_NUM_THREADS, &threads_per_warp), {
return _ret;
});
uint32_t threads_per_core = warps_per_core * threads_per_warp;
if (group_size > threads_per_core) {
printf("Error: device cannot schedule group size > (%d)\n", threads_per_core);
return -1;
}
// calculate groups occupancy
int warps_per_group = (group_size + threads_per_warp-1) / threads_per_warp;
int groups_per_core = warps_per_core / warps_per_group;
// check barriers capacity
if (max_barriers) {
uint64_t num_barriers;
RT_CHECK(vx_dev_caps(hdevice, VX_CAPS_NUM_BARRIERS, &num_barriers), {
return _ret;
});
*max_barriers = num_barriers / groups_per_core;
}
// check local memory capacity
if (max_localmem) {
uint64_t local_mem_size;
RT_CHECK(vx_dev_caps(hdevice, VX_CAPS_LOCAL_MEM_SIZE, &local_mem_size), {
return _ret;
});
*max_localmem = local_mem_size / groups_per_core;
}
return 0;
}

View file

@ -35,6 +35,7 @@ typedef void* vx_buffer_h;
#define VX_CAPS_LOCAL_MEM_SIZE 0x6
#define VX_CAPS_LOCAL_MEM_ADDR 0x7
#define VX_CAPS_ISA_FLAGS 0x8
#define VX_CAPS_NUM_BARRIERS 0x9
// device isa flags
#define VX_ISA_STD_A (1ull << 0)
@ -125,6 +126,9 @@ int vx_upload_bytes(vx_device_h hdevice, const void* content, uint64_t size, vx_
// upload file to device
int vx_upload_file(vx_device_h hdevice, const char* filename, vx_buffer_h* hbuffer);
// calculate cooperative threads array occupancy
int vx_check_occupancy(vx_device_h hdevice, uint32_t group_size, uint32_t* max_barriers, uint32_t* max_localmem);
// performance counters
int vx_dump_perf(vx_device_h hdevice, FILE* stream);