mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
CSRs I/O refactoring
This commit is contained in:
parent
a46d6cb606
commit
3cc1190cd7
33 changed files with 881 additions and 1385 deletions
|
@ -76,21 +76,20 @@ extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
|
|||
return err;
|
||||
}
|
||||
|
||||
int vx_csr_get_l(vx_device_h device, int core_id, int addr, int addr_h, uint64_t* value) {
|
||||
int ret = 0;
|
||||
unsigned value_lo, value_hi;
|
||||
ret |= vx_csr_get(device, core_id, addr, &value_lo);
|
||||
ret |= vx_csr_get(device, core_id, addr_h, &value_hi);
|
||||
*value = (uint64_t(value_hi) << 32) | value_lo;
|
||||
return ret;
|
||||
/*static uint32_t get_csr_32(const uint32_t* buffer, int addr) {
|
||||
uint32_t value_lo = buffer[addr - CSR_MPM_BASE];
|
||||
return value_lo;
|
||||
}*/
|
||||
|
||||
// Build a 64-bit value from two 32-bit entries of `buffer`.
//   buffer: array of 32-bit words, indexed relative to CSR_MPM_BASE
//   addr:   CSR address of the low word
// Returns (high << 32) | low, where the high word is read 32 entries after
// the low word. No bounds checking is done on the computed indices.
// NOTE(review): presumably `buffer` is a per-core snapshot laid out as 32 low
// words followed by 32 high words -- confirm against the device-side layout.
static uint64_t get_csr_64(const uint32_t* buffer, int addr) {
|
||||
uint32_t value_lo = buffer[addr - CSR_MPM_BASE]; // low 32 bits
|
||||
uint32_t value_hi = buffer[addr - CSR_MPM_BASE + 32]; // high 32 bits, 32 entries further on
|
||||
return (uint64_t(value_hi) << 32) | value_lo;
|
||||
}
|
||||
|
||||
extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||
int ret = 0;
|
||||
|
||||
unsigned num_cores;
|
||||
vx_csr_get(device, 0, CSR_NC, &num_cores);
|
||||
|
||||
uint64_t instrs = 0;
|
||||
uint64_t cycles = 0;
|
||||
|
||||
|
@ -127,12 +126,23 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
uint64_t mem_stalls = 0;
|
||||
uint64_t mem_lat = 0;
|
||||
#endif
|
||||
|
||||
for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
|
||||
|
||||
uint64_t instrs_per_core, cycles_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_INSTRET, CSR_INSTRET_H, &instrs_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_CYCLE, CSR_CYCLE_H, &cycles_per_core);
|
||||
unsigned num_cores;
|
||||
ret = vx_dev_caps(device, VX_CAPS_MAX_CORES, &num_cores);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
vx_buffer_h staging_buf;
|
||||
ret |= vx_alloc_shared_mem(device, 64 * sizeof(uint32_t), &staging_buf);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
auto staging_ptr = (uint32_t*)vx_host_ptr(staging_buf);
|
||||
|
||||
for (unsigned core_id = 0; core_id < num_cores; ++core_id) {
|
||||
ret |= vx_copy_from_dev(staging_buf, IO_ADDR_CSR + 64 * sizeof(uint32_t) * core_id, 64 * sizeof(uint32_t), 0);
|
||||
uint64_t instrs_per_core = get_csr_64(staging_ptr, CSR_MINSTRET);
|
||||
uint64_t cycles_per_core = get_csr_64(staging_ptr, CSR_MCYCLE);
|
||||
float IPC = (float)(double(instrs_per_core) / double(cycles_per_core));
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: instrs=%ld, cycles=%ld, IPC=%f\n", core_id, instrs_per_core, cycles_per_core, IPC);
|
||||
instrs += instrs_per_core;
|
||||
|
@ -141,133 +151,110 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||
#ifdef PERF_ENABLE
|
||||
// PERF: pipeline
|
||||
// ibuffer_stall
|
||||
uint64_t ibuffer_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_IBUF_ST, CSR_MPM_IBUF_ST_H, &ibuffer_stalls_per_core);
|
||||
uint64_t ibuffer_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_IBUF_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: ibuffer stalls=%ld\n", core_id, ibuffer_stalls_per_core);
|
||||
ibuffer_stalls += ibuffer_stalls_per_core;
|
||||
// scoreboard_stall
|
||||
uint64_t scoreboard_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_SCRB_ST, CSR_MPM_SCRB_ST_H, &scoreboard_stalls_per_core);
|
||||
uint64_t scoreboard_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_SCRB_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: scoreboard stalls=%ld\n", core_id, scoreboard_stalls_per_core);
|
||||
scoreboard_stalls += scoreboard_stalls_per_core;
|
||||
// alu_stall
|
||||
uint64_t alu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ALU_ST, CSR_MPM_ALU_ST_H, &alu_stalls_per_core);
|
||||
uint64_t alu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_ALU_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: alu unit stalls=%ld\n", core_id, alu_stalls_per_core);
|
||||
alu_stalls += alu_stalls_per_core;
|
||||
// lsu_stall
|
||||
uint64_t lsu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_LSU_ST, CSR_MPM_LSU_ST_H, &lsu_stalls_per_core);
|
||||
uint64_t lsu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_LSU_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: lsu unit stalls=%ld\n", core_id, lsu_stalls_per_core);
|
||||
lsu_stalls += lsu_stalls_per_core;
|
||||
// csr_stall
|
||||
uint64_t csr_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_CSR_ST, CSR_MPM_CSR_ST_H, &csr_stalls_per_core);
|
||||
uint64_t csr_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_CSR_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: csr unit stalls=%ld\n", core_id, csr_stalls_per_core);
|
||||
csr_stalls += csr_stalls_per_core;
|
||||
// fpu_stall
|
||||
uint64_t fpu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_FPU_ST, CSR_MPM_FPU_ST_H, &fpu_stalls_per_core);
|
||||
uint64_t fpu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_FPU_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: fpu unit stalls=%ld\n", core_id, fpu_stalls_per_core);
|
||||
fpu_stalls += fpu_stalls_per_core;
|
||||
// gpu_stall
|
||||
uint64_t gpu_stalls_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_GPU_ST, CSR_MPM_GPU_ST_H, &gpu_stalls_per_core);
|
||||
uint64_t gpu_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_GPU_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: gpu unit stalls=%ld\n", core_id, gpu_stalls_per_core);
|
||||
gpu_stalls += gpu_stalls_per_core;
|
||||
|
||||
// PERF: Icache
|
||||
// total reads
|
||||
uint64_t icache_reads_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_READS, CSR_MPM_ICACHE_READS_H, &icache_reads_per_core);
|
||||
uint64_t icache_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_READS);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache reads=%ld\n", core_id, icache_reads_per_core);
|
||||
icache_reads += icache_reads_per_core;
|
||||
// read misses
|
||||
uint64_t icache_miss_r_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_MISS_R, CSR_MPM_ICACHE_MISS_R_H, &icache_miss_r_per_core);
|
||||
uint64_t icache_miss_r_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_MISS_R);
|
||||
int icache_read_hit_ratio = (int)((1.0 - (double(icache_miss_r_per_core) / double(icache_reads_per_core))) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache read misses=%ld (hit ratio=%d%%)\n", core_id, icache_miss_r_per_core, icache_read_hit_ratio);
|
||||
icache_read_misses += icache_miss_r_per_core;
|
||||
// pipeline stalls
|
||||
uint64_t icache_pipe_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_PIPE_ST, CSR_MPM_ICACHE_PIPE_ST_H, &icache_pipe_st_per_core);
|
||||
uint64_t icache_pipe_st_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_PIPE_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache pipeline stalls=%ld\n", core_id, icache_pipe_st_per_core);
|
||||
icache_pipe_stalls += icache_pipe_st_per_core;
|
||||
// response stalls
|
||||
uint64_t icache_crsp_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_ICACHE_CRSP_ST, CSR_MPM_ICACHE_CRSP_ST_H, &icache_crsp_st_per_core);
|
||||
uint64_t icache_crsp_st_per_core = get_csr_64(staging_ptr, CSR_MPM_ICACHE_CRSP_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: icache reponse stalls=%ld\n", core_id, icache_crsp_st_per_core);
|
||||
icache_rsp_stalls += icache_crsp_st_per_core;
|
||||
|
||||
// PERF: Dcache
|
||||
// total reads
|
||||
uint64_t dcache_reads_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_READS, CSR_MPM_DCACHE_READS_H, &dcache_reads_per_core);
|
||||
uint64_t dcache_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_READS);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache reads=%ld\n", core_id, dcache_reads_per_core);
|
||||
dcache_reads += dcache_reads_per_core;
|
||||
// total write
|
||||
uint64_t dcache_writes_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_WRITES, CSR_MPM_DCACHE_WRITES_H, &dcache_writes_per_core);
|
||||
uint64_t dcache_writes_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_WRITES);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache writes=%ld\n", core_id, dcache_writes_per_core);
|
||||
dcache_writes += dcache_writes_per_core;
|
||||
// read misses
|
||||
uint64_t dcache_miss_r_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MISS_R, CSR_MPM_DCACHE_MISS_R_H, &dcache_miss_r_per_core);
|
||||
uint64_t dcache_miss_r_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_MISS_R);
|
||||
int dcache_read_hit_ratio = (int)((1.0 - (double(dcache_miss_r_per_core) / double(dcache_reads_per_core))) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache read misses=%ld (hit ratio=%d%%)\n", core_id, dcache_miss_r_per_core, dcache_read_hit_ratio);
|
||||
dcache_read_misses += dcache_miss_r_per_core;
|
||||
// write misses
|
||||
uint64_t dcache_miss_w_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MISS_W, CSR_MPM_DCACHE_MISS_W_H, &dcache_miss_w_per_core);
|
||||
uint64_t dcache_miss_w_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_MISS_W);
|
||||
int dcache_write_hit_ratio = (int)((1.0 - (double(dcache_miss_w_per_core) / double(dcache_writes_per_core))) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache write misses=%ld (hit ratio=%d%%)\n", core_id, dcache_miss_w_per_core, dcache_write_hit_ratio);
|
||||
dcache_write_misses += dcache_miss_w_per_core;
|
||||
// bank_stalls
|
||||
uint64_t dcache_bank_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_BANK_ST, CSR_MPM_DCACHE_BANK_ST_H, &dcache_bank_st_per_core);
|
||||
uint64_t dcache_bank_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_BANK_ST);
|
||||
int dcache_bank_utilization = (int)((double(dcache_reads_per_core + dcache_writes_per_core) / double(dcache_reads_per_core + dcache_writes_per_core + dcache_bank_st_per_core)) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache bank stalls=%ld (utilization=%d%%)\n", core_id, dcache_bank_st_per_core, dcache_bank_utilization);
|
||||
dcache_bank_stalls += dcache_bank_st_per_core;
|
||||
// mshr_stalls
|
||||
uint64_t dcache_mshr_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MSHR_ST, CSR_MPM_DCACHE_MSHR_ST_H, &dcache_mshr_st_per_core);
|
||||
uint64_t dcache_mshr_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_MSHR_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache mshr stalls=%ld\n", core_id, dcache_mshr_st_per_core);
|
||||
dcache_mshr_stalls += dcache_mshr_st_per_core;
|
||||
// pipeline stalls
|
||||
uint64_t dcache_pipe_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_PIPE_ST, CSR_MPM_DCACHE_PIPE_ST_H, &dcache_pipe_st_per_core);
|
||||
uint64_t dcache_pipe_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_PIPE_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache pipeline stalls=%ld\n", core_id, dcache_pipe_st_per_core);
|
||||
dcache_pipe_stalls += dcache_pipe_st_per_core;
|
||||
// response stalls
|
||||
uint64_t dcache_crsp_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_CRSP_ST, CSR_MPM_DCACHE_CRSP_ST_H, &dcache_crsp_st_per_core);
|
||||
uint64_t dcache_crsp_st_per_core = get_csr_64(staging_ptr, CSR_MPM_DCACHE_CRSP_ST);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache reponse stalls=%ld\n", core_id, dcache_crsp_st_per_core);
|
||||
dcache_rsp_stalls += dcache_crsp_st_per_core;
|
||||
|
||||
// PERF: SMEM
|
||||
// total reads
|
||||
uint64_t smem_reads_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_SMEM_READS, CSR_MPM_SMEM_READS_H, &smem_reads_per_core);
|
||||
uint64_t smem_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_SMEM_READS);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem reads=%ld\n", core_id, smem_reads_per_core);
|
||||
smem_reads += smem_reads_per_core;
|
||||
// total write
|
||||
uint64_t smem_writes_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_SMEM_WRITES, CSR_MPM_SMEM_WRITES_H, &smem_writes_per_core);
|
||||
uint64_t smem_writes_per_core = get_csr_64(staging_ptr, CSR_MPM_SMEM_WRITES);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem writes=%ld\n", core_id, smem_writes_per_core);
|
||||
smem_writes += smem_writes_per_core;
|
||||
// bank_stalls
|
||||
uint64_t smem_bank_st_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_SMEM_BANK_ST, CSR_MPM_SMEM_BANK_ST_H, &smem_bank_st_per_core);
|
||||
uint64_t smem_bank_st_per_core = get_csr_64(staging_ptr, CSR_MPM_SMEM_BANK_ST);
|
||||
int smem_bank_utilization = (int)((double(smem_reads_per_core + smem_writes_per_core) / double(smem_reads_per_core + smem_writes_per_core + smem_bank_st_per_core)) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: smem bank stalls=%ld (utilization=%d%%)\n", core_id, smem_bank_st_per_core, smem_bank_utilization);
|
||||
smem_bank_stalls += smem_bank_st_per_core;
|
||||
|
||||
// PERF: memory
|
||||
uint64_t mem_reads_per_core, mem_writes_per_core, mem_stalls_per_core, mem_lat_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MEM_READS, CSR_MPM_MEM_READS_H, &mem_reads_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MEM_WRITES, CSR_MPM_MEM_WRITES_H, &mem_writes_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MEM_ST, CSR_MPM_MEM_ST_H, &mem_stalls_per_core);
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MEM_LAT, CSR_MPM_MEM_LAT_H, &mem_lat_per_core);
|
||||
uint64_t mem_reads_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_READS);
|
||||
uint64_t mem_writes_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_WRITES);
|
||||
uint64_t mem_stalls_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_ST);
|
||||
uint64_t mem_lat_per_core = get_csr_64(staging_ptr, CSR_MPM_MEM_LAT);
|
||||
int mem_utilization = (int)((double(mem_reads_per_core + mem_writes_per_core) / double(mem_reads_per_core + mem_writes_per_core + mem_stalls_per_core)) * 100);
|
||||
int mem_avg_lat = (int)(double(mem_lat_per_core) / double(mem_reads_per_core));
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: memory requests=%ld (reads=%ld, writes=%ld)\n", core_id, (mem_reads_per_core + mem_writes_per_core), mem_reads_per_core, mem_writes_per_core);
|
||||
|
|
|
@ -59,12 +59,6 @@ int vx_start(vx_device_h hdevice);
|
|||
// Wait for device ready with milliseconds timeout
|
||||
int vx_ready_wait(vx_device_h hdevice, long long timeout);
|
||||
|
||||
// set device constant registers
|
||||
int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value);
|
||||
|
||||
// get device constant registers
|
||||
int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value);
|
||||
|
||||
////////////////////////////// UTILITY FUNCTIONS ///////////////////////////////
|
||||
|
||||
// upload kernel bytes to device
|
||||
|
|
|
@ -37,25 +37,20 @@
|
|||
#define CMD_MEM_READ AFU_IMAGE_CMD_MEM_READ
|
||||
#define CMD_MEM_WRITE AFU_IMAGE_CMD_MEM_WRITE
|
||||
#define CMD_RUN AFU_IMAGE_CMD_RUN
|
||||
#define CMD_CSR_READ AFU_IMAGE_CMD_CSR_READ
|
||||
#define CMD_CSR_WRITE AFU_IMAGE_CMD_CSR_WRITE
|
||||
|
||||
#define MMIO_CMD_TYPE (AFU_IMAGE_MMIO_CMD_TYPE * 4)
|
||||
#define MMIO_IO_ADDR (AFU_IMAGE_MMIO_IO_ADDR * 4)
|
||||
#define MMIO_MEM_ADDR (AFU_IMAGE_MMIO_MEM_ADDR * 4)
|
||||
#define MMIO_DATA_SIZE (AFU_IMAGE_MMIO_DATA_SIZE * 4)
|
||||
#define MMIO_DEV_CAPS (AFU_IMAGE_MMIO_DEV_CAPS * 4)
|
||||
#define MMIO_STATUS (AFU_IMAGE_MMIO_STATUS * 4)
|
||||
#define MMIO_CSR_CORE (AFU_IMAGE_MMIO_CSR_CORE * 4)
|
||||
#define MMIO_CSR_ADDR (AFU_IMAGE_MMIO_CSR_ADDR * 4)
|
||||
#define MMIO_CSR_DATA (AFU_IMAGE_MMIO_CSR_DATA * 4)
|
||||
#define MMIO_CSR_READ (AFU_IMAGE_MMIO_CSR_READ * 4)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef struct vx_device_ {
|
||||
fpga_handle fpga;
|
||||
size_t mem_allocation;
|
||||
unsigned implementation_id;
|
||||
unsigned version;
|
||||
unsigned num_cores;
|
||||
unsigned num_warps;
|
||||
unsigned num_threads;
|
||||
|
@ -89,7 +84,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
|||
|
||||
switch (caps_id) {
|
||||
case VX_CAPS_VERSION:
|
||||
*value = device->implementation_id;
|
||||
*value = device->version;
|
||||
break;
|
||||
case VX_CAPS_MAX_CORES:
|
||||
*value = device->num_cores;
|
||||
|
@ -195,21 +190,22 @@ extern int vx_dev_open(vx_device_h* hdevice) {
|
|||
|
||||
device->fpga = accel_handle;
|
||||
device->mem_allocation = ALLOC_BASE_ADDR;
|
||||
|
||||
|
||||
{
|
||||
// Load device CAPS
|
||||
int ret = 0;
|
||||
ret |= vx_csr_get(device, 0, CSR_MIMPID, &device->implementation_id);
|
||||
ret |= vx_csr_get(device, 0, CSR_NC, &device->num_cores);
|
||||
ret |= vx_csr_get(device, 0, CSR_NW, &device->num_warps);
|
||||
ret |= vx_csr_get(device, 0, CSR_NT, &device->num_threads);
|
||||
uint64_t dev_caps;
|
||||
int ret = fpgaReadMMIO64(device->fpga, 0, MMIO_DEV_CAPS, &dev_caps);
|
||||
if (ret != FPGA_OK) {
|
||||
fpgaClose(accel_handle);
|
||||
return ret;
|
||||
}
|
||||
device->version = (dev_caps >> 0) & 0xffff;
|
||||
device->num_cores = (dev_caps >> 16) & 0xffff;
|
||||
device->num_warps = (dev_caps >> 32) & 0xffff;
|
||||
device->num_threads = (dev_caps >> 48) & 0xffff;
|
||||
#ifndef NDEBUG
|
||||
fprintf(stdout, "[VXDRV] DEVCAPS: version=%d, num_cores=%d, num_warps=%d, num_threads=%d\n",
|
||||
device->implementation_id, device->num_cores, device->num_warps, device->num_threads);
|
||||
device->version, device->num_cores, device->num_warps, device->num_threads);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -470,52 +466,5 @@ extern int vx_start(vx_device_h hdevice) {
|
|||
// start execution
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_RUN));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Set a device CSR through the MMIO command interface.
//   hdevice: device handle; a null handle makes the call return -1
//   core_id: value written to the MMIO_CSR_CORE register
//   addr:    value written to the MMIO_CSR_ADDR register
//   value:   value written to the MMIO_CSR_DATA register
// Returns 0 once the write command has been issued, -1 if the handle is null
// or vx_ready_wait() reports a non-zero status.
// NOTE(review): CHECK_RES is defined elsewhere; presumably it returns early
// when the MMIO access fails -- confirm.
|
||||
extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||
|
||||
// Ensure ready for new command
// NOTE(review): timeout of -1 presumably means "no timeout" -- confirm.
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
// load the target core, CSR address and data, then issue the write command
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA, value));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_WRITE));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Read a device CSR through the MMIO command interface.
//   hdevice: device handle
//   core_id: value written to the MMIO_CSR_CORE register
//   addr:    value written to the MMIO_CSR_ADDR register
//   value:   output; receives the result narrowed to `unsigned`
// Returns 0 on success, -1 if `hdevice` or `value` is null or either
// vx_ready_wait() call reports a non-zero status.
// NOTE(review): CHECK_RES is defined elsewhere; presumably it returns early
// when the MMIO access fails -- confirm.
|
||||
extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) {
|
||||
if (nullptr == hdevice || nullptr == value)
|
||||
return -1;
|
||||
|
||||
vx_device_t *device = ((vx_device_t*)hdevice);
|
||||
|
||||
// Ensure ready for new command
// NOTE(review): timeout of -1 presumably means "no timeout" -- confirm.
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
// select the target core and CSR address, then issue the read command
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core_id));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, addr));
|
||||
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_READ));
|
||||
|
||||
// wait until the device reports ready again before fetching the result
|
||||
if (vx_ready_wait(hdevice, -1) != 0)
|
||||
return -1;
|
||||
|
||||
uint64_t value64;
|
||||
CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_READ, &value64));
|
||||
*value = (unsigned)value64; // register is read as 64 bits and narrowed to unsigned
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -7,21 +7,16 @@
|
|||
|
||||
#define AFU_ACCEL_NAME "vortex_afu"
|
||||
#define AFU_ACCEL_UUID "35F9452B-25C2-434C-93D5-6F8C60DB361C"
|
||||
#define AFU_IMAGE_CMD_CSR_READ 4
|
||||
#define AFU_IMAGE_CMD_CSR_WRITE 5
|
||||
#define AFU_IMAGE_CMD_MEM_READ 1
|
||||
#define AFU_IMAGE_CMD_MEM_WRITE 2
|
||||
#define AFU_IMAGE_CMD_RUN 3
|
||||
#define AFU_IMAGE_MMIO_CMD_TYPE 10
|
||||
#define AFU_IMAGE_MMIO_CSR_ADDR 26
|
||||
#define AFU_IMAGE_MMIO_CSR_CORE 24
|
||||
#define AFU_IMAGE_MMIO_CSR_DATA 28
|
||||
#define AFU_IMAGE_MMIO_CSR_READ 30
|
||||
#define AFU_IMAGE_MMIO_DATA_SIZE 16
|
||||
#define AFU_IMAGE_MMIO_IO_ADDR 12
|
||||
#define AFU_IMAGE_MMIO_MEM_ADDR 14
|
||||
#define AFU_IMAGE_MMIO_SCOPE_READ 20
|
||||
#define AFU_IMAGE_MMIO_SCOPE_WRITE 22
|
||||
#define AFU_IMAGE_MMIO_DEV_CAPS 24
|
||||
#define AFU_IMAGE_MMIO_STATUS 18
|
||||
#define AFU_IMAGE_POWER 0
|
||||
#define AFU_TOP_IFC "ccip_std_afu_avalon_mm"
|
||||
|
|
|
@ -144,28 +144,6 @@ public:
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Write `value` to CSR `addr` of core `core_id` on the simulator.
// If a previous asynchronous run is still tracked by future_, it is waited on
// first; the simulator is then stepped until it no longer reports an active
// CSR request. Always returns 0.
int set_csr(int core_id, int addr, unsigned value) {
|
||||
if (future_.valid()) {
|
||||
future_.wait(); // ensure prior run completed
|
||||
}
|
||||
simulator_.set_csr(core_id, addr, value);
|
||||
// step the simulator until the CSR request is no longer active
while (simulator_.csr_req_active()) {
|
||||
simulator_.step();
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Read CSR `addr` of core `core_id` on the simulator; `value` is passed to
// simulator_.get_csr() as the destination.
// If a previous asynchronous run is still tracked by future_, it is waited on
// first; the simulator is then stepped until it no longer reports an active
// CSR request. Always returns 0.
// NOTE(review): presumably *value is only valid once the stepping loop has
// finished -- confirm against the simulator implementation.
int get_csr(int core_id, int addr, unsigned *value) {
|
||||
if (future_.valid()) {
|
||||
future_.wait(); // ensure prior run completed
|
||||
}
|
||||
simulator_.get_csr(core_id, addr, value);
|
||||
// step the simulator until the CSR request is no longer active
while (simulator_.csr_req_active()) {
|
||||
simulator_.step();
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
size_t mem_allocation_;
|
||||
|
@ -330,22 +308,4 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
|||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->wait(timeout);
|
||||
}
|
||||
|
||||
// Driver entry point: set a CSR by forwarding to the device object's set_csr().
// Returns -1 if `hdevice` is null, otherwise whatever set_csr() returns.
extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
// the opaque handle is the vx_device object itself
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->set_csr(core_id, addr, value);
|
||||
}
|
||||
|
||||
// Driver entry point: read a CSR by forwarding to the device object's get_csr().
// Returns -1 if `hdevice` is null, otherwise whatever get_csr() returns.
// `value` is forwarded as-is; it is not checked for null here.
extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned* value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
// the opaque handle is the vx_device object itself
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->get_csr(core_id, addr, value);
|
||||
}
|
|
@ -376,22 +376,4 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) {
|
|||
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->wait(timeout);
|
||||
}
|
||||
|
||||
// Driver entry point: set a CSR by forwarding to the device object's set_csr().
// Returns -1 if `hdevice` is null, otherwise whatever set_csr() returns.
extern int vx_csr_set(vx_device_h hdevice, int core_id, int addr, unsigned value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
// the opaque handle is the vx_device object itself
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->set_csr(core_id, addr, value);
|
||||
}
|
||||
|
||||
// Driver entry point: read a CSR by forwarding to the device object's get_csr().
// Returns -1 if `hdevice` is null, otherwise whatever get_csr() returns.
// `value` is forwarded as-is; it is not checked for null here.
extern int vx_csr_get(vx_device_h hdevice, int core_id, int addr, unsigned *value) {
|
||||
if (nullptr == hdevice)
|
||||
return -1;
|
||||
|
||||
// the opaque handle is the vx_device object itself
vx_device *device = ((vx_device*)hdevice);
|
||||
|
||||
return device->get_csr(core_id, addr, value);
|
||||
}
|
|
@ -42,12 +42,4 @@ extern int vx_start(vx_device_h /*hdevice*/) {
|
|||
|
||||
// Stub implementation: ignores both arguments and always returns -1.
extern int vx_ready_wait(vx_device_h /*hdevice*/, long long /*timeout*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Stub implementation: ignores all arguments and always returns -1.
extern int vx_csr_set(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned /*value*/) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Stub implementation: ignores all arguments, never writes through the output
// pointer, and always returns -1.
extern int vx_csr_get(vx_device_h /*hdevice*/, int /*core_id*/, int /*addr*/, unsigned* /*value*/) {
|
||||
return -1;
|
||||
}
|
Binary file not shown.
File diff suppressed because it is too large
Load diff
Binary file not shown.
|
@ -24,19 +24,6 @@ module VX_cluster #(
|
|||
input wire [`L2MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// CSR Request
|
||||
input wire csr_req_valid,
|
||||
input wire [`NC_BITS-1:0] csr_req_coreid,
|
||||
input wire [11:0] csr_req_addr,
|
||||
input wire csr_req_rw,
|
||||
input wire [31:0] csr_req_data,
|
||||
output wire csr_req_ready,
|
||||
|
||||
// CSR Response
|
||||
output wire csr_rsp_valid,
|
||||
output wire [31:0] csr_rsp_data,
|
||||
input wire csr_rsp_ready,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
|
@ -55,16 +42,6 @@ module VX_cluster #(
|
|||
wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0] per_core_mem_rsp_tag;
|
||||
wire [`NUM_CORES-1:0] per_core_mem_rsp_ready;
|
||||
|
||||
wire [`NUM_CORES-1:0] per_core_csr_req_valid;
|
||||
wire [`NUM_CORES-1:0][11:0] per_core_csr_req_addr;
|
||||
wire [`NUM_CORES-1:0] per_core_csr_req_rw;
|
||||
wire [`NUM_CORES-1:0][31:0] per_core_csr_req_data;
|
||||
wire [`NUM_CORES-1:0] per_core_csr_req_ready;
|
||||
|
||||
wire [`NUM_CORES-1:0] per_core_csr_rsp_valid;
|
||||
wire [`NUM_CORES-1:0][31:0] per_core_csr_rsp_data;
|
||||
wire [`NUM_CORES-1:0] per_core_csr_rsp_ready;
|
||||
|
||||
wire [`NUM_CORES-1:0] per_core_busy;
|
||||
|
||||
for (genvar i = 0; i < `NUM_CORES; i++) begin
|
||||
|
@ -99,56 +76,9 @@ module VX_cluster #(
|
|||
.mem_rsp_tag (per_core_mem_rsp_tag [i]),
|
||||
.mem_rsp_ready (per_core_mem_rsp_ready[i]),
|
||||
|
||||
.csr_req_valid (per_core_csr_req_valid [i]),
|
||||
.csr_req_rw (per_core_csr_req_rw [i]),
|
||||
.csr_req_addr (per_core_csr_req_addr [i]),
|
||||
.csr_req_data (per_core_csr_req_data [i]),
|
||||
.csr_req_ready (per_core_csr_req_ready [i]),
|
||||
|
||||
.csr_rsp_valid (per_core_csr_rsp_valid [i]),
|
||||
.csr_rsp_data (per_core_csr_rsp_data [i]),
|
||||
.csr_rsp_ready (per_core_csr_rsp_ready [i]),
|
||||
|
||||
.busy (per_core_busy [i])
|
||||
);
|
||||
end
|
||||
|
||||
VX_csr_arb #(
|
||||
.NUM_REQS (`NUM_CORES),
|
||||
.DATA_WIDTH (32),
|
||||
.ADDR_WIDTH (12),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
) csr_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.request_id (csr_req_coreid),
|
||||
|
||||
// input requests
|
||||
.req_valid_in (csr_req_valid),
|
||||
.req_addr_in (csr_req_addr),
|
||||
.req_rw_in (csr_req_rw),
|
||||
.req_data_in (csr_req_data),
|
||||
.req_ready_in (csr_req_ready),
|
||||
|
||||
// output request
|
||||
.req_valid_out (per_core_csr_req_valid),
|
||||
.req_addr_out (per_core_csr_req_addr),
|
||||
.req_rw_out (per_core_csr_req_rw),
|
||||
.req_data_out (per_core_csr_req_data),
|
||||
.req_ready_out (per_core_csr_req_ready),
|
||||
|
||||
// input responses
|
||||
.rsp_valid_in (per_core_csr_rsp_valid),
|
||||
.rsp_data_in (per_core_csr_rsp_data),
|
||||
.rsp_ready_in (per_core_csr_rsp_ready),
|
||||
|
||||
// output response
|
||||
.rsp_valid_out (csr_rsp_valid),
|
||||
.rsp_data_out (csr_rsp_data),
|
||||
.rsp_ready_out (csr_rsp_ready)
|
||||
);
|
||||
|
||||
assign busy = (| per_core_busy);
|
||||
|
||||
|
|
|
@ -57,6 +57,10 @@
|
|||
`define IO_ADDR_COUT 32'hFFFFFFFC
|
||||
`endif
|
||||
|
||||
`ifndef IO_ADDR_CSR
|
||||
`define IO_ADDR_CSR `IO_BASE_ADDR
|
||||
`endif
|
||||
|
||||
`ifndef SMEM_BASE_ADDR
|
||||
`define SMEM_BASE_ADDR `IO_BASE_ADDR
|
||||
`endif
|
||||
|
@ -147,28 +151,30 @@
|
|||
|
||||
`define CSR_MEPC 12'h341
|
||||
|
||||
// Machine Counter/Timers
|
||||
`define CSR_CYCLE 12'hC00
|
||||
`define CSR_CYCLE_H 12'hC80
|
||||
`define CSR_INSTRET 12'hC02
|
||||
`define CSR_INSTRET_H 12'hC82
|
||||
|
||||
// Machine Performance-monitoring counters
|
||||
`define CSR_MPM_BASE 12'hB00
|
||||
`define CSR_MPM_BASE_H 12'hB80
|
||||
// PERF: pipeline
|
||||
`define CSR_MPM_IBUF_ST 12'hB03
|
||||
`define CSR_MPM_IBUF_ST_H 12'hB83
|
||||
`define CSR_MPM_SCRB_ST 12'hB04
|
||||
`define CSR_MPM_SCRB_ST_H 12'hB84
|
||||
`define CSR_MPM_ALU_ST 12'hB05
|
||||
`define CSR_MPM_ALU_ST_H 12'hB85
|
||||
`define CSR_MPM_LSU_ST 12'hB06
|
||||
`define CSR_MPM_LSU_ST_H 12'hB86
|
||||
`define CSR_MPM_CSR_ST 12'hB07
|
||||
`define CSR_MPM_CSR_ST_H 12'hB87
|
||||
`define CSR_MPM_FPU_ST 12'hB08
|
||||
`define CSR_MPM_FPU_ST_H 12'hB88
|
||||
`define CSR_MPM_GPU_ST 12'hB09
|
||||
`define CSR_MPM_GPU_ST_H 12'hB89
|
||||
`define CSR_MCYCLE 12'hB00
|
||||
`define CSR_MCYCLE_H 12'hB80
|
||||
`define CSR_MPM_RESERVED 12'hB01
|
||||
`define CSR_MPM_RESERVED_H 12'hB81
|
||||
`define CSR_MINSTRET 12'hB02
|
||||
`define CSR_MINSTRET_H 12'hB82
|
||||
`define CSR_MPM_IBUF_ST 12'hB03
|
||||
`define CSR_MPM_IBUF_ST_H 12'hB83
|
||||
`define CSR_MPM_SCRB_ST 12'hB04
|
||||
`define CSR_MPM_SCRB_ST_H 12'hB84
|
||||
`define CSR_MPM_ALU_ST 12'hB05
|
||||
`define CSR_MPM_ALU_ST_H 12'hB85
|
||||
`define CSR_MPM_LSU_ST 12'hB06
|
||||
`define CSR_MPM_LSU_ST_H 12'hB86
|
||||
`define CSR_MPM_CSR_ST 12'hB07
|
||||
`define CSR_MPM_CSR_ST_H 12'hB87
|
||||
`define CSR_MPM_FPU_ST 12'hB08
|
||||
`define CSR_MPM_FPU_ST_H 12'hB88
|
||||
`define CSR_MPM_GPU_ST 12'hB09
|
||||
`define CSR_MPM_GPU_ST_H 12'hB89
|
||||
// PERF: icache
|
||||
`define CSR_MPM_ICACHE_READS 12'hB0A // total reads
|
||||
`define CSR_MPM_ICACHE_READS_H 12'hB8A
|
||||
|
@ -196,21 +202,21 @@
|
|||
`define CSR_MPM_DCACHE_CRSP_ST 12'hB15 // core response stalls
|
||||
`define CSR_MPM_DCACHE_CRSP_ST_H 12'hB95
|
||||
// PERF: smem
|
||||
`define CSR_MPM_SMEM_READS 12'hB16 // total reads
|
||||
`define CSR_MPM_SMEM_READS_H 12'hB96
|
||||
`define CSR_MPM_SMEM_WRITES 12'hB17 // total writes
|
||||
`define CSR_MPM_SMEM_WRITES_H 12'hB97
|
||||
`define CSR_MPM_SMEM_BANK_ST 12'hB18 // bank conflicts stalls
|
||||
`define CSR_MPM_SMEM_BANK_ST_H 12'hB98
|
||||
`define CSR_MPM_SMEM_READS 12'hB16 // total reads
|
||||
`define CSR_MPM_SMEM_READS_H 12'hB96
|
||||
`define CSR_MPM_SMEM_WRITES 12'hB17 // total writes
|
||||
`define CSR_MPM_SMEM_WRITES_H 12'hB97
|
||||
`define CSR_MPM_SMEM_BANK_ST 12'hB18 // bank conflicts stalls
|
||||
`define CSR_MPM_SMEM_BANK_ST_H 12'hB98
|
||||
// PERF: memory
|
||||
`define CSR_MPM_MEM_READS 12'hB19 // memory reads
|
||||
`define CSR_MPM_MEM_READS_H 12'hB99
|
||||
`define CSR_MPM_MEM_WRITES 12'hB1A // memory writes
|
||||
`define CSR_MPM_MEM_WRITES_H 12'hB9A
|
||||
`define CSR_MPM_MEM_ST 12'hB1B // memory request stalls
|
||||
`define CSR_MPM_MEM_ST_H 12'hB9B
|
||||
`define CSR_MPM_MEM_LAT 12'hB1C // memory latency (total)
|
||||
`define CSR_MPM_MEM_LAT_H 12'hB9C
|
||||
`define CSR_MPM_MEM_READS 12'hB19 // memory reads
|
||||
`define CSR_MPM_MEM_READS_H 12'hB99
|
||||
`define CSR_MPM_MEM_WRITES 12'hB1A // memory writes
|
||||
`define CSR_MPM_MEM_WRITES_H 12'hB9A
|
||||
`define CSR_MPM_MEM_ST 12'hB1B // memory request stalls
|
||||
`define CSR_MPM_MEM_ST_H 12'hB9B
|
||||
`define CSR_MPM_MEM_LAT 12'hB1C // memory latency (total)
|
||||
`define CSR_MPM_MEM_LAT_H 12'hB9C
|
||||
|
||||
// Machine Information Registers
|
||||
`define CSR_MVENDORID 12'hF11
|
||||
|
|
|
@ -24,18 +24,6 @@ module VX_core #(
|
|||
input wire [`XMEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// CSR request
|
||||
input wire csr_req_valid,
|
||||
input wire [11:0] csr_req_addr,
|
||||
input wire csr_req_rw,
|
||||
input wire [31:0] csr_req_data,
|
||||
output wire csr_req_ready,
|
||||
|
||||
// CSR response
|
||||
output wire csr_rsp_valid,
|
||||
output wire [31:0] csr_rsp_data,
|
||||
input wire csr_rsp_ready,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
|
@ -127,19 +115,7 @@ module VX_core #(
|
|||
.icache_rsp_valid (icache_core_rsp_if.valid),
|
||||
.icache_rsp_data (icache_core_rsp_if.data),
|
||||
.icache_rsp_tag (icache_core_rsp_if.tag),
|
||||
.icache_rsp_ready (icache_core_rsp_if.ready),
|
||||
|
||||
// CSR request
|
||||
.csr_req_valid (csr_req_valid),
|
||||
.csr_req_rw (csr_req_rw),
|
||||
.csr_req_addr (csr_req_addr),
|
||||
.csr_req_data (csr_req_data),
|
||||
.csr_req_ready (csr_req_ready),
|
||||
|
||||
// CSR response
|
||||
.csr_rsp_valid (csr_rsp_valid),
|
||||
.csr_rsp_data (csr_rsp_data),
|
||||
.csr_rsp_ready (csr_rsp_ready),
|
||||
.icache_rsp_ready (icache_core_rsp_if.ready),
|
||||
|
||||
// Status
|
||||
.busy(busy)
|
||||
|
|
|
@ -1,82 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// VX_csr_arb: connects one CSR request/response port to NUM_REQS downstream ports.
//   - Request path: {addr, rw, data} is packed into one word and handed to a
//     VX_stream_demux whose select input is request_id (presumably the demux
//     forwards the request to the selected output only - confirm in VX_stream_demux).
//   - Response path: the NUM_REQS response streams are merged into a single
//     output by a VX_stream_arbiter instantiated with TYPE "X".
// Parameters:
//   NUM_REQS      number of downstream ports
//   DATA_WIDTH    width of request/response data in bits
//   BUFFERED_REQ  forwarded to the demux's BUFFERED parameter
//   BUFFERED_RSP  forwarded to the arbiter's BUFFERED parameter
//   DATA_SIZE, ADDR_WIDTH, LOG_NUM_REQS  default to values derived from the above
module VX_csr_arb #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter DATA_WIDTH = 1,
|
||||
parameter BUFFERED_REQ = 0,
|
||||
parameter BUFFERED_RSP = 0,
|
||||
|
||||
parameter DATA_SIZE = (DATA_WIDTH / 8),
|
||||
parameter ADDR_WIDTH = 32 - `CLOG2(DATA_SIZE),
|
||||
parameter LOG_NUM_REQS = `LOG2UP(NUM_REQS)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// index of the downstream port the current input request is meant for
input wire [LOG_NUM_REQS-1:0] request_id,
|
||||
|
||||
// input requests
|
||||
input wire req_valid_in,
|
||||
input wire [ADDR_WIDTH-1:0] req_addr_in,
|
||||
input wire req_rw_in,
|
||||
input wire [DATA_WIDTH-1:0] req_data_in,
|
||||
output wire req_ready_in,
|
||||
|
||||
// output request
|
||||
output wire [NUM_REQS-1:0] req_valid_out,
|
||||
output wire [NUM_REQS-1:0][ADDR_WIDTH-1:0] req_addr_out,
|
||||
output wire [NUM_REQS-1:0] req_rw_out,
|
||||
output wire [NUM_REQS-1:0][DATA_WIDTH-1:0] req_data_out,
|
||||
input wire [NUM_REQS-1:0] req_ready_out,
|
||||
|
||||
// input response
|
||||
input wire [NUM_REQS-1:0] rsp_valid_in,
|
||||
input wire [NUM_REQS-1:0][DATA_WIDTH-1:0] rsp_data_in,
|
||||
output wire [NUM_REQS-1:0] rsp_ready_in,
|
||||
|
||||
// output response
|
||||
output wire rsp_valid_out,
|
||||
output wire [DATA_WIDTH-1:0] rsp_data_out,
|
||||
input wire rsp_ready_out
|
||||
);
|
||||
// packed request word: address + 1 rw bit + data
localparam REQ_DATAW = ADDR_WIDTH + 1 + DATA_WIDTH;
|
||||
// responses carry data only
localparam RSP_DATAW = DATA_WIDTH;
|
||||
|
||||
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_merged_data_out;
|
||||
// unpack each demux output word back into addr/rw/data; the field order
// must match the concatenation fed to the demux's data_in below
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign {req_addr_out[i], req_rw_out[i], req_data_out[i]} = req_merged_data_out[i];
|
||||
end
|
||||
|
||||
// request side: one input stream, NUM_REQS output streams, selected by request_id
VX_stream_demux #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.DATAW (REQ_DATAW),
|
||||
.BUFFERED (BUFFERED_REQ)
|
||||
) req_demux (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.sel (request_id),
|
||||
.valid_in (req_valid_in),
|
||||
.data_in ({req_addr_in, req_rw_in, req_data_in}),
|
||||
.ready_in (req_ready_in),
|
||||
.valid_out (req_valid_out),
|
||||
.data_out (req_merged_data_out),
|
||||
.ready_out (req_ready_out)
|
||||
);
|
||||
|
||||
// response side: NUM_REQS input streams merged into one output stream
VX_stream_arbiter #(
|
||||
.NUM_REQS (NUM_REQS),
|
||||
.DATAW (RSP_DATAW),
|
||||
.BUFFERED (BUFFERED_RSP),
|
||||
.TYPE ("X") // fixed arbitration
|
||||
) rsp_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (rsp_valid_in),
|
||||
.data_in (rsp_data_in),
|
||||
.ready_in (rsp_ready_in),
|
||||
.valid_out (rsp_valid_out),
|
||||
.data_out (rsp_data_out),
|
||||
.ready_out (rsp_ready_out)
|
||||
);
|
||||
|
||||
endmodule
|
|
@ -96,20 +96,25 @@ module VX_csr_data #(
|
|||
always @(*) begin
|
||||
read_data_r = 'x;
|
||||
case (read_addr)
|
||||
`CSR_FFLAGS : read_data_r = 32'(fcsr[read_wid][`FFG_BITS-1:0]);
|
||||
`CSR_FRM : read_data_r = 32'(fcsr[read_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS]);
|
||||
`CSR_FCSR : read_data_r = 32'(fcsr[read_wid]);
|
||||
`CSR_FFLAGS : read_data_r = 32'(fcsr[read_wid][`FFG_BITS-1:0]);
|
||||
`CSR_FRM : read_data_r = 32'(fcsr[read_wid][`FRM_BITS+`FFG_BITS-1:`FFG_BITS]);
|
||||
`CSR_FCSR : read_data_r = 32'(fcsr[read_wid]);
|
||||
|
||||
`CSR_WTID ,
|
||||
`CSR_LTID ,
|
||||
`CSR_LWID : read_data_r = 32'(read_wid);
|
||||
`CSR_GTID ,
|
||||
`CSR_WTID ,
|
||||
`CSR_LTID ,
|
||||
`CSR_LWID : read_data_r = 32'(read_wid);
|
||||
`CSR_GTID ,
|
||||
/*`CSR_MHARTID ,*/
|
||||
`CSR_GWID : read_data_r = CORE_ID * `NUM_WARPS + 32'(read_wid);
|
||||
`CSR_GCID : read_data_r = CORE_ID;
|
||||
`CSR_NT : read_data_r = `NUM_THREADS;
|
||||
`CSR_NW : read_data_r = `NUM_WARPS;
|
||||
`CSR_NC : read_data_r = `NUM_CORES * `NUM_CLUSTERS;
|
||||
`CSR_GWID : read_data_r = CORE_ID * `NUM_WARPS + 32'(read_wid);
|
||||
`CSR_GCID : read_data_r = CORE_ID;
|
||||
`CSR_NT : read_data_r = `NUM_THREADS;
|
||||
`CSR_NW : read_data_r = `NUM_WARPS;
|
||||
`CSR_NC : read_data_r = `NUM_CORES * `NUM_CLUSTERS;
|
||||
|
||||
`CSR_MCYCLE : read_data_r = csr_cycle[31:0];
|
||||
`CSR_MCYCLE_H : read_data_r = 32'(csr_cycle[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MINSTRET : read_data_r = csr_instret[31:0];
|
||||
`CSR_MINSTRET_H : read_data_r = 32'(csr_instret[`PERF_CTR_BITS-1:32]);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
// PERF: pipeline
|
||||
|
@ -154,12 +159,12 @@ module VX_csr_data #(
|
|||
`CSR_MPM_DCACHE_CRSP_ST : read_data_r = perf_memsys_if.dcache_crsp_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.dcache_crsp_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: smem
|
||||
`CSR_MPM_SMEM_READS : read_data_r = perf_memsys_if.smem_reads[31:0];
|
||||
`CSR_MPM_SMEM_READS_H : read_data_r = 32'(perf_memsys_if.smem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SMEM_WRITES : read_data_r = perf_memsys_if.smem_writes[31:0];
|
||||
`CSR_MPM_SMEM_WRITES_H : read_data_r = 32'(perf_memsys_if.smem_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SMEM_BANK_ST : read_data_r = perf_memsys_if.smem_bank_stalls[31:0];
|
||||
`CSR_MPM_SMEM_BANK_ST_H : read_data_r = 32'(perf_memsys_if.smem_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SMEM_READS : read_data_r = perf_memsys_if.smem_reads[31:0];
|
||||
`CSR_MPM_SMEM_READS_H : read_data_r = 32'(perf_memsys_if.smem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SMEM_WRITES : read_data_r = perf_memsys_if.smem_writes[31:0];
|
||||
`CSR_MPM_SMEM_WRITES_H : read_data_r = 32'(perf_memsys_if.smem_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SMEM_BANK_ST : read_data_r = perf_memsys_if.smem_bank_stalls[31:0];
|
||||
`CSR_MPM_SMEM_BANK_ST_H : read_data_r = 32'(perf_memsys_if.smem_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: MEM
|
||||
`CSR_MPM_MEM_READS : read_data_r = perf_memsys_if.mem_reads[31:0];
|
||||
`CSR_MPM_MEM_READS_H : read_data_r = 32'(perf_memsys_if.mem_reads[`PERF_CTR_BITS-1:32]);
|
||||
|
@ -169,6 +174,9 @@ module VX_csr_data #(
|
|||
`CSR_MPM_MEM_ST_H : read_data_r = 32'(perf_memsys_if.mem_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_MEM_LAT : read_data_r = perf_memsys_if.mem_latency[31:0];
|
||||
`CSR_MPM_MEM_LAT_H : read_data_r = 32'(perf_memsys_if.mem_latency[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: reserved
|
||||
`CSR_MPM_RESERVED : read_data_r = '0;
|
||||
`CSR_MPM_RESERVED_H : read_data_r = '0;
|
||||
`endif
|
||||
|
||||
`CSR_SATP : read_data_r = 32'(csr_satp);
|
||||
|
@ -185,17 +193,15 @@ module VX_csr_data #(
|
|||
`CSR_PMPCFG0 : read_data_r = 32'(csr_pmpcfg[0]);
|
||||
`CSR_PMPADDR0 : read_data_r = 32'(csr_pmpaddr[0]);
|
||||
|
||||
`CSR_CYCLE : read_data_r = csr_cycle[31:0];
|
||||
`CSR_CYCLE_H : read_data_r = 32'(csr_cycle[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_INSTRET : read_data_r = csr_instret[31:0];
|
||||
`CSR_INSTRET_H : read_data_r = 32'(csr_instret[`PERF_CTR_BITS-1:32]);
|
||||
|
||||
`CSR_MVENDORID : read_data_r = `VENDOR_ID;
|
||||
`CSR_MARCHID : read_data_r = `ARCHITECTURE_ID;
|
||||
`CSR_MIMPID : read_data_r = `IMPLEMENTATION_ID;
|
||||
|
||||
default: begin
|
||||
assert(~read_enable) else $error("%t: invalid CSR read address: %0h", $time, read_addr);
|
||||
default: begin
|
||||
if (!((read_addr >= `CSR_MPM_BASE && read_addr < (`CSR_MPM_BASE + 32))
|
||||
| (read_addr >= `CSR_MPM_BASE_H && read_addr < (`CSR_MPM_BASE_H + 32)))) begin
|
||||
assert(~read_enable) else $error("%t: invalid CSR read address: %0h", $time, read_addr);
|
||||
end
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
|
|
@ -7,49 +7,29 @@ module VX_csr_unit #(
|
|||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
|
||||
VX_csr_io_req_if csr_io_req_if,
|
||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||
VX_fpu_to_csr_if fpu_to_csr_if,
|
||||
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_commit_if csr_commit_if,
|
||||
|
||||
input wire busy,
|
||||
|
||||
input wire[`NUM_WARPS-1:0] fpu_pending,
|
||||
output wire[`NUM_WARPS-1:0] pending
|
||||
);
|
||||
VX_csr_pipe_req_if csr_pipe_req_if();
|
||||
VX_commit_if csr_pipe_rsp_if();
|
||||
|
||||
wire select_io_rsp;
|
||||
|
||||
VX_csr_io_arb csr_io_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.select_io_rsp (select_io_rsp),
|
||||
|
||||
.csr_core_req_if (csr_req_if),
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_pipe_req_if (csr_pipe_req_if),
|
||||
|
||||
.csr_pipe_rsp_if (csr_pipe_rsp_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
.csr_commit_if (csr_commit_if)
|
||||
);
|
||||
|
||||
);
|
||||
wire csr_we_s1;
|
||||
wire [`CSR_ADDR_BITS-1:0] csr_addr_s1;
|
||||
wire [31:0] csr_read_data, csr_read_data_s1;
|
||||
wire [31:0] csr_updated_data_s1;
|
||||
|
||||
wire write_enable = csr_pipe_rsp_if.valid && csr_we_s1;
|
||||
wire write_enable = csr_commit_if.valid && csr_we_s1;
|
||||
|
||||
wire [31:0] csr_req_data = csr_req_if.use_imm ? 32'(csr_req_if.rs1) : csr_req_if.rs1_data;
|
||||
|
||||
VX_csr_data #(
|
||||
.CORE_ID(CORE_ID)
|
||||
|
@ -62,20 +42,20 @@ module VX_csr_unit #(
|
|||
`endif
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
.read_enable (csr_pipe_req_if.valid),
|
||||
.read_addr (csr_pipe_req_if.addr),
|
||||
.read_wid (csr_pipe_req_if.wid),
|
||||
.read_enable (csr_req_if.valid),
|
||||
.read_addr (csr_req_if.addr),
|
||||
.read_wid (csr_req_if.wid),
|
||||
.read_data (csr_read_data),
|
||||
.write_enable (write_enable),
|
||||
.write_addr (csr_addr_s1),
|
||||
.write_wid (csr_pipe_rsp_if.wid),
|
||||
.write_wid (csr_commit_if.wid),
|
||||
.write_data (csr_updated_data_s1[`CSR_WIDTH-1:0]),
|
||||
.busy (busy)
|
||||
);
|
||||
|
||||
wire write_hazard = (csr_addr_s1 == csr_pipe_req_if.addr)
|
||||
&& (csr_pipe_rsp_if.wid == csr_pipe_req_if.wid)
|
||||
&& csr_pipe_rsp_if.valid;
|
||||
wire write_hazard = (csr_addr_s1 == csr_req_if.addr)
|
||||
&& (csr_commit_if.wid == csr_req_if.wid)
|
||||
&& csr_commit_if.valid;
|
||||
|
||||
wire [31:0] csr_read_data_qual = write_hazard ? csr_updated_data_s1 : csr_read_data;
|
||||
|
||||
|
@ -83,53 +63,55 @@ module VX_csr_unit #(
|
|||
|
||||
reg csr_we_s0_unqual;
|
||||
|
||||
always @(*) begin
|
||||
csr_we_s0_unqual = 0;
|
||||
case (csr_pipe_req_if.op_type)
|
||||
always @(*) begin
|
||||
case (csr_req_if.op_type)
|
||||
`CSR_RW: begin
|
||||
csr_updated_data = csr_pipe_req_if.data;
|
||||
csr_updated_data = csr_req_data;
|
||||
csr_we_s0_unqual = 1;
|
||||
end
|
||||
`CSR_RS: begin
|
||||
csr_updated_data = csr_read_data_qual | csr_pipe_req_if.data;
|
||||
csr_we_s0_unqual = (csr_pipe_req_if.data != 0);
|
||||
csr_updated_data = csr_read_data_qual | csr_req_data;
|
||||
csr_we_s0_unqual = (csr_req_data != 0);
|
||||
end
|
||||
`CSR_RC: begin
|
||||
csr_updated_data = csr_read_data_qual & ~csr_pipe_req_if.data;
|
||||
csr_we_s0_unqual = (csr_pipe_req_if.data != 0);
|
||||
csr_updated_data = csr_read_data_qual & ~csr_req_data;
|
||||
csr_we_s0_unqual = (csr_req_data != 0);
|
||||
end
|
||||
default: begin
|
||||
csr_updated_data = 'x;
|
||||
csr_we_s0_unqual = 0;
|
||||
end
|
||||
default: csr_updated_data = 'x;
|
||||
endcase
|
||||
end
|
||||
|
||||
wire stall_in = !csr_pipe_req_if.is_io && fpu_pending[csr_pipe_req_if.wid];
|
||||
wire stall_in = fpu_pending[csr_req_if.wid];
|
||||
|
||||
wire pipe_req_valid_qual = csr_pipe_req_if.valid && !stall_in;
|
||||
wire csr_req_valid = csr_req_if.valid && !stall_in;
|
||||
|
||||
wire stall_out = ~csr_pipe_rsp_if.ready && csr_pipe_rsp_if.valid;
|
||||
wire stall_out = ~csr_commit_if.ready && csr_commit_if.valid;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32),
|
||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 32 + 32),
|
||||
.RESETW (1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (!stall_out),
|
||||
.data_in ({pipe_req_valid_qual, csr_pipe_req_if.wid, csr_pipe_req_if.tmask, csr_pipe_req_if.PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_we_s0_unqual, csr_pipe_req_if.addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
|
||||
.data_out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.wid, csr_pipe_rsp_if.tmask, csr_pipe_rsp_if.PC, csr_pipe_rsp_if.rd, csr_pipe_rsp_if.wb, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1})
|
||||
.data_in ({csr_req_valid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.rd, csr_req_if.wb, csr_we_s0_unqual, csr_req_if.addr, csr_read_data_qual, csr_updated_data}),
|
||||
.data_out ({csr_commit_if.valid, csr_commit_if.wid, csr_commit_if.tmask, csr_commit_if.PC, csr_commit_if.rd, csr_commit_if.wb, csr_we_s1, csr_addr_s1, csr_read_data_s1, csr_updated_data_s1})
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign csr_pipe_rsp_if.data[i] = (csr_addr_s1 == `CSR_WTID) ? i :
|
||||
assign csr_commit_if.data[i] = (csr_addr_s1 == `CSR_WTID) ? i :
|
||||
(csr_addr_s1 == `CSR_LTID
|
||||
|| csr_addr_s1 == `CSR_GTID) ? (csr_read_data_s1 * `NUM_THREADS + i) :
|
||||
csr_read_data_s1;
|
||||
end
|
||||
|
||||
assign csr_pipe_rsp_if.eop = 1'b1;
|
||||
assign csr_commit_if.eop = 1'b1;
|
||||
|
||||
// can accept new request?
|
||||
assign csr_pipe_req_if.ready = ~(stall_out || stall_in);
|
||||
assign csr_req_if.ready = ~(stall_out || stall_in);
|
||||
|
||||
// pending request
|
||||
reg [`NUM_WARPS-1:0] pending_r;
|
||||
|
@ -137,11 +119,11 @@ module VX_csr_unit #(
|
|||
if (reset) begin
|
||||
pending_r <= 0;
|
||||
end else begin
|
||||
if (csr_pipe_rsp_if.valid && csr_pipe_rsp_if.ready) begin
|
||||
pending_r[csr_pipe_rsp_if.wid] <= 0;
|
||||
if (csr_commit_if.valid && csr_commit_if.ready) begin
|
||||
pending_r[csr_commit_if.wid] <= 0;
|
||||
end
|
||||
if (csr_pipe_req_if.valid && csr_pipe_req_if.ready) begin
|
||||
pending_r[csr_pipe_req_if.wid] <= 1;
|
||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||
pending_r[csr_req_if.wid] <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -6,11 +6,7 @@ module VX_execute #(
|
|||
`SCOPE_IO_VX_execute
|
||||
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// CSR io interface
|
||||
VX_csr_io_req_if csr_io_req_if,
|
||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||
input wire reset,
|
||||
|
||||
// Dcache interface
|
||||
VX_dcache_core_req_if dcache_req_if,
|
||||
|
@ -81,8 +77,6 @@ module VX_execute #(
|
|||
`endif
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.fpu_to_csr_if (fpu_to_csr_if),
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.fpu_pending (fpu_pending),
|
||||
|
|
|
@ -34,19 +34,7 @@ module VX_pipeline #(
|
|||
input wire icache_rsp_valid,
|
||||
input wire [31:0] icache_rsp_data,
|
||||
input wire [`ICORE_TAG_WIDTH-1:0] icache_rsp_tag,
|
||||
output wire icache_rsp_ready,
|
||||
|
||||
// CSR I/O Request
|
||||
input wire csr_req_valid,
|
||||
input wire[11:0] csr_req_addr,
|
||||
input wire csr_req_rw,
|
||||
input wire[31:0] csr_req_data,
|
||||
output wire csr_req_ready,
|
||||
|
||||
// CSR I/O Response
|
||||
output wire csr_rsp_valid,
|
||||
output wire[31:0] csr_rsp_data,
|
||||
input wire csr_rsp_ready,
|
||||
output wire icache_rsp_ready,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
|
@ -116,26 +104,6 @@ module VX_pipeline #(
|
|||
assign icache_core_rsp_if.tag = icache_rsp_tag;
|
||||
assign icache_rsp_ready = icache_core_rsp_if.ready;
|
||||
|
||||
//
|
||||
// CSR IO request
|
||||
//
|
||||
|
||||
VX_csr_io_req_if csr_io_req_if();
|
||||
assign csr_io_req_if.valid = csr_req_valid;
|
||||
assign csr_io_req_if.rw = csr_req_rw;
|
||||
assign csr_io_req_if.addr = csr_req_addr;
|
||||
assign csr_io_req_if.data = csr_req_data;
|
||||
assign csr_req_ready = csr_io_req_if.ready;
|
||||
|
||||
//
|
||||
// CSR IO response
|
||||
//
|
||||
|
||||
VX_csr_io_rsp_if csr_io_rsp_if();
|
||||
assign csr_rsp_valid = csr_io_rsp_if.valid;
|
||||
assign csr_rsp_data = csr_io_rsp_if.data;
|
||||
assign csr_io_rsp_if.ready = csr_rsp_ready;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if();
|
||||
|
@ -226,9 +194,6 @@ module VX_pipeline #(
|
|||
|
||||
.dcache_req_if (dcache_core_req_if),
|
||||
.dcache_rsp_if (dcache_core_rsp_if),
|
||||
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
|
||||
|
|
|
@ -22,19 +22,6 @@ module Vortex (
|
|||
input wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag,
|
||||
output wire mem_rsp_ready,
|
||||
|
||||
// CSR Request
|
||||
input wire csr_req_valid,
|
||||
input wire [`VX_CSR_ID_WIDTH-1:0] csr_req_coreid,
|
||||
input wire [11:0] csr_req_addr,
|
||||
input wire csr_req_rw,
|
||||
input wire [31:0] csr_req_data,
|
||||
output wire csr_req_ready,
|
||||
|
||||
// CSR Response
|
||||
output wire csr_rsp_valid,
|
||||
output wire [31:0] csr_rsp_data,
|
||||
input wire csr_rsp_ready,
|
||||
|
||||
// Status
|
||||
output wire busy
|
||||
);
|
||||
|
@ -53,21 +40,8 @@ module Vortex (
|
|||
wire [`NUM_CLUSTERS-1:0][`L2MEM_TAG_WIDTH-1:0] per_cluster_mem_rsp_tag;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_ready;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_req_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_req_addr;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_req_rw;
|
||||
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_req_data;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_req_ready;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_rsp_valid;
|
||||
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_rsp_data;
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_rsp_ready;
|
||||
|
||||
wire [`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||
|
||||
wire [`LOG2UP(`NUM_CLUSTERS)-1:0] csr_cluster_id = `LOG2UP(`NUM_CLUSTERS)'(csr_req_coreid >> `CLOG2(`NUM_CORES));
|
||||
wire [`NC_BITS-1:0] csr_core_id = `NC_BITS'(csr_req_coreid);
|
||||
|
||||
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
|
||||
|
||||
wire cluster_reset;
|
||||
|
@ -100,58 +74,10 @@ module Vortex (
|
|||
.mem_rsp_tag (per_cluster_mem_rsp_tag [i]),
|
||||
.mem_rsp_ready (per_cluster_mem_rsp_ready [i]),
|
||||
|
||||
.csr_req_valid (per_cluster_csr_req_valid [i]),
|
||||
.csr_req_coreid (csr_core_id),
|
||||
.csr_req_rw (per_cluster_csr_req_rw [i]),
|
||||
.csr_req_addr (per_cluster_csr_req_addr [i]),
|
||||
.csr_req_data (per_cluster_csr_req_data [i]),
|
||||
.csr_req_ready (per_cluster_csr_req_ready [i]),
|
||||
|
||||
.csr_rsp_valid (per_cluster_csr_rsp_valid [i]),
|
||||
.csr_rsp_data (per_cluster_csr_rsp_data [i]),
|
||||
.csr_rsp_ready (per_cluster_csr_rsp_ready [i]),
|
||||
|
||||
.busy (per_cluster_busy [i])
|
||||
);
|
||||
end
|
||||
|
||||
VX_csr_arb #(
|
||||
.NUM_REQS (`NUM_CLUSTERS),
|
||||
.DATA_WIDTH (32),
|
||||
.ADDR_WIDTH (12),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
) csr_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.request_id (csr_cluster_id),
|
||||
|
||||
// input requests
|
||||
.req_valid_in (csr_req_valid),
|
||||
.req_addr_in (csr_req_addr),
|
||||
.req_rw_in (csr_req_rw),
|
||||
.req_data_in (csr_req_data),
|
||||
.req_ready_in (csr_req_ready),
|
||||
|
||||
// output request
|
||||
.req_valid_out (per_cluster_csr_req_valid),
|
||||
.req_addr_out (per_cluster_csr_req_addr),
|
||||
.req_rw_out (per_cluster_csr_req_rw),
|
||||
.req_data_out (per_cluster_csr_req_data),
|
||||
.req_ready_out (per_cluster_csr_req_ready),
|
||||
|
||||
// input responses
|
||||
.rsp_valid_in (per_cluster_csr_rsp_valid),
|
||||
.rsp_data_in (per_cluster_csr_rsp_data),
|
||||
.rsp_ready_in (per_cluster_csr_rsp_ready),
|
||||
|
||||
// output response
|
||||
.rsp_valid_out (csr_rsp_valid),
|
||||
.rsp_data_out (csr_rsp_data),
|
||||
.rsp_ready_out (csr_rsp_ready)
|
||||
);
|
||||
|
||||
assign busy = (| per_cluster_busy);
|
||||
|
||||
if (`L3_ENABLE) begin
|
||||
|
|
|
@ -63,8 +63,6 @@ localparam AFU_ID_H = 16'h0004; // AFU ID Higher
|
|||
localparam CMD_MEM_READ = `AFU_IMAGE_CMD_MEM_READ;
|
||||
localparam CMD_MEM_WRITE = `AFU_IMAGE_CMD_MEM_WRITE;
|
||||
localparam CMD_RUN = `AFU_IMAGE_CMD_RUN;
|
||||
localparam CMD_CSR_READ = `AFU_IMAGE_CMD_CSR_READ;
|
||||
localparam CMD_CSR_WRITE = `AFU_IMAGE_CMD_CSR_WRITE;
|
||||
|
||||
localparam MMIO_CMD_TYPE = `AFU_IMAGE_MMIO_CMD_TYPE;
|
||||
localparam MMIO_IO_ADDR = `AFU_IMAGE_MMIO_IO_ADDR;
|
||||
|
@ -75,10 +73,7 @@ localparam MMIO_STATUS = `AFU_IMAGE_MMIO_STATUS;
|
|||
localparam MMIO_SCOPE_READ = `AFU_IMAGE_MMIO_SCOPE_READ;
|
||||
localparam MMIO_SCOPE_WRITE = `AFU_IMAGE_MMIO_SCOPE_WRITE;
|
||||
|
||||
localparam MMIO_CSR_CORE = `AFU_IMAGE_MMIO_CSR_CORE;
|
||||
localparam MMIO_CSR_ADDR = `AFU_IMAGE_MMIO_CSR_ADDR;
|
||||
localparam MMIO_CSR_DATA = `AFU_IMAGE_MMIO_CSR_DATA;
|
||||
localparam MMIO_CSR_READ = `AFU_IMAGE_MMIO_CSR_READ;
|
||||
localparam MMIO_DEV_CAPS = `AFU_IMAGE_MMIO_DEV_CAPS;
|
||||
|
||||
localparam CCI_RD_RQ_TAGW = $clog2(CCI_RD_WINDOW_SIZE);
|
||||
localparam CCI_RD_RQ_DATAW = CCI_LINE_WIDTH + CCI_RD_RQ_TAGW;
|
||||
|
@ -88,9 +83,7 @@ localparam STATE_READ = 1;
|
|||
localparam STATE_WRITE = 2;
|
||||
localparam STATE_START = 3;
|
||||
localparam STATE_RUN = 4;
|
||||
localparam STATE_CSR_READ = 5;
|
||||
localparam STATE_CSR_WRITE = 6;
|
||||
localparam STATE_MAX_VALUE = 7;
|
||||
localparam STATE_MAX_VALUE = 5;
|
||||
localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE);
|
||||
|
||||
`ifdef SCOPE
|
||||
|
@ -99,6 +92,8 @@ localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE);
|
|||
|
||||
wire [127:0] afu_id = `AFU_ACCEL_UUID;
|
||||
|
||||
wire [63:0] dev_caps = {16'(`NUM_THREADS), 16'(`NUM_WARPS), 16'(`NUM_CORES), 16'(`IMPLEMENTATION_ID)};
|
||||
|
||||
reg [STATE_WIDTH-1:0] state;
|
||||
|
||||
// Vortex ports ///////////////////////////////////////////////////////////////
|
||||
|
@ -116,18 +111,7 @@ wire [`VX_MEM_LINE_WIDTH-1:0] vx_mem_rsp_data;
|
|||
wire [`VX_MEM_TAG_WIDTH-1:0] vx_mem_rsp_tag;
|
||||
wire vx_mem_rsp_ready;
|
||||
|
||||
wire vx_csr_io_req_valid;
|
||||
wire [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid;
|
||||
wire [11:0] vx_csr_io_req_addr;
|
||||
wire vx_csr_io_req_rw;
|
||||
wire [31:0] vx_csr_io_req_data;
|
||||
wire vx_csr_io_req_ready;
|
||||
|
||||
wire vx_csr_io_rsp_valid;
|
||||
wire [31:0] vx_csr_io_rsp_data;
|
||||
wire vx_csr_io_rsp_ready;
|
||||
|
||||
wire vx_busy;
|
||||
wire vx_busy;
|
||||
|
||||
reg vx_reset;
|
||||
reg vx_mem_en;
|
||||
|
@ -145,11 +129,6 @@ wire cmd_scope_read;
|
|||
wire cmd_scope_write;
|
||||
`endif
|
||||
|
||||
reg [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core;
|
||||
reg [11:0] cmd_csr_addr;
|
||||
reg [31:0] cmd_csr_rdata;
|
||||
reg [31:0] cmd_csr_wdata;
|
||||
|
||||
// MMIO controller ////////////////////////////////////////////////////////////
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
|
@ -246,27 +225,9 @@ always @(posedge clk) begin
|
|||
`endif
|
||||
end
|
||||
`endif
|
||||
MMIO_CSR_CORE: begin
|
||||
cmd_csr_core <= $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: MMIO_CSR_CORE: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
MMIO_CSR_ADDR: begin
|
||||
cmd_csr_addr <= $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: MMIO_CSR_ADDR: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
MMIO_CSR_DATA: begin
|
||||
cmd_csr_wdata <= $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: MMIO_CSR_DATA: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
default: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: Unknown MMIO Wr: addr=%0h, data=%0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data));
|
||||
$display("%t: Unknown MMIO Wr: addr=%0h, data=%0h", $time, mmio_hdr.address, $bits(cmd_data_size)'(cp2af_sRxPort.c0.data));
|
||||
`endif
|
||||
end
|
||||
endcase
|
||||
|
@ -298,12 +259,6 @@ always @(posedge clk) begin
|
|||
end
|
||||
`endif
|
||||
end
|
||||
MMIO_CSR_READ: begin
|
||||
mmio_tx.data <= 64'(cmd_csr_rdata);
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: MMIO_CSR_READ: addr=%0h, data=%0h", $time, mmio_hdr.address, cmd_csr_rdata);
|
||||
`endif
|
||||
end
|
||||
`ifdef SCOPE
|
||||
MMIO_SCOPE_READ: begin
|
||||
mmio_tx.data <= cmd_scope_rdata;
|
||||
|
@ -312,6 +267,12 @@ always @(posedge clk) begin
|
|||
`endif
|
||||
end
|
||||
`endif
|
||||
MMIO_DEV_CAPS: begin
|
||||
mmio_tx.data <= dev_caps;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: MMIO_DEV_CAPS: addr=%0h, data=%0h", $time, mmio_hdr.address, dev_caps);
|
||||
`endif
|
||||
end
|
||||
default: begin
|
||||
mmio_tx.data <= 64'h0;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
|
@ -326,7 +287,6 @@ end
|
|||
|
||||
wire cmd_read_done;
|
||||
wire cmd_write_done;
|
||||
wire cmd_csr_done;
|
||||
wire cmd_run_done;
|
||||
|
||||
reg [$clog2(RESET_DELAY+1)-1:0] vx_reset_ctr;
|
||||
|
@ -366,18 +326,6 @@ always @(posedge clk) begin
|
|||
vx_reset <= 1;
|
||||
state <= STATE_START;
|
||||
end
|
||||
CMD_CSR_READ: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE CSR_READ: addr=%0h", $time, cmd_csr_addr);
|
||||
`endif
|
||||
state <= STATE_CSR_READ;
|
||||
end
|
||||
CMD_CSR_WRITE: begin
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE CSR_WRITE: addr=%0h data=%0d", $time, cmd_csr_addr, cmd_csr_wdata);
|
||||
`endif
|
||||
state <= STATE_CSR_WRITE;
|
||||
end
|
||||
default: begin
|
||||
state <= state;
|
||||
end
|
||||
|
@ -421,24 +369,6 @@ always @(posedge clk) begin
|
|||
end
|
||||
end
|
||||
|
||||
STATE_CSR_READ: begin
|
||||
if (cmd_csr_done) begin
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE IDLE", $time);
|
||||
`endif
|
||||
end
|
||||
end
|
||||
|
||||
STATE_CSR_WRITE: begin
|
||||
if (cmd_csr_done) begin
|
||||
state <= STATE_IDLE;
|
||||
`ifdef DBG_PRINT_OPAE
|
||||
$display("%t: STATE IDLE", $time);
|
||||
`endif
|
||||
end
|
||||
end
|
||||
|
||||
default: begin
|
||||
state <= state;
|
||||
end
|
||||
|
@ -926,40 +856,6 @@ assign cci_mem_req_valid = cci_mem_req_rw ? cci_mem_wr_req_valid : cci_mem_rd_re
|
|||
assign cci_mem_req_addr = cci_mem_req_rw ? cci_mem_wr_req_addr : cci_mem_rd_req_addr;
|
||||
assign cci_mem_req_tag = cci_mem_req_rw ? cci_mem_wr_req_ctr : cci_mem_rd_req_ctr;
|
||||
|
||||
// CSRs ///////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Set once the current CSR request has been accepted, so the request is
// presented to Vortex only once per command.
reg csr_io_req_sent;
|
||||
|
||||
// A request is offered while the command FSM is in a CSR state and the
// request has not been accepted yet.
assign vx_csr_io_req_valid = !csr_io_req_sent
|
||||
&& ((STATE_CSR_READ == state || STATE_CSR_WRITE == state));
|
||||
assign vx_csr_io_req_coreid = cmd_csr_core;
|
||||
// rw is high only in the write state
assign vx_csr_io_req_rw = (STATE_CSR_WRITE == state);
|
||||
assign vx_csr_io_req_addr = cmd_csr_addr;
|
||||
assign vx_csr_io_req_data = cmd_csr_wdata;
|
||||
|
||||
// responses are always accepted
assign vx_csr_io_rsp_ready = 1;
|
||||
|
||||
// Completion: in the write state it follows request-ready; in every other
// state (not only STATE_CSR_READ) it mirrors response-valid.
assign cmd_csr_done = (STATE_CSR_WRITE == state) ? vx_csr_io_req_ready : vx_csr_io_rsp_valid;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
csr_io_req_sent <= 0;
|
||||
end else begin
|
||||
if (vx_csr_io_req_valid && vx_csr_io_req_ready) begin
|
||||
csr_io_req_sent <= 1;
|
||||
end
|
||||
// written after the set above, so when both conditions hold in the
// same cycle this later nonblocking assignment determines the value
if (cmd_csr_done) begin
|
||||
csr_io_req_sent <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
// capture read data; this sits outside the reset branch, so
// cmd_csr_rdata has no reset value
if ((STATE_CSR_READ == state)
|
||||
&& vx_csr_io_rsp_ready
|
||||
&& vx_csr_io_rsp_valid) begin
|
||||
cmd_csr_rdata <= vx_csr_io_rsp_data;
|
||||
end
|
||||
end
|
||||
|
||||
// Vortex /////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign cmd_run_done = !vx_busy;
|
||||
|
@ -984,19 +880,6 @@ Vortex #() vortex (
|
|||
.mem_rsp_data (vx_mem_rsp_data),
|
||||
.mem_rsp_tag (vx_mem_rsp_tag),
|
||||
.mem_rsp_ready (vx_mem_rsp_ready),
|
||||
|
||||
// CSR Request
|
||||
.csr_req_valid (vx_csr_io_req_valid),
|
||||
.csr_req_coreid (vx_csr_io_req_coreid),
|
||||
.csr_req_addr (vx_csr_io_req_addr),
|
||||
.csr_req_rw (vx_csr_io_req_rw),
|
||||
.csr_req_data (vx_csr_io_req_data),
|
||||
.csr_req_ready (vx_csr_io_req_ready),
|
||||
|
||||
// CSR Response
|
||||
.csr_rsp_valid (vx_csr_io_rsp_valid),
|
||||
.csr_rsp_data (vx_csr_io_rsp_data),
|
||||
.csr_rsp_ready (vx_csr_io_rsp_ready),
|
||||
|
||||
// status
|
||||
.busy (vx_busy)
|
||||
|
|
|
@ -26,21 +26,16 @@
|
|||
`define AFU_ACCEL_NAME "vortex_afu"
|
||||
`define AFU_ACCEL_UUID 128'h35f9452b_25c2_434c_93d5_6f8c60db361c
|
||||
|
||||
`define AFU_IMAGE_CMD_CSR_READ 4
|
||||
`define AFU_IMAGE_CMD_CSR_WRITE 5
|
||||
`define AFU_IMAGE_CMD_MEM_READ 1
|
||||
`define AFU_IMAGE_CMD_MEM_WRITE 2
|
||||
`define AFU_IMAGE_CMD_RUN 3
|
||||
`define AFU_IMAGE_MMIO_CMD_TYPE 10
|
||||
`define AFU_IMAGE_MMIO_CSR_CORE 24
|
||||
`define AFU_IMAGE_MMIO_CSR_ADDR 26
|
||||
`define AFU_IMAGE_MMIO_CSR_DATA 28
|
||||
`define AFU_IMAGE_MMIO_CSR_READ 30
|
||||
`define AFU_IMAGE_MMIO_DATA_SIZE 16
|
||||
`define AFU_IMAGE_MMIO_IO_ADDR 12
|
||||
`define AFU_IMAGE_MMIO_MEM_ADDR 14
|
||||
`define AFU_IMAGE_MMIO_SCOPE_READ 20
|
||||
`define AFU_IMAGE_MMIO_SCOPE_WRITE 22
|
||||
`define AFU_IMAGE_MMIO_DEV_CAPS 24
|
||||
`define AFU_IMAGE_MMIO_STATUS 18
|
||||
|
||||
`define AFU_IMAGE_POWER 0
|
||||
|
|
|
@ -1,16 +0,0 @@
|
|||
`ifndef VX_CSR_IO_REQ_IF
|
||||
`define VX_CSR_IO_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle for a CSR I/O request: valid/ready pair plus address,
// read/write flag and 32-bit data. No modports are declared, so signal
// directions are determined by whoever drives each wire.
interface VX_csr_io_req_if ();
|
||||
|
||||
wire valid;
|
||||
// CSR address, `CSR_ADDR_BITS wide
wire [`CSR_ADDR_BITS-1:0] addr;
|
||||
// read/write flag (presumably 1 = write - confirm against the driver)
wire rw;
|
||||
wire [31:0] data;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -1,14 +0,0 @@
|
|||
`ifndef VX_CSR_IO_RSP_IF
|
||||
`define VX_CSR_IO_RSP_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle for a CSR I/O response: valid/ready pair plus 32-bit data.
// No modports are declared, so signal directions are determined by whoever
// drives each wire.
interface VX_csr_io_rsp_if ();
|
||||
|
||||
wire valid;
|
||||
wire [31:0] data;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -1,22 +0,0 @@
|
|||
`ifndef VX_CSR_PIPE_REQ_IF
|
||||
`define VX_CSR_PIPE_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle for a CSR request entering the CSR pipeline: valid/ready
// pair plus the fields listed below. No modports are declared, so signal
// directions are determined by whoever drives each wire.
interface VX_csr_pipe_req_if ();
|
||||
|
||||
wire valid;
|
||||
// warp id, `NW_BITS wide
wire [`NW_BITS-1:0] wid;
|
||||
// thread mask, one bit per thread
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
// CSR operation selector, `CSR_BITS wide
wire [`CSR_BITS-1:0] op_type;
|
||||
// CSR address, `CSR_ADDR_BITS wide
wire [`CSR_ADDR_BITS-1:0] addr;
|
||||
// 32-bit operand for the CSR operation
wire [31:0] data;
|
||||
// `NR_BITS-wide register field (presumably the destination register - confirm)
wire [`NR_BITS-1:0] rd;
|
||||
// presumably the writeback enable - confirm against the consumer
wire wb;
|
||||
// presumably marks requests that come from the CSR I/O port rather than the core - confirm
wire is_io;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -77,15 +77,9 @@ void Simulator::reset() {
|
|||
mem_rsp_vec_.clear();
|
||||
|
||||
mem_rsp_active_ = false;
|
||||
csr_req_active_ = false;
|
||||
csr_rsp_value_ = nullptr;
|
||||
|
||||
vortex_->mem_rsp_valid = 0;
|
||||
vortex_->mem_req_ready = 0;
|
||||
//vortex_->io_req_ready = 0;
|
||||
//vortex_->io_rsp_valid = 0;
|
||||
vortex_->csr_req_valid = 0;
|
||||
vortex_->csr_rsp_ready = 0;
|
||||
|
||||
vortex_->reset = 1;
|
||||
|
||||
|
@ -108,14 +102,11 @@ void Simulator::step() {
|
|||
this->eval();
|
||||
|
||||
mem_rsp_ready_ = vortex_->mem_rsp_ready;
|
||||
csr_req_ready_ = vortex_->csr_req_ready;
|
||||
|
||||
vortex_->clk = 1;
|
||||
this->eval();
|
||||
|
||||
this->eval_mem_bus();
|
||||
this->eval_io_bus();
|
||||
this->eval_csr_bus();
|
||||
|
||||
#ifndef NDEBUG
|
||||
fflush(stdout);
|
||||
|
@ -209,53 +200,6 @@ void Simulator::eval_mem_bus() {
|
|||
vortex_->mem_req_ready = !mem_stalled;
|
||||
}
|
||||
|
||||
// I/O bus evaluation hook. Currently a no-op: the entire body below is
// commented out, so calling this function has no effect.
void Simulator::eval_io_bus() {
|
||||
/*for (int i = 0; i < NUM_THREADS; ++i) {
|
||||
if (((vortex_->io_req_valid >> i) & 0x1)
|
||||
&& ((VL_WDATA_GETW(vortex_->io_req_addr, i, NUM_THREADS, 30) << 2) == IO_BUS_ADDR_COUT)) {
|
||||
assert(vortex_->io_req_rw);
|
||||
int data = vortex_->io_req_data[i];
|
||||
int tid = data >> 16;
|
||||
char c = data & 0xff;
|
||||
auto& ss_buf = print_bufs_[tid];
|
||||
ss_buf << c;
|
||||
if (c == '\n') {
|
||||
std::cout << std::dec << "#" << tid << ": " << ss_buf.str() << std::flush;
|
||||
ss_buf.str("");
|
||||
}
|
||||
}
|
||||
}
|
||||
vortex_->io_req_ready = 1;
|
||||
vortex_->io_rsp_valid = 0;*/
|
||||
}
|
||||
|
||||
void Simulator::eval_csr_bus() {
|
||||
if (csr_req_active_) {
|
||||
if (vortex_->csr_req_valid && csr_req_ready_) {
|
||||
#ifndef NDEBUG
|
||||
if (vortex_->csr_req_rw)
|
||||
std::cout << std::dec << timestamp << ": [sim] CSR Wr Req: core=" << (int)vortex_->csr_req_coreid << ", addr=" << std::hex << vortex_->csr_req_addr << ", value=" << vortex_->csr_req_data << std::endl;
|
||||
else
|
||||
std::cout << std::dec << timestamp << ": [sim] CSR Rd Req: core=" << (int)vortex_->csr_req_coreid << ", addr=" << std::hex << vortex_->csr_req_addr << std::endl;
|
||||
#endif
|
||||
vortex_->csr_req_valid = 0;
|
||||
if (vortex_->csr_req_rw)
|
||||
csr_req_active_ = false;
|
||||
}
|
||||
if (vortex_->csr_rsp_valid && vortex_->csr_rsp_ready) {
|
||||
*csr_rsp_value_ = vortex_->csr_rsp_data;
|
||||
vortex_->csr_rsp_ready = 0;
|
||||
csr_req_active_ = false;
|
||||
#ifndef NDEBUG
|
||||
std::cout << std::dec << timestamp << ": [sim] CSR Rsp: value=" << vortex_->csr_rsp_data << std::endl;
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
vortex_->csr_req_valid = 0;
|
||||
vortex_->csr_rsp_ready = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void Simulator::wait(uint32_t cycles) {
|
||||
for (int i = 0; i < cycles; ++i) {
|
||||
this->step();
|
||||
|
@ -266,33 +210,6 @@ bool Simulator::is_busy() const {
|
|||
return vortex_->busy;
|
||||
}
|
||||
|
||||
// Returns the csr_req_active_ flag: true from the moment set_csr()/get_csr()
// start a CSR transaction until eval_csr_bus() marks it finished.
bool Simulator::csr_req_active() const {
|
||||
return csr_req_active_;
|
||||
}
|
||||
|
||||
// Starts a CSR write transaction on the device model.
//
//   core_id - value driven on csr_req_coreid
//   addr    - value driven on csr_req_addr
//   value   - value driven on csr_req_data
//
// Only the request signals are set up here; the transaction is progressed by
// eval_csr_bus() on subsequent steps and csr_req_active() reports completion.
void Simulator::set_csr(int core_id, int addr, unsigned value) {
  // Request payload.
  vortex_->csr_req_coreid = core_id;
  vortex_->csr_req_addr   = addr;
  vortex_->csr_req_data   = value;
  vortex_->csr_req_rw     = 1;  // 1 selects the write path in eval_csr_bus()

  // No response is collected for a write.
  vortex_->csr_rsp_ready  = 0;

  // Present the request and mark the transaction as in flight.
  vortex_->csr_req_valid  = 1;
  csr_req_active_ = true;
}
|
||||
|
||||
// Starts a CSR read transaction on the device model.
//
//   core_id - value driven on csr_req_coreid
//   addr    - value driven on csr_req_addr
//   value   - destination for the response; it is written later by
//             eval_csr_bus(), so it must stay valid until csr_req_active()
//             returns false
void Simulator::get_csr(int core_id, int addr, unsigned *value) {
  // Remember where the response has to be stored.
  csr_rsp_value_ = value;

  // Request payload.
  vortex_->csr_req_coreid = core_id;
  vortex_->csr_req_addr   = addr;
  vortex_->csr_req_rw     = 0;  // 0 selects the read path in eval_csr_bus()

  // Be ready to take the response, then present the request.
  vortex_->csr_rsp_ready  = 1;
  vortex_->csr_req_valid  = 1;

  csr_req_active_ = true;
}
|
||||
|
||||
void Simulator::run() {
|
||||
#ifndef NDEBUG
|
||||
std::cout << std::dec << timestamp << ": [sim] run()" << std::endl;
|
||||
|
|
|
@ -30,14 +30,9 @@ public:
|
|||
|
||||
bool is_busy() const;
|
||||
|
||||
bool csr_req_active() const;
|
||||
|
||||
void reset();
|
||||
void step();
|
||||
void wait(uint32_t cycles);
|
||||
|
||||
void set_csr(int core_id, int addr, unsigned value);
|
||||
void get_csr(int core_id, int addr, unsigned *value);
|
||||
|
||||
void run();
|
||||
|
||||
|
@ -61,16 +56,11 @@ private:
|
|||
void eval();
|
||||
|
||||
void eval_mem_bus();
|
||||
void eval_io_bus();
|
||||
void eval_csr_bus();
|
||||
|
||||
std::list<mem_req_t> mem_rsp_vec_;
|
||||
bool mem_rsp_active_;
|
||||
|
||||
bool mem_rsp_ready_;
|
||||
bool csr_req_ready_;
|
||||
bool csr_req_active_;
|
||||
uint32_t* csr_rsp_value_;
|
||||
bool mem_rsp_ready_;
|
||||
|
||||
RAM *ram_;
|
||||
VVortex *vortex_;
|
||||
|
|
|
@ -8,8 +8,6 @@
|
|||
"cmd-mem-read": 1,
|
||||
"cmd-mem-write": 2,
|
||||
"cmd-run": 3,
|
||||
"cmd-csr-read": 4,
|
||||
"cmd-csr-write": 5,
|
||||
|
||||
"mmio-cmd-type": 10,
|
||||
"mmio-io-addr": 12,
|
||||
|
@ -18,10 +16,7 @@
|
|||
"mmio-status": 18,
|
||||
"mmio-scope-read": 20,
|
||||
"mmio-scope-write": 22,
|
||||
"mmio-csr-core": 24,
|
||||
"mmio-csr-addr": 26,
|
||||
"mmio-csr-data": 28,
|
||||
"mmio-csr-read": 30,
|
||||
"mmio-dev-caps": 24,
|
||||
|
||||
"afu-top-interface":
|
||||
{
|
||||
|
|
|
@ -41,21 +41,29 @@ set_global_assignment -name VERILOG_MACRO NDEBUG
|
|||
set_global_assignment -name MESSAGE_DISABLE 16818
|
||||
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
|
||||
|
||||
#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
#set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
|
||||
#set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0
|
||||
#set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||
#set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||
#set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||
#set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
|
||||
#set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
|
||||
#set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
|
||||
#set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
||||
#set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||
#set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||
#set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||
#set_global_assignment -name SEED 1
|
||||
set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
|
||||
set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
|
||||
set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
|
||||
set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0
|
||||
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
|
||||
set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
|
||||
set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
|
||||
set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
|
||||
set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
|
||||
set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
|
||||
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
|
||||
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
|
||||
|
||||
set_global_assignment -name USE_HIGH_SPEED_ADDER ON
|
||||
set_global_assignment -name MUX_RESTRUCTURE ON
|
||||
set_global_assignment -name ADV_NETLIST_OPT_SYNTH_WYSIWYG_REMAP ON
|
||||
set_global_assignment -name PROGRAMMABLE_POWER_TECHNOLOGY_SETTING "FORCE ALL TILES WITH FAILING TIMING PATHS TO HIGH SPEED"
|
||||
set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC ON
|
||||
set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING ON
|
||||
|
||||
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
|
||||
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||
set_global_assignment -name SEED 1
|
||||
|
||||
switch $opts(family) {
|
||||
"Arria 10" {
|
||||
|
|
|
@ -10,7 +10,7 @@ CFLAGS += -I./include -I../hw
|
|||
|
||||
PROJECT = libvortexrt
|
||||
|
||||
SRCS = ./src/vx_start.S ./src/vx_print.S ./src/vx_print.c ./src/vx_spawn.c
|
||||
SRCS = ./src/vx_start.S ./src/vx_print.S ./src/vx_print.c ./src/vx_spawn.c ./src/vx_perf.c
|
||||
|
||||
OBJS := $(addsuffix .o, $(notdir $(SRCS)))
|
||||
|
||||
|
|
|
@ -7,6 +7,51 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
// __ASM_STR(x): helper used by the vx_csr_* macros to splice a CSR operand
// into inline-assembly text.
#ifdef __ASSEMBLY__
|
||||
// Assembly sources: pass the argument through unchanged.
#define __ASM_STR(x) x
|
||||
#else
|
||||
// C/C++ sources: turn the argument into a string literal so it can be
// concatenated with the surrounding asm template strings.
#define __ASM_STR(x) #x
|
||||
#endif
|
||||
|
||||
// Swap a CSR with `val` using `csrrw`: `val` is written to `csr` and the
// macro evaluates to the value the instruction placed in its destination
// register. `csr` is pasted into the asm text, so it must expand to
// something the assembler accepts as a CSR operand.
#define vx_csr_swap(csr, val) ({                                    \
    unsigned __v = (unsigned)(val);                                 \
    __asm__ __volatile__ ("csrrw %0, " __ASM_STR(csr) ", %1"        \
                          : "=r" (__v)                              \
                          : "rK" (__v)                              \
                          : "memory");                              \
    __v;                                                            \
})
|
||||
|
||||
// Read a CSR with `csrr` and evaluate to its value as `unsigned`.
// `csr` is pasted into the asm text, so it must expand to something the
// assembler accepts as a CSR operand (a constant expression is fine).
//
// The temporary is a plain local: the `register` storage class is not valid
// in C++17 and later, and this header is also consumed from C++.
#define vx_csr_read(csr) ({                                         \
    unsigned __v;                                                   \
    __asm__ __volatile__ ("csrr %0, " __ASM_STR(csr)                \
                          : "=r" (__v)                              \
                          :                                         \
                          : "memory");                              \
    __v;                                                            \
})
|
||||
|
||||
// Write `val` to a CSR using `csrw`. The macro yields no value.
// `csr` is pasted into the asm text, so it must expand to something the
// assembler accepts as a CSR operand.
#define vx_csr_write(csr, val) ({                                   \
    unsigned __v = (unsigned)(val);                                 \
    __asm__ __volatile__ ("csrw " __ASM_STR(csr) ", %0"             \
                          :                                         \
                          : "rK" (__v)                              \
                          : "memory");                              \
})
|
||||
|
||||
// Set the bits of `val` in a CSR using `csrrs` and evaluate to the value the
// instruction placed in its destination register.
// `csr` is pasted into the asm text, so it must expand to something the
// assembler accepts as a CSR operand.
#define vx_csr_read_set(csr, val) ({                                \
    unsigned __v = (unsigned)(val);                                 \
    __asm__ __volatile__ ("csrrs %0, " __ASM_STR(csr) ", %1"        \
                          : "=r" (__v)                              \
                          : "rK" (__v)                              \
                          : "memory");                              \
    __v;                                                            \
})
|
||||
|
||||
// Set the bits of `val` in a CSR using `csrs`. The macro yields no value.
// `csr` is pasted into the asm text, so it must expand to something the
// assembler accepts as a CSR operand.
#define vx_csr_set(csr, val) ({                                     \
    unsigned __v = (unsigned)(val);                                 \
    __asm__ __volatile__ ("csrs " __ASM_STR(csr) ", %0"             \
                          :                                         \
                          : "rK" (__v)                              \
                          : "memory");                              \
})
|
||||
|
||||
// Clear the bits of `val` in a CSR using `csrrc` and evaluate to the value
// the instruction placed in its destination register.
// `csr` is pasted into the asm text, so it must expand to something the
// assembler accepts as a CSR operand.
#define vx_csr_read_clear(csr, val) ({                              \
    unsigned __v = (unsigned)(val);                                 \
    __asm__ __volatile__ ("csrrc %0, " __ASM_STR(csr) ", %1"        \
                          : "=r" (__v)                              \
                          : "rK" (__v)                              \
                          : "memory");                              \
    __v;                                                            \
})
|
||||
|
||||
// Clear the bits of `val` in a CSR using `csrc`. The macro yields no value.
// `csr` is pasted into the asm text, so it must expand to something the
// assembler accepts as a CSR operand.
#define vx_csr_clear(csr, val) ({                                   \
    unsigned __v = (unsigned)(val);                                 \
    __asm__ __volatile__ ("csrc " __ASM_STR(csr) ", %0"             \
                          :                                         \
                          : "rK" (__v)                              \
                          : "memory");                              \
})
|
||||
|
||||
// Set thread mask
|
||||
inline void vx_tmc(unsigned num_threads) {
|
||||
asm volatile (".insn s 0x6b, 0, x0, 0(%0)" :: "r"(num_threads));
|
||||
|
@ -95,20 +140,6 @@ inline int vx_num_cores() {
|
|||
return result;
|
||||
}
|
||||
|
||||
// Return the number of cycles
|
||||
inline int vx_num_cycles() {
|
||||
int result;
|
||||
asm volatile ("csrr %0, %1" : "=r"(result) : "i"(CSR_CYCLE));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Return the number of instructions
|
||||
inline int vx_num_instrs() {
|
||||
int result;
|
||||
asm volatile ("csrr %0, %1" : "=r"(result) : "i"(CSR_INSTRET));
|
||||
return result;
|
||||
}
|
||||
|
||||
#define __if(b) vx_split(b); \
|
||||
if (b)
|
||||
|
||||
|
|
27
runtime/src/vx_perf.c
Normal file
27
runtime/src/vx_perf.c
Normal file
|
@ -0,0 +1,27 @@
|
|||
|
||||
#include <VX_config.h>
|
||||
#include <vx_intrinsics.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define DUMP_CSR_4(d, s) \
|
||||
csr_mem[d + 0] = vx_csr_read(s + 0); \
|
||||
csr_mem[d + 1] = vx_csr_read(s + 1); \
|
||||
csr_mem[d + 2] = vx_csr_read(s + 2); \
|
||||
csr_mem[d + 3] = vx_csr_read(s + 3);
|
||||
|
||||
#define DUMP_CSR_32(d, s) \
|
||||
DUMP_CSR_4(d + 0, s + 0) \
|
||||
DUMP_CSR_4(d + 4, s + 4) \
|
||||
DUMP_CSR_4(d + 8, s + 8) \
|
||||
DUMP_CSR_4(d + 12, s + 12) \
|
||||
DUMP_CSR_4(d + 16, s + 16) \
|
||||
DUMP_CSR_4(d + 20, s + 20) \
|
||||
DUMP_CSR_4(d + 24, s + 24) \
|
||||
DUMP_CSR_4(d + 28, s + 28)
|
||||
|
||||
void vx_perf_dump() {
|
||||
int core_id = vx_core_id();
|
||||
uint32_t* const csr_mem = (uint32_t*)(IO_ADDR_CSR + 64 * sizeof(uint32_t) * core_id);
|
||||
DUMP_CSR_32(0, CSR_MPM_BASE)
|
||||
DUMP_CSR_32(32, CSR_MPM_BASE_H)
|
||||
}
|
|
@ -42,6 +42,9 @@ _start:
|
|||
.type _exit, @function
|
||||
.global _exit
|
||||
_exit:
|
||||
# dump performance CSRs
|
||||
call vx_perf_dump
|
||||
|
||||
# disable all threads in current warp
|
||||
li a0, 0
|
||||
.insn s 0x6b, 0, x0, 0(a0) # tmc a0
|
||||
|
|
|
@ -269,16 +269,16 @@ Word Core::get_csr(Addr addr, int tid, int wid) {
|
|||
} else if (addr == CSR_NC) {
|
||||
// Number of cores
|
||||
return arch_.num_cores();
|
||||
} else if (addr == CSR_INSTRET) {
|
||||
} else if (addr == CSR_MINSTRET) {
|
||||
// NumInsts
|
||||
return insts_;
|
||||
} else if (addr == CSR_INSTRET_H) {
|
||||
} else if (addr == CSR_MINSTRET_H) {
|
||||
// NumInsts
|
||||
return (Word)(insts_ >> 32);
|
||||
} else if (addr == CSR_CYCLE) {
|
||||
} else if (addr == CSR_MCYCLE) {
|
||||
// NumCycles
|
||||
return (Word)steps_;
|
||||
} else if (addr == CSR_CYCLE_H) {
|
||||
} else if (addr == CSR_MCYCLE_H) {
|
||||
// NumCycles
|
||||
return (Word)(steps_ >> 32);
|
||||
} else {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue