mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
CSRs update
This commit is contained in:
parent
c8bae13448
commit
badfb24e01
6 changed files with 61 additions and 51 deletions
|
@ -91,7 +91,7 @@
|
|||
`endif
|
||||
|
||||
`ifndef NUM_BARRIERS
|
||||
`define NUM_BARRIERS 4
|
||||
`define NUM_BARRIERS (`NUM_WARPS/2)
|
||||
`endif
|
||||
|
||||
`ifndef SOCKET_SIZE
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -32,7 +32,7 @@
|
|||
|
||||
// Machine Performance-monitoring counters classes ////////////////////////////
|
||||
|
||||
`define VX_DCR_MPM_CLASS_NONE 0
|
||||
`define VX_DCR_MPM_CLASS_NONE 0
|
||||
`define VX_DCR_MPM_CLASS_CORE 1
|
||||
`define VX_DCR_MPM_CLASS_MEM 2
|
||||
|
||||
|
@ -41,7 +41,7 @@
|
|||
`define VX_CSR_FFLAGS 12'h001
|
||||
`define VX_CSR_FRM 12'h002
|
||||
`define VX_CSR_FCSR 12'h003
|
||||
|
||||
|
||||
`define VX_CSR_SATP 12'h180
|
||||
|
||||
`define VX_CSR_PMPCFG0 12'h3A0
|
||||
|
@ -101,7 +101,7 @@
|
|||
`define VX_CSR_MPM_STORES_H 12'hB8D
|
||||
`define VX_CSR_MPM_IFETCH_LT 12'hB0E
|
||||
`define VX_CSR_MPM_IFETCH_LT_H 12'hB8E
|
||||
`define VX_CSR_MPM_LOAD_LT 12'hB0F
|
||||
`define VX_CSR_MPM_LOAD_LT 12'hB0F
|
||||
`define VX_CSR_MPM_LOAD_LT_H 12'hB8F
|
||||
// SFU: scoreboard
|
||||
`define VX_CSR_MPM_SCRB_WCTL 12'hB10
|
||||
|
@ -187,11 +187,12 @@
|
|||
`define VX_CSR_THREAD_ID 12'hCC0
|
||||
`define VX_CSR_WARP_ID 12'hCC1
|
||||
`define VX_CSR_CORE_ID 12'hCC2
|
||||
`define VX_CSR_WARP_MASK 12'hCC3
|
||||
`define VX_CSR_THREAD_MASK 12'hCC4 // warning! this value is also used in LLVM
|
||||
`define VX_CSR_ACTIVE_WARPS 12'hCC3
|
||||
`define VX_CSR_ACTIVE_THREADS 12'hCC4 // warning! this value is also used in LLVM
|
||||
|
||||
`define VX_CSR_NUM_THREADS 12'hFC0
|
||||
`define VX_CSR_NUM_WARPS 12'hFC1
|
||||
`define VX_CSR_NUM_CORES 12'hFC2
|
||||
`define VX_CSR_NUM_BARRIERS 12'hFC3
|
||||
|
||||
`endif // VX_TYPES_VH
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
// Copyright © 2019-2023
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
@ -62,7 +62,7 @@ import VX_fpu_pkg::*;
|
|||
output wire [`XLEN-1:0] read_data_ro,
|
||||
output wire [`XLEN-1:0] read_data_rw,
|
||||
|
||||
input wire write_enable,
|
||||
input wire write_enable,
|
||||
input wire [`UUID_WIDTH-1:0] write_uuid,
|
||||
input wire [`NW_WIDTH-1:0] write_wid,
|
||||
input wire [`VX_CSR_ADDR_BITS-1:0] write_addr,
|
||||
|
@ -77,7 +77,7 @@ import VX_fpu_pkg::*;
|
|||
|
||||
reg [`XLEN-1:0] mscratch;
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
`ifdef EXT_F_ENABLE
|
||||
reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FP_FLAGS_BITS-1:0] fcsr, fcsr_n;
|
||||
wire [`NUM_FPU_BLOCKS-1:0] fpu_write_enable;
|
||||
wire [`NUM_FPU_BLOCKS-1:0][`NW_WIDTH-1:0] fpu_write_wid;
|
||||
|
@ -88,7 +88,7 @@ import VX_fpu_pkg::*;
|
|||
assign fpu_write_wid[i] = fpu_csr_if[i].write_wid;
|
||||
assign fpu_write_fflags[i] = fpu_csr_if[i].write_fflags;
|
||||
end
|
||||
|
||||
|
||||
always @(*) begin
|
||||
fcsr_n = fcsr;
|
||||
for (integer i = 0; i < `NUM_FPU_BLOCKS; ++i) begin
|
||||
|
@ -96,7 +96,7 @@ import VX_fpu_pkg::*;
|
|||
fcsr_n[fpu_write_wid[i]][`FP_FLAGS_BITS-1:0] = fcsr[fpu_write_wid[i]][`FP_FLAGS_BITS-1:0]
|
||||
| fpu_write_fflags[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
if (write_enable) begin
|
||||
case (write_addr)
|
||||
`VX_CSR_FFLAGS: fcsr_n[write_wid][`FP_FLAGS_BITS-1:0] = write_data[`FP_FLAGS_BITS-1:0];
|
||||
|
@ -106,7 +106,7 @@ import VX_fpu_pkg::*;
|
|||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
for (genvar i = 0; i < `NUM_FPU_BLOCKS; ++i) begin
|
||||
assign fpu_csr_if[i].read_frm = fcsr[fpu_csr_if[i].read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS];
|
||||
end
|
||||
|
@ -146,7 +146,7 @@ import VX_fpu_pkg::*;
|
|||
`VX_CSR_MSCRATCH: begin
|
||||
mscratch <= write_data;
|
||||
end
|
||||
default: begin
|
||||
default: begin
|
||||
`ASSERT(0, ("%t: *** invalid CSR write address: %0h (#%0d)", $time, write_addr, write_uuid));
|
||||
end
|
||||
endcase
|
||||
|
@ -163,7 +163,7 @@ import VX_fpu_pkg::*;
|
|||
read_data_ro_r = '0;
|
||||
read_data_rw_r = '0;
|
||||
read_addr_valid_r = 1;
|
||||
case (read_addr)
|
||||
case (read_addr)
|
||||
`VX_CSR_MVENDORID : read_data_ro_r = `XLEN'(`VENDOR_ID);
|
||||
`VX_CSR_MARCHID : read_data_ro_r = `XLEN'(`ARCHITECTURE_ID);
|
||||
`VX_CSR_MIMPID : read_data_ro_r = `XLEN'(`IMPLEMENTATION_ID);
|
||||
|
@ -171,25 +171,26 @@ import VX_fpu_pkg::*;
|
|||
`ifdef EXT_F_ENABLE
|
||||
`VX_CSR_FFLAGS : read_data_rw_r = `XLEN'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]);
|
||||
`VX_CSR_FRM : read_data_rw_r = `XLEN'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]);
|
||||
`VX_CSR_FCSR : read_data_rw_r = `XLEN'(fcsr[read_wid]);
|
||||
`VX_CSR_FCSR : read_data_rw_r = `XLEN'(fcsr[read_wid]);
|
||||
`endif
|
||||
`VX_CSR_MSCRATCH : read_data_rw_r = mscratch;
|
||||
|
||||
|
||||
`VX_CSR_WARP_ID : read_data_ro_r = `XLEN'(read_wid);
|
||||
`VX_CSR_CORE_ID : read_data_ro_r = `XLEN'(CORE_ID);
|
||||
`VX_CSR_THREAD_MASK: read_data_ro_r = `XLEN'(thread_masks[read_wid]);
|
||||
`VX_CSR_WARP_MASK : read_data_ro_r = `XLEN'(active_warps);
|
||||
`VX_CSR_ACTIVE_THREADS: read_data_ro_r = `XLEN'(thread_masks[read_wid]);
|
||||
`VX_CSR_ACTIVE_WARPS: read_data_ro_r = `XLEN'(active_warps);
|
||||
`VX_CSR_NUM_THREADS: read_data_ro_r = `XLEN'(`NUM_THREADS);
|
||||
`VX_CSR_NUM_WARPS : read_data_ro_r = `XLEN'(`NUM_WARPS);
|
||||
`VX_CSR_NUM_CORES : read_data_ro_r = `XLEN'(`NUM_CORES * `NUM_CLUSTERS);
|
||||
|
||||
`VX_CSR_NUM_BARRIERS: read_data_ro_r = `XLEN'(`NUM_BARRIERS);
|
||||
|
||||
`CSR_READ_64(`VX_CSR_MCYCLE, read_data_ro_r, cycles);
|
||||
|
||||
`VX_CSR_MPM_RESERVED : read_data_ro_r = 'x;
|
||||
`VX_CSR_MPM_RESERVED_H : read_data_ro_r = 'x;
|
||||
|
||||
`CSR_READ_64(`VX_CSR_MINSTRET, read_data_ro_r, commit_csr_if.instret);
|
||||
|
||||
`VX_CSR_MPM_RESERVED_H : read_data_ro_r = 'x;
|
||||
|
||||
`CSR_READ_64(`VX_CSR_MINSTRET, read_data_ro_r, commit_csr_if.instret);
|
||||
|
||||
`VX_CSR_SATP,
|
||||
`VX_CSR_MSTATUS,
|
||||
`VX_CSR_MNSTATUS,
|
||||
|
@ -210,7 +211,7 @@ import VX_fpu_pkg::*;
|
|||
case (base_dcrs.mpm_class)
|
||||
`VX_DCR_MPM_CLASS_CORE: begin
|
||||
case (read_addr)
|
||||
// PERF: pipeline
|
||||
// PERF: pipeline
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCHED_ID, read_data_ro_r, pipeline_perf_if.sched_idles);
|
||||
`CSR_READ_64(`VX_CSR_MPM_SCHED_ST, read_data_ro_r, pipeline_perf_if.sched_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IBUF_ST, read_data_ro_r, pipeline_perf_if.ibf_stalls);
|
||||
|
@ -231,7 +232,7 @@ import VX_fpu_pkg::*;
|
|||
`CSR_READ_64(`VX_CSR_MPM_LOADS, read_data_ro_r, pipeline_perf_if.loads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_STORES, read_data_ro_r, pipeline_perf_if.stores);
|
||||
`CSR_READ_64(`VX_CSR_MPM_IFETCH_LT, read_data_ro_r, pipeline_perf_if.ifetch_latency);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LOAD_LT, read_data_ro_r, pipeline_perf_if.load_latency);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LOAD_LT, read_data_ro_r, pipeline_perf_if.load_latency);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
@ -248,17 +249,17 @@ import VX_fpu_pkg::*;
|
|||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MISS_W, read_data_ro_r, mem_perf_if.dcache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_BANK_ST, read_data_ro_r, mem_perf_if.dcache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_DCACHE_MSHR_ST, read_data_ro_r, mem_perf_if.dcache.mshr_stalls);
|
||||
// PERF: lmem
|
||||
// PERF: lmem
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_READS, read_data_ro_r, mem_perf_if.lmem.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_WRITES, read_data_ro_r, mem_perf_if.lmem.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_LMEM_BANK_ST, read_data_ro_r, mem_perf_if.lmem.bank_stalls);
|
||||
// PERF: l2cache
|
||||
// PERF: l2cache
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_READS, read_data_ro_r, mem_perf_if.l2cache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_WRITES, read_data_ro_r, mem_perf_if.l2cache.writes);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_R, read_data_ro_r, mem_perf_if.l2cache.read_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MISS_W, read_data_ro_r, mem_perf_if.l2cache.write_misses);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_BANK_ST, read_data_ro_r, mem_perf_if.l2cache.bank_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MSHR_ST, read_data_ro_r, mem_perf_if.l2cache.mshr_stalls);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L2CACHE_MSHR_ST, read_data_ro_r, mem_perf_if.l2cache.mshr_stalls);
|
||||
// PERF: l3cache
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_READS, read_data_ro_r, mem_perf_if.l3cache.reads);
|
||||
`CSR_READ_64(`VX_CSR_MPM_L3CACHE_WRITES, read_data_ro_r, mem_perf_if.l3cache.writes);
|
||||
|
|
|
@ -178,17 +178,17 @@ inline int vx_core_id() {
|
|||
return ret;
|
||||
}
|
||||
|
||||
// Return current thread mask
|
||||
inline int vx_thread_mask() {
|
||||
// Return active threads mask
|
||||
inline int vx_active_threads() {
|
||||
int ret;
|
||||
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_THREAD_MASK));
|
||||
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_ACTIVE_THREADS));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Return active warps mask
|
||||
inline int vx_warp_mask() {
|
||||
inline int vx_active_warps() {
|
||||
int ret;
|
||||
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_WARP_MASK));
|
||||
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_ACTIVE_WARPS));
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -213,6 +213,13 @@ inline int vx_num_cores() {
|
|||
return ret;
|
||||
}
|
||||
|
||||
// Return the number of barriers (the value read from the VX_CSR_NUM_BARRIERS CSR)
|
||||
inline int vx_num_barriers() {
|
||||
int ret;
|
||||
asm volatile ("csrr %0, %1" : "=r"(ret) : "i"(VX_CSR_NUM_BARRIERS)); // %0: ret, written as an output register; %1: CSR address, passed as an immediate
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Return the hart identifier (thread id across the processor)
|
||||
inline int vx_hart_id() {
|
||||
int ret;
|
||||
|
|
|
@ -358,11 +358,12 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
|
|||
case VX_CSR_THREAD_ID: return tid;
|
||||
case VX_CSR_WARP_ID: return wid;
|
||||
case VX_CSR_CORE_ID: return core_->id();
|
||||
case VX_CSR_THREAD_MASK:return warps_.at(wid).tmask.to_ulong();
|
||||
case VX_CSR_WARP_MASK: return active_warps_.to_ulong();
|
||||
case VX_CSR_ACTIVE_THREADS:return warps_.at(wid).tmask.to_ulong();
|
||||
case VX_CSR_ACTIVE_WARPS:return active_warps_.to_ulong();
|
||||
case VX_CSR_NUM_THREADS:return arch_.num_threads();
|
||||
case VX_CSR_NUM_WARPS: return arch_.num_warps();
|
||||
case VX_CSR_NUM_CORES: return uint32_t(arch_.num_cores()) * arch_.num_clusters();
|
||||
case VX_CSR_NUM_BARRIERS:return arch_.num_barriers();
|
||||
case VX_CSR_MSCRATCH: return csr_mscratch_;
|
||||
CSR_READ_64(VX_CSR_MCYCLE, core_perf.cycles);
|
||||
CSR_READ_64(VX_CSR_MINSTRET, core_perf.instrs);
|
||||
|
|
|
@ -65,7 +65,7 @@ int test_local_memory() {
|
|||
|
||||
int num_threads = std::min(vx_num_threads(), 8);
|
||||
int tmask = make_full_tmask(num_threads);
|
||||
vx_tmc(tmask);
|
||||
vx_tmc(tmask);
|
||||
do_lmem_wr();
|
||||
do_lmem_rd();
|
||||
vx_tmc_one();
|
||||
|
@ -87,7 +87,7 @@ int test_tmc() {
|
|||
|
||||
int num_threads = std::min(vx_num_threads(), 8);
|
||||
int tmask = make_full_tmask(num_threads);
|
||||
vx_tmc(tmask);
|
||||
vx_tmc(tmask);
|
||||
do_tmc();
|
||||
vx_tmc_one();
|
||||
|
||||
|
@ -146,7 +146,7 @@ int dvg_buffer[4];
|
|||
void __attribute__((noinline)) __attribute__((optimize("O1"))) do_divergence() {
|
||||
int tid = vx_thread_id();
|
||||
int cond1 = tid < 2;
|
||||
int sp1 = vx_split(cond1);
|
||||
int sp1 = vx_split(cond1);
|
||||
if (cond1) {
|
||||
{
|
||||
int cond2 = tid < 1;
|
||||
|
@ -244,9 +244,9 @@ void __attribute__((noinline)) do_serial() {
|
|||
}
|
||||
|
||||
int test_serial() {
|
||||
PRINTF("Serial Test\n");
|
||||
PRINTF("Serial Test\n");
|
||||
int num_threads = std::min(vx_num_threads(), 8);
|
||||
int tmask = make_full_tmask(num_threads);
|
||||
int tmask = make_full_tmask(num_threads);
|
||||
vx_tmc(tmask);
|
||||
do_serial();
|
||||
vx_tmc_one();
|
||||
|
@ -258,10 +258,10 @@ int test_serial() {
|
|||
|
||||
int tmask_buffer[8];
|
||||
|
||||
int __attribute__((noinline)) do_tmask() {
|
||||
int __attribute__((noinline)) do_tmask() {
|
||||
int tid = vx_thread_id();
|
||||
int tmask = make_select_tmask(tid);
|
||||
int cur_tmask = vx_thread_mask();
|
||||
int cur_tmask = vx_active_threads();
|
||||
tmask_buffer[tid] = (cur_tmask == tmask) ? (65 + tid) : 0;
|
||||
return tid + 1;
|
||||
}
|
||||
|
@ -275,11 +275,11 @@ int test_tmask() {
|
|||
int num_threads = std::min(vx_num_threads(), 8);
|
||||
int tid = 0;
|
||||
|
||||
l_start:
|
||||
l_start:
|
||||
int tmask = make_select_tmask(tid);
|
||||
vx_tmc(tmask);
|
||||
tid = do_tmask();
|
||||
if (tid < num_threads)
|
||||
vx_tmc(tmask);
|
||||
tid = do_tmask();
|
||||
if (tid < num_threads)
|
||||
goto l_start;
|
||||
vx_tmc_one();
|
||||
|
||||
|
@ -296,7 +296,7 @@ void barrier_kernel() {
|
|||
unsigned wid = vx_warp_id();
|
||||
for (int i = 0; i <= (wid * 256); ++i) {
|
||||
++barrier_stall;
|
||||
}
|
||||
}
|
||||
barrier_buffer[wid] = 65 + wid;
|
||||
vx_barrier(0, barrier_ctr);
|
||||
vx_tmc(0 == wid);
|
||||
|
@ -308,7 +308,7 @@ int test_barrier() {
|
|||
barrier_ctr = num_warps;
|
||||
barrier_stall = 0;
|
||||
vx_wspawn(num_warps, barrier_kernel);
|
||||
barrier_kernel();
|
||||
barrier_kernel();
|
||||
return check_error(barrier_buffer, 0, num_warps);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue