64-bit RTL fixes and optimizations

This commit is contained in:
Blaise Tine 2023-06-20 14:00:41 -04:00
parent 6117fb48fe
commit b8ddc91b2c
9 changed files with 190 additions and 176 deletions

View file

@ -106,9 +106,9 @@
`define INST_ALU_AUIPC 4'b0011
`define INST_ALU_SLTU 4'b0100
`define INST_ALU_SLT 4'b0101
`define INST_ALU_SUB 4'b0111
`define INST_ALU_SRL 4'b1000
`define INST_ALU_SRA 4'b1001
`define INST_ALU_SUB 4'b1011
`define INST_ALU_AND 4'b1100
`define INST_ALU_OR 4'b1101
`define INST_ALU_XOR 4'b1110

View file

@ -22,21 +22,28 @@ module VX_alu_unit #(
localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED;
localparam SHIFT_IMM_BITS = `CLOG2(`XLEN);
wire [`NUM_THREADS-1:0][`XLEN-1:0] add_result;
wire [`NUM_THREADS-1:0][`XLEN:0] sub_result; // +1 bit for branch compare
wire [`NUM_THREADS-1:0][`XLEN-1:0] shr_result;
reg [`NUM_THREADS-1:0][`XLEN-1:0] msc_result;
wire [`NUM_THREADS-1:0][`XLEN-1:0] add_result_w;
wire [`NUM_THREADS-1:0][`XLEN-1:0] sub_result_w;
wire [`NUM_THREADS-1:0][`XLEN-1:0] shr_result_w;
reg [`NUM_THREADS-1:0][`XLEN-1:0] msc_result_w;
reg [`NUM_THREADS-1:0][`XLEN-1:0] alu_result;
reg [`NUM_THREADS-1:0][`XLEN-1:0] add_result;
reg [`NUM_THREADS-1:0][`XLEN:0] sub_result; // +1 bit for branch compare
reg [`NUM_THREADS-1:0][`XLEN-1:0] shr_result;
reg [`NUM_THREADS-1:0][`XLEN-1:0] msc_result;
wire ready_in;
wire ready_in;
`UNUSED_VAR (alu_req_if.op_mod)
// is_alu_w selects the 32-bit "W" result paths (ADDW/ADDIW, SUBW, SRLW/SRAW, SLLW)
// in the ALU result mux. It is decoded from op_mod only when XLEN_64 is defined;
// otherwise it is tied to 0 so the W paths are never selected.
`ifdef XLEN_64
wire is_alu_w = `INST_ALU_IS_W(alu_req_if.op_mod);
`else
wire is_alu_w = 0;
`endif
`UNUSED_VAR (alu_req_if.op_mod)
wire [`INST_ALU_BITS-1:0] alu_op = `INST_ALU_BITS'(alu_req_if.op_type);
wire [`INST_BR_BITS-1:0] br_op = `INST_BR_BITS'(alu_req_if.op_type);
wire is_br_op = `INST_ALU_IS_BR(alu_req_if.op_mod);
@ -52,46 +59,49 @@ module VX_alu_unit #(
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_in2_br = (alu_req_if.use_imm && ~is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign add_result[i] = is_alu_w ? `XLEN'($signed(alu_in1[i][31:0] + alu_in2_imm[i][31:0])) :
(alu_in1_PC[i] + alu_in2_imm[i]);
assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
assign add_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] + alu_in2_imm[i][31:0]));
end
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [`XLEN:0] sub_in1 = {alu_signed & alu_in1[i][`XLEN-1], alu_in1[i]};
wire [`XLEN:0] sub_in2 = {alu_signed & alu_in2_br[i][`XLEN-1], alu_in2_br[i]};
assign sub_result[i] = is_alu_w ? {1'b0, `XLEN'($signed(alu_in1[i][31:0] - alu_in2_imm[i][31:0]))} :
(sub_in1 - sub_in2);
assign sub_result[i] = sub_in1 - sub_in2;
assign sub_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] - alu_in2_imm[i][31:0]));
end
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [`XLEN:0] shr_in1 = {alu_signed && alu_in1[i][`XLEN-1], alu_in1[i]};
wire [32:0] shr_in1_w = {alu_signed && alu_in1[i][31], alu_in1[i][31:0]};
wire [31:0] shr_res_w = 32'($signed(shr_in1_w) >>> alu_in2_imm[i][4:0]);
assign shr_result[i] = is_alu_w ? `XLEN'($signed(shr_res_w)) :
`XLEN'($signed(shr_in1) >>> alu_in2_imm[i][SHIFT_IMM_BITS-1:0]);
wire [`XLEN:0] shr_in1 = {alu_signed && alu_in1[i][`XLEN-1], alu_in1[i]};
assign shr_result[i] = `XLEN'($signed(shr_in1) >>> alu_in2_imm[i][SHIFT_IMM_BITS-1:0]);
wire [32:0] shr_in1_w = {alu_signed && alu_in1[i][31], alu_in1[i][31:0]};
wire [31:0] shr_res_w = 32'($signed(shr_in1_w) >>> alu_in2_imm[i][4:0]);
assign shr_result_w[i] = `XLEN'($signed(shr_res_w));
end
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
always @(*) begin
case (alu_op)
`INST_ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
`INST_ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
`INST_ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
`INST_ALU_SLL: msc_result[i] = is_alu_w ? `XLEN'($signed(alu_in1[i][31:0] << alu_in2_imm[i][4:0])) :
(alu_in1[i] << alu_in2_imm[i][SHIFT_IMM_BITS-1:0]);
default: msc_result[i] = 'x;
case (alu_op[1:0])
2'b00: msc_result[i] = alu_in1[i] & alu_in2_imm[i]; // AND
2'b01: msc_result[i] = alu_in1[i] | alu_in2_imm[i]; // OR
2'b10: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i]; // XOR
2'b11: msc_result[i] = alu_in1[i] << alu_in2_imm[i][SHIFT_IMM_BITS-1:0]; // SLL
endcase
end
assign msc_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] << alu_in2_imm[i][4:0]));
end
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [`XLEN-1:0] slt_sub_result = is_sub_op ? sub_result[i][`XLEN-1:0] : `XLEN'(sub_result[i][`XLEN]);
always @(*) begin
case (alu_op_class)
2'b00: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC, ADDIW, ADDW
2'b01: alu_result[i] = {{`XLEN-1{1'b0}}, sub_result[i][`XLEN]}; // SLTU, SLT
2'b10: alu_result[i] = is_sub_op ? sub_result[i][`XLEN-1:0] // SUB, SUBW
: shr_result[i]; // SRL, SRA, SRLI, SRAI, SRLW, SRAW, SRLIW, SRAIW
default: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLI, SLLIW, SLLW
case ({is_alu_w, alu_op_class})
3'b000: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
3'b001: alu_result[i] = slt_sub_result; // SUB, SLTU, SLT
3'b010: alu_result[i] = shr_result[i]; // SRL, SRA, SRLI, SRAI
3'b011: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLI
3'b100: alu_result[i] = add_result_w[i]; // ADDIW, ADDW
3'b101: alu_result[i] = sub_result_w[i]; // SUBW
3'b110: alu_result[i] = shr_result_w[i]; // SRLW, SRAW, SRLIW, SRAIW
3'b111: alu_result[i] = msc_result_w[i]; // SLLW
endcase
end
end
@ -121,11 +131,6 @@ module VX_alu_unit #(
wire alu_wb;
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] full_alu_data;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign full_alu_data[i] =alu_data[i];
end
wire [`INST_BR_BITS-1:0] br_op_r;
wire [`XLEN-1:0] br_dest_r;
wire is_less_r;
@ -157,16 +162,16 @@ module VX_alu_unit #(
`ifdef EXT_M_ENABLE
wire muldiv_valid_in;
wire muldiv_ready_in;
wire muldiv_valid_out;
wire muldiv_ready_out;
wire [UUID_WIDTH-1:0] muldiv_uuid;
wire [NW_WIDTH-1:0] muldiv_wid;
wire [`NUM_THREADS-1:0] muldiv_tmask;
wire [`XLEN-1:0] muldiv_PC;
wire [`NR_BITS-1:0] muldiv_rd;
wire muldiv_wb;
wire muldiv_valid_in;
wire muldiv_ready_in;
wire muldiv_valid_out;
wire muldiv_ready_out;
wire [UUID_WIDTH-1:0] muldiv_uuid;
wire [NW_WIDTH-1:0] muldiv_wid;
wire [`NUM_THREADS-1:0] muldiv_tmask;
wire [`XLEN-1:0] muldiv_PC;
wire [`NR_BITS-1:0] muldiv_rd;
wire muldiv_wb;
wire [`NUM_THREADS-1:0][`XLEN-1:0] muldiv_data;
wire [`INST_M_BITS-1:0] muldiv_op = `INST_M_BITS'(alu_req_if.op_type);
@ -239,7 +244,7 @@ module VX_alu_unit #(
`endif
}),
.data_in ({
{alu_uuid, alu_wid, alu_tmask, alu_PC, alu_rd, alu_wb, full_alu_data}
{alu_uuid, alu_wid, alu_tmask, alu_PC, alu_rd, alu_wb, alu_data}
`ifdef EXT_M_ENABLE
, {muldiv_uuid, muldiv_wid, muldiv_tmask, muldiv_PC, muldiv_rd, muldiv_wb, muldiv_data}
`endif

View file

@ -45,14 +45,14 @@ module VX_csr_data #(
input wire [`UP(`NW_BITS)-1:0] read_wid,
input wire [`NUM_THREADS-1:0] read_tmask,
input wire [`CSR_ADDR_BITS-1:0] read_addr,
output wire [`XLEN-1:0] read_data_ro,
output wire [`XLEN-1:0] read_data_rw,
output wire [31:0] read_data_ro,
output wire [31:0] read_data_rw,
input wire write_enable,
input wire [`UP(`UUID_BITS)-1:0] write_uuid,
input wire [`UP(`NW_BITS)-1:0] write_wid,
input wire [`CSR_ADDR_BITS-1:0] write_addr,
input wire [`XLEN-1:0] write_data
input wire [31:0] write_data
);
`UNUSED_VAR (reset)
@ -64,15 +64,15 @@ module VX_csr_data #(
reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FP_FLAGS_BITS-1:0] fcsr;
`endif
reg [`XLEN-1:0] csr_satp;
reg [`XLEN-1:0] csr_mstatus;
reg [`XLEN-1:0] csr_medeleg;
reg [`XLEN-1:0] csr_mideleg;
reg [`XLEN-1:0] csr_mie;
reg [`XLEN-1:0] csr_mtvec;
reg [`XLEN-1:0] csr_mepc;
reg [`XLEN-1:0] csr_pmpcfg;
reg [`XLEN-1:0] csr_pmpaddr;
reg [31:0] csr_satp;
reg [31:0] csr_mstatus;
reg [31:0] csr_medeleg;
reg [31:0] csr_mideleg;
reg [31:0] csr_mie;
reg [31:0] csr_mtvec;
reg [31:0] csr_mepc;
reg [31:0] csr_pmpcfg;
reg [31:0] csr_pmpaddr;
always @(posedge clk) begin
`ifdef EXT_F_ENABLE
@ -111,8 +111,8 @@ module VX_csr_data #(
// CSRs read //////////////////////////////////////////////////////////////
reg [`XLEN-1:0] read_data_ro_r;
reg [`XLEN-1:0] read_data_rw_r;
reg [31:0] read_data_ro_r;
reg [31:0] read_data_rw_r;
reg read_addr_valid_r;
always @(*) begin
@ -121,46 +121,46 @@ module VX_csr_data #(
read_addr_valid_r = 1;
case (read_addr)
`ifdef EXT_F_ENABLE
`CSR_FFLAGS : read_data_rw_r = `XLEN'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]);
`CSR_FRM : read_data_rw_r = `XLEN'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]);
`CSR_FCSR : read_data_rw_r = `XLEN'(fcsr[read_wid]);
`CSR_FFLAGS : read_data_rw_r = 32'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]);
`CSR_FRM : read_data_rw_r = 32'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]);
`CSR_FCSR : read_data_rw_r = 32'(fcsr[read_wid]);
`endif
`CSR_LWID : read_data_ro_r = `XLEN'(read_wid);
`CSR_LWID : read_data_ro_r = 32'(read_wid);
/*`CSR_MHARTID ,*/
`CSR_GWID : read_data_ro_r = (`XLEN'(CORE_ID) << `NW_BITS) + `XLEN'(read_wid);
`CSR_GCID : read_data_ro_r = `XLEN'(CORE_ID);
`CSR_GWID : read_data_ro_r = (32'(CORE_ID) << `NW_BITS) + 32'(read_wid);
`CSR_GCID : read_data_ro_r = 32'(CORE_ID);
`CSR_TMASK : read_data_ro_r = `XLEN'(read_tmask);
`CSR_TMASK : read_data_ro_r = 32'(read_tmask);
`CSR_NT : read_data_ro_r = `XLEN'd`NUM_THREADS;
`CSR_NW : read_data_ro_r = `XLEN'd`NUM_WARPS;
`CSR_NC : read_data_ro_r = `XLEN'(`NUM_CORES * `NUM_CLUSTERS);
`CSR_NT : read_data_ro_r = 32'(`NUM_THREADS);
`CSR_NW : read_data_ro_r = 32'(`NUM_WARPS);
`CSR_NC : read_data_ro_r = 32'(`NUM_CORES * `NUM_CLUSTERS);
`CSR_MCYCLE : read_data_ro_r = `XLEN'(fetch_to_csr_if.cycles[31:0]);
`CSR_MCYCLE_H : read_data_ro_r = `XLEN'(fetch_to_csr_if.cycles[`PERF_CTR_BITS-1:32]);
`CSR_MCYCLE : read_data_ro_r = 32'(fetch_to_csr_if.cycles[31:0]);
`CSR_MCYCLE_H : read_data_ro_r = 32'(fetch_to_csr_if.cycles[`PERF_CTR_BITS-1:32]);
`CSR_MPM_RESERVED : read_data_ro_r = 'x;
`CSR_MPM_RESERVED_H : read_data_ro_r = 'x;
`CSR_MINSTRET : read_data_ro_r = `XLEN'(cmt_to_csr_if.instret[31:0]);
`CSR_MINSTRET_H : read_data_ro_r = `XLEN'(cmt_to_csr_if.instret[`PERF_CTR_BITS-1:32]);
`CSR_MINSTRET : read_data_ro_r = 32'(cmt_to_csr_if.instret[31:0]);
`CSR_MINSTRET_H : read_data_ro_r = 32'(cmt_to_csr_if.instret[`PERF_CTR_BITS-1:32]);
`CSR_SATP : read_data_ro_r = `XLEN'(csr_satp);
`CSR_SATP : read_data_ro_r = 32'(csr_satp);
`CSR_MSTATUS,
`CSR_MNSTATUS : read_data_ro_r = `XLEN'(csr_mstatus);
`CSR_MISA : read_data_ro_r = (((`XLEN'($clog2(`XLEN))-4) << (`XLEN-2)) | `MISA_STD);
`CSR_MEDELEG : read_data_ro_r = `XLEN'(csr_medeleg);
`CSR_MIDELEG : read_data_ro_r = `XLEN'(csr_mideleg);
`CSR_MIE : read_data_ro_r = `XLEN'(csr_mie);
`CSR_MTVEC : read_data_ro_r = `XLEN'(csr_mtvec);
`CSR_MNSTATUS : read_data_ro_r = 32'(csr_mstatus);
// NOTE(review): read_data_ro_r is declared 32 bits wide in this revision, but the
// MXL field is still shifted by XLEN-2. With XLEN=64 that is a shift by 62, which
// presumably pushes MXL outside the 32-bit result - confirm what MISA should read
// as on 64-bit builds.
`CSR_MISA : read_data_ro_r = ((($clog2(`XLEN)-4) << (`XLEN-2)) | `MISA_STD);
`CSR_MEDELEG : read_data_ro_r = 32'(csr_medeleg);
`CSR_MIDELEG : read_data_ro_r = 32'(csr_mideleg);
`CSR_MIE : read_data_ro_r = 32'(csr_mie);
`CSR_MTVEC : read_data_ro_r = 32'(csr_mtvec);
`CSR_MEPC : read_data_ro_r = `XLEN'(csr_mepc);
`CSR_MEPC : read_data_ro_r = 32'(csr_mepc);
`CSR_PMPCFG0 : read_data_ro_r = `XLEN'(csr_pmpcfg);
`CSR_PMPADDR0 : read_data_ro_r = `XLEN'(csr_pmpaddr);
`CSR_PMPCFG0 : read_data_ro_r = 32'(csr_pmpcfg);
`CSR_PMPADDR0 : read_data_ro_r = 32'(csr_pmpaddr);
`CSR_MVENDORID : read_data_ro_r = `XLEN'd`VENDOR_ID;
`CSR_MARCHID : read_data_ro_r = `XLEN'd`ARCHITECTURE_ID;
`CSR_MIMPID : read_data_ro_r = `XLEN'd`IMPLEMENTATION_ID;
`CSR_MVENDORID : read_data_ro_r = 32'(`VENDOR_ID);
`CSR_MARCHID : read_data_ro_r = 32'(`ARCHITECTURE_ID);
`CSR_MIMPID : read_data_ro_r = 32'(`IMPLEMENTATION_ID);
default: begin
read_addr_valid_r = 0;
@ -173,35 +173,35 @@ module VX_csr_data #(
case (read_addr)
// PERF: pipeline
`CSR_MPM_IBUF_ST : read_data_ro_r = perf_pipeline_if.ibf_stalls[31:0];
`CSR_MPM_IBUF_ST_H : read_data_ro_r = `XLEN'(perf_pipeline_if.ibf_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_IBUF_ST_H : read_data_ro_r = 32'(perf_pipeline_if.ibf_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_SCRB_ST : read_data_ro_r = perf_pipeline_if.scb_stalls[31:0];
`CSR_MPM_SCRB_ST_H : read_data_ro_r = `XLEN'(perf_pipeline_if.scb_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_SCRB_ST_H : read_data_ro_r = 32'(perf_pipeline_if.scb_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ALU_ST : read_data_ro_r = perf_pipeline_if.alu_stalls[31:0];
`CSR_MPM_ALU_ST_H : read_data_ro_r = `XLEN'(perf_pipeline_if.alu_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ALU_ST_H : read_data_ro_r = 32'(perf_pipeline_if.alu_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_LSU_ST : read_data_ro_r = perf_pipeline_if.lsu_stalls[31:0];
`CSR_MPM_LSU_ST_H : read_data_ro_r = `XLEN'(perf_pipeline_if.lsu_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_LSU_ST_H : read_data_ro_r = 32'(perf_pipeline_if.lsu_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_CSR_ST : read_data_ro_r = perf_pipeline_if.csr_stalls[31:0];
`CSR_MPM_CSR_ST_H : read_data_ro_r = `XLEN'(perf_pipeline_if.csr_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_CSR_ST_H : read_data_ro_r = 32'(perf_pipeline_if.csr_stalls[`PERF_CTR_BITS-1:32]);
`ifdef EXT_F_ENABLE
`CSR_MPM_FPU_ST : read_data_ro_r = perf_pipeline_if.fpu_stalls[31:0];
`CSR_MPM_FPU_ST_H : read_data_ro_r = `XLEN'(perf_pipeline_if.fpu_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_FPU_ST_H : read_data_ro_r = 32'(perf_pipeline_if.fpu_stalls[`PERF_CTR_BITS-1:32]);
`else
`CSR_MPM_FPU_ST : read_data_ro_r = '0;
`CSR_MPM_FPU_ST_H : read_data_ro_r = '0;
`endif
`CSR_MPM_GPU_ST : read_data_ro_r = perf_pipeline_if.gpu_stalls[31:0];
`CSR_MPM_GPU_ST_H : read_data_ro_r = `XLEN'(perf_pipeline_if.gpu_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_GPU_ST_H : read_data_ro_r = 32'(perf_pipeline_if.gpu_stalls[`PERF_CTR_BITS-1:32]);
// PERF: memory
`CSR_MPM_IFETCHES : read_data_ro_r = perf_pipeline_if.ifetches[31:0];
`CSR_MPM_IFETCHES_H : read_data_ro_r = `XLEN'(perf_pipeline_if.ifetches[`PERF_CTR_BITS-1:32]);
`CSR_MPM_IFETCHES_H : read_data_ro_r = 32'(perf_pipeline_if.ifetches[`PERF_CTR_BITS-1:32]);
`CSR_MPM_LOADS : read_data_ro_r = perf_pipeline_if.loads[31:0];
`CSR_MPM_LOADS_H : read_data_ro_r = `XLEN'(perf_pipeline_if.loads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_LOADS_H : read_data_ro_r = 32'(perf_pipeline_if.loads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_STORES : read_data_ro_r = perf_pipeline_if.stores[31:0];
`CSR_MPM_STORES_H : read_data_ro_r = `XLEN'(perf_pipeline_if.stores[`PERF_CTR_BITS-1:32]);
`CSR_MPM_STORES_H : read_data_ro_r = 32'(perf_pipeline_if.stores[`PERF_CTR_BITS-1:32]);
`CSR_MPM_IFETCH_LAT : read_data_ro_r = perf_pipeline_if.ifetch_latency[31:0];
`CSR_MPM_IFETCH_LAT_H : read_data_ro_r = `XLEN'(perf_pipeline_if.ifetch_latency[`PERF_CTR_BITS-1:32]);
`CSR_MPM_IFETCH_LAT_H : read_data_ro_r = 32'(perf_pipeline_if.ifetch_latency[`PERF_CTR_BITS-1:32]);
`CSR_MPM_LOAD_LAT : read_data_ro_r = perf_pipeline_if.load_latency[31:0];
`CSR_MPM_LOAD_LAT_H : read_data_ro_r = `XLEN'(perf_pipeline_if.load_latency[`PERF_CTR_BITS-1:32]);
`CSR_MPM_LOAD_LAT_H : read_data_ro_r = 32'(perf_pipeline_if.load_latency[`PERF_CTR_BITS-1:32]);
default:;
endcase
end
@ -209,62 +209,62 @@ module VX_csr_data #(
case (read_addr)
// PERF: icache
`CSR_MPM_ICACHE_READS : read_data_ro_r = perf_memsys_if.icache_reads[31:0];
`CSR_MPM_ICACHE_READS_H : read_data_ro_r = `XLEN'(perf_memsys_if.icache_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ICACHE_READS_H : read_data_ro_r = 32'(perf_memsys_if.icache_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ICACHE_MISS_R : read_data_ro_r = perf_memsys_if.icache_read_misses[31:0];
`CSR_MPM_ICACHE_MISS_R_H : read_data_ro_r = `XLEN'(perf_memsys_if.icache_read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ICACHE_MISS_R_H : read_data_ro_r = 32'(perf_memsys_if.icache_read_misses[`PERF_CTR_BITS-1:32]);
// PERF: dcache
`CSR_MPM_DCACHE_READS : read_data_ro_r = perf_memsys_if.dcache_reads[31:0];
`CSR_MPM_DCACHE_READS_H : read_data_ro_r = `XLEN'(perf_memsys_if.dcache_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_READS_H : read_data_ro_r = 32'(perf_memsys_if.dcache_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_WRITES : read_data_ro_r = perf_memsys_if.dcache_writes[31:0];
`CSR_MPM_DCACHE_WRITES_H : read_data_ro_r = `XLEN'(perf_memsys_if.dcache_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_WRITES_H : read_data_ro_r = 32'(perf_memsys_if.dcache_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_MISS_R : read_data_ro_r = perf_memsys_if.dcache_read_misses[31:0];
`CSR_MPM_DCACHE_MISS_R_H : read_data_ro_r = `XLEN'(perf_memsys_if.dcache_read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_MISS_R_H : read_data_ro_r = 32'(perf_memsys_if.dcache_read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_MISS_W : read_data_ro_r = perf_memsys_if.dcache_write_misses[31:0];
`CSR_MPM_DCACHE_MISS_W_H : read_data_ro_r = `XLEN'(perf_memsys_if.dcache_write_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_MISS_W_H : read_data_ro_r = 32'(perf_memsys_if.dcache_write_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_BANK_ST : read_data_ro_r = perf_memsys_if.dcache_bank_stalls[31:0];
`CSR_MPM_DCACHE_BANK_ST_H : read_data_ro_r = `XLEN'(perf_memsys_if.dcache_bank_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_BANK_ST_H : read_data_ro_r = 32'(perf_memsys_if.dcache_bank_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_MSHR_ST : read_data_ro_r = perf_memsys_if.dcache_mshr_stalls[31:0];
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_ro_r = `XLEN'(perf_memsys_if.dcache_mshr_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_ro_r = 32'(perf_memsys_if.dcache_mshr_stalls[`PERF_CTR_BITS-1:32]);
// PERF: smem
`CSR_MPM_SMEM_READS : read_data_ro_r = perf_memsys_if.smem_reads[31:0];
`CSR_MPM_SMEM_READS_H : read_data_ro_r = `XLEN'(perf_memsys_if.smem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_SMEM_READS_H : read_data_ro_r = 32'(perf_memsys_if.smem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_SMEM_WRITES : read_data_ro_r = perf_memsys_if.smem_writes[31:0];
`CSR_MPM_SMEM_WRITES_H : read_data_ro_r = `XLEN'(perf_memsys_if.smem_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_SMEM_WRITES_H : read_data_ro_r = 32'(perf_memsys_if.smem_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_SMEM_BANK_ST : read_data_ro_r = perf_memsys_if.smem_bank_stalls[31:0];
`CSR_MPM_SMEM_BANK_ST_H : read_data_ro_r = `XLEN'(perf_memsys_if.smem_bank_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_SMEM_BANK_ST_H : read_data_ro_r = 32'(perf_memsys_if.smem_bank_stalls[`PERF_CTR_BITS-1:32]);
// PERF: l2cache
`CSR_MPM_L2CACHE_READS : read_data_ro_r = perf_memsys_if.l2cache_reads[31:0];
`CSR_MPM_L2CACHE_READS_H : read_data_ro_r = `XLEN'(perf_memsys_if.l2cache_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L2CACHE_READS_H : read_data_ro_r = 32'(perf_memsys_if.l2cache_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L2CACHE_WRITES : read_data_ro_r = perf_memsys_if.l2cache_writes[31:0];
`CSR_MPM_L2CACHE_WRITES_H : read_data_ro_r = `XLEN'(perf_memsys_if.l2cache_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L2CACHE_WRITES_H : read_data_ro_r = 32'(perf_memsys_if.l2cache_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L2CACHE_MISS_R : read_data_ro_r = perf_memsys_if.l2cache_read_misses[31:0];
`CSR_MPM_L2CACHE_MISS_R_H : read_data_ro_r = `XLEN'(perf_memsys_if.l2cache_read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L2CACHE_MISS_R_H : read_data_ro_r = 32'(perf_memsys_if.l2cache_read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L2CACHE_MISS_W : read_data_ro_r = perf_memsys_if.l2cache_write_misses[31:0];
`CSR_MPM_L2CACHE_MISS_W_H : read_data_ro_r = `XLEN'(perf_memsys_if.l2cache_write_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L2CACHE_MISS_W_H : read_data_ro_r = 32'(perf_memsys_if.l2cache_write_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L2CACHE_BANK_ST : read_data_ro_r = perf_memsys_if.l2cache_bank_stalls[31:0];
`CSR_MPM_L2CACHE_BANK_ST_H : read_data_ro_r = `XLEN'(perf_memsys_if.l2cache_bank_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L2CACHE_BANK_ST_H : read_data_ro_r = 32'(perf_memsys_if.l2cache_bank_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L2CACHE_MSHR_ST : read_data_ro_r = perf_memsys_if.l2cache_mshr_stalls[31:0];
`CSR_MPM_L2CACHE_MSHR_ST_H : read_data_ro_r = `XLEN'(perf_memsys_if.l2cache_mshr_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L2CACHE_MSHR_ST_H : read_data_ro_r = 32'(perf_memsys_if.l2cache_mshr_stalls[`PERF_CTR_BITS-1:32]);
// PERF: l3cache
`CSR_MPM_L3CACHE_READS : read_data_ro_r = perf_memsys_if.l3cache_reads[31:0];
`CSR_MPM_L3CACHE_READS_H : read_data_ro_r = `XLEN'(perf_memsys_if.l3cache_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L3CACHE_READS_H : read_data_ro_r = 32'(perf_memsys_if.l3cache_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L3CACHE_WRITES : read_data_ro_r = perf_memsys_if.l3cache_writes[31:0];
`CSR_MPM_L3CACHE_WRITES_H : read_data_ro_r = `XLEN'(perf_memsys_if.l3cache_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L3CACHE_WRITES_H : read_data_ro_r = 32'(perf_memsys_if.l3cache_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L3CACHE_MISS_R : read_data_ro_r = perf_memsys_if.l3cache_read_misses[31:0];
`CSR_MPM_L3CACHE_MISS_R_H : read_data_ro_r = `XLEN'(perf_memsys_if.l3cache_read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L3CACHE_MISS_R_H : read_data_ro_r = 32'(perf_memsys_if.l3cache_read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L3CACHE_MISS_W : read_data_ro_r = perf_memsys_if.l3cache_write_misses[31:0];
`CSR_MPM_L3CACHE_MISS_W_H : read_data_ro_r = `XLEN'(perf_memsys_if.l3cache_write_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L3CACHE_MISS_W_H : read_data_ro_r = 32'(perf_memsys_if.l3cache_write_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L3CACHE_BANK_ST : read_data_ro_r = perf_memsys_if.l3cache_bank_stalls[31:0];
`CSR_MPM_L3CACHE_BANK_ST_H : read_data_ro_r = `XLEN'(perf_memsys_if.l3cache_bank_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L3CACHE_BANK_ST_H : read_data_ro_r = 32'(perf_memsys_if.l3cache_bank_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L3CACHE_MSHR_ST : read_data_ro_r = perf_memsys_if.l3cache_mshr_stalls[31:0];
`CSR_MPM_L3CACHE_MSHR_ST_H : read_data_ro_r = `XLEN'(perf_memsys_if.l3cache_mshr_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_L3CACHE_MSHR_ST_H : read_data_ro_r = 32'(perf_memsys_if.l3cache_mshr_stalls[`PERF_CTR_BITS-1:32]);
// PERF: memory
`CSR_MPM_MEM_READS : read_data_ro_r = perf_memsys_if.mem_reads[31:0];
`CSR_MPM_MEM_READS_H : read_data_ro_r = `XLEN'(perf_memsys_if.mem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_MEM_READS_H : read_data_ro_r = 32'(perf_memsys_if.mem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_MEM_WRITES : read_data_ro_r = perf_memsys_if.mem_writes[31:0];
`CSR_MPM_MEM_WRITES_H : read_data_ro_r = `XLEN'(perf_memsys_if.mem_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_MEM_WRITES_H : read_data_ro_r = 32'(perf_memsys_if.mem_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_MEM_LAT : read_data_ro_r = perf_memsys_if.mem_latency[31:0];
`CSR_MPM_MEM_LAT_H : read_data_ro_r = `XLEN'(perf_memsys_if.mem_latency[`PERF_CTR_BITS-1:32]);
`CSR_MPM_MEM_LAT_H : read_data_ro_r = 32'(perf_memsys_if.mem_latency[`PERF_CTR_BITS-1:32]);
default:;
endcase
end
@ -272,24 +272,24 @@ module VX_csr_data #(
`ifdef EXT_TEX_ENABLE
case (read_addr)
`CSR_MPM_TEX_READS : read_data_ro_r = perf_tex_if.mem_reads[31:0];
`CSR_MPM_TEX_READS_H : read_data_ro_r = `XLEN'(perf_tex_if.mem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_TEX_READS_H : read_data_ro_r = 32'(perf_tex_if.mem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_TEX_LAT : read_data_ro_r = perf_tex_if.mem_latency[31:0];
`CSR_MPM_TEX_LAT_H : read_data_ro_r = `XLEN'(perf_tex_if.mem_latency[`PERF_CTR_BITS-1:32]);
`CSR_MPM_TEX_LAT_H : read_data_ro_r = 32'(perf_tex_if.mem_latency[`PERF_CTR_BITS-1:32]);
`CSR_MPM_TEX_STALL : read_data_ro_r = perf_tex_if.stall_cycles[31:0];
`CSR_MPM_TEX_STALL_H : read_data_ro_r = `XLEN'(perf_tex_if.stall_cycles[`PERF_CTR_BITS-1:32]);
`CSR_MPM_TEX_STALL_H : read_data_ro_r = 32'(perf_tex_if.stall_cycles[`PERF_CTR_BITS-1:32]);
`ifdef TCACHE_ENABLE
// cache perf counters
`CSR_MPM_TCACHE_READS : read_data_ro_r = perf_tcache_if.reads[31:0];
`CSR_MPM_TCACHE_READS_H : read_data_ro_r = `XLEN'(perf_tcache_if.reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_TCACHE_READS_H : read_data_ro_r = 32'(perf_tcache_if.reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_TCACHE_MISS_R : read_data_ro_r = perf_tcache_if.read_misses[31:0];
`CSR_MPM_TCACHE_MISS_R_H: read_data_ro_r = `XLEN'(perf_tcache_if.read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_TCACHE_MISS_R_H: read_data_ro_r = 32'(perf_tcache_if.read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_TCACHE_BANK_ST : read_data_ro_r = perf_tcache_if.bank_stalls[31:0];
`CSR_MPM_TCACHE_BANK_ST_H:read_data_ro_r = `XLEN'(perf_tcache_if.bank_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_TCACHE_BANK_ST_H:read_data_ro_r = 32'(perf_tcache_if.bank_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_TCACHE_MSHR_ST :read_data_ro_r = perf_tcache_if.mshr_stalls[31:0];
`CSR_MPM_TCACHE_MSHR_ST_H:read_data_ro_r = `XLEN'(perf_tcache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_TCACHE_MSHR_ST_H:read_data_ro_r = 32'(perf_tcache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
`endif
`CSR_MPM_TEX_ISSUE_ST : read_data_ro_r = perf_gpu_if.tex_stalls[31:0];
`CSR_MPM_TEX_ISSUE_ST_H : read_data_ro_r = `XLEN'(perf_gpu_if.tex_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_TEX_ISSUE_ST_H : read_data_ro_r = 32'(perf_gpu_if.tex_stalls[`PERF_CTR_BITS-1:32]);
default:;
endcase
`endif
@ -298,24 +298,24 @@ module VX_csr_data #(
`ifdef EXT_RASTER_ENABLE
case (read_addr)
`CSR_MPM_RASTER_READS : read_data_ro_r = perf_raster_if.mem_reads[31:0];
`CSR_MPM_RASTER_READS_H : read_data_ro_r = `XLEN'(perf_raster_if.mem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_RASTER_READS_H : read_data_ro_r = 32'(perf_raster_if.mem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_RASTER_LAT : read_data_ro_r = perf_raster_if.mem_latency[31:0];
`CSR_MPM_RASTER_LAT_H : read_data_ro_r = `XLEN'(perf_raster_if.mem_latency[`PERF_CTR_BITS-1:32]);
`CSR_MPM_RASTER_LAT_H : read_data_ro_r = 32'(perf_raster_if.mem_latency[`PERF_CTR_BITS-1:32]);
`CSR_MPM_RASTER_STALL : read_data_ro_r = perf_raster_if.stall_cycles[31:0];
`CSR_MPM_RASTER_STALL_H : read_data_ro_r = `XLEN'(perf_raster_if.stall_cycles[`PERF_CTR_BITS-1:32]);
`CSR_MPM_RASTER_STALL_H : read_data_ro_r = 32'(perf_raster_if.stall_cycles[`PERF_CTR_BITS-1:32]);
`ifdef RCACHE_ENABLE
// cache perf counters
`CSR_MPM_RCACHE_READS : read_data_ro_r = perf_rcache_if.reads[31:0];
`CSR_MPM_RCACHE_READS_H : read_data_ro_r = `XLEN'(perf_rcache_if.reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_RCACHE_READS_H : read_data_ro_r = 32'(perf_rcache_if.reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_RCACHE_MISS_R : read_data_ro_r = perf_rcache_if.read_misses[31:0];
`CSR_MPM_RCACHE_MISS_R_H: read_data_ro_r = `XLEN'(perf_rcache_if.read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_RCACHE_MISS_R_H: read_data_ro_r = 32'(perf_rcache_if.read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_RCACHE_BANK_ST : read_data_ro_r = perf_rcache_if.bank_stalls[31:0];
`CSR_MPM_RCACHE_BANK_ST_H:read_data_ro_r = `XLEN'(perf_rcache_if.bank_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_RCACHE_BANK_ST_H:read_data_ro_r = 32'(perf_rcache_if.bank_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_RCACHE_MSHR_ST :read_data_ro_r = perf_rcache_if.mshr_stalls[31:0];
`CSR_MPM_RCACHE_MSHR_ST_H:read_data_ro_r = `XLEN'(perf_rcache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_RCACHE_MSHR_ST_H:read_data_ro_r = 32'(perf_rcache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
`endif
`CSR_MPM_RASTER_ISSUE_ST : read_data_ro_r = perf_gpu_if.raster_stalls[31:0];
`CSR_MPM_RASTER_ISSUE_ST_H : read_data_ro_r = `XLEN'(perf_gpu_if.raster_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_RASTER_ISSUE_ST_H : read_data_ro_r = 32'(perf_gpu_if.raster_stalls[`PERF_CTR_BITS-1:32]);
default:;
endcase
`endif
@ -324,30 +324,30 @@ module VX_csr_data #(
`ifdef EXT_ROP_ENABLE
case (read_addr)
`CSR_MPM_ROP_READS : read_data_ro_r = perf_rop_if.mem_reads[31:0];
`CSR_MPM_ROP_READS_H : read_data_ro_r = `XLEN'(perf_rop_if.mem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ROP_READS_H : read_data_ro_r = 32'(perf_rop_if.mem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ROP_WRITES : read_data_ro_r = perf_rop_if.mem_writes[31:0];
`CSR_MPM_ROP_WRITES_H : read_data_ro_r = `XLEN'(perf_rop_if.mem_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ROP_WRITES_H : read_data_ro_r = 32'(perf_rop_if.mem_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ROP_LAT : read_data_ro_r = perf_rop_if.mem_latency[31:0];
`CSR_MPM_ROP_LAT_H : read_data_ro_r = `XLEN'(perf_rop_if.mem_latency[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ROP_LAT_H : read_data_ro_r = 32'(perf_rop_if.mem_latency[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ROP_STALL : read_data_ro_r = perf_rop_if.stall_cycles[31:0];
`CSR_MPM_ROP_STALL_H : read_data_ro_r = `XLEN'(perf_rop_if.stall_cycles[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ROP_STALL_H : read_data_ro_r = 32'(perf_rop_if.stall_cycles[`PERF_CTR_BITS-1:32]);
`ifdef OCACHE_ENABLE
// cache perf counters
`CSR_MPM_OCACHE_READS : read_data_ro_r = perf_ocache_if.reads[31:0];
`CSR_MPM_OCACHE_READS_H : read_data_ro_r = `XLEN'(perf_ocache_if.reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_OCACHE_READS_H : read_data_ro_r = 32'(perf_ocache_if.reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_OCACHE_WRITES : read_data_ro_r = perf_ocache_if.writes[31:0];
`CSR_MPM_OCACHE_WRITES_H: read_data_ro_r = `XLEN'(perf_ocache_if.writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_OCACHE_WRITES_H: read_data_ro_r = 32'(perf_ocache_if.writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_OCACHE_MISS_R : read_data_ro_r = perf_ocache_if.read_misses[31:0];
`CSR_MPM_OCACHE_MISS_R_H: read_data_ro_r = `XLEN'(perf_ocache_if.read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_OCACHE_MISS_R_H: read_data_ro_r = 32'(perf_ocache_if.read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_OCACHE_MISS_W : read_data_ro_r = perf_ocache_if.write_misses[31:0];
`CSR_MPM_OCACHE_MISS_W_H: read_data_ro_r = `XLEN'(perf_ocache_if.write_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_OCACHE_MISS_W_H: read_data_ro_r = 32'(perf_ocache_if.write_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_OCACHE_BANK_ST : read_data_ro_r = perf_ocache_if.bank_stalls[31:0];
`CSR_MPM_OCACHE_BANK_ST_H:read_data_ro_r = `XLEN'(perf_ocache_if.bank_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_OCACHE_BANK_ST_H:read_data_ro_r = 32'(perf_ocache_if.bank_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_OCACHE_MSHR_ST :read_data_ro_r = perf_ocache_if.mshr_stalls[31:0];
`CSR_MPM_OCACHE_MSHR_ST_H:read_data_ro_r = `XLEN'(perf_ocache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_OCACHE_MSHR_ST_H:read_data_ro_r = 32'(perf_ocache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
`endif
`CSR_MPM_ROP_ISSUE_ST : read_data_ro_r = perf_gpu_if.rop_stalls[31:0];
`CSR_MPM_ROP_ISSUE_ST_H : read_data_ro_r = `XLEN'(perf_gpu_if.rop_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ROP_ISSUE_ST_H : read_data_ro_r = 32'(perf_gpu_if.rop_stalls[`PERF_CTR_BITS-1:32]);
default:;
endcase
`endif

View file

@ -58,12 +58,12 @@ module VX_csr_unit #(
localparam NW_WIDTH = `UP(`NW_BITS);
reg [`NUM_THREADS-1:0][`XLEN-1:0] csr_read_data;
reg [`XLEN-1:0] csr_write_data;
wire [`XLEN-1:0] csr_read_data_ro, csr_read_data_rw;
wire [`XLEN-1:0] csr_req_data;
reg csr_rd_enable;
wire csr_wr_enable;
reg [`NUM_THREADS-1:0][31:0] csr_read_data;
reg [31:0] csr_write_data;
wire [31:0] csr_read_data_ro, csr_read_data_rw;
wire [31:0] csr_req_data;
reg csr_rd_enable;
wire csr_wr_enable;
`UNUSED_VAR (gpu_pending)
wire csr_access_pending = (0
@ -177,24 +177,24 @@ module VX_csr_unit #(
.read_wid (csr_req_if.wid),
.read_tmask (csr_req_if.tmask),
.read_addr (csr_req_if.addr),
.read_data_ro (csr_read_data_ro[`XLEN-1:0]),
.read_data_rw (csr_read_data_rw[`XLEN-1:0]),
.read_data_ro (csr_read_data_ro[31:0]),
.read_data_rw (csr_read_data_rw[31:0]),
.write_enable (csr_req_valid && csr_wr_enable),
.write_uuid (csr_req_if.uuid),
.write_wid (csr_req_if.wid),
.write_addr (csr_req_if.addr),
.write_data (csr_write_data[`XLEN-1:0])
.write_data (csr_write_data[31:0])
);
// CSR read
wire [`NUM_THREADS-1:0][`XLEN-1:0] wtid, ltid, gtid;
wire [`NUM_THREADS-1:0][31:0] wtid, ltid, gtid;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign wtid[i] = `XLEN'(i);
assign ltid[i] = (`XLEN'(csr_req_if.wid) << `NT_BITS) + i;
assign gtid[i] = `XLEN'((`XLEN'(CORE_ID) << (`NW_BITS + `NT_BITS)) + (`XLEN'(csr_req_if.wid) << `NT_BITS) + i);
assign wtid[i] = 32'(i);
assign ltid[i] = (32'(csr_req_if.wid) << `NT_BITS) + i;
assign gtid[i] = 32'((CORE_ID << (`NW_BITS + `NT_BITS)) + (32'(csr_req_if.wid) << `NT_BITS) + i);
end
always @(*) begin
@ -217,7 +217,7 @@ module VX_csr_unit #(
// CSR write
assign csr_req_data = csr_req_if.use_imm ? `XLEN'(csr_req_if.imm) : csr_req_if.rs1_data[csr_req_if.tid];
assign csr_req_data = csr_req_if.use_imm ? 32'(csr_req_if.imm) : csr_req_if.rs1_data[csr_req_if.tid];
assign csr_wr_enable = (csr_write_enable || (csr_req_data != 0))
`ifdef EXT_ROP_ENABLE
@ -241,13 +241,10 @@ module VX_csr_unit #(
end
// send response
wire [`NUM_THREADS-1:0][`XLEN-1:0] csr_commit_data;
for(genvar i = 0; i < `NUM_THREADS; ++i) begin
assign csr_commit_if.data[i] = `XLEN'(csr_commit_data[i]);
end
wire [`NUM_THREADS-1:0][31:0] csr_commit_data;
VX_skid_buffer #(
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `NR_BITS + 1 + `NUM_THREADS * `XLEN)
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `NR_BITS + 1 + `NUM_THREADS * 32)
) rsp_sbuf (
.clk (clk),
.reset (reset),
@ -258,8 +255,12 @@ module VX_csr_unit #(
.valid_out (csr_commit_if.valid),
.ready_out (csr_commit_if.ready)
);
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign csr_commit_if.data[i] = `XLEN'(csr_commit_data[i]);
end
assign csr_commit_if.eop = 1'b1;
assign csr_commit_if.eop = 1'b1;
// pending request
reg req_pending_r;

View file

@ -84,16 +84,21 @@ module VX_dispatch (
wire [`INST_CSR_BITS-1:0] csr_op_type = `INST_CSR_BITS'(dispatch_if.op_type);
wire [`CSR_ADDR_BITS-1:0] csr_addr = dispatch_if.imm[`CSR_ADDR_BITS-1:0];
wire [`NRI_BITS-1:0] csr_imm = dispatch_if.imm[`CSR_ADDR_BITS +: `NRI_BITS];
wire [`NUM_THREADS-1:0][31:0] csr_data;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign csr_data[i] = gpr_rsp_if.rs1_data[i][31:0];
end
VX_skid_buffer #(
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + `UP(`NT_BITS) + (`NUM_THREADS * `XLEN)),
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + `UP(`NT_BITS) + (`NUM_THREADS * 32)),
.OUT_REG (1)
) csr_buffer (
.clk (clk),
.reset (reset),
.valid_in (csr_req_valid),
.ready_in (csr_req_ready),
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, csr_op_type, csr_addr, dispatch_if.rd, dispatch_if.wb, dispatch_if.use_imm, csr_imm, tid, gpr_rsp_if.rs1_data}),
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, csr_op_type, csr_addr, dispatch_if.rd, dispatch_if.wb, dispatch_if.use_imm, csr_imm, tid, csr_data}),
.data_out ({csr_req_if.uuid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.use_imm, csr_req_if.imm, csr_req_if.tid, csr_req_if.rs1_data}),
.valid_out (csr_req_if.valid),
.ready_out (csr_req_if.ready)

View file

@ -10,7 +10,7 @@ interface VX_csr_req_if ();
wire [`INST_CSR_BITS-1:0] op_type;
wire [`CSR_ADDR_BITS-1:0] addr;
wire [`UP(`NT_BITS)-1:0] tid;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire use_imm;
wire [`NRI_BITS-1:0] imm;
wire [`NR_BITS-1:0] rd;

View file

@ -56,6 +56,7 @@ CONFIGS_16c := -DNUM_CLUSTERS=1 -DNUM_CORES=16 -DL2_ENABLE
CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16 -DL2_ENABLE
CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16 -DL2_ENABLE
CONFIGS += $(CONFIGS_$(NUM_CORES)c)
CONFIGS += -DFPU_DSP
# include paths
FPU_INCLUDE = -I$(RTL_DIR)/fpu -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src
@ -66,7 +67,7 @@ RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interface
RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(ROP_INCLUDE)
# compilation flags
CFLAGS += -DSYNTHESIS -DQUARTUS -DFPU_DSP
CFLAGS += -DSYNTHESIS -DQUARTUS
CFLAGS += $(CONFIGS)
CFLAGS += $(RTL_INCLUDE)

View file

@ -18,6 +18,7 @@ CONFIGS += -DNDEBUG
CONFIGS += -DQUARTUS
CONFIGS += -DSYNTHESIS
CONFIGS += -DNOGLOBALS
CONFIGS += -DFPU_DSP
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf

View file

@ -75,6 +75,7 @@ CONFIGS_16c := -DNUM_CLUSTERS=1 -DNUM_CORES=16 -DL2_ENABLE
CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16 -DL2_ENABLE
CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16 -DL2_ENABLE
CONFIGS += $(CONFIGS_$(NUM_CORES)c)
CONFIGS += -DFPU_DSP
# include paths
FPU_INCLUDE = -I$(RTL_DIR)/fpu -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
@ -133,7 +134,7 @@ else
endif
# compilation flags
CFLAGS += -DSYNTHESIS -DVIVADO -DFPU_DSP
CFLAGS += -DSYNTHESIS -DVIVADO
CFLAGS += $(CONFIGS)
CFLAGS += $(RTL_INCLUDE)