mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
64-bit RTL fixes and optimizations
This commit is contained in:
parent
6117fb48fe
commit
b8ddc91b2c
9 changed files with 190 additions and 176 deletions
|
@ -106,9 +106,9 @@
|
|||
`define INST_ALU_AUIPC 4'b0011
|
||||
`define INST_ALU_SLTU 4'b0100
|
||||
`define INST_ALU_SLT 4'b0101
|
||||
`define INST_ALU_SUB 4'b0111
|
||||
`define INST_ALU_SRL 4'b1000
|
||||
`define INST_ALU_SRA 4'b1001
|
||||
`define INST_ALU_SUB 4'b1011
|
||||
`define INST_ALU_AND 4'b1100
|
||||
`define INST_ALU_OR 4'b1101
|
||||
`define INST_ALU_XOR 4'b1110
|
||||
|
|
|
@ -22,21 +22,28 @@ module VX_alu_unit #(
|
|||
localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED;
|
||||
localparam SHIFT_IMM_BITS = `CLOG2(`XLEN);
|
||||
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] add_result;
|
||||
wire [`NUM_THREADS-1:0][`XLEN:0] sub_result; // +1 bit for branch compare
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] shr_result;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] msc_result;
|
||||
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] add_result_w;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] sub_result_w;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] shr_result_w;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] msc_result_w;
|
||||
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] alu_result;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] add_result;
|
||||
reg [`NUM_THREADS-1:0][`XLEN:0] sub_result; // +1 bit for branch compare
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] shr_result;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] msc_result;
|
||||
|
||||
wire ready_in;
|
||||
wire ready_in;
|
||||
|
||||
`UNUSED_VAR (alu_req_if.op_mod)
|
||||
`ifdef XLEN_64
|
||||
wire is_alu_w = `INST_ALU_IS_W(alu_req_if.op_mod);
|
||||
`else
|
||||
wire is_alu_w = 0;
|
||||
`endif
|
||||
|
||||
`UNUSED_VAR (alu_req_if.op_mod)
|
||||
|
||||
wire [`INST_ALU_BITS-1:0] alu_op = `INST_ALU_BITS'(alu_req_if.op_type);
|
||||
wire [`INST_BR_BITS-1:0] br_op = `INST_BR_BITS'(alu_req_if.op_type);
|
||||
wire is_br_op = `INST_ALU_IS_BR(alu_req_if.op_mod);
|
||||
|
@ -52,46 +59,49 @@ module VX_alu_unit #(
|
|||
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_in2_br = (alu_req_if.use_imm && ~is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign add_result[i] = is_alu_w ? `XLEN'($signed(alu_in1[i][31:0] + alu_in2_imm[i][31:0])) :
|
||||
(alu_in1_PC[i] + alu_in2_imm[i]);
|
||||
assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
|
||||
assign add_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] + alu_in2_imm[i][31:0]));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire [`XLEN:0] sub_in1 = {alu_signed & alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
wire [`XLEN:0] sub_in2 = {alu_signed & alu_in2_br[i][`XLEN-1], alu_in2_br[i]};
|
||||
assign sub_result[i] = is_alu_w ? {1'b0, `XLEN'($signed(alu_in1[i][31:0] - alu_in2_imm[i][31:0]))} :
|
||||
(sub_in1 - sub_in2);
|
||||
assign sub_result[i] = sub_in1 - sub_in2;
|
||||
assign sub_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] - alu_in2_imm[i][31:0]));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire [`XLEN:0] shr_in1 = {alu_signed && alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
wire [32:0] shr_in1_w = {alu_signed && alu_in1[i][31], alu_in1[i][31:0]};
|
||||
wire [31:0] shr_res_w = 32'($signed(shr_in1_w) >>> alu_in2_imm[i][4:0]);
|
||||
assign shr_result[i] = is_alu_w ? `XLEN'($signed(shr_res_w)) :
|
||||
`XLEN'($signed(shr_in1) >>> alu_in2_imm[i][SHIFT_IMM_BITS-1:0]);
|
||||
wire [`XLEN:0] shr_in1 = {alu_signed && alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
assign shr_result[i] = `XLEN'($signed(shr_in1) >>> alu_in2_imm[i][SHIFT_IMM_BITS-1:0]);
|
||||
wire [32:0] shr_in1_w = {alu_signed && alu_in1[i][31], alu_in1[i][31:0]};
|
||||
wire [31:0] shr_res_w = 32'($signed(shr_in1_w) >>> alu_in2_imm[i][4:0]);
|
||||
assign shr_result_w[i] = `XLEN'($signed(shr_res_w));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
always @(*) begin
|
||||
case (alu_op)
|
||||
`INST_ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
|
||||
`INST_ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
|
||||
`INST_ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
|
||||
`INST_ALU_SLL: msc_result[i] = is_alu_w ? `XLEN'($signed(alu_in1[i][31:0] << alu_in2_imm[i][4:0])) :
|
||||
(alu_in1[i] << alu_in2_imm[i][SHIFT_IMM_BITS-1:0]);
|
||||
default: msc_result[i] = 'x;
|
||||
case (alu_op[1:0])
|
||||
2'b00: msc_result[i] = alu_in1[i] & alu_in2_imm[i]; // AND
|
||||
2'b01: msc_result[i] = alu_in1[i] | alu_in2_imm[i]; // OR
|
||||
2'b10: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i]; // XOR
|
||||
2'b11: msc_result[i] = alu_in1[i] << alu_in2_imm[i][SHIFT_IMM_BITS-1:0]; // SLL
|
||||
endcase
|
||||
end
|
||||
assign msc_result_w[i] = `XLEN'($signed(alu_in1[i][31:0] << alu_in2_imm[i][4:0]));
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire [`XLEN-1:0] slt_sub_result = is_sub_op ? sub_result[i][`XLEN-1:0] : `XLEN'(sub_result[i][`XLEN]);
|
||||
always @(*) begin
|
||||
case (alu_op_class)
|
||||
2'b00: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC, ADDIW, ADDW
|
||||
2'b01: alu_result[i] = {{`XLEN-1{1'b0}}, sub_result[i][`XLEN]}; // SLTU, SLT
|
||||
2'b10: alu_result[i] = is_sub_op ? sub_result[i][`XLEN-1:0] // SUB, SUBW
|
||||
: shr_result[i]; // SRL, SRA, SRLI, SRAI, SRLW, SRAW, SRLIW, SRAIW
|
||||
default: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLI, SLLIW, SLLW
|
||||
case ({is_alu_w, alu_op_class})
|
||||
3'b000: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
|
||||
3'b001: alu_result[i] = slt_sub_result; // SUB, SLTU, SLT
|
||||
3'b010: alu_result[i] = shr_result[i]; // SRL, SRA, SRLI, SRAI
|
||||
3'b011: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLI
|
||||
3'b100: alu_result[i] = add_result_w[i]; // ADDIW, ADDW
|
||||
3'b101: alu_result[i] = sub_result_w[i]; // SUBW
|
||||
3'b110: alu_result[i] = shr_result_w[i]; // SRLW, SRAW, SRLIW, SRAIW
|
||||
3'b111: alu_result[i] = msc_result_w[i]; // SLLW
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
@ -121,11 +131,6 @@ module VX_alu_unit #(
|
|||
wire alu_wb;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_data;
|
||||
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] full_alu_data;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign full_alu_data[i] =alu_data[i];
|
||||
end
|
||||
|
||||
wire [`INST_BR_BITS-1:0] br_op_r;
|
||||
wire [`XLEN-1:0] br_dest_r;
|
||||
wire is_less_r;
|
||||
|
@ -157,16 +162,16 @@ module VX_alu_unit #(
|
|||
|
||||
`ifdef EXT_M_ENABLE
|
||||
|
||||
wire muldiv_valid_in;
|
||||
wire muldiv_ready_in;
|
||||
wire muldiv_valid_out;
|
||||
wire muldiv_ready_out;
|
||||
wire [UUID_WIDTH-1:0] muldiv_uuid;
|
||||
wire [NW_WIDTH-1:0] muldiv_wid;
|
||||
wire [`NUM_THREADS-1:0] muldiv_tmask;
|
||||
wire [`XLEN-1:0] muldiv_PC;
|
||||
wire [`NR_BITS-1:0] muldiv_rd;
|
||||
wire muldiv_wb;
|
||||
wire muldiv_valid_in;
|
||||
wire muldiv_ready_in;
|
||||
wire muldiv_valid_out;
|
||||
wire muldiv_ready_out;
|
||||
wire [UUID_WIDTH-1:0] muldiv_uuid;
|
||||
wire [NW_WIDTH-1:0] muldiv_wid;
|
||||
wire [`NUM_THREADS-1:0] muldiv_tmask;
|
||||
wire [`XLEN-1:0] muldiv_PC;
|
||||
wire [`NR_BITS-1:0] muldiv_rd;
|
||||
wire muldiv_wb;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] muldiv_data;
|
||||
|
||||
wire [`INST_M_BITS-1:0] muldiv_op = `INST_M_BITS'(alu_req_if.op_type);
|
||||
|
@ -239,7 +244,7 @@ module VX_alu_unit #(
|
|||
`endif
|
||||
}),
|
||||
.data_in ({
|
||||
{alu_uuid, alu_wid, alu_tmask, alu_PC, alu_rd, alu_wb, full_alu_data}
|
||||
{alu_uuid, alu_wid, alu_tmask, alu_PC, alu_rd, alu_wb, alu_data}
|
||||
`ifdef EXT_M_ENABLE
|
||||
, {muldiv_uuid, muldiv_wid, muldiv_tmask, muldiv_PC, muldiv_rd, muldiv_wb, muldiv_data}
|
||||
`endif
|
||||
|
|
|
@ -45,14 +45,14 @@ module VX_csr_data #(
|
|||
input wire [`UP(`NW_BITS)-1:0] read_wid,
|
||||
input wire [`NUM_THREADS-1:0] read_tmask,
|
||||
input wire [`CSR_ADDR_BITS-1:0] read_addr,
|
||||
output wire [`XLEN-1:0] read_data_ro,
|
||||
output wire [`XLEN-1:0] read_data_rw,
|
||||
output wire [31:0] read_data_ro,
|
||||
output wire [31:0] read_data_rw,
|
||||
|
||||
input wire write_enable,
|
||||
input wire [`UP(`UUID_BITS)-1:0] write_uuid,
|
||||
input wire [`UP(`NW_BITS)-1:0] write_wid,
|
||||
input wire [`CSR_ADDR_BITS-1:0] write_addr,
|
||||
input wire [`XLEN-1:0] write_data
|
||||
input wire [31:0] write_data
|
||||
);
|
||||
|
||||
`UNUSED_VAR (reset)
|
||||
|
@ -64,15 +64,15 @@ module VX_csr_data #(
|
|||
reg [`NUM_WARPS-1:0][`INST_FRM_BITS+`FP_FLAGS_BITS-1:0] fcsr;
|
||||
`endif
|
||||
|
||||
reg [`XLEN-1:0] csr_satp;
|
||||
reg [`XLEN-1:0] csr_mstatus;
|
||||
reg [`XLEN-1:0] csr_medeleg;
|
||||
reg [`XLEN-1:0] csr_mideleg;
|
||||
reg [`XLEN-1:0] csr_mie;
|
||||
reg [`XLEN-1:0] csr_mtvec;
|
||||
reg [`XLEN-1:0] csr_mepc;
|
||||
reg [`XLEN-1:0] csr_pmpcfg;
|
||||
reg [`XLEN-1:0] csr_pmpaddr;
|
||||
reg [31:0] csr_satp;
|
||||
reg [31:0] csr_mstatus;
|
||||
reg [31:0] csr_medeleg;
|
||||
reg [31:0] csr_mideleg;
|
||||
reg [31:0] csr_mie;
|
||||
reg [31:0] csr_mtvec;
|
||||
reg [31:0] csr_mepc;
|
||||
reg [31:0] csr_pmpcfg;
|
||||
reg [31:0] csr_pmpaddr;
|
||||
|
||||
always @(posedge clk) begin
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
@ -111,8 +111,8 @@ module VX_csr_data #(
|
|||
|
||||
// CSRs read //////////////////////////////////////////////////////////////
|
||||
|
||||
reg [`XLEN-1:0] read_data_ro_r;
|
||||
reg [`XLEN-1:0] read_data_rw_r;
|
||||
reg [31:0] read_data_ro_r;
|
||||
reg [31:0] read_data_rw_r;
|
||||
reg read_addr_valid_r;
|
||||
|
||||
always @(*) begin
|
||||
|
@ -121,46 +121,46 @@ module VX_csr_data #(
|
|||
read_addr_valid_r = 1;
|
||||
case (read_addr)
|
||||
`ifdef EXT_F_ENABLE
|
||||
`CSR_FFLAGS : read_data_rw_r = `XLEN'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]);
|
||||
`CSR_FRM : read_data_rw_r = `XLEN'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]);
|
||||
`CSR_FCSR : read_data_rw_r = `XLEN'(fcsr[read_wid]);
|
||||
`CSR_FFLAGS : read_data_rw_r = 32'(fcsr[read_wid][`FP_FLAGS_BITS-1:0]);
|
||||
`CSR_FRM : read_data_rw_r = 32'(fcsr[read_wid][`INST_FRM_BITS+`FP_FLAGS_BITS-1:`FP_FLAGS_BITS]);
|
||||
`CSR_FCSR : read_data_rw_r = 32'(fcsr[read_wid]);
|
||||
`endif
|
||||
`CSR_LWID : read_data_ro_r = `XLEN'(read_wid);
|
||||
`CSR_LWID : read_data_ro_r = 32'(read_wid);
|
||||
/*`CSR_MHARTID ,*/
|
||||
`CSR_GWID : read_data_ro_r = (`XLEN'(CORE_ID) << `NW_BITS) + `XLEN'(read_wid);
|
||||
`CSR_GCID : read_data_ro_r = `XLEN'(CORE_ID);
|
||||
`CSR_GWID : read_data_ro_r = (32'(CORE_ID) << `NW_BITS) + 32'(read_wid);
|
||||
`CSR_GCID : read_data_ro_r = 32'(CORE_ID);
|
||||
|
||||
`CSR_TMASK : read_data_ro_r = `XLEN'(read_tmask);
|
||||
`CSR_TMASK : read_data_ro_r = 32'(read_tmask);
|
||||
|
||||
`CSR_NT : read_data_ro_r = `XLEN'd`NUM_THREADS;
|
||||
`CSR_NW : read_data_ro_r = `XLEN'd`NUM_WARPS;
|
||||
`CSR_NC : read_data_ro_r = `XLEN'(`NUM_CORES * `NUM_CLUSTERS);
|
||||
`CSR_NT : read_data_ro_r = 32'(`NUM_THREADS);
|
||||
`CSR_NW : read_data_ro_r = 32'(`NUM_WARPS);
|
||||
`CSR_NC : read_data_ro_r = 32'(`NUM_CORES * `NUM_CLUSTERS);
|
||||
|
||||
`CSR_MCYCLE : read_data_ro_r = `XLEN'(fetch_to_csr_if.cycles[31:0]);
|
||||
`CSR_MCYCLE_H : read_data_ro_r = `XLEN'(fetch_to_csr_if.cycles[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MCYCLE : read_data_ro_r = 32'(fetch_to_csr_if.cycles[31:0]);
|
||||
`CSR_MCYCLE_H : read_data_ro_r = 32'(fetch_to_csr_if.cycles[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_RESERVED : read_data_ro_r = 'x;
|
||||
`CSR_MPM_RESERVED_H : read_data_ro_r = 'x;
|
||||
`CSR_MINSTRET : read_data_ro_r = `XLEN'(cmt_to_csr_if.instret[31:0]);
|
||||
`CSR_MINSTRET_H : read_data_ro_r = `XLEN'(cmt_to_csr_if.instret[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MINSTRET : read_data_ro_r = 32'(cmt_to_csr_if.instret[31:0]);
|
||||
`CSR_MINSTRET_H : read_data_ro_r = 32'(cmt_to_csr_if.instret[`PERF_CTR_BITS-1:32]);
|
||||
|
||||
`CSR_SATP : read_data_ro_r = `XLEN'(csr_satp);
|
||||
`CSR_SATP : read_data_ro_r = 32'(csr_satp);
|
||||
|
||||
`CSR_MSTATUS,
|
||||
`CSR_MNSTATUS : read_data_ro_r = `XLEN'(csr_mstatus);
|
||||
`CSR_MISA : read_data_ro_r = (((`XLEN'($clog2(`XLEN))-4) << (`XLEN-2)) | `MISA_STD);
|
||||
`CSR_MEDELEG : read_data_ro_r = `XLEN'(csr_medeleg);
|
||||
`CSR_MIDELEG : read_data_ro_r = `XLEN'(csr_mideleg);
|
||||
`CSR_MIE : read_data_ro_r = `XLEN'(csr_mie);
|
||||
`CSR_MTVEC : read_data_ro_r = `XLEN'(csr_mtvec);
|
||||
`CSR_MNSTATUS : read_data_ro_r = 32'(csr_mstatus);
|
||||
`CSR_MISA : read_data_ro_r = ((($clog2(`XLEN)-4) << (`XLEN-2)) | `MISA_STD);
|
||||
`CSR_MEDELEG : read_data_ro_r = 32'(csr_medeleg);
|
||||
`CSR_MIDELEG : read_data_ro_r = 32'(csr_mideleg);
|
||||
`CSR_MIE : read_data_ro_r = 32'(csr_mie);
|
||||
`CSR_MTVEC : read_data_ro_r = 32'(csr_mtvec);
|
||||
|
||||
`CSR_MEPC : read_data_ro_r = `XLEN'(csr_mepc);
|
||||
`CSR_MEPC : read_data_ro_r = 32'(csr_mepc);
|
||||
|
||||
`CSR_PMPCFG0 : read_data_ro_r = `XLEN'(csr_pmpcfg);
|
||||
`CSR_PMPADDR0 : read_data_ro_r = `XLEN'(csr_pmpaddr);
|
||||
`CSR_PMPCFG0 : read_data_ro_r = 32'(csr_pmpcfg);
|
||||
`CSR_PMPADDR0 : read_data_ro_r = 32'(csr_pmpaddr);
|
||||
|
||||
`CSR_MVENDORID : read_data_ro_r = `XLEN'd`VENDOR_ID;
|
||||
`CSR_MARCHID : read_data_ro_r = `XLEN'd`ARCHITECTURE_ID;
|
||||
`CSR_MIMPID : read_data_ro_r = `XLEN'd`IMPLEMENTATION_ID;
|
||||
`CSR_MVENDORID : read_data_ro_r = 32'(`VENDOR_ID);
|
||||
`CSR_MARCHID : read_data_ro_r = 32'(`ARCHITECTURE_ID);
|
||||
`CSR_MIMPID : read_data_ro_r = 32'(`IMPLEMENTATION_ID);
|
||||
|
||||
default: begin
|
||||
read_addr_valid_r = 0;
|
||||
|
@ -173,35 +173,35 @@ module VX_csr_data #(
|
|||
case (read_addr)
|
||||
// PERF: pipeline
|
||||
`CSR_MPM_IBUF_ST : read_data_ro_r = perf_pipeline_if.ibf_stalls[31:0];
|
||||
`CSR_MPM_IBUF_ST_H : read_data_ro_r = `XLEN'(perf_pipeline_if.ibf_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_IBUF_ST_H : read_data_ro_r = 32'(perf_pipeline_if.ibf_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SCRB_ST : read_data_ro_r = perf_pipeline_if.scb_stalls[31:0];
|
||||
`CSR_MPM_SCRB_ST_H : read_data_ro_r = `XLEN'(perf_pipeline_if.scb_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SCRB_ST_H : read_data_ro_r = 32'(perf_pipeline_if.scb_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ALU_ST : read_data_ro_r = perf_pipeline_if.alu_stalls[31:0];
|
||||
`CSR_MPM_ALU_ST_H : read_data_ro_r = `XLEN'(perf_pipeline_if.alu_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ALU_ST_H : read_data_ro_r = 32'(perf_pipeline_if.alu_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_LSU_ST : read_data_ro_r = perf_pipeline_if.lsu_stalls[31:0];
|
||||
`CSR_MPM_LSU_ST_H : read_data_ro_r = `XLEN'(perf_pipeline_if.lsu_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_LSU_ST_H : read_data_ro_r = 32'(perf_pipeline_if.lsu_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_CSR_ST : read_data_ro_r = perf_pipeline_if.csr_stalls[31:0];
|
||||
`CSR_MPM_CSR_ST_H : read_data_ro_r = `XLEN'(perf_pipeline_if.csr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_CSR_ST_H : read_data_ro_r = 32'(perf_pipeline_if.csr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`ifdef EXT_F_ENABLE
|
||||
`CSR_MPM_FPU_ST : read_data_ro_r = perf_pipeline_if.fpu_stalls[31:0];
|
||||
`CSR_MPM_FPU_ST_H : read_data_ro_r = `XLEN'(perf_pipeline_if.fpu_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_FPU_ST_H : read_data_ro_r = 32'(perf_pipeline_if.fpu_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`else
|
||||
`CSR_MPM_FPU_ST : read_data_ro_r = '0;
|
||||
`CSR_MPM_FPU_ST_H : read_data_ro_r = '0;
|
||||
`endif
|
||||
`CSR_MPM_GPU_ST : read_data_ro_r = perf_pipeline_if.gpu_stalls[31:0];
|
||||
`CSR_MPM_GPU_ST_H : read_data_ro_r = `XLEN'(perf_pipeline_if.gpu_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_GPU_ST_H : read_data_ro_r = 32'(perf_pipeline_if.gpu_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: memory
|
||||
`CSR_MPM_IFETCHES : read_data_ro_r = perf_pipeline_if.ifetches[31:0];
|
||||
`CSR_MPM_IFETCHES_H : read_data_ro_r = `XLEN'(perf_pipeline_if.ifetches[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_IFETCHES_H : read_data_ro_r = 32'(perf_pipeline_if.ifetches[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_LOADS : read_data_ro_r = perf_pipeline_if.loads[31:0];
|
||||
`CSR_MPM_LOADS_H : read_data_ro_r = `XLEN'(perf_pipeline_if.loads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_LOADS_H : read_data_ro_r = 32'(perf_pipeline_if.loads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_STORES : read_data_ro_r = perf_pipeline_if.stores[31:0];
|
||||
`CSR_MPM_STORES_H : read_data_ro_r = `XLEN'(perf_pipeline_if.stores[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_STORES_H : read_data_ro_r = 32'(perf_pipeline_if.stores[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_IFETCH_LAT : read_data_ro_r = perf_pipeline_if.ifetch_latency[31:0];
|
||||
`CSR_MPM_IFETCH_LAT_H : read_data_ro_r = `XLEN'(perf_pipeline_if.ifetch_latency[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_IFETCH_LAT_H : read_data_ro_r = 32'(perf_pipeline_if.ifetch_latency[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_LOAD_LAT : read_data_ro_r = perf_pipeline_if.load_latency[31:0];
|
||||
`CSR_MPM_LOAD_LAT_H : read_data_ro_r = `XLEN'(perf_pipeline_if.load_latency[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_LOAD_LAT_H : read_data_ro_r = 32'(perf_pipeline_if.load_latency[`PERF_CTR_BITS-1:32]);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
@ -209,62 +209,62 @@ module VX_csr_data #(
|
|||
case (read_addr)
|
||||
// PERF: icache
|
||||
`CSR_MPM_ICACHE_READS : read_data_ro_r = perf_memsys_if.icache_reads[31:0];
|
||||
`CSR_MPM_ICACHE_READS_H : read_data_ro_r = `XLEN'(perf_memsys_if.icache_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ICACHE_READS_H : read_data_ro_r = 32'(perf_memsys_if.icache_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ICACHE_MISS_R : read_data_ro_r = perf_memsys_if.icache_read_misses[31:0];
|
||||
`CSR_MPM_ICACHE_MISS_R_H : read_data_ro_r = `XLEN'(perf_memsys_if.icache_read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ICACHE_MISS_R_H : read_data_ro_r = 32'(perf_memsys_if.icache_read_misses[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: dcache
|
||||
`CSR_MPM_DCACHE_READS : read_data_ro_r = perf_memsys_if.dcache_reads[31:0];
|
||||
`CSR_MPM_DCACHE_READS_H : read_data_ro_r = `XLEN'(perf_memsys_if.dcache_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_READS_H : read_data_ro_r = 32'(perf_memsys_if.dcache_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_WRITES : read_data_ro_r = perf_memsys_if.dcache_writes[31:0];
|
||||
`CSR_MPM_DCACHE_WRITES_H : read_data_ro_r = `XLEN'(perf_memsys_if.dcache_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_WRITES_H : read_data_ro_r = 32'(perf_memsys_if.dcache_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_MISS_R : read_data_ro_r = perf_memsys_if.dcache_read_misses[31:0];
|
||||
`CSR_MPM_DCACHE_MISS_R_H : read_data_ro_r = `XLEN'(perf_memsys_if.dcache_read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_MISS_R_H : read_data_ro_r = 32'(perf_memsys_if.dcache_read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_MISS_W : read_data_ro_r = perf_memsys_if.dcache_write_misses[31:0];
|
||||
`CSR_MPM_DCACHE_MISS_W_H : read_data_ro_r = `XLEN'(perf_memsys_if.dcache_write_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_MISS_W_H : read_data_ro_r = 32'(perf_memsys_if.dcache_write_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_BANK_ST : read_data_ro_r = perf_memsys_if.dcache_bank_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_BANK_ST_H : read_data_ro_r = `XLEN'(perf_memsys_if.dcache_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_BANK_ST_H : read_data_ro_r = 32'(perf_memsys_if.dcache_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_MSHR_ST : read_data_ro_r = perf_memsys_if.dcache_mshr_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_ro_r = `XLEN'(perf_memsys_if.dcache_mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_ro_r = 32'(perf_memsys_if.dcache_mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: smem
|
||||
`CSR_MPM_SMEM_READS : read_data_ro_r = perf_memsys_if.smem_reads[31:0];
|
||||
`CSR_MPM_SMEM_READS_H : read_data_ro_r = `XLEN'(perf_memsys_if.smem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SMEM_READS_H : read_data_ro_r = 32'(perf_memsys_if.smem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SMEM_WRITES : read_data_ro_r = perf_memsys_if.smem_writes[31:0];
|
||||
`CSR_MPM_SMEM_WRITES_H : read_data_ro_r = `XLEN'(perf_memsys_if.smem_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SMEM_WRITES_H : read_data_ro_r = 32'(perf_memsys_if.smem_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SMEM_BANK_ST : read_data_ro_r = perf_memsys_if.smem_bank_stalls[31:0];
|
||||
`CSR_MPM_SMEM_BANK_ST_H : read_data_ro_r = `XLEN'(perf_memsys_if.smem_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_SMEM_BANK_ST_H : read_data_ro_r = 32'(perf_memsys_if.smem_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: l2cache
|
||||
`CSR_MPM_L2CACHE_READS : read_data_ro_r = perf_memsys_if.l2cache_reads[31:0];
|
||||
`CSR_MPM_L2CACHE_READS_H : read_data_ro_r = `XLEN'(perf_memsys_if.l2cache_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L2CACHE_READS_H : read_data_ro_r = 32'(perf_memsys_if.l2cache_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L2CACHE_WRITES : read_data_ro_r = perf_memsys_if.l2cache_writes[31:0];
|
||||
`CSR_MPM_L2CACHE_WRITES_H : read_data_ro_r = `XLEN'(perf_memsys_if.l2cache_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L2CACHE_WRITES_H : read_data_ro_r = 32'(perf_memsys_if.l2cache_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L2CACHE_MISS_R : read_data_ro_r = perf_memsys_if.l2cache_read_misses[31:0];
|
||||
`CSR_MPM_L2CACHE_MISS_R_H : read_data_ro_r = `XLEN'(perf_memsys_if.l2cache_read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L2CACHE_MISS_R_H : read_data_ro_r = 32'(perf_memsys_if.l2cache_read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L2CACHE_MISS_W : read_data_ro_r = perf_memsys_if.l2cache_write_misses[31:0];
|
||||
`CSR_MPM_L2CACHE_MISS_W_H : read_data_ro_r = `XLEN'(perf_memsys_if.l2cache_write_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L2CACHE_MISS_W_H : read_data_ro_r = 32'(perf_memsys_if.l2cache_write_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L2CACHE_BANK_ST : read_data_ro_r = perf_memsys_if.l2cache_bank_stalls[31:0];
|
||||
`CSR_MPM_L2CACHE_BANK_ST_H : read_data_ro_r = `XLEN'(perf_memsys_if.l2cache_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L2CACHE_BANK_ST_H : read_data_ro_r = 32'(perf_memsys_if.l2cache_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L2CACHE_MSHR_ST : read_data_ro_r = perf_memsys_if.l2cache_mshr_stalls[31:0];
|
||||
`CSR_MPM_L2CACHE_MSHR_ST_H : read_data_ro_r = `XLEN'(perf_memsys_if.l2cache_mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L2CACHE_MSHR_ST_H : read_data_ro_r = 32'(perf_memsys_if.l2cache_mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: l3cache
|
||||
`CSR_MPM_L3CACHE_READS : read_data_ro_r = perf_memsys_if.l3cache_reads[31:0];
|
||||
`CSR_MPM_L3CACHE_READS_H : read_data_ro_r = `XLEN'(perf_memsys_if.l3cache_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L3CACHE_READS_H : read_data_ro_r = 32'(perf_memsys_if.l3cache_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L3CACHE_WRITES : read_data_ro_r = perf_memsys_if.l3cache_writes[31:0];
|
||||
`CSR_MPM_L3CACHE_WRITES_H : read_data_ro_r = `XLEN'(perf_memsys_if.l3cache_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L3CACHE_WRITES_H : read_data_ro_r = 32'(perf_memsys_if.l3cache_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L3CACHE_MISS_R : read_data_ro_r = perf_memsys_if.l3cache_read_misses[31:0];
|
||||
`CSR_MPM_L3CACHE_MISS_R_H : read_data_ro_r = `XLEN'(perf_memsys_if.l3cache_read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L3CACHE_MISS_R_H : read_data_ro_r = 32'(perf_memsys_if.l3cache_read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L3CACHE_MISS_W : read_data_ro_r = perf_memsys_if.l3cache_write_misses[31:0];
|
||||
`CSR_MPM_L3CACHE_MISS_W_H : read_data_ro_r = `XLEN'(perf_memsys_if.l3cache_write_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L3CACHE_MISS_W_H : read_data_ro_r = 32'(perf_memsys_if.l3cache_write_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L3CACHE_BANK_ST : read_data_ro_r = perf_memsys_if.l3cache_bank_stalls[31:0];
|
||||
`CSR_MPM_L3CACHE_BANK_ST_H : read_data_ro_r = `XLEN'(perf_memsys_if.l3cache_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L3CACHE_BANK_ST_H : read_data_ro_r = 32'(perf_memsys_if.l3cache_bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L3CACHE_MSHR_ST : read_data_ro_r = perf_memsys_if.l3cache_mshr_stalls[31:0];
|
||||
`CSR_MPM_L3CACHE_MSHR_ST_H : read_data_ro_r = `XLEN'(perf_memsys_if.l3cache_mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_L3CACHE_MSHR_ST_H : read_data_ro_r = 32'(perf_memsys_if.l3cache_mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
// PERF: memory
|
||||
`CSR_MPM_MEM_READS : read_data_ro_r = perf_memsys_if.mem_reads[31:0];
|
||||
`CSR_MPM_MEM_READS_H : read_data_ro_r = `XLEN'(perf_memsys_if.mem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_MEM_READS_H : read_data_ro_r = 32'(perf_memsys_if.mem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_MEM_WRITES : read_data_ro_r = perf_memsys_if.mem_writes[31:0];
|
||||
`CSR_MPM_MEM_WRITES_H : read_data_ro_r = `XLEN'(perf_memsys_if.mem_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_MEM_WRITES_H : read_data_ro_r = 32'(perf_memsys_if.mem_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_MEM_LAT : read_data_ro_r = perf_memsys_if.mem_latency[31:0];
|
||||
`CSR_MPM_MEM_LAT_H : read_data_ro_r = `XLEN'(perf_memsys_if.mem_latency[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_MEM_LAT_H : read_data_ro_r = 32'(perf_memsys_if.mem_latency[`PERF_CTR_BITS-1:32]);
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
@ -272,24 +272,24 @@ module VX_csr_data #(
|
|||
`ifdef EXT_TEX_ENABLE
|
||||
case (read_addr)
|
||||
`CSR_MPM_TEX_READS : read_data_ro_r = perf_tex_if.mem_reads[31:0];
|
||||
`CSR_MPM_TEX_READS_H : read_data_ro_r = `XLEN'(perf_tex_if.mem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_TEX_READS_H : read_data_ro_r = 32'(perf_tex_if.mem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_TEX_LAT : read_data_ro_r = perf_tex_if.mem_latency[31:0];
|
||||
`CSR_MPM_TEX_LAT_H : read_data_ro_r = `XLEN'(perf_tex_if.mem_latency[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_TEX_LAT_H : read_data_ro_r = 32'(perf_tex_if.mem_latency[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_TEX_STALL : read_data_ro_r = perf_tex_if.stall_cycles[31:0];
|
||||
`CSR_MPM_TEX_STALL_H : read_data_ro_r = `XLEN'(perf_tex_if.stall_cycles[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_TEX_STALL_H : read_data_ro_r = 32'(perf_tex_if.stall_cycles[`PERF_CTR_BITS-1:32]);
|
||||
`ifdef TCACHE_ENABLE
|
||||
// cache perf counters
|
||||
`CSR_MPM_TCACHE_READS : read_data_ro_r = perf_tcache_if.reads[31:0];
|
||||
`CSR_MPM_TCACHE_READS_H : read_data_ro_r = `XLEN'(perf_tcache_if.reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_TCACHE_READS_H : read_data_ro_r = 32'(perf_tcache_if.reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_TCACHE_MISS_R : read_data_ro_r = perf_tcache_if.read_misses[31:0];
|
||||
`CSR_MPM_TCACHE_MISS_R_H: read_data_ro_r = `XLEN'(perf_tcache_if.read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_TCACHE_MISS_R_H: read_data_ro_r = 32'(perf_tcache_if.read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_TCACHE_BANK_ST : read_data_ro_r = perf_tcache_if.bank_stalls[31:0];
|
||||
`CSR_MPM_TCACHE_BANK_ST_H:read_data_ro_r = `XLEN'(perf_tcache_if.bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_TCACHE_BANK_ST_H:read_data_ro_r = 32'(perf_tcache_if.bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_TCACHE_MSHR_ST :read_data_ro_r = perf_tcache_if.mshr_stalls[31:0];
|
||||
`CSR_MPM_TCACHE_MSHR_ST_H:read_data_ro_r = `XLEN'(perf_tcache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_TCACHE_MSHR_ST_H:read_data_ro_r = 32'(perf_tcache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`endif
|
||||
`CSR_MPM_TEX_ISSUE_ST : read_data_ro_r = perf_gpu_if.tex_stalls[31:0];
|
||||
`CSR_MPM_TEX_ISSUE_ST_H : read_data_ro_r = `XLEN'(perf_gpu_if.tex_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_TEX_ISSUE_ST_H : read_data_ro_r = 32'(perf_gpu_if.tex_stalls[`PERF_CTR_BITS-1:32]);
|
||||
default:;
|
||||
endcase
|
||||
`endif
|
||||
|
@ -298,24 +298,24 @@ module VX_csr_data #(
|
|||
`ifdef EXT_RASTER_ENABLE
|
||||
case (read_addr)
|
||||
`CSR_MPM_RASTER_READS : read_data_ro_r = perf_raster_if.mem_reads[31:0];
|
||||
`CSR_MPM_RASTER_READS_H : read_data_ro_r = `XLEN'(perf_raster_if.mem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_RASTER_READS_H : read_data_ro_r = 32'(perf_raster_if.mem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_RASTER_LAT : read_data_ro_r = perf_raster_if.mem_latency[31:0];
|
||||
`CSR_MPM_RASTER_LAT_H : read_data_ro_r = `XLEN'(perf_raster_if.mem_latency[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_RASTER_LAT_H : read_data_ro_r = 32'(perf_raster_if.mem_latency[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_RASTER_STALL : read_data_ro_r = perf_raster_if.stall_cycles[31:0];
|
||||
`CSR_MPM_RASTER_STALL_H : read_data_ro_r = `XLEN'(perf_raster_if.stall_cycles[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_RASTER_STALL_H : read_data_ro_r = 32'(perf_raster_if.stall_cycles[`PERF_CTR_BITS-1:32]);
|
||||
`ifdef RCACHE_ENABLE
|
||||
// cache perf counters
|
||||
`CSR_MPM_RCACHE_READS : read_data_ro_r = perf_rcache_if.reads[31:0];
|
||||
`CSR_MPM_RCACHE_READS_H : read_data_ro_r = `XLEN'(perf_rcache_if.reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_RCACHE_READS_H : read_data_ro_r = 32'(perf_rcache_if.reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_RCACHE_MISS_R : read_data_ro_r = perf_rcache_if.read_misses[31:0];
|
||||
`CSR_MPM_RCACHE_MISS_R_H: read_data_ro_r = `XLEN'(perf_rcache_if.read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_RCACHE_MISS_R_H: read_data_ro_r = 32'(perf_rcache_if.read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_RCACHE_BANK_ST : read_data_ro_r = perf_rcache_if.bank_stalls[31:0];
|
||||
`CSR_MPM_RCACHE_BANK_ST_H:read_data_ro_r = `XLEN'(perf_rcache_if.bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_RCACHE_BANK_ST_H:read_data_ro_r = 32'(perf_rcache_if.bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_RCACHE_MSHR_ST :read_data_ro_r = perf_rcache_if.mshr_stalls[31:0];
|
||||
`CSR_MPM_RCACHE_MSHR_ST_H:read_data_ro_r = `XLEN'(perf_rcache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_RCACHE_MSHR_ST_H:read_data_ro_r = 32'(perf_rcache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`endif
|
||||
`CSR_MPM_RASTER_ISSUE_ST : read_data_ro_r = perf_gpu_if.raster_stalls[31:0];
|
||||
`CSR_MPM_RASTER_ISSUE_ST_H : read_data_ro_r = `XLEN'(perf_gpu_if.raster_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_RASTER_ISSUE_ST_H : read_data_ro_r = 32'(perf_gpu_if.raster_stalls[`PERF_CTR_BITS-1:32]);
|
||||
default:;
|
||||
endcase
|
||||
`endif
|
||||
|
@ -324,30 +324,30 @@ module VX_csr_data #(
|
|||
`ifdef EXT_ROP_ENABLE
|
||||
case (read_addr)
|
||||
`CSR_MPM_ROP_READS : read_data_ro_r = perf_rop_if.mem_reads[31:0];
|
||||
`CSR_MPM_ROP_READS_H : read_data_ro_r = `XLEN'(perf_rop_if.mem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ROP_READS_H : read_data_ro_r = 32'(perf_rop_if.mem_reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ROP_WRITES : read_data_ro_r = perf_rop_if.mem_writes[31:0];
|
||||
`CSR_MPM_ROP_WRITES_H : read_data_ro_r = `XLEN'(perf_rop_if.mem_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ROP_WRITES_H : read_data_ro_r = 32'(perf_rop_if.mem_writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ROP_LAT : read_data_ro_r = perf_rop_if.mem_latency[31:0];
|
||||
`CSR_MPM_ROP_LAT_H : read_data_ro_r = `XLEN'(perf_rop_if.mem_latency[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ROP_LAT_H : read_data_ro_r = 32'(perf_rop_if.mem_latency[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ROP_STALL : read_data_ro_r = perf_rop_if.stall_cycles[31:0];
|
||||
`CSR_MPM_ROP_STALL_H : read_data_ro_r = `XLEN'(perf_rop_if.stall_cycles[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ROP_STALL_H : read_data_ro_r = 32'(perf_rop_if.stall_cycles[`PERF_CTR_BITS-1:32]);
|
||||
`ifdef OCACHE_ENABLE
|
||||
// cache perf counters
|
||||
`CSR_MPM_OCACHE_READS : read_data_ro_r = perf_ocache_if.reads[31:0];
|
||||
`CSR_MPM_OCACHE_READS_H : read_data_ro_r = `XLEN'(perf_ocache_if.reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_OCACHE_READS_H : read_data_ro_r = 32'(perf_ocache_if.reads[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_OCACHE_WRITES : read_data_ro_r = perf_ocache_if.writes[31:0];
|
||||
`CSR_MPM_OCACHE_WRITES_H: read_data_ro_r = `XLEN'(perf_ocache_if.writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_OCACHE_WRITES_H: read_data_ro_r = 32'(perf_ocache_if.writes[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_OCACHE_MISS_R : read_data_ro_r = perf_ocache_if.read_misses[31:0];
|
||||
`CSR_MPM_OCACHE_MISS_R_H: read_data_ro_r = `XLEN'(perf_ocache_if.read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_OCACHE_MISS_R_H: read_data_ro_r = 32'(perf_ocache_if.read_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_OCACHE_MISS_W : read_data_ro_r = perf_ocache_if.write_misses[31:0];
|
||||
`CSR_MPM_OCACHE_MISS_W_H: read_data_ro_r = `XLEN'(perf_ocache_if.write_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_OCACHE_MISS_W_H: read_data_ro_r = 32'(perf_ocache_if.write_misses[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_OCACHE_BANK_ST : read_data_ro_r = perf_ocache_if.bank_stalls[31:0];
|
||||
`CSR_MPM_OCACHE_BANK_ST_H:read_data_ro_r = `XLEN'(perf_ocache_if.bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_OCACHE_BANK_ST_H:read_data_ro_r = 32'(perf_ocache_if.bank_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_OCACHE_MSHR_ST :read_data_ro_r = perf_ocache_if.mshr_stalls[31:0];
|
||||
`CSR_MPM_OCACHE_MSHR_ST_H:read_data_ro_r = `XLEN'(perf_ocache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_OCACHE_MSHR_ST_H:read_data_ro_r = 32'(perf_ocache_if.mshr_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`endif
|
||||
`CSR_MPM_ROP_ISSUE_ST : read_data_ro_r = perf_gpu_if.rop_stalls[31:0];
|
||||
`CSR_MPM_ROP_ISSUE_ST_H : read_data_ro_r = `XLEN'(perf_gpu_if.rop_stalls[`PERF_CTR_BITS-1:32]);
|
||||
`CSR_MPM_ROP_ISSUE_ST_H : read_data_ro_r = 32'(perf_gpu_if.rop_stalls[`PERF_CTR_BITS-1:32]);
|
||||
default:;
|
||||
endcase
|
||||
`endif
|
||||
|
|
|
@ -58,12 +58,12 @@ module VX_csr_unit #(
|
|||
localparam NW_WIDTH = `UP(`NW_BITS);
|
||||
|
||||
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] csr_read_data;
|
||||
reg [`XLEN-1:0] csr_write_data;
|
||||
wire [`XLEN-1:0] csr_read_data_ro, csr_read_data_rw;
|
||||
wire [`XLEN-1:0] csr_req_data;
|
||||
reg csr_rd_enable;
|
||||
wire csr_wr_enable;
|
||||
reg [`NUM_THREADS-1:0][31:0] csr_read_data;
|
||||
reg [31:0] csr_write_data;
|
||||
wire [31:0] csr_read_data_ro, csr_read_data_rw;
|
||||
wire [31:0] csr_req_data;
|
||||
reg csr_rd_enable;
|
||||
wire csr_wr_enable;
|
||||
|
||||
`UNUSED_VAR (gpu_pending)
|
||||
wire csr_access_pending = (0
|
||||
|
@ -177,24 +177,24 @@ module VX_csr_unit #(
|
|||
.read_wid (csr_req_if.wid),
|
||||
.read_tmask (csr_req_if.tmask),
|
||||
.read_addr (csr_req_if.addr),
|
||||
.read_data_ro (csr_read_data_ro[`XLEN-1:0]),
|
||||
.read_data_rw (csr_read_data_rw[`XLEN-1:0]),
|
||||
.read_data_ro (csr_read_data_ro[31:0]),
|
||||
.read_data_rw (csr_read_data_rw[31:0]),
|
||||
|
||||
.write_enable (csr_req_valid && csr_wr_enable),
|
||||
.write_uuid (csr_req_if.uuid),
|
||||
.write_wid (csr_req_if.wid),
|
||||
.write_addr (csr_req_if.addr),
|
||||
.write_data (csr_write_data[`XLEN-1:0])
|
||||
.write_data (csr_write_data[31:0])
|
||||
);
|
||||
|
||||
// CSR read
|
||||
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] wtid, ltid, gtid;
|
||||
wire [`NUM_THREADS-1:0][31:0] wtid, ltid, gtid;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign wtid[i] = `XLEN'(i);
|
||||
assign ltid[i] = (`XLEN'(csr_req_if.wid) << `NT_BITS) + i;
|
||||
assign gtid[i] = `XLEN'((`XLEN'(CORE_ID) << (`NW_BITS + `NT_BITS)) + (`XLEN'(csr_req_if.wid) << `NT_BITS) + i);
|
||||
assign wtid[i] = 32'(i);
|
||||
assign ltid[i] = (32'(csr_req_if.wid) << `NT_BITS) + i;
|
||||
assign gtid[i] = 32'((CORE_ID << (`NW_BITS + `NT_BITS)) + (32'(csr_req_if.wid) << `NT_BITS) + i);
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
|
@ -217,7 +217,7 @@ module VX_csr_unit #(
|
|||
|
||||
// CSR write
|
||||
|
||||
assign csr_req_data = csr_req_if.use_imm ? `XLEN'(csr_req_if.imm) : csr_req_if.rs1_data[csr_req_if.tid];
|
||||
assign csr_req_data = csr_req_if.use_imm ? 32'(csr_req_if.imm) : csr_req_if.rs1_data[csr_req_if.tid];
|
||||
|
||||
assign csr_wr_enable = (csr_write_enable || (csr_req_data != 0))
|
||||
`ifdef EXT_ROP_ENABLE
|
||||
|
@ -241,13 +241,10 @@ module VX_csr_unit #(
|
|||
end
|
||||
|
||||
// send response
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] csr_commit_data;
|
||||
for(genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign csr_commit_if.data[i] = `XLEN'(csr_commit_data[i]);
|
||||
end
|
||||
wire [`NUM_THREADS-1:0][31:0] csr_commit_data;
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `NR_BITS + 1 + `NUM_THREADS * `XLEN)
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `NR_BITS + 1 + `NUM_THREADS * 32)
|
||||
) rsp_sbuf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -258,8 +255,12 @@ module VX_csr_unit #(
|
|||
.valid_out (csr_commit_if.valid),
|
||||
.ready_out (csr_commit_if.ready)
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign csr_commit_if.data[i] = `XLEN'(csr_commit_data[i]);
|
||||
end
|
||||
|
||||
assign csr_commit_if.eop = 1'b1;
|
||||
assign csr_commit_if.eop = 1'b1;
|
||||
|
||||
// pending request
|
||||
reg req_pending_r;
|
||||
|
|
|
@ -84,16 +84,21 @@ module VX_dispatch (
|
|||
wire [`INST_CSR_BITS-1:0] csr_op_type = `INST_CSR_BITS'(dispatch_if.op_type);
|
||||
wire [`CSR_ADDR_BITS-1:0] csr_addr = dispatch_if.imm[`CSR_ADDR_BITS-1:0];
|
||||
wire [`NRI_BITS-1:0] csr_imm = dispatch_if.imm[`CSR_ADDR_BITS +: `NRI_BITS];
|
||||
wire [`NUM_THREADS-1:0][31:0] csr_data;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign csr_data[i] = gpr_rsp_if.rs1_data[i][31:0];
|
||||
end
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + `UP(`NT_BITS) + (`NUM_THREADS * `XLEN)),
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + `UP(`NT_BITS) + (`NUM_THREADS * 32)),
|
||||
.OUT_REG (1)
|
||||
) csr_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (csr_req_valid),
|
||||
.ready_in (csr_req_ready),
|
||||
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, csr_op_type, csr_addr, dispatch_if.rd, dispatch_if.wb, dispatch_if.use_imm, csr_imm, tid, gpr_rsp_if.rs1_data}),
|
||||
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, csr_op_type, csr_addr, dispatch_if.rd, dispatch_if.wb, dispatch_if.use_imm, csr_imm, tid, csr_data}),
|
||||
.data_out ({csr_req_if.uuid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.use_imm, csr_req_if.imm, csr_req_if.tid, csr_req_if.rs1_data}),
|
||||
.valid_out (csr_req_if.valid),
|
||||
.ready_out (csr_req_if.ready)
|
||||
|
|
|
@ -10,7 +10,7 @@ interface VX_csr_req_if ();
|
|||
wire [`INST_CSR_BITS-1:0] op_type;
|
||||
wire [`CSR_ADDR_BITS-1:0] addr;
|
||||
wire [`UP(`NT_BITS)-1:0] tid;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire use_imm;
|
||||
wire [`NRI_BITS-1:0] imm;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
|
|
|
@ -56,6 +56,7 @@ CONFIGS_16c := -DNUM_CLUSTERS=1 -DNUM_CORES=16 -DL2_ENABLE
|
|||
CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16 -DL2_ENABLE
|
||||
CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16 -DL2_ENABLE
|
||||
CONFIGS += $(CONFIGS_$(NUM_CORES)c)
|
||||
CONFIGS += -DFPU_DSP
|
||||
|
||||
# include paths
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fpu -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -J$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -J$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -J$(THIRD_PARTY_DIR)/fpnew/src
|
||||
|
@ -66,7 +67,7 @@ RTL_INCLUDE = -I$(RTL_DIR) -I$(DPI_DIR) -I$(RTL_DIR)/libs -I$(RTL_DIR)/interface
|
|||
RTL_INCLUDE += $(FPU_INCLUDE) $(TEX_INCLUDE) $(RASTER_INCLUDE) $(ROP_INCLUDE)
|
||||
|
||||
# compilation flags
|
||||
CFLAGS += -DSYNTHESIS -DQUARTUS -DFPU_DSP
|
||||
CFLAGS += -DSYNTHESIS -DQUARTUS
|
||||
CFLAGS += $(CONFIGS)
|
||||
CFLAGS += $(RTL_INCLUDE)
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@ CONFIGS += -DNDEBUG
|
|||
CONFIGS += -DQUARTUS
|
||||
CONFIGS += -DSYNTHESIS
|
||||
CONFIGS += -DNOGLOBALS
|
||||
CONFIGS += -DFPU_DSP
|
||||
|
||||
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
|
||||
|
||||
|
|
|
@ -75,6 +75,7 @@ CONFIGS_16c := -DNUM_CLUSTERS=1 -DNUM_CORES=16 -DL2_ENABLE
|
|||
CONFIGS_32c := -DNUM_CLUSTERS=2 -DNUM_CORES=16 -DL2_ENABLE
|
||||
CONFIGS_64c := -DNUM_CLUSTERS=4 -DNUM_CORES=16 -DL2_ENABLE
|
||||
CONFIGS += $(CONFIGS_$(NUM_CORES)c)
|
||||
CONFIGS += -DFPU_DSP
|
||||
|
||||
# include paths
|
||||
FPU_INCLUDE = -I$(RTL_DIR)/fpu -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/include -I$(THIRD_PARTY_DIR)/fpnew/src/common_cells/src -I$(THIRD_PARTY_DIR)/fpnew/src/fpu_div_sqrt_mvp/hdl -I$(THIRD_PARTY_DIR)/fpnew/src
|
||||
|
@ -133,7 +134,7 @@ else
|
|||
endif
|
||||
|
||||
# compilation flags
|
||||
CFLAGS += -DSYNTHESIS -DVIVADO -DFPU_DSP
|
||||
CFLAGS += -DSYNTHESIS -DVIVADO
|
||||
CFLAGS += $(CONFIGS)
|
||||
CFLAGS += $(RTL_INCLUDE)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue