From 280abe8853ab92af567a64695cdf25e6d5c505a9 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Wed, 28 Mar 2018 16:41:20 +0200 Subject: [PATCH 01/94] :construction: Add Floating-Point CSR Add FP CSRs `fcsr`, `fflags` and `frm` to the core. --- include/ariane_pkg.sv | 6 +++++- src/csr_regfile.sv | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index 9ca437f1d..a299232e7 100755 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -121,7 +121,7 @@ package ariane_pkg; } bht_prediction_t; typedef enum logic[3:0] { - NONE, LOAD, STORE, ALU, CTRL_FLOW, MULT, CSR + NONE, LOAD, STORE, ALU, CTRL_FLOW, MULT, CSR, FPU } fu_t; localparam EXC_OFF_RST = 8'h80; @@ -368,6 +368,10 @@ package ariane_pkg; // CSRs // ----- typedef enum logic [11:0] { + // Floating-Point CSRs + CSR_FFLAGS = 12'h001; + CSR_FRM = 12'h002; + CSR_FCSR = 12'h003; // Supervisor Mode CSRs CSR_SSTATUS = 12'h100, CSR_SIE = 12'h104, diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv index f5bd477c1..1107a904c 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -54,6 +54,9 @@ module csr_regfile #( output logic eret_o, // Return from exception, set the PC of epc_o output logic [63:0] trap_vector_base_o, // Output base of exception vector, correct CSR is output (mtvec, stvec) output priv_lvl_t priv_lvl_o, // Current privilege level the CPU is in + // FPU + output logic [4:0] fflags_o, // Floating-Point Accured Exceptions + output logic [2:0] frm_o, // Floating-Point Dynamic Rounding Mode // MMU output logic en_translation_o, // enable VA translation output logic en_ld_st_translation_o, // enable VA translation for load and stores @@ -164,6 +167,15 @@ module csr_regfile #( satp_t satp_q, satp_d; + // Floating-Point control and status register (32-bit!) 
+ typedef struct packed { + logic [31:8] reserved; // reserved for L extension, return 0 otherwise + logic [2:0] frm; // float rounding mode + logic [4:0] fflags; // float exception flags + } fcsr_t; + + fcsr_t fcsr_q, fcsr_d; + // ---------------- // CSR Read logic // ---------------- @@ -177,6 +189,11 @@ module csr_regfile #( if (csr_read) begin case (csr_addr.address) + // Floating-Point + CSR_FFLAGS: csr_rdata = {59'b0, fcsr_q.fflags}; + CSR_FRM: csr_rdata = {61'b0, fcsr_q.frm}; + CSR_FCSR: csr_rdata = {32'b0, fcsr_q}; + CSR_SSTATUS: csr_rdata = mstatus_q & 64'h3fffe1fee; CSR_SIE: csr_rdata = mie_q & mideleg_q; CSR_SIP: csr_rdata = mip_q & mideleg_q; @@ -245,7 +262,7 @@ module csr_regfile #( sapt = satp_q; mip = csr_wdata & 64'h33; instret = instret_q; - // only USIP, SSIP, UTIP, STIP are write-able + // only FCSR, USIP, SSIP, UTIP, STIP are write-able eret_o = 1'b0; flush_o = 1'b0; @@ -254,6 +271,8 @@ module csr_regfile #( perf_we_o = 1'b0; perf_data_o = 'b0; + fcsr_d = fcsr_q; + priv_lvl_d = priv_lvl_q; mstatus_d = mstatus_q; mtvec_d = mtvec_q; @@ -279,6 +298,12 @@ module csr_regfile #( // check for correct access rights and that we are writing if (csr_we) begin case (csr_addr.address) + + // Floating-Point + CSR_FFLAGS: fcsr_d.fflags = csr_wdata[4:0]; + CSR_FRM: fcsr_d.frm = csr_wdata[2:0]; + CSR_FCSR: fcsr_d[7:0] = csr_wdata[7:0]; // ignore writes to reserved space + // sstatus is a subset of mstatus - mask it accordingly CSR_SSTATUS: begin mstatus_d = csr_wdata & 64'h3fffe1fee; @@ -648,6 +673,9 @@ module csr_regfile #( // ------------------- assign csr_rdata_o = csr_rdata; assign priv_lvl_o = priv_lvl_q; + // FPU outputs + assign fflags_o = fcsr_q.fflags; + assign frm_o = fcsr_q.frm; // MMU outputs assign satp_ppn_o = satp_q.ppn; assign asid_o = satp_q.asid[ASID_WIDTH-1:0]; @@ -688,6 +716,8 @@ module csr_regfile #( always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin priv_lvl_q <= PRIV_LVL_M; + // floating-point registers + fcsr_q <= 
64'b0; // machine mode registers mstatus_q <= 64'b0; mtvec_q <= {boot_addr_i[63:2], 2'b0}; // set to boot address + direct mode @@ -717,6 +747,8 @@ module csr_regfile #( wfi_q <= 1'b0; end else begin priv_lvl_q <= priv_lvl_d; + // floating-point registers + fcsr_q <= fcsr_d; // machine mode registers mstatus_q <= mstatus_d; mtvec_q <= mtvec_d; From 0cd0b1212e4d17ff5b3fcc6a6c3c79d104468520 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Wed, 28 Mar 2018 19:54:00 +0200 Subject: [PATCH 02/94] :recycle: Refactor opcode names and decoders Refactoring opcode names to be as specified in the RISC-V spec. Also added all opcodes defined in the spec for (potential) future use. --- include/ariane_pkg.sv | 82 +++++++-- src/compressed_decoder.sv | 220 ++++++++++++------------ src/decoder.sv | 8 +- src/frontend.sv | 4 +- src/util/instruction_tracer_defines.svh | 52 +++--- 5 files changed, 207 insertions(+), 159 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index a299232e7..27be12b38 100755 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -259,24 +259,72 @@ package ariane_pkg; // -------------------- // Opcodes // -------------------- - localparam OPCODE_SYSTEM = 7'h73; - localparam OPCODE_FENCE = 7'h0f; - localparam OPCODE_OP = 7'h33; - localparam OPCODE_OP32 = 7'h3B; - localparam OPCODE_OPIMM = 7'h13; - localparam OPCODE_OPIMM32 = 7'h1B; - localparam OPCODE_STORE = 7'h23; - localparam OPCODE_LOAD = 7'h03; - localparam OPCODE_BRANCH = 7'h63; - localparam OPCODE_JALR = 7'h67; - localparam OPCODE_JAL = 7'h6f; - localparam OPCODE_AUIPC = 7'h17; - localparam OPCODE_LUI = 7'h37; - localparam OPCODE_AMO = 7'h2F; + // RV32/64G listings: + // Quadrant 0 + localparam OPCODE_LOAD = 7'b00_000_11; + localparam OPCODE_LOAD_FP = 7'b00_001_11; + localparam OPCODE_CUSTOM_0 = 7'b00_010_11; + localparam OPCODE_MISC_MEM = 7'b00_011_11; + localparam OPCODE_OP_IMM = 7'b00_100_11; + localparam OPCODE_AUIPC = 7'b00_101_11; + localparam OPCODE_OP_IMM_32 = 
7'b00_110_11; + // Quadrant 1 + localparam OPCODE_STORE = 7'b01_000_11; + localparam OPCODE_STORE_FP = 7'b01_001_11; + localparam OPCODE_CUSTOM_1 = 7'b01_010_11; + localparam OPCODE_AMO = 7'b01_011_11; + localparam OPCODE_OP = 7'b01_100_11; + localparam OPCODE_LUI = 7'b01_101_11; + localparam OPCODE_OP_32 = 7'b01_110_11; + // Quadrant 2 + localparam OPCODE_MADD = 7'b10_000_11; + localparam OPCODE_MSUB = 7'b10_001_11; + localparam OPCODE_NMSUB = 7'b10_010_11; + localparam OPCODE_NMADD = 7'b10_011_11; + localparam OPCODE_OP_FP = 7'b10_100_11; + localparam OPCODE_RSRVD_1 = 7'b10_101_11; + localparam OPCODE_CUSTOM_2 = 7'b10_110_11; + // Quadrant 3 + localparam OPCODE_BRANCH = 7'b11_000_11; + localparam OPCODE_JALR = 7'b11_001_11; + localparam OPCODE_RSRVD_2 = 7'b11_010_11; + localparam OPCODE_JAL = 7'b11_011_11; + localparam OPCODE_SYSTEM = 7'b11_100_11; + localparam OPCODE_RSRVD_3 = 7'b11_101_11; + localparam OPCODE_CUSTOM_3 = 7'b11_110_11; + + // RV64C listings: + // Quadrant 0 + localparam OPCODE_C0 = 2'b00; + localparam OPCODE_C0_ADDI4SPN = 3'b000; + localparam OPCODE_C0_FLD = 3'b001; + localparam OPCODE_C0_LW = 3'b010; + localparam OPCODE_C0_LD = 3'b011; + localparam OPCODE_C0_RSRVD = 3'b100; + localparam OPCODE_C0_FSD = 3'b101; + localparam OPCODE_C0_SW = 3'b110; + localparam OPCODE_C0_SD = 3'b111; + // Quadrant 1 + localparam OPCODE_C1 = 2'b01; + localparam OPCODE_C1_ADDI = 3'b000; + localparam OPCODE_C1_ADDIW = 3'b001; + localparam OPCODE_C1_LI = 3'b010; + localparam OPCODE_C1_LUI_ADDI16SP = 3'b011; + localparam OPCODE_C1_MISC_ALU = 3'b100; + localparam OPCODE_C1_J = 3'b101; + localparam OPCODE_C1_BEQZ = 3'b110; + localparam OPCODE_C1_BNEZ = 3'b111; + // Quadrant 2 + localparam OPCODE_C2 = 2'b10; + localparam OPCODE_C2_SLLI = 3'b000; + localparam OPCODE_C2_FLDSP = 3'b001; + localparam OPCODE_C2_LWSP = 3'b010; + localparam OPCODE_C2_LDSP = 3'b011; + localparam OPCODE_C2_JALR_MV_ADD = 3'b100; + localparam OPCODE_C2_FSDSP = 3'b101; + localparam OPCODE_C2_SWSP = 
3'b110; + localparam OPCODE_C2_SDSP = 3'b111; - localparam OPCODE_C_J = 3'b101; - localparam OPCODE_C_BEQZ = 3'b110; - localparam OPCODE_C_BNEZ = 3'b111; // -------------------- // Atomics // -------------------- diff --git a/src/compressed_decoder.sv b/src/compressed_decoder.sv index eb92237cb..21721100d 100644 --- a/src/compressed_decoder.sv +++ b/src/compressed_decoder.sv @@ -21,10 +21,10 @@ import ariane_pkg::*; module compressed_decoder ( - input logic [31:0] instr_i, - output logic [31:0] instr_o, - output logic illegal_instr_o, - output logic is_compressed_o + input logic [31:0] instr_i, + output logic [31:0] instr_o, + output logic illegal_instr_o, + output logic is_compressed_o ); // ------------------- @@ -38,31 +38,31 @@ module compressed_decoder unique case (instr_i[1:0]) // C0 - 2'b00: begin + OPCODE_C0: begin unique case (instr_i[15:13]) - 3'b000: begin + OPCODE_C0_ADDI4SPN: begin // c.addi4spn -> addi rd', x2, imm - instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5], instr_i[6], 2'b00, 5'h02, 3'b000, 2'b01, instr_i[4:2], OPCODE_OPIMM}; + instr_o = {2'b0, instr_i[10:7], instr_i[12:11], instr_i[5], instr_i[6], 2'b00, 5'h02, 3'b000, 2'b01, instr_i[4:2], OPCODE_OP_IMM}; if (instr_i[12:5] == 8'b0) illegal_instr_o = 1'b1; end - 3'b010: begin + OPCODE_C0_LW: begin // c.lw -> lw rd', imm(rs1') instr_o = {5'b0, instr_i[5], instr_i[12:10], instr_i[6], 2'b00, 2'b01, instr_i[9:7], 3'b010, 2'b01, instr_i[4:2], OPCODE_LOAD}; end - 3'b011: begin + OPCODE_C0_LD: begin // c.ld -> ld rd', imm(rs1') // | imm[11:0] | rs1 | funct3 | rd | opcode | instr_o = {4'b0, instr_i[6:5], instr_i[12:10], 3'b000, 2'b01, instr_i[9:7], 3'b011, 2'b01, instr_i[4:2], OPCODE_LOAD}; end - 3'b110: begin + OPCODE_C0_SW: begin // c.sw -> sw rs2', imm(rs1') instr_o = {5'b0, instr_i[5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b010, instr_i[11:10], instr_i[6], 2'b00, OPCODE_STORE}; end - 3'b111: begin + OPCODE_C0_SD: begin // c.sd -> sd rs2', imm(rs1') instr_o = {4'b0, 
instr_i[6:5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b011, instr_i[11:10], 3'b000, OPCODE_STORE}; end @@ -74,137 +74,137 @@ module compressed_decoder end // C1 - 2'b01: begin - unique case (instr_i[15:13]) - 3'b000: begin - // c.addi -> addi rd, rd, nzimm - // c.nop -> addi 0, 0, 0 - instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OPIMM}; - end + OPCODE_C1: begin + unique case (instr_i[15:13]) + OPCODE_C1_ADDI: begin + // c.addi -> addi rd, rd, nzimm + // c.nop -> addi 0, 0, 0 + instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OP_IMM}; + end - // c.addiw -> addiw rd, rd, nzimm for RV64 - 3'b001: begin - if (instr_i[11:7] != 5'h0) // only valid if the destination is not r0 - instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OPIMM32}; - else - illegal_instr_o = 1'b1; - end + // c.addiw -> addiw rd, rd, nzimm for RV64 + OPCODE_C1_ADDIW: begin + if (instr_i[11:7] != 5'h0) // only valid if the destination is not r0 + instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OP_IMM_32}; + else + illegal_instr_o = 1'b1; + end - OPCODE_C_J: begin - // 101: c.j -> jal x0, imm - instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], {9 {instr_i[12]}}, 4'b0, ~instr_i[15], OPCODE_JAL}; - end + OPCODE_C1_LI: begin + // c.li -> addi rd, x0, nzimm + instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], OPCODE_OP_IMM}; + if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; + end - 3'b010: begin - // c.li -> addi rd, x0, nzimm - instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], OPCODE_OPIMM}; - if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; - end + OPCODE_C1_LUI_ADDI16SP: begin + // c.lui -> lui rd, imm + instr_o = {{15 {instr_i[12]}}, instr_i[6:2], 
instr_i[11:7], OPCODE_LUI}; - 3'b011: begin - // c.lui -> lui rd, imm - instr_o = {{15 {instr_i[12]}}, instr_i[6:2], instr_i[11:7], OPCODE_LUI}; + if (instr_i[11:7] == 5'h02) begin + // c.addi16sp -> addi x2, x2, nzimm + instr_o = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2], instr_i[6], 4'b0, 5'h02, 3'b000, 5'h02, OPCODE_OP_IMM}; + end else if (instr_i[11:7] == 5'b0) begin + illegal_instr_o = 1'b1; + end - if (instr_i[11:7] == 5'h02) begin - // c.addi16sp -> addi x2, x2, nzimm - instr_o = {{3 {instr_i[12]}}, instr_i[4:3], instr_i[5], instr_i[2], instr_i[6], 4'b0, 5'h02, 3'b000, 5'h02, OPCODE_OPIMM}; - end else if (instr_i[11:7] == 5'b0) begin - illegal_instr_o = 1'b1; - end + if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1; + end - if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1; - end + OPCODE_C1_MISC_ALU: begin + unique case (instr_i[11:10]) + 2'b00, + 2'b01: begin + // 00: c.srli -> srli rd, rd, shamt + // 01: c.srai -> srai rd, rd, shamt + instr_o = {1'b0, instr_i[10], 4'b0, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b101, 2'b01, instr_i[9:7], OPCODE_OP_IMM}; + // shamt field must be non-zero + if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1; + end - 3'b100: begin - unique case (instr_i[11:10]) - 2'b00, - 2'b01: begin - // 00: c.srli -> srli rd, rd, shamt - // 01: c.srai -> srai rd, rd, shamt - instr_o = {1'b0, instr_i[10], 4'b0, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b101, 2'b01, instr_i[9:7], OPCODE_OPIMM}; - // shamt field must be non-zero - if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1; - end + 2'b10: begin + // c.andi -> andi rd, rd, imm + instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], OPCODE_OP_IMM}; + end - 2'b10: begin - // c.andi -> andi rd, rd, imm - instr_o = {{6 {instr_i[12]}}, instr_i[12], instr_i[6:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], OPCODE_OPIMM}; - end + 2'b11: begin + 
unique case ({instr_i[12], instr_i[6:5]}) + 3'b000: begin + // c.sub -> sub rd', rd', rs2' + instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP}; + end - 2'b11: begin - unique case ({instr_i[12], instr_i[6:5]}) - 3'b000: begin - // c.sub -> sub rd', rd', rs2' - instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP}; - end + 3'b001: begin + // c.xor -> xor rd', rd', rs2' + instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b100, 2'b01, instr_i[9:7], OPCODE_OP}; + end - 3'b001: begin - // c.xor -> xor rd', rd', rs2' - instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b100, 2'b01, instr_i[9:7], OPCODE_OP}; - end + 3'b010: begin + // c.or -> or rd', rd', rs2' + instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b110, 2'b01, instr_i[9:7], OPCODE_OP}; + end - 3'b010: begin - // c.or -> or rd', rd', rs2' - instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b110, 2'b01, instr_i[9:7], OPCODE_OP}; - end + 3'b011: begin + // c.and -> and rd', rd', rs2' + instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], OPCODE_OP}; + end - 3'b011: begin - // c.and -> and rd', rd', rs2' - instr_o = {7'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b111, 2'b01, instr_i[9:7], OPCODE_OP}; - end + 3'b100: begin + // c.subw -> subw rd', rd', rs2' + instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP_32}; + end + 3'b101: begin + // c.addw -> addw rd', rd', rs2' + instr_o = {2'b00, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP_32}; + end - 3'b100: begin - // c.subw -> subw rd', rd', rs2' - instr_o = {2'b01, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 2'b01, instr_i[9:7], OPCODE_OP32}; - end - 3'b101: begin - // c.addw -> addw rd', rd', rs2' - instr_o = {2'b00, 5'b0, 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b000, 
2'b01, instr_i[9:7], OPCODE_OP32}; - end + 3'b110, + 3'b111: begin + // 100: c.subw + // 101: c.addw + illegal_instr_o = 1'b1; + instr_o = {16'b0, instr_i}; + end + endcase + end + endcase + end - 3'b110, - 3'b111: begin - // 100: c.subw - // 101: c.addw - illegal_instr_o = 1'b1; - instr_o = {16'b0, instr_i}; - end - endcase - end - endcase - end + OPCODE_C1_J: begin + // 101: c.j -> jal x0, imm + instr_o = {instr_i[12], instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], {9 {instr_i[12]}}, 4'b0, ~instr_i[15], OPCODE_JAL}; + end - OPCODE_C_BEQZ, OPCODE_C_BNEZ: begin - // 0: c.beqz -> beq rs1', x0, imm - // 1: c.bnez -> bne rs1', x0, imm - instr_o = {{4 {instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01, instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3], instr_i[12], OPCODE_BRANCH}; - end - endcase + OPCODE_C1_BEQZ, OPCODE_C1_BNEZ: begin + // 0: c.beqz -> beq rs1', x0, imm + // 1: c.bnez -> bne rs1', x0, imm + instr_o = {{4 {instr_i[12]}}, instr_i[6:5], instr_i[2], 5'b0, 2'b01, instr_i[9:7], 2'b00, instr_i[13], instr_i[11:10], instr_i[4:3], instr_i[12], OPCODE_BRANCH}; + end + endcase end // C2 - 2'b10: begin + OPCODE_C2: begin unique case (instr_i[15:13]) - 3'b000: begin + OPCODE_C2_SLLI: begin // c.slli -> slli rd, rd, shamt - instr_o = {6'b0, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], OPCODE_OPIMM}; + instr_o = {6'b0, instr_i[12], instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], OPCODE_OP_IMM}; if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; // register not x0 if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1; // shift amount must be non zero end - 3'b010: begin + OPCODE_C2_LWSP: begin // c.lwsp -> lw rd, imm(x2) instr_o = {4'b0, instr_i[3:2], instr_i[12], instr_i[6:4], 2'b00, 5'h02, 3'b010, instr_i[11:7], OPCODE_LOAD}; if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; end - 3'b011: begin + OPCODE_C2_LDSP: begin // c.ldsp -> ld rd, imm(x2) instr_o = {3'b0, 
instr_i[4:2], instr_i[12], instr_i[6:5], 3'b000, 5'h02, 3'b011, instr_i[11:7], OPCODE_LOAD}; if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; end - 3'b100: begin + OPCODE_C2_JALR_MV_ADD: begin if (instr_i[12] == 1'b0) begin // c.mv -> add rd/rs1, x0, rs2 instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], OPCODE_OP}; @@ -231,12 +231,12 @@ module compressed_decoder end end - 3'b110: begin + OPCODE_C2_SWSP: begin // c.swsp -> sw rs2, imm(x2) instr_o = {4'b0, instr_i[8:7], instr_i[12], instr_i[6:2], 5'h02, 3'b010, instr_i[11:9], 2'b00, OPCODE_STORE}; end - 3'b111: begin + OPCODE_C2_SDSP: begin // c.sdsp -> sd rs2, imm(x2) instr_o = {3'b0, instr_i[9:7], instr_i[12], instr_i[6:2], 5'h02, 3'b011, instr_i[11:10], 3'b000, OPCODE_STORE}; end diff --git a/src/decoder.sv b/src/decoder.sv index 83c5ae2fa..55abaf035 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -202,7 +202,7 @@ module decoder ( endcase end // Memory ordering instructions - OPCODE_FENCE: begin + OPCODE_MISC_MEM: begin instruction_o.fu = CSR; instruction_o.rs1 = '0; instruction_o.rs2 = '0; @@ -263,7 +263,7 @@ module decoder ( // -------------------------- // 32bit Reg-Reg Operations // -------------------------- - OPCODE_OP32: begin + OPCODE_OP_32: begin instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001) ? 
MULT : ALU; instruction_o.rs1 = instr.rtype.rs1; instruction_o.rs2 = instr.rtype.rs2; @@ -287,7 +287,7 @@ module decoder ( // -------------------------------- // Reg-Immediate Operations // -------------------------------- - OPCODE_OPIMM: begin + OPCODE_OP_IMM: begin instruction_o.fu = ALU; imm_select = IIMM; instruction_o.rs1 = instr.itype.rs1; @@ -321,7 +321,7 @@ module decoder ( // -------------------------------- // 32 bit Reg-Immediate Operations // -------------------------------- - OPCODE_OPIMM32: begin + OPCODE_OP_IMM_32: begin instruction_o.fu = ALU; imm_select = IIMM; instruction_o.rs1 = instr.itype.rs1; diff --git a/src/frontend.sv b/src/frontend.sv index e87531fd8..ec3d9f2c0 100644 --- a/src/frontend.sv +++ b/src/frontend.sv @@ -523,9 +523,9 @@ module instr_scan ( assign rvi_jalr_o = (instr_i[6:0] == OPCODE_JALR) ? 1'b1 : 1'b0; assign rvi_jump_o = (instr_i[6:0] == OPCODE_JAL) ? 1'b1 : 1'b0; // opcode JAL - assign rvc_jump_o = (instr_i[15:13] == OPCODE_C_J) & is_rvc_o & (instr_i[1:0] == 2'b01); + assign rvc_jump_o = (instr_i[15:13] == OPCODE_C1_J) & is_rvc_o & (instr_i[1:0] == 2'b01); assign rvc_jr_o = (instr_i[15:12] == 4'b1000) & (instr_i[6:2] == 5'b00000) & is_rvc_o & (instr_i[1:0] == 2'b10); - assign rvc_branch_o = ((instr_i[15:13] == OPCODE_C_BEQZ) | (instr_i[15:13] == OPCODE_C_BNEZ)) & is_rvc_o & (instr_i[1:0] == 2'b01); + assign rvc_branch_o = ((instr_i[15:13] == OPCODE_C1_BEQZ) | (instr_i[15:13] == OPCODE_C1_BNEZ)) & is_rvc_o & (instr_i[1:0] == 2'b01); // check that rs1 is x1 or x5 assign rvc_return_o = rvc_jr_o & ~instr_i[11] & ~instr_i[10] & ~instr_i[8] & instr_i[7]; assign rvc_jalr_o = (instr_i[15:12] == 4'b1001) & (instr_i[6:2] == 5'b00000) & is_rvc_o; diff --git a/src/util/instruction_tracer_defines.svh b/src/util/instruction_tracer_defines.svh index 5436f1bcf..d048e7a8b 100644 --- a/src/util/instruction_tracer_defines.svh +++ b/src/util/instruction_tracer_defines.svh @@ -28,23 +28,23 @@ parameter INSTR_BGE = { 7'b?, 5'b?, 5'b?, 3'b101, 
5'b?, OPCODE_BRANCH }; parameter INSTR_BLTU = { 7'b?, 5'b?, 5'b?, 3'b110, 5'b?, OPCODE_BRANCH }; parameter INSTR_BGEU = { 7'b?, 5'b?, 5'b?, 3'b111, 5'b?, OPCODE_BRANCH }; -// OPIMM -parameter INSTR_LI = { 12'b?, 5'b0, 3'b000, 5'b?, OPCODE_OPIMM }; -parameter INSTR_ADDI = { 17'b?, 3'b000, 5'b?, OPCODE_OPIMM }; -parameter INSTR_SLTI = { 17'b?, 3'b010, 5'b?, OPCODE_OPIMM }; -parameter INSTR_SLTIU = { 17'b?, 3'b011, 5'b?, OPCODE_OPIMM }; -parameter INSTR_XORI = { 17'b?, 3'b100, 5'b?, OPCODE_OPIMM }; -parameter INSTR_ORI = { 17'b?, 3'b110, 5'b?, OPCODE_OPIMM }; -parameter INSTR_ANDI = { 17'b?, 3'b111, 5'b?, OPCODE_OPIMM }; -parameter INSTR_SLLI = { 6'b000000, 11'b?, 3'b001, 5'b?, OPCODE_OPIMM }; -parameter INSTR_SRLI = { 6'b000000, 11'b?, 3'b101, 5'b?, OPCODE_OPIMM }; -parameter INSTR_SRAI = { 6'b010000, 11'b?, 3'b101, 5'b?, OPCODE_OPIMM }; +// OP-IMM +parameter INSTR_LI = { 12'b?, 5'b0, 3'b000, 5'b?, OPCODE_OP_IMM }; +parameter INSTR_ADDI = { 17'b?, 3'b000, 5'b?, OPCODE_OP_IMM }; +parameter INSTR_SLTI = { 17'b?, 3'b010, 5'b?, OPCODE_OP_IMM }; +parameter INSTR_SLTIU = { 17'b?, 3'b011, 5'b?, OPCODE_OP_IMM }; +parameter INSTR_XORI = { 17'b?, 3'b100, 5'b?, OPCODE_OP_IMM }; +parameter INSTR_ORI = { 17'b?, 3'b110, 5'b?, OPCODE_OP_IMM }; +parameter INSTR_ANDI = { 17'b?, 3'b111, 5'b?, OPCODE_OP_IMM }; +parameter INSTR_SLLI = { 6'b000000, 11'b?, 3'b001, 5'b?, OPCODE_OP_IMM }; +parameter INSTR_SRLI = { 6'b000000, 11'b?, 3'b101, 5'b?, OPCODE_OP_IMM }; +parameter INSTR_SRAI = { 6'b010000, 11'b?, 3'b101, 5'b?, OPCODE_OP_IMM }; -// OPIMM32 -parameter INSTR_ADDIW = { 17'b?, 3'b000, 5'b?, OPCODE_OPIMM32 }; -parameter INSTR_SLLIW = { 7'b0000000, 10'b?, 3'b001, 5'b?, OPCODE_OPIMM32 }; -parameter INSTR_SRLIW = { 7'b0000000, 10'b?, 3'b101, 5'b?, OPCODE_OPIMM32 }; -parameter INSTR_SRAIW = { 7'b0100000, 10'b?, 3'b101, 5'b?, OPCODE_OPIMM32 }; +// OP-IMM-32 +parameter INSTR_ADDIW = { 17'b?, 3'b000, 5'b?, OPCODE_OP_IMM_32 }; +parameter INSTR_SLLIW = { 7'b0000000, 10'b?, 3'b001, 5'b?, 
OPCODE_OP_IMM_32 }; +parameter INSTR_SRLIW = { 7'b0000000, 10'b?, 3'b101, 5'b?, OPCODE_OP_IMM_32 }; +parameter INSTR_SRAIW = { 7'b0100000, 10'b?, 3'b101, 5'b?, OPCODE_OP_IMM_32 }; // OP parameter INSTR_ADD = { 7'b0000000, 10'b?, 3'b000, 5'b?, OPCODE_OP }; @@ -59,17 +59,17 @@ parameter INSTR_OR = { 7'b0000000, 10'b?, 3'b110, 5'b?, OPCODE_OP }; parameter INSTR_AND = { 7'b0000000, 10'b?, 3'b111, 5'b?, OPCODE_OP }; parameter INSTR_MUL = { 7'b0000001, 10'b?, 3'b???, 5'b?, OPCODE_OP }; -// OP32 -parameter INSTR_ADDW = { 7'b0000000, 10'b?, 3'b000, 5'b?, OPCODE_OP32 }; -parameter INSTR_SUBW = { 7'b0100000, 10'b?, 3'b000, 5'b?, OPCODE_OP32 }; -parameter INSTR_SLLW = { 7'b0000000, 10'b?, 3'b001, 5'b?, OPCODE_OP32 }; -parameter INSTR_SRLW = { 7'b0000000, 10'b?, 3'b101, 5'b?, OPCODE_OP32 }; -parameter INSTR_SRAW = { 7'b0100000, 10'b?, 3'b101, 5'b?, OPCODE_OP32 }; -parameter INSTR_MULW = { 7'b0000001, 10'b?, 3'b???, 5'b?, OPCODE_OP32 }; +// OP-32 +parameter INSTR_ADDW = { 7'b0000000, 10'b?, 3'b000, 5'b?, OPCODE_OP_32 }; +parameter INSTR_SUBW = { 7'b0100000, 10'b?, 3'b000, 5'b?, OPCODE_OP_32 }; +parameter INSTR_SLLW = { 7'b0000000, 10'b?, 3'b001, 5'b?, OPCODE_OP_32 }; +parameter INSTR_SRLW = { 7'b0000000, 10'b?, 3'b101, 5'b?, OPCODE_OP_32 }; +parameter INSTR_SRAW = { 7'b0100000, 10'b?, 3'b101, 5'b?, OPCODE_OP_32 }; +parameter INSTR_MULW = { 7'b0000001, 10'b?, 3'b???, 5'b?, OPCODE_OP_32 }; -// FENCE -parameter INSTR_FENCE = { 4'b0, 8'b?, 13'b0, OPCODE_FENCE }; -parameter INSTR_FENCEI = { 17'b0, 3'b001, 5'b0, OPCODE_FENCE }; +// MISC-MEM +parameter INSTR_FENCE = { 4'b0, 8'b?, 13'b0, OPCODE_MISC_MEM }; +parameter INSTR_FENCEI = { 17'b0, 3'b001, 5'b0, OPCODE_MISC_MEM }; // SYSTEM parameter INSTR_CSRW = { 12'b?, 5'b?, 3'b001, 5'b0, OPCODE_SYSTEM }; parameter INSTR_CSRRW = { 12'b?, 5'b?, 3'b001, 5'b?, OPCODE_SYSTEM }; From a98d907b3502334f4e218f2a0a6396cec660867e Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Thu, 29 Mar 2018 15:39:32 +0200 Subject: [PATCH 03/94] :recycle: Update 
frontend for refactored opcodes --- src/frontend.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/frontend.sv b/src/frontend.sv index ec3d9f2c0..ffa6e468d 100644 --- a/src/frontend.sv +++ b/src/frontend.sv @@ -523,9 +523,9 @@ module instr_scan ( assign rvi_jalr_o = (instr_i[6:0] == OPCODE_JALR) ? 1'b1 : 1'b0; assign rvi_jump_o = (instr_i[6:0] == OPCODE_JAL) ? 1'b1 : 1'b0; // opcode JAL - assign rvc_jump_o = (instr_i[15:13] == OPCODE_C1_J) & is_rvc_o & (instr_i[1:0] == 2'b01); - assign rvc_jr_o = (instr_i[15:12] == 4'b1000) & (instr_i[6:2] == 5'b00000) & is_rvc_o & (instr_i[1:0] == 2'b10); - assign rvc_branch_o = ((instr_i[15:13] == OPCODE_C1_BEQZ) | (instr_i[15:13] == OPCODE_C1_BNEZ)) & is_rvc_o & (instr_i[1:0] == 2'b01); + assign rvc_jump_o = (instr_i[15:13] == OPCODE_C1_J) & is_rvc_o & (instr_i[1:0] == OPCODE_C1); + assign rvc_jr_o = (instr_i[15:12] == 4'b1000) & (instr_i[6:2] == 5'b00000) & is_rvc_o & (instr_i[1:0] == OPCODE_C2); + assign rvc_branch_o = ((instr_i[15:13] == OPCODE_C1_BEQZ) | (instr_i[15:13] == OPCODE_C1_BNEZ)) & is_rvc_o & (instr_i[1:0] == OPCODE_C1); // check that rs1 is x1 or x5 assign rvc_return_o = rvc_jr_o & ~instr_i[11] & ~instr_i[10] & ~instr_i[8] & instr_i[7]; assign rvc_jalr_o = (instr_i[15:12] == 4'b1001) & (instr_i[6:2] == 5'b00000) & is_rvc_o; From d2460f304817b6b755443cb1dcf3e6c80c717bd4 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Thu, 29 Mar 2018 11:47:26 +0200 Subject: [PATCH 04/94] :bug: Fix FP CSR writes do not cause flush --- include/ariane_pkg.sv | 6 +++--- src/csr_regfile.sv | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index 27be12b38..e469878c0 100755 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -417,9 +417,9 @@ package ariane_pkg; // ----- typedef enum logic [11:0] { // Floating-Point CSRs - CSR_FFLAGS = 12'h001; - CSR_FRM = 12'h002; - CSR_FCSR = 12'h003; + CSR_FFLAGS = 12'h001, + CSR_FRM = 
12'h002, + CSR_FCSR = 12'h003, // Supervisor Mode CSRs CSR_SSTATUS = 12'h100, CSR_SIE = 12'h104, diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv index 1107a904c..9304735ce 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -300,9 +300,21 @@ module csr_regfile #( case (csr_addr.address) // Floating-Point - CSR_FFLAGS: fcsr_d.fflags = csr_wdata[4:0]; - CSR_FRM: fcsr_d.frm = csr_wdata[2:0]; - CSR_FCSR: fcsr_d[7:0] = csr_wdata[7:0]; // ignore writes to reserved space + CSR_FFLAGS: begin + fcsr_d.fflags = csr_wdata[4:0]; + // this instruction has side-effects + flush_o = 1'b1; + end + CSR_FRM: begin + fcsr_d.frm = csr_wdata[2:0]; + // this instruction has side-effects + flush_o = 1'b1; + end + CSR_FCSR: begin + fcsr_d[7:0] = csr_wdata[7:0]; // ignore writes to reserved space + // this instruction has side-effects + flush_o = 1'b1; + end // sstatus is a subset of mstatus - mask it accordingly CSR_SSTATUS: begin From 8cb26a39e27b223ab2a4727b56b4ce5ec95d9286 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Thu, 29 Mar 2018 15:35:09 +0200 Subject: [PATCH 05/94] :construction: Add FP extensions to decoder --- include/ariane_pkg.sv | 72 ++++++++++++---- src/decoder.sv | 193 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 247 insertions(+), 18 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index e469878c0..5879b7392 100755 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -35,14 +35,21 @@ package ariane_pkg; localparam BITS_SATURATION_COUNTER = 2; localparam NR_COMMIT_PORTS = 2; - localparam logic [63:0] ISA_CODE = (1 << 2) // C - Compressed extension - | (1 << 8) // I - RV32I/64I/128I base ISA - | (1 << 12) // M - Integer Multiply/Divide extension - | (0 << 13) // N - User level interrupts supported - | (1 << 18) // S - Supervisor mode implemented - | (1 << 20) // U - User mode implemented - | (0 << 23) // X - Non-standard extensions present - | (1 << 63); // RV64 + // Floating-point extensions configuration + 
localparam bit RVF = 1'b1; // Is F extension enabled + localparam bit RVD = 1'b1; // Is D extension enabled + + localparam logic [63:0] ISA_CODE = (0 << 0) // A - Atomic Instructions extension + | (1 << 2) // C - Compressed extension + | (RVD << 3) // D - Double precsision floating-point extension + | (RVF << 5) // F - Single precsision floating-point extension + | (1 << 8) // I - RV32I/64I/128I base ISA + | (1 << 12) // M - Integer Multiply/Divide extension + | (0 << 13) // N - User level interrupts supported + | (1 << 18) // S - Supervisor mode implemented + | (1 << 20) // U - User mode implemented + | (0 << 23) // X - Non-standard extensions present + | (1 << 63); // RV64 // 32 registers + 1 bit for re-naming = 6 localparam REG_ADDR_SIZE = 6; @@ -152,7 +159,17 @@ package ariane_pkg; // Multiplications MUL, MULH, MULHU, MULHSU, MULW, // Divisions - DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW + DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW, + // Floating-Point Load and Store Instructions + FLD, FLW, FSD, FSW, + // Floating-Point Computational Instructions + FADD, FSUB, FMUL, FDIV, FMIN_MAX, FSQRT, FMADD, FMSUB, FNMADD, FNMSUB, + // Floating-Point Conversion and Move Instructions + FCVT_F2I, FCVT_I2F, FCVT_F2F, FSGNJ, FMV_F2X, FMV_X2F, + // Floating-Point Compare Instructions + FCMP, + // Floating-Point Classify Instruction + FCLASS } fu_op; // ---------------------- @@ -161,11 +178,11 @@ package ariane_pkg; // TODO: Add atomics function automatic logic [1:0] extract_transfer_size (fu_op op); case (op) - LD, SD: return 2'b11; - LW, LWU, SW: return 2'b10; - LH, LHU, SH: return 2'b01; - LB, SB, LBU: return 2'b00; - default: return 2'b11; + LD, SD, FLD, FSD : return 2'b11; + LW, LWU, SW, FLW, FSW : return 2'b10; + LH, LHU, SH : return 2'b01; + LB, LBU, SB : return 2'b00; + default : return 2'b11; endcase endfunction @@ -202,7 +219,10 @@ package ariane_pkg; logic [REG_ADDR_SIZE-1:0] rs1; // register source address 1 logic [REG_ADDR_SIZE-1:0] rs2; // register source 
address 2 logic [REG_ADDR_SIZE-1:0] rd; // register destination address - logic [63:0] result; // for unfinished instructions this field also holds the immediate + logic [63:0] result; // for unfinished instructions this field also holds the immediate, + // for unfinished floating-point that are partly encoded in rs2, this field also holds rs2 + // for unfinished floating-point fused operations (FMADD, FMSUB, FNMADD, FNMSUB) + // this field holds the address of the third operand from the floating-point register file logic valid; // is the result valid logic use_imm; // should we use the immediate as operand b? logic use_zimm; // use zimm as operand a @@ -225,6 +245,26 @@ package ariane_pkg; logic [6:0] opcode; } rtype_t; + typedef struct packed { + logic [31:27] rs3; + logic [1:0] funct2; + logic [24:20] rs2; + logic [19:15] rs1; + logic [14:12] funct3; + logic [11:7] rd; + logic [6:0] opcode; + } r4type_t; + + typedef struct packed { + logic [31:27] funct5; + logic [26:25] fmt; + logic [24:20] rs2; + logic [19:15] rs1; + logic [14:12] rm; + logic [11:7] rd; + logic [6:0] opcode; + } rftype_t; // floating-point + typedef struct packed { logic [31:20] imm; logic [19:15] rs1; @@ -251,6 +291,8 @@ package ariane_pkg; typedef union packed { logic [31:0] instr; rtype_t rtype; + r4type_t r4type; + rftype_t rftype; itype_t itype; stype_t stype; utype_t utype; diff --git a/src/decoder.sv b/src/decoder.sv index 55abaf035..8951c622e 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -29,6 +29,7 @@ module decoder ( input exception_t ex_i, // if an exception occured in if // From CSR input priv_lvl_t priv_lvl_i, // current privilege level + input logic [2:0] frm_i, // floating-point dynamic rounding mode input logic tvm_i, // trap virtual memory input logic tw_i, // timeout wait input logic tsr_i, // trap sret @@ -40,13 +41,15 @@ module decoder ( logic ecall; // this instruction is a software break-point logic ebreak; + // this instruction needs floating-point rounding-mode 
verification + logic check_fprm; instruction_t instr; assign instr = instruction_t'(instruction_i); // -------------------- // Immediate select // -------------------- enum logic[3:0] { - NOIMM, PCIMM, IIMM, SIMM, SBIMM, BIMM, UIMM, JIMM + NOIMM, PCIMM, IIMM, SIMM, SBIMM, BIMM, UIMM, JIMM, RS3 } imm_select; logic [63:0] imm_i_type; @@ -63,18 +66,19 @@ module decoder ( is_control_flow_instr_o = 1'b0; illegal_instr = 1'b0; instruction_o.pc = pc_i; + instruction_o.trans_id = 5'b0; instruction_o.fu = NONE; instruction_o.op = ADD; instruction_o.rs1 = 5'b0; instruction_o.rs2 = 5'b0; instruction_o.rd = 5'b0; instruction_o.use_pc = 1'b0; - instruction_o.trans_id = 5'b0; instruction_o.is_compressed = is_compressed_i; instruction_o.use_zimm = 1'b0; instruction_o.bp = branch_predict_i; ecall = 1'b0; ebreak = 1'b0; + check_fprm = 1'b0; if (~ex_i.valid) begin case (instr.rtype.opcode) @@ -384,6 +388,183 @@ module decoder ( endcase end + // -------------------------------- + // Floating-Point Load/store + // -------------------------------- + OPCODE_STORE_FP: begin + if (RVF || RVD) begin // only generate decoder if FP extensions are enabled + instruction_o.fu = STORE; + imm_select = SIMM; + instruction_o.rs1 = instr.stype.rs1; + instruction_o.rs2 = instr.stype.rs2; + // determine store size + unique case (instr.stype.funct3) + // Only process instruction if corresponding extension is active (static) + 3'b010: if (RVF) instruction_o.op = FSW; + else illegal_instr = 1'b1; + 3'b011: if (RVD) instruction_o.op = FSD; + else illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end else + illegal_instr = 1'b1; + end + + OPCODE_LOAD_FP: begin + if (RVF || RVD) begin // only generate decoder if FP extensions are enabled + instruction_o.fu = LOAD; + imm_select = IIMM; + instruction_o.rs1 = instr.itype.rs1; + instruction_o.rd = instr.itype.rd; + // determine load size + unique case (instr.itype.funct3) + // Only process instruction if corresponding extension is active 
(static) + 3'b010: if (RVF) instruction_o.op = FLW; + else illegal_instr = 1'b1; + 3'b011: if (RVD) instruction_o.op = FLD; + else illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end else + illegal_instr = 1'b1; + end + + // ---------------------------------- + // Floating-Point Reg-Reg Operations + // ---------------------------------- + OPCODE_MADD, + OPCODE_MSUB, + OPCODE_NMSUB, + OPCODE_NMADD: begin + if (RVF || RVD) begin // only generate decoder if FP extensions are enabled + instruction_o.fu = FPU; + instruction_o.rs1 = instr.r4type.rs1; + instruction_o.rs2 = instr.r4type.rs2; + instruction_o.rd = instr.r4type.rd; + imm_select = RS3; // rs3 into result field + check_fprm = 1'b1; + // select the correct fused operation + unique case (instr.r4type.opcode) + default: instruction_o.op = FMADD; // fmadd.fmt - Fused multiply-add + OPCODE_MSUB: instruction_o.op = FMSUB; // fmsub.fmt - Fused multiply-subtract + OPCODE_NMSUB: instruction_o.op = FNMSUB; // fnmsub.fmt - Negated fused multiply-subtract + OPCODE_NMADD: instruction_o.op = FNMADD; // fnmadd.fmt - Negated fused multiply-add + endcase + + // determine fp format + unique case (instr.r4type.funct2) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (!RVF) illegal_instr = 1'b1; + 2'b01: if (!RVD) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end else + illegal_instr = 1'b1; + end + + OPCODE_FP: begin + if (RVF || RVD) begin // only generate decoder if FP extensions are enabled + instruction_o.fu = FPU; + instruction_o.rs1 = instr.rftype.rs1; + instruction_o.rs2 = instr.rftype.rs2; + instruction_o.rd = instr.rftype.rd; + check_fprm = 1'b1; + // decode FP instruction + unique case (instr.rftype.funct5) + 5'b00000: instruction_o.op = FADD; // fadd.fmt - FP Addition + 5'b00001: instruction_o.op = FSUB; // fsub.fmt - FP Subtraction + 5'b00010: instruction_o.op = FMUL; // fmul.fmt - FP Multiplication + 5'b00011: instruction_o.op = FDIV; // 
fdiv.fmt - FP Division + 5'b01011: begin + instruction_o.op = FSQRT; // fsqrt.fmt - FP Square Root + // rs2 must be zero + if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1; + end + 5'b00100: begin + instruction_o.op = FSGNJ; // fsgn{j[n]/jx}.fmt - FP Sign Injection + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (instr.rftype.rm > 3'b010) illegal_instr = 1'b1; + end + 5'b00101: begin + instruction_o.op = FMIN_MAX; // fmin/fmax.fmt - FP Minimum / Maximum + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (instr.rftype.rm > 3'b001) illegal_instr = 1'b1; + end + 5'b01000: begin + instruction_o.op = FCVT_F2F; // fcvt.fmt.fmt - FP to FP Conversion + imm_select = IIMM; // rs2 holds part of the intruction + if (instr.rftype.rs2[24:22]) illegal_instr = 1'b1; // bits [21:20] used, other bits must be 0 + // check source format + unique case (instr.rftype.rs2[21:20]) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (!RVF) illegal_instr = 1'b1; + 2'b01: if (!RVD) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + end + 5'b11000: begin + instruction_o.op = FCVT_F2I; // fcvt.ifmt.fmt - FP to Int Conversion + imm_select = IIMM; // rs2 holds part of the instruction + if (instr.rftype.rs2[24:22]) illegal_instr = 1'b1; // bits [21:20] used, other bits must be 0 + end + 5'b11100: begin + instruction_o.rs2 = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (instr.rftype.rm == 3'b000) instruction_o.op = FMV_F2X; // fmv.ifmt.fmt - FPR to GPR Move + else if (instr.rftype.rm == 3'b001) instruction_o.op = FCLASS; // fclass.fmt - FP Classify + else illegal_instr = 1'b1; + // rs2 must be zero + if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1; + end + 5'b10100: begin + instruction_o.op = FCMP; // feq/flt/fle.fmt - FP Comparisons + check_fprm = 1'b0; // instruction encoded in rm, do 
the check here + if (instr.rftype.rm > 3'b010) illegal_instr = 1'b1; + end + 5'b11010: begin + instruction_o.op = FCVT_I2F; // fcvt.fmt.ifmt - Int to FP Conversion + imm_select = IIMM; // rs2 holds part of the instruction + if (instr.rftype.rs2[24:22]) illegal_instr = 1'b1; // bits [21:20] used, other bits must be 0 + end + 5'b11110: begin + instruction_o.op = FMV_X2F; // fmv.fmt.ifmt - GPR to FPR Move + instruction_o.rs2 = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (instr.rftype.rm != 3'b000) illegal_instr = 1'b1; + // rs2 must be zero + if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1; + end + default : illegal_instr = 1'b1; + endcase + + // check format + unique case (instr.rftype.fmt) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (!RVF) illegal_instr = 1'b1; + 2'b01: if (!RVD) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + + // check rounding mode + if (check_fprm) begin + unique case (instr.rftype.rm) + [3'b000:3'b100]: ; //legal rounding modes + 3'b111: begin + unique case (frm_i) + [3'b000:3'b100]: ; //legal rounding modes + default : illegal_instr = 1'b1; + endcase + end + default : illegal_instr = 1'b1; + endcase + end + end else + illegal_instr = 1'b1; + end + + // ---------------------------------- + // Atomic Operations + // ---------------------------------- `ifdef ENABLE_ATOMICS OPCODE_AMO: begin // we are going to use the load unit for AMOs @@ -489,6 +670,7 @@ module decoder ( endcase end end + // -------------------------------- // Sign extend immediate // -------------------------------- @@ -501,7 +683,7 @@ module decoder ( imm_uj_type = uj_imm(instruction_i); imm_bi_type = { {59{instruction_i[24]}}, instruction_i[24:20] }; - // NOIMM, PCIMM, IIMM, SIMM, BIMM, BIMM, UIMM, JIMM + // NOIMM, PCIMM, IIMM, SIMM, BIMM, BIMM, UIMM, JIMM, RS3 // select immediate case (imm_select) PCIMM: 
begin @@ -532,6 +714,11 @@ module decoder ( instruction_o.result = imm_uj_type; instruction_o.use_imm = 1'b1; end + RS3: begin + // result holds address of fp operand rs3 + instruction_o.result = {59'b0, instr.r4type.rs3}; + instruction_o.use_imm = 1'b0; + end default: begin instruction_o.result = 64'b0; instruction_o.use_imm = 1'b0; From 4d5fa5adbb273263fa6da325a32aea9bd09cc05e Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Thu, 29 Mar 2018 17:24:16 +0200 Subject: [PATCH 06/94] :construction: Add FP operations to compressed decoder --- src/compressed_decoder.sv | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/compressed_decoder.sv b/src/compressed_decoder.sv index 21721100d..2d609ffbd 100644 --- a/src/compressed_decoder.sv +++ b/src/compressed_decoder.sv @@ -36,6 +36,8 @@ module compressed_decoder is_compressed_o = 1'b1; instr_o = instr_i; + // I: | imm[11:0] | rs1 | funct3 | rd | opcode | + // S: | imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode | unique case (instr_i[1:0]) // C0 OPCODE_C0: begin @@ -46,6 +48,12 @@ module compressed_decoder if (instr_i[12:5] == 8'b0) illegal_instr_o = 1'b1; end + OPCODE_C0_FLD: begin + // c.fld -> fld rd', imm(rs1') + // CLD: | funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 | + instr_o = {4'b0, instr_i[6:5], instr_i[12:10], 3'b000, 2'b01, instr_i[9:7], 3'b011, 2'b01, instr_i[4:2], OPCODE_LOAD_FP}; + end + OPCODE_C0_LW: begin // c.lw -> lw rd', imm(rs1') instr_o = {5'b0, instr_i[5], instr_i[12:10], instr_i[6], 2'b00, 2'b01, instr_i[9:7], 3'b010, 2'b01, instr_i[4:2], OPCODE_LOAD}; @@ -53,10 +61,15 @@ module compressed_decoder OPCODE_C0_LD: begin // c.ld -> ld rd', imm(rs1') - // | imm[11:0] | rs1 | funct3 | rd | opcode | + // CLD: | funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 | instr_o = {4'b0, instr_i[6:5], instr_i[12:10], 3'b000, 2'b01, instr_i[9:7], 3'b011, 2'b01, instr_i[4:2], OPCODE_LOAD}; end + OPCODE_C0_FSD: begin + // c.fsd -> fsd rs2', imm(rs1') + instr_o = {4'b0, 
instr_i[6:5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b011, instr_i[11:10], 3'b000, OPCODE_STORE_FP}; + end + OPCODE_C0_SW: begin // c.sw -> sw rs2', imm(rs1') instr_o = {5'b0, instr_i[5], instr_i[12], 2'b01, instr_i[4:2], 2'b01, instr_i[9:7], 3'b010, instr_i[11:10], instr_i[6], 2'b00, OPCODE_STORE}; @@ -192,6 +205,12 @@ module compressed_decoder if ({instr_i[12], instr_i[6:2]} == 6'b0) illegal_instr_o = 1'b1; // shift amount must be non zero end + OPCODE_C2_FLDSP: begin + // c.fldsp -> fld rd, imm(x2) + instr_o = {3'b0, instr_i[4:2], instr_i[12], instr_i[6:5], 3'b000, 5'h02, 3'b011, instr_i[11:7], OPCODE_LOAD_FP}; + if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; + end + OPCODE_C2_LWSP: begin // c.lwsp -> lw rd, imm(x2) instr_o = {4'b0, instr_i[3:2], instr_i[12], instr_i[6:4], 2'b00, 5'h02, 3'b010, instr_i[11:7], OPCODE_LOAD}; @@ -231,6 +250,11 @@ module compressed_decoder end end + OPCODE_C2_FSDSP: begin + // c.fsdsp -> fsd rs2, imm(x2) + instr_o = {3'b0, instr_i[9:7], instr_i[12], instr_i[6:2], 5'h02, 3'b011, instr_i[11:10], 3'b000, OPCODE_STORE_FP}; + end + OPCODE_C2_SWSP: begin // c.swsp -> sw rs2, imm(x2) instr_o = {4'b0, instr_i[8:7], instr_i[12], instr_i[6:2], 5'h02, 3'b010, instr_i[11:9], 2'b00, OPCODE_STORE}; From 4e8aa093843989cc0c930a9c0913a1848e8a2cc1 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Thu, 29 Mar 2018 18:52:19 +0200 Subject: [PATCH 07/94] :construction: Add FP operations to load/store unit --- src/load_unit.sv | 16 +++++++++++++++- src/lsu.sv | 8 ++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/load_unit.sv b/src/load_unit.sv index a8701aaa2..e8ce39e75 100644 --- a/src/load_unit.sv +++ b/src/load_unit.sv @@ -319,7 +319,9 @@ module load_unit ( logic [63:0] rdata_h_ext; // sign extension for half words logic [63:0] rdata_b_ext; // sign extension for bytes - // double words + logic [63:0] rdata_fw_box; // nan-boxing for single floats + + // double words or double floats always_comb begin : 
sign_extend_double_word rdata_d_ext = data_rdata_i[63:0]; end @@ -335,6 +337,17 @@ module load_unit ( endcase end + // nan-boxing single floats + always_comb begin : nan_box_single_float + case (load_data_q.address_offset) + default: rdata_fw_box = {{32{1'b1}}, data_rdata_i[31:0]}; + 3'b001: rdata_fw_box = {{32{1'b1}}, data_rdata_i[39:8]}; + 3'b010: rdata_fw_box = {{32{1'b1}}, data_rdata_i[47:16]}; + 3'b011: rdata_fw_box = {{32{1'b1}}, data_rdata_i[55:24]}; + 3'b100: rdata_fw_box = {{32{1'b1}}, data_rdata_i[63:32]}; + endcase + end + // sign extension for half words always_comb begin : sign_extend_half_word case (load_data_q.address_offset) @@ -366,6 +379,7 @@ module load_unit ( always_comb begin case (load_data_q.operator) LW, LWU: result_o = rdata_w_ext; + FLW: result_o = rdata_fw_box; LH, LHU: result_o = rdata_h_ext; LB, LBU: result_o = rdata_b_ext; default: result_o = rdata_d_ext; diff --git a/src/lsu.sv b/src/lsu.sv index e0071a11e..c9bc0c051 100644 --- a/src/lsu.sv +++ b/src/lsu.sv @@ -344,9 +344,9 @@ module lsu #( // 12 bit are the same anyway // and we can always generate the byte enable from the address at hand case (operator_i) - LD, SD: // double word + LD, SD, FLD, FSD: // double word be_i = 8'b1111_1111; - LW, LWU, SW: // word + LW, LWU, SW, FLW, FSW: // word case (vaddr_i[2:0]) 3'b000: be_i = 8'b0000_1111; 3'b001: be_i = 8'b0001_1110; @@ -401,12 +401,12 @@ module lsu #( if(lsu_ctrl.valid) begin case (lsu_ctrl.operator) // double word - LD, SD: begin + LD, SD, FLD, FSD: begin if (lsu_ctrl.vaddr[2:0] != 3'b000) data_misaligned = 1'b1; end // word - LW, LWU, SW: begin + LW, LWU, SW, FLW, FSW: begin if (lsu_ctrl.vaddr[1:0] != 2'b00) data_misaligned = 1'b1; end From 259e089ae30bc0394ed45ff740fed13a06c6eedd Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Thu, 29 Mar 2018 13:40:46 +0200 Subject: [PATCH 08/94] :construction: Parameterize register file (FPU preparation) --- Makefile | 2 +- src/ariane.sv | 8 +- src/ariane_regfile.sv | 195 
++++++++++++++----------------------- src/ariane_regfile_ff.sv | 109 ++++++++------------- src/commit_stage.sv | 51 ++++++---- src/csr_regfile.sv | 8 +- src/issue_read_operands.sv | 39 ++++---- src/issue_stage.sv | 1 + 8 files changed, 177 insertions(+), 236 deletions(-) diff --git a/Makefile b/Makefile index 35e4cf751..95ee60936 100755 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ max_cycles ?= 10000000 # Test case to run test_case ?= core_test # QuestaSim Version -questa_version ?= +questa_version ?= -10.6b # verilator version verilator ?= verilator # preset which runs a single test diff --git a/src/ariane.sv b/src/ariane.sv index 4134e9408..3efd2fb95 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -200,6 +200,7 @@ module ariane #( logic tw_csr_id; logic tsr_csr_id; logic dcache_en_csr_nbdcache; + logic csr_write_fflags_commit_cs; // ---------------------------- // Performance Counters <-> * // ---------------------------- @@ -368,7 +369,7 @@ module ariane #( .waddr_i ( waddr_commit_id ), .wdata_i ( wdata_commit_id ), .we_i ( we_commit_id ), - + .we_fpr_i ( ), // TODO .commit_instr_o ( commit_instr_id_commit ), .commit_ack_i ( commit_ack ), .* @@ -474,6 +475,7 @@ module ariane #( .waddr_o ( waddr_commit_id ), .wdata_o ( wdata_commit_id ), .we_o ( we_commit_id ), + .we_fpr_o ( ), // write FPU reg, TODO .commit_lsu_o ( lsu_commit_commit_ex ), .commit_lsu_ready_i ( lsu_commit_ready_ex_commit ), .commit_csr_o ( csr_commit_commit_ex ), @@ -481,6 +483,7 @@ module ariane #( .csr_op_o ( csr_op_commit_csr ), .csr_wdata_o ( csr_wdata_commit_csr ), .csr_rdata_i ( csr_rdata_csr_commit ), + .csr_write_fflags_o ( csr_write_fflags_commit_cs ), .csr_exception_i ( csr_exception_csr_commit ), .fence_i_o ( fence_i_commit_controller ), .fence_o ( fence_commit_controller ), @@ -504,6 +507,7 @@ module ariane #( .commit_ack_i ( commit_ack ), .ex_i ( ex_commit ), .csr_op_i ( csr_op_commit_csr ), + .csr_write_fflags_i ( csr_write_fflags_commit_cs ), .csr_addr_i ( csr_addr_ex_csr ), 
.csr_wdata_i ( csr_wdata_commit_csr ), .csr_rdata_o ( csr_rdata_csr_commit ), @@ -511,6 +515,8 @@ module ariane #( .csr_exception_o ( csr_exception_csr_commit ), .epc_o ( epc_commit_pcgen ), .eret_o ( eret ), + .fflags_o ( ), // FPU flags out + .frm_o ( ), // FPU rounding mode flags out TODO .trap_vector_base_o ( trap_vector_base_commit_pcgen ), .priv_lvl_o ( priv_lvl ), .ld_st_priv_lvl_o ( ld_st_priv_lvl_csr_ex ), diff --git a/src/ariane_regfile.sv b/src/ariane_regfile.sv index 0203202e8..512f8c868 100644 --- a/src/ariane_regfile.sv +++ b/src/ariane_regfile.sv @@ -23,151 +23,98 @@ // latches and is thus smaller than the flip-flop based RF. // -module ariane_regfile #( - parameter DATA_WIDTH = 32 +module ariane_regfile_latch #( + parameter int unsigned DATA_WIDTH = 32, + parameter int unsigned NR_READ_PORTS = 2, + parameter int unsigned NR_WRITE_PORTS = 2, + parameter bit ZERO_REG_ZERO = 0 )( - // Clock and Reset - input logic clk, - input logic rst_n, - - input logic test_en_i, - - //Read port R1 - input logic [4:0] raddr_a_i, - output logic [DATA_WIDTH-1:0] rdata_a_o, - - //Read port R2 - input logic [4:0] raddr_b_i, - output logic [DATA_WIDTH-1:0] rdata_b_o, - - - // Write port W1 - input logic [4:0] waddr_a_i, - input logic [DATA_WIDTH-1:0] wdata_a_i, - input logic we_a_i, - - // Write port W2 - input logic [4:0] waddr_b_i, - input logic [DATA_WIDTH-1:0] wdata_b_i, - input logic we_b_i + // clock and reset + input logic clk_i, + input logic rst_ni, + // disable clock gates for testing + input logic test_en_i, + // read port + input logic [NR_READ_PORTS-1:0][4:0] raddr_i, + output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o, + // write port + input logic [NR_WRITE_PORTS-1:0][4:0] waddr_i, + input logic [NR_WRITE_PORTS-1:0][DATA_WIDTH-1:0] wdata_i, + input logic [NR_WRITE_PORTS-1:0] we_i ); - localparam ADDR_WIDTH = 5;; - localparam NUM_WORDS = 2**ADDR_WIDTH; + localparam ADDR_WIDTH = 5;; + localparam NUM_WORDS = 2**ADDR_WIDTH; - logic [DATA_WIDTH-1:0] 
mem[NUM_WORDS]; + logic [NUM_WORDS-1:1] mem_clocks; - logic [NUM_WORDS-1:1] waddr_onehot_a; - logic [NUM_WORDS-1:1] waddr_onehot_b, waddr_onehot_b_q; + logic [DATA_WIDTH-1:0] mem[NUM_WORDS]; + logic [NR_WRITE_PORTS-1:0][NUM_WORDS-1:1] waddr_onehot,waddr_onehot_q; + logic [NR_WRITE_PORTS-1:0][DATA_WIDTH-1:0] wdata_q; - logic [NUM_WORDS-1:1] mem_clocks; - logic [DATA_WIDTH-1:0] wdata_a_q; - logic [DATA_WIDTH-1:0] wdata_b_q; - // Write port W1 - logic [ADDR_WIDTH-1:0] raddr_a_int, raddr_b_int, waddr_a_int; + // decode addresses + for (genvar i = 0; i < NR_READ_PORTS; i++) + assign rdata_o[i] = mem[raddr_i[i][ADDR_WIDTH-1:0]]; - assign raddr_a_int = raddr_a_i[ADDR_WIDTH-1:0]; - assign raddr_b_int = raddr_b_i[ADDR_WIDTH-1:0]; - assign waddr_a_int = waddr_a_i[ADDR_WIDTH-1:0]; - - int unsigned i; - int unsigned j; - int unsigned k; - int unsigned l; - genvar x; - - logic clk_int; - - //----------------------------------------------------------------------------- - //-- READ : Read address decoder RAD - //----------------------------------------------------------------------------- - assign rdata_a_o = mem[raddr_a_int]; - assign rdata_b_o = mem[raddr_b_int]; - - //----------------------------------------------------------------------------- - // WRITE : SAMPLE INPUT DATA - //--------------------------------------------------------------------------- - - cluster_clock_gating CG_WE_GLOBAL - ( - .clk_i ( clk ), - .en_i ( we_a_i ), - .test_en_i ( test_en_i ), - .clk_o ( clk_int ) - ); - - // use clk_int here, since otherwise we don't want to write anything anyway - always_ff @(posedge clk_int, negedge rst_n) begin : sample_waddr - if (~rst_n) begin - wdata_a_q <= '0; - wdata_b_q <= '0; - waddr_onehot_b_q <= '0; + always_ff @(posedge clk_i, negedge rst_ni) begin : sample_waddr + if (~rst_ni) begin + wdata_q <= '0; end else begin - if (we_a_i) - wdata_a_q <= wdata_a_i; - if (we_b_i) - wdata_b_q <= wdata_b_i; - - waddr_onehot_b_q <= waddr_onehot_b; + for (int unsigned i = 0; i < 
NR_WRITE_PORTS; i++) + // enable flipflop will most probably infer clock gating + if (we_i[i]) begin + wdata_q[i] <= wdata_i[i]; + end + waddr_onehot_q <= waddr_onehot; end end - //----------------------------------------------------------------------------- - //-- WRITE : Write Address Decoder (WAD), combinatorial process - //----------------------------------------------------------------------------- - always_comb begin : p_WADa - for (i = 1; i < NUM_WORDS; i++) begin : p_WordItera - if ((we_a_i == 1'b1) && (waddr_a_i == i)) - waddr_onehot_a[i] = 1'b1; - else - waddr_onehot_a[i] = 1'b0; + // WRITE : Write Address Decoder (WAD), combinatorial process + always_comb begin : decode_write_addess + for (int unsigned i = 0; i < NR_WRITE_PORTS; i++) begin + for (int unsigned j = 1; j < NUM_WORDS; j++) begin + if (we_i[i] && (waddr_i[i] == j)) + waddr_onehot[i][j] = 1'b1; + else + waddr_onehot[i][j] = 1'b0; + end end end - always_comb begin : p_WADb - for (j = 1; j < NUM_WORDS; j++) begin : p_WordIterb - if ((we_b_i == 1'b1) && (waddr_b_i == j)) - waddr_onehot_b[j] = 1'b1; - else - waddr_onehot_b[j] = 1'b0; - end + // WRITE : Clock gating (if integrated clock-gating cells are available) + for (genvar x = ZERO_REG_ZERO; x < NUM_WORDS; x++) begin + + logic [NR_WRITE_PORTS-1:0] waddr_ored; + + for (genvar i = 0; i < NR_WRITE_PORTS; i++) + assign waddr_ored[i] = waddr_onehot[i][x]; + + cluster_clock_gating i_cg ( + .clk_i ( clk_i ), + .en_i ( |waddr_ored ), + .test_en_i ( test_en_i ), + .clk_o ( mem_clocks[x] ) + ); end - //----------------------------------------------------------------------------- - //-- WRITE : Clock gating (if integrated clock-gating cells are available) - //----------------------------------------------------------------------------- - generate - for (x = 1; x < NUM_WORDS; x++) - begin : CG_CELL_WORD_ITER - cluster_clock_gating CG_Inst - ( - .clk_i ( clk_int ), - .en_i ( waddr_onehot_a[x] | waddr_onehot_b[x] ), - .test_en_i ( test_en_i ), - .clk_o ( 
mem_clocks[x] ) - ); - end - endgenerate - - //----------------------------------------------------------------------------- - //-- WRITE : Write operation - //----------------------------------------------------------------------------- - //-- Generate M = WORDS sequential processes, each of which describes one - //-- word of the memory. The processes are synchronized with the clocks - //-- ClocksxC(i), i = 0, 1, ..., M-1 - //-- Use active low, i.e. transparent on low latches as storage elements - //-- Data is sampled on rising clock edge + // Generate M = WORDS sequential processes, each of which describes one + // word of the memory. The processes are synchronized with the clocks + // ClocksxC(i), i = 0, 1, ..., M-1 + // Use active low, i.e. transparent on low latches as storage elements + // Data is sampled on rising clock edge // Integer registers always_latch begin : latch_wdata // Note: The assignment has to be done inside this process or Modelsim complains about it - mem[0] = '0; + if (ZERO_REG_ZERO) + mem[0] = '0; - for(k = 1; k < NUM_WORDS; k++) - begin : w_WordIter - if (mem_clocks[k] == 1'b1) - mem[k] = waddr_onehot_b_q[k] ? 
wdata_b_q : wdata_a_q; - end + for (int unsigned i = 0; i < NR_WRITE_PORTS; i++) begin + for (int unsigned k = ZERO_REG_ZERO; k < NUM_WORDS; k++) begin + if (mem_clocks[k] && waddr_onehot_q[i][k]) + mem[k] = wdata_q[i]; + end + end end endmodule diff --git a/src/ariane_regfile_ff.sv b/src/ariane_regfile_ff.sv index 6514ecb12..51b716be3 100644 --- a/src/ariane_regfile_ff.sv +++ b/src/ariane_regfile_ff.sv @@ -23,87 +23,58 @@ // module ariane_regfile #( - parameter DATA_WIDTH = 32 + parameter int unsigned DATA_WIDTH = 32, + parameter int unsigned NR_READ_PORTS = 2, + parameter int unsigned NR_WRITE_PORTS = 2, + parameter bit ZERO_REG_ZERO = 0 )( - // Clock and Reset - input logic clk, - input logic rst_n, - - input logic test_en_i, - - //Read port R1 - input logic [4:0] raddr_a_i, - output logic [DATA_WIDTH-1:0] rdata_a_o, - - //Read port R2 - input logic [4:0] raddr_b_i, - output logic [DATA_WIDTH-1:0] rdata_b_o, - - - // Write port W1 - input logic [4:0] waddr_a_i, - input logic [DATA_WIDTH-1:0] wdata_a_i, - input logic we_a_i, - - // Write port W2 - input logic [4:0] waddr_b_i, - input logic [DATA_WIDTH-1:0] wdata_b_i, - input logic we_b_i + // clock and reset + input logic clk_i, + input logic rst_ni, + // disable clock gates for testing + input logic test_en_i, + // read port + input logic [NR_READ_PORTS-1:0][4:0] raddr_i, + output logic [NR_READ_PORTS-1:0][DATA_WIDTH-1:0] rdata_o, + // write port + input logic [NR_WRITE_PORTS-1:0][4:0] waddr_i, + input logic [NR_WRITE_PORTS-1:0][DATA_WIDTH-1:0] wdata_i, + input logic [NR_WRITE_PORTS-1:0] we_i ); localparam ADDR_WIDTH = 5; localparam NUM_WORDS = 2**ADDR_WIDTH; - logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] rf_reg; - logic [NUM_WORDS-1:0] we_a_dec, we_b_dec; + logic [NUM_WORDS-1:0][DATA_WIDTH-1:0] mem; + logic [NR_WRITE_PORTS-1:0][NUM_WORDS-1:0] we_dec; - always_comb begin : we_a_decoder - for (int i = 0; i < NUM_WORDS; i++) begin - if (waddr_a_i == i) - we_a_dec[i] = we_a_i; - else - we_a_dec[i] = 1'b0; - end - end - 
always_comb begin : we_b_decoder - for (int i = 0; i < NUM_WORDS; i++) begin - if (waddr_b_i == i) - we_b_dec[i] = we_b_i; - else - we_b_dec[i] = 1'b0; - end - end - - generate - // loop from 1 to NUM_WORDS-1 as R0 is nil - for (genvar i = 1; i < NUM_WORDS; i++) begin : rf_gen - - always_ff @(posedge clk, negedge rst_n) begin : register_write_behavioral - if (rst_n==1'b0) begin - rf_reg[i] <= 'b0; - end else begin - if (we_a_dec[i]) - rf_reg[i] <= wdata_a_i; - - if (we_b_dec[i]) - rf_reg[i] <= wdata_b_i; + always_comb begin : we_decoder + for (int unsigned j = 0; j < NR_WRITE_PORTS; j++) begin + for (int unsigned i = 0; i < NUM_WORDS; i++) begin + if (waddr_i[j] == i) + we_dec[j][i] = we_i[j]; + else + we_dec[j][i] = 1'b0; + end end - end end -// R0 is nil -`ifdef verilator - always_ff @(posedge clk, negedge rst_n) begin - rf_reg[0] <= '0; + // loop from 1 to NUM_WORDS-1 as R0 is nil + always_ff @(posedge clk_i, negedge rst_ni) begin : register_write_behavioral + if (~rst_ni) begin + mem <= '{default: '0}; + end else begin + for (int unsigned j = 0; j < NR_WRITE_PORTS; j++) begin + for (int unsigned i = ZERO_REG_ZERO; i < NUM_WORDS; i++) begin + if (we_dec[j][i]) + mem[i] <= wdata_i[j]; + end + end + end end -`else - assign rf_reg[0] = '0; -`endif - endgenerate - - assign rdata_a_o = rf_reg[raddr_a_i]; - assign rdata_b_o = rf_reg[raddr_b_i]; + for (genvar i = 0; i < NR_READ_PORTS; i++) + assign rdata_o[i] = mem[raddr_i[i]]; endmodule diff --git a/src/commit_stage.sv b/src/commit_stage.sv index cf3dc927d..a7690afcf 100644 --- a/src/commit_stage.sv +++ b/src/commit_stage.sv @@ -21,16 +21,14 @@ module commit_stage #( input logic halt_i, // request to halt the core input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline output exception_t exception_o, // take exception to controller - // from scoreboard input scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_i, // the instruction we want to commit output logic [NR_COMMIT_PORTS-1:0] 
commit_ack_o, // acknowledge that we are indeed committing - // to register file output logic [NR_COMMIT_PORTS-1:0][4:0] waddr_o, // register file write address output logic [NR_COMMIT_PORTS-1:0][63:0] wdata_o, // register file write data output logic [NR_COMMIT_PORTS-1:0] we_o, // register file write enable - + output logic [NR_COMMIT_PORTS-1:0] we_fpr_o, // floating point register enable // to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline) output logic [63:0] pc_o, // to/from CSR file @@ -38,6 +36,7 @@ module commit_stage #( output logic [63:0] csr_wdata_o, // data to write to CSR input logic [63:0] csr_rdata_i, // data to read from CSR input exception_t csr_exception_i, // exception or interrupt occurred in CSR stage (the same as commit) + output logic csr_write_fflags_o, // write the fflags CSR // commit signals to ex output logic commit_lsu_o, // commit the pending store input logic commit_lsu_ready_i, // commit buffer of LSU is ready @@ -59,21 +58,22 @@ module commit_stage #( // write register file or commit instruction in LSU or CSR Buffer always_comb begin : commit // default assignments - commit_ack_o[0] = 1'b0; - commit_ack_o[1] = 1'b0; + commit_ack_o[0] = 1'b0; + commit_ack_o[1] = 1'b0; - we_o[0] = 1'b0; - we_o[1] = 1'b0; + we_o[0] = 1'b0; + we_o[1] = 1'b0; - commit_lsu_o = 1'b0; - commit_csr_o = 1'b0; - wdata_o[0] = commit_instr_i[0].result; - wdata_o[1] = commit_instr_i[1].result; - csr_op_o = ADD; // this corresponds to a CSR NOP - csr_wdata_o = 64'b0; - fence_i_o = 1'b0; - fence_o = 1'b0; - sfence_vma_o = 1'b0; + commit_lsu_o = 1'b0; + commit_csr_o = 1'b0; + wdata_o[0] = commit_instr_i[0].result; + wdata_o[1] = commit_instr_i[1].result; + csr_op_o = ADD; // this corresponds to a CSR NOP + csr_wdata_o = 64'b0; + fence_i_o = 1'b0; + fence_o = 1'b0; + sfence_vma_o = 1'b0; + csr_write_fflags_o = 1'b0; // we will not commit the instruction if we took an exception // but we do not commit the instruction if 
we requested a halt @@ -101,6 +101,16 @@ module commit_stage #( end end + // --------- + // FPU + // --------- + if (commit_instr_i[0].fu == FPU) begin + // write the CSR with potential exception flags from retiring floating point instruction + csr_op_o = CSR_SET; + csr_wdata_o = {59'b0, commit_instr_i[0].ex.cause[4:0]}; + csr_write_fflags_o = 1'b1; + end + // --------- // CSR Logic // --------- @@ -145,10 +155,15 @@ module commit_stage #( // check if the second instruction can be committed as well and the first wasn't a CSR instruction if (commit_ack_o[0] && commit_instr_i[1].valid && !halt_i && !(commit_instr_i[0].fu inside {CSR}) && !flush_dcache_i) begin // only if the first instruction didn't throw an exception and this instruction won't throw an exception - // and the operator is of type ALU, LOAD, CTRL_FLOW, MULT - if (!exception_o.valid && !commit_instr_i[1].ex.valid && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT})) begin + // and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT or FPU + if (!exception_o.valid && !commit_instr_i[1].ex.valid && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU})) begin we_o[1] = 1'b1; commit_ack_o[1] = 1'b1; + // additionally check if we are retiring an FPU instruction because we need to make sure that we right all + // exception flags + csr_op_o = CSR_SET; + csr_wdata_o = {59'b0, (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])}; + csr_write_fflags_o = (commit_instr_i[1].fu == FPU); end end end diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv index 9304735ce..fe9efd718 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -45,6 +45,7 @@ module csr_regfile #( input logic [11:0] csr_addr_i, // Address of the register to read/write input logic [63:0] csr_wdata_i, // Write data in output logic [63:0] csr_rdata_o, // Read data out + input logic csr_write_fflags_i, // Write fflags register input logic [63:0] pc_i, // PC of instruction accessing the CSR output 
exception_t csr_exception_o, // attempts to access a CSR without appropriate privilege // level or to write a read-only register also @@ -96,8 +97,8 @@ module csr_regfile #( // ---------------- // Assignments // ---------------- - // Debug MUX - assign csr_addr = csr_t'(((debug_csr_req_i) ? debug_csr_addr_i : csr_addr_i)); + // Debug MUX and fflags register + assign csr_addr = csr_t'(((debug_csr_req_i) ? debug_csr_addr_i : (csr_write_fflags_i) ? CSR_FFLAGS : csr_addr_i)); // Output the read data directly assign debug_csr_rdata_o = csr_rdata; @@ -667,6 +668,9 @@ module csr_regfile #( csr_exception_o.valid = 1'b1; end end + // in case we are writing the CSR flag no exception can ever occur, don't set the valid flag in that case + if (csr_write_fflags_i) + csr_exception_o.valid = 1'b0; // ------------------- // Wait for Interrupt diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index 01eb8e53d..02bfa255a 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -70,7 +70,8 @@ module issue_read_operands #( // commit port input logic [NR_COMMIT_PORTS-1:0][4:0] waddr_i, input logic [NR_COMMIT_PORTS-1:0][63:0] wdata_i, - input logic [NR_COMMIT_PORTS-1:0] we_i + input logic [NR_COMMIT_PORTS-1:0] we_i, + input logic [NR_COMMIT_PORTS-1:0] we_fpr_i // committing instruction instruction // from scoreboard // input scoreboard_entry commit_instr_i, @@ -307,27 +308,23 @@ module issue_read_operands #( // ---------------------- // Integer Register File // ---------------------- + logic [1:0][63:0] rdata_o; + + assign operand_a_regfile = rdata_o[0]; + assign operand_b_regfile = rdata_o[1]; + ariane_regfile #( - .DATA_WIDTH ( 64 ) - ) regfile_i ( - // Clock and Reset - .clk ( clk_i ), - .rst_n ( rst_ni ), - .test_en_i ( test_en_i ), - - .raddr_a_i ( raddr_a ), - .rdata_a_o ( operand_a_regfile ), - - .raddr_b_i ( issue_instr_i.rs2[4:0] ), - .rdata_b_o ( operand_b_regfile ), - - .waddr_a_i ( waddr ), - .wdata_a_i ( wdata ), - .we_a_i ( we ), - - 
.waddr_b_i ( waddr_i[1] ), - .wdata_b_i ( wdata_i[1] ), - .we_b_i ( we_i[1] ) + .DATA_WIDTH ( 64 ), + .NR_READ_PORTS ( 2 ), + .NR_WRITE_PORTS ( 2 ), + .ZERO_REG_ZERO ( 1 ) + ) i_ariane_regfile ( + .raddr_i ( '{issue_instr_i.rs2[4:0], raddr_a} ), + .rdata_o ( rdata_o ), + .waddr_i ( '{waddr_i[1], waddr} ), + .wdata_i ( '{wdata_i[1], wdata} ), + .we_i ( '{we_i[1], we} ), + .* ); // ---------------------- diff --git a/src/issue_stage.sv b/src/issue_stage.sv index b90cfbc39..903d6b8da 100644 --- a/src/issue_stage.sv +++ b/src/issue_stage.sv @@ -76,6 +76,7 @@ module issue_stage #( input logic [NR_COMMIT_PORTS-1:0][4:0] waddr_i, input logic [NR_COMMIT_PORTS-1:0][63:0] wdata_i, input logic [NR_COMMIT_PORTS-1:0] we_i, + input logic [NR_COMMIT_PORTS-1:0] we_fpr_i, output scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_o, input logic [NR_COMMIT_PORTS-1:0] commit_ack_i From b93a971a5878a266159e3a9fc9b65bfb99fe1bd7 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Sun, 1 Apr 2018 03:53:57 +0200 Subject: [PATCH 09/94] :construction: Add FP register file and issue --- include/ariane_pkg.sv | 68 ++++++++++++- src/ariane.sv | 20 ++-- src/decoder.sv | 20 ++-- src/issue_read_operands.sv | 202 ++++++++++++++++++++++++------------- src/issue_stage.sv | 23 ++++- src/re_name.sv | 47 ++++++--- src/scoreboard.sv | 74 +++++++++----- 7 files changed, 324 insertions(+), 130 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index 5879b7392..458838c08 100755 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -39,6 +39,13 @@ package ariane_pkg; localparam bit RVF = 1'b1; // Is F extension enabled localparam bit RVD = 1'b1; // Is D extension enabled + // No need changing these by hand + localparam bit FP_PRESENT = RVF | RVD; + // Length of widest floating-point format + localparam FLEN = RVD ? 64 : // D ext. + RVF ? 32 : // F ext. 
+ 0; + localparam logic [63:0] ISA_CODE = (0 << 0) // A - Atomic Instructions extension | (1 << 2) // C - Compressed extension | (RVD << 3) // D - Double precsision floating-point extension @@ -163,7 +170,7 @@ package ariane_pkg; // Floating-Point Load and Store Instructions FLD, FLW, FSD, FSW, // Floating-Point Computational Instructions - FADD, FSUB, FMUL, FDIV, FMIN_MAX, FSQRT, FMADD, FMSUB, FNMADD, FNMSUB, + FADD, FSUB, FMUL, FDIV, FMIN_MAX, FSQRT, FMADD, FMSUB, FNMSUB, FNMADD, // Floating-Point Conversion and Move Instructions FCVT_F2I, FCVT_I2F, FCVT_F2F, FSGNJ, FMV_F2X, FMV_X2F, // Floating-Point Compare Instructions @@ -172,6 +179,65 @@ package ariane_pkg; FCLASS } fu_op; + // ------------------------------- + // Extract Src/Dst FP Reg from Op + // ------------------------------- + function automatic logic is_rs1_fpr (input fu_op op); + if (FP_PRESENT) begin // makes function static for non-fp case + unique case (op) inside + [FADD:FNMADD], // Computational Operations + FCVT_F2I, // Float-Int Casts + FCVT_F2F, // Float-Float Casts + FSGNJ, // Sign Injections + FMV_F2X, // FPR-GPR Moves + FCMP, // Comparisons + FCLASS : return 1'b1; // Classifications + default : return 1'b0; // all other ops + endcase + end else + return 1'b0; + endfunction; + + function automatic logic is_rs2_fpr (input fu_op op); + if (FP_PRESENT) begin // makes function static for non-fp case + unique case (op) inside + [FSD:FSW], // FP Stores + [FADD:FMIN_MAX], // Computational Operations (no sqrt) + [FMADD:FNMADD], // Fused Computational Operations + FSGNJ, // Sign Injections + FCMP : return 1'b1; // Comparisons + default : return 1'b0; // all other ops + endcase + end else + return 1'b0; + endfunction; + + // ternary operations encode the rs3 address in the imm field + function automatic logic is_imm_fpr (input fu_op op); + if (FP_PRESENT) begin // makes function static for non-fp case + unique case (op) inside + [FMADD:FNMADD] : return 1'b1; // Fused Computational Operations + 
default : return 1'b0; // all other ops + endcase + end else + return 1'b0; + endfunction; + + function automatic logic is_rd_fpr (input fu_op op); + if (FP_PRESENT) begin // makes function static for non-fp case + unique case (op) inside + [FLD:FLW], // FP Loads + [FADD:FNMADD], // Computational Operations + FCVT_I2F, // Int-Float Casts + FCVT_F2F, // Float-Float Casts + FSGNJ, // Sign Injections + FMV_X2F : return 1'b1; // GPR-FPR Moves + default : return 1'b0; // all other ops + endcase + end else + return 1'b0; + endfunction; + // ---------------------- // Extract Bytes from Op // ---------------------- diff --git a/src/ariane.sv b/src/ariane.sv index 3efd2fb95..d7ab57fd2 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -172,7 +172,8 @@ module ariane #( // -------------- logic [NR_COMMIT_PORTS-1:0][4:0] waddr_commit_id; logic [NR_COMMIT_PORTS-1:0][63:0] wdata_commit_id; - logic [NR_COMMIT_PORTS-1:0] we_commit_id; + logic [NR_COMMIT_PORTS-1:0] we_gpr_commit_id; + logic [NR_COMMIT_PORTS-1:0] we_fpr_commit_id; // -------------- // IF <-> EX // -------------- @@ -184,6 +185,8 @@ module ariane #( // -------------- // CSR <-> * // -------------- + logic [4:0] fflags_csr_ci; + logic [2:0] frm_csr_id; logic enable_translation_csr_ex; logic en_ld_st_translation_csr_ex; priv_lvl_t ld_st_priv_lvl_csr_ex; @@ -365,11 +368,11 @@ module ariane #( .wbdata_i ( {alu_result_ex_id, lsu_result_ex_id, branch_result_ex_id, csr_result_ex_id, mult_result_ex_id }), .ex_ex_i ( {{$bits(exception_t){1'b0}}, lsu_exception_ex_id, branch_exception_ex_id, {$bits(exception_t){1'b0}}, {$bits(exception_t){1'b0}} }), .wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, branch_valid_ex_id, csr_valid_ex_id, mult_valid_ex_id }), - + // Commit .waddr_i ( waddr_commit_id ), .wdata_i ( wdata_commit_id ), - .we_i ( we_commit_id ), - .we_fpr_i ( ), // TODO + .we_gpr_i ( we_gpr_commit_id ), + .we_fpr_i ( we_fpr_commit_id ), .commit_instr_o ( commit_instr_id_commit ), .commit_ack_i ( commit_ack ), .* @@ -474,8 
+477,8 @@ module ariane #( .no_st_pending_i ( no_st_pending_ex_commit ), .waddr_o ( waddr_commit_id ), .wdata_o ( wdata_commit_id ), - .we_o ( we_commit_id ), - .we_fpr_o ( ), // write FPU reg, TODO + .we_o ( we_gpr_commit_id ), + .we_fpr_o ( we_fpr_commit_id ), .commit_lsu_o ( lsu_commit_commit_ex ), .commit_lsu_ready_i ( lsu_commit_ready_ex_commit ), .commit_csr_o ( csr_commit_commit_ex ), @@ -516,9 +519,10 @@ module ariane #( .epc_o ( epc_commit_pcgen ), .eret_o ( eret ), .fflags_o ( ), // FPU flags out - .frm_o ( ), // FPU rounding mode flags out TODO .trap_vector_base_o ( trap_vector_base_commit_pcgen ), .priv_lvl_o ( priv_lvl ), + .fflags_o ( fflags_csr_ci ), + .frm_o ( frm_csr_id ), .ld_st_priv_lvl_o ( ld_st_priv_lvl_csr_ex ), .en_translation_o ( enable_translation_csr_ex ), .en_ld_st_translation_o ( en_ld_st_translation_csr_ex ), @@ -640,7 +644,7 @@ module ariane #( // write-back assign tracer_if.waddr = waddr_commit_id; assign tracer_if.wdata = wdata_commit_id; - assign tracer_if.we = we_commit_id; + assign tracer_if.we = we_gpr_commit_id; // commit assign tracer_if.commit_instr = commit_instr_id_commit; assign tracer_if.commit_ack = commit_ack; diff --git a/src/decoder.sv b/src/decoder.sv index 8951c622e..8b440d402 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -392,7 +392,7 @@ module decoder ( // Floating-Point Load/store // -------------------------------- OPCODE_STORE_FP: begin - if (RVF || RVD) begin // only generate decoder if FP extensions are enabled + if (FP_PRESENT) begin // only generate decoder if FP extensions are enabled (static) instruction_o.fu = STORE; imm_select = SIMM; instruction_o.rs1 = instr.stype.rs1; @@ -411,7 +411,7 @@ module decoder ( end OPCODE_LOAD_FP: begin - if (RVF || RVD) begin // only generate decoder if FP extensions are enabled + if (FP_PRESENT) begin // only generate decoder if FP extensions are enabled (static) instruction_o.fu = LOAD; imm_select = IIMM; instruction_o.rs1 = instr.itype.rs1; @@ -436,7 +436,7 @@ 
module decoder ( OPCODE_MSUB, OPCODE_NMSUB, OPCODE_NMADD: begin - if (RVF || RVD) begin // only generate decoder if FP extensions are enabled + if (FP_PRESENT) begin // only generate decoder if FP extensions are enabled (static) instruction_o.fu = FPU; instruction_o.rs1 = instr.r4type.rs1; instruction_o.rs2 = instr.r4type.rs2; @@ -454,8 +454,8 @@ module decoder ( // determine fp format unique case (instr.r4type.funct2) // Only process instruction if corresponding extension is active (static) - 2'b00: if (!RVF) illegal_instr = 1'b1; - 2'b01: if (!RVD) illegal_instr = 1'b1; + 2'b00: if (~RVF) illegal_instr = 1'b1; + 2'b01: if (~RVD) illegal_instr = 1'b1; default: illegal_instr = 1'b1; endcase end else @@ -463,7 +463,7 @@ module decoder ( end OPCODE_FP: begin - if (RVF || RVD) begin // only generate decoder if FP extensions are enabled + if (FP_PRESENT) begin // only generate decoder if FP extensions are enabled (static) instruction_o.fu = FPU; instruction_o.rs1 = instr.rftype.rs1; instruction_o.rs2 = instr.rftype.rs2; @@ -497,8 +497,8 @@ module decoder ( // check source format unique case (instr.rftype.rs2[21:20]) // Only process instruction if corresponding extension is active (static) - 2'b00: if (!RVF) illegal_instr = 1'b1; - 2'b01: if (!RVD) illegal_instr = 1'b1; + 2'b00: if (~RVF) illegal_instr = 1'b1; + 2'b01: if (~RVD) illegal_instr = 1'b1; default: illegal_instr = 1'b1; endcase end @@ -540,8 +540,8 @@ module decoder ( // check format unique case (instr.rftype.fmt) // Only process instruction if corresponding extension is active (static) - 2'b00: if (!RVF) illegal_instr = 1'b1; - 2'b01: if (!RVD) illegal_instr = 1'b1; + 2'b00: if (~RVF) illegal_instr = 1'b1; + 2'b01: if (~RVD) illegal_instr = 1'b1; default: illegal_instr = 1'b1; endcase diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index 02bfa255a..9eee28f02 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -40,8 +40,12 @@ module issue_read_operands #( output 
logic [REG_ADDR_SIZE-1:0] rs2_o, input logic [63:0] rs2_i, input logic rs2_valid_i, + output logic [REG_ADDR_SIZE-1:0] rs3_o, + input logic [FLEN-1:0] rs3_i, + input logic rs3_valid_i, // get clobber input - input fu_t [2**REG_ADDR_SIZE:0] rd_clobber_i, + input fu_t [2**REG_ADDR_SIZE:0] rd_clobber_gpr_i, + input fu_t [2**REG_ADDR_SIZE:0] rd_clobber_fpr_i, // To FU, just single issue for now output fu_t fu_o, output fu_op operator_o, @@ -62,15 +66,18 @@ module issue_read_operands #( input logic lsu_ready_i, // FU is ready output logic lsu_valid_o, // Output is valid // MULT - input logic mult_ready_i, // FU is ready - output logic mult_valid_o, // Output is valid + input logic mult_ready_i, // FU is ready + output logic mult_valid_o, // Output is valid + // FPU + input logic fpu_ready_i, // FU is ready + output logic fpu_valid_o, // Output is valid // CSR input logic csr_ready_i, // FU is ready output logic csr_valid_o, // Output is valid // commit port input logic [NR_COMMIT_PORTS-1:0][4:0] waddr_i, input logic [NR_COMMIT_PORTS-1:0][63:0] wdata_i, - input logic [NR_COMMIT_PORTS-1:0] we_i, + input logic [NR_COMMIT_PORTS-1:0] we_gpr_i, input logic [NR_COMMIT_PORTS-1:0] we_fpr_i // committing instruction instruction // from scoreboard @@ -80,6 +87,7 @@ module issue_read_operands #( logic stall; // stall signal, we do not want to fetch any more entries logic fu_busy; // functional unit is busy logic [63:0] operand_a_regfile, operand_b_regfile; // operands coming from regfile + logic [FLEN-1:0] operand_c_regfile; // third operand only from fp regfile // output flipflop (ID <-> EX) logic [63:0] operand_a_n, operand_a_q, @@ -88,6 +96,7 @@ module issue_read_operands #( logic alu_valid_n, alu_valid_q; logic mult_valid_n, mult_valid_q; + logic fpu_valid_n, fpu_valid_q; logic lsu_valid_n, lsu_valid_q; logic csr_valid_n, csr_valid_q; logic branch_valid_n, branch_valid_q; @@ -97,7 +106,7 @@ module issue_read_operands #( fu_t fu_n, fu_q; // functional unit to use // forwarding 
signals - logic forward_rs1, forward_rs2; + logic forward_rs1, forward_rs2, forward_rs3; // ID <-> EX registers assign operand_a_o = operand_a_q; assign operand_b_o = operand_b_q; @@ -108,50 +117,12 @@ module issue_read_operands #( assign lsu_valid_o = lsu_valid_q; assign csr_valid_o = csr_valid_q; assign mult_valid_o = mult_valid_q; + assign fpu_valid_o = fpu_valid_q; assign trans_id_o = trans_id_q; assign imm_o = imm_q; // --------------- // Issue Stage // --------------- - // We can issue an instruction if we do not detect that any other instruction is writing the same - // destination register. - // We also need to check if there is an unresolved branch in the scoreboard. - always_comb begin : issue_scoreboard - // default assignment - issue_ack_o = 1'b0; - // check that we didn't stall, that the instruction we got is valid - // and that the functional unit we need is not busy - if (issue_instr_valid_i) begin - // check that the corresponding functional unit is not busy - if (~stall && ~fu_busy) begin - // ----------------------------------------- - // WAW - Write After Write Dependency Check - // ----------------------------------------- - // no other instruction has the same destination register -> issue the instruction - if (rd_clobber_i[issue_instr_i.rd] == NONE) begin - issue_ack_o = 1'b1; - end - // or check that the target destination register will be written in this cycle by the - // commit stage - for (int unsigned i = 0; i < NR_COMMIT_PORTS; i++) - if (we_i[i] && waddr_i[i] == issue_instr_i.rd) begin - issue_ack_o = 1'b1; - end - end - // we can also issue the instruction under the following two circumstances: - // we can do this even if we are stalled or no functional unit is ready (as we don't need one) - // the decoder needs to make sure that the instruction is marked as valid when it does not - // need any functional unit or if an exception occurred previous to the execute stage. - // 1. 
we already got an exception - if (issue_instr_i.ex.valid) begin - issue_ack_o = 1'b1; - end - // 2. it is an instruction which does not need any functional unit - if (issue_instr_i.fu == NONE) begin - issue_ack_o = 1'b1; - end - end - end // select the right busy signal // this obviously depends on the functional unit we need @@ -165,6 +136,8 @@ module issue_read_operands #( fu_busy = ~branch_ready_i; MULT: fu_busy = ~mult_ready_i; + FPU: + fu_busy = ~fpu_ready_i; LOAD, STORE: fu_busy = ~lsu_ready_i; CSR: @@ -184,39 +157,52 @@ module issue_read_operands #( // operand forwarding signals forward_rs1 = 1'b0; forward_rs2 = 1'b0; + forward_rs3 = 1'b0; // FPR only // poll the scoreboard for those values rs1_o = issue_instr_i.rs1; rs2_o = issue_instr_i.rs2; + rs3_o = issue_instr_i.result[REG_ADDR_SIZE-1:0]; // rs3 is encoded in imm field + // 0. check that we are not using the zimm type in RS1 // as this is an immediate we do not have to wait on anything here - // 1. check if the source registers are clobberd + // 1. check if the source registers are clobbered --> check appropriate clobber list (gpr/fpr) // 2. poll the scoreboard - if (~issue_instr_i.use_zimm && rd_clobber_i[issue_instr_i.rs1] != NONE) begin + if (~issue_instr_i.use_zimm && (is_rs1_fpr(issue_instr_i.op) ? rd_clobber_fpr_i[issue_instr_i.rs1] != NONE + : rd_clobber_gpr_i[issue_instr_i.rs1] != NONE)) begin // check if the clobbering instruction is not a CSR instruction, CSR instructions can only // be fetched through the register file since they can't be forwarded - // the operand is available, forward it - if (rs1_valid_i && rd_clobber_i[issue_instr_i.rs1] != CSR) + // if the operand is available, forward it. CSRs don't write to/from FPR + if (rs1_valid_i && (is_rs1_fpr(issue_instr_i.op) ? 
1'b1 : rd_clobber_gpr_i[issue_instr_i.rs1] != CSR)) forward_rs1 = 1'b1; else // the operand is not available -> stall stall = 1'b1; - end - if (rd_clobber_i[issue_instr_i.rs2] != NONE) begin - // the operand is available, forward it - if (rs2_valid_i && rd_clobber_i[issue_instr_i.rs2] != CSR) + if (is_rs2_fpr(issue_instr_i.op) ? rd_clobber_fpr_i[issue_instr_i.rs2] != NONE + : rd_clobber_gpr_i[issue_instr_i.rs2] != NONE) begin + // if the operand is available, forward it. CSRs don't write to/from FPR + if (rs2_valid_i && (is_rs2_fpr(issue_instr_i.op) ? 1'b1 : rd_clobber_gpr_i[issue_instr_i.rs2] != CSR)) forward_rs2 = 1'b1; else // the operand is not available -> stall stall = 1'b1; end + + if (is_imm_fpr(issue_instr_i.op) && rd_clobber_fpr_i[issue_instr_i.result[REG_ADDR_SIZE-1:0]] != NONE) begin + // if the operand is available, forward it. CSRs don't write to/from FPR so no need to check + if (rs3_valid_i) + forward_rs3 = 1'b1; + else // the operand is not available -> stall + stall = 1'b1; + end end + // Forwarding/Output MUX always_comb begin : forwarding_operand_select - // default is regfile + // default is regfiles (gpr or fpr) operand_a_n = operand_a_regfile; operand_b_n = operand_b_regfile; - // immediates are the third operands in the store case - imm_n = issue_instr_i.result; + // immediates are the third operands in the store case or for certain fp operations + imm_n = is_imm_fpr(issue_instr_i.op) ? 
operand_c_regfile : issue_instr_i.result; trans_id_n = issue_instr_i.trans_id; fu_n = issue_instr_i.fu; operator_n = issue_instr_i.op; @@ -229,6 +215,10 @@ module issue_read_operands #( operand_b_n = rs2_i; end + if (forward_rs3) begin + imm_n = rs3_i; + end + // use the PC as operand a if (issue_instr_i.use_pc) begin operand_a_n = issue_instr_i.pc; @@ -244,11 +234,13 @@ module issue_read_operands #( operand_b_n = issue_instr_i.result; end end + // FU select, assert the correct valid out signal (in the next cycle) always_comb begin : unit_valid alu_valid_n = 1'b0; lsu_valid_n = 1'b0; mult_valid_n = 1'b0; + fpu_valid_n = 1'b0; csr_valid_n = 1'b0; branch_valid_n = 1'b0; // Exception pass through: @@ -262,6 +254,8 @@ module issue_read_operands #( branch_valid_n = 1'b1; MULT: mult_valid_n = 1'b1; + FPU: + fpu_valid_n = 1'b1; LOAD, STORE: lsu_valid_n = 1'b1; CSR: @@ -275,11 +269,54 @@ module issue_read_operands #( alu_valid_n = 1'b0; lsu_valid_n = 1'b0; mult_valid_n = 1'b0; + fpu_valid_n = 1'b0; csr_valid_n = 1'b0; branch_valid_n = 1'b0; end end + // We can issue an instruction if we do not detect that any other instruction is writing the same + // destination register. + // We also need to check if there is an unresolved branch in the scoreboard. + always_comb begin : issue_scoreboard + // default assignment + issue_ack_o = 1'b0; + // check that we didn't stall, that the instruction we got is valid + // and that the functional unit we need is not busy + if (issue_instr_valid_i) begin + // check that the corresponding functional unit is not busy + if (~stall && ~fu_busy) begin + // ----------------------------------------- + // WAW - Write After Write Dependency Check + // ----------------------------------------- + // no other instruction has the same destination register -> issue the instruction + if (is_rd_fpr(issue_instr_i.op) ? 
(rd_clobber_fpr_i[issue_instr_i.rd] == NONE) + : (rd_clobber_gpr_i[issue_instr_i.rd] == NONE)) begin + issue_ack_o = 1'b1; + end + // or check that the target destination register will be written in this cycle by the + // commit stage + for (int unsigned i = 0; i < NR_COMMIT_PORTS; i++) + if (is_rd_fpr(issue_instr_i.op) ? (we_fpr_i[i] && waddr_i[i] == issue_instr_i.rd) + : (we_gpr_i[i] && waddr_i[i] == issue_instr_i.rd)) begin + issue_ack_o = 1'b1; + end + end + // we can also issue the instruction under the following two circumstances: + // we can do this even if we are stalled or no functional unit is ready (as we don't need one) + // the decoder needs to make sure that the instruction is marked as valid when it does not + // need any functional unit or if an exception occurred previous to the execute stage. + // 1. we already got an exception + if (issue_instr_i.ex.valid) begin + issue_ack_o = 1'b1; + end + // 2. it is an instruction which does not need any functional unit + if (issue_instr_i.fu == NONE) begin + issue_ack_o = 1'b1; + end + end + end + // -------------------- // Debug Multiplexers // -------------------- @@ -290,14 +327,13 @@ module issue_read_operands #( always_comb begin // get the address from the issue stage by default // read port - debug_gpr_rdata_o = operand_a_regfile; raddr_a = issue_instr_i.rs1[4:0]; // write port waddr = waddr_i[0]; wdata = wdata_i[0]; - we = we_i[0]; + we = we_gpr_i[0]; // we've got a debug request in - if (debug_gpr_req_i) begin + if (debug_gpr_req_i) begin // TODO is there a fpr debug req? 
raddr_a = debug_gpr_addr_i; waddr = debug_gpr_addr_i; wdata = debug_gpr_wdata_i; @@ -308,25 +344,53 @@ module issue_read_operands #( // ---------------------- // Integer Register File // ---------------------- - logic [1:0][63:0] rdata_o; - - assign operand_a_regfile = rdata_o[0]; - assign operand_b_regfile = rdata_o[1]; + logic [1:0][63:0] rdata; ariane_regfile #( - .DATA_WIDTH ( 64 ), - .NR_READ_PORTS ( 2 ), - .NR_WRITE_PORTS ( 2 ), - .ZERO_REG_ZERO ( 1 ) + .DATA_WIDTH ( 64 ), + .NR_READ_PORTS ( 2 ), + .NR_WRITE_PORTS ( NR_COMMIT_PORTS ), + .ZERO_REG_ZERO ( 1 ) ) i_ariane_regfile ( .raddr_i ( '{issue_instr_i.rs2[4:0], raddr_a} ), - .rdata_o ( rdata_o ), + .rdata_o ( rdata ), .waddr_i ( '{waddr_i[1], waddr} ), .wdata_i ( '{wdata_i[1], wdata} ), - .we_i ( '{we_i[1], we} ), + .we_i ( '{we_gpr_i[1], we} ), .* ); + // ----------------------------- + // Floating-Point Register File + // ----------------------------- + logic [2:0][FLEN-1:0] fprdata; + + generate + if (FP_PRESENT) begin : float_regfile_gen + ariane_regfile #( + .DATA_WIDTH ( FLEN ), + .NR_READ_PORTS ( 3 ), + .NR_WRITE_PORTS ( NR_COMMIT_PORTS ), + .ZERO_REG_ZERO ( 0 ) + ) i_ariane_fp_regfile ( + .raddr_i ( '{issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]} ), + .rdata_o ( fprdata ), + .waddr_i ( waddr_i ), + .wdata_i ( '{wdata_i[1][FLEN-1:0], wdata_i[0][FLEN-1:0]} ), + .we_i ( we_fpr_i ), + .* + ); + end else begin : no_fpr_gen + assign fprdata = '{default: '0}; + end + endgenerate + + assign debug_gpr_rdata_o = rdata[0]; + + assign operand_a_regfile = is_rs1_fpr(issue_instr_i.op) ? fprdata[0] : rdata[0]; + assign operand_b_regfile = is_rs2_fpr(issue_instr_i.op) ? 
fprdata[1] : rdata[1]; + assign operand_c_regfile = fprdata[2]; + // ---------------------- // Registers (ID <-> EX) // ---------------------- @@ -338,6 +402,7 @@ module issue_read_operands #( alu_valid_q <= 1'b0; branch_valid_q <= 1'b0; mult_valid_q <= 1'b0; + fpu_valid_q <= 1'b0; lsu_valid_q <= 1'b0; csr_valid_q <= 1'b0; fu_q <= NONE; @@ -353,6 +418,7 @@ module issue_read_operands #( alu_valid_q <= alu_valid_n; branch_valid_q <= branch_valid_n; mult_valid_q <= mult_valid_n; + fpu_valid_q <= fpu_valid_n; lsu_valid_q <= lsu_valid_n; csr_valid_q <= csr_valid_n; fu_q <= fu_n; diff --git a/src/issue_stage.sv b/src/issue_stage.sv index 903d6b8da..0d3cc92e8 100644 --- a/src/issue_stage.sv +++ b/src/issue_stage.sv @@ -75,7 +75,7 @@ module issue_stage #( // commit port input logic [NR_COMMIT_PORTS-1:0][4:0] waddr_i, input logic [NR_COMMIT_PORTS-1:0][63:0] wdata_i, - input logic [NR_COMMIT_PORTS-1:0] we_i, + input logic [NR_COMMIT_PORTS-1:0] we_gpr_i, input logic [NR_COMMIT_PORTS-1:0] we_fpr_i, output scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_o, @@ -84,7 +84,8 @@ module issue_stage #( // --------------------------------------------------- // Scoreboard (SB) <-> Issue and Read Operands (IRO) // --------------------------------------------------- - fu_t [2**REG_ADDR_SIZE:0] rd_clobber_sb_iro; + fu_t [2**REG_ADDR_SIZE:0] rd_clobber_gpr_sb_iro; + fu_t [2**REG_ADDR_SIZE:0] rd_clobber_fpr_sb_iro; logic [REG_ADDR_SIZE-1:0] rs1_iro_sb; logic [63:0] rs1_sb_iro; @@ -94,6 +95,10 @@ module issue_stage #( logic [63:0] rs2_sb_iro; logic rs2_valid_iro_sb; + logic [REG_ADDR_SIZE-1:0] rs3_iro_sb; + logic [FLEN-1:0] rs3_sb_iro; + logic rs3_valid_iro_sb; + scoreboard_entry_t issue_instr_sb_rename; logic issue_instr_valid_sb_rename; logic issue_ack_rename_sb; @@ -103,7 +108,7 @@ module issue_stage #( logic issue_ack_iro_rename; // --------------------------------------------------------- - // 1. Issue instruction and read operand + // 1. 
Issue instruction and read operand, also commit // --------------------------------------------------------- issue_read_operands i_issue_read_operands ( .flush_i ( flush_unissued_instr_i ), @@ -116,7 +121,11 @@ module issue_stage #( .rs2_o ( rs2_iro_sb ), .rs2_i ( rs2_sb_iro ), .rs2_valid_i ( rs2_valid_iro_sb ), - .rd_clobber_i ( rd_clobber_sb_iro ), + .rs3_o ( rs3_iro_sb ), + .rs3_i ( rs3_sb_iro ), + .rs3_valid_i ( rs3_valid_iro_sb ), + .rd_clobber_gpr_i ( rd_clobber_gpr_sb_iro ), + .rd_clobber_fpr_i ( rd_clobber_fpr_sb_iro ), .* ); @@ -142,13 +151,17 @@ module issue_stage #( .NR_WB_PORTS ( NR_WB_PORTS ) ) i_scoreboard ( .unresolved_branch_i ( 1'b0 ), - .rd_clobber_o ( rd_clobber_sb_iro ), + .rd_clobber_gpr_o ( rd_clobber_gpr_sb_iro ), + .rd_clobber_fpr_o ( rd_clobber_fpr_sb_iro ), .rs1_i ( rs1_iro_sb ), .rs1_o ( rs1_sb_iro ), .rs1_valid_o ( rs1_valid_sb_iro ), .rs2_i ( rs2_iro_sb ), .rs2_o ( rs2_sb_iro ), .rs2_valid_o ( rs2_valid_iro_sb ), + .rs3_i ( rs3_iro_sb ), + .rs3_o ( rs3_sb_iro ), + .rs3_valid_o ( rs3_valid_iro_sb ), .issue_instr_o ( issue_instr_sb_rename ), .issue_instr_valid_o ( issue_instr_valid_sb_rename ), diff --git a/src/re_name.sv b/src/re_name.sv index 5c19d0a71..d9ab11389 100644 --- a/src/re_name.sv +++ b/src/re_name.sv @@ -23,11 +23,11 @@ import ariane_pkg::*; module re_name ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low - // coming from scoreboard + // from/to scoreboard input scoreboard_entry_t issue_instr_i, input logic issue_instr_valid_i, output logic issue_ack_o, - // coming from scoreboard + // from/to issue and read operands output scoreboard_entry_t issue_instr_o, output logic issue_instr_valid_o, input logic issue_ack_i @@ -38,28 +38,47 @@ module re_name ( assign issue_ack_o = issue_ack_i; // keep track of re-naming data structures - logic [31:0] re_name_table_n, re_name_table_q; + logic [31:0] re_name_table_gpr_n, re_name_table_gpr_q; + logic [31:0] re_name_table_fpr_n, re_name_table_fpr_q; // 
------------------- // Re-naming // ------------------- always_comb begin + // MSB of the renamed source register addresses + logic name_bit_rs1, name_bit_rs2, name_bit_rs3; // default assignments - re_name_table_n = re_name_table_q; + re_name_table_gpr_n = re_name_table_gpr_q; + re_name_table_fpr_n = re_name_table_fpr_q; issue_instr_o = issue_instr_i; if (issue_ack_i) begin - // if we acknowledge the instruction tic the corresponding register - re_name_table_n[issue_instr_i.rd] = re_name_table_q[issue_instr_i.rd] ^ 1'b1; + // if we acknowledge the instruction tic the corresponding destination register + if (is_rd_fpr(issue_instr_i.op)) + re_name_table_fpr_n[issue_instr_i.rd] = re_name_table_fpr_q[issue_instr_i.rd] ^ 1'b1; + else + re_name_table_gpr_n[issue_instr_i.rd] = re_name_table_gpr_q[issue_instr_i.rd] ^ 1'b1; end - // re-name the source registers - issue_instr_o.rs1 = { re_name_table_q[issue_instr_i.rs1], issue_instr_i.rs1 }; - issue_instr_o.rs2 = { re_name_table_q[issue_instr_i.rs1], issue_instr_i.rs2 }; + // select name bit according to the register file used for source operands + name_bit_rs1 = is_rs1_fpr(issue_instr_i.op) ? re_name_table_fpr_q[issue_instr_i.rs1] + : re_name_table_gpr_q[issue_instr_i.rs1]; + name_bit_rs2 = is_rs2_fpr(issue_instr_i.op) ? 
re_name_table_fpr_q[issue_instr_i.rs2] + : re_name_table_gpr_q[issue_instr_i.rs2]; + // rs3 is only used in certain FP operations and held like an immediate + name_bit_rs3 = re_name_table_fpr_q[issue_instr_i.result[4:0]]; // make sure only the addr bits are read - // we don't want to re-name register zero, it is non-writeable anyway - re_name_table_n[0] = 1'b0; + // re-name the source registers + issue_instr_o.rs1 = { name_bit_rs1, issue_instr_i.rs1 }; + issue_instr_o.rs2 = { name_bit_rs2, issue_instr_i.rs2 }; + + // re-name the third operand in imm if it's actually an operand + if (is_imm_fpr(issue_instr_i.op)) + issue_instr_o.result = {name_bit_rs3, issue_instr_i.result[4:0]}; + + // we don't want to re-name gp register zero, it is non-writeable anyway + re_name_table_gpr_n[0] = 1'b0; end // ------------------- @@ -67,9 +86,11 @@ module re_name ( // ------------------- always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin - re_name_table_q <= '0; + re_name_table_gpr_q <= '0; + re_name_table_fpr_q <= '0; end else begin - re_name_table_q <= re_name_table_n; + re_name_table_gpr_q <= re_name_table_gpr_n; + re_name_table_fpr_q <= re_name_table_fpr_n; end end endmodule diff --git a/src/scoreboard.sv b/src/scoreboard.sv index e6419c66f..bdd907b41 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -26,7 +26,8 @@ module scoreboard #( input logic flush_i, // flush whole scoreboard input logic unresolved_branch_i, // we have an unresolved branch // list of clobbered registers to issue stage - output fu_t [2**REG_ADDR_SIZE:0] rd_clobber_o, + output fu_t [2**REG_ADDR_SIZE:0] rd_clobber_gpr_o, + output fu_t [2**REG_ADDR_SIZE:0] rd_clobber_fpr_o, // regfile like interface to operand read stage input logic [REG_ADDR_SIZE-1:0] rs1_i, @@ -37,12 +38,16 @@ module scoreboard #( output logic [63:0] rs2_o, output logic rs2_valid_o, + input logic [REG_ADDR_SIZE-1:0] rs3_i, + output logic [63:0] rs3_o, + output logic rs3_valid_o, + // advertise instruction to 
commit stage, if commit_ack_i is asserted advance the commit pointer output scoreboard_entry_t [NR_COMMIT_PORTS-1:0] commit_instr_o, input logic [NR_COMMIT_PORTS-1:0] commit_ack_i, - // instruction to put on top of scoreboard e.g. : top pointer - // we can always put this instruction to the to p unless we signal with asserted full_o + // instruction to put on top of scoreboard e.g.: top pointer + // we can always put this instruction to the top unless we signal with asserted full_o input scoreboard_entry_t decoded_instr_i, input logic decoded_instr_valid_i, output logic decoded_instr_ack_o, @@ -66,9 +71,9 @@ module scoreboard #( scoreboard_entry_t sbe; // this is the score board entry we will send to ex } mem_q [NR_ENTRIES-1:0], mem_n [NR_ENTRIES-1:0]; - logic [$clog2(NR_ENTRIES)-1:0] issue_cnt_n, issue_cnt_q; - logic [$clog2(NR_ENTRIES)-1:0] issue_pointer_n, issue_pointer_q; - logic [$clog2(NR_ENTRIES)-1:0] commit_pointer_n, commit_pointer_q; + logic [BITS_ENTRIES-1:0] issue_cnt_n, issue_cnt_q; + logic [BITS_ENTRIES-1:0] issue_pointer_n, issue_pointer_q; + logic [BITS_ENTRIES-1:0] commit_pointer_n, commit_pointer_q; logic issue_full; // the issue queue is full don't issue any new instructions @@ -76,7 +81,7 @@ module scoreboard #( // output commit instruction directly always_comb begin : commit_ports - for (logic [$clog2(NR_ENTRIES)-1:0] i = 0; i < NR_COMMIT_PORTS; i++) + for (logic [BITS_ENTRIES-1:0] i = 0; i < NR_COMMIT_PORTS; i++) commit_instr_o[i] = mem_q[commit_pointer_q + i].sbe; end @@ -94,8 +99,8 @@ module scoreboard #( // maintain a FIFO with issued instructions // keep track of all issued instructions always_comb begin : issue_fifo - automatic logic [$clog2(NR_ENTRIES)-1:0] issue_cnt; - automatic logic [$clog2(NR_ENTRIES)-1:0] commit_pointer; + automatic logic [BITS_ENTRIES-1:0] issue_cnt; + automatic logic [BITS_ENTRIES-1:0] commit_pointer; commit_pointer = commit_pointer_q; issue_cnt = issue_cnt_q; @@ -123,8 +128,8 @@ module scoreboard #( if 
(wb_valid_i[i] && mem_n[trans_id_i[i]].issued) begin mem_n[trans_id_i[i]].sbe.valid = 1'b1; mem_n[trans_id_i[i]].sbe.result = wbdata_i[i]; - // write the exception back if it is valid - if (ex_i[i].valid) + // write the exception back if it is valid --> removing this check to always write back FP flags + // if (ex_i[i].valid) mem_n[trans_id_i[i]].sbe.ex = ex_i[i]; end end @@ -133,7 +138,7 @@ module scoreboard #( // Commit Port // ------------ // we've got an acknowledge from commit - for (logic [$clog2(NR_ENTRIES)-1:0] i = 0; i < NR_COMMIT_PORTS; i++) begin + for (logic [BITS_ENTRIES-1:0] i = 0; i < NR_COMMIT_PORTS; i++) begin if (commit_ack_i[i]) begin // decrease the issue counter issue_cnt--; @@ -144,6 +149,7 @@ module scoreboard #( commit_pointer++; end end + // ------ // Flush // ------ @@ -159,6 +165,7 @@ module scoreboard #( commit_pointer = '0; end end + // update issue counter issue_cnt_n = issue_cnt; // update commit potiner @@ -170,16 +177,20 @@ module scoreboard #( // ------------------- // rd_clobber output: output currently clobbered destination registers always_comb begin : clobber_output - rd_clobber_o = '{default: NONE}; + rd_clobber_gpr_o = '{default: NONE}; + rd_clobber_fpr_o = '{default: NONE}; // check for all valid entries and set the clobber register accordingly for (int unsigned i = 0; i < NR_ENTRIES; i++) begin if (mem_q[i].issued) begin // output the functional unit which is going to clobber this register - rd_clobber_o[mem_q[i].sbe.rd] = mem_q[i].sbe.fu; + if (is_rd_fpr(mem_q[i].sbe.op)) + rd_clobber_fpr_o[mem_q[i].sbe.rd] = mem_q[i].sbe.fu; + else + rd_clobber_gpr_o[mem_q[i].sbe.rd] = mem_q[i].sbe.fu; end end - // the zero register is always free - rd_clobber_o[0] = NONE; + // the gpr zero register is always free + rd_clobber_gpr_o[0] = NONE; end // ---------------------------------- @@ -189,20 +200,26 @@ module scoreboard #( always_comb begin : read_operands rs1_o = 64'b0; rs2_o = 64'b0; + rs3_o = 64'b0; rs1_valid_o = 1'b0; rs2_valid_o 
= 1'b0; + rs3_valid_o = 1'b0; for (int unsigned i = 0; i < NR_ENTRIES; i++) begin // only consider this entry if it is valid if (mem_q[i].issued) begin // look at the appropriate fields and look whether there was an - // instruction that wrote the rd field before, first for RS1 and then for RS2 - if (mem_q[i].sbe.rd == rs1_i) begin + // instruction that wrote the rd field before, first for RS1 and then for RS2, then for RS3 + // we check the type of the stored result register file against issued register file + if ((mem_q[i].sbe.rd == rs1_i) && (is_rs1_fpr(mem_q[i].sbe.op) == is_rs1_fpr(issue_instr_o.op))) begin rs1_o = mem_q[i].sbe.result; rs1_valid_o = mem_q[i].sbe.valid; - end else if (mem_q[i].sbe.rd == rs2_i) begin + end else if ((mem_q[i].sbe.rd == rs2_i) && (is_rs2_fpr(mem_q[i].sbe.op) == is_rs2_fpr(issue_instr_o.op))) begin rs2_o = mem_q[i].sbe.result; rs2_valid_o = mem_q[i].sbe.valid; + end else if (mem_q[i].sbe.rd == rs3_i) begin // rs3 is only considered in FP cases so no check needed + rs3_o = mem_q[i].sbe.result; + rs3_valid_o = mem_q[i].sbe.valid; end end end @@ -213,22 +230,29 @@ module scoreboard #( // provide a direct combinational path from WB a.k.a forwarding // make sure that we are not forwarding a result that got an exception for (int unsigned j = 0; j < NR_WB_PORTS; j++) begin - if (mem_q[trans_id_i[j]].sbe.rd == rs1_i && wb_valid_i[j] && ~ex_i[j].valid) begin + if (mem_q[trans_id_i[j]].sbe.rd == rs1_i && wb_valid_i[j] && ~ex_i[j].valid + && (is_rs1_fpr(mem_q[trans_id_i[j]].sbe.op) == is_rs1_fpr(issue_instr_o.op))) begin rs1_o = wbdata_i[j]; rs1_valid_o = wb_valid_i[j]; break; end - if (mem_q[trans_id_i[j]].sbe.rd == rs2_i && wb_valid_i[j] && ~ex_i[j].valid) begin + if (mem_q[trans_id_i[j]].sbe.rd == rs2_i && wb_valid_i[j] && ~ex_i[j].valid + && (is_rs2_fpr(mem_q[trans_id_i[j]].sbe.op) == is_rs2_fpr(issue_instr_o.op))) begin rs2_o = wbdata_i[j]; rs2_valid_o = wb_valid_i[j]; break; end + if (mem_q[trans_id_i[j]].sbe.rd == rs3_i && 
wb_valid_i[j] && ~ex_i[j].valid) begin // rs3 only uses fpr + rs3_o = wbdata_i[j]; + rs3_valid_o = wb_valid_i[j]; + break; + end end // make sure we didn't read the zero register - if (rs1_i == '0) + if (rs1_i == '0 && ~is_rs1_fpr(issue_instr_o.op)) // only GPR reg0 is 0 rs1_valid_o = 1'b0; - if (rs2_i == '0) + if (rs2_i == '0 && ~is_rs2_fpr(issue_instr_o.op)) // only GPR reg0 is 0 rs2_valid_o = 1'b0; end @@ -249,12 +273,12 @@ module scoreboard #( `ifndef SYNTHESIS `ifndef verilator initial begin - assert (NR_ENTRIES == 2**$clog2(NR_ENTRIES)) else $fatal("Scoreboard size needs to be a power of two."); + assert (NR_ENTRIES == 2**BITS_ENTRIES) else $fatal("Scoreboard size needs to be a power of two."); end // assert that zero is never set assert property ( - @(posedge clk_i) rst_ni |-> (rd_clobber_o[0] == NONE)) + @(posedge clk_i) rst_ni |-> (rd_clobber_gpr_o[0] == NONE)) else $error ("RD 0 should not bet set"); // assert that we never acknowledge a commit if the instruction is not valid assert property ( From 77e6bde99e0127dd975be1a01096ebf63b27f9e5 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Sun, 1 Apr 2018 04:33:47 +0200 Subject: [PATCH 10/94] :construction: Add FP support to commit stage --- src/ariane.sv | 16 ++++++++-------- src/commit_stage.sv | 38 ++++++++++++++++++++++++++++---------- 2 files changed, 36 insertions(+), 18 deletions(-) diff --git a/src/ariane.sv b/src/ariane.sv index d7ab57fd2..3b2d0b97b 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -423,6 +423,13 @@ module ariane #( .lsu_commit_ready_o ( lsu_commit_ready_ex_commit ), // to commit .lsu_exception_o ( lsu_exception_ex_id ), .no_st_pending_o ( no_st_pending_ex_commit ), + // MULT + .mult_ready_o ( mult_ready_ex_id ), + .mult_valid_i ( mult_valid_id_ex ), + .mult_trans_id_o ( mult_trans_id_ex_id ), + .mult_result_o ( mult_result_ex_id ), + .mult_valid_o ( mult_valid_ex_id ), + // CSR .csr_ready_o ( csr_ready_ex_id ), .csr_valid_i ( csr_valid_id_ex ), @@ -451,13 +458,6 @@ module ariane #( 
.mxr_i ( mxr_csr_ex ), // from CSR .satp_ppn_i ( satp_ppn_csr_ex ), // from CSR .asid_i ( asid_csr_ex ), // from CSR - - .mult_ready_o ( mult_ready_ex_id ), - .mult_valid_i ( mult_valid_id_ex ), - .mult_trans_id_o ( mult_trans_id_ex_id ), - .mult_result_o ( mult_result_ex_id ), - .mult_valid_o ( mult_valid_ex_id ), - .data_if ( data_if ), .dcache_en_i ( dcache_en_csr_nbdcache ), .flush_dcache_i ( flush_dcache_ctrl_ex | flush_dcache_i ), @@ -477,7 +477,7 @@ module ariane #( .no_st_pending_i ( no_st_pending_ex_commit ), .waddr_o ( waddr_commit_id ), .wdata_o ( wdata_commit_id ), - .we_o ( we_gpr_commit_id ), + .we_gpr_o ( we_gpr_commit_id ), .we_fpr_o ( we_fpr_commit_id ), .commit_lsu_o ( lsu_commit_commit_ex ), .commit_lsu_ready_i ( lsu_commit_ready_ex_commit ), diff --git a/src/commit_stage.sv b/src/commit_stage.sv index a7690afcf..a0865d369 100644 --- a/src/commit_stage.sv +++ b/src/commit_stage.sv @@ -27,7 +27,7 @@ module commit_stage #( // to register file output logic [NR_COMMIT_PORTS-1:0][4:0] waddr_o, // register file write address output logic [NR_COMMIT_PORTS-1:0][63:0] wdata_o, // register file write data - output logic [NR_COMMIT_PORTS-1:0] we_o, // register file write enable + output logic [NR_COMMIT_PORTS-1:0] we_gpr_o, // register file write enable output logic [NR_COMMIT_PORTS-1:0] we_fpr_o, // floating point register enable // to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline) output logic [63:0] pc_o, @@ -47,6 +47,7 @@ module commit_stage #( output logic sfence_vma_o // flush TLBs and pipeline ); + // TODO make these parametric with NR_COMMIT_PORTS assign waddr_o[0] = commit_instr_i[0].rd[4:0]; assign waddr_o[1] = commit_instr_i[1].rd[4:0]; @@ -57,12 +58,14 @@ module commit_stage #( // ------------------- // write register file or commit instruction in LSU or CSR Buffer always_comb begin : commit + // default assignments commit_ack_o[0] = 1'b0; commit_ack_o[1] = 1'b0; - we_o[0] = 1'b0; - we_o[1] = 
1'b0; + we_gpr_o[0] = 1'b0; + we_gpr_o[1] = 1'b0; + we_fpr_o = '{default: 1'b0}; commit_lsu_o = 1'b0; commit_csr_o = 1'b0; @@ -87,7 +90,10 @@ module commit_stage #( if (!exception_o.valid) begin // we can definitely write the register file // if the instruction is not committing anything the destination - we_o[0] = 1'b1; + if (is_rd_fpr(commit_instr_i[0].op)) + we_fpr_o[0] = 1'b1; + else + we_gpr_o[0] = 1'b1; // check whether the instruction we retire was a store // do not commit the instruction if we got an exception since the store buffer will be cleared @@ -102,7 +108,7 @@ module commit_stage #( end // --------- - // FPU + // FPU Flags // --------- if (commit_instr_i[0].fu == FPU) begin // write the CSR with potential exception flags from retiring floating point instruction @@ -157,13 +163,25 @@ module commit_stage #( // only if the first instruction didn't throw an exception and this instruction won't throw an exception // and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT or FPU if (!exception_o.valid && !commit_instr_i[1].ex.valid && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU})) begin - we_o[1] = 1'b1; + + if (is_rd_fpr(commit_instr_i[1].op)) + we_fpr_o[1] = 1'b1; + else + we_gpr_o[1] = 1'b1; + commit_ack_o[1] = 1'b1; - // additionally check if we are retiring an FPU instruction because we need to make sure that we right all + + // additionally check if we are retiring an FPU instruction because we need to make sure that we write all // exception flags - csr_op_o = CSR_SET; - csr_wdata_o = {59'b0, (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])}; - csr_write_fflags_o = (commit_instr_i[1].fu == FPU); + if (commit_instr_i[1].fu == FPU) begin + csr_op_o = CSR_SET; + if (csr_write_fflags_o) + csr_wdata_o = {59'b0, (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])}; + else + csr_wdata_o = {59'b0, commit_instr_i[1].ex.cause[4:0]}; + + csr_write_fflags_o = 1'b1; + end end end end From 
962fffa1199e49ed12bf1cf0a1be219a47895012 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Tue, 3 Apr 2018 16:59:16 +0200 Subject: [PATCH 11/94] :construction: Fix missing `inside` for some cases --- src/decoder.sv | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/src/decoder.sv b/src/decoder.sv index 8b440d402..ebfb1d907 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -458,11 +458,26 @@ module decoder ( 2'b01: if (~RVD) illegal_instr = 1'b1; default: illegal_instr = 1'b1; endcase - end else + + // check rounding mode + if (check_fprm) begin + unique case (instr.rftype.rm) inside + [3'b000:3'b100]: ; //legal rounding modes + 3'b111: begin + unique case (frm_i) inside + [3'b000:3'b100]: ; //legal rounding modes + default : illegal_instr = 1'b1; + endcase + end + default : illegal_instr = 1'b1; + endcase + end + end else begin illegal_instr = 1'b1; + end end - OPCODE_FP: begin + OPCODE_OP_FP: begin if (FP_PRESENT) begin // only generate decoder if FP extensions are enabled (static) instruction_o.fu = FPU; instruction_o.rs1 = instr.rftype.rs1; @@ -502,11 +517,21 @@ module decoder ( default: illegal_instr = 1'b1; endcase end + 5'b10100: begin + instruction_o.op = FCMP; // feq/flt/fle.fmt - FP Comparisons + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (instr.rftype.rm > 3'b010) illegal_instr = 1'b1; + end 5'b11000: begin instruction_o.op = FCVT_F2I; // fcvt.ifmt.fmt - FP to Int Conversion imm_select = IIMM; // rs2 holds part of the instruction if (instr.rftype.rs2[24:22]) illegal_instr = 1'b1; // bits [21:20] used, other bits must be 0 end + 5'b11010: begin + instruction_o.op = FCVT_I2F; // fcvt.fmt.ifmt - Int to FP Conversion + imm_select = IIMM; // rs2 holds part of the instruction + if (instr.rftype.rs2[24:22]) illegal_instr = 1'b1; // bits [21:20] used, other bits must be 0 + end 5'b11100: begin instruction_o.rs2 = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to 
SGNJ in the unit check_fprm = 1'b0; // instruction encoded in rm, do the check here @@ -516,16 +541,6 @@ module decoder ( // rs2 must be zero if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1; end - 5'b10100: begin - instruction_o.op = FCMP; // feq/flt/fle.fmt - FP Comparisons - check_fprm = 1'b0; // instruction encoded in rm, do the check here - if (instr.rftype.rm > 3'b010) illegal_instr = 1'b1; - end - 5'b11010: begin - instruction_o.op = FCVT_I2F; // fcvt.fmt.ifmt - Int to FP Conversion - imm_select = IIMM; // rs2 holds part of the instruction - if (instr.rftype.rs2[24:22]) illegal_instr = 1'b1; // bits [21:20] used, other bits must be 0 - end 5'b11110: begin instruction_o.op = FMV_X2F; // fmv.fmt.ifmt - GPR to FPR Move instruction_o.rs2 = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit @@ -547,10 +562,10 @@ module decoder ( // check rounding mode if (check_fprm) begin - unique case (instr.rftype.rm) + unique case (instr.rftype.rm) inside [3'b000:3'b100]: ; //legal rounding modes 3'b111: begin - unique case (frm_i) + unique case (frm_i) inside [3'b000:3'b100]: ; //legal rounding modes default : illegal_instr = 1'b1; endcase @@ -558,8 +573,9 @@ module decoder ( default : illegal_instr = 1'b1; endcase end - end else + end else begin illegal_instr = 1'b1; + end end // ---------------------------------- From b148cdfbe83972009b827ad56ef335d0a9773e9f Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Sun, 1 Apr 2018 15:46:38 +0200 Subject: [PATCH 12/94] :construction: Working on fpu integration --- src/ariane.sv | 19 ++++++++++++++----- src/ex_stage.sv | 7 +++++++ src/issue_stage.sv | 3 +++ src/scoreboard.sv | 7 +++++-- 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/src/ariane.sv b/src/ariane.sv index 3b2d0b97b..ec458a03e 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -148,6 +148,13 @@ module ariane #( logic [TRANS_ID_BITS-1:0] mult_trans_id_ex_id; logic [63:0] mult_result_ex_id; logic mult_valid_ex_id; + // FPU + logic 
fpu_ready_ex_id; + logic fpu_valid_id_ex; + logic [TRANS_ID_BITS-1:0] fpu_trans_id_ex_id; + logic [63:0] fpu_result_ex_id; + logic fpu_valid_ex_id; + exception_t fpu_exception_ex_id; // CSR logic csr_ready_ex_id; logic csr_valid_id_ex; @@ -185,8 +192,8 @@ module ariane #( // -------------- // CSR <-> * // -------------- - logic [4:0] fflags_csr_ci; - logic [2:0] frm_csr_id; + logic [4:0] fflags_csr_commit; + logic [2:0] frm_csr_id_issue; logic enable_translation_csr_ex; logic en_ld_st_translation_csr_ex; priv_lvl_t ld_st_priv_lvl_csr_ex; @@ -360,6 +367,9 @@ module ariane #( // Multiplier .mult_ready_i ( mult_ready_ex_id ), .mult_valid_o ( mult_valid_id_ex ), + // FPU + .fpu_ready_i ( fpu_ready_ex_id ), + .fpu_valid_o ( fpu_valid_id_ex ), // CSR .csr_ready_i ( csr_ready_ex_id ), .csr_valid_o ( csr_valid_id_ex ), @@ -518,11 +528,10 @@ module ariane #( .csr_exception_o ( csr_exception_csr_commit ), .epc_o ( epc_commit_pcgen ), .eret_o ( eret ), - .fflags_o ( ), // FPU flags out .trap_vector_base_o ( trap_vector_base_commit_pcgen ), .priv_lvl_o ( priv_lvl ), - .fflags_o ( fflags_csr_ci ), - .frm_o ( frm_csr_id ), + .fflags_o ( fflags_csr_commit ), + .frm_o ( frm_csr_id_issue ), .ld_st_priv_lvl_o ( ld_st_priv_lvl_csr_ex ), .en_translation_o ( enable_translation_csr_ex ), .en_ld_st_translation_o ( en_ld_st_translation_csr_ex ), diff --git a/src/ex_stage.sv b/src/ex_stage.sv index 0a4404cd0..982093574 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -77,6 +77,13 @@ module ex_stage #( output logic [TRANS_ID_BITS-1:0] mult_trans_id_o, output logic [63:0] mult_result_o, output logic mult_valid_o, + // FPU + output logic fpu_ready_o, // FU is ready + input logic fpu_valid_i, // Output is valid + output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o, + output logic [63:0] fpu_result_o, + output logic fpu_valid_o, + output exception_t fpu_exception_o, // Memory Management input logic enable_translation_i, diff --git a/src/issue_stage.sv b/src/issue_stage.sv index 
0d3cc92e8..14f71e25b 100644 --- a/src/issue_stage.sv +++ b/src/issue_stage.sv @@ -62,6 +62,9 @@ module issue_stage #( input logic mult_ready_i, output logic mult_valid_o, // Branch predict Out + input logic fpu_ready_i, + output logic fpu_valid_o, + input logic csr_ready_i, output logic csr_valid_o, diff --git a/src/scoreboard.sv b/src/scoreboard.sv index bdd907b41..a0db9c650 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -128,9 +128,12 @@ module scoreboard #( if (wb_valid_i[i] && mem_n[trans_id_i[i]].issued) begin mem_n[trans_id_i[i]].sbe.valid = 1'b1; mem_n[trans_id_i[i]].sbe.result = wbdata_i[i]; - // write the exception back if it is valid --> removing this check to always write back FP flags - // if (ex_i[i].valid) + // write the exception back if it is valid + if (ex_i[i].valid) mem_n[trans_id_i[i]].sbe.ex = ex_i[i]; + // write the fflags back from the FPU (exception valid is never set), leave tval intact + else if (mem_n[trans_id_i[i]].sbe.fu = FPU) + mem_n[trans_id_i[i]].sbe.ex.cause = ex_i[i].cause; end end From c6d7092c7443803a9cd3742fa80c54cef48ba9df Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 10 Apr 2018 13:20:48 +0200 Subject: [PATCH 13/94] Small fixes to make RISC-V test pass --- include/ariane_pkg.sv | 4 ++-- src/ariane.sv | 10 ++++++++-- src/id_stage.sv | 33 +++++++++++++++++---------------- src/scoreboard.sv | 2 +- 4 files changed, 28 insertions(+), 21 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index 458838c08..285893e28 100755 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -36,8 +36,8 @@ package ariane_pkg; localparam NR_COMMIT_PORTS = 2; // Floating-point extensions configuration - localparam bit RVF = 1'b1; // Is F extension enabled - localparam bit RVD = 1'b1; // Is D extension enabled + localparam bit RVF = 1'b0; // Is F extension enabled + localparam bit RVD = 1'b0; // Is D extension enabled // No need changing these by hand localparam bit FP_PRESENT = RVF | RVD; diff --git 
a/src/ariane.sv b/src/ariane.sv index ec458a03e..1a1283097 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -316,6 +316,7 @@ module ariane #( .issue_instr_ack_i ( issue_instr_issue_id ), .priv_lvl_i ( priv_lvl ), + .frm_i ( frm_csr_id_issue ), .tvm_i ( tvm_csr_id ), .tw_i ( tw_csr_id ), .tsr_i ( tsr_csr_id ), @@ -439,7 +440,12 @@ module ariane #( .mult_trans_id_o ( mult_trans_id_ex_id ), .mult_result_o ( mult_result_ex_id ), .mult_valid_o ( mult_valid_ex_id ), - + .fpu_ready_o ( ), // FU is ready + .fpu_valid_i ( 1'b0 ), // Output is valid + .fpu_trans_id_o ( ), + .fpu_result_o ( ), + .fpu_valid_o ( ), + .fpu_exception_o ( ), // CSR .csr_ready_o ( csr_ready_ex_id ), .csr_valid_i ( csr_valid_id_ex ), @@ -531,7 +537,7 @@ module ariane #( .trap_vector_base_o ( trap_vector_base_commit_pcgen ), .priv_lvl_o ( priv_lvl ), .fflags_o ( fflags_csr_commit ), - .frm_o ( frm_csr_id_issue ), + .frm_o ( frm_csr_id_issue ), .ld_st_priv_lvl_o ( ld_st_priv_lvl_csr_ex ), .en_translation_o ( enable_translation_csr_ex ), .en_ld_st_translation_o ( en_ld_st_translation_csr_ex ), diff --git a/src/id_stage.sv b/src/id_stage.sv index 1257f907e..b838afa97 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -16,31 +16,32 @@ import ariane_pkg::*; module id_stage ( - input logic clk_i, // Clock - input logic rst_ni, // Asynchronous reset active low + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low - input logic flush_i, + input logic flush_i, // from IF - input fetch_entry_t fetch_entry_i, - input logic fetch_entry_valid_i, - output logic decoded_instr_ack_o, // acknowledge the instruction (fetch entry) + input fetch_entry_t fetch_entry_i, + input logic fetch_entry_valid_i, + output logic decoded_instr_ack_o, // acknowledge the instruction (fetch entry) // to ID - output scoreboard_entry_t issue_entry_o, // a decoded instruction - output logic issue_entry_valid_o, // issue entry is valid - output logic is_ctrl_flow_o, // the instruction we issue is a ctrl 
flow instructions - input logic issue_instr_ack_i, // issue stage acknowledged sampling of instructions + output scoreboard_entry_t issue_entry_o, // a decoded instruction + output logic issue_entry_valid_o, // issue entry is valid + output logic is_ctrl_flow_o, // the instruction we issue is a ctrl flow instructions + input logic issue_instr_ack_i, // issue stage acknowledged sampling of instructions // from CSR file - input priv_lvl_t priv_lvl_i, // current privilege level - input logic tvm_i, - input logic tw_i, - input logic tsr_i + input priv_lvl_t priv_lvl_i, // current privilege level + input logic [2:0] frm_i, // floating-point dynamic rounding mode + input logic tvm_i, + input logic tw_i, + input logic tsr_i ); // register stage struct packed { - logic valid; + logic valid; scoreboard_entry_t sbe; - logic is_ctrl_flow; + logic is_ctrl_flow; } issue_n, issue_q; diff --git a/src/scoreboard.sv b/src/scoreboard.sv index a0db9c650..36afe17c6 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -132,7 +132,7 @@ module scoreboard #( if (ex_i[i].valid) mem_n[trans_id_i[i]].sbe.ex = ex_i[i]; // write the fflags back from the FPU (exception valid is never set), leave tval intact - else if (mem_n[trans_id_i[i]].sbe.fu = FPU) + else if (mem_n[trans_id_i[i]].sbe.fu == FPU) mem_n[trans_id_i[i]].sbe.ex.cause = ex_i[i].cause; end end From 3d42624d6d65933507547afd49573cd7188598ab Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Tue, 3 Apr 2018 16:55:20 +0200 Subject: [PATCH 14/94] :construction: Add smallFloat operations to decoder --- include/ariane_pkg.sv | 23 ++++++++++---- src/decoder.sv | 70 +++++++++++++++++++++++++++++++++++-------- src/load_unit.sv | 32 ++++++++++++++++++++ src/lsu.sv | 6 ++-- 4 files changed, 109 insertions(+), 22 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index 285893e28..c046c9b21 100755 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -39,12 +39,23 @@ package ariane_pkg; localparam bit RVF = 1'b0; // 
Is F extension enabled localparam bit RVD = 1'b0; // Is D extension enabled + + // Transprecision floating-point extensions configuration + localparam bit XF16 = 1'b1; // Is half-precision float extension (Xf16) enabled + localparam bit XF16ALT = 1'b1; // Is alternative half-precision float extension (Xf16alt) enabled + localparam bit XF8 = 1'b1; // Is quarter-precision float extension (Xf8) enabled + localparam bit XFVEC = 1'b1; // Is vectorial float extension (Xfvec) enabled + // No need changing these by hand - localparam bit FP_PRESENT = RVF | RVD; + localparam bit FP_PRESENT = RVF | RVD | XF16 | XF16ALT | XF8; // Length of widest floating-point format - localparam FLEN = RVD ? 64 : // D ext. - RVF ? 32 : // F ext. - 0; + localparam FLEN = RVD ? 64 : // D ext. + RVF ? 32 : // F ext. + XF16 ? 16 : // Xf16 ext. + XF16ALT ? 16 : // Xf16alt ext. + XF8 ? 8 : // Xf8 ext. + 0; // Unused in case of no FP + localparam bit NSX = XF16 | XF16ALT | XF8 | XFVEC; // Are non-standard extensions present? 
localparam logic [63:0] ISA_CODE = (0 << 0) // A - Atomic Instructions extension | (1 << 2) // C - Compressed extension @@ -55,7 +66,7 @@ package ariane_pkg; | (0 << 13) // N - User level interrupts supported | (1 << 18) // S - Supervisor mode implemented | (1 << 20) // U - User mode implemented - | (0 << 23) // X - Non-standard extensions present + | (NSX << 23) // X - Non-standard extensions present | (1 << 63); // RV64 // 32 registers + 1 bit for re-naming = 6 @@ -168,7 +179,7 @@ package ariane_pkg; // Divisions DIV, DIVU, DIVW, DIVUW, REM, REMU, REMW, REMUW, // Floating-Point Load and Store Instructions - FLD, FLW, FSD, FSW, + FLD, FLW, FLH, FLB, FSD, FSW, FSH, FSB, // Floating-Point Computational Instructions FADD, FSUB, FMUL, FDIV, FMIN_MAX, FSQRT, FMADD, FMSUB, FNMSUB, FNMADD, // Floating-Point Conversion and Move Instructions diff --git a/src/decoder.sv b/src/decoder.sv index ebfb1d907..44aba1101 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -400,6 +400,10 @@ module decoder ( // determine store size unique case (instr.stype.funct3) // Only process instruction if corresponding extension is active (static) + 3'b000: if (XF8) instruction_o.op = FSB; + else illegal_instr = 1'b1; + 3'b001: if (XF16 | XF16ALT) instruction_o.op = FSH; + else illegal_instr = 1'b1; 3'b010: if (RVF) instruction_o.op = FSW; else illegal_instr = 1'b1; 3'b011: if (RVD) instruction_o.op = FSD; @@ -419,6 +423,10 @@ module decoder ( // determine load size unique case (instr.itype.funct3) // Only process instruction if corresponding extension is active (static) + 3'b000: if (XF8) instruction_o.op = FLB; + else illegal_instr = 1'b1; + 3'b001: if (XF16 | XF16ALT) instruction_o.op = FLH; + else illegal_instr = 1'b1; 3'b010: if (RVF) instruction_o.op = FLW; else illegal_instr = 1'b1; 3'b011: if (RVD) instruction_o.op = FLD; @@ -454,8 +462,10 @@ module decoder ( // determine fp format unique case (instr.r4type.funct2) // Only process instruction if corresponding extension is active 
(static) - 2'b00: if (~RVF) illegal_instr = 1'b1; - 2'b01: if (~RVD) illegal_instr = 1'b1; + 2'b00: if (~RVF) illegal_instr = 1'b1; + 2'b01: if (~RVD) illegal_instr = 1'b1; + 2'b10: if (~XF16 & ~XF16ALT) illegal_instr = 1'b1; + 2'b11: if (~XF8) illegal_instr = 1'b1; default: illegal_instr = 1'b1; endcase @@ -498,12 +508,24 @@ module decoder ( 5'b00100: begin instruction_o.op = FSGNJ; // fsgn{j[n]/jx}.fmt - FP Sign Injection check_fprm = 1'b0; // instruction encoded in rm, do the check here - if (instr.rftype.rm > 3'b010) illegal_instr = 1'b1; + if (XF16ALT) begin // FP16ALT instructions encoded in rm separately + if (!(instr.rftype.rm inside {[3'b000:3'b010], [3'b100:3'b110]})) + illegal_instr = 1'b1; + end else begin + if (!(instr.rftype.rm inside {[3'b000:3'b010]})) + illegal_instr = 1'b1; + end end 5'b00101: begin instruction_o.op = FMIN_MAX; // fmin/fmax.fmt - FP Minimum / Maximum check_fprm = 1'b0; // instruction encoded in rm, do the check here - if (instr.rftype.rm > 3'b001) illegal_instr = 1'b1; + if (XF16ALT) begin // FP16ALT instructions encoded in rm separately + if (!(instr.rftype.rm inside {[3'b000:3'b001], [3'b100:3'b101]})) + illegal_instr = 1'b1; + end else begin + if (!(instr.rftype.rm inside {[3'b000:3'b001]})) + illegal_instr = 1'b1; + end end 5'b01000: begin instruction_o.op = FCVT_F2F; // fcvt.fmt.fmt - FP to FP Conversion @@ -512,15 +534,23 @@ module decoder ( // check source format unique case (instr.rftype.rs2[21:20]) // Only process instruction if corresponding extension is active (static) - 2'b00: if (~RVF) illegal_instr = 1'b1; - 2'b01: if (~RVD) illegal_instr = 1'b1; + 2'b00: if (~RVF) illegal_instr = 1'b1; + 2'b01: if (~RVD) illegal_instr = 1'b1; + 2'b10: if (~XF16 & ~XF16ALT) illegal_instr = 1'b1; + 2'b11: if (~XF8) illegal_instr = 1'b1; default: illegal_instr = 1'b1; endcase end 5'b10100: begin instruction_o.op = FCMP; // feq/flt/fle.fmt - FP Comparisons check_fprm = 1'b0; // instruction encoded in rm, do the check here - if 
(instr.rftype.rm > 3'b010) illegal_instr = 1'b1; + if (XF16ALT) begin // FP16ALT instructions encoded in rm separately + if (!(instr.rftype.rm inside {[3'b000:3'b010], [3'b100:3'b110]})) + illegal_instr = 1'b1; + end else begin + if (!(instr.rftype.rm inside {[3'b000:3'b010]})) + illegal_instr = 1'b1; + end end 5'b11000: begin instruction_o.op = FCVT_F2I; // fcvt.ifmt.fmt - FP to Int Conversion @@ -534,9 +564,11 @@ module decoder ( end 5'b11100: begin instruction_o.rs2 = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit - check_fprm = 1'b0; // instruction encoded in rm, do the check here - if (instr.rftype.rm == 3'b000) instruction_o.op = FMV_F2X; // fmv.ifmt.fmt - FPR to GPR Move - else if (instr.rftype.rm == 3'b001) instruction_o.op = FCLASS; // fclass.fmt - FP Classify + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (instr.rftype.rm == 3'b000 || (XF16ALT && instr.rftype.rm == 3'b100)) // FP16ALT has separate encoding + instruction_o.op = FMV_F2X; // fmv.ifmt.fmt - FPR to GPR Move + else if (instr.rftype.rm == 3'b001 || (XF16ALT && instr.rftype.rm == 3'b101)) // FP16ALT has separate encoding + instruction_o.op = FCLASS; // fclass.fmt - FP Classify else illegal_instr = 1'b1; // rs2 must be zero if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1; @@ -545,7 +577,8 @@ module decoder ( instruction_o.op = FMV_X2F; // fmv.fmt.ifmt - GPR to FPR Move instruction_o.rs2 = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit check_fprm = 1'b0; // instruction encoded in rm, do the check here - if (instr.rftype.rm != 3'b000) illegal_instr = 1'b1; + if (instr.rftype.rm != 3'b000 || (XF16ALT && instr.rftype.rm == 3'b100)) + illegal_instr = 1'b1; // rs2 must be zero if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1; end @@ -555,8 +588,10 @@ module decoder ( // check format unique case (instr.rftype.fmt) // Only process instruction if corresponding extension is active (static) - 2'b00: if (~RVF) 
illegal_instr = 1'b1; - 2'b01: if (~RVD) illegal_instr = 1'b1; + 2'b00: if (~RVF) illegal_instr = 1'b1; + 2'b01: if (~RVD) illegal_instr = 1'b1; + 2'b10: if (~XF16 & ~XF16ALT) illegal_instr = 1'b1; + 2'b11: if (~XF8) illegal_instr = 1'b1; default: illegal_instr = 1'b1; endcase @@ -564,7 +599,16 @@ module decoder ( if (check_fprm) begin unique case (instr.rftype.rm) inside [3'b000:3'b100]: ; //legal rounding modes + 3'b101: begin // Alternative Half-Precsision encded as fmt=10 and rm=101 + if (~XF16ALT || instr.rftype.fmt != 2'b10) + illegal_instr = 1'b1; + unique case (frm_i) inside // actual rounding mode from frm csr + [3'b000:3'b100]: ; //legal rounding modes + default : illegal_instr = 1'b1; + endcase + end 3'b111: begin + // rounding mode from frm csr unique case (frm_i) inside [3'b000:3'b100]: ; //legal rounding modes default : illegal_instr = 1'b1; diff --git a/src/load_unit.sv b/src/load_unit.sv index e8ce39e75..e7018afda 100644 --- a/src/load_unit.sv +++ b/src/load_unit.sv @@ -320,6 +320,8 @@ module load_unit ( logic [63:0] rdata_b_ext; // sign extension for bytes logic [63:0] rdata_fw_box; // nan-boxing for single floats + logic [63:0] rdata_fh_box; // nan-boxing for half floats + logic [63:0] rdata_fb_box; // nan-boxing for quarter floats // double words or double floats always_comb begin : sign_extend_double_word @@ -361,6 +363,19 @@ module load_unit ( endcase end + // nan-boxing half floats + always_comb begin : nan_box_half_float + case (load_data_q.address_offset) + default: rdata_fh_box = {{48{1'b1}}, data_rdata_i[15:0]}; + 3'b001: rdata_fh_box = {{48{1'b1}}, data_rdata_i[23:8]}; + 3'b010: rdata_fh_box = {{48{1'b1}}, data_rdata_i[31:16]}; + 3'b011: rdata_fh_box = {{48{1'b1}}, data_rdata_i[39:24]}; + 3'b100: rdata_fh_box = {{48{1'b1}}, data_rdata_i[47:32]}; + 3'b101: rdata_fh_box = {{48{1'b1}}, data_rdata_i[55:40]}; + 3'b110: rdata_fh_box = {{48{1'b1}}, data_rdata_i[63:48]}; + endcase + end + // sign extend byte always_comb begin : sign_extend_byte 
case (load_data_q.address_offset) @@ -375,13 +390,30 @@ module load_unit ( endcase end + + // nan-boxing quarter floats + always_comb begin : nan_box_quarter_float + case (load_data_q.address_offset) + default: rdata_fb_box = {{56{1'b1}}, data_rdata_i[7:0]}; + 3'b001: rdata_fb_box = {{56{1'b1}}, data_rdata_i[15:8]}; + 3'b010: rdata_fb_box = {{56{1'b1}}, data_rdata_i[23:16]}; + 3'b011: rdata_fb_box = {{56{1'b1}}, data_rdata_i[31:24]}; + 3'b100: rdata_fb_box = {{56{1'b1}}, data_rdata_i[39:32]}; + 3'b101: rdata_fb_box = {{56{1'b1}}, data_rdata_i[47:40]}; + 3'b110: rdata_fb_box = {{56{1'b1}}, data_rdata_i[55:48]}; + 3'b111: rdata_fb_box = {{56{1'b1}}, data_rdata_i[63:56]}; + endcase + end + // Result Mux always_comb begin case (load_data_q.operator) LW, LWU: result_o = rdata_w_ext; FLW: result_o = rdata_fw_box; LH, LHU: result_o = rdata_h_ext; + FLH: result_o = rdata_fh_box; LB, LBU: result_o = rdata_b_ext; + FLB: result_o = rdata_fb_box; default: result_o = rdata_d_ext; endcase end diff --git a/src/lsu.sv b/src/lsu.sv index c9bc0c051..c3f1a3d6d 100644 --- a/src/lsu.sv +++ b/src/lsu.sv @@ -355,7 +355,7 @@ module lsu #( 3'b100: be_i = 8'b1111_0000; default:; endcase - LH, LHU, SH: // half word + LH, LHU, SH, FLH, FSH: // half word case (vaddr_i[2:0]) 3'b000: be_i = 8'b0000_0011; 3'b001: be_i = 8'b0000_0110; @@ -366,7 +366,7 @@ module lsu #( 3'b110: be_i = 8'b1100_0000; default:; endcase - LB, LBU, SB: // byte + LB, LBU, SB, FLB, FSB: // byte case (vaddr_i[2:0]) 3'b000: be_i = 8'b0000_0001; 3'b001: be_i = 8'b0000_0010; @@ -412,7 +412,7 @@ module lsu #( end // half word - LH, LHU, SH: begin + LH, LHU, SH, FLH, FSH: begin if (lsu_ctrl.vaddr[0] != 1'b0) data_misaligned = 1'b1; end From a1125162a623b606a62c9cfa52e149bc92904e57 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Tue, 3 Apr 2018 22:36:55 +0200 Subject: [PATCH 15/94] :construction: Add vectorial FP operations to decoder --- include/ariane_pkg.sv | 77 +++++++---- src/decoder.sv | 297 
+++++++++++++++++++++++++++++++++++++----- 2 files changed, 316 insertions(+), 58 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index c046c9b21..7f00d4ede 100755 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -46,8 +46,10 @@ package ariane_pkg; localparam bit XF8 = 1'b1; // Is quarter-precision float extension (Xf8) enabled localparam bit XFVEC = 1'b1; // Is vectorial float extension (Xfvec) enabled - // No need changing these by hand + // -------------------------------------- + // vvvv Don't change these by hand! vvvv localparam bit FP_PRESENT = RVF | RVD | XF16 | XF16ALT | XF8; + // Length of widest floating-point format localparam FLEN = RVD ? 64 : // D ext. RVF ? 32 : // F ext. @@ -55,8 +57,16 @@ package ariane_pkg; XF16ALT ? 16 : // Xf16alt ext. XF8 ? 8 : // Xf8 ext. 0; // Unused in case of no FP + localparam bit NSX = XF16 | XF16ALT | XF8 | XFVEC; // Are non-standard extensions present? + localparam bit RVFVEC = RVF & XFVEC & FLEN>32; // FP32 vectors available if vectors and larger fmt enabled + localparam bit XF16VEC = XF16 & XFVEC & FLEN>16; // FP16 vectors available if vectors and larger fmt enabled + localparam bit XF16ALTVEC = XF16ALT & XFVEC & FLEN>16; // FP16ALT vectors available if vectors and larger fmt enabled + localparam bit XF8VEC = XF8 & XFVEC & FLEN>8; // FP8 vectors available if vectors and larger fmt enabled + // ^^^^ until here ^^^^ + // --------------------- + localparam logic [63:0] ISA_CODE = (0 << 0) // A - Atomic Instructions extension | (1 << 2) // C - Compressed extension | (RVD << 3) // D - Double precsision floating-point extension @@ -146,7 +156,7 @@ package ariane_pkg; } bht_prediction_t; typedef enum logic[3:0] { - NONE, LOAD, STORE, ALU, CTRL_FLOW, MULT, CSR, FPU + NONE, LOAD, STORE, ALU, CTRL_FLOW, MULT, CSR, FPU, FPU_VEC, FPU_VEC_REPL } fu_t; localparam EXC_OFF_RST = 8'h80; @@ -187,7 +197,9 @@ package ariane_pkg; // Floating-Point Compare Instructions FCMP, // Floating-Point Classify 
Instruction - FCLASS + FCLASS, + // Vectorial Floating-Point Instructions that don't directly map onto the scalar ones + VFMIN, VFMAX, VFSGNJ, VFSGNJN, VFSGNJX, VFEQ, VFNE, VFLT, VFGE, VFLE, VFGT, VFCPKAB_S, VFCPKCD_S, VFCPKAB_D, VFCPKCD_D } fu_op; // ------------------------------- @@ -196,14 +208,15 @@ package ariane_pkg; function automatic logic is_rs1_fpr (input fu_op op); if (FP_PRESENT) begin // makes function static for non-fp case unique case (op) inside - [FADD:FNMADD], // Computational Operations - FCVT_F2I, // Float-Int Casts - FCVT_F2F, // Float-Float Casts - FSGNJ, // Sign Injections - FMV_F2X, // FPR-GPR Moves - FCMP, // Comparisons - FCLASS : return 1'b1; // Classifications - default : return 1'b0; // all other ops + [FADD:FNMADD], // Computational Operations + FCVT_F2I, // Float-Int Casts + FCVT_F2F, // Float-Float Casts + FSGNJ, // Sign Injections + FMV_F2X, // FPR-GPR Moves + FCMP, // Comparisons + FCLASS, // Classifications + [VFMIN:VFCPKCD_D] : return 1'b1; // Additional Vectorial FP ops + default : return 1'b0; // all other ops endcase end else return 1'b0; @@ -212,12 +225,13 @@ package ariane_pkg; function automatic logic is_rs2_fpr (input fu_op op); if (FP_PRESENT) begin // makes function static for non-fp case unique case (op) inside - [FSD:FSW], // FP Stores - [FADD:FMIN_MAX], // Computational Operations (no sqrt) - [FMADD:FNMADD], // Fused Computational Operations - FSGNJ, // Sign Injections - FCMP : return 1'b1; // Comparisons - default : return 1'b0; // all other ops + [FSD:FSW], // FP Stores + [FADD:FMIN_MAX], // Computational Operations (no sqrt) + [FMADD:FNMADD], // Fused Computational Operations + FSGNJ, // Sign Injections + FCMP, // Comparisons + [VFMIN:VFCPKCD_D] : return 1'b1; // Additional Vectorial FP ops + default : return 1'b0; // all other ops endcase end else return 1'b0; @@ -237,13 +251,14 @@ package ariane_pkg; function automatic logic is_rd_fpr (input fu_op op); if (FP_PRESENT) begin // makes function static for non-fp 
case unique case (op) inside - [FLD:FLW], // FP Loads - [FADD:FNMADD], // Computational Operations - FCVT_I2F, // Int-Float Casts - FCVT_F2F, // Float-Float Casts - FSGNJ, // Sign Injections - FMV_X2F : return 1'b1; // GPR-FPR Moves - default : return 1'b0; // all other ops + [FLD:FLW], // FP Loads + [FADD:FNMADD], // Computational Operations + FCVT_I2F, // Int-Float Casts + FCVT_F2F, // Float-Float Casts + FSGNJ, // Sign Injections + FMV_X2F, // GPR-FPR Moves + [VFMIN:VFCPKCD_D] : return 1'b1; // Additional Vectorial FP ops + default : return 1'b0; // all other ops endcase end else return 1'b0; @@ -324,7 +339,7 @@ package ariane_pkg; typedef struct packed { logic [31:27] rs3; - logic [1:0] funct2; + logic [26:25] funct2; logic [24:20] rs2; logic [19:15] rs1; logic [14:12] funct3; @@ -342,6 +357,17 @@ package ariane_pkg; logic [6:0] opcode; } rftype_t; // floating-point + typedef struct packed { + logic [31:30] funct2; + logic [29:25] vecfltop; + logic [24:20] rs2; + logic [19:15] rs1; + logic [14:14] repl; + logic [13:12] vfmt; + logic [11:7] rd; + logic [6:0] opcode; + } rvftype_t; // vectorial floating-point + typedef struct packed { logic [31:20] imm; logic [19:15] rs1; @@ -370,6 +396,7 @@ package ariane_pkg; rtype_t rtype; r4type_t r4type; rftype_t rftype; + rvftype_t rvftype; itype_t itype; stype_t stype; utype_t utype; diff --git a/src/decoder.sv b/src/decoder.sv index 44aba1101..cdde7da23 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -233,35 +233,257 @@ module decoder ( // Reg-Reg Operations // -------------------------- OPCODE_OP: begin - instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001) ? 
MULT : ALU; - instruction_o.rs1 = instr.rtype.rs1; - instruction_o.rs2 = instr.rtype.rs2; - instruction_o.rd = instr.rtype.rd; + // -------------------------------------------- + // Vectorial Floating-Point Reg-Reg Operations + // -------------------------------------------- + if (instr.rvftype.funct2 == 2'b10) begin // Prefix 10 for all Xfvec ops + if (FP_PRESENT & XFVEC) begin // only generate decoder if FP extensions are enabled (static) + automatic logic allow_replication; // control honoring of replication flag - unique case ({instr.rtype.funct7, instr.rtype.funct3}) - {7'b000_0000, 3'b000}: instruction_o.op = ADD; // Add - {7'b010_0000, 3'b000}: instruction_o.op = SUB; // Sub - {7'b000_0000, 3'b010}: instruction_o.op = SLTS; // Set Lower Than - {7'b000_0000, 3'b011}: instruction_o.op = SLTU; // Set Lower Than Unsigned - {7'b000_0000, 3'b100}: instruction_o.op = XORL; // Xor - {7'b000_0000, 3'b110}: instruction_o.op = ORL; // Or - {7'b000_0000, 3'b111}: instruction_o.op = ANDL; // And - {7'b000_0000, 3'b001}: instruction_o.op = SLL; // Shift Left Logical - {7'b000_0000, 3'b101}: instruction_o.op = SRL; // Shift Right Logical - {7'b010_0000, 3'b101}: instruction_o.op = SRA; // Shift Right Arithmetic - // Multiplications - {7'b000_0001, 3'b000}: instruction_o.op = MUL; - {7'b000_0001, 3'b001}: instruction_o.op = MULH; - {7'b000_0001, 3'b010}: instruction_o.op = MULHSU; - {7'b000_0001, 3'b011}: instruction_o.op = MULHU; - {7'b000_0001, 3'b100}: instruction_o.op = DIV; - {7'b000_0001, 3'b101}: instruction_o.op = DIVU; - {7'b000_0001, 3'b110}: instruction_o.op = REM; - {7'b000_0001, 3'b111}: instruction_o.op = REMU; - default: begin + instruction_o.fu = FPU_VEC; // Same unit, but sets 'vectorial' signal + instruction_o.rs1 = instr.rvftype.rs1; + instruction_o.rs2 = instr.rvftype.rs2; + instruction_o.rd = instr.rvftype.rd; + check_fprm = 1'b1; + allow_replication = 1'b1; + // decode vectorial FP instruction + unique case (instr.rvftype.vecfltop) + 5'b00001 : 
instruction_o.op = FADD; // vfadd.vfmt - Vectorial FP Addition + 5'b00010 : instruction_o.op = FSUB; // vfsub.vfmt - Vectorial FP Subtraction + 5'b00011 : instruction_o.op = FMUL; // vfmul.vfmt - Vectorial FP Multiplication + 5'b00100 : instruction_o.op = FDIV; // vfdiv.vfmt - Vectorial FP Division + 5'b00101 : begin + instruction_o.op = VFMIN; // vfmin.vfmt - Vectorial FP Minimum + check_fprm = 1'b0; // rounding mode irrelevant + end + 5'b00110 : begin + instruction_o.op = VFMAX; // vfmax.vfmt - Vectorial FP Maximum + check_fprm = 1'b0; // rounding mode irrelevant + end + 5'b00111 : begin + instruction_o.op = FSQRT; // vfsqrt.vfmt - Vectorial FP Square Root + allow_replication = 1'b0; // only one operand + if (instr.rvftype.rs2 != 5'b00000) illegal_instr = 1'b1; // rs2 must be 0 + end + 5'b01000 : begin + instruction_o.op = FMADD; // vfmac.vfmt - Vectorial FP Multiply-Accumulate + imm_select = SIMM; // rd into result field (upper bits don't matter) + end + 5'b01001 : begin + instruction_o.op = FMSUB; // vfmre.vfmt - Vectorial FP Multiply-Reduce + imm_select = SIMM; // rd into result field (upper bits don't matter) + end + 5'b01100 : begin + unique case (instr.rvftype.rs2) inside // operation encoded in rs2, `inside` for matching ? + 5'b00000 : begin + if (instr.rvftype.repl) + instruction_o.op = FMV_F2X; // vfmv.x.vfmt - FPR to GPR Move + else + instruction_o.op = FMV_X2F; // vfmv.vfmt.x - GPR to FPR Move + check_fprm = 1'b0; // no rounding for moves + end + 5'b00001 : begin + instruction_o.op = FCLASS; // vfclass.vfmt - Vectorial FP Classify + check_fprm = 1'b0; // no rounding for classification + allow_replication = 1'b0; // R must not be set + end + 5'b00010 : instruction_o.op = FCVT_F2I; // vfcvt.x.vfmt - Vectorial FP to Int Conversion + 5'b00011 : instruction_o.op = FCVT_I2F; // vfcvt.vfmt.x - Vectorial Int to FP Conversion + 5'b001?? 
: begin + instruction_o.op = FCVT_F2F; // vfcvt.vfmt.vfmt - Vectorial FP to FP Conversion + allow_replication = 1'b0; // R must not be set + // determine source format + unique case (instr.rvftype.rs2[21:20]) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (~RVFVEC) illegal_instr = 1'b1; + 2'b01: if (~XF16ALTVEC) illegal_instr = 1'b1; + 2'b10: if (~XF16VEC) illegal_instr = 1'b1; + 2'b11: if (~XF8VEC) illegal_instr = 1'b1; + default : illegal_instr = 1'b1; + endcase + end + default : illegal_instr = 1'b1; + endcase + end + 5'b01101 : begin + check_fprm = 1'b0; // no rounding for sign-injection + instruction_o.op = VFSGNJ; // vfsgnj.vfmt - Vectorial FP Sign Injection + end + 5'b01110 : begin + check_fprm = 1'b0; // no rounding for sign-injection + instruction_o.op = VFSGNJN; // vfsgnjn.vfmt - Vectorial FP Negated Sign Injection + end + 5'b01111 : begin + check_fprm = 1'b0; // no rounding for sign-injection + instruction_o.op = VFSGNJX; // vfsgnjx.vfmt - Vectorial FP XORed Sign Injection + end + 5'b10000 : begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = VFEQ; // vfeq.vfmt - Vectorial FP Equality + end + 5'b10001 : begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = VFNE; // vfne.vfmt - Vectorial FP Non-Equality + end + 5'b10010 : begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = VFLT; // vflt.vfmt - Vectorial FP Less Than + end + 5'b10011 : begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = VFGE; // vfge.vfmt - Vectorial FP Greater or Equal + end + 5'b10100 : begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = VFLE; // vfle.vfmt - Vectorial FP Less or Equal + end + 5'b10101 : begin + check_fprm = 1'b0; // no rounding for comparisons + instruction_o.op = VFGT; // vfgt.vfmt - Vectorial FP Greater Than + end + 5'b11000 : begin + allow_replication = 1'b0; // no replication for cast-and-pack +
instruction_o.op = VFCPKAB_S; // vfcpka/b.vfmt.s - Vectorial FP Cast-and-Pack from 2x FP32, lowest 4 entries + if (~RVF) illegal_instr = 1'b1; // if we don't support RVF, we can't cast from FP32 + // check destination format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active and FLEN suffices (static) + 2'b00: begin + if (~RVFVEC) illegal_instr = 1'b1; // destination vector not supported + if (instr.rvftype.repl) illegal_instr = 1'b1; // no entries 2/3 in vector of 2 fp32 + end + 2'b01: begin + if (~XF16ALTVEC) illegal_instr = 1'b1; // destination vector not supported + end + 2'b10: begin + if (~XF16VEC) illegal_instr = 1'b1; // destination vector not supported + end + 2'b11: begin + if (~XF8VEC) illegal_instr = 1'b1; // destination vector not supported + end + default : illegal_instr = 1'b1; + endcase + end + 5'b11001 : begin + allow_replication = 1'b0; // no replication for cast-and-pack + instruction_o.op = VFCPKCD_S; // vfcpkc/d.vfmt.s - Vectorial FP Cast-and-Pack from 2x FP32, second 4 entries + if (~RVF) illegal_instr = 1'b1; // if we don't support RVF, we can't cast from FP32 + // check destination format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active and FLEN suffices (static) + 2'b00: illegal_instr = 1'b1; // no entries 4-7 in vector of 2 FP32 + 2'b01: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16ALT + 2'b10: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16 + 2'b11: begin + if (~XF8VEC) illegal_instr = 1'b1; // destination vector not supported + end + default : illegal_instr = 1'b1; + endcase + end + 5'b11010 : begin + allow_replication = 1'b0; // no replication for cast-and-pack + instruction_o.op = VFCPKAB_D; // vfcpka/b.vfmt.d - Vectorial FP Cast-and-Pack from 2x FP64, lowest 4 entries + if (~RVD) illegal_instr = 1'b1; // if we don't support RVD, we can't cast from FP64 + // check destination format + unique case 
(instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active and FLEN suffices (static) + 2'b00: begin + if (~RVFVEC) illegal_instr = 1'b1; // destination vector not supported + if (instr.rvftype.repl) illegal_instr = 1'b1; // no entries 2/3 in vector of 2 fp32 + end + 2'b01: begin + if (~XF16ALTVEC) illegal_instr = 1'b1; // destination vector not supported + end + 2'b10: begin + if (~XF16VEC) illegal_instr = 1'b1; // destination vector not supported + end + 2'b11: begin + if (~XF8VEC) illegal_instr = 1'b1; // destination vector not supported + end + default : illegal_instr = 1'b1; + endcase + end + 5'b11011 : begin + allow_replication = 1'b0; // no replication for cast-and-pack + instruction_o.op = VFCPKCD_D; // vfcpkc/d.vfmt.d - Vectorial FP Cast-and-Pack from 2x FP64, second 4 entries + if (~RVD) illegal_instr = 1'b1; // if we don't support RVD, we can't cast from FP64 + // check destination format + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active and FLEN suffices (static) + 2'b00: illegal_instr = 1'b1; // no entries 4-7 in vector of 2 FP32 + 2'b01: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16ALT + 2'b10: illegal_instr = 1'b1; // no entries 4-7 in vector of 4 FP16 + 2'b11: begin + if (~XF8VEC) illegal_instr = 1'b1; // destination vector not supported + end + default : illegal_instr = 1'b1; + endcase + end + default : illegal_instr = 1'b1; + endcase + + // check vector format (vfmt field of the vectorial encoding, not the scalar fmt field) + unique case (instr.rvftype.vfmt) + // Only process instruction if corresponding extension is active (static) + 2'b00: if (~RVFVEC) illegal_instr = 1'b1; + 2'b01: if (~XF16ALTVEC) illegal_instr = 1'b1; + 2'b10: if (~XF16VEC) illegal_instr = 1'b1; + 2'b11: if (~XF8VEC) illegal_instr = 1'b1; + default: illegal_instr = 1'b1; + endcase + + // check disallowed replication + if (~allow_replication & instr.rvftype.repl) illegal_instr = 1'b1; + + // communicate replication to the unit + if (instr.rvftype.repl)
instruction_o.fu = FPU_VEC_REPL; // Same unit, but sets 'vectorial' and 'replication' signals + + // check rounding mode + if (check_fprm) begin + unique case (frm_i) inside // actual rounding mode from frm csr + [3'b000:3'b100]: ; //legal rounding modes + default : illegal_instr = 1'b1; + endcase + end + + end else begin // No vectorial FP enabled (static) illegal_instr = 1'b1; end - endcase + + // --------------------------- + // Integer Reg-Reg Operations + // --------------------------- + end else begin + instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001) ? MULT : ALU; + instruction_o.rs1 = instr.rtype.rs1; + instruction_o.rs2 = instr.rtype.rs2; + instruction_o.rd = instr.rtype.rd; + + unique case ({instr.rtype.funct7, instr.rtype.funct3}) + {7'b000_0000, 3'b000}: instruction_o.op = ADD; // Add + {7'b010_0000, 3'b000}: instruction_o.op = SUB; // Sub + {7'b000_0000, 3'b010}: instruction_o.op = SLTS; // Set Lower Than + {7'b000_0000, 3'b011}: instruction_o.op = SLTU; // Set Lower Than Unsigned + {7'b000_0000, 3'b100}: instruction_o.op = XORL; // Xor + {7'b000_0000, 3'b110}: instruction_o.op = ORL; // Or + {7'b000_0000, 3'b111}: instruction_o.op = ANDL; // And + {7'b000_0000, 3'b001}: instruction_o.op = SLL; // Shift Left Logical + {7'b000_0000, 3'b101}: instruction_o.op = SRL; // Shift Right Logical + {7'b010_0000, 3'b101}: instruction_o.op = SRA; // Shift Right Arithmetic + // Multiplications + {7'b000_0001, 3'b000}: instruction_o.op = MUL; + {7'b000_0001, 3'b001}: instruction_o.op = MULH; + {7'b000_0001, 3'b010}: instruction_o.op = MULHSU; + {7'b000_0001, 3'b011}: instruction_o.op = MULHU; + {7'b000_0001, 3'b100}: instruction_o.op = DIV; + {7'b000_0001, 3'b101}: instruction_o.op = DIVU; + {7'b000_0001, 3'b110}: instruction_o.op = REM; + {7'b000_0001, 3'b111}: instruction_o.op = REMU; + default: begin + illegal_instr = 1'b1; + end + endcase + end end // -------------------------- @@ -453,10 +675,10 @@ module decoder ( check_fprm = 1'b1; // select the correct 
fused operation unique case (instr.r4type.opcode) - default: instruction_o.op = FMADD; // fmadd.fmt - Fused multiply-add - OPCODE_MSUB: instruction_o.op = FMSUB; // fmsub.fmt - Fused multiply-subtract - OPCODE_NMSUB: instruction_o.op = FNMSUB; // fnmsub.fmt - Negated fused multiply-subtract - OPCODE_NMADD: instruction_o.op = FNMADD; // fnmadd.fmt - Negated fused multiply-add + default: instruction_o.op = FMADD; // fmadd.fmt - FP Fused multiply-add + OPCODE_MSUB: instruction_o.op = FMSUB; // fmsub.fmt - FP Fused multiply-subtract + OPCODE_NMSUB: instruction_o.op = FNMSUB; // fnmsub.fmt - FP Negated fused multiply-subtract + OPCODE_NMADD: instruction_o.op = FNMADD; // fnmadd.fmt - FP Negated fused multiply-add endcase // determine fp format @@ -473,7 +695,16 @@ module decoder ( if (check_fprm) begin unique case (instr.rftype.rm) inside [3'b000:3'b100]: ; //legal rounding modes + 3'b101: begin // Alternative Half-Precision encoded as fmt=10 and rm=101 + if (~XF16ALT || instr.rftype.fmt != 2'b10) + illegal_instr = 1'b1; + unique case (frm_i) inside // actual rounding mode from frm csr + [3'b000:3'b100]: ; //legal rounding modes + default : illegal_instr = 1'b1; + endcase + end 3'b111: begin + // rounding mode from frm csr unique case (frm_i) inside [3'b000:3'b100]: ; //legal rounding modes default : illegal_instr = 1'b1; @@ -508,7 +739,7 @@ module decoder ( 5'b00100: begin instruction_o.op = FSGNJ; // fsgn{j[n]/jx}.fmt - FP Sign Injection check_fprm = 1'b0; // instruction encoded in rm, do the check here - if (XF16ALT) begin // FP16ALT instructions encoded in rm separately + if (XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) if (!(instr.rftype.rm inside {[3'b000:3'b010], [3'b100:3'b110]})) illegal_instr = 1'b1; end else begin @@ -519,7 +750,7 @@ module decoder ( 5'b00101: begin instruction_o.op = FMIN_MAX; // fmin/fmax.fmt - FP Minimum / Maximum check_fprm = 1'b0; // instruction encoded in rm, do the check here - if (XF16ALT) begin // FP16ALT
instructions encoded in rm separately + if (XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) if (!(instr.rftype.rm inside {[3'b000:3'b001], [3'b100:3'b101]})) illegal_instr = 1'b1; end else begin @@ -544,7 +775,7 @@ module decoder ( 5'b10100: begin instruction_o.op = FCMP; // feq/flt/fle.fmt - FP Comparisons check_fprm = 1'b0; // instruction encoded in rm, do the check here - if (XF16ALT) begin // FP16ALT instructions encoded in rm separately + if (XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) if (!(instr.rftype.rm inside {[3'b000:3'b010], [3'b100:3'b110]})) illegal_instr = 1'b1; end else begin From 12bd35ce7dddcb7651ce8795142f6d3d50e5727f Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Wed, 4 Apr 2018 09:11:12 +0200 Subject: [PATCH 16/94] :construction: Working on FPU integration to Ariane --- .gitmodules | 6 + Makefile | 23 +- include/ariane_pkg.sv | 8 +- src/ariane.sv | 34 ++- src/commit_stage.sv | 8 +- src/decoder.sv | 16 +- src/ex_stage.sv | 22 ++ src/fpnew | 1 + src/fpu_legacy | 1 + src/fpu_wrap.sv | 492 +++++++++++++++++++++++++++++++++++++ src/issue_read_operands.sv | 42 +++- src/issue_stage.sv | 2 + src/scoreboard.sv | 6 +- src/util/find_first_one.sv | 85 +++++++ 14 files changed, 699 insertions(+), 47 deletions(-) create mode 160000 src/fpnew create mode 160000 src/fpu_legacy create mode 100644 src/fpu_wrap.sv create mode 100644 src/util/find_first_one.sv diff --git a/.gitmodules b/.gitmodules index b06847d1e..e826f576e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -16,3 +16,9 @@ [submodule "src/axi_node"] path = src/axi_node url = https://github.com/pulp-platform/axi_node.git +[submodule "fpnew"] + path = src/fpnew + url = git@iis-git.ee.ethz.ch:smach/fpnew.git +[submodule "fpu-legacy"] + path = src/fpu_legacy + url = git@iis-git.ee.ethz.ch:pulp-open/fpu.git diff --git a/Makefile b/Makefile index 95ee60936..85f62b219 100755 --- a/Makefile +++ b/Makefile @@ -20,9 +20,11 @@ riscv-test ?= rv64ui-p-add # 
Sources # Ariane PKG ariane_pkg := include/ariane_pkg.sv include/nbdcache_pkg.sv +# FPnew PKG +fpnew_pkg := src/fpnew/src/pkg/fpnew_pkg.vhd src/fpnew/src/pkg/fpnew_fmts_pkg.vhd src/fpnew/src/pkg/fpnew_comps_pkg.vhd src/fpnew/src/pkg/fpnew_pkg_constants.vhd # utility modules util := $(wildcard src/util/*.svh) src/util/instruction_tracer_pkg.sv src/util/instruction_tracer_if.sv \ - src/util/generic_fifo.sv src/util/cluster_clock_gating.sv src/util/behav_sram.sv + src/util/generic_fifo.sv src/util/cluster_clock_gating.sv src/util/behav_sram.sv src/util/find_first_one.sv # test targets tests := alu scoreboard fifo dcache_arbiter store_queue lsu core fetch_fifo # UVM agents @@ -39,7 +41,10 @@ test_pkg := $(wildcard tb/test/*/*sequence_pkg.sv*) $(wildcard tb/test/*/*_pkg.s dpi := $(wildcard tb/dpi/*) # this list contains the standalone components src := $(wildcard src/*.sv) $(wildcard tb/common/*.sv) $(wildcard src/axi2per/*.sv) $(wildcard src/axi_slice/*.sv) \ - $(wildcard src/axi_node/*.sv) $(wildcard src/axi_mem_if/*.sv) + $(wildcard src/axi_node/*.sv) $(wildcard src/axi_mem_if/src/*.sv) src/fpu_legacy/hdl/fpu_utils/fpu_ff.sv \ + src/fpu_legacy/hdl/fpu_div_sqrt_mvp/defs_div_sqrt_mvp.sv $(wildcard src/fpu_legacy/hdl/fpu_div_sqrt_mvp/*.sv) \ + $(fpnew_pkg) $(wildcard src/fpnew/src/utils/*.vhd) $(wildcard src/fpnew/src/ops/*.vhd) \ + $(wildcard src/fpnew/src/subunits/*.vhd) src/fpnew/src/fpnew.vhd src/fpnew/src/fpnew_top.vhd # look for testbenches tbs := tb/alu_tb.sv tb/core_tb.sv tb/dcache_arbiter_tb.sv tb/store_queue_tb.sv tb/scoreboard_tb.sv tb/fifo_tb.sv @@ -65,7 +70,11 @@ riscv-tests := rv64ui-p-add rv64ui-p-addi rv64ui-p-slli rv64ui-p-addiw rv64ui-p- rv64um-p-mul rv64um-p-mulh rv64um-p-mulhsu rv64um-p-mulhu rv64um-p-div rv64um-p-divu rv64um-p-rem \ rv64um-p-remu rv64um-p-mulw rv64um-p-divw rv64um-p-divuw rv64um-p-remw rv64um-p-remuw \ rv64um-v-mul rv64um-v-mulh rv64um-v-mulhsu rv64um-v-mulhu rv64um-v-div rv64um-v-divu rv64um-v-rem \ - rv64um-v-remu rv64um-v-mulw 
rv64um-v-divw rv64um-v-divuw rv64um-v-remw rv64um-v-remuw + rv64um-v-remu rv64um-v-mulw rv64um-v-divw rv64um-v-divuw rv64um-v-remw rv64um-v-remuw \ + rv64uf-p-fadd rv64uf-p-fclass rv64uf-p-fcmp rv64uf-p-fcvt rv64uf-p-fcvt_w rv64uf-p-fdiv rv64uf-p-fmadd \ + rv64uf-p-fmin rv64uf-p-ldst rv64uf-p-move rv64uf-p-recoding \ + rv64uf-v-fadd rv64uf-v-fclass rv64uf-v-fcmp rv64uf-v-fcvt rv64uf-v-fcvt_w rv64uf-v-fdiv rv64uf-v-fmadd \ + rv64uf-v-fmin rv64uf-v-ldst rv64uf-v-move rv64uf-v-recoding # failed test directory failed-tests := $(wildcard failedtests/*.S) @@ -73,6 +82,7 @@ failed-tests := $(wildcard failedtests/*.S) incdir := ./includes # Compile and sim flags compile_flag += +cover=bcfst+/dut -incr -64 -nologo -quiet -suppress 13262 -permissive +compile_flag_vhd += -64 -nologo -quiet -2008 uvm-flags += +UVM_NO_RELNOTES # Iterate over all include directories and write them with +incdir+ prefixed # +incdir+ works for Verilator and QuestaSim @@ -89,7 +99,8 @@ $(library)/.build-srcs: $(util) $(src) vlog$(questa_version) $(compile_flag) -work $(library) $(filter %.sv,$(util)) $(list_incdir) -suppress 2583 # Suppress message that always_latch may not be checked thoroughly by QuestaSim. 
# Compile agents, interfaces and environments - vlog$(questa_version) $(compile_flag) -work $(library) -pedanticerrors $(src) $(list_incdir) -suppress 2583 + vlog$(questa_version) $(compile_flag) -work $(library) -pedanticerrors $(filter %.sv,$(src)) $(list_incdir) -suppress 2583 + vcom$(questa_version) $(compile_flag_vhd) -work $(library) -pedanticerrors $(filter %.vhd,$(src)) touch $(library)/.build-srcs # build TBs @@ -136,7 +147,7 @@ sim_nopt: build simc: build vsim${questa_version} -64 -c -lib ${library} ${top_level}_optimized +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \ - +BASEDIR=$(riscv-test-dir) $(uvm-flags) +ASMTEST=$(riscv-test) "+UVM_VERBOSITY=HIGH" -coverage -classdebug -sv_lib $(library)/elf_dpi -do "run -all; do tb/wave/wave_core.do; exit" + +BASEDIR=$(riscv-test-dir) $(uvm-flags) +ASMTEST=$(riscv-test) "+UVM_VERBOSITY=HIGH" -coverage -classdebug -sv_lib $(library)/elf_dpi -do "set NumericStdNoWarnings 1; run -all; do tb/wave/wave_core.do; exit" run-asm-tests: build $(foreach test, $(riscv-tests), vsim$(questa_version) -64 +BASEDIR=$(riscv-test-dir) +max-cycles=$(max_cycles) \ @@ -174,7 +185,7 @@ $(tests): build # User Verilator verilate: $(verilator) $(ariane_pkg) $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) $(wildcard src/axi_slice/*.sv) \ - src/util/cluster_clock_gating.sv src/util/behav_sram.sv src/axi_mem_if/axi2mem.sv tb/agents/axi_if/axi_if.sv \ + src/util/cluster_clock_gating.sv src/util/behav_sram.sv src/axi_mem_if/src/axi2mem.sv tb/agents/axi_if/axi_if.sv \ --unroll-count 1024 -Wno-fatal -Wno-UNOPTFLAT -LDFLAGS "-lfesvr" -CFLAGS "-std=c++11" -Wall --cc --trace \ $(list_incdir) --top-module ariane_wrapped --exe tb/ariane_tb.cpp tb/simmem.cpp cd obj_dir && make -j8 -f Variane_wrapped.mk diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index 7f00d4ede..a0ef16361 100755 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -27,7 +27,7 @@ package ariane_pkg; localparam NR_SB_ENTRIES = 8; // number of 
scoreboard entries localparam TRANS_ID_BITS = $clog2(NR_SB_ENTRIES); // depending on the number of scoreboard entries we need that many bits // to uniquely identify the entry in the scoreboard - localparam NR_WB_PORTS = 5; + localparam NR_WB_PORTS = 6; localparam ASID_WIDTH = 1; localparam BTB_ENTRIES = 8; localparam BHT_ENTRIES = 32; @@ -36,8 +36,8 @@ package ariane_pkg; localparam NR_COMMIT_PORTS = 2; // Floating-point extensions configuration - localparam bit RVF = 1'b0; // Is F extension enabled - localparam bit RVD = 1'b0; // Is D extension enabled + localparam bit RVF = 1'b1; // Is F extension enabled + localparam bit RVD = 1'b1; // Is D extension enabled // Transprecision floating-point extensions configuration @@ -156,7 +156,7 @@ package ariane_pkg; } bht_prediction_t; typedef enum logic[3:0] { - NONE, LOAD, STORE, ALU, CTRL_FLOW, MULT, CSR, FPU, FPU_VEC, FPU_VEC_REPL + NONE, LOAD, STORE, ALU, CTRL_FLOW, MULT, CSR, FPU, FPU_VEC } fu_t; localparam EXC_OFF_RST = 8'h80; diff --git a/src/ariane.sv b/src/ariane.sv index 1a1283097..044b86eac 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -151,6 +151,8 @@ module ariane #( // FPU logic fpu_ready_ex_id; logic fpu_valid_id_ex; + logic [1:0] fpu_fmt_id_ex; + logic [2:0] fpu_rm_id_ex; logic [TRANS_ID_BITS-1:0] fpu_trans_id_ex_id; logic [63:0] fpu_result_ex_id; logic fpu_valid_ex_id; @@ -193,7 +195,7 @@ module ariane #( // CSR <-> * // -------------- logic [4:0] fflags_csr_commit; - logic [2:0] frm_csr_id_issue; + logic [2:0] frm_csr_id_issue_ex; logic enable_translation_csr_ex; logic en_ld_st_translation_csr_ex; priv_lvl_t ld_st_priv_lvl_csr_ex; @@ -316,7 +318,7 @@ module ariane #( .issue_instr_ack_i ( issue_instr_issue_id ), .priv_lvl_i ( priv_lvl ), - .frm_i ( frm_csr_id_issue ), + .frm_i ( frm_csr_id_issue_ex ), .tvm_i ( tvm_csr_id ), .tw_i ( tw_csr_id ), .tsr_i ( tsr_csr_id ), @@ -371,14 +373,16 @@ module ariane #( // FPU .fpu_ready_i ( fpu_ready_ex_id ), .fpu_valid_o ( fpu_valid_id_ex ), + .fpu_fmt_o ( 
fpu_fmt_id_ex ), + .fpu_rm_o ( fpu_rm_id_ex ), // CSR .csr_ready_i ( csr_ready_ex_id ), .csr_valid_o ( csr_valid_id_ex ), - .trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, branch_trans_id_ex_id, csr_trans_id_ex_id, mult_trans_id_ex_id }), - .wbdata_i ( {alu_result_ex_id, lsu_result_ex_id, branch_result_ex_id, csr_result_ex_id, mult_result_ex_id }), - .ex_ex_i ( {{$bits(exception_t){1'b0}}, lsu_exception_ex_id, branch_exception_ex_id, {$bits(exception_t){1'b0}}, {$bits(exception_t){1'b0}} }), - .wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, branch_valid_ex_id, csr_valid_ex_id, mult_valid_ex_id }), + .trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, branch_trans_id_ex_id, csr_trans_id_ex_id, mult_trans_id_ex_id, fpu_trans_id_ex_id }), + .wbdata_i ( {alu_result_ex_id, lsu_result_ex_id, branch_result_ex_id, csr_result_ex_id, mult_result_ex_id, fpu_result_ex_id }), + .ex_ex_i ( {{$bits(exception_t){1'b0}}, lsu_exception_ex_id, branch_exception_ex_id, {$bits(exception_t){1'b0}}, {$bits(exception_t){1'b0}}, fpu_exception_ex_id }), + .wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, branch_valid_ex_id, csr_valid_ex_id, mult_valid_ex_id, fpu_valid_ex_id }), // Commit .waddr_i ( waddr_commit_id ), .wdata_i ( wdata_commit_id ), @@ -440,12 +444,16 @@ module ariane #( .mult_trans_id_o ( mult_trans_id_ex_id ), .mult_result_o ( mult_result_ex_id ), .mult_valid_o ( mult_valid_ex_id ), - .fpu_ready_o ( ), // FU is ready - .fpu_valid_i ( 1'b0 ), // Output is valid - .fpu_trans_id_o ( ), - .fpu_result_o ( ), - .fpu_valid_o ( ), - .fpu_exception_o ( ), + // FPU + .fpu_ready_o ( fpu_ready_ex_id ), + .fpu_valid_i ( fpu_valid_id_ex ), + .fpu_fmt_i ( fpu_fmt_id_ex ), + .fpu_rm_i ( fpu_rm_id_ex ), + .fpu_frm_i ( frm_csr_id_issue_ex ), + .fpu_trans_id_o ( fpu_trans_id_ex_id ), + .fpu_result_o ( fpu_result_ex_id ), + .fpu_valid_o ( fpu_valid_ex_id ), + .fpu_exception_o ( fpu_exception_ex_id ), // CSR .csr_ready_o ( csr_ready_ex_id ), .csr_valid_i ( csr_valid_id_ex ), @@ -537,7 
+545,7 @@ module ariane #( .trap_vector_base_o ( trap_vector_base_commit_pcgen ), .priv_lvl_o ( priv_lvl ), .fflags_o ( fflags_csr_commit ), - .frm_o ( frm_csr_id_issue ), + .frm_o ( frm_csr_id_issue_ex ), .ld_st_priv_lvl_o ( ld_st_priv_lvl_csr_ex ), .en_translation_o ( enable_translation_csr_ex ), .en_ld_st_translation_o ( en_ld_st_translation_csr_ex ), diff --git a/src/commit_stage.sv b/src/commit_stage.sv index a0865d369..31fdd8f82 100644 --- a/src/commit_stage.sv +++ b/src/commit_stage.sv @@ -110,7 +110,7 @@ module commit_stage #( // --------- // FPU Flags // --------- - if (commit_instr_i[0].fu == FPU) begin + if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin // write the CSR with potential exception flags from retiring floating point instruction csr_op_o = CSR_SET; csr_wdata_o = {59'b0, commit_instr_i[0].ex.cause[4:0]}; @@ -161,8 +161,8 @@ module commit_stage #( // check if the second instruction can be committed as well and the first wasn't a CSR instruction if (commit_ack_o[0] && commit_instr_i[1].valid && !halt_i && !(commit_instr_i[0].fu inside {CSR}) && !flush_dcache_i) begin // only if the first instruction didn't throw an exception and this instruction won't throw an exception - // and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT or FPU - if (!exception_o.valid && !commit_instr_i[1].ex.valid && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU})) begin + // and the functional unit is of type ALU, LOAD, CTRL_FLOW, MULT, FPU or FPU_VEC + if (!exception_o.valid && !commit_instr_i[1].ex.valid && (commit_instr_i[1].fu inside {ALU, LOAD, CTRL_FLOW, MULT, FPU, FPU_VEC})) begin if (is_rd_fpr(commit_instr_i[1].op)) we_fpr_o[1] = 1'b1; @@ -173,7 +173,7 @@ module commit_stage #( // additionally check if we are retiring an FPU instruction because we need to make sure that we write all // exception flags - if (commit_instr_i[1].fu == FPU) begin + if (commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin csr_op_o = CSR_SET; if 
(csr_write_fflags_o) csr_wdata_o = {59'b0, (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])}; diff --git a/src/decoder.sv b/src/decoder.sv index cdde7da23..6e8415b1f 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -435,9 +435,6 @@ module decoder ( // check disallowed replication if (~allow_replication & instr.rvftype.repl) illegal_instr = 1'b1; - // communicate replication to the unit - if (instr.rvftype.repl) instruction_o.fu = FPU_VEC_REPL; // Same unit, but sets 'vectorial' and 'replication' signals - // check rounding mode if (check_fprm) begin unique case (frm_i) inside // actual rounding mode from frm csr @@ -761,14 +758,15 @@ module decoder ( 5'b01000: begin instruction_o.op = FCVT_F2F; // fcvt.fmt.fmt - FP to FP Conversion imm_select = IIMM; // rs2 holds part of the intruction - if (instr.rftype.rs2[24:22]) illegal_instr = 1'b1; // bits [21:20] used, other bits must be 0 + if (instr.rftype.rs2[24:23]) illegal_instr = 1'b1; // bits [22:20] used, other bits must be 0 // check source format - unique case (instr.rftype.rs2[21:20]) + unique case (instr.rftype.rs2[22:20]) // Only process instruction if corresponding extension is active (static) - 2'b00: if (~RVF) illegal_instr = 1'b1; - 2'b01: if (~RVD) illegal_instr = 1'b1; - 2'b10: if (~XF16 & ~XF16ALT) illegal_instr = 1'b1; - 2'b11: if (~XF8) illegal_instr = 1'b1; + 3'b000: if (~RVF) illegal_instr = 1'b1; + 3'b001: if (~RVD) illegal_instr = 1'b1; + 3'b010: if (~XF16) illegal_instr = 1'b1; + 3'b110: if (~XF16ALT) illegal_instr = 1'b1; + 3'b011: if (~XF8) illegal_instr = 1'b1; default: illegal_instr = 1'b1; endcase end diff --git a/src/ex_stage.sv b/src/ex_stage.sv index 982093574..a385afd5b 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -80,6 +80,9 @@ module ex_stage #( // FPU output logic fpu_ready_o, // FU is ready input logic fpu_valid_i, // Output is valid + input logic [1:0] fpu_fmt_i, // FP format + input logic [2:0] fpu_rm_i, // FP rm + input logic [2:0] fpu_frm_i, // FP frm 
csr output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o, output logic [63:0] fpu_result_o, output logic fpu_valid_o, @@ -139,6 +142,25 @@ module ex_stage #( .* ); + // ---------------- + // FPU + // ---------------- + generate + if( FP_PRESENT ) begin : fpu_gen + fpu_wrap fpu_i ( + .operand_c_i ( imm_i ), + .result_o ( fpu_result_o ), + .* + ); + end else begin : no_fpu_gen + assign fpu_ready_o = '0; + assign fpu_trans_id_o = '0; + assign fpu_result_o = '0; + assign fpu_valid_o = '0; + assign fpu_exception_o = '0; + end + endgenerate + // ---------------- // Load-Store Unit // ---------------- diff --git a/src/fpnew b/src/fpnew new file mode 160000 index 000000000..6bc7c8681 --- /dev/null +++ b/src/fpnew @@ -0,0 +1 @@ +Subproject commit 6bc7c86818b19467322ab69a7fc387db4f727821 diff --git a/src/fpu_legacy b/src/fpu_legacy new file mode 160000 index 000000000..6dc841aeb --- /dev/null +++ b/src/fpu_legacy @@ -0,0 +1 @@ +Subproject commit 6dc841aebe7f7f02570e18135f407f8f8a903320 diff --git a/src/fpu_wrap.sv b/src/fpu_wrap.sv new file mode 100644 index 000000000..d882a2d24 --- /dev/null +++ b/src/fpu_wrap.sv @@ -0,0 +1,492 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// Author: Stefan Mach, ETH Zurich +// Date: 12.04.2018 +// Description: Wrapper for the floating-point unit + + +import ariane_pkg::*; + +module fpu_wrap ( + input logic clk_i, + input logic rst_ni, + input logic flush_i, + input logic [TRANS_ID_BITS-1:0] trans_id_i, + input fu_t fu_i, + output logic fpu_ready_o, + input logic fpu_valid_i, + input fu_op operator_i, + input logic [FLEN-1:0] operand_a_i, + input logic [FLEN-1:0] operand_b_i, + input logic [FLEN-1:0] operand_c_i, + input logic [1:0] fpu_fmt_i, + input logic [2:0] fpu_rm_i, + input logic [2:0] fpu_frm_i, + output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o, + output logic [FLEN-1:0] result_o, + output logic fpu_valid_o, + output exception_t fpu_exception_o +); + + //----------------------------------- + // FPnew encoding from FPnew package + //----------------------------------- + localparam OPBITS = 4; + localparam FMTBITS = 3; + localparam IFMTBITS = 2; + + integer OP_NUMBITS, FMT_NUMBITS, IFMT_NUMBITS; + + logic [OPBITS-1:0] OP_FMADD; + logic [OPBITS-1:0] OP_FNMSUB; + logic [OPBITS-1:0] OP_ADD; + logic [OPBITS-1:0] OP_MUL; + logic [OPBITS-1:0] OP_DIV; + logic [OPBITS-1:0] OP_SQRT; + logic [OPBITS-1:0] OP_SGNJ; + logic [OPBITS-1:0] OP_MINMAX; + logic [OPBITS-1:0] OP_CMP; + logic [OPBITS-1:0] OP_CLASS; + logic [OPBITS-1:0] OP_F2I; + logic [OPBITS-1:0] OP_I2F; + logic [OPBITS-1:0] OP_F2F; + logic [OPBITS-1:0] OP_CPK; + + logic [FMTBITS-1:0] FMT_FP32; + logic [FMTBITS-1:0] FMT_FP64; + logic [FMTBITS-1:0] FMT_FP16; + logic [FMTBITS-1:0] FMT_FP8; + logic [FMTBITS-1:0] FMT_FP16ALT; + logic [FMTBITS-1:0] FMT_CUST1; + logic [FMTBITS-1:0] FMT_CUST2; + logic [FMTBITS-1:0] FMT_CUST3; + + logic [IFMTBITS-1:0] IFMT_INT8; + logic [IFMTBITS-1:0] IFMT_INT16; + logic [IFMTBITS-1:0] IFMT_INT32; + logic [IFMTBITS-1:0] IFMT_INT64; + + // bind the constants from the fpnew entity + fpnew_pkg_constants i_fpnew_constants ( .* ); + + // always_comb begin + // assert (OPBITS >= OP_NUMBITS) else $error("OPBITS is smaller 
than %0d", OP_NUMBITS); + // assert (FMTBITS >= FMT_NUMBITS) else $error("FMTBITS is smaller than %0d", FMT_NUMBITS); + // assert (IFMTBITS >= IFMT_NUMBITS) else $error("IFMTBITS is smaller than %0d", IFMT_NUMBITS); + // end + + //------------------------------------------------- + // Inputs to the FPU and protocol inversion buffer + //------------------------------------------------- + logic [FLEN-1:0] operand_a_n, operand_a_q, operand_a; + logic [FLEN-1:0] operand_b_n, operand_b_q, operand_b; + logic [FLEN-1:0] operand_c_n, operand_c_q, operand_c; + logic [OPBITS-1:0] fpu_op_n, fpu_op_q, fpu_op; + logic fpu_op_mod_n, fpu_op_mod_q, fpu_op_mod; + logic [FMTBITS-1:0] fpu_fmt_n, fpu_fmt_q, fpu_fmt; + logic [FMTBITS-1:0] fpu_fmt2_n, fpu_fmt2_q, fpu_fmt2; + logic [IFMTBITS-1:0] fpu_ifmt_n, fpu_ifmt_q, fpu_ifmt; + logic [2:0] fpu_rm_n, fpu_rm_q, fpu_rm; + logic fpu_vec_op_n, fpu_vec_op_q, fpu_vec_op; + + logic [TRANS_ID_BITS-1:0] fpu_tag_n, fpu_tag_q, fpu_tag; + + logic fpu_in_ready, reg_in_ready; + logic fpu_in_valid, reg_in_valid; + logic fpu_out_ready, reg_out_ready; + logic fpu_out_valid, reg_out_valid; + + logic [4:0] fpu_status; + + + // generate if (FP_PRESENT) begin : fpu_gen + + //----------------------------- + // Translate inputs + //----------------------------- + + always_comb begin : input_translation + + automatic logic vec_replication; // control honoring of replication flag + + // Default Values + operand_a_n = operand_a_i; + operand_b_n = operand_b_i; + operand_c_n = operand_c_i; + fpu_op_n = OP_SGNJ; // sign injection by default + fpu_op_mod_n = 1'b0; + fpu_fmt_n = FMT_FP32; + fpu_fmt2_n = FMT_FP32; + fpu_ifmt_n = IFMT_INT32; + fpu_rm_n = fpu_rm_i; + fpu_vec_op_n = fu_i == FPU_VEC; + fpu_tag_n = trans_id_i; + vec_replication = fpu_rm_i[0]; // replication bit is sent via rm field + + // Scalar Rounding Modes - some ops encode inside RM but use smaller range + if (!(fpu_rm_i inside {[3'b000:3'b100]})) + fpu_rm_n = fpu_frm_i; + + // Vectorial ops always 
consult FRM + if (fpu_vec_op_n) + fpu_rm_n = fpu_frm_i; + + // Formats + unique case (fpu_fmt_i) + // FP32 + 2'b00 : fpu_fmt_n = FMT_FP32; + // FP64 or FP16ALT (vectorial) + 2'b01 : fpu_fmt_n = fpu_vec_op_n ? FMT_FP16ALT : FMT_FP64; + // FP16 or FP16ALT (scalar) + 2'b10 : begin + if (!fpu_vec_op_n && fpu_rm_i==3'b101) + fpu_fmt_n = FMT_FP16ALT; + else + fpu_fmt_n = FMT_FP16; + end + // FP8 + default : fpu_fmt_n = FMT_FP8; + endcase + + + // Operations (this can modify the rounding mode field!) + case (operator_i) + // Addition + FADD : fpu_op_n = OP_ADD; + // Subtraction is modified ADD + FSUB : begin + fpu_op_n = OP_ADD; + fpu_op_mod_n = 1'b1; + end + // Multiplication + FMUL : fpu_op_n = OP_MUL; + // Division + FDIV : fpu_op_n = OP_DIV; + // Min/Max - OP is encoded in rm (000-001) + FMIN_MAX : fpu_op_n = OP_MINMAX; + // Square Root + FSQRT : fpu_op_n = OP_SQRT; + // Fused Multiply Add + FMADD : fpu_op_n = OP_FMADD; + // Fused Multiply Subtract is modified FMADD + FMSUB : begin + fpu_op_n = OP_FMADD; + fpu_op_mod_n = 1'b1; + end + // Fused Negated Multiply Subtract (FPnew opcode OP_FNMSUB, not the fu_op label) + FNMSUB : fpu_op_n = OP_FNMSUB; + // Fused Negated Multiply Add is modified FNMSUB + FNMADD : begin + fpu_op_n = OP_FNMSUB; + fpu_op_mod_n = 1'b1; + end + // Float to Int Cast - Op encoded in lowest two imm bits or rm + FCVT_F2I : begin + fpu_op_n = OP_F2I; + // Vectorial Ops encoded in rm (000-001) + if (fpu_vec_op_n) begin + fpu_op_mod_n = fpu_rm_i[0]; + vec_replication = 1'b0; // no replication, R bit used for op + unique case (fpu_fmt_i) + 2'b00 : fpu_ifmt_n = IFMT_INT32; + 2'b01, + 2'b10 : fpu_ifmt_n = IFMT_INT16; + 2'b11 : fpu_ifmt_n = IFMT_INT8; + endcase + // Scalar casts encoded in imm + end else begin + fpu_op_mod_n = operand_c_n[0]; + if (operand_c_n[1]) + fpu_ifmt_n = IFMT_INT64; + else + fpu_ifmt_n = IFMT_INT32; + end + end + // Int to Float Cast - Op encoded in lowest two imm bits or rm + FCVT_I2F : begin + fpu_op_n = OP_I2F; + // Vectorial Ops encoded in rm (000-001) + if (fpu_vec_op_n) begin
+ fpu_op_mod_n = fpu_rm_i[0]; + vec_replication = 1'b0; // no replication, R bit used for op + unique case (fpu_fmt_i) + 2'b00 : fpu_ifmt_n = IFMT_INT32; + 2'b01, + 2'b10 : fpu_ifmt_n = IFMT_INT16; + 2'b11 : fpu_ifmt_n = IFMT_INT8; + endcase + // Scalar casts encoded in imm + end else begin + fpu_op_mod_n = operand_c_n[0]; + if (operand_c_n[1]) + fpu_ifmt_n = IFMT_INT64; + else + fpu_ifmt_n = IFMT_INT32; + end + end + // Float to Float Cast - Source format encoded in lowest two/three imm bits + FCVT_F2F : begin + fpu_op_n = OP_F2F; + // Vectorial ops encoded in lowest two imm bits + if (fpu_vec_op_n) begin + vec_replication = 1'b0; // no replication for casts (not needed) + unique case (operand_c_n[1:0]) + 2'b00: fpu_fmt2_n = FMT_FP32; + 2'b01: fpu_fmt2_n = FMT_FP16ALT; + 2'b10: fpu_fmt2_n = FMT_FP16; + 2'b11: fpu_fmt2_n = FMT_FP8; + endcase + // Scalar ops encoded in lowest three imm bits + end else begin + unique case (operand_c_n[2:0]) + 3'b000: fpu_fmt2_n = FMT_FP32; + 3'b001: fpu_fmt2_n = FMT_FP64; + 3'b010: fpu_fmt2_n = FMT_FP16; + 3'b110: fpu_fmt2_n = FMT_FP16ALT; + 3'b011: fpu_fmt2_n = FMT_FP8; + endcase + end + end + // Scalar Sign Injection - op encoded in rm (000-010) + FSGNJ : fpu_op_n = OP_SGNJ; + // Move from FPR to GPR - mapped to NOP since no recoding + FMV_F2X : begin + fpu_op_n = OP_SGNJ; + operand_b_n = operand_a_n; + vec_replication = 1'b0; // no replication, we set second operand + end + // Move from GPR to FPR - mapped to NOP since no recoding + FMV_X2F : begin + fpu_op_n = OP_SGNJ; + operand_b_n = operand_a_n; + vec_replication = 1'b0; // no replication, we set second operand + end + // Scalar Comparisons - op encoded in rm (000-010) + FCMP : fpu_op_n = OP_CMP; + // Classification + FCLASS : fpu_op_n = OP_CLASS; + // Vectorial Minimum - set up scalar encoding in rm + VFMIN : begin + fpu_op_n = OP_MINMAX; + fpu_rm_n = 3'b000; // min + end + // Vectorial Maximum - set up scalar encoding in rm + VFMAX : begin + fpu_op_n = OP_MINMAX; + fpu_rm_n = 
3'b001; // max + end + // Vectorial Sign Injection - set up scalar encoding in rm + VFSGNJ : begin + fpu_op_n = OP_SGNJ; + fpu_rm_n = 3'b000; // sgnj + end + // Vectorial Negated Sign Injection - set up scalar encoding in rm + VFSGNJN : begin + fpu_op_n = OP_SGNJ; + fpu_rm_n = 3'b001; // sgnjn + end + // Vectorial Xored Sign Injection - set up scalar encoding in rm + VFSGNJX : begin + fpu_op_n = OP_SGNJ; + fpu_rm_n = 3'b010; // sgnjx + end + // Vectorial Equals - set up scalar encoding in rm + VFEQ : begin + fpu_op_n = OP_CMP; + fpu_rm_n = 3'b010; // eq + end + // Vectorial Not Equals - set up scalar encoding in rm + VFNE : begin + fpu_op_n = OP_CMP; + fpu_op_mod_n = 1'b1; // invert output + fpu_rm_n = 3'b010; // eq + end + // Vectorial Less Than - set up scalar encoding in rm + VFLT : begin + fpu_op_n = OP_CMP; + fpu_rm_n = 3'b001; // lt + end + // Vectorial Greater or Equal - set up scalar encoding in rm + VFGE : begin + fpu_op_n = OP_CMP; + fpu_op_mod_n = 1'b1; // invert output + fpu_rm_n = 3'b001; // lt + end + // Vectorial Less or Equal - set up scalar encoding in rm + VFLE : begin + fpu_op_n = OP_CMP; + fpu_rm_n = 3'b000; // le + end + // Vectorial Greater Than - set up scalar encoding in rm + VFGT : begin + fpu_op_n = OP_CMP; + fpu_op_mod_n = 1'b1; // invert output + fpu_rm_n = 3'b000; // le + end + + // VFCPKAB_S : + // VFCPKCD_S : + // VFCPKAB_D : + // VFCPKCD_D : + + // by default set opb = opa to have a sgnj nop + default : operand_b_n = operand_a_n; + endcase + + // Replication + if (fpu_vec_op_n && vec_replication) begin + case (fpu_fmt_n) + FMT_FP32 : operand_b_n = RVD ? {2{operand_b_i[31:0]}} : operand_b_i; + FMT_FP16, + FMT_FP16ALT : operand_b_n = RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}}; + FMT_FP8 : operand_b_n = RVD ? 
{8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}}; + endcase // fpu_fmt_n + end + end + + + //--------------------------------------------------------- + // Upstream protocol inversion: InValid depends on InReady + //--------------------------------------------------------- + + // Input is ready whenever the register is free to accept a potentially spilling instruction + assign fpu_ready_o = reg_in_ready; + + // Input data goes to the buffer register if the received instruction cannot be handled + assign reg_in_valid = fpu_valid_i & ~fpu_in_ready; + + // Data being applied to unit is taken from the register if there's an instruction waiting + assign fpu_in_valid = reg_out_valid | fpu_valid_i; + + // The input register is ready to accept new data if: + // 1. The current instruction will be processed by the fpu + // 2. There is no instruction waiting in the register + assign reg_in_ready = reg_out_ready | ~reg_out_valid; + + // Register output side is signalled ready if: + // 1. The operation held in the reg is valid and will be processed + // 2. 
The register doesn't hold a valid instruction + assign reg_out_ready = fpu_in_ready | ~reg_out_valid; + + // Buffer register + always_ff @(posedge clk_i or negedge rst_ni) begin : fp_buffer_reg + if(~rst_ni) begin + reg_out_valid <= '0; + operand_a_q <= '0; + operand_b_q <= '0; + operand_c_q <= '0; + fpu_op_q <= '0; + fpu_op_mod_q <= '0; + fpu_fmt_q <= '0; + fpu_fmt2_q <= '0; + fpu_ifmt_q <= '0; + fpu_rm_q <= '0; + fpu_vec_op_q <= '0; + fpu_tag_q <= '0; + end else begin + if (reg_out_ready) begin // Only advance pipeline if unit is ready for our op + reg_out_valid <= reg_in_valid; + if (reg_in_valid) begin // clock gate data to save power + operand_a_q <= operand_a_n; + operand_b_q <= operand_b_n; + operand_c_q <= operand_c_n; + fpu_op_q <= fpu_op_n; + fpu_op_mod_q <= fpu_op_mod_n; + fpu_fmt_q <= fpu_fmt_n; + fpu_fmt2_q <= fpu_fmt2_n; + fpu_ifmt_q <= fpu_ifmt_n; + fpu_rm_q <= fpu_rm_n; + fpu_vec_op_q <= fpu_vec_op_n; + fpu_tag_q <= fpu_tag_n; + end + end + end + end + + // Select FPU input data: from register if valid data in register, else directly from input + assign operand_a = reg_out_valid ? operand_a_q : operand_a_n; + assign operand_b = reg_out_valid ? operand_b_q : operand_b_n; + assign operand_c = reg_out_valid ? operand_c_q : operand_c_n; + assign fpu_op = reg_out_valid ? fpu_op_q : fpu_op_n; + assign fpu_op_mod = reg_out_valid ? fpu_op_mod_q : fpu_op_mod_n; + assign fpu_fmt = reg_out_valid ? fpu_fmt_q : fpu_fmt_n; + assign fpu_fmt2 = reg_out_valid ? fpu_fmt2_q : fpu_fmt2_n; + assign fpu_ifmt = reg_out_valid ? fpu_ifmt_q : fpu_ifmt_n; + assign fpu_rm = reg_out_valid ? fpu_rm_q : fpu_rm_n; + assign fpu_vec_op = reg_out_valid ? fpu_vec_op_q : fpu_vec_op_n; + assign fpu_tag = reg_out_valid ?
fpu_tag_q : fpu_tag_n; + + //--------------- + // FPU instance + //--------------- + fpnew_top #( + .WIDTH ( FLEN ), + .TAG_WIDTH ( TRANS_ID_BITS ), + .RV64 ( 1'b1 ), + .RVF ( RVF ), + .RVD ( RVD ), + .Xf16 ( XF16 ), + .Xf16alt ( XF16ALT ), + .Xf8 ( XF8 ), + .Xfvec ( XFVEC ), + // TODO MOVE THESE VALUES TO PACKAGE + .LATENCY_COMP_F ( 31'h2 ), + .LATENCY_COMP_D ( 31'h3 ), + .LATENCY_COMP_Xf16 ( 31'h2 ), + .LATENCY_COMP_Xf16alt ( 31'h2 ), + .LATENCY_COMP_Xf8 ( 31'h1 ), + .LATENCY_DIVSQRT ( 31'h1 ), + .LATENCY_NONCOMP ( 31'h0 ), + .LATENCY_CONV ( 31'h1 ) + ) fpnew_top_i ( + .Clk_CI ( clk_i ), + .Reset_RBI ( rst_ni ), + .A_DI ( operand_a ), + .B_DI ( operand_b ), + .C_DI ( operand_c ), + .RoundMode_SI ( fpu_rm ), + .Op_SI ( fpu_op ), + .OpMod_SI ( fpu_op_mod ), + .VectorialOp_SI ( fpu_vec_op ), + .FpFmt_SI ( fpu_fmt ), + .FpFmt2_SI ( fpu_fmt2 ), + .IntFmt_SI ( fpu_ifmt ), + .Tag_DI ( fpu_tag ), + .InValid_SI ( fpu_in_valid ), + .InReady_SO ( fpu_in_ready ), + .Z_DO ( result_o ), + .Status_DO ( fpu_status ), + .Tag_DO ( fpu_trans_id_o ), + .OutValid_SO ( fpu_out_valid ), + .OutReady_SI ( fpu_out_ready ) + ); + + // Pack status flag into exception cause, tval ignored in wb, exception is always invalid + assign fpu_exception_o.cause = {59'h0, fpu_status}; + assign fpu_exception_o.valid = 1'b0; + + // Downstream write port is dedicated to FPU and always ready + assign fpu_out_ready = 1'b1; + + // Downstream valid from unit + assign fpu_valid_o = fpu_out_valid; + + // end else begin : no_fpu_gen + + // assign fpu_ready_o = 1'b0; + // assign fpu_trans_id_o = '0; + // assign result_o = '0; + // assign fpu_valid_o = 1'b0; + // assign fpu_exception_o = '0; + // end + // endgenerate + +endmodule diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index 9eee28f02..61f611bb6 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -71,6 +71,8 @@ module issue_read_operands #( // FPU input logic fpu_ready_i, // FU is ready output logic
fpu_valid_o, // Output is valid + output logic [1:0] fpu_fmt_o, // FP fmt field from instr. + output logic [2:0] fpu_rm_o, // FP rm field from instr. // CSR input logic csr_ready_i, // FU is ready output logic csr_valid_o, // Output is valid @@ -94,12 +96,14 @@ module issue_read_operands #( operand_b_n, operand_b_q, imm_n, imm_q; - logic alu_valid_n, alu_valid_q; - logic mult_valid_n, mult_valid_q; - logic fpu_valid_n, fpu_valid_q; - logic lsu_valid_n, lsu_valid_q; - logic csr_valid_n, csr_valid_q; - logic branch_valid_n, branch_valid_q; + logic alu_valid_n, alu_valid_q; + logic mult_valid_n, mult_valid_q; + logic fpu_valid_n, fpu_valid_q; + logic [1:0] fpu_fmt_n, fpu_fmt_q; + logic [2:0] fpu_rm_n, fpu_rm_q; + logic lsu_valid_n, lsu_valid_q; + logic csr_valid_n, csr_valid_q; + logic branch_valid_n, branch_valid_q; logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; fu_op operator_n, operator_q; // operation to perform @@ -107,6 +111,11 @@ module issue_read_operands #( // forwarding signals logic forward_rs1, forward_rs2, forward_rs3; + + // original instruction stored in tval + instruction_t orig_instr; + assign orig_instr = instruction_t'(issue_instr_i.ex.tval[31:0]); + // ID <-> EX registers assign operand_a_o = operand_a_q; assign operand_b_o = operand_b_q; @@ -118,6 +127,8 @@ module issue_read_operands #( assign csr_valid_o = csr_valid_q; assign mult_valid_o = mult_valid_q; assign fpu_valid_o = fpu_valid_q; + assign fpu_fmt_o = fpu_fmt_q; + assign fpu_rm_o = fpu_rm_q; assign trans_id_o = trans_id_q; assign imm_o = imm_q; // --------------- @@ -136,7 +147,8 @@ module issue_read_operands #( fu_busy = ~branch_ready_i; MULT: fu_busy = ~mult_ready_i; - FPU: + FPU, + FPU_VEC: fu_busy = ~fpu_ready_i; LOAD, STORE: fu_busy = ~lsu_ready_i; @@ -241,6 +253,8 @@ module issue_read_operands #( lsu_valid_n = 1'b0; mult_valid_n = 1'b0; fpu_valid_n = 1'b0; + fpu_fmt_n = 2'b0; + fpu_rm_n = 3'b0; csr_valid_n = 1'b0; branch_valid_n = 1'b0; // Exception pass through: @@ -254,8 +268,16 
@@ module issue_read_operands #( branch_valid_n = 1'b1; MULT: mult_valid_n = 1'b1; - FPU: + FPU : begin fpu_valid_n = 1'b1; + fpu_fmt_n = orig_instr.rftype.fmt; // fmt bits from instruction + fpu_rm_n = orig_instr.rftype.rm; // rm bits from instruction + end + FPU_VEC : begin + fpu_valid_n = 1'b1; + fpu_fmt_n = orig_instr.rvftype.vfmt; // vfmt bits from instruction + fpu_rm_n = {2'b0, orig_instr.rvftype.repl}; // repl bit from instruction + end LOAD, STORE: lsu_valid_n = 1'b1; CSR: @@ -403,6 +425,8 @@ module issue_read_operands #( branch_valid_q <= 1'b0; mult_valid_q <= 1'b0; fpu_valid_q <= 1'b0; + fpu_fmt_q <= 2'b0; + fpu_rm_q <= 3'b0; lsu_valid_q <= 1'b0; csr_valid_q <= 1'b0; fu_q <= NONE; @@ -419,6 +443,8 @@ module issue_read_operands #( branch_valid_q <= branch_valid_n; mult_valid_q <= mult_valid_n; fpu_valid_q <= fpu_valid_n; + fpu_fmt_q <= fpu_fmt_n; + fpu_rm_q <= fpu_rm_n; lsu_valid_q <= lsu_valid_n; csr_valid_q <= csr_valid_n; fu_q <= fu_n; diff --git a/src/issue_stage.sv b/src/issue_stage.sv index 14f71e25b..a1e300065 100644 --- a/src/issue_stage.sv +++ b/src/issue_stage.sv @@ -64,6 +64,8 @@ module issue_stage #( input logic fpu_ready_i, output logic fpu_valid_o, + output logic [1:0] fpu_fmt_o, // FP fmt field from instr. + output logic [2:0] fpu_rm_o, // FP rm field from instr. 
input logic csr_ready_i, output logic csr_valid_o, diff --git a/src/scoreboard.sv b/src/scoreboard.sv index 36afe17c6..ee93bd97d 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -39,7 +39,7 @@ module scoreboard #( output logic rs2_valid_o, input logic [REG_ADDR_SIZE-1:0] rs3_i, - output logic [63:0] rs3_o, + output logic [FLEN-1:0] rs3_o, output logic rs3_valid_o, // advertise instruction to commit stage, if commit_ack_i is asserted advance the commit pointer @@ -132,7 +132,7 @@ module scoreboard #( if (ex_i[i].valid) mem_n[trans_id_i[i]].sbe.ex = ex_i[i]; // write the fflags back from the FPU (exception valid is never set), leave tval intact - else if (mem_n[trans_id_i[i]].sbe.fu == FPU) + else if (mem_n[trans_id_i[i]].sbe.fu inside {FPU, FPU_VEC}) mem_n[trans_id_i[i]].sbe.ex.cause = ex_i[i].cause; end end @@ -203,7 +203,7 @@ module scoreboard #( always_comb begin : read_operands rs1_o = 64'b0; rs2_o = 64'b0; - rs3_o = 64'b0; + rs3_o = '0; rs1_valid_o = 1'b0; rs2_valid_o = 1'b0; rs3_valid_o = 1'b0; diff --git a/src/util/find_first_one.sv b/src/util/find_first_one.sv new file mode 100644 index 000000000..53653f20d --- /dev/null +++ b/src/util/find_first_one.sv @@ -0,0 +1,85 @@ +// Copyright (c) 2018 ETH Zurich, University of Bologna +// All rights reserved. +// +// This code is under development and not yet released to the public. +// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. +// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna. + + +/// A leading-one finder / leading zero counter. 
+/// Set FLIP to 0 for find_first_one => first_one_o is the index of the first one (from the LSB) +/// Set FLIP to 1 for leading zero counter => first_one_o is the number of leading zeroes (from the MSB) +module find_first_one #( + /// The width of the input vector. + parameter int WIDTH = -1, + parameter int FLIP = 0 +)( + input logic [WIDTH-1:0] in_i, + output logic [$clog2(WIDTH)-1:0] first_one_o, + output logic no_ones_o +); + + localparam int NUM_LEVELS = $clog2(WIDTH); + + // pragma translate_off + initial begin + assert(WIDTH >= 0); + end + // pragma translate_on + + logic [WIDTH-1:0][NUM_LEVELS-1:0] index_lut; + logic [2**NUM_LEVELS-1:0] sel_nodes; + logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0] index_nodes; + + logic [WIDTH-1:0] in_tmp; + + for (genvar i = 0; i < WIDTH; i++) begin + assign in_tmp[i] = FLIP ? in_i[WIDTH-1-i] : in_i[i]; + end + + for (genvar j = 0; j < WIDTH; j++) begin + assign index_lut[j] = j; + end + + for (genvar level = 0; level < NUM_LEVELS; level++) begin + + if (level < NUM_LEVELS-1) begin + for (genvar l = 0; l < 2**level; l++) begin + assign sel_nodes[2**level-1+l] = sel_nodes[2**(level+1)-1+l*2] | sel_nodes[2**(level+1)-1+l*2+1]; + assign index_nodes[2**level-1+l] = (sel_nodes[2**(level+1)-1+l*2] == 1'b1) ? + index_nodes[2**(level+1)-1+l*2] : index_nodes[2**(level+1)-1+l*2+1]; + end + end + + if (level == NUM_LEVELS-1) begin + for (genvar k = 0; k < 2**level; k++) begin + // if two successive indices are still in the vector... + if (k * 2 < WIDTH-1) begin + assign sel_nodes[2**level-1+k] = in_tmp[k*2] | in_tmp[k*2+1]; + assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1]; + end + // if only the first index is still in the vector... 
+ if (k * 2 == WIDTH-1) begin + assign sel_nodes[2**level-1+k] = in_tmp[k*2]; + assign index_nodes[2**level-1+k] = index_lut[k*2]; + end + // if index is out of range + if (k * 2 > WIDTH-1) begin + assign sel_nodes[2**level-1+k] = 1'b0; + assign index_nodes[2**level-1+k] = '0; + end + end + end + end + + assign first_one_o = NUM_LEVELS > 0 ? index_nodes[0] : '0; + assign no_ones_o = NUM_LEVELS > 0 ? ~sel_nodes[0] : '1; + +endmodule From 9336a95c5f0dfee3dd372f4f2abcce6243671389 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Mon, 19 Mar 2018 14:54:00 +0100 Subject: [PATCH 17/94] :bug: Fix permanent selection of word divisions --- src/mult.sv | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mult.sv b/src/mult.sv index 814081611..8b1d9993a 100644 --- a/src/mult.sv +++ b/src/mult.sv @@ -124,6 +124,7 @@ module mult ( word_op_d = 1'b1; // regular operation end else begin + word_op_d = 1'b0; // no sign extending is necessary as we are already using the full 64 bit operand_a = operand_a_i; operand_b = operand_b_i; From 1ce21e961d682397c5c04230a72b0483036fc1f6 Mon Sep 17 00:00:00 2001 From: Jonathan Richard Robert Kimmitt Date: Wed, 21 Mar 2018 15:19:37 +0000 Subject: [PATCH 18/94] Add placeholders for PMP CSRs for memory protection. --- Makefile | 2 +- include/ariane_pkg.sv | 2 ++ src/csr_regfile.sv | 13 +++++++++++-- src/util/instruction_trace_item.svh | 2 ++ 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 85f62b219..9a5f37159 100755 --- a/Makefile +++ b/Makefile @@ -49,7 +49,7 @@ src := $(wildcard src/*.sv) $(wildcard tb/common/*.sv) $(wildcard src/axi2per/*. 
tbs := tb/alu_tb.sv tb/core_tb.sv tb/dcache_arbiter_tb.sv tb/store_queue_tb.sv tb/scoreboard_tb.sv tb/fifo_tb.sv # RISCV-tests path -riscv-test-dir := tmp/riscv-tests/build/isa +riscv-test-dir := $(RISCV)/riscv64-unknown-elf/share/riscv-tests/isa riscv-tests := rv64ui-p-add rv64ui-p-addi rv64ui-p-slli rv64ui-p-addiw rv64ui-p-addw rv64ui-p-and rv64ui-p-auipc \ rv64ui-p-beq rv64ui-p-bge rv64ui-p-bgeu rv64ui-p-andi rv64ui-p-blt rv64ui-p-bltu rv64ui-p-bne \ rv64ui-p-simple rv64ui-p-jal rv64ui-p-jalr rv64ui-p-or rv64ui-p-ori rv64ui-p-sub rv64ui-p-subw \ diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index a0ef16361..4d5bc25d0 100755 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -590,6 +590,8 @@ package ariane_pkg; CSR_MCAUSE = 12'h342, CSR_MTVAL = 12'h343, CSR_MIP = 12'h344, + CSR_PMPCFG0 = 12'h3A0, + CSR_PMPADDR0 = 12'h3B0, CSR_MVENDORID = 12'hF11, CSR_MARCHID = 12'hF12, CSR_MIMPID = 12'hF13, diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv index fe9efd718..828afcce8 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -141,6 +141,8 @@ module csr_regfile #( logic [63:0] medeleg_q, medeleg_d; logic [63:0] mideleg_q, mideleg_d; logic [63:0] mip_q, mip_d; + logic [63:0] pmpcfg0_q, pmpcfg0_d; + logic [63:0] pmpaddr0_q, pmpaddr0_d; logic [63:0] mie_q, mie_d; logic [63:0] mscratch_q, mscratch_d; logic [63:0] mepc_q, mepc_d; @@ -216,7 +218,6 @@ module csr_regfile #( CSR_MISA: csr_rdata = ISA_CODE; CSR_MEDELEG: csr_rdata = medeleg_q; CSR_MIDELEG: csr_rdata = mideleg_q; - CSR_MIP: csr_rdata = mip_q; CSR_MIE: csr_rdata = mie_q; CSR_MTVEC: csr_rdata = mtvec_q; CSR_MCOUNTEREN: csr_rdata = 64'b0; // not implemented @@ -224,6 +225,10 @@ module csr_regfile #( CSR_MEPC: csr_rdata = mepc_q; CSR_MCAUSE: csr_rdata = mcause_q; CSR_MTVAL: csr_rdata = mtval_q; + CSR_MIP: csr_rdata = mip_q; + // Placeholders for M-mode protection + CSR_PMPCFG0: csr_rdata = pmpcfg0_q; + CSR_PMPADDR0: csr_rdata = pmpaddr0_q; CSR_MVENDORID: csr_rdata = 64'b0; // not 
implemented CSR_MARCHID: csr_rdata = 64'b0; // PULP, anonymous source (no allocated ID yet) CSR_MIMPID: csr_rdata = 64'b0; // not implemented @@ -385,7 +390,6 @@ module csr_regfile #( // mask the register so that unsupported interrupts can never be set CSR_MIE: mie_d = csr_wdata & 64'hBBB; // we only support supervisor and m-mode interrupts - CSR_MIP: mip_d = mip; CSR_MTVEC: begin mtvec_d = {csr_wdata[63:2], 1'b0, csr_wdata[0]}; @@ -400,6 +404,11 @@ module csr_regfile #( CSR_MEPC: mepc_d = {csr_wdata[63:1], 1'b0}; CSR_MCAUSE: mcause_d = csr_wdata; CSR_MTVAL: mtval_d = csr_wdata; + CSR_MIP: mip_d = mip; + // Placeholders for M-mode protection + CSR_PMPCFG0: pmpcfg0_d = csr_wdata; + CSR_PMPADDR0: pmpaddr0_d = csr_wdata; + CSR_MCYCLE: cycle_d = csr_wdata; CSR_MINSTRET: instret = csr_wdata; CSR_DCACHE: dcache_d = csr_wdata[0]; // enable bit diff --git a/src/util/instruction_trace_item.svh b/src/util/instruction_trace_item.svh index 624e19c78..a8bfa52f7 100644 --- a/src/util/instruction_trace_item.svh +++ b/src/util/instruction_trace_item.svh @@ -82,6 +82,8 @@ class instruction_trace_item; CSR_MCAUSE: return "mcause"; CSR_MTVAL: return "mtval"; CSR_MIP: return "mip"; + CSR_PMPCFG0: return "pmpcfg0"; + CSR_PMPADDR0: return "pmpaddr0"; CSR_MVENDORID: return "mvendorid"; CSR_MARCHID: return "marchid"; CSR_MIMPID: return "mimpid"; From 3acc3e0d67f82eb4185d6e3676a4e428d1a9ad94 Mon Sep 17 00:00:00 2001 From: Jonathan Richard Robert Kimmitt Date: Wed, 21 Mar 2018 16:58:53 +0000 Subject: [PATCH 19/94] Add travis pre-check script and remove spurious spaces from Makefile --- Makefile | 10 +++++----- travis.sh | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 travis.sh diff --git a/Makefile b/Makefile index 9a5f37159..55e03727c 100755 --- a/Makefile +++ b/Makefile @@ -49,14 +49,14 @@ src := $(wildcard src/*.sv) $(wildcard tb/common/*.sv) $(wildcard src/axi2per/*. 
tbs := tb/alu_tb.sv tb/core_tb.sv tb/dcache_arbiter_tb.sv tb/store_queue_tb.sv tb/scoreboard_tb.sv tb/fifo_tb.sv # RISCV-tests path -riscv-test-dir := $(RISCV)/riscv64-unknown-elf/share/riscv-tests/isa +riscv-test-dir := tmp/riscv-tests/build/isa riscv-tests := rv64ui-p-add rv64ui-p-addi rv64ui-p-slli rv64ui-p-addiw rv64ui-p-addw rv64ui-p-and rv64ui-p-auipc \ rv64ui-p-beq rv64ui-p-bge rv64ui-p-bgeu rv64ui-p-andi rv64ui-p-blt rv64ui-p-bltu rv64ui-p-bne \ rv64ui-p-simple rv64ui-p-jal rv64ui-p-jalr rv64ui-p-or rv64ui-p-ori rv64ui-p-sub rv64ui-p-subw \ - rv64ui-p-xor rv64ui-p-xori rv64ui-p-slliw rv64ui-p-sll rv64ui-p-slli rv64ui-p-sllw \ - rv64ui-p-slt rv64ui-p-slti rv64ui-p-sltiu rv64ui-p-sltu rv64ui-p-sra rv64ui-p-srai \ - rv64ui-p-sraiw rv64ui-p-sraw rv64ui-p-srl rv64ui-p-srli rv64ui-p-srliw rv64ui-p-srlw \ - rv64ui-p-lb rv64ui-p-lbu rv64ui-p-ld rv64ui-p-lh rv64ui-p-lhu rv64ui-p-lui rv64ui-p-lw rv64ui-p-lwu \ + rv64ui-p-xor rv64ui-p-xori rv64ui-p-slliw rv64ui-p-sll rv64ui-p-slli rv64ui-p-sllw \ + rv64ui-p-slt rv64ui-p-slti rv64ui-p-sltiu rv64ui-p-sltu rv64ui-p-sra rv64ui-p-srai \ + rv64ui-p-sraiw rv64ui-p-sraw rv64ui-p-srl rv64ui-p-srli rv64ui-p-srliw rv64ui-p-srlw \ + rv64ui-p-lb rv64ui-p-lbu rv64ui-p-ld rv64ui-p-lh rv64ui-p-lhu rv64ui-p-lui rv64ui-p-lw rv64ui-p-lwu \ rv64mi-p-csr rv64mi-p-mcsr rv64mi-p-illegal rv64mi-p-ma_addr rv64mi-p-ma_fetch rv64mi-p-sbreak rv64mi-p-scall \ rv64si-p-csr rv64si-p-ma_fetch rv64si-p-scall rv64si-p-wfi rv64si-p-sbreak rv64si-p-dirty \ rv64uc-p-rvc \ diff --git a/travis.sh b/travis.sh new file mode 100644 index 000000000..ffcc278ca --- /dev/null +++ b/travis.sh @@ -0,0 +1,37 @@ +# This script emulates what travis check in test does on the public server +# comment out next command if you don't want to use sudo +sudo apt install \ + gcc-4.8 \ + g++-4.8 \ + gperf \ + autoconf \ + automake \ + autotools-dev \ + libmpc-dev \ + libmpfr-dev \ + libgmp-dev \ + gawk \ + build-essential \ + bison \ + flex \ + texinfo \ + python-pexpect \ + 
libusb-1.0-0-dev \ + device-tree-compiler +# Customise this to a fast local disk +export TOP=/local/scratch/$USER +export TRAVIS_BUILD_DIR=$TOP/ariane-isatest +export RISCV=$TOP/riscv_install +export PATH=$TOP/riscv_install/bin:$TRAVIS_BUILD_DIR/tmp/bin:$PATH +export CXX=g++-4.8 CC=gcc-4.8 +ci/make-tmp.sh +export LIBRARY_PATH=$TRAVIS_BUILD_DIR/tmp/lib +export LD_LIBRARY_PATH=$TRAVIS_BUILD_DIR/tmp/lib +export C_INCLUDE_PATH=$TRAVIS_BUILD_DIR/tmp/include +export CPLUS_INCLUDE_PATH=$TRAVIS_BUILD_DIR/tmp/include +export VERILATOR_ROOT=$TRAVIS_BUILD_DIR/tmp/verilator-3.918/ +ci/build-riscv-gcc.sh +ci/install-verilator.sh +ci/install-fesvr.sh +ci/build-riscv-tests.sh +make run-asm-tests-verilator From 6ae5ee5bfbedbda85e3166a649d89d84f9450a1b Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Mon, 9 Apr 2018 13:14:15 +0200 Subject: [PATCH 20/94] Fix D$ behaviour during a flush --- src/cache_ctrl.sv | 7 ++++--- src/miss_handler.sv | 9 ++++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/cache_ctrl.sv b/src/cache_ctrl.sv index 1e5ed032d..dbf3f97e5 100644 --- a/src/cache_ctrl.sv +++ b/src/cache_ctrl.sv @@ -29,6 +29,7 @@ module cache_ctrl #( )( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low + input logic flush_i, input logic bypass_i, // enable cache output logic busy_o, // Core request ports @@ -145,7 +146,7 @@ module cache_ctrl #( IDLE: begin // a new request arrived - if (data_req_i) begin + if (data_req_i && !flush_i) begin // request the cache line - we can do this specualtive req_o = '1; @@ -186,7 +187,7 @@ module cache_ctrl #( tag_o = (state_q == WAIT_TAG_SAVED || mem_req_q.we) ? 
mem_req_q.tag : address_tag_i; // we speculatively request another transfer - if (data_req_i) begin + if (data_req_i && !flush_i) begin req_o = '1; end @@ -198,7 +199,7 @@ module cache_ctrl #( if (|hit_way_i) begin // we can request another cache-line if this was a load // make another request - if (data_req_i && !mem_req_q.we) begin + if (data_req_i && !mem_req_q.we && !flush_i) begin state_d = WAIT_TAG; // switch back to WAIT_TAG mem_req_d.index = address_index_i; mem_req_d.be = data_be_i; diff --git a/src/miss_handler.sv b/src/miss_handler.sv index ddde5b8fe..0c300a942 100644 --- a/src/miss_handler.sv +++ b/src/miss_handler.sv @@ -304,8 +304,12 @@ module miss_handler #( // not dirty ~> increment and continue end else begin // increment and re-request - cnt_d = cnt_q + (1'b1 << BYTE_OFFSET); - state_d = FLUSH_REQ_STATUS; + cnt_d = cnt_q + (1'b1 << BYTE_OFFSET); + state_d = FLUSH_REQ_STATUS; + addr_o = cnt_q; + req_o = 1'b1; + be_o.valid = '1; + we_o = 1'b1; // finished with flushing operation, go back to idle if (cnt_q[INDEX_WIDTH-1:BYTE_OFFSET] == NUM_WORDS-1) begin flush_ack_o = 1'b1; @@ -323,7 +327,6 @@ module miss_handler #( // only write the dirty array be_o.dirty = '1; be_o.valid = '1; - data_o = 'b0; cnt_d = cnt_q + (1'b1 << BYTE_OFFSET); // finished initialization if (cnt_q[INDEX_WIDTH-1:BYTE_OFFSET] == NUM_WORDS-1) From 5c7666f40273d8bd1a07ac6d7a088c2eb5b9325b Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Mon, 9 Apr 2018 15:11:15 +0200 Subject: [PATCH 21/94] Pump submodules, cherry-pick TLB clean-up --- CHANGELOG.md | 7 ++ CONTRIBUTING.md | 205 ++++++------------------------------------------ src/axi_mem_if | 2 +- tb | 2 +- 4 files changed, 31 insertions(+), 185 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42c2cb214..b5ad0bf63 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
## [Unreleased] +### 2.0.2 - 2018-04-08 + +### Changed + +- Bugfix in flush behaviour of cache #34 +- Pumped submodules + ### 2.0.1 - 2018-01-26 ### Added diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 37bb70dc0..038fd8b38 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,172 +1,6 @@ # Styleguides -## TL;DR - Absolute Minimum - -- If the existing coding style is sane, stick to it. -- Avoid `defines` and `ifdefs` as much as possible. -- Do not use tabs, use spaces. -- Use 4 spaces to open a new indentation level. -- Names of signals, modules, and structs should be *lower case with underscores* as whitespace replacements (e.g.: `fetch_busy`). Names of constants and parameters should be upper case with underscores. **Never use CamelCase**. -- Instantiation of modules should be prefixed with `i_`, e.g.: `i_prefetcher`. -- For port definitions keep a post-fix direction (`_o`, `_i`, `_io`). -- For active low signals put an additional (`n` for internal signals, `_no`, `_ni`, `_nio` for interface ones). -- Denote output of a register with `_q` and the input with `_d`. - -## Coding Style - -- Keep the files tidy. No superfluous line breaks, align ports on a common boundary. -- Do not use tabs, use spaces. If you really want to use tabs, use them consistently. -- Name dedicated signals wiring `module foo` (output) with `module bar` (input) `signal_foo_bar` -- Within an IP, use Interfaces to connect component instances whenever possible. -- Use Interfaces at the top-level interface of the IP, but also provide a wrapper that “unrolls” the Interfaces into input and output ports. -- Do not put overly large comment headers. Nevertheless, try to structure your HDL code, e.g.: - - ``` - // ------------------------------------ - // CSR - Control and Status Registers - // ------------------------------------ - ``` - -- Specify memory map and integration rules while coding, using the `crazy88` (TODO: Link to Documentation) syntax. 
-- Put `begin` statements on the same level as the block qualifier, for example: - - ```verilog - module A ( - input logic flush_i - ); - - logic whatever_signal; - - always_comb begin - if (flush_i) begin - // do some stuff here - end else if (whatever_signal) begin - // do some other stuff - end - end - endmodule - ``` - -- The exception to the former rule are `always` blocks, where the `begin` can be placed on a new line, for example (K&R): - - ```verilog - module A ( - input logic flush_i - ); - - logic whatever_signal; - - always_comb - begin - if (flush_i) begin - // do some stuff here - end else if (whatever_signal) begin - // do some other stuff - end - end - endmodule - ``` - -- For `case`, always use `begin` and `end` and follow the K&R style: - - ```verilog - case (foo_i) - 8'h0 : begin - // case 0 - end - 8'h1 : begin - // case 1 - end - endcase - ``` - -- For `if`-`else` chains, use K&R style: - - ```verilog - if (foo_i) begin - // do stuff - end else if (bar_i) begin - // do some other stuff - end - ``` - - As a variant, you can also put the end on its own line: - - ```verilog - if (foo_i) begin - // do stuff - end - else if (bar_i) begin - // do some other stuff - end - ``` - - > The rationale is that extra lines for `begin/else/end` carry no information at all. They even may prevent parts of the code to not have enough space on some screens. Process blocks on the other hand are more self-contained and multiple process blocks are not required to be visible at the same time. - > The intention behind this is to keep code which is closely related together (like the code in an `always` block). It then should easily fit on a single screen. - -- Give generics a meaningful type e.g.: `parameter int unsigned ASID_WIDTH = 1`. The default type is a signed integer which in most of the time does not make an awful lot of sense for hardware. 
-- Always name control blocks within a `generate`: - - ```verilog - generate - for (genvar i=0; i<10; i++) begin : ten_times_gen - // something to generate 10x - end // ten_times_gen - - if (PARAM == 0) begin : no_param_gen - // something - end // no_param_gen - else begin : param_gen - // something else - end // param_gen - endgenerate - ``` - -- Name `structs` which are used as types with a post-fix `_t`: - - ```verilog - typedef struct packed { - logic [1:0] rw; - priv_lvl_t priv_lvl; - logic [7:0] address; - } csr_addr_t; - ``` - ```verilog - module A ( - input logic [11:0] address_i - ); - - csr_addr_t csr_addr; - - assign csr_addr = csr_addr_t'(address_i); - - always_comb begin - if (csr_addr.priv_lvl == U_MODE) begin - // do something fancy with this signal - end - end - endmodule - ``` - -- Consider using [EditorConfig](http://editorconfig.org/): - - ``` - # top-most EditorConfig file - root = true - - # Unix-style newlines with a newline ending every file - [*] - end_of_line = lf - insert_final_newline = true - trim_trailing_whitespace = true - max_line_length = off - # 4 space indentation - [*.{sv, svh, v, vhd}] - indent_style = space - indent_size = 4 - ``` - - There are plug-ins for almost any sane editor. The same example `.editorconfig` can also be found in this repository. +See [style-guidelines](https://github.com/pulp-platform/style-guidelines) ## Git Considerations @@ -180,22 +14,27 @@ - Wrap the body at 72 characters. - Use the body to explain what and why vs. how. 
- Consider starting the commit message with an applicable emoji: - * :art: `:art:` when improving the format/structure of the code - * :racehorse: `:racehorse:` when improving performance - * :memo: `:memo:` when writing docs - * :penguin: `:penguin:` when fixing something on Linux - * :apple: `:apple:` when fixing something on macOS - * :checkered_flag: `:checkered_flag:` when fixing something on Windows - * :bug: `:bug:` when fixing a bug - * :fire: `:fire:` when removing code or files - * :green_heart: `:green_heart:` when fixing the CI build - * :white_check_mark: `:white_check_mark:` when adding tests - * :lock: `:lock:` when dealing with security - * :arrow_up: `:arrow_up:` when upgrading dependencies - * :arrow_down: `:arrow_down:` when downgrading dependencies - * :shirt: `:shirt:` when removing linter warnings - * :scissors: `:scissors:` when restructuring your HDL - * :space_invader: `:space_invader:` when fixing something synthesis related + * :art: `:art:` Improving the format/structure of the code + * :zap: `:zap:` When improving performance + * :fire: `:fire:` Removing code or files. + * :memo: `:memo:` When writing docs + * :bug: `:bug:` When fixing a bug + * :fire: `:fire:` When removing code or files + * :green_heart: `:green_heart:` When fixing the CI build + * :construction_worker: `:construction_worker:` Adding CI build system + * :white_check_mark: `:white_check_mark:` When adding tests + * :lock: `:lock:` When dealing with security + * :arrow_up: `:arrow_up:` When upgrading dependencies + * :arrow_down: `:arrow_down:` When downgrading dependencies + * :rotating_light: `:rotating_light:` When removing linter warnings + * :pencil2: `:pencil2:` Fixing typos + * :recycle: `:recycle:` Refactoring code. + * :boom: `:boom:` Introducing breaking changes + * :truck: `:truck:` Moving or renaming files. + * :space_invader: `:space_invader:` When fixing something synthesis related + * :beers: `:beers:` Writing code drunkenly. 
+ * :ok_hand: `:ok_hand:` Updating code due to code review changes + * :building_construction: `:building_construction:` Making architectural changes. For a detailed why and how please refer to one of the multiple [resources](https://chris.beams.io/posts/git-commit/) regarding git commit messages. diff --git a/src/axi_mem_if b/src/axi_mem_if index 516bd102b..da0e5af3f 160000 --- a/src/axi_mem_if +++ b/src/axi_mem_if @@ -1 +1 @@ -Subproject commit 516bd102b4e668cd7165a584dc68eb996389bcd1 +Subproject commit da0e5af3f04c67cb89a19f5c25f979c4d2b6bec3 diff --git a/tb b/tb index 346ab824a..a80273c54 160000 --- a/tb +++ b/tb @@ -1 +1 @@ -Subproject commit 346ab824aad1c20f7029434c55c92c76c974f3eb +Subproject commit a80273c54114541d90bf4308fba7ccf438a83d3c From d8552d5ecd27afa9491f1fa2549cd0b4fb348853 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Mon, 9 Apr 2018 15:33:40 +0200 Subject: [PATCH 22/94] Pump tb version --- Makefile | 2 +- tb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 55e03727c..1f8095a91 100755 --- a/Makefile +++ b/Makefile @@ -137,7 +137,7 @@ $(library): vlib${questa_version} ${library} sim: build - vsim${questa_version} -64 -lib ${library} ${top_level}_optimized +UVM_TESTNAME=${test_case} +BASEDIR=$(riscv-test-dir) \ + vsim${questa_version} -64 -lib ${library} ${top_level}_optimized +UVM_TESTNAME=${test_case} +BASEDIR=$(riscv-test-dir) -noautoldlibpath \ +ASMTEST=$(riscv-test) $(uvm-flags) +UVM_VERBOSITY=HIGH -coverage -classdebug -sv_lib $(library)/elf_dpi -do "do tb/wave/wave_core.do" sim_nopt: build diff --git a/tb b/tb index a80273c54..d5605a06e 160000 --- a/tb +++ b/tb @@ -1 +1 @@ -Subproject commit a80273c54114541d90bf4308fba7ccf438a83d3c +Subproject commit d5605a06ecf7e2fa881c55f25870fdcef5433a48 From ce94aa5681020d0b6bd33c70c47196df11bf21f2 Mon Sep 17 00:00:00 2001 From: Matheus Cavalcante Date: Wed, 11 Apr 2018 10:14:42 +0200 Subject: [PATCH 23/94] Reset cache_ctrl state after a kill_req_i (#38) 
--- src/cache_ctrl.sv | 19 ++++++------------- src/scoreboard.sv | 44 ++++++++------------------------------------ 2 files changed, 14 insertions(+), 49 deletions(-) diff --git a/src/cache_ctrl.sv b/src/cache_ctrl.sv index dbf3f97e5..f1bf88f27 100644 --- a/src/cache_ctrl.sv +++ b/src/cache_ctrl.sv @@ -262,13 +262,6 @@ module cache_ctrl #( mem_req_d.bypass = 1'b1; state_d = WAIT_REFILL_GNT; end - end else begin - // we can potentially accept a new request -> I don't know how this works out timing vise - // as this will chain some paths together... - // For now this should not happen to frequently and we spare another cycle - // go back to idle - state_d = IDLE; - data_rvalid_o = 1'b1; end end @@ -317,11 +310,7 @@ module cache_ctrl #( // its for sure a miss WAIT_TAG_BYPASSED: begin // the request was killed - if (kill_req_i) begin - state_d = IDLE; - // we need to ack the killing - data_rvalid_o = 1'b1; - end else begin + if (!kill_req_i) begin // save tag mem_req_d.tag = address_tag_i; state_d = WAIT_REFILL_GNT; @@ -409,8 +398,12 @@ module cache_ctrl #( state_d = IDLE; end end - endcase + + if (kill_req_i) begin + state_d = IDLE; + data_rvalid_o = 1'b1; + end end // -------------- diff --git a/src/scoreboard.sv b/src/scoreboard.sv index ee93bd97d..913b30536 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -298,42 +298,14 @@ module scoreboard #( else $error ("Issue acknowledged but instruction is not valid"); // there should never be more than one instruction writing the same destination register (except x0) - // assert strict pointer ordering - - // print scoreboard - // initial begin - // automatic string pointer = ""; - // static integer f = $fopen("scoreboard.txt", "w"); - - // forever begin - // wait(rst_ni == 1'b1); - // @(posedge clk_i) - // $fwrite(f, $time); - // $fwrite(f, "\n"); - // $fwrite(f, "._________________________.\n"); - // for (int i = 0; i < NR_ENTRIES; i++) begin - // if (i == commit_pointer_q && i == issue_pointer_q && i == 
top_pointer_q) - // pointer = " <- top, issue, commit pointer"; - // else if (i == commit_pointer_q && i == issue_pointer_q) - // pointer = " <- issue, commit pointer"; - // else if (i == top_pointer_q && i == issue_pointer_q) - // pointer = " <- top, issue pointer"; - // else if (i == top_pointer_q && i == commit_pointer_q) - // pointer = " <- top, commit pointer"; - // else if (i == top_pointer_q) - // pointer = " <- top pointer"; - // else if (i == commit_pointer_q) - // pointer = " <- commit pointer"; - // else if (i == issue_pointer_q) - // pointer = " <- issue pointer"; - // else - // pointer = ""; - // $fwrite(f, "|_________________________| %s\n", pointer); - // end - // $fwrite(f, "\n"); - // end - // $fclose(f); - // end + // check that no functional unit is retiring with the same transaction id + for (genvar i = 0; i < NR_WB_PORTS; i++) begin + for (genvar j = 0; j < NR_WB_PORTS; j++) begin + assert property ( + @(posedge clk_i) wb_valid_i[i] && wb_valid_i[j] && (i != j) |-> (trans_id_i[i] != trans_id_i[j])) + else $error ("Two or more functional units are retiring instructions with the same transaction id!"); + end + end `endif `endif endmodule From 749eb531238996f5f95941ffaa86c2ea57a6b52f Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Mon, 16 Apr 2018 10:32:49 +0200 Subject: [PATCH 24/94] :twisted_rightwards_arrows: Update fpu_legacy to latest master --- src/fpu_legacy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fpu_legacy b/src/fpu_legacy index 6dc841aeb..04e8470f7 160000 --- a/src/fpu_legacy +++ b/src/fpu_legacy @@ -1 +1 @@ -Subproject commit 6dc841aebe7f7f02570e18135f407f8f8a903320 +Subproject commit 04e8470f7561fb8f02b33d92bfdff679c76549ef From 2ebaf987321b7dea575ff719b8ebe58ee79bf0b7 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Tue, 17 Apr 2018 00:06:56 +0200 Subject: [PATCH 25/94] :arrow_up: Bump tb for FPU waves --- tb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tb b/tb index 
d5605a06e..b1197dd71 160000 --- a/tb +++ b/tb @@ -1 +1 @@ -Subproject commit d5605a06ecf7e2fa881c55f25870fdcef5433a48 +Subproject commit b1197dd712c289a5daaab5b532943287e6bde9d7 From 67c89c6739bd8e1665709b6d2c6c6675775a99a3 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Mon, 16 Apr 2018 23:51:05 +0200 Subject: [PATCH 26/94] :sparkles: Add FP instruction support to tracer --- src/ariane.sv | 3 +- src/util/instruction_trace_item.svh | 215 ++++++++++++++++++++++-- src/util/instruction_tracer.svh | 26 +-- src/util/instruction_tracer_defines.svh | 29 ++++ src/util/instruction_tracer_if.sv | 5 +- 5 files changed, 248 insertions(+), 30 deletions(-) diff --git a/src/ariane.sv b/src/ariane.sv index 044b86eac..2da276b8d 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -667,7 +667,8 @@ module ariane #( // write-back assign tracer_if.waddr = waddr_commit_id; assign tracer_if.wdata = wdata_commit_id; - assign tracer_if.we = we_gpr_commit_id; + assign tracer_if.we_gpr = we_gpr_commit_id; + assign tracer_if.we_fpr = we_fpr_commit_id; // commit assign tracer_if.commit_instr = commit_instr_id_commit; assign tracer_if.commit_ack = commit_ack; diff --git a/src/util/instruction_trace_item.svh b/src/util/instruction_trace_item.svh index a8bfa52f7..8ff3a6ca7 100644 --- a/src/util/instruction_trace_item.svh +++ b/src/util/instruction_trace_item.svh @@ -19,9 +19,12 @@ class instruction_trace_item; scoreboard_entry_t sbe; logic [31:0] pc; logic [31:0] instr; - logic [63:0] reg_file [32]; + logic [63:0] gp_reg_file [32]; + logic [63:0] fp_reg_file [32]; logic [4:0] read_regs [$]; + logic read_fpr [$]; logic [4:0] result_regs [$]; + logic result_fpr [$]; logic [63:0] imm; logic [63:0] result; logic [63:0] paddr; @@ -29,19 +32,21 @@ class instruction_trace_item; branchpredict_t bp; // constructor creating a new instruction trace item, e.g.: a single instruction with all relevant information - function new (time simtime, longint unsigned cycle, scoreboard_entry_t sbe, logic [31:0] instr, 
logic [63:0] reg_file [32], logic [63:0] result, logic [63:0] paddr, priv_lvl_t priv_lvl, branchpredict_t bp); + function new (time simtime, longint unsigned cycle, scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] gp_reg_file [32], logic [63:0] fp_reg_file [32], logic [63:0] result, logic [63:0] paddr, priv_lvl_t priv_lvl, branchpredict_t bp); this.simtime = simtime; this.cycle = cycle; this.pc = sbe.pc; this.sbe = sbe; this.instr = instr; - this.reg_file = reg_file; + this.gp_reg_file = gp_reg_file; + this.fp_reg_file = fp_reg_file; this.result = result; this.paddr = paddr; this.bp = bp; this.priv_lvl = getPrivLevel(priv_lvl); endfunction - // convert register address to ABI compatible form + + // convert gp register address to ABI compatible form function string regAddrToStr(logic [5:0] addr); case (addr) 0: return "x0"; @@ -56,9 +61,65 @@ class instruction_trace_item; default: return $sformatf("s%0d", (addr - 16)); endcase endfunction + // convert fp register address to ABI compatible form + function string fpRegAddrToStr(logic [5:0] addr); + case (addr) inside + [0:7] : return $sformatf("ft%0d", addr); + [8:9] : return $sformatf("fs%0d", (addr - 8)); + [10:17] : return $sformatf("fa%0d", (addr - 10)); + [18:27] : return $sformatf("fs%0d", (addr - 16)); + [28:31] : return $sformatf("ft%0d", (addr - 20)); + endcase + endfunction + + function string fpFmtToStr(logic [1:0] fmt); + case (fmt) + 2'b00 : return "s"; + 2'b01 : return "d"; + 2'b10 : return "h"; + 2'b11 : return "b"; + default : return "XX"; + endcase + endfunction + + function string fmvFpFmtToStr(logic [1:0] fmt); + case (fmt) + 2'b00 : return "w"; + 2'b01 : return "d"; + 2'b10 : return "h"; + 2'b11 : return "b"; + default : return "XX"; + endcase + endfunction + + function string intFmtToStr(logic [1:0] ifmt); + case (ifmt) + 2'b00 : return "w"; + 2'b01 : return "wu"; + 2'b10 : return "l"; + 2'b11 : return "lu"; + default : return "XX"; + endcase + endfunction + + function string 
fpRmToStr(logic [2:0] rm); + case (rm) + 3'b000 : return "rne"; + 3'b001 : return "rtz"; + 3'b010 : return "rdn"; + 3'b011 : return "rup"; + 3'b100 : return "rmm"; + 3'b111 : return "dyn"; // what is this called in rv binutils? + default: return "INVALID"; + endcase + endfunction function string csrAddrToStr(logic [11:0] addr); case (addr) + CSR_FFLAGS: return "fflags"; + CSR_FRM: return "frm"; + CSR_FCSR: return "fcsr"; + CSR_SSTATUS: return "sstatus"; CSR_SIE: return "sie"; CSR_STVEC: return "stvec"; @@ -102,7 +163,7 @@ class instruction_trace_item; function string printInstr(); string s; - casex (instr) + case (instr) inside // Aliases 32'h00_00_00_13: s = this.printMnemonic("nop"); // Regular opcodes @@ -156,6 +217,32 @@ class instruction_trace_item; INSTR_SRLW: s = this.printRInstr("srlw"); INSTR_SRAW: s = this.printRInstr("sraw"); INSTR_MULW: s = this.printMulInstr(1'b1); + // FP + INSTR_FMADD: s = this.printR4Instr("fmadd"); + INSTR_FMSUB: s = this.printR4Instr("fmsub"); + INSTR_FNSMSUB: s = this.printR4Instr("fnmsub"); + INSTR_FNMADD: s = this.printR4Instr("fnmadd"); + + INSTR_FADD: s = this.printRFInstr("fadd", 1'b1); + INSTR_FSUB: s = this.printRFInstr("fsub", 1'b1); + INSTR_FMUL: s = this.printRFInstr("fmul", 1'b1); + INSTR_FDIV: s = this.printRFInstr("fdiv", 1'b1); + INSTR_FSQRT: s = this.printRFInstr1Op("fsqrt", 1'b1); + INSTR_FSGNJ: s = this.printRFInstr("fsgnj", 1'b0); + INSTR_FSGNJN: s = this.printRFInstr("fsgnjn", 1'b0); + INSTR_FSGNJX: s = this.printRFInstr("fsgnjx", 1'b0); + INSTR_FMIN: s = this.printRFInstr("fmin", 1'b0); + INSTR_FMAX: s = this.printRFInstr("fmax", 1'b0); + INSTR_FLE: s = this.printRFInstr("fle", 1'b0); + INSTR_FLT: s = this.printRFInstr("flt", 1'b0); + INSTR_FEQ: s = this.printRFInstr("feq", 1'b0); + + INSTR_FCVT_F2F, + INSTR_FMV_F2X, + INSTR_FCLASS, + INSTR_FMV_X2F, + INSTR_FCVT_F2I, + INSTR_FCVT_I2F: s = this.printFpSpecialInstr(); // these are a mess to do nicely // FENCE INSTR_FENCE: s = this.printMnemonic("fence"); 
INSTR_FENCEI: s = this.printMnemonic("fence.i"); @@ -182,8 +269,10 @@ class instruction_trace_item; INSTR_WFI: s = this.printMnemonic("wfi"); INSTR_SFENCE: s = this.printMnemonic("sfence.vma"); // loads and stores - INSTR_LOAD: s = this.printLoadInstr(); - INSTR_STORE: s = this.printStoreInstr(); + INSTR_LOAD, + INSTR_LOAD_FP: s = this.printLoadInstr(); + INSTR_STORE, + INSTR_STORE_FP: s = this.printStoreInstr(); default: s = this.printMnemonic("INVALID"); endcase @@ -203,23 +292,29 @@ class instruction_trace_item; // s); foreach (result_regs[i]) begin - if (result_regs[i] != 0) + if (result_fpr[i]) + s = $sformatf("%s %-4s:%16x", s, fpRegAddrToStr(result_regs[i]), this.result); + else if (result_regs[i] != 0) s = $sformatf("%s %-4s:%16x", s, regAddrToStr(result_regs[i]), this.result); end foreach (read_regs[i]) begin - if (read_regs[i] != 0) - s = $sformatf("%s %-4s:%16x", s, regAddrToStr(read_regs[i]), reg_file[read_regs[i]]); + if (read_fpr[i]) + s = $sformatf("%s %-4s:%16x", s, fpRegAddrToStr(read_regs[i]), fp_reg_file[read_regs[i]]); + else if (read_regs[i] != 0) + s = $sformatf("%s %-4s:%16x", s, regAddrToStr(read_regs[i]), gp_reg_file[read_regs[i]]); end casex (instr) // check of the instrction was a load or store - INSTR_STORE: begin - logic [63:0] vaddress = reg_file[read_regs[1]] + this.imm; + INSTR_STORE, + INSTR_STORE_FP: begin + logic [63:0] vaddress = gp_reg_file[read_regs[1]] + this.imm; s = $sformatf("%s VA: %x PA: %x", s, vaddress, this.paddr); end - INSTR_LOAD: begin - logic [63:0] vaddress = reg_file[read_regs[0]] + this.imm; + INSTR_LOAD, + INSTR_LOAD_FP: begin + logic [63:0] vaddress = gp_reg_file[read_regs[0]] + this.imm; s = $sformatf("%s VA: %x PA: %x", s, vaddress, this.paddr); end endcase @@ -242,16 +337,80 @@ class instruction_trace_item; function string printRInstr(input string mnemonic); result_regs.push_back(sbe.rd); + result_fpr.push_back(1'b0); read_regs.push_back(sbe.rs1); + read_fpr.push_back(1'b0); read_regs.push_back(sbe.rs2); + 
read_fpr.push_back(1'b0); return $sformatf("%-16s %s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2)); endfunction // printRInstr + function string printRFInstr(input string mnemonic, input bit use_rnd); + + result_regs.push_back(sbe.rd); + result_fpr.push_back(1'b1); + read_regs.push_back(sbe.rs1); + read_fpr.push_back(1'b1); + read_regs.push_back(sbe.rs2); + read_fpr.push_back(1'b1); + + if (use_rnd && instr[14:12]!=3'b111) + return $sformatf("%s.%-10s %s, %s, %s, %s", mnemonic, fpFmtToStr(instr[26:25]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRegAddrToStr(sbe.rs2), fpRmToStr(instr[14:12])); + else + return $sformatf("%s.%-10s %s, %s, %s", mnemonic, fpFmtToStr(instr[26:25]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRegAddrToStr(sbe.rs2)); + endfunction // printRFInstr + + function string printRFInstr1Op(input string mnemonic, input bit use_rnd); + + result_regs.push_back(sbe.rd); + result_fpr.push_back(1'b1); + read_regs.push_back(sbe.rs1); + read_fpr.push_back(1'b1); + + if (use_rnd && instr[14:12]!=3'b111) + return $sformatf("%s.%-10s %s, %s, %s", mnemonic, fpFmtToStr(instr[26:25]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); + else + return $sformatf("%s.%-10s %s, %s", mnemonic, fpFmtToStr(instr[26:25]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); + endfunction // printRFInstr1Op + + function string printR4Instr(input string mnemonic); + + result_regs.push_back(sbe.rd); + result_fpr.push_back(1'b1); + read_regs.push_back(sbe.rs1); + read_fpr.push_back(1'b1); + read_regs.push_back(sbe.rs2); + read_fpr.push_back(1'b1); + read_regs.push_back(instr[31:27]); + read_fpr.push_back(1'b1); + + return $sformatf("%s.%-10s %s, %s, %s, %s, %s", mnemonic, fpFmtToStr(instr[26:25]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRegAddrToStr(sbe.rs2), fpRegAddrToStr(instr[31:27]), fpRmToStr(instr[14:12])); + endfunction // printR4Instr + + function string 
printFpSpecialInstr(); + + result_regs.push_back(sbe.rd); + result_fpr.push_back(is_rd_fpr(sbe.op)); + result_regs.push_back(sbe.rs1); + result_fpr.push_back(is_rs1_fpr(sbe.op)); + + case (sbe.op) + FCVT_F2F : return $sformatf("fcvt.%s.%-10s %s, %s, %s", fpFmtToStr(instr[26:25]), fpFmtToStr(instr[21:20]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); + FCVT_F2I : return $sformatf("fcvt.%s.%-10s %s, %s, %s", intFmtToStr(instr[21:20]), fpFmtToStr(instr[26:25]), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); + FCVT_I2F : return $sformatf("fcvt.%s.%-10s %s, %s, %s", fpFmtToStr(instr[26:25]), intFmtToStr(instr[21:20]), fpRegAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); + FMV_F2X : return $sformatf("fmv.x.%-10s %s, %s", fmvFpFmtToStr(instr[26:25]), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); + FMV_X2F : return $sformatf("fmv.%s.x\t %s, %s", fmvFpFmtToStr(instr[26:25]), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); + FCLASS : return $sformatf("fclass.%-10s %s, %s", fpFmtToStr(instr[26:25]), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); + endcase + endfunction + function string printIInstr(input string mnemonic); result_regs.push_back(sbe.rd); + result_fpr.push_back(1'b0); read_regs.push_back(sbe.rs1); + read_fpr.push_back(1'b0); if (sbe.rs1 == 0) return $sformatf("%-16s %s, %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result)); @@ -262,7 +421,9 @@ class instruction_trace_item; function string printIuInstr(input string mnemonic); result_regs.push_back(sbe.rd); + result_fpr.push_back(1'b0); read_regs.push_back(sbe.rs1); + read_fpr.push_back(1'b0); return $sformatf("%-16s %s, %s, 0x%0x", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), sbe.result); endfunction // printIuInstr @@ -270,7 +431,9 @@ class instruction_trace_item; function string printSBInstr(input string mnemonic); read_regs.push_back(sbe.rs1); + read_fpr.push_back(1'b0); read_regs.push_back(sbe.rs2); + 
read_fpr.push_back(1'b0); if (sbe.rs2 == 0) return $sformatf("%-16s %s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), $signed(sbe.result)); @@ -281,6 +444,7 @@ class instruction_trace_item; function string printUInstr(input string mnemonic); result_regs.push_back(sbe.rd); + result_fpr.push_back(1'b0); return $sformatf("%-16s %s, 0x%0h", mnemonic, regAddrToStr(sbe.rd), sbe.result[31:12]); endfunction // printUInstr @@ -310,6 +474,7 @@ class instruction_trace_item; function string printUJInstr(input string mnemonic); result_regs.push_back(sbe.rd); + result_fpr.push_back(1'b0); // jump instruction if (sbe.rd == 0) return $sformatf("%-16s pc + %0d", mnemonic, $signed(sbe.result)); @@ -320,8 +485,10 @@ class instruction_trace_item; function string printCSRInstr(input string mnemonic); result_regs.push_back(sbe.rd); + result_fpr.push_back(1'b0); if (instr[14] == 0) begin read_regs.push_back(sbe.rs1); + read_fpr.push_back(1'b0); if (sbe.rd != 0 && sbe.rs1 != 0) begin return $sformatf("%-16s %s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0])); // don't display instructions which write to zero @@ -356,12 +523,20 @@ class instruction_trace_item; default: return printMnemonic("INVALID"); endcase + if (instr[6:0] == OPCODE_LOAD_FP) + mnemonic = $sformatf("f%s",mnemonic); + result_regs.push_back(sbe.rd); + result_fpr.push_back(is_rd_fpr(sbe.op)); read_regs.push_back(sbe.rs1); + read_fpr.push_back(1'b0); // save the immediate for calculating the virtual address this.imm = sbe.result; - return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); + if (instr[6:0] == OPCODE_LOAD_FP) + return $sformatf("%-15s %s, %0d(%s)", mnemonic, fpRegAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); + else + return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); endfunction function string printStoreInstr(); @@ -375,12 
+550,20 @@ class instruction_trace_item; default: return printMnemonic("INVALID"); endcase + if (instr[6:0] == OPCODE_STORE_FP) + mnemonic = $sformatf("f%s",mnemonic); + read_regs.push_back(sbe.rs2); + read_fpr.push_back(is_rs2_fpr(sbe.op)); read_regs.push_back(sbe.rs1); + read_fpr.push_back(1'b0); // save the immediate for calculating the virtual address this.imm = sbe.result; - return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); + if (instr[6:0] == OPCODE_STORE_FP) + return $sformatf("%-16s %s, %0d(%s)", mnemonic, fpRegAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); + else + return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); endfunction // printSInstr diff --git a/src/util/instruction_tracer.svh b/src/util/instruction_tracer.svh index 60173d8ad..c14b0aef8 100644 --- a/src/util/instruction_tracer.svh +++ b/src/util/instruction_tracer.svh @@ -25,8 +25,9 @@ class instruction_tracer; scoreboard_entry_t issue_sbe; // store resolved branches, get (mis-)predictions branchpredict_t bp [$]; - // shadow copy of the register file - logic [63:0] reg_file [32]; + // shadow copy of the register files + logic [63:0] gp_reg_file [32]; + logic [63:0] fp_reg_file [32]; // 64 bit clock tick count longint unsigned clk_ticks; int f; @@ -58,7 +59,7 @@ class instruction_tracer; logic [31:0] decode_instruction, issue_instruction, issue_commit_instruction; scoreboard_entry_t commit_instruction; // initialize register 0 - reg_file [0] = 0; + gp_reg_file [0] = 0; forever begin automatic branchpredict_t bp_instruction = '0; @@ -123,10 +124,12 @@ class instruction_tracer; // the scoreboards issue entry still contains the immediate value as a result // check if the write back is valid, if not we need to source the result from the register file // as the most recent version of this register will be there. 
- if (tracer_if.pck.we[i]) begin + if (tracer_if.pck.we_gpr[i] || tracer_if.pck.we_fpr[i]) printInstr(issue_sbe, issue_commit_instruction, tracer_if.pck.wdata[i], address_mapping, tracer_if.pck.priv_lvl, bp_instruction); - end else - printInstr(issue_sbe, issue_commit_instruction, reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, bp_instruction); + else if (is_rd_fpr(commit_instruction.op)) + printInstr(issue_sbe, issue_commit_instruction, fp_reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, bp_instruction); + else + printInstr(issue_sbe, issue_commit_instruction, gp_reg_file[commit_instruction.rd], address_mapping, tracer_if.pck.priv_lvl, bp_instruction); end end // -------------- @@ -139,11 +142,12 @@ class instruction_tracer; // ---------------------- // Commit Registers // ---------------------- - // update shadow reg file here + // update shadow reg files here for (int i = 0; i < 2; i++) - if (tracer_if.pck.we[i] && tracer_if.pck.waddr[i] != 5'b0) begin - reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i]; - end + if (tracer_if.pck.we_gpr[i] && tracer_if.pck.waddr[i] != 5'b0) + gp_reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i]; + else if (tracer_if.pck.we_fpr[i]) + fp_reg_file[tracer_if.pck.waddr[i]] = tracer_if.pck.wdata[i]; // -------------- // Flush Signals @@ -178,7 +182,7 @@ class instruction_tracer; endfunction function void printInstr(scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] result, logic [63:0] paddr, priv_lvl_t priv_lvl, branchpredict_t bp); - instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.reg_file, result, paddr, priv_lvl, bp); + instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.gp_reg_file, this.fp_reg_file, result, paddr, priv_lvl, bp); // print instruction to console string print_instr = iti.printInstr(); uvm_report_info( "Tracer", print_instr, UVM_HIGH); diff --git a/src/util/instruction_tracer_defines.svh 
b/src/util/instruction_tracer_defines.svh index d048e7a8b..5264a20ff 100644 --- a/src/util/instruction_tracer_defines.svh +++ b/src/util/instruction_tracer_defines.svh @@ -100,6 +100,35 @@ parameter INSTR_DIVU = { 7'b0000001, 10'b?, 3'b101, 5'b?, OPCODE_OP }; parameter INSTR_REM = { 7'b0000001, 10'b?, 3'b110, 5'b?, OPCODE_OP }; parameter INSTR_REMU = { 7'b0000001, 10'b?, 3'b111, 5'b?, OPCODE_OP }; +// RVFD +parameter INSTR_FMADD = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, OPCODE_MADD}; +parameter INSTR_FMSUB = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, OPCODE_MSUB}; +parameter INSTR_FNSMSUB = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, OPCODE_NMSUB}; +parameter INSTR_FNMADD = { 5'b?, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, OPCODE_NMADD}; + +parameter INSTR_FADD = { 5'b00000, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FSUB = { 5'b00001, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FMUL = { 5'b00010, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FDIV = { 5'b00011, 2'b?, 5'b?, 5'b?, 3'b?, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FSQRT = { 5'b01011, 2'b?, 5'b0, 5'b?, 3'b?, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FSGNJ = { 5'b00100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FSGNJN = { 5'b00100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FSGNJX = { 5'b00100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FMIN = { 5'b00101, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FMAX = { 5'b00101, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FLE = { 5'b10100, 2'b?, 5'b?, 5'b?, 3'b000, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FLT = { 5'b10100, 2'b?, 5'b?, 5'b?, 3'b001, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FEQ = { 5'b10100, 2'b?, 5'b?, 5'b?, 3'b010, 5'b?, OPCODE_OP_FP}; + +parameter INSTR_FCVT_F2F = { 5'b01000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FMV_F2X = { 5'b11100, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FCLASS = 
{ 5'b11100, 2'b?, 5'b0, 5'b?, 3'b001, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FMV_X2F = { 5'b11110, 2'b?, 5'b0, 5'b?, 3'b000, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FCVT_F2I = { 5'b11000, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, OPCODE_OP_FP}; +parameter INSTR_FCVT_I2F = { 5'b11010, 2'b?, 5'b000??, 5'b?, 3'b?, 5'b?, OPCODE_OP_FP}; + // Load/Stores parameter INSTR_LOAD = {25'b?, OPCODE_LOAD}; +parameter INSTR_LOAD_FP = {25'b?, OPCODE_LOAD_FP}; parameter INSTR_STORE = {25'b?, OPCODE_STORE}; +parameter INSTR_STORE_FP = {25'b?, OPCODE_STORE_FP}; diff --git a/src/util/instruction_tracer_if.sv b/src/util/instruction_tracer_if.sv index d0fac6645..fedb7ecd2 100644 --- a/src/util/instruction_tracer_if.sv +++ b/src/util/instruction_tracer_if.sv @@ -32,7 +32,8 @@ interface instruction_tracer_if ( // WB stage logic [1:0][4:0] waddr; logic [1:0][63:0] wdata; - logic [1:0] we; + logic [1:0] we_gpr; + logic [1:0] we_fpr; // commit stage scoreboard_entry_t [1:0] commit_instr; // commit instruction logic [1:0] commit_ack; @@ -56,7 +57,7 @@ interface instruction_tracer_if ( clocking pck @(posedge clk); input rstn, flush_unissued, flush, instruction, fetch_valid, fetch_ack, issue_ack, issue_sbe, waddr, st_valid, st_paddr, ld_valid, ld_kill, ld_paddr, resolve_branch, - wdata, we, commit_instr, commit_ack, exception, priv_lvl; + wdata, we_gpr, we_fpr, commit_instr, commit_ack, exception, priv_lvl; endclocking `endif From 90bff929559c968e07aa91aa842ac2bc5e5502f5 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Wed, 18 Apr 2018 14:30:04 +0200 Subject: [PATCH 27/94] :bug: Fix several FPU bugs, bump FPU dependencies --- src/fpnew | 2 +- src/fpu_legacy | 2 +- src/fpu_wrap.sv | 23 ++++++--- src/util/instruction_trace_item.svh | 76 ++++++++++++++--------------- tb | 2 +- 5 files changed, 56 insertions(+), 49 deletions(-) diff --git a/src/fpnew b/src/fpnew index 6bc7c8681..97d0a50a2 160000 --- a/src/fpnew +++ b/src/fpnew @@ -1 +1 @@ -Subproject commit 6bc7c86818b19467322ab69a7fc387db4f727821 +Subproject 
commit 97d0a50a2c7270b83a3e8e983b9c63f666d24168 diff --git a/src/fpu_legacy b/src/fpu_legacy index 04e8470f7..a67d8de46 160000 --- a/src/fpu_legacy +++ b/src/fpu_legacy @@ -1 +1 @@ -Subproject commit 04e8470f7561fb8f02b33d92bfdff679c76549ef +Subproject commit a67d8de4606afa59d1d3cebb7a72011c6bee2b16 diff --git a/src/fpu_wrap.sv b/src/fpu_wrap.sv index d882a2d24..38b75595e 100644 --- a/src/fpu_wrap.sv +++ b/src/fpu_wrap.sv @@ -182,10 +182,10 @@ module fpu_wrap ( fpu_op_mod_n = 1'b1; end // Fused Negated Multiply Subtract - FNMSUB : fpu_op_n = FNMSUB; + FNMSUB : fpu_op_n = OP_FNMSUB; // Fused Negated Multiply Add is modified FNMSUB FNMADD : begin - fpu_op_n = FNMSUB; + fpu_op_n = OP_FNMSUB; fpu_op_mod_n = 1'b1; end // Float to Int Cast - Op encoded in lowest two imm bits or rm @@ -260,14 +260,15 @@ module fpu_wrap ( // Move from FPR to GPR - mapped to NOP since no recoding FMV_F2X : begin fpu_op_n = OP_SGNJ; + fpu_op_mod_n = 1'b1; // no NaN-Boxing operand_b_n = operand_a_n; - vec_replication = 1'b0; // no replication, we set second operand + vec_replication = 1'b0; // no replication, we set second operand end // Move from GPR to FPR - mapped to NOP since no recoding FMV_X2F : begin fpu_op_n = OP_SGNJ; operand_b_n = operand_a_n; - vec_replication = 1'b0; // no replication, we set second operand + vec_replication = 1'b0; // no replication, we set second operand end // Scalar Comparisons - op encoded in rm (000-010) FCMP : fpu_op_n = OP_CMP; @@ -342,14 +343,20 @@ module fpu_wrap ( endcase // Replication - if (fpu_vec_op_n && vec_replication) begin + if (fpu_vec_op_n && vec_replication) case (fpu_fmt_n) - FMT_FP32 : operand_b_n = RVD ? {2{operand_b_i[31:0]}} : operand_b_i; + FMT_FP32 : operand_b_n = RVD ? {2{operand_b_n[31:0]}} : operand_b_n; FMT_FP16, - FMT_FP16ALT : operand_b_n = RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}}; - FMT_FP8 : operand_b_n = RVD ? {8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}}; + FMT_FP16ALT : operand_b_n = RVD ? 
{4{operand_b_n[15:0]}} : {2{operand_b_n[15:0]}}; + FMT_FP8 : operand_b_n = RVD ? {8{operand_b_n[7:0]}} : {4{operand_b_n[7:0]}}; endcase // fpu_fmt_n + + // Ugly but needs to be done: map additions to operands B and C + if (fpu_op_n == OP_ADD) begin + operand_c_n = operand_b_n; + operand_b_n = operand_a_n; end + end diff --git a/src/util/instruction_trace_item.svh b/src/util/instruction_trace_item.svh index 8ff3a6ca7..0d9248956 100644 --- a/src/util/instruction_trace_item.svh +++ b/src/util/instruction_trace_item.svh @@ -237,9 +237,10 @@ class instruction_trace_item; INSTR_FLT: s = this.printRFInstr("flt", 1'b0); INSTR_FEQ: s = this.printRFInstr("feq", 1'b0); + INSTR_FCLASS: s = this.printRFInstr1Op("fclass", 1'b0); + INSTR_FCVT_F2F, INSTR_FMV_F2X, - INSTR_FCLASS, INSTR_FMV_X2F, INSTR_FCVT_F2I, INSTR_FCVT_I2F: s = this.printFpSpecialInstr(); // these are a mess to do nicely @@ -343,35 +344,35 @@ class instruction_trace_item; read_regs.push_back(sbe.rs2); read_fpr.push_back(1'b0); - return $sformatf("%-16s %s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2)); + return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2)); endfunction // printRInstr function string printRFInstr(input string mnemonic, input bit use_rnd); result_regs.push_back(sbe.rd); - result_fpr.push_back(1'b1); + result_fpr.push_back(is_rd_fpr(sbe.op)); read_regs.push_back(sbe.rs1); - read_fpr.push_back(1'b1); + read_fpr.push_back(is_rs1_fpr(sbe.op)); read_regs.push_back(sbe.rs2); - read_fpr.push_back(1'b1); + read_fpr.push_back(is_rs2_fpr(sbe.op)); if (use_rnd && instr[14:12]!=3'b111) - return $sformatf("%s.%-10s %s, %s, %s, %s", mnemonic, fpFmtToStr(instr[26:25]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRegAddrToStr(sbe.rs2), fpRmToStr(instr[14:12])); + return $sformatf("%-12s %4s, %s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), 
is_rd_fpr(sbe.op)?fpRegAddrToStr(sbe.rd):regAddrToStr(sbe.rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(sbe.rs1):regAddrToStr(sbe.rs1), is_rs2_fpr(sbe.op)?fpRegAddrToStr(sbe.rs2):regAddrToStr(sbe.rs2), fpRmToStr(instr[14:12])); else - return $sformatf("%s.%-10s %s, %s, %s", mnemonic, fpFmtToStr(instr[26:25]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRegAddrToStr(sbe.rs2)); + return $sformatf("%-12s %4s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(sbe.rd):regAddrToStr(sbe.rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(sbe.rs1):regAddrToStr(sbe.rs1), is_rs2_fpr(sbe.op)?fpRegAddrToStr(sbe.rs2):regAddrToStr(sbe.rs2)); endfunction // printRFInstr function string printRFInstr1Op(input string mnemonic, input bit use_rnd); result_regs.push_back(sbe.rd); - result_fpr.push_back(1'b1); + result_fpr.push_back(is_rd_fpr(sbe.op)); read_regs.push_back(sbe.rs1); - read_fpr.push_back(1'b1); + read_fpr.push_back(is_rs1_fpr(sbe.op)); if (use_rnd && instr[14:12]!=3'b111) - return $sformatf("%s.%-10s %s, %s, %s", mnemonic, fpFmtToStr(instr[26:25]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); + return $sformatf("%-12s %4s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(sbe.rd):regAddrToStr(sbe.rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(sbe.rs1):regAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); else - return $sformatf("%s.%-10s %s, %s", mnemonic, fpFmtToStr(instr[26:25]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(sbe.rd):regAddrToStr(sbe.rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(sbe.rs1):regAddrToStr(sbe.rs1)); endfunction // printRFInstr1Op function string printR4Instr(input string mnemonic); @@ -385,23 +386,22 @@ class instruction_trace_item; read_regs.push_back(instr[31:27]); read_fpr.push_back(1'b1); - return $sformatf("%s.%-10s %s, 
%s, %s, %s, %s", mnemonic, fpFmtToStr(instr[26:25]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRegAddrToStr(sbe.rs2), fpRegAddrToStr(instr[31:27]), fpRmToStr(instr[14:12])); + return $sformatf("%-12s %4s, %s, %s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRegAddrToStr(sbe.rs2), fpRegAddrToStr(instr[31:27]), fpRmToStr(instr[14:12])); endfunction // printR4Instr function string printFpSpecialInstr(); result_regs.push_back(sbe.rd); result_fpr.push_back(is_rd_fpr(sbe.op)); - result_regs.push_back(sbe.rs1); - result_fpr.push_back(is_rs1_fpr(sbe.op)); + read_regs.push_back(sbe.rs1); + read_fpr.push_back(is_rs1_fpr(sbe.op)); case (sbe.op) - FCVT_F2F : return $sformatf("fcvt.%s.%-10s %s, %s, %s", fpFmtToStr(instr[26:25]), fpFmtToStr(instr[21:20]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); - FCVT_F2I : return $sformatf("fcvt.%s.%-10s %s, %s, %s", intFmtToStr(instr[21:20]), fpFmtToStr(instr[26:25]), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); - FCVT_I2F : return $sformatf("fcvt.%s.%-10s %s, %s, %s", fpFmtToStr(instr[26:25]), intFmtToStr(instr[21:20]), fpRegAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); - FMV_F2X : return $sformatf("fmv.x.%-10s %s, %s", fmvFpFmtToStr(instr[26:25]), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); - FMV_X2F : return $sformatf("fmv.%s.x\t %s, %s", fmvFpFmtToStr(instr[26:25]), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); - FCLASS : return $sformatf("fclass.%-10s %s, %s", fpFmtToStr(instr[26:25]), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); + FCVT_F2F : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), fpFmtToStr(instr[21:20])), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); + FCVT_F2I : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", intFmtToStr(instr[21:20]), fpFmtToStr(instr[26:25])), 
regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); + FCVT_I2F : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), intFmtToStr(instr[21:20])), fpRegAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); + FMV_F2X : return $sformatf("%-12s %4s, %s", $sformatf("fmv.x.%s", fmvFpFmtToStr(instr[26:25])), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); + FMV_X2F : return $sformatf("%-12s %4s, %s", $sformatf("fmv.x.%s", fmvFpFmtToStr(instr[26:25])), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); endcase endfunction @@ -413,9 +413,9 @@ class instruction_trace_item; read_fpr.push_back(1'b0); if (sbe.rs1 == 0) - return $sformatf("%-16s %s, %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result)); + return $sformatf("%-12s %4s, %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result)); - return $sformatf("%-16s %s, %s, %0d", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), $signed(sbe.result)); + return $sformatf("%-12s %4s, %s, %0d", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), $signed(sbe.result)); endfunction // printIInstr function string printIuInstr(input string mnemonic); @@ -425,7 +425,7 @@ class instruction_trace_item; read_regs.push_back(sbe.rs1); read_fpr.push_back(1'b0); - return $sformatf("%-16s %s, %s, 0x%0x", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), sbe.result); + return $sformatf("%-12s %4s, %s, 0x%0x", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), sbe.result); endfunction // printIuInstr function string printSBInstr(input string mnemonic); @@ -436,9 +436,9 @@ class instruction_trace_item; read_fpr.push_back(1'b0); if (sbe.rs2 == 0) - return $sformatf("%-16s %s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), $signed(sbe.result)); + return $sformatf("%-12s %4s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), $signed(sbe.result)); else - return $sformatf("%-16s %s, %s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2), $signed(sbe.result)); 
+ return $sformatf("%-12s %4s, %s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2), $signed(sbe.result)); endfunction // printIuInstr function string printUInstr(input string mnemonic); @@ -446,7 +446,7 @@ class instruction_trace_item; result_regs.push_back(sbe.rd); result_fpr.push_back(1'b0); - return $sformatf("%-16s %s, 0x%0h", mnemonic, regAddrToStr(sbe.rd), sbe.result[31:12]); + return $sformatf("%-12s %4s, 0x%0h", mnemonic, regAddrToStr(sbe.rd), sbe.result[31:12]); endfunction // printUInstr function string printJump(); @@ -477,9 +477,9 @@ class instruction_trace_item; result_fpr.push_back(1'b0); // jump instruction if (sbe.rd == 0) - return $sformatf("%-16s pc + %0d", mnemonic, $signed(sbe.result)); + return $sformatf("%-16s pc + %0d", mnemonic, $signed(sbe.result)); else - return $sformatf("%-16s %s, pc + %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result)); + return $sformatf("%-12s %4s, pc + %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result)); endfunction // printUJInstr function string printCSRInstr(input string mnemonic); @@ -490,21 +490,21 @@ class instruction_trace_item; read_regs.push_back(sbe.rs1); read_fpr.push_back(1'b0); if (sbe.rd != 0 && sbe.rs1 != 0) begin - return $sformatf("%-16s %s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0])); + return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0])); // don't display instructions which write to zero end else if (sbe.rd == 0) begin - return $sformatf("%-16s %s, %s", mnemonic, regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0])); + return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0])); end else if (sbe.rs1 == 0) begin - return $sformatf("%-16s %s, %s", mnemonic, regAddrToStr(sbe.rd), csrAddrToStr(sbe.result[11:0])); + return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(sbe.rd), 
csrAddrToStr(sbe.result[11:0])); end end else begin if (sbe.rd != 0 && sbe.rs1 != 0) begin - return $sformatf("%-16s %s, %d, %s", mnemonic, regAddrToStr(sbe.rd), $unsigned(sbe.rs1), csrAddrToStr(sbe.result[11:0])); + return $sformatf("%-12s %4s, %d, %s", mnemonic, regAddrToStr(sbe.rd), $unsigned(sbe.rs1), csrAddrToStr(sbe.result[11:0])); // don't display instructions which write to zero end else if (sbe.rd == 0) begin - return $sformatf("%-16s %d, %s", mnemonic, $unsigned(sbe.rs1), csrAddrToStr(sbe.result[11:0])); + return $sformatf("%-14s %2d, %s", mnemonic, $unsigned(sbe.rs1), csrAddrToStr(sbe.result[11:0])); end else if (sbe.rs1 == 0) begin - return $sformatf("%-16s %s, %s", mnemonic, regAddrToStr(sbe.rd), csrAddrToStr(sbe.result[11:0])); + return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(sbe.rd), csrAddrToStr(sbe.result[11:0])); end end endfunction // printCSRInstr @@ -534,9 +534,9 @@ class instruction_trace_item; this.imm = sbe.result; if (instr[6:0] == OPCODE_LOAD_FP) - return $sformatf("%-15s %s, %0d(%s)", mnemonic, fpRegAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, fpRegAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); else - return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); endfunction function string printStoreInstr(); @@ -561,9 +561,9 @@ class instruction_trace_item; this.imm = sbe.result; if (instr[6:0] == OPCODE_STORE_FP) - return $sformatf("%-16s %s, %0d(%s)", mnemonic, fpRegAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, fpRegAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); else - return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); + 
return $sformatf("%-12s %4s, %0d(%s)", mnemonic, regAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); endfunction // printSInstr diff --git a/tb b/tb index b1197dd71..59f931065 160000 --- a/tb +++ b/tb @@ -1 +1 @@ -Subproject commit b1197dd712c289a5daaab5b532943287e6bde9d7 +Subproject commit 59f9310659626cd4817348219133fd777a0b1700 From 71f407b65d7465fc979d4c318c0ef1feed5cd52d Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Wed, 18 Apr 2018 14:32:17 +0200 Subject: [PATCH 28/94] :sparkles: Add register renaming to issue stage Renaming can be turned on or off with the `ENABLE_RENAME` parameter in `ariane_pkg.sv`. --- include/ariane_pkg.sv | 2 + src/issue_read_operands.sv | 2 +- src/issue_stage.sv | 82 ++++++++-------- src/re_name.sv | 25 ++++- src/util/instruction_trace_item.svh | 142 +++++++++++++++------------- 5 files changed, 140 insertions(+), 113 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index 4d5bc25d0..e9027ebc4 100755 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -35,6 +35,8 @@ package ariane_pkg; localparam BITS_SATURATION_COUNTER = 2; localparam NR_COMMIT_PORTS = 2; + localparam ENABLE_RENAME = 1'b1; + // Floating-point extensions configuration localparam bit RVF = 1'b1; // Is F extension enabled localparam bit RVD = 1'b1; // Is D extension enabled diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index 61f611bb6..87677b86c 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -29,7 +29,7 @@ module issue_read_operands #( input logic debug_gpr_we_i, input logic [63:0] debug_gpr_wdata_i, output logic [63:0] debug_gpr_rdata_o, - // coming from scoreboard + // coming from rename input scoreboard_entry_t issue_instr_i, input logic issue_instr_valid_i, output logic issue_ack_o, diff --git a/src/issue_stage.sv b/src/issue_stage.sv index a1e300065..2065f9bb6 100644 --- a/src/issue_stage.sv +++ b/src/issue_stage.sv @@ -104,52 +104,31 @@ module issue_stage #( logic 
[FLEN-1:0] rs3_sb_iro; logic rs3_valid_iro_sb; - scoreboard_entry_t issue_instr_sb_rename; - logic issue_instr_valid_sb_rename; - logic issue_ack_rename_sb; + scoreboard_entry_t issue_instr_rename_sb; + logic issue_instr_valid_rename_sb; + logic issue_ack_sb_rename; - scoreboard_entry_t issue_instr_rename_iro; - logic issue_instr_valid_rename_iro; - logic issue_ack_iro_rename; + scoreboard_entry_t issue_instr_sb_iro; + logic issue_instr_valid_sb_iro; + logic issue_ack_iro_sb; // --------------------------------------------------------- - // 1. Issue instruction and read operand, also commit - // --------------------------------------------------------- - issue_read_operands i_issue_read_operands ( - .flush_i ( flush_unissued_instr_i ), - .issue_instr_i ( issue_instr_rename_iro ), - .issue_instr_valid_i ( issue_instr_valid_rename_iro ), - .issue_ack_o ( issue_ack_iro_rename ), - .rs1_o ( rs1_iro_sb ), - .rs1_i ( rs1_sb_iro ), - .rs1_valid_i ( rs1_valid_sb_iro ), - .rs2_o ( rs2_iro_sb ), - .rs2_i ( rs2_sb_iro ), - .rs2_valid_i ( rs2_valid_iro_sb ), - .rs3_o ( rs3_iro_sb ), - .rs3_i ( rs3_sb_iro ), - .rs3_valid_i ( rs3_valid_iro_sb ), - .rd_clobber_gpr_i ( rd_clobber_gpr_sb_iro ), - .rd_clobber_fpr_i ( rd_clobber_fpr_sb_iro ), - .* - ); - - // --------------------------------------------------------- - // 2. Re-name + // 1. 
Re-name // --------------------------------------------------------- re_name i_re_name ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), - .issue_instr_i ( issue_instr_sb_rename ), - .issue_instr_valid_i ( issue_instr_valid_sb_rename ), - .issue_ack_o ( issue_ack_rename_sb ), - .issue_instr_o ( issue_instr_rename_iro ), - .issue_instr_valid_o ( issue_instr_valid_rename_iro ), - .issue_ack_i ( issue_ack_iro_rename ) + .flush_i ( flush_i ), + .issue_instr_i ( decoded_instr_i ), + .issue_instr_valid_i ( decoded_instr_valid_i ), + .issue_ack_o ( decoded_instr_ack_o ), + .issue_instr_o ( issue_instr_rename_sb ), + .issue_instr_valid_o ( issue_instr_valid_rename_sb ), + .issue_ack_i ( issue_ack_sb_rename ) ); // --------------------------------------------------------- - // 3. Manage issued instructions in a scoreboard + // 2. Manage instructions in a scoreboard // --------------------------------------------------------- scoreboard #( .NR_ENTRIES ( NR_ENTRIES ), @@ -168,9 +147,12 @@ module issue_stage #( .rs3_o ( rs3_sb_iro ), .rs3_valid_o ( rs3_valid_iro_sb ), - .issue_instr_o ( issue_instr_sb_rename ), - .issue_instr_valid_o ( issue_instr_valid_sb_rename ), - .issue_ack_i ( issue_ack_rename_sb ), + .decoded_instr_i ( issue_instr_rename_sb ), + .decoded_instr_valid_i ( issue_instr_valid_rename_sb ), + .decoded_instr_ack_o ( issue_ack_sb_rename ), + .issue_instr_o ( issue_instr_sb_iro ), + .issue_instr_valid_o ( issue_instr_valid_sb_iro ), + .issue_ack_i ( issue_ack_iro_sb ), .trans_id_i ( trans_id_i ), .wbdata_i ( wbdata_i ), @@ -178,4 +160,26 @@ module issue_stage #( .* ); + // --------------------------------------------------------- + // 3. 
Issue instruction and read operand, also commit + // --------------------------------------------------------- + issue_read_operands i_issue_read_operands ( + .flush_i ( flush_unissued_instr_i ), + .issue_instr_i ( issue_instr_sb_iro ), + .issue_instr_valid_i ( issue_instr_valid_sb_iro ), + .issue_ack_o ( issue_ack_iro_sb ), + .rs1_o ( rs1_iro_sb ), + .rs1_i ( rs1_sb_iro ), + .rs1_valid_i ( rs1_valid_sb_iro ), + .rs2_o ( rs2_iro_sb ), + .rs2_i ( rs2_sb_iro ), + .rs2_valid_i ( rs2_valid_iro_sb ), + .rs3_o ( rs3_iro_sb ), + .rs3_i ( rs3_sb_iro ), + .rs3_valid_i ( rs3_valid_iro_sb ), + .rd_clobber_gpr_i ( rd_clobber_gpr_sb_iro ), + .rd_clobber_fpr_i ( rd_clobber_fpr_sb_iro ), + .* + ); + endmodule diff --git a/src/re_name.sv b/src/re_name.sv index d9ab11389..36a0b24f9 100644 --- a/src/re_name.sv +++ b/src/re_name.sv @@ -23,6 +23,7 @@ import ariane_pkg::*; module re_name ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // Flush renaming state // from/to scoreboard input scoreboard_entry_t issue_instr_i, input logic issue_instr_valid_i, @@ -46,12 +47,12 @@ module re_name ( // ------------------- always_comb begin // MSB of the renamed source register addresses - logic name_bit_rs1, name_bit_rs2, name_bit_rs3; + logic name_bit_rs1, name_bit_rs2, name_bit_rs3, name_bit_rd; // default assignments re_name_table_gpr_n = re_name_table_gpr_q; re_name_table_fpr_n = re_name_table_fpr_q; - issue_instr_o = issue_instr_i; + issue_instr_o = issue_instr_i; if (issue_ack_i) begin // if we acknowledge the instruction tic the corresponding destination register @@ -69,16 +70,30 @@ module re_name ( // rs3 is only used in certain FP operations and held like an immediate name_bit_rs3 = re_name_table_fpr_q[issue_instr_i.result[4:0]]; // make sure only the addr bits are read + // select name bit according to the state it will have after renaming + name_bit_rd = is_rd_fpr(issue_instr_i.op) ? 
re_name_table_fpr_q[issue_instr_i.rd] ^ 1'b1 + : re_name_table_gpr_q[issue_instr_i.rd] ^ (issue_instr_i.rd != '0); // don't rename x0 + // re-name the source registers - issue_instr_o.rs1 = { name_bit_rs1, issue_instr_i.rs1 }; - issue_instr_o.rs2 = { name_bit_rs2, issue_instr_i.rs2 }; + issue_instr_o.rs1 = { ENABLE_RENAME & name_bit_rs1, issue_instr_i.rs1[4:0] }; + issue_instr_o.rs2 = { ENABLE_RENAME & name_bit_rs2, issue_instr_i.rs2[4:0] }; // re-name the third operand in imm if it's actually an operand if (is_imm_fpr(issue_instr_i.op)) - issue_instr_o.result = {name_bit_rs3, issue_instr_i.result[4:0]}; + issue_instr_o.result = { ENABLE_RENAME & name_bit_rs3, issue_instr_i.result[4:0]}; + + // re-name the destination register + issue_instr_o.rd = { ENABLE_RENAME & name_bit_rd, issue_instr_i.rd[4:0] }; // we don't want to re-name gp register zero, it is non-writeable anyway re_name_table_gpr_n[0] = 1'b0; + + // Handle flushes + if (flush_i) begin + re_name_table_gpr_n = '0; + re_name_table_fpr_n = '0; + end + end // ------------------- diff --git a/src/util/instruction_trace_item.svh b/src/util/instruction_trace_item.svh index 0d9248956..1ebf3cdc8 100644 --- a/src/util/instruction_trace_item.svh +++ b/src/util/instruction_trace_item.svh @@ -31,6 +31,8 @@ class instruction_trace_item; string priv_lvl; branchpredict_t bp; + logic [4:0] rs1, rs2, rs3, rd; + // constructor creating a new instruction trace item, e.g.: a single instruction with all relevant information function new (time simtime, longint unsigned cycle, scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] gp_reg_file [32], logic [63:0] fp_reg_file [32], logic [63:0] result, logic [63:0] paddr, priv_lvl_t priv_lvl, branchpredict_t bp); this.simtime = simtime; @@ -44,6 +46,10 @@ class instruction_trace_item; this.paddr = paddr; this.bp = bp; this.priv_lvl = getPrivLevel(priv_lvl); + this.rs1 = sbe.rs1[4:0]; + this.rs2 = sbe.rs2[4:0]; + this.rs3 = instr[31:27]; + this.rd = sbe.rd[4:0]; endfunction // 
convert gp register address to ABI compatible form @@ -337,116 +343,116 @@ class instruction_trace_item; function string printRInstr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(1'b0); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(1'b0); - read_regs.push_back(sbe.rs2); + read_regs.push_back(rs2); read_fpr.push_back(1'b0); - return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2)); + return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(rd), regAddrToStr(rs1), regAddrToStr(rs2)); endfunction // printRInstr function string printRFInstr(input string mnemonic, input bit use_rnd); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(is_rd_fpr(sbe.op)); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(is_rs1_fpr(sbe.op)); - read_regs.push_back(sbe.rs2); + read_regs.push_back(rs2); read_fpr.push_back(is_rs2_fpr(sbe.op)); if (use_rnd && instr[14:12]!=3'b111) - return $sformatf("%-12s %4s, %s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(sbe.rd):regAddrToStr(sbe.rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(sbe.rs1):regAddrToStr(sbe.rs1), is_rs2_fpr(sbe.op)?fpRegAddrToStr(sbe.rs2):regAddrToStr(sbe.rs2), fpRmToStr(instr[14:12])); + return $sformatf("%-12s %4s, %s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(rd):regAddrToStr(rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(rs1):regAddrToStr(rs1), is_rs2_fpr(sbe.op)?fpRegAddrToStr(rs2):regAddrToStr(rs2), fpRmToStr(instr[14:12])); else - return $sformatf("%-12s %4s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(sbe.rd):regAddrToStr(sbe.rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(sbe.rs1):regAddrToStr(sbe.rs1), is_rs2_fpr(sbe.op)?fpRegAddrToStr(sbe.rs2):regAddrToStr(sbe.rs2)); + 
return $sformatf("%-12s %4s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(rd):regAddrToStr(rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(rs1):regAddrToStr(rs1), is_rs2_fpr(sbe.op)?fpRegAddrToStr(rs2):regAddrToStr(rs2)); endfunction // printRFInstr function string printRFInstr1Op(input string mnemonic, input bit use_rnd); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(is_rd_fpr(sbe.op)); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(is_rs1_fpr(sbe.op)); if (use_rnd && instr[14:12]!=3'b111) - return $sformatf("%-12s %4s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(sbe.rd):regAddrToStr(sbe.rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(sbe.rs1):regAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); + return $sformatf("%-12s %4s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(rd):regAddrToStr(rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(rs1):regAddrToStr(rs1), fpRmToStr(instr[14:12])); else - return $sformatf("%-12s %4s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(sbe.rd):regAddrToStr(sbe.rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(sbe.rs1):regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(rd):regAddrToStr(rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(rs1):regAddrToStr(rs1)); endfunction // printRFInstr1Op function string printR4Instr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(1'b1); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(1'b1); - read_regs.push_back(sbe.rs2); + read_regs.push_back(rs2); read_fpr.push_back(1'b1); - read_regs.push_back(instr[31:27]); + read_regs.push_back(rs3); read_fpr.push_back(1'b1); - return $sformatf("%-12s %4s, %s, %s, 
%s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRegAddrToStr(sbe.rs2), fpRegAddrToStr(instr[31:27]), fpRmToStr(instr[14:12])); + return $sformatf("%-12s %4s, %s, %s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), fpRegAddrToStr(rd), fpRegAddrToStr(rs1), fpRegAddrToStr(rs2), fpRegAddrToStr(instr[31:27]), fpRmToStr(instr[14:12])); endfunction // printR4Instr function string printFpSpecialInstr(); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(is_rd_fpr(sbe.op)); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(is_rs1_fpr(sbe.op)); case (sbe.op) - FCVT_F2F : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), fpFmtToStr(instr[21:20])), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); - FCVT_F2I : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", intFmtToStr(instr[21:20]), fpFmtToStr(instr[26:25])), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); - FCVT_I2F : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), intFmtToStr(instr[21:20])), fpRegAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); - FMV_F2X : return $sformatf("%-12s %4s, %s", $sformatf("fmv.x.%s", fmvFpFmtToStr(instr[26:25])), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); - FMV_X2F : return $sformatf("%-12s %4s, %s", $sformatf("fmv.x.%s", fmvFpFmtToStr(instr[26:25])), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); + FCVT_F2F : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), fpFmtToStr(instr[21:20])), fpRegAddrToStr(rd), fpRegAddrToStr(rs1), fpRmToStr(instr[14:12])); + FCVT_F2I : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", intFmtToStr(instr[21:20]), fpFmtToStr(instr[26:25])), regAddrToStr(rd), fpRegAddrToStr(rs1), fpRmToStr(instr[14:12])); + FCVT_I2F : 
return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), intFmtToStr(instr[21:20])), fpRegAddrToStr(rd), regAddrToStr(rs1), fpRmToStr(instr[14:12])); + FMV_F2X : return $sformatf("%-12s %4s, %s", $sformatf("fmv.x.%s", fmvFpFmtToStr(instr[26:25])), regAddrToStr(rd), fpRegAddrToStr(rs1)); + FMV_X2F : return $sformatf("%-12s %4s, %s", $sformatf("fmv.x.%s", fmvFpFmtToStr(instr[26:25])), regAddrToStr(rd), fpRegAddrToStr(rs1)); endcase endfunction function string printIInstr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(1'b0); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(1'b0); - if (sbe.rs1 == 0) - return $sformatf("%-12s %4s, %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result)); + if (rs1 == 0) + return $sformatf("%-12s %4s, %0d", mnemonic, regAddrToStr(rd), $signed(sbe.result)); - return $sformatf("%-12s %4s, %s, %0d", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), $signed(sbe.result)); + return $sformatf("%-12s %4s, %s, %0d", mnemonic, regAddrToStr(rd), regAddrToStr(rs1), $signed(sbe.result)); endfunction // printIInstr function string printIuInstr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(1'b0); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(1'b0); - return $sformatf("%-12s %4s, %s, 0x%0x", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), sbe.result); + return $sformatf("%-12s %4s, %s, 0x%0x", mnemonic, regAddrToStr(rd), regAddrToStr(rs1), sbe.result); endfunction // printIuInstr function string printSBInstr(input string mnemonic); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(1'b0); - read_regs.push_back(sbe.rs2); + read_regs.push_back(rs2); read_fpr.push_back(1'b0); - if (sbe.rs2 == 0) - return $sformatf("%-12s %4s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), $signed(sbe.result)); + if (rs2 == 
0) + return $sformatf("%-12s %4s, pc + %0d", mnemonic, regAddrToStr(rs1), $signed(sbe.result)); else - return $sformatf("%-12s %4s, %s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2), $signed(sbe.result)); + return $sformatf("%-12s %4s, %s, pc + %0d", mnemonic, regAddrToStr(rs1), regAddrToStr(rs2), $signed(sbe.result)); endfunction // printIuInstr function string printUInstr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(1'b0); - return $sformatf("%-12s %4s, 0x%0h", mnemonic, regAddrToStr(sbe.rd), sbe.result[31:12]); + return $sformatf("%-12s %4s, 0x%0h", mnemonic, regAddrToStr(rd), sbe.result[31:12]); endfunction // printUInstr function string printJump(); @@ -454,7 +460,7 @@ class instruction_trace_item; case (instr[6:0]) OPCODE_JALR: begin // is this a return? - if (sbe.rd == 'b0 && (sbe.rs1 == 'h1 || sbe.rs1 == 'h5)) begin + if (rd == 'b0 && (rs1 == 'h1 || rs1 == 'h5)) begin return this.printMnemonic("ret"); end else begin return this.printIInstr("jalr"); @@ -462,7 +468,7 @@ class instruction_trace_item; end OPCODE_JAL: begin - if (sbe.rd == 'b0) + if (rd == 'b0) return this.printUJInstr("j"); else return this.printUJInstr("jal"); @@ -473,38 +479,38 @@ class instruction_trace_item; function string printUJInstr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(1'b0); // jump instruction - if (sbe.rd == 0) - return $sformatf("%-16s pc + %0d", mnemonic, $signed(sbe.result)); + if (rd == 0) + return $sformatf("%-12s pc + %0d", mnemonic, $signed(sbe.result)); else - return $sformatf("%-12s %4s, pc + %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result)); + return $sformatf("%-12s %4s, pc + %0d", mnemonic, regAddrToStr(rd), $signed(sbe.result)); endfunction // printUJInstr function string printCSRInstr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(1'b0); if (instr[14] 
== 0) begin - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(1'b0); - if (sbe.rd != 0 && sbe.rs1 != 0) begin - return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0])); + if (rd != 0 && rs1 != 0) begin + return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(rd), regAddrToStr(rs1), csrAddrToStr(sbe.result[11:0])); // don't display instructions which write to zero - end else if (sbe.rd == 0) begin - return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0])); - end else if (sbe.rs1 == 0) begin - return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(sbe.rd), csrAddrToStr(sbe.result[11:0])); + end else if (rd == 0) begin + return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(rs1), csrAddrToStr(sbe.result[11:0])); + end else if (rs1 == 0) begin + return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(rd), csrAddrToStr(sbe.result[11:0])); end end else begin - if (sbe.rd != 0 && sbe.rs1 != 0) begin - return $sformatf("%-12s %4s, %d, %s", mnemonic, regAddrToStr(sbe.rd), $unsigned(sbe.rs1), csrAddrToStr(sbe.result[11:0])); + if (rd != 0 && rs1 != 0) begin + return $sformatf("%-12s %4s, %d, %s", mnemonic, regAddrToStr(rd), $unsigned(rs1), csrAddrToStr(sbe.result[11:0])); // don't display instructions which write to zero - end else if (sbe.rd == 0) begin - return $sformatf("%-14s %2d, %s", mnemonic, $unsigned(sbe.rs1), csrAddrToStr(sbe.result[11:0])); - end else if (sbe.rs1 == 0) begin - return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(sbe.rd), csrAddrToStr(sbe.result[11:0])); + end else if (rd == 0) begin + return $sformatf("%-14s %2d, %s", mnemonic, $unsigned(rs1), csrAddrToStr(sbe.result[11:0])); + end else if (rs1 == 0) begin + return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(rd), csrAddrToStr(sbe.result[11:0])); end end endfunction // printCSRInstr @@ -526,17 +532,17 @@ class instruction_trace_item; if 
(instr[6:0] == OPCODE_LOAD_FP) mnemonic = $sformatf("f%s",mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(is_rd_fpr(sbe.op)); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(1'b0); // save the immediate for calculating the virtual address this.imm = sbe.result; if (instr[6:0] == OPCODE_LOAD_FP) - return $sformatf("%-12s %4s, %0d(%s)", mnemonic, fpRegAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, fpRegAddrToStr(rd), $signed(sbe.result), regAddrToStr(rs1)); else - return $sformatf("%-12s %4s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, regAddrToStr(rd), $signed(sbe.result), regAddrToStr(rs1)); endfunction function string printStoreInstr(); @@ -553,17 +559,17 @@ class instruction_trace_item; if (instr[6:0] == OPCODE_STORE_FP) mnemonic = $sformatf("f%s",mnemonic); - read_regs.push_back(sbe.rs2); + read_regs.push_back(rs2); read_fpr.push_back(is_rs2_fpr(sbe.op)); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(1'b0); // save the immediate for calculating the virtual address this.imm = sbe.result; if (instr[6:0] == OPCODE_STORE_FP) - return $sformatf("%-12s %4s, %0d(%s)", mnemonic, fpRegAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, fpRegAddrToStr(rs2), $signed(sbe.result), regAddrToStr(rs1)); else - return $sformatf("%-12s %4s, %0d(%s)", mnemonic, regAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, regAddrToStr(rs2), $signed(sbe.result), regAddrToStr(rs1)); endfunction // printSInstr From 91475d971d52df7caeeb047ebd6c6893f9e58ce0 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Tue, 17 Apr 2018 00:06:56 +0200 Subject: [PATCH 29/94] :arrow_up: Bump tb for FPU waves --- 
.gitmodules | 2 +- tb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index e826f576e..b3950ecdb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,7 +3,7 @@ url = https://github.com/pulp-platform/riscv-torture.git [submodule "tb"] path = tb - url = https://github.com/pulp-platform/uvm-components.git + url = https://github.com/stmach/uvm-components.git [submodule "src/axi_mem_if"] path = src/axi_mem_if url = https://github.com/pulp-platform/axi_mem_if.git diff --git a/tb b/tb index d5605a06e..b1197dd71 160000 --- a/tb +++ b/tb @@ -1 +1 @@ -Subproject commit d5605a06ecf7e2fa881c55f25870fdcef5433a48 +Subproject commit b1197dd712c289a5daaab5b532943287e6bde9d7 From 0a826d15df9a54f50dc141edbbb5dfb06176e6b8 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Wed, 18 Apr 2018 14:30:04 +0200 Subject: [PATCH 30/94] :bug: Fix several FPU bugs, bump FPU dependencies --- src/fpnew | 2 +- src/fpu_legacy | 2 +- src/fpu_wrap.sv | 23 ++++++--- src/util/instruction_trace_item.svh | 76 ++++++++++++++--------------- tb | 2 +- 5 files changed, 56 insertions(+), 49 deletions(-) diff --git a/src/fpnew b/src/fpnew index 6bc7c8681..97d0a50a2 160000 --- a/src/fpnew +++ b/src/fpnew @@ -1 +1 @@ -Subproject commit 6bc7c86818b19467322ab69a7fc387db4f727821 +Subproject commit 97d0a50a2c7270b83a3e8e983b9c63f666d24168 diff --git a/src/fpu_legacy b/src/fpu_legacy index 04e8470f7..a67d8de46 160000 --- a/src/fpu_legacy +++ b/src/fpu_legacy @@ -1 +1 @@ -Subproject commit 04e8470f7561fb8f02b33d92bfdff679c76549ef +Subproject commit a67d8de4606afa59d1d3cebb7a72011c6bee2b16 diff --git a/src/fpu_wrap.sv b/src/fpu_wrap.sv index d882a2d24..38b75595e 100644 --- a/src/fpu_wrap.sv +++ b/src/fpu_wrap.sv @@ -182,10 +182,10 @@ module fpu_wrap ( fpu_op_mod_n = 1'b1; end // Fused Negated Multiply Subtract - FNMSUB : fpu_op_n = FNMSUB; + FNMSUB : fpu_op_n = OP_FNMSUB; // Fused Negated Multiply Add is modified FNMSUB FNMADD : begin - fpu_op_n = FNMSUB; + fpu_op_n = OP_FNMSUB; 
fpu_op_mod_n = 1'b1; end // Float to Int Cast - Op encoded in lowest two imm bits or rm @@ -260,14 +260,15 @@ module fpu_wrap ( // Move from FPR to GPR - mapped to NOP since no recoding FMV_F2X : begin fpu_op_n = OP_SGNJ; + fpu_op_mod_n = 1'b1; // no NaN-Boxing operand_b_n = operand_a_n; - vec_replication = 1'b0; // no replication, we set second operand + vec_replication = 1'b0; // no replication, we set second operand end // Move from GPR to FPR - mapped to NOP since no recoding FMV_X2F : begin fpu_op_n = OP_SGNJ; operand_b_n = operand_a_n; - vec_replication = 1'b0; // no replication, we set second operand + vec_replication = 1'b0; // no replication, we set second operand end // Scalar Comparisons - op encoded in rm (000-010) FCMP : fpu_op_n = OP_CMP; @@ -342,14 +343,20 @@ module fpu_wrap ( endcase // Replication - if (fpu_vec_op_n && vec_replication) begin + if (fpu_vec_op_n && vec_replication) case (fpu_fmt_n) - FMT_FP32 : operand_b_n = RVD ? {2{operand_b_i[31:0]}} : operand_b_i; + FMT_FP32 : operand_b_n = RVD ? {2{operand_b_n[31:0]}} : operand_b_n; FMT_FP16, - FMT_FP16ALT : operand_b_n = RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}}; - FMT_FP8 : operand_b_n = RVD ? {8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}}; + FMT_FP16ALT : operand_b_n = RVD ? {4{operand_b_n[15:0]}} : {2{operand_b_n[15:0]}}; + FMT_FP8 : operand_b_n = RVD ? 
{8{operand_b_n[7:0]}} : {4{operand_b_n[7:0]}}; endcase // fpu_fmt_n + + // Ugly but needs to be done: map additions to operands B and C + if (fpu_op_n == OP_ADD) begin + operand_c_n = operand_b_n; + operand_b_n = operand_a_n; end + end diff --git a/src/util/instruction_trace_item.svh b/src/util/instruction_trace_item.svh index 8ff3a6ca7..0d9248956 100644 --- a/src/util/instruction_trace_item.svh +++ b/src/util/instruction_trace_item.svh @@ -237,9 +237,10 @@ class instruction_trace_item; INSTR_FLT: s = this.printRFInstr("flt", 1'b0); INSTR_FEQ: s = this.printRFInstr("feq", 1'b0); + INSTR_FCLASS: s = this.printRFInstr1Op("fclass", 1'b0); + INSTR_FCVT_F2F, INSTR_FMV_F2X, - INSTR_FCLASS, INSTR_FMV_X2F, INSTR_FCVT_F2I, INSTR_FCVT_I2F: s = this.printFpSpecialInstr(); // these are a mess to do nicely @@ -343,35 +344,35 @@ class instruction_trace_item; read_regs.push_back(sbe.rs2); read_fpr.push_back(1'b0); - return $sformatf("%-16s %s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2)); + return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2)); endfunction // printRInstr function string printRFInstr(input string mnemonic, input bit use_rnd); result_regs.push_back(sbe.rd); - result_fpr.push_back(1'b1); + result_fpr.push_back(is_rd_fpr(sbe.op)); read_regs.push_back(sbe.rs1); - read_fpr.push_back(1'b1); + read_fpr.push_back(is_rs1_fpr(sbe.op)); read_regs.push_back(sbe.rs2); - read_fpr.push_back(1'b1); + read_fpr.push_back(is_rs2_fpr(sbe.op)); if (use_rnd && instr[14:12]!=3'b111) - return $sformatf("%s.%-10s %s, %s, %s, %s", mnemonic, fpFmtToStr(instr[26:25]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRegAddrToStr(sbe.rs2), fpRmToStr(instr[14:12])); + return $sformatf("%-12s %4s, %s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(sbe.rd):regAddrToStr(sbe.rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(sbe.rs1):regAddrToStr(sbe.rs1), 
is_rs2_fpr(sbe.op)?fpRegAddrToStr(sbe.rs2):regAddrToStr(sbe.rs2), fpRmToStr(instr[14:12])); else - return $sformatf("%s.%-10s %s, %s, %s", mnemonic, fpFmtToStr(instr[26:25]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRegAddrToStr(sbe.rs2)); + return $sformatf("%-12s %4s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(sbe.rd):regAddrToStr(sbe.rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(sbe.rs1):regAddrToStr(sbe.rs1), is_rs2_fpr(sbe.op)?fpRegAddrToStr(sbe.rs2):regAddrToStr(sbe.rs2)); endfunction // printRFInstr function string printRFInstr1Op(input string mnemonic, input bit use_rnd); result_regs.push_back(sbe.rd); - result_fpr.push_back(1'b1); + result_fpr.push_back(is_rd_fpr(sbe.op)); read_regs.push_back(sbe.rs1); - read_fpr.push_back(1'b1); + read_fpr.push_back(is_rs1_fpr(sbe.op)); if (use_rnd && instr[14:12]!=3'b111) - return $sformatf("%s.%-10s %s, %s, %s", mnemonic, fpFmtToStr(instr[26:25]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); + return $sformatf("%-12s %4s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(sbe.rd):regAddrToStr(sbe.rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(sbe.rs1):regAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); else - return $sformatf("%s.%-10s %s, %s", mnemonic, fpFmtToStr(instr[26:25]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(sbe.rd):regAddrToStr(sbe.rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(sbe.rs1):regAddrToStr(sbe.rs1)); endfunction // printRFInstr1Op function string printR4Instr(input string mnemonic); @@ -385,23 +386,22 @@ class instruction_trace_item; read_regs.push_back(instr[31:27]); read_fpr.push_back(1'b1); - return $sformatf("%s.%-10s %s, %s, %s, %s, %s", mnemonic, fpFmtToStr(instr[26:25]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRegAddrToStr(sbe.rs2), 
fpRegAddrToStr(instr[31:27]), fpRmToStr(instr[14:12])); + return $sformatf("%-12s %4s, %s, %s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRegAddrToStr(sbe.rs2), fpRegAddrToStr(instr[31:27]), fpRmToStr(instr[14:12])); endfunction // printR4Instr function string printFpSpecialInstr(); result_regs.push_back(sbe.rd); result_fpr.push_back(is_rd_fpr(sbe.op)); - result_regs.push_back(sbe.rs1); - result_fpr.push_back(is_rs1_fpr(sbe.op)); + read_regs.push_back(sbe.rs1); + read_fpr.push_back(is_rs1_fpr(sbe.op)); case (sbe.op) - FCVT_F2F : return $sformatf("fcvt.%s.%-10s %s, %s, %s", fpFmtToStr(instr[26:25]), fpFmtToStr(instr[21:20]), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); - FCVT_F2I : return $sformatf("fcvt.%s.%-10s %s, %s, %s", intFmtToStr(instr[21:20]), fpFmtToStr(instr[26:25]), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); - FCVT_I2F : return $sformatf("fcvt.%s.%-10s %s, %s, %s", fpFmtToStr(instr[26:25]), intFmtToStr(instr[21:20]), fpRegAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); - FMV_F2X : return $sformatf("fmv.x.%-10s %s, %s", fmvFpFmtToStr(instr[26:25]), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); - FMV_X2F : return $sformatf("fmv.%s.x\t %s, %s", fmvFpFmtToStr(instr[26:25]), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); - FCLASS : return $sformatf("fclass.%-10s %s, %s", fpFmtToStr(instr[26:25]), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); + FCVT_F2F : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), fpFmtToStr(instr[21:20])), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); + FCVT_F2I : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", intFmtToStr(instr[21:20]), fpFmtToStr(instr[26:25])), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); + FCVT_I2F : return $sformatf("%-12s %4s, %s, %s", 
$sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), intFmtToStr(instr[21:20])), fpRegAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); + FMV_F2X : return $sformatf("%-12s %4s, %s", $sformatf("fmv.x.%s", fmvFpFmtToStr(instr[26:25])), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); + FMV_X2F : return $sformatf("%-12s %4s, %s", $sformatf("fmv.x.%s", fmvFpFmtToStr(instr[26:25])), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); endcase endfunction @@ -413,9 +413,9 @@ class instruction_trace_item; read_fpr.push_back(1'b0); if (sbe.rs1 == 0) - return $sformatf("%-16s %s, %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result)); + return $sformatf("%-12s %4s, %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result)); - return $sformatf("%-16s %s, %s, %0d", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), $signed(sbe.result)); + return $sformatf("%-12s %4s, %s, %0d", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), $signed(sbe.result)); endfunction // printIInstr function string printIuInstr(input string mnemonic); @@ -425,7 +425,7 @@ class instruction_trace_item; read_regs.push_back(sbe.rs1); read_fpr.push_back(1'b0); - return $sformatf("%-16s %s, %s, 0x%0x", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), sbe.result); + return $sformatf("%-12s %4s, %s, 0x%0x", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), sbe.result); endfunction // printIuInstr function string printSBInstr(input string mnemonic); @@ -436,9 +436,9 @@ class instruction_trace_item; read_fpr.push_back(1'b0); if (sbe.rs2 == 0) - return $sformatf("%-16s %s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), $signed(sbe.result)); + return $sformatf("%-12s %4s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), $signed(sbe.result)); else - return $sformatf("%-16s %s, %s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2), $signed(sbe.result)); + return $sformatf("%-12s %4s, %s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2), $signed(sbe.result)); 
endfunction // printIuInstr function string printUInstr(input string mnemonic); @@ -446,7 +446,7 @@ class instruction_trace_item; result_regs.push_back(sbe.rd); result_fpr.push_back(1'b0); - return $sformatf("%-16s %s, 0x%0h", mnemonic, regAddrToStr(sbe.rd), sbe.result[31:12]); + return $sformatf("%-12s %4s, 0x%0h", mnemonic, regAddrToStr(sbe.rd), sbe.result[31:12]); endfunction // printUInstr function string printJump(); @@ -477,9 +477,9 @@ class instruction_trace_item; result_fpr.push_back(1'b0); // jump instruction if (sbe.rd == 0) - return $sformatf("%-16s pc + %0d", mnemonic, $signed(sbe.result)); + return $sformatf("%-16s pc + %0d", mnemonic, $signed(sbe.result)); else - return $sformatf("%-16s %s, pc + %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result)); + return $sformatf("%-12s %4s, pc + %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result)); endfunction // printUJInstr function string printCSRInstr(input string mnemonic); @@ -490,21 +490,21 @@ class instruction_trace_item; read_regs.push_back(sbe.rs1); read_fpr.push_back(1'b0); if (sbe.rd != 0 && sbe.rs1 != 0) begin - return $sformatf("%-16s %s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0])); + return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0])); // don't display instructions which write to zero end else if (sbe.rd == 0) begin - return $sformatf("%-16s %s, %s", mnemonic, regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0])); + return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0])); end else if (sbe.rs1 == 0) begin - return $sformatf("%-16s %s, %s", mnemonic, regAddrToStr(sbe.rd), csrAddrToStr(sbe.result[11:0])); + return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(sbe.rd), csrAddrToStr(sbe.result[11:0])); end end else begin if (sbe.rd != 0 && sbe.rs1 != 0) begin - return $sformatf("%-16s %s, %d, %s", mnemonic, 
regAddrToStr(sbe.rd), $unsigned(sbe.rs1), csrAddrToStr(sbe.result[11:0])); + return $sformatf("%-12s %4s, %d, %s", mnemonic, regAddrToStr(sbe.rd), $unsigned(sbe.rs1), csrAddrToStr(sbe.result[11:0])); // don't display instructions which write to zero end else if (sbe.rd == 0) begin - return $sformatf("%-16s %d, %s", mnemonic, $unsigned(sbe.rs1), csrAddrToStr(sbe.result[11:0])); + return $sformatf("%-14s %2d, %s", mnemonic, $unsigned(sbe.rs1), csrAddrToStr(sbe.result[11:0])); end else if (sbe.rs1 == 0) begin - return $sformatf("%-16s %s, %s", mnemonic, regAddrToStr(sbe.rd), csrAddrToStr(sbe.result[11:0])); + return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(sbe.rd), csrAddrToStr(sbe.result[11:0])); end end endfunction // printCSRInstr @@ -534,9 +534,9 @@ class instruction_trace_item; this.imm = sbe.result; if (instr[6:0] == OPCODE_LOAD_FP) - return $sformatf("%-15s %s, %0d(%s)", mnemonic, fpRegAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, fpRegAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); else - return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); endfunction function string printStoreInstr(); @@ -561,9 +561,9 @@ class instruction_trace_item; this.imm = sbe.result; if (instr[6:0] == OPCODE_STORE_FP) - return $sformatf("%-16s %s, %0d(%s)", mnemonic, fpRegAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, fpRegAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); else - return $sformatf("%-16s %s, %0d(%s)", mnemonic, regAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, regAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); endfunction // 
printSInstr diff --git a/tb b/tb index b1197dd71..59f931065 160000 --- a/tb +++ b/tb @@ -1 +1 @@ -Subproject commit b1197dd712c289a5daaab5b532943287e6bde9d7 +Subproject commit 59f9310659626cd4817348219133fd777a0b1700 From 385359f5e6ce44e337c3eaad29b3ad7a0c63eb5d Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Wed, 18 Apr 2018 14:32:17 +0200 Subject: [PATCH 31/94] :sparkles: Add register renaming to issue stage Renaming can be turned on or off with the `ENABLE_RENAME` parameter in `ariane_pkg.sv`. --- include/ariane_pkg.sv | 2 + src/issue_read_operands.sv | 2 +- src/issue_stage.sv | 82 ++++++++-------- src/re_name.sv | 25 ++++- src/util/instruction_trace_item.svh | 142 +++++++++++++++------------- 5 files changed, 140 insertions(+), 113 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index 4d5bc25d0..e9027ebc4 100755 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -35,6 +35,8 @@ package ariane_pkg; localparam BITS_SATURATION_COUNTER = 2; localparam NR_COMMIT_PORTS = 2; + localparam ENABLE_RENAME = 1'b1; + // Floating-point extensions configuration localparam bit RVF = 1'b1; // Is F extension enabled localparam bit RVD = 1'b1; // Is D extension enabled diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index 61f611bb6..87677b86c 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -29,7 +29,7 @@ module issue_read_operands #( input logic debug_gpr_we_i, input logic [63:0] debug_gpr_wdata_i, output logic [63:0] debug_gpr_rdata_o, - // coming from scoreboard + // coming from rename input scoreboard_entry_t issue_instr_i, input logic issue_instr_valid_i, output logic issue_ack_o, diff --git a/src/issue_stage.sv b/src/issue_stage.sv index a1e300065..2065f9bb6 100644 --- a/src/issue_stage.sv +++ b/src/issue_stage.sv @@ -104,52 +104,31 @@ module issue_stage #( logic [FLEN-1:0] rs3_sb_iro; logic rs3_valid_iro_sb; - scoreboard_entry_t issue_instr_sb_rename; - logic issue_instr_valid_sb_rename; - 
logic issue_ack_rename_sb; + scoreboard_entry_t issue_instr_rename_sb; + logic issue_instr_valid_rename_sb; + logic issue_ack_sb_rename; - scoreboard_entry_t issue_instr_rename_iro; - logic issue_instr_valid_rename_iro; - logic issue_ack_iro_rename; + scoreboard_entry_t issue_instr_sb_iro; + logic issue_instr_valid_sb_iro; + logic issue_ack_iro_sb; // --------------------------------------------------------- - // 1. Issue instruction and read operand, also commit - // --------------------------------------------------------- - issue_read_operands i_issue_read_operands ( - .flush_i ( flush_unissued_instr_i ), - .issue_instr_i ( issue_instr_rename_iro ), - .issue_instr_valid_i ( issue_instr_valid_rename_iro ), - .issue_ack_o ( issue_ack_iro_rename ), - .rs1_o ( rs1_iro_sb ), - .rs1_i ( rs1_sb_iro ), - .rs1_valid_i ( rs1_valid_sb_iro ), - .rs2_o ( rs2_iro_sb ), - .rs2_i ( rs2_sb_iro ), - .rs2_valid_i ( rs2_valid_iro_sb ), - .rs3_o ( rs3_iro_sb ), - .rs3_i ( rs3_sb_iro ), - .rs3_valid_i ( rs3_valid_iro_sb ), - .rd_clobber_gpr_i ( rd_clobber_gpr_sb_iro ), - .rd_clobber_fpr_i ( rd_clobber_fpr_sb_iro ), - .* - ); - - // --------------------------------------------------------- - // 2. Re-name + // 1. Re-name // --------------------------------------------------------- re_name i_re_name ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), - .issue_instr_i ( issue_instr_sb_rename ), - .issue_instr_valid_i ( issue_instr_valid_sb_rename ), - .issue_ack_o ( issue_ack_rename_sb ), - .issue_instr_o ( issue_instr_rename_iro ), - .issue_instr_valid_o ( issue_instr_valid_rename_iro ), - .issue_ack_i ( issue_ack_iro_rename ) + .flush_i ( flush_i ), + .issue_instr_i ( decoded_instr_i ), + .issue_instr_valid_i ( decoded_instr_valid_i ), + .issue_ack_o ( decoded_instr_ack_o ), + .issue_instr_o ( issue_instr_rename_sb ), + .issue_instr_valid_o ( issue_instr_valid_rename_sb ), + .issue_ack_i ( issue_ack_sb_rename ) ); // --------------------------------------------------------- - // 3. 
Manage issued instructions in a scoreboard + // 2. Manage instructions in a scoreboard // --------------------------------------------------------- scoreboard #( .NR_ENTRIES ( NR_ENTRIES ), @@ -168,9 +147,12 @@ module issue_stage #( .rs3_o ( rs3_sb_iro ), .rs3_valid_o ( rs3_valid_iro_sb ), - .issue_instr_o ( issue_instr_sb_rename ), - .issue_instr_valid_o ( issue_instr_valid_sb_rename ), - .issue_ack_i ( issue_ack_rename_sb ), + .decoded_instr_i ( issue_instr_rename_sb ), + .decoded_instr_valid_i ( issue_instr_valid_rename_sb ), + .decoded_instr_ack_o ( issue_ack_sb_rename ), + .issue_instr_o ( issue_instr_sb_iro ), + .issue_instr_valid_o ( issue_instr_valid_sb_iro ), + .issue_ack_i ( issue_ack_iro_sb ), .trans_id_i ( trans_id_i ), .wbdata_i ( wbdata_i ), @@ -178,4 +160,26 @@ module issue_stage #( .* ); + // --------------------------------------------------------- + // 3. Issue instruction and read operand, also commit + // --------------------------------------------------------- + issue_read_operands i_issue_read_operands ( + .flush_i ( flush_unissued_instr_i ), + .issue_instr_i ( issue_instr_sb_iro ), + .issue_instr_valid_i ( issue_instr_valid_sb_iro ), + .issue_ack_o ( issue_ack_iro_sb ), + .rs1_o ( rs1_iro_sb ), + .rs1_i ( rs1_sb_iro ), + .rs1_valid_i ( rs1_valid_sb_iro ), + .rs2_o ( rs2_iro_sb ), + .rs2_i ( rs2_sb_iro ), + .rs2_valid_i ( rs2_valid_iro_sb ), + .rs3_o ( rs3_iro_sb ), + .rs3_i ( rs3_sb_iro ), + .rs3_valid_i ( rs3_valid_iro_sb ), + .rd_clobber_gpr_i ( rd_clobber_gpr_sb_iro ), + .rd_clobber_fpr_i ( rd_clobber_fpr_sb_iro ), + .* + ); + endmodule diff --git a/src/re_name.sv b/src/re_name.sv index d9ab11389..36a0b24f9 100644 --- a/src/re_name.sv +++ b/src/re_name.sv @@ -23,6 +23,7 @@ import ariane_pkg::*; module re_name ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low + input logic flush_i, // Flush renaming state // from/to scoreboard input scoreboard_entry_t issue_instr_i, input logic issue_instr_valid_i, @@ 
-46,12 +47,12 @@ module re_name ( // ------------------- always_comb begin // MSB of the renamed source register addresses - logic name_bit_rs1, name_bit_rs2, name_bit_rs3; + logic name_bit_rs1, name_bit_rs2, name_bit_rs3, name_bit_rd; // default assignments re_name_table_gpr_n = re_name_table_gpr_q; re_name_table_fpr_n = re_name_table_fpr_q; - issue_instr_o = issue_instr_i; + issue_instr_o = issue_instr_i; if (issue_ack_i) begin // if we acknowledge the instruction tic the corresponding destination register @@ -69,16 +70,30 @@ module re_name ( // rs3 is only used in certain FP operations and held like an immediate name_bit_rs3 = re_name_table_fpr_q[issue_instr_i.result[4:0]]; // make sure only the addr bits are read + // select name bit according to the state it will have after renaming + name_bit_rd = is_rd_fpr(issue_instr_i.op) ? re_name_table_fpr_q[issue_instr_i.rd] ^ 1'b1 + : re_name_table_gpr_q[issue_instr_i.rd] ^ (issue_instr_i.rd != '0); // don't rename x0 + // re-name the source registers - issue_instr_o.rs1 = { name_bit_rs1, issue_instr_i.rs1 }; - issue_instr_o.rs2 = { name_bit_rs2, issue_instr_i.rs2 }; + issue_instr_o.rs1 = { ENABLE_RENAME & name_bit_rs1, issue_instr_i.rs1[4:0] }; + issue_instr_o.rs2 = { ENABLE_RENAME & name_bit_rs2, issue_instr_i.rs2[4:0] }; // re-name the third operand in imm if it's actually an operand if (is_imm_fpr(issue_instr_i.op)) - issue_instr_o.result = {name_bit_rs3, issue_instr_i.result[4:0]}; + issue_instr_o.result = { ENABLE_RENAME & name_bit_rs3, issue_instr_i.result[4:0]}; + + // re-name the destination register + issue_instr_o.rd = { ENABLE_RENAME & name_bit_rd, issue_instr_i.rd[4:0] }; // we don't want to re-name gp register zero, it is non-writeable anyway re_name_table_gpr_n[0] = 1'b0; + + // Handle flushes + if (flush_i) begin + re_name_table_gpr_n = '0; + re_name_table_fpr_n = '0; + end + end // ------------------- diff --git a/src/util/instruction_trace_item.svh b/src/util/instruction_trace_item.svh index 
0d9248956..1ebf3cdc8 100644 --- a/src/util/instruction_trace_item.svh +++ b/src/util/instruction_trace_item.svh @@ -31,6 +31,8 @@ class instruction_trace_item; string priv_lvl; branchpredict_t bp; + logic [4:0] rs1, rs2, rs3, rd; + // constructor creating a new instruction trace item, e.g.: a single instruction with all relevant information function new (time simtime, longint unsigned cycle, scoreboard_entry_t sbe, logic [31:0] instr, logic [63:0] gp_reg_file [32], logic [63:0] fp_reg_file [32], logic [63:0] result, logic [63:0] paddr, priv_lvl_t priv_lvl, branchpredict_t bp); this.simtime = simtime; @@ -44,6 +46,10 @@ class instruction_trace_item; this.paddr = paddr; this.bp = bp; this.priv_lvl = getPrivLevel(priv_lvl); + this.rs1 = sbe.rs1[4:0]; + this.rs2 = sbe.rs2[4:0]; + this.rs3 = instr[31:27]; + this.rd = sbe.rd[4:0]; endfunction // convert gp register address to ABI compatible form @@ -337,116 +343,116 @@ class instruction_trace_item; function string printRInstr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(1'b0); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(1'b0); - read_regs.push_back(sbe.rs2); + read_regs.push_back(rs2); read_fpr.push_back(1'b0); - return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2)); + return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(rd), regAddrToStr(rs1), regAddrToStr(rs2)); endfunction // printRInstr function string printRFInstr(input string mnemonic, input bit use_rnd); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(is_rd_fpr(sbe.op)); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(is_rs1_fpr(sbe.op)); - read_regs.push_back(sbe.rs2); + read_regs.push_back(rs2); read_fpr.push_back(is_rs2_fpr(sbe.op)); if (use_rnd && instr[14:12]!=3'b111) - return $sformatf("%-12s %4s, %s, %s, %s", 
$sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(sbe.rd):regAddrToStr(sbe.rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(sbe.rs1):regAddrToStr(sbe.rs1), is_rs2_fpr(sbe.op)?fpRegAddrToStr(sbe.rs2):regAddrToStr(sbe.rs2), fpRmToStr(instr[14:12])); + return $sformatf("%-12s %4s, %s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(rd):regAddrToStr(rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(rs1):regAddrToStr(rs1), is_rs2_fpr(sbe.op)?fpRegAddrToStr(rs2):regAddrToStr(rs2), fpRmToStr(instr[14:12])); else - return $sformatf("%-12s %4s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(sbe.rd):regAddrToStr(sbe.rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(sbe.rs1):regAddrToStr(sbe.rs1), is_rs2_fpr(sbe.op)?fpRegAddrToStr(sbe.rs2):regAddrToStr(sbe.rs2)); + return $sformatf("%-12s %4s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(rd):regAddrToStr(rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(rs1):regAddrToStr(rs1), is_rs2_fpr(sbe.op)?fpRegAddrToStr(rs2):regAddrToStr(rs2)); endfunction // printRFInstr function string printRFInstr1Op(input string mnemonic, input bit use_rnd); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(is_rd_fpr(sbe.op)); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(is_rs1_fpr(sbe.op)); if (use_rnd && instr[14:12]!=3'b111) - return $sformatf("%-12s %4s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(sbe.rd):regAddrToStr(sbe.rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(sbe.rs1):regAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); + return $sformatf("%-12s %4s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(rd):regAddrToStr(rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(rs1):regAddrToStr(rs1), fpRmToStr(instr[14:12])); else - return $sformatf("%-12s %4s, 
%s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(sbe.rd):regAddrToStr(sbe.rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(sbe.rs1):regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(rd):regAddrToStr(rd), is_rs1_fpr(sbe.op)?fpRegAddrToStr(rs1):regAddrToStr(rs1)); endfunction // printRFInstr1Op function string printR4Instr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(1'b1); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(1'b1); - read_regs.push_back(sbe.rs2); + read_regs.push_back(rs2); read_fpr.push_back(1'b1); - read_regs.push_back(instr[31:27]); + read_regs.push_back(rs3); read_fpr.push_back(1'b1); - return $sformatf("%-12s %4s, %s, %s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRegAddrToStr(sbe.rs2), fpRegAddrToStr(instr[31:27]), fpRmToStr(instr[14:12])); + return $sformatf("%-12s %4s, %s, %s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), fpRegAddrToStr(rd), fpRegAddrToStr(rs1), fpRegAddrToStr(rs2), fpRegAddrToStr(instr[31:27]), fpRmToStr(instr[14:12])); endfunction // printR4Instr function string printFpSpecialInstr(); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(is_rd_fpr(sbe.op)); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(is_rs1_fpr(sbe.op)); case (sbe.op) - FCVT_F2F : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), fpFmtToStr(instr[21:20])), fpRegAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); - FCVT_F2I : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", intFmtToStr(instr[21:20]), fpFmtToStr(instr[26:25])), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); - FCVT_I2F : return 
$sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), intFmtToStr(instr[21:20])), fpRegAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), fpRmToStr(instr[14:12])); - FMV_F2X : return $sformatf("%-12s %4s, %s", $sformatf("fmv.x.%s", fmvFpFmtToStr(instr[26:25])), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); - FMV_X2F : return $sformatf("%-12s %4s, %s", $sformatf("fmv.x.%s", fmvFpFmtToStr(instr[26:25])), regAddrToStr(sbe.rd), fpRegAddrToStr(sbe.rs1)); + FCVT_F2F : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), fpFmtToStr(instr[21:20])), fpRegAddrToStr(rd), fpRegAddrToStr(rs1), fpRmToStr(instr[14:12])); + FCVT_F2I : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", intFmtToStr(instr[21:20]), fpFmtToStr(instr[26:25])), regAddrToStr(rd), fpRegAddrToStr(rs1), fpRmToStr(instr[14:12])); + FCVT_I2F : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), intFmtToStr(instr[21:20])), fpRegAddrToStr(rd), regAddrToStr(rs1), fpRmToStr(instr[14:12])); + FMV_F2X : return $sformatf("%-12s %4s, %s", $sformatf("fmv.x.%s", fmvFpFmtToStr(instr[26:25])), regAddrToStr(rd), fpRegAddrToStr(rs1)); + FMV_X2F : return $sformatf("%-12s %4s, %s", $sformatf("fmv.x.%s", fmvFpFmtToStr(instr[26:25])), regAddrToStr(rd), fpRegAddrToStr(rs1)); endcase endfunction function string printIInstr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(1'b0); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(1'b0); - if (sbe.rs1 == 0) - return $sformatf("%-12s %4s, %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result)); + if (rs1 == 0) + return $sformatf("%-12s %4s, %0d", mnemonic, regAddrToStr(rd), $signed(sbe.result)); - return $sformatf("%-12s %4s, %s, %0d", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), $signed(sbe.result)); + return $sformatf("%-12s %4s, %s, %0d", mnemonic, regAddrToStr(rd), 
regAddrToStr(rs1), $signed(sbe.result)); endfunction // printIInstr function string printIuInstr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(1'b0); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(1'b0); - return $sformatf("%-12s %4s, %s, 0x%0x", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), sbe.result); + return $sformatf("%-12s %4s, %s, 0x%0x", mnemonic, regAddrToStr(rd), regAddrToStr(rs1), sbe.result); endfunction // printIuInstr function string printSBInstr(input string mnemonic); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(1'b0); - read_regs.push_back(sbe.rs2); + read_regs.push_back(rs2); read_fpr.push_back(1'b0); - if (sbe.rs2 == 0) - return $sformatf("%-12s %4s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), $signed(sbe.result)); + if (rs2 == 0) + return $sformatf("%-12s %4s, pc + %0d", mnemonic, regAddrToStr(rs1), $signed(sbe.result)); else - return $sformatf("%-12s %4s, %s, pc + %0d", mnemonic, regAddrToStr(sbe.rs1), regAddrToStr(sbe.rs2), $signed(sbe.result)); + return $sformatf("%-12s %4s, %s, pc + %0d", mnemonic, regAddrToStr(rs1), regAddrToStr(rs2), $signed(sbe.result)); endfunction // printIuInstr function string printUInstr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(1'b0); - return $sformatf("%-12s %4s, 0x%0h", mnemonic, regAddrToStr(sbe.rd), sbe.result[31:12]); + return $sformatf("%-12s %4s, 0x%0h", mnemonic, regAddrToStr(rd), sbe.result[31:12]); endfunction // printUInstr function string printJump(); @@ -454,7 +460,7 @@ class instruction_trace_item; case (instr[6:0]) OPCODE_JALR: begin // is this a return? 
- if (sbe.rd == 'b0 && (sbe.rs1 == 'h1 || sbe.rs1 == 'h5)) begin + if (rd == 'b0 && (rs1 == 'h1 || rs1 == 'h5)) begin return this.printMnemonic("ret"); end else begin return this.printIInstr("jalr"); @@ -462,7 +468,7 @@ class instruction_trace_item; end OPCODE_JAL: begin - if (sbe.rd == 'b0) + if (rd == 'b0) return this.printUJInstr("j"); else return this.printUJInstr("jal"); @@ -473,38 +479,38 @@ class instruction_trace_item; function string printUJInstr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(1'b0); // jump instruction - if (sbe.rd == 0) - return $sformatf("%-16s pc + %0d", mnemonic, $signed(sbe.result)); + if (rd == 0) + return $sformatf("%-12s pc + %0d", mnemonic, $signed(sbe.result)); else - return $sformatf("%-12s %4s, pc + %0d", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result)); + return $sformatf("%-12s %4s, pc + %0d", mnemonic, regAddrToStr(rd), $signed(sbe.result)); endfunction // printUJInstr function string printCSRInstr(input string mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(1'b0); if (instr[14] == 0) begin - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(1'b0); - if (sbe.rd != 0 && sbe.rs1 != 0) begin - return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(sbe.rd), regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0])); + if (rd != 0 && rs1 != 0) begin + return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(rd), regAddrToStr(rs1), csrAddrToStr(sbe.result[11:0])); // don't display instructions which write to zero - end else if (sbe.rd == 0) begin - return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(sbe.rs1), csrAddrToStr(sbe.result[11:0])); - end else if (sbe.rs1 == 0) begin - return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(sbe.rd), csrAddrToStr(sbe.result[11:0])); + end else if (rd == 0) begin + return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(rs1), 
csrAddrToStr(sbe.result[11:0])); + end else if (rs1 == 0) begin + return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(rd), csrAddrToStr(sbe.result[11:0])); end end else begin - if (sbe.rd != 0 && sbe.rs1 != 0) begin - return $sformatf("%-12s %4s, %d, %s", mnemonic, regAddrToStr(sbe.rd), $unsigned(sbe.rs1), csrAddrToStr(sbe.result[11:0])); + if (rd != 0 && rs1 != 0) begin + return $sformatf("%-12s %4s, %d, %s", mnemonic, regAddrToStr(rd), $unsigned(rs1), csrAddrToStr(sbe.result[11:0])); // don't display instructions which write to zero - end else if (sbe.rd == 0) begin - return $sformatf("%-14s %2d, %s", mnemonic, $unsigned(sbe.rs1), csrAddrToStr(sbe.result[11:0])); - end else if (sbe.rs1 == 0) begin - return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(sbe.rd), csrAddrToStr(sbe.result[11:0])); + end else if (rd == 0) begin + return $sformatf("%-14s %2d, %s", mnemonic, $unsigned(rs1), csrAddrToStr(sbe.result[11:0])); + end else if (rs1 == 0) begin + return $sformatf("%-12s %4s, %s", mnemonic, regAddrToStr(rd), csrAddrToStr(sbe.result[11:0])); end end endfunction // printCSRInstr @@ -526,17 +532,17 @@ class instruction_trace_item; if (instr[6:0] == OPCODE_LOAD_FP) mnemonic = $sformatf("f%s",mnemonic); - result_regs.push_back(sbe.rd); + result_regs.push_back(rd); result_fpr.push_back(is_rd_fpr(sbe.op)); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(1'b0); // save the immediate for calculating the virtual address this.imm = sbe.result; if (instr[6:0] == OPCODE_LOAD_FP) - return $sformatf("%-12s %4s, %0d(%s)", mnemonic, fpRegAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, fpRegAddrToStr(rd), $signed(sbe.result), regAddrToStr(rs1)); else - return $sformatf("%-12s %4s, %0d(%s)", mnemonic, regAddrToStr(sbe.rd), $signed(sbe.result), regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, regAddrToStr(rd), $signed(sbe.result), regAddrToStr(rs1)); 
endfunction function string printStoreInstr(); @@ -553,17 +559,17 @@ class instruction_trace_item; if (instr[6:0] == OPCODE_STORE_FP) mnemonic = $sformatf("f%s",mnemonic); - read_regs.push_back(sbe.rs2); + read_regs.push_back(rs2); read_fpr.push_back(is_rs2_fpr(sbe.op)); - read_regs.push_back(sbe.rs1); + read_regs.push_back(rs1); read_fpr.push_back(1'b0); // save the immediate for calculating the virtual address this.imm = sbe.result; if (instr[6:0] == OPCODE_STORE_FP) - return $sformatf("%-12s %4s, %0d(%s)", mnemonic, fpRegAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, fpRegAddrToStr(rs2), $signed(sbe.result), regAddrToStr(rs1)); else - return $sformatf("%-12s %4s, %0d(%s)", mnemonic, regAddrToStr(sbe.rs2), $signed(sbe.result), regAddrToStr(sbe.rs1)); + return $sformatf("%-12s %4s, %0d(%s)", mnemonic, regAddrToStr(rs2), $signed(sbe.result), regAddrToStr(rs1)); endfunction // printSInstr From 23037ff55f1b4f3c81e745d884c10f09ac8e84b5 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Wed, 18 Apr 2018 18:53:18 +0200 Subject: [PATCH 32/94] :space_invader: Fix timing loop and unpacked assign --- .gitmodules | 2 +- Bender.yml | 37 +++++++++++++++ src/ariane_regfile.sv | 4 +- src/commit_stage.sv | 19 ++++---- src/csr_regfile.sv | 12 +++-- src/ff1.sv | 97 ++++++++++++++++++++------------------ src/fpu_wrap.sv | 32 ++++++++++++- src/icache.sv | 4 +- src/issue_read_operands.sv | 34 +++++++++---- src/mult.sv | 4 +- 10 files changed, 166 insertions(+), 79 deletions(-) diff --git a/.gitmodules b/.gitmodules index e826f576e..b3950ecdb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,7 +3,7 @@ url = https://github.com/pulp-platform/riscv-torture.git [submodule "tb"] path = tb - url = https://github.com/pulp-platform/uvm-components.git + url = https://github.com/stmach/uvm-components.git [submodule "src/axi_mem_if"] path = src/axi_mem_if url = https://github.com/pulp-platform/axi_mem_if.git diff --git 
a/Bender.yml b/Bender.yml index 5c25e60c6..83192e677 100644 --- a/Bender.yml +++ b/Bender.yml @@ -12,6 +12,40 @@ dependencies: common_cells: { git: "git@iis-git.ee.ethz.ch:sasa/common_cells.git", version: 1.1.0 } sources: + - src/fpu_legacy/hdl/fpu_utils/fpu_ff.sv + - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/defs_div_sqrt_mvp.sv + - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/control_mvp.sv + - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/div_sqrt_mvp_wrapper.sv + - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/div_sqrt_top_mvp.sv + - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/iteration_div_sqrt_mvp.sv + - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/norm_div_sqrt_mvp.sv + - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/nrbd_nrsc_mvp.sv + - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/preprocess_mvp.sv + - src/fpnew/src/pkg/fpnew_pkg.vhd + - src/fpnew/src/pkg/fpnew_fmts_pkg.vhd + - src/fpnew/src/pkg/fpnew_comps_pkg.vhd + - src/fpnew/src/pkg/fpnew_pkg_constants.vhd + - src/fpnew/src/utils/fp_pipe.vhd + - src/fpnew/src/utils/fp_rounding.vhd + - src/fpnew/src/utils/fp_arbiter.vhd + - src/fpnew/src/ops/fma_core.vhd + - src/fpnew/src/ops/fp_fma.vhd + - src/fpnew/src/ops/fp_divsqrt_multi.vhd + - src/fpnew/src/ops/fp_noncomp.vhd + - src/fpnew/src/ops/fp_f2fcasts.vhd + - src/fpnew/src/ops/fp_f2icasts.vhd + - src/fpnew/src/ops/fp_i2fcasts.vhd + - src/fpnew/src/ops/fp_conv_multi.vhd + - src/fpnew/src/subunits/addmul_fmt_slice.vhd + - src/fpnew/src/subunits/addmul_block.vhd + - src/fpnew/src/subunits/divsqrt_multifmt_slice.vhd + - src/fpnew/src/subunits/divsqrt_block.vhd + - src/fpnew/src/subunits/noncomp_fmt_slice.vhd + - src/fpnew/src/subunits/noncomp_block.vhd + - src/fpnew/src/subunits/conv_multifmt_slice.vhd + - src/fpnew/src/subunits/conv_block.vhd + - src/fpnew/src/fpnew.vhd + - src/fpnew/src/fpnew_top.vhd - include/ariane_pkg.sv - include/nbdcache_pkg.sv - target: not(synthesis) @@ -19,6 +53,7 @@ sources: - src/util/instruction_tracer_pkg.sv - src/util/instruction_tracer_if.sv - src/alu.sv + - src/fpu_wrap.sv - src/ariane.sv - 
src/branch_unit.sv - src/cache_ctrl.sv @@ -58,3 +93,5 @@ sources: - src/store_buffer.sv - src/store_unit.sv - src/tlb.sv + + diff --git a/src/ariane_regfile.sv b/src/ariane_regfile.sv index 512f8c868..83a9c5533 100644 --- a/src/ariane_regfile.sv +++ b/src/ariane_regfile.sv @@ -23,7 +23,7 @@ // latches and is thus smaller than the flip-flop based RF. // -module ariane_regfile_latch #( +module ariane_regfile #( parameter int unsigned DATA_WIDTH = 32, parameter int unsigned NR_READ_PORTS = 2, parameter int unsigned NR_WRITE_PORTS = 2, @@ -46,7 +46,7 @@ module ariane_regfile_latch #( localparam ADDR_WIDTH = 5;; localparam NUM_WORDS = 2**ADDR_WIDTH; - logic [NUM_WORDS-1:1] mem_clocks; + logic [NUM_WORDS-1:ZERO_REG_ZERO] mem_clocks; logic [DATA_WIDTH-1:0] mem[NUM_WORDS]; logic [NR_WRITE_PORTS-1:0][NUM_WORDS-1:1] waddr_onehot,waddr_onehot_q; diff --git a/src/commit_stage.sv b/src/commit_stage.sv index 31fdd8f82..b212ebe9d 100644 --- a/src/commit_stage.sv +++ b/src/commit_stage.sv @@ -105,17 +105,17 @@ module commit_stage #( else // if the LSU buffer is not ready - do not commit, wait commit_ack_o[0] = 1'b0; end + + // --------- + // FPU Flags + // --------- + if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin + // write the CSR with potential exception flags from retiring floating point instruction + csr_wdata_o = {59'b0, commit_instr_i[0].ex.cause[4:0]}; + csr_write_fflags_o = 1'b1; + end end - // --------- - // FPU Flags - // --------- - if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin - // write the CSR with potential exception flags from retiring floating point instruction - csr_op_o = CSR_SET; - csr_wdata_o = {59'b0, commit_instr_i[0].ex.cause[4:0]}; - csr_write_fflags_o = 1'b1; - end // --------- // CSR Logic @@ -174,7 +174,6 @@ module commit_stage #( // additionally check if we are retiring an FPU instruction because we need to make sure that we write all // exception flags if (commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin - csr_op_o = CSR_SET; if 
(csr_write_fflags_o) csr_wdata_o = {59'b0, (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0])}; else diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv index 828afcce8..05658293c 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -45,7 +45,7 @@ module csr_regfile #( input logic [11:0] csr_addr_i, // Address of the register to read/write input logic [63:0] csr_wdata_i, // Write data in output logic [63:0] csr_rdata_o, // Read data out - input logic csr_write_fflags_i, // Write fflags register + input logic csr_write_fflags_i, // Write fflags register e.g.: we are retiring a floating point instruction input logic [63:0] pc_i, // PC of instruction accessing the CSR output exception_t csr_exception_o, // attempts to access a CSR without appropriate privilege // level or to write a read-only register also @@ -98,7 +98,7 @@ module csr_regfile #( // Assignments // ---------------- // Debug MUX and fflags register - assign csr_addr = csr_t'(((debug_csr_req_i) ? debug_csr_addr_i : (csr_write_fflags_i) ? CSR_FFLAGS : csr_addr_i)); + assign csr_addr = csr_t'(((debug_csr_req_i) ? debug_csr_addr_i : csr_addr_i)); // Output the read data directly assign debug_csr_rdata_o = csr_rdata; @@ -431,6 +431,11 @@ module csr_regfile #( default: update_access_exception = 1'b1; endcase end + + // write the floating point status register + if (csr_write_fflags_i) + fcsr_d.fflags = csr_wdata_i[4:0] | fcsr_q.fflags; + // --------------------- // External Interrupts // --------------------- @@ -677,9 +682,6 @@ module csr_regfile #( csr_exception_o.valid = 1'b1; end end - // in case we are writing the CSR flag no exception can ever occur, don't set the valid flag in that case - if (csr_write_fflags_i) - csr_exception_o.valid = 1'b0; // ------------------- // Wait for Interrupt diff --git a/src/ff1.sv b/src/ff1.sv index e098db288..847f51e17 100644 --- a/src/ff1.sv +++ b/src/ff1.sv @@ -1,44 +1,53 @@ -// Copyright 2018 ETH Zurich and University of Bologna. 
-// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. +// Copyright (c) 2018 ETH Zurich, University of Bologna +// All rights reserved. // -// Author: Florian Zaruba -// Date: 05.06.2017 -// Description: Finds first one +// This code is under development and not yet released to the public. +// Until it is released, the code is under the copyright of ETH Zurich and +// the University of Bologna, and may contain confidential and/or unpublished +// work. Any reuse/redistribution is strictly forbidden without written +// permission from ETH Zurich. +// +// Bug fixes and contributions will eventually be released under the +// SolderPad open hardware license in the context of the PULP platform +// (http://www.pulp-platform.org), under the copyright of ETH Zurich and the +// University of Bologna. -// ----------------- -// Find First One -// ----------------- -module ff1 #( - parameter int unsigned LEN = 32 + +/// A leading-one finder / leading zero counter. +/// Set FLIP to 0 for find_first_one => first_one_o is the index of the first one (from the LSB) +/// Set FLIP to 1 for leading zero counter => first_one_o is the number of leading zeroes (from the MSB) +module find_first_one #( + /// The width of the input vector. 
+ parameter int WIDTH = -1, + parameter int FLIP = 0 )( - input logic [LEN-1:0] in_i, - output logic [$clog2(LEN)-1:0] first_one_o, - output logic no_ones_o + input logic [WIDTH-1:0] in_i, + output logic [$clog2(WIDTH)-1:0] first_one_o, + output logic no_ones_o ); -localparam int unsigned NUM_LEVELS = $clog2(LEN); + localparam int NUM_LEVELS = $clog2(WIDTH); -logic [LEN-1:0] [NUM_LEVELS-1:0] index_lut; -logic [2**NUM_LEVELS-1:0] sel_nodes; -logic [2**NUM_LEVELS-1:0] [NUM_LEVELS-1:0] index_nodes; - -// ---------------------------- -// Generate Tree Structure -// ---------------------------- -generate - for (genvar j = 0; j < LEN; j++) begin - assign index_lut[j] = $unsigned(j[NUM_LEVELS-1:0]); + // pragma translate_off + initial begin + assert(WIDTH >= 0); + end + // pragma translate_on + + logic [WIDTH-1:0][NUM_LEVELS-1:0] index_lut; + logic [2**NUM_LEVELS-1:0] sel_nodes; + logic [2**NUM_LEVELS-1:0][NUM_LEVELS-1:0] index_nodes; + + logic [WIDTH-1:0] in_tmp; + + for (genvar i = 0; i < WIDTH; i++) begin + assign in_tmp[i] = FLIP ? in_i[WIDTH-1-i] : in_i[i]; + end + + for (genvar j = 0; j < WIDTH; j++) begin + assign index_lut[j] = j; end -endgenerate -generate for (genvar level = 0; level < NUM_LEVELS; level++) begin if (level < NUM_LEVELS-1) begin @@ -52,29 +61,25 @@ generate if (level == NUM_LEVELS-1) begin for (genvar k = 0; k < 2**level; k++) begin // if two successive indices are still in the vector... - if (k * 2 < LEN) begin - assign sel_nodes[2**level-1+k] = in_i[k*2] | in_i[k*2+1]; - assign index_nodes[2**level-1+k] = (in_i[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1]; + if (k * 2 < WIDTH-1) begin + assign sel_nodes[2**level-1+k] = in_tmp[k*2] | in_tmp[k*2+1]; + assign index_nodes[2**level-1+k] = (in_tmp[k*2] == 1'b1) ? index_lut[k*2] : index_lut[k*2+1]; end // if only the first index is still in the vector... 
- if (k * 2 == LEN) begin - assign sel_nodes[2**level-1+k] = in_i[k*2]; + if (k * 2 == WIDTH-1) begin + assign sel_nodes[2**level-1+k] = in_tmp[k*2]; assign index_nodes[2**level-1+k] = index_lut[k*2]; end // if index is out of range - if (k * 2 > LEN) begin + if (k * 2 > WIDTH-1) begin assign sel_nodes[2**level-1+k] = 1'b0; assign index_nodes[2**level-1+k] = '0; end end end end -endgenerate -// -------------------- -// Connect Output -// -------------------- -assign first_one_o = index_nodes[0]; -assign no_ones_o = ~sel_nodes[0]; + assign first_one_o = NUM_LEVELS > 0 ? index_nodes[0] : '0; + assign no_ones_o = NUM_LEVELS > 0 ? ~sel_nodes[0] : '1; -endmodule +endmodule \ No newline at end of file diff --git a/src/fpu_wrap.sv b/src/fpu_wrap.sv index 38b75595e..6b7477578 100644 --- a/src/fpu_wrap.sv +++ b/src/fpu_wrap.sv @@ -75,7 +75,37 @@ module fpu_wrap ( logic [IFMTBITS-1:0] IFMT_INT64; // bind the constants from the fpnew entity - fpnew_pkg_constants i_fpnew_constants ( .* ); + fpnew_pkg_constants i_fpnew_constants ( + .OP_NUMBITS ( OP_NUMBITS ), + .OP_FMADD ( OP_FMADD ), + .OP_FNMSUB ( OP_FNMSUB ), + .OP_ADD ( OP_ADD ), + .OP_MUL ( OP_MUL ), + .OP_DIV ( OP_DIV ), + .OP_SQRT ( OP_SQRT ), + .OP_SGNJ ( OP_SGNJ ), + .OP_MINMAX ( OP_MINMAX ), + .OP_CMP ( OP_CMP ), + .OP_CLASS ( OP_CLASS ), + .OP_F2I ( OP_F2I ), + .OP_I2F ( OP_I2F ), + .OP_F2F ( OP_F2F ), + .OP_CPK ( OP_CPK ), + .FMT_NUMBITS ( FMT_NUMBITS ), + .FMT_FP32 ( FMT_FP32 ), + .FMT_FP64 ( FMT_FP64 ), + .FMT_FP16 ( FMT_FP16 ), + .FMT_FP8 ( FMT_FP8 ), + .FMT_FP16ALT ( FMT_FP16ALT ), + .FMT_CUST1 ( FMT_CUST1 ), + .FMT_CUST2 ( FMT_CUST2 ), + .FMT_CUST3 ( FMT_CUST3 ), + .IFMT_NUMBITS ( IFMT_NUMBITS ), + .IFMT_INT8 ( IFMT_INT8 ), + .IFMT_INT16 ( IFMT_INT16 ), + .IFMT_INT32 ( IFMT_INT32 ), + .IFMT_INT64 ( IFMT_INT64 ) + ); // always_comb begin // assert (OPBITS >= OP_NUMBITS) else $error("OPBITS is smaller than %0d", OP_NUMBITS); diff --git a/src/icache.sv b/src/icache.sv index e1087510f..bc5dc2604 100644 --- 
a/src/icache.sv +++ b/src/icache.sv @@ -410,8 +410,8 @@ module icache #( ready_o = 1'b0; end - ff1 #( - .LEN ( SET_ASSOCIATIVITY ) + find_first_one #( + .WIDTH ( SET_ASSOCIATIVITY ) ) i_ff1 ( .in_i ( ~way_valid ), .first_one_o ( repl_invalid ), diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index 87677b86c..a61a4c772 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -367,6 +367,16 @@ module issue_read_operands #( // Integer Register File // ---------------------- logic [1:0][63:0] rdata; + logic [1:0][4:0] raddr_pack; + + // pack signals + logic [NR_COMMIT_PORTS-1:0][4:0] waddr_pack; + logic [NR_COMMIT_PORTS-1:0][63:0] wdata_pack; + logic [NR_COMMIT_PORTS-1:0] we_pack; + assign raddr_pack = {issue_instr_i.rs2[4:0], raddr_a}; + assign waddr_pack = {waddr_i[1], waddr}; + assign wdata_pack = {wdata_i[1], wdata}; + assign we_pack = {we_gpr_i[1], we}; ariane_regfile #( .DATA_WIDTH ( 64 ), @@ -374,11 +384,11 @@ module issue_read_operands #( .NR_WRITE_PORTS ( NR_COMMIT_PORTS ), .ZERO_REG_ZERO ( 1 ) ) i_ariane_regfile ( - .raddr_i ( '{issue_instr_i.rs2[4:0], raddr_a} ), - .rdata_o ( rdata ), - .waddr_i ( '{waddr_i[1], waddr} ), - .wdata_i ( '{wdata_i[1], wdata} ), - .we_i ( '{we_gpr_i[1], we} ), + .raddr_i ( raddr_pack ), + .rdata_o ( rdata ), + .waddr_i ( waddr_pack ), + .wdata_i ( wdata_pack ), + .we_i ( we_pack ), .* ); @@ -387,6 +397,10 @@ module issue_read_operands #( // ----------------------------- logic [2:0][FLEN-1:0] fprdata; + // pack signals + logic [2:0][4:0] fp_raddr_pack; + assign fp_raddr_pack = {issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]}; + generate if (FP_PRESENT) begin : float_regfile_gen ariane_regfile #( @@ -395,11 +409,11 @@ module issue_read_operands #( .NR_WRITE_PORTS ( NR_COMMIT_PORTS ), .ZERO_REG_ZERO ( 0 ) ) i_ariane_fp_regfile ( - .raddr_i ( '{issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]} ), - .rdata_o ( fprdata ), - .waddr_i ( waddr_i ), 
- .wdata_i ( '{wdata_i[1][FLEN-1:0], wdata_i[0][FLEN-1:0]} ), - .we_i ( we_fpr_i ), + .raddr_i ( fp_raddr_pack ), + .rdata_o ( fprdata ), + .waddr_i ( waddr_pack ), + .wdata_i ( wdata_pack ), + .we_i ( we_fpr_i ), .* ); end else begin : no_fpr_gen diff --git a/src/mult.sv b/src/mult.sv index 8b1d9993a..e9540817c 100644 --- a/src/mult.sv +++ b/src/mult.sv @@ -141,8 +141,8 @@ module mult ( // Find First one // --------------------- // this unit is used to speed up the sequential division by shifting the dividend first - ff1 #( - .LEN ( 64 ) + find_first_one #( + .WIDTH ( 64 ) ) i_ff1 ( .in_i ( ff1_input ), // signed = operand_b_rev_neg, unsigned operand_b_rev .first_one_o ( ff1_result ), From fd03e94ae575ddcb268d375a52f3536c39e7236b Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Fri, 20 Apr 2018 16:01:46 +0200 Subject: [PATCH 33/94] :bug: Revert back to ff-based register file --- src/ariane_regfile.sv | 2 +- src/issue_read_operands.sv | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ariane_regfile.sv b/src/ariane_regfile.sv index 83a9c5533..f8a58dac3 100644 --- a/src/ariane_regfile.sv +++ b/src/ariane_regfile.sv @@ -23,7 +23,7 @@ // latches and is thus smaller than the flip-flop based RF. 
// -module ariane_regfile #( +module ariane_regfile_lol #( parameter int unsigned DATA_WIDTH = 32, parameter int unsigned NR_READ_PORTS = 2, parameter int unsigned NR_WRITE_PORTS = 2, diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index a61a4c772..4df112acc 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -399,7 +399,9 @@ module issue_read_operands #( // pack signals logic [2:0][4:0] fp_raddr_pack; + logic [NR_COMMIT_PORTS-1:0][63:0] fp_wdata_pack; assign fp_raddr_pack = {issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]}; + assign fp_wdata_pack = {wdata_i[1][FLEN-1:0], wdata_i[0][FLEN-1:0]}; generate if (FP_PRESENT) begin : float_regfile_gen From a5332779dabb66b25d04e00c54a62d67b1f229ef Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Fri, 20 Apr 2018 16:02:51 +0200 Subject: [PATCH 34/94] :bug: Fix `FMV` ops trace and add timestamp to tracer --- src/util/instruction_trace_item.svh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/util/instruction_trace_item.svh b/src/util/instruction_trace_item.svh index 1ebf3cdc8..266a471df 100644 --- a/src/util/instruction_trace_item.svh +++ b/src/util/instruction_trace_item.svh @@ -284,7 +284,7 @@ class instruction_trace_item; endcase - s = $sformatf("%10t %10d %s %h %h %h %-36s", simtime, + s = $sformatf("%8dns %8d %s %h %h %h %-36s", simtime, cycle, priv_lvl, sbe.pc, @@ -312,7 +312,7 @@ class instruction_trace_item; else if (read_regs[i] != 0) s = $sformatf("%s %-4s:%16x", s, regAddrToStr(read_regs[i]), gp_reg_file[read_regs[i]]); end - casex (instr) + case (instr) inside // check of the instrction was a load or store INSTR_STORE, INSTR_STORE_FP: begin @@ -407,7 +407,7 @@ class instruction_trace_item; FCVT_F2I : return $sformatf("%-12s %4s, %s, %s", $sformatf("fcvt.%s.%s", intFmtToStr(instr[21:20]), fpFmtToStr(instr[26:25])), regAddrToStr(rd), fpRegAddrToStr(rs1), fpRmToStr(instr[14:12])); FCVT_I2F : return $sformatf("%-12s %4s, 
%s, %s", $sformatf("fcvt.%s.%s", fpFmtToStr(instr[26:25]), intFmtToStr(instr[21:20])), fpRegAddrToStr(rd), regAddrToStr(rs1), fpRmToStr(instr[14:12])); FMV_F2X : return $sformatf("%-12s %4s, %s", $sformatf("fmv.x.%s", fmvFpFmtToStr(instr[26:25])), regAddrToStr(rd), fpRegAddrToStr(rs1)); - FMV_X2F : return $sformatf("%-12s %4s, %s", $sformatf("fmv.x.%s", fmvFpFmtToStr(instr[26:25])), regAddrToStr(rd), fpRegAddrToStr(rs1)); + FMV_X2F : return $sformatf("%-12s %4s, %s", $sformatf("fmv.%s.x", fmvFpFmtToStr(instr[26:25])), fpRegAddrToStr(rd), regAddrToStr(rs1)); endcase endfunction From 6158c5a14c57fe1d35225042ef96944f98627895 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Fri, 20 Apr 2018 16:04:56 +0200 Subject: [PATCH 35/94] :bug: Fix misc. FPU bugs and bump FP dependencies --- Makefile | 2 +- src/fpnew | 2 +- src/fpu_legacy | 2 +- src/fpu_wrap.sv | 9 ++++++--- tb | 2 +- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 1f8095a91..446c97525 100755 --- a/Makefile +++ b/Makefile @@ -138,7 +138,7 @@ $(library): sim: build vsim${questa_version} -64 -lib ${library} ${top_level}_optimized +UVM_TESTNAME=${test_case} +BASEDIR=$(riscv-test-dir) -noautoldlibpath \ - +ASMTEST=$(riscv-test) $(uvm-flags) +UVM_VERBOSITY=HIGH -coverage -classdebug -sv_lib $(library)/elf_dpi -do "do tb/wave/wave_core.do" + +ASMTEST=$(riscv-test) $(uvm-flags) +UVM_VERBOSITY=HIGH -coverage -classdebug -sv_lib $(library)/elf_dpi -do "set NumericStdNoWarnings 1; set StdArithNoWarnings 1; do tb/wave/wave_core.do" sim_nopt: build vsim${questa_version} -64 -novopt -lib ${library} ${top_level} +UVM_TESTNAME=${test_case} +BASEDIR=$(riscv-test-dir) \ diff --git a/src/fpnew b/src/fpnew index 97d0a50a2..f693140ea 160000 --- a/src/fpnew +++ b/src/fpnew @@ -1 +1 @@ -Subproject commit 97d0a50a2c7270b83a3e8e983b9c63f666d24168 +Subproject commit f693140ea22f40f0b8989bf8fe03bf4726cde3e3 diff --git a/src/fpu_legacy b/src/fpu_legacy index a67d8de46..afbada816 160000 --- 
a/src/fpu_legacy +++ b/src/fpu_legacy @@ -1 +1 @@ -Subproject commit a67d8de4606afa59d1d3cebb7a72011c6bee2b16 +Subproject commit afbada8165819fa0174e5418fde67c7bde2216a6 diff --git a/src/fpu_wrap.sv b/src/fpu_wrap.sv index 6b7477578..07461893a 100644 --- a/src/fpu_wrap.sv +++ b/src/fpu_wrap.sv @@ -287,9 +287,10 @@ module fpu_wrap ( end // Scalar Sign Injection - op encoded in rm (000-010) FSGNJ : fpu_op_n = OP_SGNJ; - // Move from FPR to GPR - mapped to NOP since no recoding + // Move from FPR to GPR - mapped to SGNJ-passthrough since no recoding FMV_F2X : begin fpu_op_n = OP_SGNJ; + fpu_rm_n = 3'b011; // passthrough without checking nan-box fpu_op_mod_n = 1'b1; // no NaN-Boxing operand_b_n = operand_a_n; vec_replication = 1'b0; // no replication, we set second operand @@ -297,6 +298,7 @@ module fpu_wrap ( // Move from GPR to FPR - mapped to NOP since no recoding FMV_X2F : begin fpu_op_n = OP_SGNJ; + fpu_rm_n = 3'b011; // passthrough without checking nan-box operand_b_n = operand_a_n; vec_replication = 1'b0; // no replication, we set second operand end @@ -429,8 +431,8 @@ module fpu_wrap ( fpu_vec_op_q <= '0; fpu_tag_q <= '0; end else begin - if (reg_out_ready) begin // Only advance pipeline if unit is ready for our op - reg_out_valid <= reg_in_valid; + if (reg_out_ready || flush_i) begin // Only advance pipeline if unit is ready for our op + reg_out_valid <= reg_in_valid & ~flush_i; if (reg_in_valid) begin // clock gate data to save poer operand_a_q <= operand_a_n; operand_b_q <= operand_b_n; @@ -499,6 +501,7 @@ module fpu_wrap ( .Tag_DI ( fpu_tag ), .InValid_SI ( fpu_in_valid ), .InReady_SO ( fpu_in_ready ), + .Flush_SI ( flush_i ), .Z_DO ( result_o ), .Status_DO ( fpu_status ), .Tag_DO ( fpu_trans_id_o ), diff --git a/tb b/tb index 59f931065..e68015e57 160000 --- a/tb +++ b/tb @@ -1 +1 @@ -Subproject commit 59f9310659626cd4817348219133fd777a0b1700 +Subproject commit e68015e5736819ec26dccca9168457b608069a9e From 8620f41ea1db93c7883844011092147a23cabab3 Mon Sep 17 
00:00:00 2001 From: Florian Zaruba Date: Sun, 22 Apr 2018 19:17:04 +0200 Subject: [PATCH 36/94] :bug: Fix problem with I$ when axi.last is high --- src/icache.sv | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/icache.sv b/src/icache.sv index bc5dc2604..5e1f73381 100644 --- a/src/icache.sv +++ b/src/icache.sv @@ -25,6 +25,7 @@ module icache #( input logic clk_i, input logic rst_ni, input logic flush_i, // flush the icache, flush and kill have to be asserted together + input logic fetch_enable_i, // the core should fetch instructions input logic req_i, // we request a new word input logic is_speculative_i, // is this request speculative or not input logic kill_s1_i, // kill the current request @@ -237,9 +238,9 @@ module icache #( case (state_q) // ~> we are ready to receive a new request IDLE: begin - ready_o = 1'b1; + ready_o = 1'b1 & fetch_enable_i; // we are getting a new request - if (req_i) begin + if (req_i && fetch_enable_i) begin // request the content of all arrays req = '1; // save the virtual address @@ -356,11 +357,11 @@ module icache #( if (kill_s2_i) state_d = WAIT_KILLED_AXI_R_RESP; - if (axi.r_last) begin + if (axi.r_valid && axi.r_last) begin state_d = (kill_s2_i) ? 
IDLE : REDO_REQ; end - if ((state_q == WAIT_KILLED_AXI_R_RESP) && axi.r_last) + if ((state_q == WAIT_KILLED_AXI_R_RESP) && axi.r_last && axi.r_valid) state_d = IDLE; end // ~> redo the request, From 035a6b5a08d27afeeab5bafbd02b38620e0baba3 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Tue, 3 Jul 2018 21:07:29 +0200 Subject: [PATCH 37/94] :bug: Fix FP register forwarding checks to use rd --- src/scoreboard.sv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/scoreboard.sv b/src/scoreboard.sv index 913b30536..92abc3738 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -214,10 +214,10 @@ module scoreboard #( // look at the appropriate fields and look whether there was an // instruction that wrote the rd field before, first for RS1 and then for RS2, then for RS3 // we check the type of the stored result register file against issued register file - if ((mem_q[i].sbe.rd == rs1_i) && (is_rs1_fpr(mem_q[i].sbe.op) == is_rs1_fpr(issue_instr_o.op))) begin + if ((mem_q[i].sbe.rd == rs1_i) && (is_rd_fpr(mem_q[i].sbe.op) == is_rs1_fpr(issue_instr_o.op))) begin rs1_o = mem_q[i].sbe.result; rs1_valid_o = mem_q[i].sbe.valid; - end else if ((mem_q[i].sbe.rd == rs2_i) && (is_rs2_fpr(mem_q[i].sbe.op) == is_rs2_fpr(issue_instr_o.op))) begin + end else if ((mem_q[i].sbe.rd == rs2_i) && (is_rd_fpr(mem_q[i].sbe.op) == is_rs2_fpr(issue_instr_o.op))) begin rs2_o = mem_q[i].sbe.result; rs2_valid_o = mem_q[i].sbe.valid; end else if (mem_q[i].sbe.rd == rs3_i) begin // rs3 is only considered in FP cases so no check needed @@ -234,13 +234,13 @@ module scoreboard #( // make sure that we are not forwarding a result that got an exception for (int unsigned j = 0; j < NR_WB_PORTS; j++) begin if (mem_q[trans_id_i[j]].sbe.rd == rs1_i && wb_valid_i[j] && ~ex_i[j].valid - && (is_rs1_fpr(mem_q[trans_id_i[j]].sbe.op) == is_rs1_fpr(issue_instr_o.op))) begin + && (is_rd_fpr(mem_q[trans_id_i[j]].sbe.op) == is_rs1_fpr(issue_instr_o.op))) begin rs1_o = wbdata_i[j]; 
rs1_valid_o = wb_valid_i[j]; break; end if (mem_q[trans_id_i[j]].sbe.rd == rs2_i && wb_valid_i[j] && ~ex_i[j].valid - && (is_rs2_fpr(mem_q[trans_id_i[j]].sbe.op) == is_rs2_fpr(issue_instr_o.op))) begin + && (is_rd_fpr(mem_q[trans_id_i[j]].sbe.op) == is_rs2_fpr(issue_instr_o.op))) begin rs2_o = wbdata_i[j]; rs2_valid_o = wb_valid_i[j]; break; From b0d3c6a1707de29235f7295ac699ef4fd0946e06 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Tue, 3 Jul 2018 21:14:55 +0200 Subject: [PATCH 38/94] :bug: Fix missing <32bit FP loads/stores in pkg --- include/ariane_pkg.sv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index e9027ebc4..54bcecc7c 100755 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -227,7 +227,7 @@ package ariane_pkg; function automatic logic is_rs2_fpr (input fu_op op); if (FP_PRESENT) begin // makes function static for non-fp case unique case (op) inside - [FSD:FSW], // FP Stores + [FSD:FSB], // FP Stores [FADD:FMIN_MAX], // Computational Operations (no sqrt) [FMADD:FNMADD], // Fused Computational Operations FSGNJ, // Sign Injections @@ -253,7 +253,7 @@ package ariane_pkg; function automatic logic is_rd_fpr (input fu_op op); if (FP_PRESENT) begin // makes function static for non-fp case unique case (op) inside - [FLD:FLW], // FP Loads + [FLD:FLB], // FP Loads [FADD:FNMADD], // Computational Operations FCVT_I2F, // Int-Float Casts FCVT_F2F, // Float-Float Casts @@ -274,8 +274,8 @@ package ariane_pkg; case (op) LD, SD, FLD, FSD : return 2'b11; LW, LWU, SW, FLW, FSW : return 2'b10; - LH, LHU, SH : return 2'b01; - LB, LBU, SB : return 2'b00; + LH, LHU, SH, FLH, FSH : return 2'b01; + LB, LBU, SB, FLB, FSB : return 2'b00; default : return 2'b11; endcase endfunction From 9a80a5c0ee8dc151501fae748323f541915ad7fa Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 31 Jul 2018 20:17:26 -0700 Subject: [PATCH 39/94] Update paths to FPU submodules --- .gitmodules | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index b3950ecdb..6db9c1fd9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -18,7 +18,7 @@ url = https://github.com/pulp-platform/axi_node.git [submodule "fpnew"] path = src/fpnew - url = git@iis-git.ee.ethz.ch:smach/fpnew.git + url = git@iis-git.ee.ethz.ch:pulp-restricted/fpnew.git [submodule "fpu-legacy"] path = src/fpu_legacy - url = git@iis-git.ee.ethz.ch:pulp-open/fpu.git + url = git@github.com:pulp-platform/fpu.git From c8d32cc21e01ea2c4809cc858f85a3e4f6529b09 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Fri, 7 Sep 2018 17:41:36 +0200 Subject: [PATCH 40/94] :bug: Fix FMV encoding --- src/decoder.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/decoder.sv b/src/decoder.sv index 8ed07c33a..e584c96aa 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -813,7 +813,7 @@ module decoder ( instruction_o.op = FMV_X2F; // fmv.fmt.ifmt - GPR to FPR Move instruction_o.rs2 = instr.rftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit check_fprm = 1'b0; // instruction encoded in rm, do the check here - if (instr.rftype.rm != 3'b000 || (XF16ALT && instr.rftype.rm == 3'b100)) + if (!(instr.rftype.rm == 3'b000 || (XF16ALT && instr.rftype.rm == 3'b100))) illegal_instr = 1'b1; // rs2 must be zero if (instr.rftype.rs2 != 5'b00000) illegal_instr = 1'b1; From 3a9a0b7a582aa3be777641ea9f6feb277e21d8c3 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 11 Sep 2018 13:43:04 +0200 Subject: [PATCH 41/94] Merge floating point support --- Makefile | 14 ++------------ ci/riscv-asm-tests.list | 16 ++++++++++++++++ src/ariane.sv | 16 +++++----------- src/load_unit.sv | 12 ++++++------ tb/wave/wave_core.do | 6 ++++-- 5 files changed, 33 insertions(+), 31 deletions(-) diff --git a/Makefile b/Makefile index bdc720eab..a0d89f23f 100755 --- a/Makefile +++ b/Makefile @@ -43,16 +43,6 @@ test_pkg := $(wildcard tb/test/*/*sequence_pkg.sv*) \ dpi := $(patsubst 
tb/dpi/%.cc,work/%.o,$(wildcard tb/dpi/*.cc)) dpi_hdr := $(wildcard tb/dpi/*.h) # this list contains the standalone components -# <<<<<<< HEAD -# src := $(wildcard src/*.sv) $(wildcard tb/common/*.sv) $(wildcard src/axi_slice/*.sv) \ -# $(wildcard src/axi_node/*.sv) $(wildcard src/axi_mem_if/src/*.sv) src/fpu_legacy/hdl/fpu_utils/fpu_ff.sv \ -# src/fpu_legacy/hdl/fpu_div_sqrt_mvp/defs_div_sqrt_mvp.sv $(wildcard src/fpu_legacy/hdl/fpu_div_sqrt_mvp/*.sv) \ -# $(fpnew_pkg) $(wildcard src/fpnew/src/utils/*.vhd) $(wildcard src/fpnew/src/ops/*.vhd) \ -# $(wildcard src/fpnew/src/subunits/*.vhd) src/fpnew/src/fpnew.vhd src/fpnew/src/fpnew_top.vhd \ -# $(filter-out src/debug/dm_pkg.sv, $(wildcard src/debug/*.sv)) $(wildcard bootrom/*.sv) \ -# $(wildcard src/debug/debug_rom/*.sv) - -# ======= src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \ $(wildcard src/fpu/src/utils/*.vhd) \ $(wildcard src/fpu/src/ops/*.vhd) \ @@ -160,7 +150,7 @@ $(riscv-asm-tests): build $(library)/ariane_dpi.so +BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ $(QUESTASIM_FLAGS) \ -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(library)/ariane_dpi \ - -do "coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \ + -do "set StdArithNoWarnings 1; set NumericStdNoWarnings 1; coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \ ${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$@ ++$(target-options) | tee tmp/riscv-asm-tests-$@.log $(riscv-benchmarks): build $(library)/ariane_dpi.so @@ -168,7 +158,7 @@ $(riscv-benchmarks): build $(library)/ariane_dpi.so +BASEDIR=$(riscv-benchmarks-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ $(QUESTASIM_FLAGS) \ -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(library)/ariane_dpi \ - -do "coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name 
TESTSTATUS -concise]" \ + -do "set StdArithNoWarnings 1; set NumericStdNoWarnings 1; coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \ ${top_level}_optimized +permissive-off ++$(riscv-benchmarks-dir)/$@ ++$(target-options) | tee tmp/riscv-benchmarks-$@.log diff --git a/ci/riscv-asm-tests.list b/ci/riscv-asm-tests.list index aeb1b746d..144584d3a 100644 --- a/ci/riscv-asm-tests.list +++ b/ci/riscv-asm-tests.list @@ -149,3 +149,19 @@ rv64uf-v-fmin rv64uf-v-ldst rv64uf-v-move rv64uf-v-recoding +rv64ud-p-fadd +rv64ud-p-fclass +rv64ud-p-fcmp +rv64ud-p-fcvt +rv64ud-p-fcvt_w +rv64ud-p-fdiv +rv64ud-p-fmadd +rv64ud-p-fmin +rv64ud-v-fadd +rv64ud-v-fclass +rv64ud-v-fcmp +rv64ud-v-fcvt +rv64ud-v-fcvt_w +rv64ud-v-fdiv +rv64ud-v-fmadd +rv64ud-v-fmin diff --git a/src/ariane.sv b/src/ariane.sv index edaff22fa..4d4178a70 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -367,11 +367,11 @@ module ariane #( .lsu_exception_o ( lsu_exception_ex_id ), .no_st_pending_o ( no_st_pending_ex_commit ), // MULT - // .mult_ready_o ( mult_ready_ex_id ), - // .mult_valid_i ( mult_valid_id_ex ), - // .mult_trans_id_o ( mult_trans_id_ex_id ), - // .mult_result_o ( mult_result_ex_id ), - // .mult_valid_o ( mult_valid_ex_id ), + .mult_ready_o ( mult_ready_ex_id ), + .mult_valid_i ( mult_valid_id_ex ), + .mult_trans_id_o ( mult_trans_id_ex_id ), + .mult_result_o ( mult_result_ex_id ), + .mult_valid_o ( mult_valid_ex_id ), // FPU .fpu_ready_o ( fpu_ready_ex_id ), .fpu_valid_i ( fpu_valid_id_ex ), @@ -405,12 +405,6 @@ module ariane #( .asid_i ( asid_csr_ex ), // from CSR .icache_areq_i ( icache_areq_cache_ex ), .icache_areq_o ( icache_areq_ex_cache ), - - .mult_ready_o ( mult_ready_ex_id ), - .mult_valid_i ( mult_valid_id_ex ), - .mult_trans_id_o ( mult_trans_id_ex_id ), - .mult_result_o ( mult_result_ex_id ), - .mult_valid_o ( mult_valid_ex_id ), // DCACHE interfaces .dcache_req_ports_i ( dcache_req_ports_cache_ex ), .dcache_req_ports_o ( 
dcache_req_ports_ex_cache ) diff --git a/src/load_unit.sv b/src/load_unit.sv index bef553255..2a4cdc462 100644 --- a/src/load_unit.sv +++ b/src/load_unit.sv @@ -331,8 +331,8 @@ module load_unit ( // prepare these signals for faster selection in the next cycle - assign signed_d = load_data_q.operator inside { LW, LH, LB }; - assign fp_sign_d = load_data_q.operator inside { FLW, FLH, FLB }; + assign signed_d = load_data_d.operator inside {LW, LH, LB}; + assign fp_sign_d = load_data_d.operator inside {FLW, FLH, FLB}; assign idx_d = (load_data_d.operator inside {LW, FLW}) ? load_data_d.address_offset + 3 : (load_data_d.operator inside {LH, FLH}) ? load_data_d.address_offset + 1 : load_data_d.address_offset; @@ -354,15 +354,15 @@ module load_unit ( // result mux always_comb begin unique case (load_data_q.operator) - LW, LWU: result_o = {{32{sign_bit}}, shifted_data[31:0]}; - LH, LHU: result_o = {{48{sign_bit}}, shifted_data[15:0]}; - LB, LBU: result_o = {{56{sign_bit}}, shifted_data[7:0]}; + LW, LWU, FLW: result_o = {{32{sign_bit}}, shifted_data[31:0]}; + LH, LHU, FLH: result_o = {{48{sign_bit}}, shifted_data[15:0]}; + LB, LBU, FLB: result_o = {{56{sign_bit}}, shifted_data[7:0]}; default: result_o = shifted_data; endcase end always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs - if(~rst_ni) begin + if (~rst_ni) begin idx_q <= 0; signed_q <= 0; fp_sign_q <= 0; diff --git a/tb/wave/wave_core.do b/tb/wave/wave_core.do index 8e74fc121..53caa9d0c 100644 --- a/tb/wave/wave_core.do +++ b/tb/wave/wave_core.do @@ -5,8 +5,8 @@ add wave -noupdate -group frontend -group icache /ariane_tb/dut/i_ariane/i_std_c add wave -noupdate -group frontend -group ras /ariane_tb/dut/i_ariane/i_frontend/i_ras/* add wave -noupdate -group frontend -group btb /ariane_tb/dut/i_ariane/i_frontend/i_btb/* add wave -noupdate -group frontend -group bht /ariane_tb/dut/i_ariane/i_frontend/i_bht/* -add wave -noupdate -group frontend -group instr_scan /ariane_tb/dut/i_ariane/i_frontend/*/i_instr_scan/* 
-add wave -noupdate -group frontend -group fetch_fifo /ariane_tb/dut/i_ariane/i_frontend/i_fetch_fifo/* +# add wave -noupdate -group frontend -group instr_scan /ariane_tb/dut/i_ariane/i_frontend/*/i_instr_scan/* +# add wave -noupdate -group frontend -group fetch_fifo /ariane_tb/dut/i_ariane/i_frontend/i_fetch_fifo/* add wave -noupdate -group id_stage -group decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/* add wave -noupdate -group id_stage -group compressed_decoder /ariane_tb/dut/i_ariane/id_stage_i/compressed_decoder_i/* @@ -22,6 +22,8 @@ add wave -noupdate -group ex_stage -group alu /ariane_tb/dut/i_ariane/ex_stage_i add wave -noupdate -group ex_stage -group mult /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/* add wave -noupdate -group ex_stage -group mult -group mul /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/i_mul/* add wave -noupdate -group ex_stage -group mult -group div /ariane_tb/dut/i_ariane/ex_stage_i/i_mult/i_div/* +add wave -noupdate -group ex_stage -group fpu /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/* +add wave -noupdate -group ex_stage -group fpu -group fpnew /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/* add wave -noupdate -group ex_stage -group lsu /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/* add wave -noupdate -group ex_stage -group lsu -group lsu_bypass /ariane_tb/dut/i_ariane/ex_stage_i/lsu_i/lsu_bypass_i/* From c762c5817a1e1e3326c6a1e2a5c3d2fa2f6c18df Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 11 Sep 2018 17:32:22 +0200 Subject: [PATCH 42/94] :sparkles: Implement privileged part of F-Extension --- include/riscv_pkg.sv | 11 ++++- src/ariane.sv | 8 +++- src/commit_stage.sv | 3 +- src/csr_regfile.sv | 90 ++++++++++++++++++++++++++++---------- src/decoder.sv | 12 ++--- src/ex_stage.sv | 5 ++- src/id_stage.sv | 3 ++ src/issue_read_operands.sv | 2 +- 8 files changed, 100 insertions(+), 34 deletions(-) diff --git a/include/riscv_pkg.sv b/include/riscv_pkg.sv index 1e6ad2a42..8d7492861 100644 --- 
a/include/riscv_pkg.sv +++ b/include/riscv_pkg.sv @@ -32,6 +32,13 @@ package riscv; XLEN_128 = 2'b11 } xlen_t; + typedef enum logic [1:0] { + Off = 2'b00, + Initial = 2'b01, + Clean = 2'b10, + Dirty = 2'b11 + } xs_t; + typedef struct packed { logic sd; // signal dirty - read-only - hardwired zero logic [62:36] wpri4; // writes preserved reads ignored @@ -44,8 +51,8 @@ package riscv; logic mxr; // make executable readable logic sum; // permit supervisor user memory access logic mprv; // modify privilege - privilege level for ld/st - logic [1:0] xs; // extension register - hardwired to zero - logic [1:0] fs; // extension register - hardwired to zero + xs_t xs; // extension register - hardwired to zero + xs_t fs; // floating point extension register priv_lvl_t mpp; // holds the previous privilege mode up to machine logic [1:0] wpri2; // writes preserved reads ignored logic spp; // holds the previous privilege mode up to supervisor diff --git a/src/ariane.sv b/src/ariane.sv index 4d4178a70..36d75d9a4 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -135,6 +135,7 @@ module ariane #( // -------------- // CSR Commit logic csr_commit_commit_ex; + logic dirty_fp_state; // LSU Commit logic lsu_commit_commit_ex; logic lsu_commit_ready_ex_commit; @@ -154,6 +155,7 @@ module ariane #( // CSR <-> * // -------------- logic [4:0] fflags_csr_commit; + riscv::xs_t fs; logic [2:0] frm_csr_id_issue_ex; logic enable_translation_csr_ex; logic en_ld_st_translation_csr_ex; @@ -255,6 +257,7 @@ module ariane #( .issue_instr_ack_i ( issue_instr_issue_id ), .priv_lvl_i ( priv_lvl ), + .fs_i ( fs ), .frm_i ( frm_csr_id_issue_ex ), .debug_mode_i ( debug_mode_csr_id ), .tvm_i ( tvm_csr_id ), @@ -415,8 +418,9 @@ module ariane #( // --------- commit_stage commit_stage_i ( .halt_i ( halt_ctrl ), - .flush_dcache_i ( dcache_flush_ctrl_cache ), + .flush_dcache_i ( dcache_flush_ctrl_cache ), .exception_o ( ex_commit ), + .dirty_fp_state_o ( dirty_fp_state ), .debug_mode_i ( debug_mode_csr_id ), 
.debug_req_i ( debug_req_i ), .single_step_i ( single_step_csr_commit ), @@ -455,6 +459,7 @@ module ariane #( .ex_i ( ex_commit ), .csr_op_i ( csr_op_commit_csr ), .csr_write_fflags_i ( csr_write_fflags_commit_cs ), + .dirty_fp_state_i ( dirty_fp_state ), .csr_addr_i ( csr_addr_ex_csr ), .csr_wdata_i ( csr_wdata_commit_csr ), .csr_rdata_o ( csr_rdata_csr_commit ), @@ -465,6 +470,7 @@ module ariane #( .set_debug_pc_o ( set_debug_pc ), .trap_vector_base_o ( trap_vector_base_commit_pcgen ), .priv_lvl_o ( priv_lvl ), + .fs_o ( fs ), .fflags_o ( fflags_csr_commit ), .frm_o ( frm_csr_id_issue_ex ), .ld_st_priv_lvl_o ( ld_st_priv_lvl_csr_ex ), diff --git a/src/commit_stage.sv b/src/commit_stage.sv index ccee0cba0..001ed150b 100644 --- a/src/commit_stage.sv +++ b/src/commit_stage.sv @@ -21,6 +21,7 @@ module commit_stage #( input logic halt_i, // request to halt the core input logic flush_dcache_i, // request to flush dcache -> also flush the pipeline output exception_t exception_o, // take exception to controller + output logic dirty_fp_state_o, // mark the F state as dirty input logic debug_mode_i, // we are in debug mode input logic debug_req_i, // debug unit is requesting to enter debug mode input logic single_step_i, // we are in single step debug mode @@ -55,6 +56,7 @@ module commit_stage #( assign waddr_o[1] = commit_instr_i[1].rd[4:0]; assign pc_o = commit_instr_i[0].pc; + assign dirty_fp_state_o = |we_fpr_o; // ------------------- // Commit Instruction @@ -69,7 +71,6 @@ module commit_stage #( we_gpr_o[0] = 1'b0; we_gpr_o[1] = 1'b0; we_fpr_o = '{default: 1'b0}; - commit_lsu_o = 1'b0; commit_csr_o = 1'b0; wdata_o[0] = commit_instr_i[0].result; diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv index ff927decb..8ab6e1799 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -21,7 +21,6 @@ module csr_regfile #( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low input logic time_irq_i, // Timer threw a interrupt - // send a flush 
request out if a CSR with a side effect has changed (e.g. written) output logic flush_o, output logic halt_csr_o, // halt requested @@ -39,6 +38,7 @@ module csr_regfile #( input logic [11:0] csr_addr_i, // Address of the register to read/write input logic [63:0] csr_wdata_i, // Write data in output logic [63:0] csr_rdata_o, // Read data out + input logic dirty_fp_state_i, // Mark the FP sate as dirty input logic csr_write_fflags_i, // Write fflags register e.g.: we are retiring a floating point instruction input logic [63:0] pc_i, // PC of instruction accessing the CSR output exception_t csr_exception_o, // attempts to access a CSR without appropriate privilege @@ -50,6 +50,7 @@ module csr_regfile #( output logic [63:0] trap_vector_base_o, // Output base of exception vector, correct CSR is output (mtvec, stvec) output riscv::priv_lvl_t priv_lvl_o, // Current privilege level the CPU is in // FPU + output riscv::xs_t fs_o, // Floating point extension status output logic [4:0] fflags_o, // Floating-Point Accured Exceptions output logic [2:0] frm_o, // Floating-Point Dynamic Rounding Mode // MMU @@ -91,12 +92,14 @@ module csr_regfile #( logic mret; // return from M-mode exception logic sret; // return from S-mode exception logic dret; // return from debug mode - + // CSR write causes us to mark the FPU state as dirty + logic dirty_fp_state_csr; riscv::csr_t csr_addr; // ---------------- // Assignments // ---------------- assign csr_addr = riscv::csr_t'(csr_addr_i); + assign fs_o = mstatus_q.fs; // ---------------- // CSR Registers // ---------------- @@ -153,9 +156,27 @@ module csr_regfile #( if (csr_read) begin case (csr_addr.address) - riscv::CSR_FFLAGS: csr_rdata = {59'b0, fcsr_q.fflags}; - riscv::CSR_FRM: csr_rdata = {61'b0, fcsr_q.frm}; - riscv::CSR_FCSR: csr_rdata = {32'b0, fcsr_q}; + riscv::CSR_FFLAGS: begin + if (mstatus_q.fs == riscv::Off) begin + read_access_exception = 1'b1; + end else begin + csr_rdata = {59'b0, fcsr_q.fflags}; + end + end + riscv::CSR_FRM: 
begin + if (mstatus_q.fs == riscv::Off) begin + read_access_exception = 1'b1; + end else begin + csr_rdata = {61'b0, fcsr_q.frm}; + end + end + riscv::CSR_FCSR: begin + if (mstatus_q.fs == riscv::Off) begin + read_access_exception = 1'b1; + end else begin + csr_rdata = {32'b0, fcsr_q}; + end + end // debug registers riscv::CSR_DCSR: csr_rdata = {32'b0, dcsr_q}; riscv::CSR_DPC: csr_rdata = dpc_q; @@ -287,25 +308,40 @@ module csr_regfile #( stval_d = stval_q; satp_d = satp_q; en_ld_st_translation_d = en_ld_st_translation_q; - + dirty_fp_state_csr = 1'b0; // check for correct access rights and that we are writing if (csr_we) begin case (csr_addr.address) // Floating-Point riscv::CSR_FFLAGS: begin - fcsr_d.fflags = csr_wdata[4:0]; - // this instruction has side-effects - flush_o = 1'b1; + if (mstatus_q.fs == riscv::Off) begin + update_access_exception = 1'b1; + end else begin + dirty_fp_state_csr = 1'b1; + fcsr_d.fflags = csr_wdata[4:0]; + // this instruction has side-effects + flush_o = 1'b1; + end end riscv::CSR_FRM: begin - fcsr_d.frm = csr_wdata[2:0]; - // this instruction has side-effects - flush_o = 1'b1; + if (mstatus_q.fs == riscv::Off) begin + update_access_exception = 1'b1; + end else begin + dirty_fp_state_csr = 1'b1; + fcsr_d.frm = csr_wdata[2:0]; + // this instruction has side-effects + flush_o = 1'b1; + end end riscv::CSR_FCSR: begin - fcsr_d[7:0] = csr_wdata[7:0]; // ignore writes to reserved space - // this instruction has side-effects - flush_o = 1'b1; + if (mstatus_q.fs == riscv::Off) begin + update_access_exception = 1'b1; + end else begin + dirty_fp_state_csr = 1'b1; + fcsr_d[7:0] = csr_wdata[7:0]; // ignore writes to reserved space + // this instruction has side-effects + flush_o = 1'b1; + end end // debug CSR riscv::CSR_DCSR: begin @@ -330,10 +366,13 @@ module csr_regfile #( // also hardwire the registers for sstatus mstatus_d.sxl = riscv::XLEN_64; mstatus_d.uxl = riscv::XLEN_64; - // hardwired zero registers - mstatus_d.sd = 1'b0; - mstatus_d.xs 
= 2'b0; - mstatus_d.fs = 2'b0; + // hardwired extension registers + mstatus_d.sd = (&mstatus_q.xs) | (&mstatus_q.fs); + mstatus_d.xs = riscv::Off; + // hardwire to zero if floating point extension is not present + if (!FP_PRESENT) begin + mstatus_d.fs = riscv::Off; + end mstatus_d.upie = 1'b0; mstatus_d.uie = 1'b0; // not all fields of mstatus can be written @@ -390,9 +429,11 @@ module csr_regfile #( mstatus_d.sxl = riscv::XLEN_64; mstatus_d.uxl = riscv::XLEN_64; // hardwired zero registers - mstatus_d.sd = 1'b0; - mstatus_d.xs = 2'b0; - mstatus_d.fs = 2'b0; + mstatus_d.sd = (&mstatus_q.xs) | (&mstatus_q.fs); + mstatus_d.xs = riscv::Off; + if (!FP_PRESENT) begin + mstatus_d.fs = riscv::Off; + end mstatus_d.upie = 1'b0; mstatus_d.uie = 1'b0; // this register has side-effects on other registers, flush the pipeline @@ -451,6 +492,11 @@ module csr_regfile #( endcase end + // mark the floating point extension register as dirty + if (FP_PRESENT && (dirty_fp_state_csr || dirty_fp_state_i)) begin + mstatus_d.fs = riscv::Dirty; + end + // write the floating point status register if (csr_write_fflags_i) fcsr_d.fflags = csr_wdata_i[4:0] | fcsr_q.fflags; diff --git a/src/decoder.sv b/src/decoder.sv index 2ade3ce7f..d09921817 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -30,6 +30,7 @@ module decoder ( // From CSR input riscv::priv_lvl_t priv_lvl_i, // current privilege level input logic debug_mode_i, // we are in debug mode + input riscv::xs_t fs_i, // floating point extension status input logic [2:0] frm_i, // floating-point dynamic rounding mode input logic tvm_i, // trap virtual memory input logic tw_i, // timeout wait @@ -244,7 +245,8 @@ module decoder ( // Vectorial Floating-Point Reg-Reg Operations // -------------------------------------------- if (instr.rvftype.funct2 == 2'b10) begin // Prefix 10 for all Xfvec ops - if (FP_PRESENT & XFVEC) begin // only generate decoder if FP extensions are enabled (static) + // only generate decoder if FP extensions are enabled 
(static) + if (FP_PRESENT && XFVEC && fs_i != riscv::Off) begin automatic logic allow_replication; // control honoring of replication flag instruction_o.fu = FPU_VEC; // Same unit, but sets 'vectorial' signal @@ -618,7 +620,7 @@ module decoder ( // Floating-Point Load/store // -------------------------------- riscv::OpcodeStoreFp: begin - if (FP_PRESENT) begin // only generate decoder if FP extensions are enabled (static) + if (FP_PRESENT && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static) instruction_o.fu = STORE; imm_select = SIMM; instruction_o.rs1 = instr.stype.rs1; @@ -641,7 +643,7 @@ module decoder ( end riscv::OpcodeLoadFp: begin - if (FP_PRESENT) begin // only generate decoder if FP extensions are enabled (static) + if (FP_PRESENT && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static) instruction_o.fu = LOAD; imm_select = IIMM; instruction_o.rs1 = instr.itype.rs1; @@ -670,7 +672,7 @@ module decoder ( riscv::OpcodeMsub, riscv::OpcodeNmsub, riscv::OpcodeNmadd: begin - if (FP_PRESENT) begin // only generate decoder if FP extensions are enabled (static) + if (FP_PRESENT && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static) instruction_o.fu = FPU; instruction_o.rs1 = instr.r4type.rs1; instruction_o.rs2 = instr.r4type.rs2; @@ -723,7 +725,7 @@ module decoder ( end riscv::OpcodeOpFp: begin - if (FP_PRESENT) begin // only generate decoder if FP extensions are enabled (static) + if (FP_PRESENT && fs_i != riscv::Off) begin // only generate decoder if FP extensions are enabled (static) instruction_o.fu = FPU; instruction_o.rs1 = instr.rftype.rs1; instruction_o.rs2 = instr.rftype.rs2; diff --git a/src/ex_stage.sv b/src/ex_stage.sv index 7f1a46c46..04f704b8d 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -122,7 +122,8 @@ module ex_stage #( // Branch Engine // -------------------- branch_unit branch_unit_i ( - .fu_valid_i ( alu_valid_i || lsu_valid_i 
|| csr_valid_i || mult_valid_i), // any functional unit is valid, check that there is no accidental mis-predict + // any functional unit is valid, check that there is no accidental mis-predict + .fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i ), .branch_comp_res_i ( alu_branch_res ), .* ); @@ -139,7 +140,7 @@ module ex_stage #( // FPU // ---------------- generate - if( FP_PRESENT ) begin : fpu_gen + if (FP_PRESENT) begin : fpu_gen fpu_wrap fpu_i ( .operand_c_i ( imm_i ), .result_o ( fpu_result_o ), diff --git a/src/id_stage.sv b/src/id_stage.sv index 946ccee0f..46b248c60 100644 --- a/src/id_stage.sv +++ b/src/id_stage.sv @@ -32,6 +32,7 @@ module id_stage ( input logic issue_instr_ack_i, // issue stage acknowledged sampling of instructions // from CSR file input riscv::priv_lvl_t priv_lvl_i, // current privilege level + input riscv::xs_t fs_i, // floating point extension status input logic [2:0] frm_i, // floating-point dynamic rounding mode input logic debug_mode_i, // we are in debug mode @@ -92,6 +93,8 @@ module id_stage ( .ex_i ( fetch_entry.ex ), .instruction_o ( decoded_instruction ), .is_control_flow_instr_o ( is_control_flow_instr ), + .fs_i, + .frm_i, .* ); diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index 21591a025..f79f07544 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -233,7 +233,7 @@ module issue_read_operands #( // use the zimm as operand a if (issue_instr_i.use_zimm) begin // zero extend operand a - operand_a_n = {52'b0, issue_instr_i.rs1}; + operand_a_n = {52'b0, issue_instr_i.rs1[4:0]}; end // or is it an immediate (including PC), this is not the case for a store and control flow instructions if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW)) begin From cc1007fb76312c788820f7e32abca06e68cbd6f9 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 18 Sep 2018 14:35:17 +0200 Subject: [PATCH 43/94] Fix regfile --- 
Bender.yml | 51 ++++++++++++++++++++++------------------ src/ariane_regfile_ff.sv | 11 ++++++--- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/Bender.yml b/Bender.yml index 4c10f73d5..a0db9e0c5 100644 --- a/Bender.yml +++ b/Bender.yml @@ -2,15 +2,18 @@ package: name: ariane authors: [ "Florian Zaruba " ] +package: + name: ariane + authors: [ "Florian Zaruba " ] + dependencies: - axi: { git: "git@iis-git.ee.ethz.ch:sasa/axi.git", rev: master } - axi2per: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi2per.git", rev: master } - axi_mem_if: { git: "git@github.com:pulp-platform/axi_mem_if.git", rev: master } - axi_node: { git: "git@iis-git.ee.ethz.ch:pulp-open/axi_node.git", version: v1.1.0 } - axi_slice: { git: "git@iis-git.ee.ethz.ch:sasa/axi_slice.git", version: 1.1.2 } - tech_cells_generic: { git: "git@iis-git.ee.ethz.ch:pulp-open/tech_cells_generic.git", rev: master } - common_cells: { git: "git@iis-git.ee.ethz.ch:sasa/common_cells.git", version: v1.7.4 } - fpga-support: { git: "https://github.com/pulp-platform/fpga-support.git", version: v0.3.2 } + axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.4.5 } + axi_mem_if: { git: "https://github.com/pulp-platform/axi_mem_if.git", version: 0.2.0 } + axi_node: { git: "https://github.com/pulp-platform/axi_node.git", version: 1.1.1 } + tech_cells_generic: { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.1.1 } + common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.7.5 } + fpga-support: { git: "https://github.com/pulp-platform/fpga-support.git", version: 0.3.2 } + sources: - src/fpu_legacy/hdl/fpu_utils/fpu_ff.sv - src/fpu_legacy/hdl/fpu_div_sqrt_mvp/defs_div_sqrt_mvp.sv @@ -58,47 +61,49 @@ sources: - src/fpu_wrap.sv - src/ariane.sv - src/branch_unit.sv - - src/cache_ctrl.sv - - src/commit_stage.sv - src/compressed_decoder.sv - src/controller.sv - src/csr_buffer.sv - src/csr_regfile.sv - src/decoder.sv - src/ex_stage.sv - - 
src/frontend/btb.sv, - - src/frontend/bht.sv, - - src/frontend/ras.sv, - - src/frontend/instr_scan.sv, + - src/frontend/btb.sv + - src/frontend/bht.sv + - src/frontend/ras.sv + - src/frontend/instr_scan.sv - src/frontend/frontend.sv - - src/icache.sv - src/id_stage.sv - src/instr_realigner.sv - src/issue_read_operands.sv - src/issue_stage.sv - - src/lfsr.sv - src/load_unit.sv - src/lsu_arbiter.sv - src/lsu.sv - - src/miss_handler.sv - src/mmu.sv - src/mult.sv - - src/nbdcache.sv - - src/vdregs.sv - src/perf_counters.sv - src/ptw.sv - - src/std_cache_subsystem.sv - - src/sram_wrapper.sv - # - src/ariane_regfile_ff.sv - - src/ariane_regfile.sv + - src/ariane_regfile_ff.sv + # - src/ariane_regfile.sv - src/re_name.sv - src/scoreboard.sv - src/store_buffer.sv - src/store_unit.sv - src/tlb.sv + - src/commit_stage.sv + - src/axi_adapter.sv + - src/cache_subsystem/cache_ctrl.sv + - src/cache_subsystem/miss_handler.sv + - src/cache_subsystem/std_cache_subsystem.sv + - src/cache_subsystem/std_icache.sv + - src/cache_subsystem/std_nbdcache.sv + - src/debug/debug_rom/debug_rom.sv - src/debug/dm_csrs.sv + - src/clint/clint.sv + - src/clint/axi_lite_interface.sv - src/debug/dm_mem.sv - src/debug/dm_top.sv - src/debug/dmi_cdc.sv - src/debug/dmi_jtag.sv + - src/debug/dm_sba.sv - src/debug/dmi_jtag_tap.sv diff --git a/src/ariane_regfile_ff.sv b/src/ariane_regfile_ff.sv index 51b716be3..84424e57d 100644 --- a/src/ariane_regfile_ff.sv +++ b/src/ariane_regfile_ff.sv @@ -66,7 +66,7 @@ module ariane_regfile #( mem <= '{default: '0}; end else begin for (int unsigned j = 0; j < NR_WRITE_PORTS; j++) begin - for (int unsigned i = ZERO_REG_ZERO; i < NUM_WORDS; i++) begin + for (int unsigned i = 0; i < NUM_WORDS; i++) begin if (we_dec[j][i]) mem[i] <= wdata_i[j]; end @@ -74,7 +74,12 @@ module ariane_regfile #( end end - for (genvar i = 0; i < NR_READ_PORTS; i++) - assign rdata_o[i] = mem[raddr_i[i]]; + for (genvar i = ZERO_REG_ZERO; i < NR_READ_PORTS; i++) begin + if (ZERO_REG_ZERO) begin + 
assign rdata_o[0] = '0; + end else begin + assign rdata_o[i] = mem[raddr_i[i]]; + end + end endmodule From f405a42e36c8ef8148b739bd9c30974fa5b94552 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 18 Sep 2018 14:42:31 +0200 Subject: [PATCH 44/94] Fix problem in Bender.yml --- Bender.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Bender.yml b/Bender.yml index a0db9e0c5..32bed8513 100644 --- a/Bender.yml +++ b/Bender.yml @@ -2,10 +2,6 @@ package: name: ariane authors: [ "Florian Zaruba " ] -package: - name: ariane - authors: [ "Florian Zaruba " ] - dependencies: axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.4.5 } axi_mem_if: { git: "https://github.com/pulp-platform/axi_mem_if.git", version: 0.2.0 } From 04ad1f65cf50c6b0d1c1dede47c13d3230160986 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 18 Sep 2018 15:04:47 +0200 Subject: [PATCH 45/94] Fix regfile again --- src/ariane_regfile_ff.sv | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/ariane_regfile_ff.sv b/src/ariane_regfile_ff.sv index 84424e57d..a5b9c6954 100644 --- a/src/ariane_regfile_ff.sv +++ b/src/ariane_regfile_ff.sv @@ -67,19 +67,19 @@ module ariane_regfile #( end else begin for (int unsigned j = 0; j < NR_WRITE_PORTS; j++) begin for (int unsigned i = 0; i < NUM_WORDS; i++) begin - if (we_dec[j][i]) + if (we_dec[j][i]) begin mem[i] <= wdata_i[j]; + end + end + if (ZERO_REG_ZERO) begin + mem[0] <= '0; end end end end - for (genvar i = ZERO_REG_ZERO; i < NR_READ_PORTS; i++) begin - if (ZERO_REG_ZERO) begin - assign rdata_o[0] = '0; - end else begin - assign rdata_o[i] = mem[raddr_i[i]]; - end + for (genvar i = 0; i < NR_READ_PORTS; i++) begin + assign rdata_o[i] = mem[raddr_i[i]]; end endmodule From b9ae8bfb4bb511ec6aa4090f948e815f1fe53f7c Mon Sep 17 00:00:00 2001 From: Michael Schaffner Date: Tue, 18 Sep 2018 15:33:18 +0200 Subject: [PATCH 46/94] Add correct paths for GitLab CI --- .gitlab-ci.yml | 44 
++++++++++---------------------------------- 1 file changed, 10 insertions(+), 34 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 97f8d30ad..daa6a1154 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,23 +1,23 @@ before_script: - - export CXX=g++-7 CC=gcc-7 # paths to local or network installations (the riscv toolchain and # verilator are not built in the ci job as in travis) - - export QUESTASIM_HOME= - - export QUESTASIM_VERSION= - - export QUESTASIM_FLAGS= - - export RISCV=/scratch/$USER/projects/riscv_install - - export VERILATOR_ROOT=/scratch/$USER/projects/verilator-3.924 + - export QUESTASIM_HOME=/usr/pack/modelsim-10.6b-kgf/questasim/ + - export QUESTASIM_VERSION=-10.6b + - export QUESTASIM_FLAGS=-noautoldlibpath + - export CXX=g++-7.2.0 CC=gcc-7.2.0 + - export RISCV=/usr/scratch2/larain1/gitlabci/riscv_install + - export VERILATOR_ROOT=/usr/scratch2/larain1/gitlabci/verilator-3.924 # setup dependent paths - export PATH=${RISCV}/bin:$VERILATOR_ROOT/bin:${PATH} - export LIBRARY_PATH=$RISCV/lib - - export LD_LIBRARY_PATH=$RISCV/lib - - export C_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include - - export CPLUS_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include + - export LD_LIBRARY_PATH=$RISCV/lib:/usr/pack/gcc-7.2.0-af/linux-x64/lib64/ + - export C_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include:/usr/pack/gcc-7.2.0-af/linux-x64/include + - export CPLUS_INCLUDE_PATH=$RISCV/include:$VERILATOR_ROOT/include:/usr/pack/gcc-7.2.0-af/linux-x64/include # number of parallel jobs to use for make commands and simulation - export NUM_JOBS=4 - ci/make-tmp.sh - git submodule update --init --recursive - + variables: GIT_SUBMODULE_STRATEGY: recursive @@ -52,33 +52,9 @@ run-benchmarks-questa: dependencies: - build -# rv64ui-p-* tests -run-asm-tests1-verilator: - stage: test_std - script: - - make -j${NUM_JOBS} run-asm-tests1-verilator - dependencies: - - build - -# rv64ui-v-* tests -run-asm-tests2-verilator: - stage: test_std - script: - - make 
-j${NUM_JOBS} run-asm-tests2-verilator - dependencies: - - build - -run-benchmarks-verilator: - stage: test_std - script: - - make -j${NUM_JOBS} run-benchmarks-verilator - dependencies: - - build - torture: stage: test_std script: - make torture-rtest - - make torture-rtest-verilator dependencies: - build From 312e4c297447aafb879d2075efd13fbe3b56a88b Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Wed, 19 Sep 2018 12:46:16 +0200 Subject: [PATCH 47/94] Fix FPU wrapper problem --- src/fpu_wrap.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fpu_wrap.sv b/src/fpu_wrap.sv index 07461893a..29ed0edc4 100644 --- a/src/fpu_wrap.sv +++ b/src/fpu_wrap.sv @@ -397,7 +397,7 @@ module fpu_wrap ( //--------------------------------------------------------- // Input is ready whenever the register is free to accept a potentially spilling instruction - assign fpu_ready_o = reg_in_ready; + assign fpu_ready_o = ~reg_in_valid & (~reg_out_valid | reg_out_ready); // Input data goes to the buffer register if the received instruction cannot be handled assign reg_in_valid = fpu_valid_i & ~fpu_in_ready; From 5f5ff27b063c026a7fe1f4fd060e14dfe0165839 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Wed, 19 Sep 2018 22:11:15 +0200 Subject: [PATCH 48/94] :bug: Fix std_arith warnings crashing torture --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index abb1319b3..a7f9cc9de 100755 --- a/Makefile +++ b/Makefile @@ -260,7 +260,7 @@ run-torture: build +BASEDIR=$(riscv-torture-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ $(QUESTASIM_FLAGS) \ -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi \ - -do "coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \ + -do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \ 
${top_level}_optimized +permissive-off \ +signature=$(riscv-torture-dir)/output/test.rtlsim.sig ++$(riscv-torture-dir)/output/test ++$(target-options) From 0c20da44b9bca384202791e598247741a58156e2 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Thu, 20 Sep 2018 21:29:23 +0200 Subject: [PATCH 49/94] :arrow_up: Update fpu to fix F2F bug --- src/fpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fpu b/src/fpu index 17976c9ad..98caa51e0 160000 --- a/src/fpu +++ b/src/fpu @@ -1 +1 @@ -Subproject commit 17976c9adf26c02c26d35df1899864abe6c23da2 +Subproject commit 98caa51e0b69fdb03dc4e7f0ecc26ed947bb3e7a From 44dff3aba901e8ef0546b8e344e112fc4b86dddb Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Fri, 21 Sep 2018 12:50:55 +0200 Subject: [PATCH 50/94] :wrench: Add overnight capability to makefile --- Makefile | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index a7f9cc9de..b85190dbe 100755 --- a/Makefile +++ b/Makefile @@ -22,6 +22,11 @@ verilator ?= ${VERILATOR_ROOT}/bin/verilator target-options ?= # additional definess defines ?= +# test name for torture runs (binary name) +test-location ?= output/test +# set to either nothing or -log +torture-logs := -log + # Sources # Package files -> compile first ariane_pkg := include/riscv_pkg.sv \ @@ -112,7 +117,7 @@ uvm-flags += +UVM_NO_RELNOTES list_incdir := $(foreach dir, ${incdir}, +incdir+$(dir)) # RISCV torture setup -riscv-torture-dir := tmp/riscv-torture/ +riscv-torture-dir := tmp/riscv-torture riscv-torture-bin := java -Xmx1G -Xss8M -XX:MaxPermSize=128M -jar sbt-launch.jar # Build the TB and module using QuestaSim @@ -246,8 +251,16 @@ torture-itest: cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -a output/test.S' torture-rtest: build - cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture defines=$(defines)" > call.sh && chmod +x call.sh - cd $(riscv-torture-dir) && 
$(riscv-torture-bin) 'testrun/run -r ./call.sh -a output/test.S' | tee output/test.log + cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture$(torture-logs) defines=$(defines) test-location=$(test-location)" > call.sh && chmod +x call.sh + cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -r ./call.sh -a $(test-location).S' | tee $(test-location).log + make check-torture test-location=$(test-location) + +torture-dummy: build + cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture defines=$(defines) test-location=\$${@: -1}" > call.sh + +torture-rnight: build + cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture$(torture-logs) defines=$(defines) test-location=\$${@: -1}" > call.sh && chmod +x call.sh + cd $(riscv-torture-dir) && $(riscv-torture-bin) 'overnight/run -r ./call.sh -g none' | tee output/overnight.log make check-torture torture-rtest-verilator: verilate @@ -262,14 +275,26 @@ run-torture: build -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi \ -do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; coverage save -onexit tmp/$@.ucdb; run -a; quit -code [coverage attribute -name TESTSTATUS -concise]" \ ${top_level}_optimized +permissive-off \ - +signature=$(riscv-torture-dir)/output/test.rtlsim.sig ++$(riscv-torture-dir)/output/test ++$(target-options) + +signature=$(riscv-torture-dir)/$(test-location).rtlsim.sig ++$(riscv-torture-dir)/$(test-location) ++$(target-options) + +run-torture-log: build + vsim${questa_version} +permissive -64 -c -lib ${library} +max-cycles=$(max_cycles)+UVM_TESTNAME=${test_case} \ + +BASEDIR=$(riscv-torture-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ + $(QUESTASIM_FLAGS) \ + -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi \ + -do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; coverage save -onexit tmp/$@.ucdb; log -r /*; run -a; quit -code [coverage 
attribute -name TESTSTATUS -concise]" \ + ${top_level}_optimized +permissive-off \ + +signature=$(riscv-torture-dir)/$(test-location).rtlsim.sig ++$(riscv-torture-dir)/$(test-location) ++$(target-options) + cp vsim.wlf $(riscv-torture-dir)/$(test-location).wlf + cp trace_core_00_0.log $(riscv-torture-dir)/$(test-location).trace + cp transcript $(riscv-torture-dir)/$(test-location).transcript run-torture-verilator: verilate $(ver-library)/Variane_testharness +max-cycles=$(max_cycles) +signature=$(riscv-torture-dir)/output/test.rtlsim.sig $(riscv-torture-dir)/output/test check-torture: - grep 'All signatures match for output/test' $(riscv-torture-dir)/output/test.log - diff -s $(riscv-torture-dir)/output/test.spike.sig $(riscv-torture-dir)/output/test.rtlsim.sig + grep 'All signatures match for $(test-location)' $(riscv-torture-dir)/$(test-location).log + diff -s $(riscv-torture-dir)/$(test-location).spike.sig $(riscv-torture-dir)/$(test-location).rtlsim.sig clean: rm -rf $(riscv-torture-dir)/output/test* @@ -283,3 +308,4 @@ clean: check-benchmarks check-asm-tests \ torture-gen torture-itest torture-rtest \ run-torture run-torture-verilator check-torture check-torture-verilator + From ead91473a7803a7310a03877da55df795156d1d9 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Sun, 23 Sep 2018 15:25:07 +0200 Subject: [PATCH 51/94] :ambulance: Fix rs3 forwarding for FP operations Fixed a bug where the third operand in 3-operand FP operations like `fmadd` could be forwarded from the previous instruction operating on a gp register with the same address. 
--- src/scoreboard.sv | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/scoreboard.sv b/src/scoreboard.sv index 92abc3738..fb54b499f 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -220,7 +220,7 @@ module scoreboard #( end else if ((mem_q[i].sbe.rd == rs2_i) && (is_rd_fpr(mem_q[i].sbe.op) == is_rs2_fpr(issue_instr_o.op))) begin rs2_o = mem_q[i].sbe.result; rs2_valid_o = mem_q[i].sbe.valid; - end else if (mem_q[i].sbe.rd == rs3_i) begin // rs3 is only considered in FP cases so no check needed + end else if ((mem_q[i].sbe.rd == rs3_i) && (is_rd_fpr(mem_q[i].sbe.op) == is_imm_fpr(issue_instr_o.op))) begin rs3_o = mem_q[i].sbe.result; rs3_valid_o = mem_q[i].sbe.valid; end @@ -245,7 +245,8 @@ module scoreboard #( rs2_valid_o = wb_valid_i[j]; break; end - if (mem_q[trans_id_i[j]].sbe.rd == rs3_i && wb_valid_i[j] && ~ex_i[j].valid) begin // rs3 only uses fpr + if (mem_q[trans_id_i[j]].sbe.rd == rs3_i && wb_valid_i[j] && ~ex_i[j].valid + && (is_rd_fpr(mem_q[trans_id_i[j]].sbe.op) == is_imm_fpr(issue_instr_o.op))) begin rs3_o = wbdata_i[j]; rs3_valid_o = wb_valid_i[j]; break; From f0acfd1893061a8e829d6fa289dd7291f5b8ddaa Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sun, 23 Sep 2018 17:18:31 +0200 Subject: [PATCH 52/94] :sparkles: Add spike commit log feature --- include/ariane_pkg.sv | 9 +++++++++ include/riscv_pkg.sv | 16 ++++++++++++++++ src/util/instruction_tracer.svh | 13 +++++++++---- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index b1fc7ed87..b138acafb 100644 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -90,6 +90,15 @@ package ariane_pkg; datasize: dm::DataCount, dataaddr: dm::DataAddr }; + + // if set to zero a flush will not invalidate the cache-lines, in a single core environment + // where coherence is not necessary this can improve performance. 
This needs to be switched on + // when more than one core is in a system + localparam logic INVALIDATE_ON_FLUSH = 1'b0; + + // enables a commit log which matches spikes commit log format for easier trace comparison + localparam bit ENABLE_SPIKE_COMMIT_LOG = 1'b1; + // --------------- // Fetch Stage // --------------- diff --git a/include/riscv_pkg.sv b/include/riscv_pkg.sv index 8d7492861..977d947d5 100644 --- a/include/riscv_pkg.sv +++ b/include/riscv_pkg.sv @@ -478,4 +478,20 @@ package riscv; function automatic logic [31:0] illegal (); return 32'h00000000; endfunction + + // trace log compatible to spikes commit log feature + function string spikeCommitLog(logic [63:0] pc, priv_lvl_t priv_lvl, logic [31:0] instr, logic [4:0] rd, logic [63:0] result); + string rd_s; + + if (rd < 10) rd_s = $sformatf("x %0d", rd); + else rd_s = $sformatf("x%0d", rd); + + if (rd != 0) begin + // 0 0x0000000080000118 (0xeecf8f93) x31 0x0000000080004000 + return $sformatf("%d 0x%h (0x%h) %s 0x%h\n", priv_lvl, pc, instr, rd_s, result); + end else begin + // 0 0x000000008000019c (0x0040006f) + return $sformatf("%d 0x%h (0x%h)\n", priv_lvl, pc, instr); + end + endfunction endpackage diff --git a/src/util/instruction_tracer.svh b/src/util/instruction_tracer.svh index d2604970f..7122effba 100644 --- a/src/util/instruction_tracer.svh +++ b/src/util/instruction_tracer.svh @@ -30,7 +30,7 @@ class instruction_tracer; logic [63:0] fp_reg_file [32]; // 64 bit clock tick count longint unsigned clk_ticks; - int f; + int f, commit_log; // address mapping // contains mappings of the form vaddr <-> paddr // should it print the instructions to the console @@ -48,11 +48,13 @@ class instruction_tracer; endfunction : new function void create_file(logic [5:0] cluster_id, logic [3:0] core_id); - string fn; + string fn, fn_commit_log; $sformat(fn, "trace_core_%h_%h.log", cluster_id, core_id); + $sformat(fn_commit_log, "trace_core_%h_%h_commit.log", cluster_id, core_id); $display("[TRACER] Output filename 
is: %s", fn); this.f = $fopen(fn,"w"); + if (ENABLE_SPIKE_COMMIT_LOG) this.commit_log = $fopen(fn_commit_log, "w"); endfunction : create_file task trace(); @@ -185,6 +187,9 @@ class instruction_tracer; instruction_trace_item iti = new ($time, clk_ticks, sbe, instr, this.gp_reg_file, this.fp_reg_file, result, paddr, priv_lvl, debug_mode, bp); // print instruction to console string print_instr = iti.printInstr(); + if (ENABLE_SPIKE_COMMIT_LOG && !debug_mode) begin + $fwrite(this.commit_log, riscv::spikeCommitLog(sbe.pc, priv_lvl, instr, sbe.rd, result)); + end uvm_report_info( "Tracer", print_instr, UVM_HIGH); $fwrite(this.f, {print_instr, "\n"}); endfunction @@ -197,8 +202,8 @@ class instruction_tracer; endfunction function void close(); - if (f) - $fclose(this.f); + if (f) $fclose(this.f); + if (ENABLE_SPIKE_COMMIT_LOG && this.commit_log) $fclose(this.commit_log); endfunction endclass : instruction_tracer From cef9639017291fdacf20411376cb3eca6421742f Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Sun, 23 Sep 2018 17:28:27 +0200 Subject: [PATCH 53/94] :sparkles: Add commit trace functionality to torture --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index b85190dbe..6fb2b2d40 100755 --- a/Makefile +++ b/Makefile @@ -287,6 +287,7 @@ run-torture-log: build +signature=$(riscv-torture-dir)/$(test-location).rtlsim.sig ++$(riscv-torture-dir)/$(test-location) ++$(target-options) cp vsim.wlf $(riscv-torture-dir)/$(test-location).wlf cp trace_core_00_0.log $(riscv-torture-dir)/$(test-location).trace + cp trace_core_00_0_commit.log $(riscv-torture-dir)/$(test-location).commit cp transcript $(riscv-torture-dir)/$(test-location).transcript run-torture-verilator: verilate From c26ae0f6a8c0099d2badc3853489d26d8aca4b78 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Mon, 24 Sep 2018 11:29:29 +0200 Subject: [PATCH 54/94] :art: Tweaks to Makefile --- Makefile | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git 
a/Makefile b/Makefile index 6fb2b2d40..dc232b50c 100755 --- a/Makefile +++ b/Makefile @@ -186,14 +186,14 @@ $(riscv-benchmarks): build # can use -jX to run ci tests in parallel using X processes run-asm-tests: $(riscv-asm-tests) - make check-asm-tests + $(MAKE) check-asm-tests check-asm-tests: ci/check-tests.sh tmp/riscv-asm-tests- $(shell wc -l $(riscv-asm-tests-list) | awk -F " " '{ print $1 }') # can use -jX to run ci tests in parallel using X processes run-benchmarks: $(riscv-benchmarks) - make check-benchmarks + $(MAKE) check-benchmarks check-benchmarks: ci/check-tests.sh tmp/riscv-benchmarks- $(shell wc -l $(riscv-benchmarks-list) | awk -F " " '{ print $1 }') @@ -225,7 +225,7 @@ verilate_command := $(verilator) # User Verilator, at some point in the future this will be auto-generated verilate: $(verilate_command) - cd $(ver-library) && make -j${NUM_JOBS} -f Variane_testharness.mk + cd $(ver-library) && $(MAKE) -j${NUM_JOBS} -f Variane_testharness.mk $(addsuffix -verilator,$(riscv-asm-tests)): verilate $(ver-library)/Variane_testharness $(riscv-test-dir)/$(subst -verilator,,$@) @@ -251,22 +251,22 @@ torture-itest: cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -a output/test.S' torture-rtest: build - cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture$(torture-logs) defines=$(defines) test-location=$(test-location)" > call.sh && chmod +x call.sh + cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture$(torture-logs) defines=$(defines) test-location=$(test-location)" > call.sh && chmod +x call.sh cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -r ./call.sh -a $(test-location).S' | tee $(test-location).log make check-torture test-location=$(test-location) torture-dummy: build - cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture defines=$(defines) test-location=\$${@: -1}" > call.sh + cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && 
$(MAKE) run-torture defines=$(defines) test-location=\$${@: -1}" > call.sh torture-rnight: build - cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture$(torture-logs) defines=$(defines) test-location=\$${@: -1}" > call.sh && chmod +x call.sh + cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture$(torture-logs) defines=$(defines) test-location=\$${@: -1}" > call.sh && chmod +x call.sh cd $(riscv-torture-dir) && $(riscv-torture-bin) 'overnight/run -r ./call.sh -g none' | tee output/overnight.log - make check-torture + $(MAKE) check-torture torture-rtest-verilator: verilate - cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && make run-torture-verilator defines=$(defines)" > call.sh && chmod +x call.sh + cd $(riscv-torture-dir) && printf "#!/bin/sh\ncd $(root-dir) && $(MAKE) run-torture-verilator defines=$(defines)" > call.sh && chmod +x call.sh cd $(riscv-torture-dir) && $(riscv-torture-bin) 'testrun/run -r ./call.sh -a output/test.S' | tee output/test.log - make check-torture + $(MAKE) check-torture run-torture: build vsim${questa_version} +permissive -64 -c -lib ${library} +max-cycles=$(max_cycles)+UVM_TESTNAME=${test_case} \ From 38b17e0e86f16c95bb679831c3c357eab4b9d199 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Mon, 24 Sep 2018 11:31:38 +0200 Subject: [PATCH 55/94] :arrow_up: Update fpu to newest version --- src/fpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fpu b/src/fpu index 98caa51e0..d79f013ee 160000 --- a/src/fpu +++ b/src/fpu @@ -1 +1 @@ -Subproject commit 98caa51e0b69fdb03dc4e7f0ecc26ed947bb3e7a +Subproject commit d79f013ee03c3ef1fac62932ea132364952cb600 From a22a15f55c9ece4d533fe5aed71df0a1085eb5f9 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Mon, 24 Sep 2018 11:38:13 +0200 Subject: [PATCH 56/94] :zap: Add data silencing to EX stage --- include/ariane_pkg.sv | 7 ++ src/ex_stage.sv | 156 +++++++++++++++++++++++++++++++++++++----- src/fpu_wrap.sv | 2 
+- 3 files changed, 146 insertions(+), 19 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index b138acafb..4ee156baa 100644 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -240,6 +240,13 @@ package ariane_pkg; VFMIN, VFMAX, VFSGNJ, VFSGNJN, VFSGNJX, VFEQ, VFNE, VFLT, VFGE, VFLE, VFGT, VFCPKAB_S, VFCPKCD_S, VFCPKAB_D, VFCPKCD_D } fu_op; + typedef struct packed { + fu_op operator; + logic [63:0] operand_a; + logic [63:0] operand_b; + logic [63:0] imm; + } fu_data_t; + // ------------------------------- // Extract Src/Dst FP Reg from Op // ------------------------------- diff --git a/src/ex_stage.sv b/src/ex_stage.sv index 04f704b8d..01d1093a5 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -112,28 +112,76 @@ module ex_stage #( // ----- // ALU // ----- + fu_data_t alu_data; + assign alu_data.operator = alu_valid_i | branch_valid_i ? operator_i : ADD; + assign alu_data.operand_a = alu_valid_i | branch_valid_i ? operand_a_i : '0; + assign alu_data.operand_b = alu_valid_i | branch_valid_i ? operand_b_i : '0; + alu alu_i ( - .result_o ( alu_result_o ), - .alu_branch_res_o ( alu_branch_res ), - .* + .trans_id_i, + .alu_valid_i, + .operator_i ( alu_data.operator ), + .operand_a_i ( alu_data.operand_a ), + .operand_b_i ( alu_data.operand_b ), + .result_o ( alu_result_o ), + .alu_branch_res_o ( alu_branch_res ), + .alu_valid_o, + .alu_ready_o, + .alu_trans_id_o ); // -------------------- // Branch Engine // -------------------- + fu_data_t branch_data; + assign branch_data.operator = branch_valid_i ? operator_i : JALR; + assign branch_data.operand_a = branch_valid_i ? operand_a_i : '0; + assign branch_data.operand_b = branch_valid_i ? operand_b_i : '0; + assign branch_data.imm = branch_valid_i ? 
imm_i : '0; + branch_unit branch_unit_i ( + .trans_id_i, + .operator_i ( branch_data.operator ), + .operand_a_i ( branch_data.operand_a ), + .operand_b_i ( branch_data.operand_b ), + .imm_i ( branch_data.imm ), + .pc_i, + .is_compressed_instr_i, // any functional unit is valid, check that there is no accidental mis-predict - .fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i ), - .branch_comp_res_i ( alu_branch_res ), - .* + .fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i ), + .branch_valid_i, + .branch_comp_res_i ( alu_branch_res ), + .branch_ready_o, + .branch_valid_o, + .branch_result_o, + .branch_trans_id_o, + .branch_predict_i, + .resolved_branch_o, + .resolve_branch_o, + .branch_exception_o ); // ---------------- // Multiplication // ---------------- + fu_data_t mult_data; + assign mult_data.operator = mult_valid_i ? operator_i : MUL; + assign mult_data.operand_a = mult_valid_i ? operand_a_i : '0; + assign mult_data.operand_b = mult_valid_i ? operand_b_i : '0; + mult i_mult ( - .result_o ( mult_result_o ), - .* + .clk_i, + .rst_ni, + .flush_i, + .trans_id_i, + .mult_valid_i, + .operator_i ( mult_data.operator ), + .operand_a_i ( mult_data.operand_a ), + .operand_b_i ( mult_data.operand_b ), + .result_o ( mult_result_o ), + .mult_valid_o, + .mult_ready_o, + .mult_trans_id_o ); // ---------------- @@ -141,10 +189,31 @@ module ex_stage #( // ---------------- generate if (FP_PRESENT) begin : fpu_gen + fu_data_t fpu_data; + assign fpu_data.operator = fpu_valid_i ? operator_i : FSGNJ; + assign fpu_data.operand_a = fpu_valid_i ? operand_a_i : '0; + assign fpu_data.operand_b = fpu_valid_i ? operand_b_i : '0; + assign fpu_data.imm = fpu_valid_i ? 
imm_i : '0; + fpu_wrap fpu_i ( - .operand_c_i ( imm_i ), - .result_o ( fpu_result_o ), - .* + .clk_i, + .rst_ni, + .flush_i, + .trans_id_i, + .fu_i, + .fpu_valid_i, + .fpu_ready_o, + .operator_i ( fpu_data.operator ), + .operand_a_i ( fpu_data.operand_a[FLEN-1:0] ), + .operand_b_i ( fpu_data.operand_b[FLEN-1:0] ), + .operand_c_i ( fpu_data.imm[FLEN-1:0] ), + .fpu_fmt_i, + .fpu_rm_i, + .fpu_frm_i, + .fpu_trans_id_o, + .result_o ( fpu_result_o ), + .fpu_valid_o, + .fpu_exception_o ); end else begin : no_fpu_gen assign fpu_ready_o = '0; @@ -158,21 +227,72 @@ module ex_stage #( // ---------------- // Load-Store Unit // ---------------- + fu_data_t lsu_data; + assign lsu_data.operator = lsu_valid_i ? operator_i : LD; + assign lsu_data.operand_a = lsu_valid_i ? operand_a_i : '0; + assign lsu_data.operand_b = lsu_valid_i ? operand_b_i : '0; + assign lsu_data.imm = lsu_valid_i ? imm_i : '0; + lsu lsu_i ( - .commit_i ( lsu_commit_i ), - .commit_ready_o ( lsu_commit_ready_o ), - .dcache_req_ports_i ( dcache_req_ports_i ), - .dcache_req_ports_o ( dcache_req_ports_o ), - .* + .clk_i, + .rst_ni, + .flush_i, + .no_st_pending_o, + .fu_i, + .operator_i ( lsu_data.operator ), + .operand_a_i ( lsu_data.operand_a ), + .operand_b_i ( lsu_data.operand_b ), + .imm_i ( lsu_data.imm ), + .lsu_ready_o, + .lsu_valid_i, + .trans_id_i, + .lsu_trans_id_o, + .lsu_result_o, + .lsu_valid_o, + .commit_i ( lsu_commit_i ), + .commit_ready_o ( lsu_commit_ready_o ), + .enable_translation_i, + .en_ld_st_translation_i, + .icache_areq_i, + .icache_areq_o, + .priv_lvl_i, + .ld_st_priv_lvl_i, + .sum_i, + .mxr_i, + .satp_ppn_i, + .asid_i, + .flush_tlb_i, + .itlb_miss_o, + .dtlb_miss_o, + .dcache_req_ports_i, + .dcache_req_ports_o, + .lsu_exception_o ); // ----- // CSR // ----- + fu_data_t csr_data; + assign csr_data.operator = csr_valid_i ? operator_i : CSR_READ; + assign csr_data.operand_a = csr_valid_i ? operand_a_i : '0; + assign csr_data.operand_b = csr_valid_i ? 
operand_b_i : '0; + // CSR address buffer csr_buffer csr_buffer_i ( - .commit_i ( csr_commit_i ), - .* + .clk_i, + .rst_ni, + .flush_i, + .operator_i ( csr_data.operator ), + .operand_a_i ( csr_data.operand_a ), + .operand_b_i ( csr_data.operand_b ), + .trans_id_i, + .csr_ready_o, + .csr_valid_i, + .csr_trans_id_o, + .csr_result_o, + .csr_valid_o, + .commit_i ( csr_commit_i ), + .csr_addr_o ); diff --git a/src/fpu_wrap.sv b/src/fpu_wrap.sv index 29ed0edc4..00902e1c7 100644 --- a/src/fpu_wrap.sv +++ b/src/fpu_wrap.sv @@ -21,8 +21,8 @@ module fpu_wrap ( input logic flush_i, input logic [TRANS_ID_BITS-1:0] trans_id_i, input fu_t fu_i, - output logic fpu_ready_o, input logic fpu_valid_i, + output logic fpu_ready_o, input fu_op operator_i, input logic [FLEN-1:0] operand_a_i, input logic [FLEN-1:0] operand_b_i, From a3fd6f7a85eb788cac9ddc7ad600b0a4a1fc2285 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Mon, 24 Sep 2018 19:04:09 +0200 Subject: [PATCH 57/94] :wrench: Fix Bender.yml --- Bender.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Bender.yml b/Bender.yml index a0f83cd69..4be9be38b 100644 --- a/Bender.yml +++ b/Bender.yml @@ -11,7 +11,7 @@ dependencies: fpga-support: { git: "https://github.com/pulp-platform/fpga-support.git", version: 0.3.2 } sources: - - src/fpu_div_sqrt_mvp/hdl/fpu_utils/fpu_ff.sv + - src/fpu_div_sqrt_mvp/hdl/fpu_ff.sv - src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv - src/fpu_div_sqrt_mvp/hdl/control_mvp.sv - src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv From 772ffd09ec5559511ce8d02f99e53f9975f3e3ba Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Mon, 24 Sep 2018 22:10:01 +0200 Subject: [PATCH 58/94] Remove commit port of branch unit --- include/ariane_pkg.sv | 2 +- src/alu.sv | 60 +++++++++++++++++++++++++++++++------- src/ariane.sv | 21 ++++--------- src/debug/dm_sba.sv | 39 +++++++++++++------------ src/ex_stage.sv | 56 ++++++++++------------------------- src/issue_read_operands.sv | 3 +- src/issue_stage.sv | 3 
+- 7 files changed, 93 insertions(+), 91 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index 0a827fcfa..435f826a9 100644 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -24,7 +24,7 @@ package ariane_pkg; localparam NR_SB_ENTRIES = 8; // number of scoreboard entries localparam TRANS_ID_BITS = $clog2(NR_SB_ENTRIES); // depending on the number of scoreboard entries we need that many bits // to uniquely identify the entry in the scoreboard - localparam NR_WB_PORTS = 6; + localparam NR_WB_PORTS = 5; localparam ASID_WIDTH = 1; localparam BTB_ENTRIES = 8; localparam BHT_ENTRIES = 32; diff --git a/src/alu.sv b/src/alu.sv index fadd053a2..73b87f31c 100644 --- a/src/alu.sv +++ b/src/alu.sv @@ -19,23 +19,31 @@ import ariane_pkg::*; -module alu -( +module alu ( + input logic [63:0] pc_i, input logic [TRANS_ID_BITS-1:0] trans_id_i, input logic alu_valid_i, + input logic branch_valid_i, input fu_op operator_i, input logic [63:0] operand_a_i, input logic [63:0] operand_b_i, + input logic [63:0] imm_i, output logic [63:0] result_o, - output logic alu_branch_res_o, output logic alu_valid_o, output logic alu_ready_o, - output logic [TRANS_ID_BITS-1:0] alu_trans_id_o + output logic [TRANS_ID_BITS-1:0] alu_trans_id_o, + output exception_t alu_exception_o, + + input logic fu_valid_i, + input logic is_compressed_instr_i, + input branchpredict_sbe_t branch_predict_i, + output branchpredict_t resolved_branch_o, + output logic resolve_branch_o ); // ALU is a single cycle instructions, hence it is always ready assign alu_ready_o = 1'b1; - assign alu_valid_o = alu_valid_i; + assign alu_valid_o = alu_valid_i | branch_valid_i; assign alu_trans_id_o = trans_id_i; logic [63:0] operand_a_rev; @@ -43,6 +51,8 @@ module alu logic [64:0] operand_b_neg; logic [65:0] adder_result_ext_o; logic less; // handles both signed and unsigned forms + logic alu_branch_res; + logic [63:0] branch_result; // bit reverse operand_a for left shifts and bit counting generate @@ 
-89,13 +99,13 @@ module alu // get the right branch comparison result always_comb begin : branch_resolve // set comparison by default - alu_branch_res_o = 1'b1; + alu_branch_res = 1'b1; case (operator_i) - EQ: alu_branch_res_o = adder_z_flag; - NE: alu_branch_res_o = ~adder_z_flag; - LTS, LTU: alu_branch_res_o = less; - GES, GEU: alu_branch_res_o = ~less; - default: alu_branch_res_o = 1'b1; + EQ: alu_branch_res = adder_z_flag; + NE: alu_branch_res = ~adder_z_flag; + LTS, LTU: alu_branch_res = less; + GES, GEU: alu_branch_res = ~less; + default: alu_branch_res = 1'b1; endcase end @@ -198,6 +208,34 @@ module alu default: ; // default case to suppress unique warning endcase + + if (branch_valid_i) result_o = branch_result; + end + // ---------------------- + // Branch Unit + // ---------------------- + branch_unit branch_unit_i ( + .trans_id_i, + .operator_i, + .operand_a_i, + .operand_b_i, + .imm_i, + .pc_i, + .is_compressed_instr_i, + // any functional unit is valid, check that there is no accidental mis-predict + .fu_valid_i, + .branch_valid_i, + .branch_comp_res_i ( alu_branch_res ), + .branch_ready_o ( ), // is always high + .branch_valid_o ( ), // high when input is high + .branch_result_o ( branch_result ), + .branch_trans_id_o ( ), // feed through + .branch_predict_i, + .resolved_branch_o, + .resolve_branch_o, + .branch_exception_o ( alu_exception_o ) + ); + endmodule diff --git a/src/ariane.sv b/src/ariane.sv index e02591329..d7e8f5e91 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -91,12 +91,8 @@ module ariane #( logic [TRANS_ID_BITS-1:0] alu_trans_id_ex_id; logic alu_valid_ex_id; logic [63:0] alu_result_ex_id; + exception_t alu_exception_ex_id; // Branches and Jumps - logic branch_ready_ex_id; - logic [TRANS_ID_BITS-1:0] branch_trans_id_ex_id; - logic [63:0] branch_result_ex_id; - exception_t branch_exception_ex_id; - logic branch_valid_ex_id; logic branch_valid_id_ex; branchpredict_sbe_t branch_predict_id_ex; @@ -305,7 +301,6 @@ module ariane #( 
.alu_ready_i ( alu_ready_ex_id ), .alu_valid_o ( alu_valid_id_ex ), // Branches and Jumps - .branch_ready_i ( branch_ready_ex_id ), .branch_valid_o ( branch_valid_id_ex ), // branch is valid .branch_predict_o ( branch_predict_id_ex ), // branch predict to ex .resolve_branch_i ( resolve_branch_ex_id ), // in order to resolve the branch @@ -326,10 +321,10 @@ module ariane #( // Commit .resolved_branch_i ( resolved_branch ), - .trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, branch_trans_id_ex_id, csr_trans_id_ex_id, mult_trans_id_ex_id, fpu_trans_id_ex_id }), - .wbdata_i ( {alu_result_ex_id, lsu_result_ex_id, branch_result_ex_id, csr_result_ex_id, mult_result_ex_id, fpu_result_ex_id }), - .ex_ex_i ( {{$bits(exception_t){1'b0}}, lsu_exception_ex_id, branch_exception_ex_id, {$bits(exception_t){1'b0}}, {$bits(exception_t){1'b0}}, fpu_exception_ex_id }), - .wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, branch_valid_ex_id, csr_valid_ex_id, mult_valid_ex_id, fpu_valid_ex_id }), + .trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, csr_trans_id_ex_id, mult_trans_id_ex_id, fpu_trans_id_ex_id }), + .wbdata_i ( {alu_result_ex_id, lsu_result_ex_id, csr_result_ex_id, mult_result_ex_id, fpu_result_ex_id }), + .ex_ex_i ( {alu_exception_ex_id, lsu_exception_ex_id, {$bits(exception_t){1'b0}}, {$bits(exception_t){1'b0}}, fpu_exception_ex_id }), + .wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, csr_valid_ex_id, mult_valid_ex_id, fpu_valid_ex_id }), .waddr_i ( waddr_commit_id ), .wdata_i ( wdata_commit_id ), @@ -361,13 +356,9 @@ module ariane #( .alu_result_o ( alu_result_ex_id ), .alu_trans_id_o ( alu_trans_id_ex_id ), .alu_valid_o ( alu_valid_ex_id ), + .alu_exception_o ( alu_exception_ex_id ), // Branches and Jumps - .branch_ready_o ( branch_ready_ex_id ), - .branch_valid_o ( branch_valid_ex_id ), .branch_valid_i ( branch_valid_id_ex ), - .branch_trans_id_o ( branch_trans_id_ex_id ), - .branch_result_o ( branch_result_ex_id ), - .branch_exception_o ( 
branch_exception_ex_id ), .branch_predict_i ( branch_predict_id_ex ), // branch predict to ex .resolved_branch_o ( resolved_branch ), .resolve_branch_o ( resolve_branch_ex_id ), diff --git a/src/debug/dm_sba.sv b/src/debug/dm_sba.sv index d316982a4..83594e3b9 100644 --- a/src/debug/dm_sba.sv +++ b/src/debug/dm_sba.sv @@ -128,27 +128,28 @@ module dm_sba ( end end + axi_adapter #( - .DATA_WIDTH ( 64 ) + .DATA_WIDTH ( 64 ) ) i_axi_master ( - .clk_i ( clk_i ), - .rst_ni ( dmactive_i ), - .req_i ( req ), - .type_i ( std_cache_pkg::SINGLE_REQ), - .gnt_o ( gnt ), - .gnt_id_o ( ), - .addr_i ( address ), - .we_i ( we ), - .wdata_i ( sbdata_i ), - .be_i ( be ), - .size_i ( sbaccess_i[1:0] ), - .id_i ( '0 ), - .valid_o ( sbdata_valid_o ), - .rdata_o ( sbdata_o ), - .id_o ( ), - .critical_word_o ( ), // not needed here - .critical_word_valid_o ( ), // not needed here - .axi ( axi_master ) + .clk_i ( clk_i ), + .rst_ni ( dmactive_i ), + .req_i ( req ), + .type_i ( std_cache_pkg::SINGLE_REQ ), + .gnt_o ( gnt ), + .gnt_id_o ( ), + .addr_i ( address ), + .we_i ( we ), + .wdata_i ( sbdata_i ), + .be_i ( be ), + .size_i ( sbaccess_i[1:0] ), + .id_i ( '0 ), + .valid_o ( sbdata_valid_o ), + .rdata_o ( sbdata_o ), + .id_o ( ), + .critical_word_o ( ), // not needed here + .critical_word_valid_o ( ), // not needed here + .axi ( axi_master ) ); diff --git a/src/ex_stage.sv b/src/ex_stage.sv index af4c06e43..8528e1103 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -16,8 +16,8 @@ import ariane_pkg::*; module ex_stage #( - parameter int ASID_WIDTH = 1 - ) ( + parameter int ASID_WIDTH = 1 +)( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low input logic flush_i, @@ -37,15 +37,10 @@ module ex_stage #( output logic alu_valid_o, // ALU result is valid output logic [63:0] alu_result_o, output logic [TRANS_ID_BITS-1:0] alu_trans_id_o, // ID of scoreboard entry at which to write back + output exception_t alu_exception_o, // Branches and Jumps - output logic 
branch_ready_o, input logic branch_valid_i, // we are using the branch unit - output logic branch_valid_o, // the calculated branch target is valid - output logic [63:0] branch_result_o, // branch target address out - input branchpredict_sbe_t branch_predict_i, // branch prediction in - output logic [TRANS_ID_BITS-1:0] branch_trans_id_o, - output exception_t branch_exception_o, // branch unit detected an exception - + input branchpredict_sbe_t branch_predict_i, output branchpredict_t resolved_branch_o, // the branch engine uses the write back from the ALU output logic resolve_branch_o, // to ID signaling that we resolved the branch // LSU @@ -115,52 +110,31 @@ module ex_stage #( // ALU // ----- fu_data_t alu_data; - assign alu_data.operator = alu_valid_i | branch_valid_i ? operator_i : ADD; - assign alu_data.operand_a = alu_valid_i | branch_valid_i ? operand_a_i : '0; - assign alu_data.operand_b = alu_valid_i | branch_valid_i ? operand_b_i : '0; + assign alu_data.operator = (alu_valid_i | branch_valid_i) ? operator_i : ADD; + assign alu_data.operand_a = (alu_valid_i | branch_valid_i) ? operand_a_i : '0; + assign alu_data.operand_b = (alu_valid_i | branch_valid_i) ? operand_b_i : '0; + assign alu_data.imm = (alu_valid_i | branch_valid_i) ? imm_i : '0; alu alu_i ( + .pc_i, .trans_id_i, .alu_valid_i, + .branch_valid_i, .operator_i ( alu_data.operator ), .operand_a_i ( alu_data.operand_a ), .operand_b_i ( alu_data.operand_b ), + .imm_i ( alu_data.imm ), .result_o ( alu_result_o ), - .alu_branch_res_o ( alu_branch_res ), .alu_valid_o, .alu_ready_o, - .alu_trans_id_o - ); + .alu_trans_id_o, + .alu_exception_o, - // -------------------- - // Branch Engine - // -------------------- - fu_data_t branch_data; - assign branch_data.operator = branch_valid_i ? operator_i : JALR; - assign branch_data.operand_a = branch_valid_i ? operand_a_i : '0; - assign branch_data.operand_b = branch_valid_i ? operand_b_i : '0; - assign branch_data.imm = branch_valid_i ? 
imm_i : '0; - - branch_unit branch_unit_i ( - .trans_id_i, - .operator_i ( branch_data.operator ), - .operand_a_i ( branch_data.operand_a ), - .operand_b_i ( branch_data.operand_b ), - .imm_i ( branch_data.imm ), - .pc_i, + .fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i ), .is_compressed_instr_i, - // any functional unit is valid, check that there is no accidental mis-predict - .fu_valid_i ( alu_valid_i || lsu_valid_i || csr_valid_i || mult_valid_i || fpu_valid_i ), - .branch_valid_i, - .branch_comp_res_i ( alu_branch_res ), - .branch_ready_o, - .branch_valid_o, - .branch_result_o, - .branch_trans_id_o, .branch_predict_i, .resolved_branch_o, - .resolve_branch_o, - .branch_exception_o + .resolve_branch_o ); // ---------------- diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index 79f0be5e8..bb0384875 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -52,7 +52,6 @@ module issue_read_operands #( input logic alu_ready_i, // FU is ready output logic alu_valid_o, // Output is valid // Branches and Jumps - input logic branch_ready_i, output logic branch_valid_o, // this is a valid branch instruction output branchpredict_sbe_t branch_predict_o, // LSU @@ -137,7 +136,7 @@ module issue_read_operands #( ALU: fu_busy = ~alu_ready_i; CTRL_FLOW: - fu_busy = ~branch_ready_i; + fu_busy = ~alu_ready_i; MULT: fu_busy = ~mult_ready_i; FPU, diff --git a/src/issue_stage.sv b/src/issue_stage.sv index 8bbc0f01c..113c49988 100644 --- a/src/issue_stage.sv +++ b/src/issue_stage.sv @@ -48,8 +48,7 @@ module issue_stage #( input logic lsu_ready_i, output logic lsu_valid_o, // branch prediction - input logic branch_ready_i, - output logic branch_valid_o, // use branch prediction unit + output logic branch_valid_o, // use branch prediction unit output branchpredict_sbe_t branch_predict_o, input logic mult_ready_i, From 48385590f756d450ff6bf1443d09371d491c0b71 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: 
Mon, 24 Sep 2018 23:39:33 +0200 Subject: [PATCH 59/94] Remove CSR buffer from scoreboard path --- include/ariane_pkg.sv | 2 +- src/alu.sv | 49 ++++++++++++++++++++++++-------- src/ariane.sv | 25 ++++++----------- src/csr_buffer.sv | 6 ---- src/ex_stage.sv | 57 ++++++++++++-------------------------- src/issue_read_operands.sv | 12 ++------ src/issue_stage.sv | 7 ++--- 7 files changed, 71 insertions(+), 87 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index 435f826a9..d68bbf02a 100644 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -24,7 +24,7 @@ package ariane_pkg; localparam NR_SB_ENTRIES = 8; // number of scoreboard entries localparam TRANS_ID_BITS = $clog2(NR_SB_ENTRIES); // depending on the number of scoreboard entries we need that many bits // to uniquely identify the entry in the scoreboard - localparam NR_WB_PORTS = 5; + localparam NR_WB_PORTS = 4; localparam ASID_WIDTH = 1; localparam BTB_ENTRIES = 8; localparam BHT_ENTRIES = 32; diff --git a/src/alu.sv b/src/alu.sv index 73b87f31c..62ea6ac51 100644 --- a/src/alu.sv +++ b/src/alu.sv @@ -20,10 +20,14 @@ import ariane_pkg::*; module alu ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + input logic flush_i, input logic [63:0] pc_i, input logic [TRANS_ID_BITS-1:0] trans_id_i, input logic alu_valid_i, input logic branch_valid_i, + input logic csr_valid_i, input fu_op operator_i, input logic [63:0] operand_a_i, input logic [63:0] operand_b_i, @@ -38,12 +42,17 @@ module alu ( input logic is_compressed_instr_i, input branchpredict_sbe_t branch_predict_i, output branchpredict_t resolved_branch_o, - output logic resolve_branch_o + output logic resolve_branch_o, + + input logic commit_i, + // to CSR file + output logic [11:0] csr_addr_o // CSR address to commit stage ); - // ALU is a single cycle instructions, hence it is always ready - assign alu_ready_o = 1'b1; - assign alu_valid_o = alu_valid_i | branch_valid_i; + logic csr_ready; + + assign 
alu_ready_o = csr_ready; + assign alu_valid_o = alu_valid_i | branch_valid_i | csr_valid_i; assign alu_trans_id_o = trans_id_i; logic [63:0] operand_a_rev; @@ -52,7 +61,7 @@ module alu ( logic [65:0] adder_result_ext_o; logic less; // handles both signed and unsigned forms logic alu_branch_res; - logic [63:0] branch_result; + logic [63:0] branch_result, csr_result; // bit reverse operand_a for left shifts and bit counting generate @@ -209,7 +218,11 @@ module alu ( default: ; // default case to suppress unique warning endcase - if (branch_valid_i) result_o = branch_result; + if (branch_valid_i) begin + result_o = branch_result; + end else if (csr_valid_i) begin + result_o = csr_result; + end end @@ -227,15 +240,29 @@ module alu ( // any functional unit is valid, check that there is no accidental mis-predict .fu_valid_i, .branch_valid_i, - .branch_comp_res_i ( alu_branch_res ), - .branch_ready_o ( ), // is always high - .branch_valid_o ( ), // high when input is high - .branch_result_o ( branch_result ), - .branch_trans_id_o ( ), // feed through + .branch_comp_res_i ( alu_branch_res ), + .branch_ready_o ( ), // is always high + .branch_valid_o ( ), // high when input is high + .branch_result_o ( branch_result ), + .branch_trans_id_o ( ), // feed through .branch_predict_i, .resolved_branch_o, .resolve_branch_o, .branch_exception_o ( alu_exception_o ) ); + csr_buffer csr_buffer_i ( + .clk_i, + .rst_ni, + .flush_i, + .csr_valid_i, + .operator_i, + .operand_a_i, + .operand_b_i, + .csr_ready_o ( csr_ready ), + .csr_result_o ( csr_result ), + .commit_i, + .csr_addr_o + ); + endmodule diff --git a/src/ariane.sv b/src/ariane.sv index d7e8f5e91..f16f3b253 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -120,11 +120,7 @@ module ariane #( logic fpu_valid_ex_id; exception_t fpu_exception_ex_id; // CSR - logic csr_ready_ex_id; logic csr_valid_id_ex; - logic [TRANS_ID_BITS-1:0] csr_trans_id_ex_id; - logic [63:0] csr_result_ex_id; - logic csr_valid_ex_id; // -------------- // EX 
<-> COMMIT // -------------- @@ -316,15 +312,14 @@ module ariane #( .fpu_fmt_o ( fpu_fmt_id_ex ), .fpu_rm_o ( fpu_rm_id_ex ), // CSR - .csr_ready_i ( csr_ready_ex_id ), .csr_valid_o ( csr_valid_id_ex ), // Commit .resolved_branch_i ( resolved_branch ), - .trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, csr_trans_id_ex_id, mult_trans_id_ex_id, fpu_trans_id_ex_id }), - .wbdata_i ( {alu_result_ex_id, lsu_result_ex_id, csr_result_ex_id, mult_result_ex_id, fpu_result_ex_id }), - .ex_ex_i ( {alu_exception_ex_id, lsu_exception_ex_id, {$bits(exception_t){1'b0}}, {$bits(exception_t){1'b0}}, fpu_exception_ex_id }), - .wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, csr_valid_ex_id, mult_valid_ex_id, fpu_valid_ex_id }), + .trans_id_i ( {alu_trans_id_ex_id, lsu_trans_id_ex_id, mult_trans_id_ex_id, fpu_trans_id_ex_id }), + .wbdata_i ( {alu_result_ex_id, lsu_result_ex_id, mult_result_ex_id, fpu_result_ex_id }), + .ex_ex_i ( {alu_exception_ex_id, lsu_exception_ex_id, {$bits(exception_t){1'b0}}, fpu_exception_ex_id }), + .wb_valid_i ( {alu_valid_ex_id, lsu_valid_ex_id, mult_valid_ex_id, fpu_valid_ex_id }), .waddr_i ( waddr_commit_id ), .wdata_i ( wdata_commit_id ), @@ -362,6 +357,10 @@ module ariane #( .branch_predict_i ( branch_predict_id_ex ), // branch predict to ex .resolved_branch_o ( resolved_branch ), .resolve_branch_o ( resolve_branch_ex_id ), + // CSR + .csr_valid_i ( csr_valid_id_ex ), + .csr_addr_o ( csr_addr_ex_csr ), + .csr_commit_i ( csr_commit_commit_ex ), // from commit // LSU .lsu_ready_o ( lsu_ready_ex_id ), .lsu_valid_i ( lsu_valid_id_ex ), @@ -391,14 +390,6 @@ module ariane #( .amo_valid_commit_i ( amo_valid_commit ), .amo_req_o ( amo_req ), .amo_resp_i ( amo_resp ), - // CSR - .csr_ready_o ( csr_ready_ex_id ), - .csr_valid_i ( csr_valid_id_ex ), - .csr_trans_id_o ( csr_trans_id_ex_id ), - .csr_result_o ( csr_result_ex_id ), - .csr_valid_o ( csr_valid_ex_id ), - .csr_addr_o ( csr_addr_ex_csr ), - .csr_commit_i ( csr_commit_commit_ex ), // from commit // 
Performance counters .itlb_miss_o ( itlb_miss_ex_perf ), .dtlb_miss_o ( dtlb_miss_ex_perf ), diff --git a/src/csr_buffer.sv b/src/csr_buffer.sv index 3afdc1def..9ddac2b65 100644 --- a/src/csr_buffer.sv +++ b/src/csr_buffer.sv @@ -23,13 +23,10 @@ module csr_buffer ( input fu_op operator_i, input logic [63:0] operand_a_i, input logic [63:0] operand_b_i, - input logic [TRANS_ID_BITS-1:0] trans_id_i, // transaction id, needed for WB output logic csr_ready_o, // FU is ready e.g. not busy input logic csr_valid_i, // Input is valid - output logic [TRANS_ID_BITS-1:0] csr_trans_id_o, // ID of scoreboard entry at which to write back output logic [63:0] csr_result_o, - output logic csr_valid_o, // transaction id for which the output is the requested one input logic commit_i, // commit the pending CSR OP // to CSR file @@ -43,9 +40,6 @@ module csr_buffer ( } csr_reg_n, csr_reg_q; // control logic, scoreboard signals - assign csr_trans_id_o = trans_id_i; - // CSR instructions for this post buffer are single cycle - assign csr_valid_o = csr_valid_i; assign csr_result_o = operand_a_i; assign csr_addr_o = csr_reg_q.csr_address; diff --git a/src/ex_stage.sv b/src/ex_stage.sv index 8528e1103..04d0fe4e9 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -43,6 +43,10 @@ module ex_stage #( input branchpredict_sbe_t branch_predict_i, output branchpredict_t resolved_branch_o, // the branch engine uses the write back from the ALU output logic resolve_branch_o, // to ID signaling that we resolved the branch + // CSR + input logic csr_valid_i, + output logic [11:0] csr_addr_o, + input logic csr_commit_i, // LSU output logic lsu_ready_o, // FU is ready input logic lsu_valid_i, // Input is valid @@ -54,14 +58,6 @@ module ex_stage #( output exception_t lsu_exception_o, output logic no_st_pending_o, input logic amo_valid_commit_i, - // CSR - output logic csr_ready_o, - input logic csr_valid_i, - output logic [TRANS_ID_BITS-1:0] csr_trans_id_o, - output logic [63:0] csr_result_o, - output 
logic csr_valid_o, - output logic [11:0] csr_addr_o, - input logic csr_commit_i, // MULT output logic mult_ready_o, // FU is ready input logic mult_valid_i, // Output is valid @@ -110,16 +106,22 @@ module ex_stage #( // ALU // ----- fu_data_t alu_data; - assign alu_data.operator = (alu_valid_i | branch_valid_i) ? operator_i : ADD; - assign alu_data.operand_a = (alu_valid_i | branch_valid_i) ? operand_a_i : '0; - assign alu_data.operand_b = (alu_valid_i | branch_valid_i) ? operand_b_i : '0; - assign alu_data.imm = (alu_valid_i | branch_valid_i) ? imm_i : '0; + assign alu_data.operator = (alu_valid_i | branch_valid_i | csr_valid_i) ? operator_i : ADD; + assign alu_data.operand_a = (alu_valid_i | branch_valid_i | csr_valid_i) ? operand_a_i : '0; + assign alu_data.operand_b = (alu_valid_i | branch_valid_i | csr_valid_i) ? operand_b_i : '0; + assign alu_data.imm = (alu_valid_i | branch_valid_i | csr_valid_i) ? imm_i : '0; + // fixed latency FUs + // TOOD(zarubaf) Re-name this module and re-factor ALU alu alu_i ( + .clk_i, + .rst_ni, + .flush_i, .pc_i, .trans_id_i, .alu_valid_i, .branch_valid_i, + .csr_valid_i ( csr_valid_i ), .operator_i ( alu_data.operator ), .operand_a_i ( alu_data.operand_a ), .operand_b_i ( alu_data.operand_b ), @@ -134,7 +136,10 @@ module ex_stage #( .is_compressed_instr_i, .branch_predict_i, .resolved_branch_o, - .resolve_branch_o + .resolve_branch_o, + + .commit_i ( csr_commit_i ), + .csr_addr_o ( csr_addr_o ) ); // ---------------- @@ -248,30 +253,4 @@ module ex_stage #( .amo_resp_i ); - // ----- - // CSR - // ----- - fu_data_t csr_data; - assign csr_data.operator = csr_valid_i ? operator_i : CSR_READ; - assign csr_data.operand_a = csr_valid_i ? operand_a_i : '0; - assign csr_data.operand_b = csr_valid_i ? 
operand_b_i : '0; - - // CSR address buffer - csr_buffer csr_buffer_i ( - .clk_i, - .rst_ni, - .flush_i, - .operator_i ( csr_data.operator ), - .operand_a_i ( csr_data.operand_a ), - .operand_b_i ( csr_data.operand_b ), - .trans_id_i, - .csr_ready_o, - .csr_valid_i, - .csr_trans_id_o, - .csr_result_o, - .csr_valid_o, - .commit_i ( csr_commit_i ), - .csr_addr_o - ); - endmodule diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index bb0384875..87e693c76 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -17,7 +17,7 @@ import ariane_pkg::*; module issue_read_operands #( parameter int unsigned NR_COMMIT_PORTS = 2 - )( +)( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low // flush @@ -66,7 +66,6 @@ module issue_read_operands #( output logic [1:0] fpu_fmt_o, // FP fmt field from instr. output logic [2:0] fpu_rm_o, // FP rm field from instr. // CSR - input logic csr_ready_i, // FU is ready output logic csr_valid_o, // Output is valid // commit port input logic [NR_COMMIT_PORTS-1:0][4:0] waddr_i, @@ -133,19 +132,14 @@ module issue_read_operands #( unique case (issue_instr_i.fu) NONE: fu_busy = 1'b0; - ALU: - fu_busy = ~alu_ready_i; - CTRL_FLOW: + ALU, CTRL_FLOW, CSR: fu_busy = ~alu_ready_i; MULT: fu_busy = ~mult_ready_i; - FPU, - FPU_VEC: + FPU, FPU_VEC: fu_busy = ~fpu_ready_i; LOAD, STORE: fu_busy = ~lsu_ready_i; - CSR: - fu_busy = ~csr_ready_i; default: fu_busy = 1'b0; endcase diff --git a/src/issue_stage.sv b/src/issue_stage.sv index 113c49988..1cc08afc4 100644 --- a/src/issue_stage.sv +++ b/src/issue_stage.sv @@ -16,9 +16,9 @@ import ariane_pkg::*; module issue_stage #( - parameter int unsigned NR_ENTRIES = 8, - parameter int unsigned NR_WB_PORTS = 4, - parameter int unsigned NR_COMMIT_PORTS = 2 + parameter int unsigned NR_ENTRIES = 8, + parameter int unsigned NR_WB_PORTS = 4, + parameter int unsigned NR_COMMIT_PORTS = 2 )( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset 
active low @@ -59,7 +59,6 @@ module issue_stage #( output logic [1:0] fpu_fmt_o, // FP fmt field from instr. output logic [2:0] fpu_rm_o, // FP rm field from instr. - input logic csr_ready_i, output logic csr_valid_o, // write back port From e1edfb0305e410542e1a7251256c580f850f37c2 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Mon, 24 Sep 2018 23:41:37 +0200 Subject: [PATCH 60/94] Remove unused signals from branch unit --- src/alu.sv | 4 ---- src/branch_unit.sv | 8 -------- 2 files changed, 12 deletions(-) diff --git a/src/alu.sv b/src/alu.sv index 62ea6ac51..b9fe8d235 100644 --- a/src/alu.sv +++ b/src/alu.sv @@ -230,7 +230,6 @@ module alu ( // Branch Unit // ---------------------- branch_unit branch_unit_i ( - .trans_id_i, .operator_i, .operand_a_i, .operand_b_i, @@ -241,10 +240,7 @@ module alu ( .fu_valid_i, .branch_valid_i, .branch_comp_res_i ( alu_branch_res ), - .branch_ready_o ( ), // is always high - .branch_valid_o ( ), // high when input is high .branch_result_o ( branch_result ), - .branch_trans_id_o ( ), // feed through .branch_predict_i, .resolved_branch_o, .resolve_branch_o, diff --git a/src/branch_unit.sv b/src/branch_unit.sv index 7600ce35d..ff646d36a 100644 --- a/src/branch_unit.sv +++ b/src/branch_unit.sv @@ -15,7 +15,6 @@ import ariane_pkg::*; module branch_unit ( - input logic [TRANS_ID_BITS-1:0] trans_id_i, input fu_op operator_i, // comparison operation to perform input logic [63:0] operand_a_i, // contains content of RS 1 input logic [63:0] operand_b_i, // contains content of RS 2 @@ -25,10 +24,7 @@ module branch_unit ( input logic fu_valid_i, // any functional unit is valid, check that there is no accidental mis-predict input logic branch_valid_i, input logic branch_comp_res_i, // branch comparison result from ALU - output logic branch_ready_o, - output logic branch_valid_o, output logic [63:0] branch_result_o, - output logic [TRANS_ID_BITS-1:0] branch_trans_id_o, input branchpredict_sbe_t branch_predict_i, // this is the address we 
predicted output branchpredict_t resolved_branch_o, // this is the actual address we are targeting @@ -38,10 +34,6 @@ module branch_unit ( ); logic [63:0] target_address; logic [63:0] next_pc; - // branches are single cycle at the moment, feed-through the control signals - assign branch_trans_id_o = trans_id_i; - assign branch_valid_o = branch_valid_i; - assign branch_ready_o = 1'b1; // we are always ready // here we handle the various possibilities of mis-predicts always_comb begin : mispredict_handler From bd2ca5e2efc4fcbd428469578d3faf978999b3ae Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 25 Sep 2018 11:47:38 +0200 Subject: [PATCH 61/94] Fix latch and timing loop in debu_req --- src/commit_stage.sv | 141 ++++++++++++++++++++++---------------------- src/csr_regfile.sv | 4 ++ 2 files changed, 76 insertions(+), 69 deletions(-) diff --git a/src/commit_stage.sv b/src/commit_stage.sv index aac704cef..c7f8dfd76 100644 --- a/src/commit_stage.sv +++ b/src/commit_stage.sv @@ -98,86 +98,89 @@ module commit_stage #( // furthermore if the debugger is requesting to debug do not commit this instruction if we are not yet in debug mode // also check that there is no atomic memory operation committing, right now this is the only operation // which will take longer than one cycle to commit - if (commit_instr_i[0].valid && !halt_i && (!debug_req_i || debug_mode_i)) begin + if (commit_instr_i[0].valid && !halt_i) begin + // we have to exclude the AMOs from debug mode as we are not jumping to debug + // while committing an AMO + if ((!debug_req_i || debug_mode_i)) begin + commit_ack_o[0] = 1'b1; + // register will be the all zero register. + // and also acknowledge the instruction, this is mainly done for the instruction tracer + // as it will listen on the instruction ack signal. For the overall result it does not make any + // difference as the whole pipeline is going to be flushed anyway. 
+ if (!exception_o.valid) begin + // we can definitely write the register file + // if the instruction is not committing anything the destination + if (is_rd_fpr(commit_instr_i[0].op)) + we_fpr_o[0] = 1'b1; + else + we_gpr_o[0] = 1'b1; - commit_ack_o[0] = 1'b1; - // register will be the all zero register. - // and also acknowledge the instruction, this is mainly done for the instruction tracer - // as it will listen on the instruction ack signal. For the overall result it does not make any - // difference as the whole pipeline is going to be flushed anyway. - if (!exception_o.valid) begin - // we can definitely write the register file - // if the instruction is not committing anything the destination - if (is_rd_fpr(commit_instr_i[0].op)) - we_fpr_o[0] = 1'b1; - else - we_gpr_o[0] = 1'b1; + // check whether the instruction we retire was a store + // do not commit the instruction if we got an exception since the store buffer will be cleared + // by the subsequent flush triggered by an exception + if (commit_instr_i[0].fu == STORE && !instr_0_is_amo) begin + // check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store) + if (commit_lsu_ready_i) + commit_lsu_o = 1'b1; + else // if the LSU buffer is not ready - do not commit, wait + commit_ack_o[0] = 1'b0; + end - // check whether the instruction we retire was a store - // do not commit the instruction if we got an exception since the store buffer will be cleared - // by the subsequent flush triggered by an exception - if (commit_instr_i[0].fu == STORE && !instr_0_is_amo) begin - // check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store) - if (commit_lsu_ready_i) - commit_lsu_o = 1'b1; - else // if the LSU buffer is not ready - do not commit, wait - commit_ack_o[0] = 1'b0; + // --------- + // FPU Flags + // --------- + if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin + // write the CSR with potential exception flags from retiring floating point 
instruction + csr_wdata_o = {59'b0, commit_instr_i[0].ex.cause[4:0]}; + csr_write_fflags_o = 1'b1; + end end + // --------- - // FPU Flags + // CSR Logic // --------- - if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin - // write the CSR with potential exception flags from retiring floating point instruction - csr_wdata_o = {59'b0, commit_instr_i[0].ex.cause[4:0]}; - csr_write_fflags_o = 1'b1; + // check whether the instruction we retire was a CSR instruction + if (commit_instr_i[0].fu == CSR) begin + // write the CSR file + commit_csr_o = 1'b1; + wdata_o[0] = csr_rdata_i; + csr_op_o = commit_instr_i[0].op; + csr_wdata_o = commit_instr_i[0].result; + end + // ------------------ + // SFENCE.VMA Logic + // ------------------ + // check if this instruction was a SFENCE_VMA + if (commit_instr_i[0].op == SFENCE_VMA) begin + // no store pending so we can flush the TLBs and pipeline + sfence_vma_o = no_st_pending_i; + // wait for the store buffer to drain until flushing the pipeline + commit_ack_o[0] = no_st_pending_i; + end + // ------------------ + // FENCE.I Logic + // ------------------ + // Fence synchronizes data and instruction streams. That means that we need to flush the private icache + // and the private dcache. This is the most expensive instruction. 
+ if (commit_instr_i[0].op == FENCE_I || (flush_dcache_i && commit_instr_i[0].fu != STORE)) begin + commit_ack_o[0] = no_st_pending_i; + // tell the controller to flush the I$ + fence_i_o = no_st_pending_i; + end + // ------------------ + // FENCE Logic + // ------------------ + if (commit_instr_i[0].op == FENCE) begin + commit_ack_o[0] = no_st_pending_i; + // tell the controller to flush the D$ + fence_o = no_st_pending_i; end - end - - - // --------- - // CSR Logic - // --------- - // check whether the instruction we retire was a CSR instruction - if (commit_instr_i[0].fu == CSR) begin - // write the CSR file - commit_csr_o = 1'b1; - wdata_o[0] = csr_rdata_i; - csr_op_o = commit_instr_i[0].op; - csr_wdata_o = commit_instr_i[0].result; - end - // ------------------ - // SFENCE.VMA Logic - // ------------------ - // check if this instruction was a SFENCE_VMA - if (commit_instr_i[0].op == SFENCE_VMA) begin - // no store pending so we can flush the TLBs and pipeline - sfence_vma_o = no_st_pending_i; - // wait for the store buffer to drain until flushing the pipeline - commit_ack_o[0] = no_st_pending_i; - end - // ------------------ - // FENCE.I Logic - // ------------------ - // Fence synchronizes data and instruction streams. That means that we need to flush the private icache - // and the private dcache. This is the most expensive instruction. 
- if (commit_instr_i[0].op == FENCE_I || (flush_dcache_i && commit_instr_i[0].fu != STORE)) begin - commit_ack_o[0] = no_st_pending_i; - // tell the controller to flush the I$ - fence_i_o = no_st_pending_i; - end - // ------------------ - // FENCE Logic - // ------------------ - if (commit_instr_i[0].op == FENCE) begin - commit_ack_o[0] = no_st_pending_i; - // tell the controller to flush the D$ - fence_o = no_st_pending_i; end // ------------------ // AMO // ------------------ - if (instr_0_is_amo && !exception_o.valid) begin + if (instr_0_is_amo && !commit_instr_i[0].ex.valid) begin // AMO finished commit_ack_o[0] = amo_resp_i.ack; // flush the pipeline diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv index 598b9ffc0..7db40daf2 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -306,6 +306,10 @@ module csr_regfile #( sscratch_d = sscratch_q; stval_d = stval_q; satp_d = satp_q; + + cycle_d = cycle_q; + instret_d = instret_q; + en_ld_st_translation_d = en_ld_st_translation_q; dirty_fp_state_csr = 1'b0; // check for correct access rights and that we are writing From 8f4e97d02755ad783f42ead232df3446a71231fc Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Wed, 26 Sep 2018 10:46:28 +0200 Subject: [PATCH 62/94] :bug: Fix combinatorial loop in debug module --- src/debug/dm_sba.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/debug/dm_sba.sv b/src/debug/dm_sba.sv index 83594e3b9..46d1680fe 100644 --- a/src/debug/dm_sba.sv +++ b/src/debug/dm_sba.sv @@ -111,7 +111,7 @@ module dm_sba ( end endcase // handle error case - if (sbaccess_i > 3 && state_d != Idle) begin + if (sbaccess_i > 3 && state_q != Idle) begin req = 1'b0; state_d = Idle; sberror_valid_o = 1'b1; From f189437571117507f038932ec0425d028868cd7f Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Tue, 25 Sep 2018 11:47:38 +0200 Subject: [PATCH 63/94] Fix latch and timing loop in debu_req --- src/commit_stage.sv | 123 ++++++++++++++++++++++---------------------- 
src/csr_regfile.sv | 4 ++ 2 files changed, 66 insertions(+), 61 deletions(-) diff --git a/src/commit_stage.sv b/src/commit_stage.sv index 06c1beeb9..e91be4e68 100644 --- a/src/commit_stage.sv +++ b/src/commit_stage.sv @@ -91,73 +91,74 @@ module commit_stage #( // furthermore if the debugger is requesting to debug do not commit this instruction if we are not yet in debug mode // also check that there is no atomic memory operation committing, right now this is the only operation // which will take longer than one cycle to commit - if (commit_instr_i[0].valid && !halt_i && (!debug_req_i || debug_mode_i)) begin + if (commit_instr_i[0].valid && !halt_i) begin + if (!debug_req_i || debug_mode_i) begin + commit_ack_o[0] = 1'b1; + // register will be the all zero register. + // and also acknowledge the instruction, this is mainly done for the instruction tracer + // as it will listen on the instruction ack signal. For the overall result it does not make any + // difference as the whole pipeline is going to be flushed anyway. + if (!exception_o.valid) begin + // we can definitely write the register file + // if the instruction is not committing anything the destination + we_o[0] = 1'b1; - commit_ack_o[0] = 1'b1; - // register will be the all zero register. - // and also acknowledge the instruction, this is mainly done for the instruction tracer - // as it will listen on the instruction ack signal. For the overall result it does not make any - // difference as the whole pipeline is going to be flushed anyway. 
- if (!exception_o.valid) begin - // we can definitely write the register file - // if the instruction is not committing anything the destination - we_o[0] = 1'b1; - - // check whether the instruction we retire was a store - // do not commit the instruction if we got an exception since the store buffer will be cleared - // by the subsequent flush triggered by an exception - if (commit_instr_i[0].fu == STORE && !instr_0_is_amo) begin - // check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store) - if (commit_lsu_ready_i) - commit_lsu_o = 1'b1; - else // if the LSU buffer is not ready - do not commit, wait - commit_ack_o[0] = 1'b0; + // check whether the instruction we retire was a store + // do not commit the instruction if we got an exception since the store buffer will be cleared + // by the subsequent flush triggered by an exception + if (commit_instr_i[0].fu == STORE && !instr_0_is_amo) begin + // check if the LSU is ready to accept another commit entry (e.g.: a non-speculative store) + if (commit_lsu_ready_i) + commit_lsu_o = 1'b1; + else // if the LSU buffer is not ready - do not commit, wait + commit_ack_o[0] = 1'b0; + end end - end - // --------- - // CSR Logic - // --------- - // check whether the instruction we retire was a CSR instruction - if (commit_instr_i[0].fu == CSR) begin - // write the CSR file - commit_csr_o = 1'b1; - wdata_o[0] = csr_rdata_i; - csr_op_o = commit_instr_i[0].op; - csr_wdata_o = commit_instr_i[0].result; - end - // ------------------ - // SFENCE.VMA Logic - // ------------------ - // check if this instruction was a SFENCE_VMA - if (commit_instr_i[0].op == SFENCE_VMA) begin - // no store pending so we can flush the TLBs and pipeline - sfence_vma_o = no_st_pending_i; - // wait for the store buffer to drain until flushing the pipeline - commit_ack_o[0] = no_st_pending_i; - end - // ------------------ - // FENCE.I Logic - // ------------------ - // Fence synchronizes data and instruction streams. 
That means that we need to flush the private icache - // and the private dcache. This is the most expensive instruction. - if (commit_instr_i[0].op == FENCE_I || (flush_dcache_i && commit_instr_i[0].fu != STORE)) begin - commit_ack_o[0] = no_st_pending_i; - // tell the controller to flush the I$ - fence_i_o = no_st_pending_i; - end - // ------------------ - // FENCE Logic - // ------------------ - if (commit_instr_i[0].op == FENCE) begin - commit_ack_o[0] = no_st_pending_i; - // tell the controller to flush the D$ - fence_o = no_st_pending_i; + // --------- + // CSR Logic + // --------- + // check whether the instruction we retire was a CSR instruction + if (commit_instr_i[0].fu == CSR) begin + // write the CSR file + commit_csr_o = 1'b1; + wdata_o[0] = csr_rdata_i; + csr_op_o = commit_instr_i[0].op; + csr_wdata_o = commit_instr_i[0].result; + end + // ------------------ + // SFENCE.VMA Logic + // ------------------ + // check if this instruction was a SFENCE_VMA + if (commit_instr_i[0].op == SFENCE_VMA) begin + // no store pending so we can flush the TLBs and pipeline + sfence_vma_o = no_st_pending_i; + // wait for the store buffer to drain until flushing the pipeline + commit_ack_o[0] = no_st_pending_i; + end + // ------------------ + // FENCE.I Logic + // ------------------ + // Fence synchronizes data and instruction streams. That means that we need to flush the private icache + // and the private dcache. This is the most expensive instruction. 
+ if (commit_instr_i[0].op == FENCE_I || (flush_dcache_i && commit_instr_i[0].fu != STORE)) begin + commit_ack_o[0] = no_st_pending_i; + // tell the controller to flush the I$ + fence_i_o = no_st_pending_i; + end + // ------------------ + // FENCE Logic + // ------------------ + if (commit_instr_i[0].op == FENCE) begin + commit_ack_o[0] = no_st_pending_i; + // tell the controller to flush the D$ + fence_o = no_st_pending_i; + end end // ------------------ // AMO // ------------------ - if (instr_0_is_amo && !exception_o.valid) begin + if (instr_0_is_amo && !commit_instr_i[0].ex.valid) begin // AMO finished commit_ack_o[0] = amo_resp_i.ack; // flush the pipeline @@ -239,4 +240,4 @@ module commit_stage #( end end -endmodule +endmodule \ No newline at end of file diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv index b95100bc5..98e5abfd4 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -274,6 +274,10 @@ module csr_regfile #( sscratch_d = sscratch_q; stval_d = stval_q; satp_d = satp_q; + + cycle_d = cycle_q; + instret_d = instret_q; + en_ld_st_translation_d = en_ld_st_translation_q; // check for correct access rights and that we are writing From 313c78f07b64239f2e5048a8af4077c93b88baf4 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Thu, 27 Sep 2018 18:31:31 +0200 Subject: [PATCH 64/94] Fix single-stepping --- src/csr_regfile.sv | 18 +++++++++++++++--- src/scoreboard.sv | 4 +--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv index 7db40daf2..0e71ea504 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -619,9 +619,21 @@ module csr_regfile #( end // single step enable and we just retired an instruction - if (dcsr_q.step && (|commit_ack_i)) begin - // we saved the correct target address during execute - dpc_d = commit_instr_i[0].bp.predict_address; + if (dcsr_q.step && commit_ack_i[0]) begin + // valid CTRL flow change + if (commit_instr_i[0].fu == CTRL_FLOW) begin + // we saved the 
correct target address during execute + dpc_d = commit_instr_i[0].bp.predict_address; + // exception valid + end else if (ex_i.valid) begin + dpc_d = trap_vector_base_o; + // return from environment + end else if (eret_o) begin + dpc_d = epc_o; + // consecutive PC + end else begin + dpc_d = commit_instr_i[0].pc + (commit_instr_i[0].is_compressed ? 'h2 : 'h4); + end debug_mode_d = 1'b1; set_debug_pc_o = 1'b1; dcsr_d.cause = dm::CauseSingleStep; diff --git a/src/scoreboard.sv b/src/scoreboard.sv index 35fac33f6..990c59858 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -129,9 +129,7 @@ module scoreboard #( mem_n[trans_id_i[i]].sbe.valid = 1'b1; mem_n[trans_id_i[i]].sbe.result = wbdata_i[i]; // save the target address of a branch (needed for debug in commit stage) - if (resolved_branch_i.valid) begin - mem_n[trans_id_i[i]].sbe.bp.predict_address = resolved_branch_i.target_address; - end + mem_n[trans_id_i[i]].sbe.bp.predict_address = resolved_branch_i.target_address; // write the exception back if it is valid if (ex_i[i].valid) mem_n[trans_id_i[i]].sbe.ex = ex_i[i]; From c861ecfe5dc236f1c99222109dba91fcc6dc9f01 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Fri, 28 Sep 2018 21:41:06 +0200 Subject: [PATCH 65/94] :bug: Fix non-flushing AMOs --- src/cache_subsystem/miss_handler.sv | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cache_subsystem/miss_handler.sv b/src/cache_subsystem/miss_handler.sv index b119f68a6..31eed3a14 100644 --- a/src/cache_subsystem/miss_handler.sv +++ b/src/cache_subsystem/miss_handler.sv @@ -180,7 +180,7 @@ module miss_handler #( IDLE: begin // lowest priority are AMOs, wait until everything else is served before going for the AMOs - if (amo_req_i.req) begin + if (amo_req_i.req && !busy_i) begin // 1. 
Flush the cache if (!serve_amo_q) begin state_d = FLUSH_REQ_STATUS; @@ -203,6 +203,8 @@ module miss_handler #( // here comes the refill portion of code if (miss_req_valid[i] && !miss_req_bypass[i]) begin state_d = MISS; + // we are taking another request so don't take the AMO + serve_amo_d = 1'b0; // save to MSHR mshr_d.valid = 1'b1; mshr_d.we = miss_req_we[i]; From 7c422e4a83f5956f5ed5c58d95f4ca62e4181860 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Fri, 28 Sep 2018 21:56:59 +0200 Subject: [PATCH 66/94] :bug: Increase arbiter LSU arbiter depth --- src/lsu_arbiter.sv | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/lsu_arbiter.sv b/src/lsu_arbiter.sv index 0f4c73df5..7dafa1466 100644 --- a/src/lsu_arbiter.sv +++ b/src/lsu_arbiter.sv @@ -40,9 +40,10 @@ module lsu_arbiter ( // RR fashion. FIFOs need to be 2 deep in order to unconditionally accept loads and stores since we can // have a maximum of 2 outstanding loads. // if there are valid elements in the fifos, the unit posts the result on its output ports and expects it - // to be consumed unconditionally + // to be consumed unconditionally - localparam int DEPTH = 2; + // Important: this needs to be greater than 2 to unconditionally acept incoming requests + localparam int DEPTH = 4; typedef struct packed { logic [TRANS_ID_BITS-1:0] trans_id; @@ -64,9 +65,9 @@ module lsu_arbiter ( assign ld_in.result = ld_result_i; assign ld_in.ex = ld_ex_i; - assign trans_id_o = (idx) ? st_out.trans_id : ld_out.trans_id; - assign result_o = (idx) ? st_out.result : ld_out.result; - assign ex_o = (idx) ? st_out.ex : ld_out.ex; + assign trans_id_o = (idx) ? st_out.trans_id : ld_out.trans_id; + assign result_o = (idx) ? st_out.result : ld_out.result; + assign ex_o = (idx) ? 
st_out.ex : ld_out.ex; // round robin with "lookahead" for 2 requesters rrarbiter #( @@ -85,7 +86,7 @@ module lsu_arbiter ( fifo_v2 #( .dtype ( fifo_t ), .DEPTH ( DEPTH ) - ) i_ld_fifo ( + ) i_ld_fifo ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .flush_i ( flush_i ), @@ -98,12 +99,12 @@ module lsu_arbiter ( .push_i ( ld_valid_i ), .data_o ( ld_out ), .pop_i ( ld_ren ) - ); + ); fifo_v2 #( .dtype ( fifo_t ), .DEPTH ( DEPTH ) - ) i_st_fifo ( + ) i_st_fifo ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .flush_i ( flush_i ), @@ -116,7 +117,7 @@ module lsu_arbiter ( .push_i ( st_valid_i ), .data_o ( st_out ), .pop_i ( st_ren ) - ); + ); `ifndef SYNTHESIS From fc3251425be33bfdb27b89f901e91e51db9ebd4e Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sat, 29 Sep 2018 16:10:47 +0200 Subject: [PATCH 67/94] Fix testability issue in debug module --- src/debug/dm_csrs.sv | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/src/debug/dm_csrs.sv b/src/debug/dm_csrs.sv index aee8177ab..894ab5076 100644 --- a/src/debug/dm_csrs.sv +++ b/src/debug/dm_csrs.sv @@ -438,14 +438,26 @@ module dm_csrs #( end assign dmactive_o = dmcontrol_q.dmactive; - // if the PoR is set we want to re-set the other system as well - assign ndmreset_o = dmcontrol_q.ndmreset | (~rst_ni); assign cmd_o = command_q; assign progbuf_o = progbuf_q; assign data_o = data_q; assign resp_queue_pop = dmi_resp_ready_i & ~resp_queue_empty; + logic ndmreset_n; + + // if the PoR is set we want to re-set the other system as well + rstgen_bypass i_rstgen_bypass ( + .clk_i ( clk_i ), + .rst_ni ( ~(dmcontrol_q.ndmreset | ~rst_ni) ), + .rst_test_mode_ni ( rst_ni ), + .test_mode_i ( testmode_i ), + .rst_no ( ndmreset_n ), + .init_no () // keep open + ); + + assign ndmreset_o = ~ndmreset_n; + // response FIFO fifo_v2 #( .dtype ( logic [31:0] ), @@ -468,8 +480,17 @@ module dm_csrs #( always_ff @(posedge clk_i or negedge rst_ni) begin // PoR if (~rst_ni) begin - dmcontrol_q <= '0; - havereset_q 
<= '1; + dmcontrol_q <= '0; + havereset_q <= '1; + // this is the only write-able bit during reset + cmderr_q <= dm::CmdErrNone; + command_q <= '0; + abstractauto_q <= '0; + progbuf_q <= '0; + data_q <= '0; + sbcs_q <= '0; + sbaddr_q <= '0; + sbdata_q <= '0; end else begin // synchronous re-set of debug module, active-low, except for dmactive if (!dmcontrol_q.dmactive) begin @@ -508,4 +529,4 @@ module dm_csrs #( end end end -endmodule +endmodule \ No newline at end of file From 4689f066962ccf75a7e93a46e1ee708983e439c3 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sat, 29 Sep 2018 18:41:44 +0200 Subject: [PATCH 68/94] Improve testability of debug module --- .gitmodules | 3 +++ Makefile | 11 +++++---- src/clint/clint.sv | 4 ++-- src/common_cells | 2 +- src/debug/dm_csrs.sv | 2 +- src/debug/dm_mem.sv | 6 ++--- src/debug/dm_sba.sv | 7 +++--- src/debug/dm_top.sv | 5 ++-- src/debug/dmi_jtag.sv | 3 ++- src/debug/dmi_jtag_tap.sv | 20 ++++++++++++++-- src/tech_cells_generic | 1 + src/util/cluster_clock_gating.sv | 40 -------------------------------- tb/ariane_testharness.sv | 2 ++ 13 files changed, 47 insertions(+), 59 deletions(-) create mode 160000 src/tech_cells_generic delete mode 100644 src/util/cluster_clock_gating.sv diff --git a/.gitmodules b/.gitmodules index e11bbe389..d25c969d3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -19,3 +19,6 @@ [submodule "src/fpu_div_sqrt_mvp"] path = src/fpu_div_sqrt_mvp url = https://github.com/pulp-platform/fpu_div_sqrt_mvp.git +[submodule "src/tech_cells_generic"] + path = src/tech_cells_generic + url = https://github.com/pulp-platform/tech_cells_generic.git diff --git a/Makefile b/Makefile index 839ca25fc..907ae79c6 100755 --- a/Makefile +++ b/Makefile @@ -41,10 +41,12 @@ ariane_pkg := include/riscv_pkg.sv \ src/fpu/src/pkg/fpnew_pkg_constants.vhd # utility modules -util := $(wildcard src/util/*.svh) \ - src/util/instruction_tracer_pkg.sv \ - src/util/instruction_tracer_if.sv \ - src/util/cluster_clock_gating.sv \ +util := 
$(wildcard src/util/*.svh) \ + src/util/instruction_tracer_pkg.sv \ + src/util/instruction_tracer_if.sv \ + src/tech_cells_generic/src/cluster_clock_gating.sv \ + src/tech_cells_generic/src/cluster_clock_inverter.sv \ + src/tech_cells_generic/src/pulp_clock_mux2.sv \ src/util/sram.sv # Test packages @@ -75,6 +77,7 @@ src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \ src/common_cells/src/deprecated/generic_fifo.sv \ src/common_cells/src/deprecated/pulp_sync.sv \ src/common_cells/src/deprecated/find_first_one.sv \ + src/common_cells/src/rstgen_bypass.sv \ src/axi/src/axi_cut.sv \ src/axi/src/axi_join.sv \ src/fpga-support/rtl/SyncSpRamBeNx64.sv \ diff --git a/src/clint/clint.sv b/src/clint/clint.sv index a53766497..6b58686ed 100644 --- a/src/clint/clint.sv +++ b/src/clint/clint.sv @@ -24,7 +24,7 @@ module clint #( )( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low - + input logic testmode_i, AXI_BUS.Slave slave, input logic rtc_i, // Real-time clock in (usually 32.768 kHz) @@ -146,7 +146,7 @@ module clint #( // 1. 
Put the RTC input through a classic two stage edge-triggered synchronizer to filter out any // metastability effects (or at least make them unlikely :-)) sync_wedge i_sync_edge ( - .en_i ( 1'b1 ), + .en_i ( ~testmode_i ), .serial_i ( rtc_i ), .r_edge_o ( increase_timer ), .f_edge_o ( ), // left open diff --git a/src/common_cells b/src/common_cells index 9278bc769..21a060d2c 160000 --- a/src/common_cells +++ b/src/common_cells @@ -1 +1 @@ -Subproject commit 9278bc769f3efd006864a7ef7721f2796ed968e6 +Subproject commit 21a060d2c2c75173312b82cc72db96a2c62e66c5 diff --git a/src/debug/dm_csrs.sv b/src/debug/dm_csrs.sv index 894ab5076..f3a0aaca3 100644 --- a/src/debug/dm_csrs.sv +++ b/src/debug/dm_csrs.sv @@ -492,6 +492,7 @@ module dm_csrs #( sbaddr_q <= '0; sbdata_q <= '0; end else begin + havereset_q <= havereset_d; // synchronous re-set of debug module, active-low, except for dmactive if (!dmcontrol_q.dmactive) begin dmcontrol_q.haltreq <= '0; @@ -516,7 +517,6 @@ module dm_csrs #( sbaddr_q <= '0; sbdata_q <= '0; end else begin - havereset_q <= havereset_d; dmcontrol_q <= dmcontrol_d; cmderr_q <= cmderr_d; command_q <= command_d; diff --git a/src/debug/dm_mem.sv b/src/debug/dm_mem.sv index 36d1f63be..3cfb09c2d 100644 --- a/src/debug/dm_mem.sv +++ b/src/debug/dm_mem.sv @@ -20,7 +20,7 @@ module dm_mem #( parameter int NrHarts = -1 )( input logic clk_i, // Clock - input logic dmactive_i, // debug module reset + input logic rst_ni, // debug module reset output logic [NrHarts-1:0] debug_req_o, input logic [19:0] hartsel_i, @@ -363,8 +363,8 @@ module dm_mem #( // the ROM base address assign fwd_rom_d = (addr_i[DbgAddressBits-1:0] >= dm::HaltAddress[DbgAddressBits-1:0]) ? 
1'b1 : 1'b0; - always_ff @(posedge clk_i) begin - if (~dmactive_i) begin + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin fwd_rom_q <= 1'b0; rdata_q <= '0; halted_q <= 1'b0; diff --git a/src/debug/dm_sba.sv b/src/debug/dm_sba.sv index 46d1680fe..7b46d92cb 100644 --- a/src/debug/dm_sba.sv +++ b/src/debug/dm_sba.sv @@ -18,6 +18,7 @@ module dm_sba ( input logic clk_i, // Clock + input logic rst_ni, input logic dmactive_i, // synchronous reset active low AXI_BUS.Master axi_master, @@ -120,8 +121,8 @@ module dm_sba ( // further error handling should go here ... end - always_ff @(posedge clk_i) begin - if (~dmactive_i) begin + always_ff @(posedge clk_i or negedge rst_ni) begin + if (~rst_ni) begin state_q <= Idle; end else begin state_q <= state_d; @@ -133,7 +134,7 @@ module dm_sba ( .DATA_WIDTH ( 64 ) ) i_axi_master ( .clk_i ( clk_i ), - .rst_ni ( dmactive_i ), + .rst_ni ( rst_ni ), .req_i ( req ), .type_i ( std_cache_pkg::SINGLE_REQ ), .gnt_o ( gnt ), diff --git a/src/debug/dm_top.sv b/src/debug/dm_top.sv index 5e3431677..303c414af 100644 --- a/src/debug/dm_top.sv +++ b/src/debug/dm_top.sv @@ -143,6 +143,7 @@ module dm_top #( dm_sba i_dm_sba ( .clk_i ( clk_i ), + .rst_ni ( rst_ni ), .dmactive_i ( dmactive_o ), .axi_master, .sbaddress_i ( sbaddress_csrs_sba ), @@ -166,7 +167,7 @@ module dm_top #( .NrHarts (NrHarts) ) i_dm_mem ( .clk_i ( clk_i ), - .dmactive_i ( dmactive_o ), + .rst_ni ( rst_ni ), .debug_req_o ( debug_req_o ), .hartsel_i ( hartsel ), .haltreq_i ( haltreq ), @@ -197,7 +198,7 @@ module dm_top #( .AXI_USER_WIDTH ( AxiUserWidth ) ) i_axi2mem ( .clk_i ( clk_i ), - .rst_ni ( dmactive_o ), + .rst_ni ( rst_ni ), .slave ( axi_slave ), .req_o ( req ), .we_o ( we ), diff --git a/src/debug/dmi_jtag.sv b/src/debug/dmi_jtag.sv index 430ccae51..2b6e8a091 100644 --- a/src/debug/dmi_jtag.sv +++ b/src/debug/dmi_jtag.sv @@ -19,9 +19,9 @@ module dmi_jtag ( input logic clk_i, // DMI Clock input logic rst_ni, // Asynchronous reset active low + input 
logic testmode_i, output logic dmi_rst_no, // hard reset - output dm::dmi_req_t dmi_req_o, output logic dmi_req_valid_o, input logic dmi_req_ready_i, @@ -218,6 +218,7 @@ module dmi_jtag ( .td_i, .td_o, .tdo_oe_o, + .testmode_i ( testmode_i ), .test_logic_reset_o ( test_logic_reset ), .shift_dr_o ( shift_dr ), .update_dr_o ( update_dr ), diff --git a/src/debug/dmi_jtag_tap.sv b/src/debug/dmi_jtag_tap.sv index 5d55bacb1..ae4b2fcfb 100644 --- a/src/debug/dmi_jtag_tap.sv +++ b/src/debug/dmi_jtag_tap.sv @@ -25,6 +25,7 @@ module dmi_jtag_tap #( input logic td_i, // JTAG test data input pad output logic td_o, // JTAG test data output pad output logic tdo_oe_o, // Data out output enable + input logic testmode_i, output logic test_logic_reset_o, output logic shift_dr_o, output logic update_dr_o, @@ -207,8 +208,23 @@ module dmi_jtag_tap #( end - // TDO changes state at negative edge of TCK - always_ff @(negedge tck_i, negedge trst_ni) begin + // DFT + logic tck_n, tck_ni; + + cluster_clock_inverter i_tck_inv ( + .clk_i ( tck_i ), + .clk_o ( tck_ni ) + ); + + pulp_clock_mux2 i_dft_tck_mux ( + .clk0_i ( tck_ni ), + .clk1_i ( tck_i ), // bypass the inverted clock for testing + .clk_sel_i ( testmode_i ), + .clk_o ( tck_n ) + ); + + // TDO changes state at negative edge of TCK + always_ff @(posedge tck_n, negedge trst_ni) begin if (~trst_ni) begin td_o <= 1'b0; tdo_oe_o <= 1'b0; diff --git a/src/tech_cells_generic b/src/tech_cells_generic new file mode 160000 index 000000000..ffe7818dc --- /dev/null +++ b/src/tech_cells_generic @@ -0,0 +1 @@ +Subproject commit ffe7818dc24eba29cf3634d404d1b3b85034272b diff --git a/src/util/cluster_clock_gating.sv b/src/util/cluster_clock_gating.sv deleted file mode 100644 index f2b10b29f..000000000 --- a/src/util/cluster_clock_gating.sv +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2018 ETH Zurich and University of Bologna. 
-// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. -// -// Behavioural GLock Gating -// File: cluster_clock_gating.sv -// Author: ? -// Date: ? - -module cluster_clock_gating ( - input logic clk_i, - input logic en_i, - input logic test_en_i, - output logic clk_o - ); - -`ifdef PULP_FPGA_EMUL - // no clock gates in FPGA flow - assign clk_o = clk_i; -`elsif verilator - assign clk_o = clk_i; -`else - logic clk_en; - - always_latch - begin - if (clk_i == 1'b0) - clk_en <= en_i | test_en_i; - end - - assign clk_o = clk_i & clk_en; -`endif - -endmodule diff --git a/tb/ariane_testharness.sv b/tb/ariane_testharness.sv index 3b451ca10..66103be8b 100644 --- a/tb/ariane_testharness.sv +++ b/tb/ariane_testharness.sv @@ -131,6 +131,7 @@ module ariane_testharness #( dmi_jtag i_dmi_jtag ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), + .testmode_i ( test_en ), .dmi_req_o ( jtag_dmi_req ), .dmi_req_valid_o ( jtag_req_valid ), .dmi_req_ready_i ( debug_req_ready ), @@ -300,6 +301,7 @@ module ariane_testharness #( ) i_clint ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), + .testmode_i ( test_en ), .slave ( master[1] ), .rtc_i ( rtc_i ), .timer_irq_o ( timer_irq ), From 403db830498041d9353ae02cf07ef48dadb2abe1 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Fri, 28 Sep 2018 21:56:59 +0200 Subject: [PATCH 69/94] :bug: Increase arbiter LSU arbiter depth --- src/lsu_arbiter.sv | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 
deletions(-) diff --git a/src/lsu_arbiter.sv b/src/lsu_arbiter.sv index 0f4c73df5..7dafa1466 100644 --- a/src/lsu_arbiter.sv +++ b/src/lsu_arbiter.sv @@ -40,9 +40,10 @@ module lsu_arbiter ( // RR fashion. FIFOs need to be 2 deep in order to unconditionally accept loads and stores since we can // have a maximum of 2 outstanding loads. // if there are valid elements in the fifos, the unit posts the result on its output ports and expects it - // to be consumed unconditionally + // to be consumed unconditionally - localparam int DEPTH = 2; + // Important: this needs to be greater than 2 to unconditionally acept incoming requests + localparam int DEPTH = 4; typedef struct packed { logic [TRANS_ID_BITS-1:0] trans_id; @@ -64,9 +65,9 @@ module lsu_arbiter ( assign ld_in.result = ld_result_i; assign ld_in.ex = ld_ex_i; - assign trans_id_o = (idx) ? st_out.trans_id : ld_out.trans_id; - assign result_o = (idx) ? st_out.result : ld_out.result; - assign ex_o = (idx) ? st_out.ex : ld_out.ex; + assign trans_id_o = (idx) ? st_out.trans_id : ld_out.trans_id; + assign result_o = (idx) ? st_out.result : ld_out.result; + assign ex_o = (idx) ? 
st_out.ex : ld_out.ex; // round robin with "lookahead" for 2 requesters rrarbiter #( @@ -85,7 +86,7 @@ module lsu_arbiter ( fifo_v2 #( .dtype ( fifo_t ), .DEPTH ( DEPTH ) - ) i_ld_fifo ( + ) i_ld_fifo ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .flush_i ( flush_i ), @@ -98,12 +99,12 @@ module lsu_arbiter ( .push_i ( ld_valid_i ), .data_o ( ld_out ), .pop_i ( ld_ren ) - ); + ); fifo_v2 #( .dtype ( fifo_t ), .DEPTH ( DEPTH ) - ) i_st_fifo ( + ) i_st_fifo ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .flush_i ( flush_i ), @@ -116,7 +117,7 @@ module lsu_arbiter ( .push_i ( st_valid_i ), .data_o ( st_out ), .pop_i ( st_ren ) - ); + ); `ifndef SYNTHESIS From d3e6b982f31e198607a1dbce71659fb734af4f04 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Thu, 27 Sep 2018 18:31:31 +0200 Subject: [PATCH 70/94] Fix single-stepping --- src/csr_regfile.sv | 31 ++++++++++++++++++++++--------- src/scoreboard.sv | 2 ++ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv index 45120ee4b..347d1fbdc 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -105,9 +105,9 @@ module csr_regfile #( riscv::status_rv64_t mstatus_q, mstatus_d; riscv::satp_t satp_q, satp_d; riscv::dcsr_t dcsr_q, dcsr_d; - + logic mtvec_rst_load_q;// used to determine whether we came out of reset - + logic [63:0] dpc_q, dpc_d; logic [63:0] dscratch0_q, dscratch0_d; logic [63:0] mtvec_q, mtvec_d; @@ -245,16 +245,16 @@ module csr_regfile #( dpc_d = dpc_q; dscratch0_d = dscratch0_q; mstatus_d = mstatus_q; - + // check whether we come out of reset - // this is a workaround. some tools have issues - // having boot_addr_i in the asynchronous + // this is a workaround. some tools have issues + // having boot_addr_i in the asynchronous // reset assignment to mtvec_d, even though // boot_addr_i will be assigned a constant - // on the top-level. + // on the top-level. 
if (mtvec_rst_load_q) begin mtvec_d = boot_addr_i + 'h40; - end else begin + end else begin mtvec_d = mtvec_q; end @@ -537,8 +537,21 @@ module csr_regfile #( end // single step enable and we just retired an instruction - if (dcsr_q.step && (|commit_ack_i)) begin - dpc_d = next_pc; + if (dcsr_q.step && commit_ack_i[0]) begin + // valid CTRL flow change + if (commit_instr_i[0].fu == CTRL_FLOW) begin + // we saved the correct target address during execute + dpc_d = commit_instr_i[0].bp.predict_address; + // exception valid + end else if (ex_i.valid) begin + dpc_d = trap_vector_base_o; + // return from environment + end else if (eret_o) begin + dpc_d = epc_o; + // consecutive PC + end else begin + dpc_d = commit_instr_i[0].pc + (commit_instr_i[0].is_compressed ? 'h2 : 'h4); + end debug_mode_d = 1'b1; set_debug_pc_o = 1'b1; dcsr_d.cause = dm::CauseSingleStep; diff --git a/src/scoreboard.sv b/src/scoreboard.sv index 77a80fca6..ec7f6af7f 100644 --- a/src/scoreboard.sv +++ b/src/scoreboard.sv @@ -123,6 +123,8 @@ module scoreboard #( if (wb_valid_i[i] && mem_n[trans_id_i[i]].issued) begin mem_n[trans_id_i[i]].sbe.valid = 1'b1; mem_n[trans_id_i[i]].sbe.result = wbdata_i[i]; + // save the target address of a branch (needed for debug in commit stage) + mem_n[trans_id_i[i]].sbe.bp.predict_address = resolved_branch_i.target_address; // write the exception back if it is valid if (ex_i[i].valid) mem_n[trans_id_i[i]].sbe.ex = ex_i[i]; From c7117c820ff42e54b1dc6827db7bf3550bf09346 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Fri, 28 Sep 2018 21:41:06 +0200 Subject: [PATCH 71/94] :bug: Fix non-flushing AMOs --- src/cache_subsystem/miss_handler.sv | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/cache_subsystem/miss_handler.sv b/src/cache_subsystem/miss_handler.sv index d09e8ff90..04246b4f3 100644 --- a/src/cache_subsystem/miss_handler.sv +++ b/src/cache_subsystem/miss_handler.sv @@ -152,7 +152,18 @@ module miss_handler #( case (state_q) 
IDLE: begin - + // lowest priority are AMOs, wait until everything else is served before going for the AMOs + if (amo_req_i.req && !busy_i) begin + // 1. Flush the cache + if (!serve_amo_q) begin + state_d = FLUSH_REQ_STATUS; + serve_amo_d = 1'b1; + // 2. Do the AMO + end else begin + state_d = AMO_LOAD; + serve_amo_d = 1'b0; + end + end // check if we want to flush and can flush e.g.: we are not busy anymore // TODO: Check that the busy flag is indeed needed if (flush_i && !busy_i) begin @@ -165,6 +176,8 @@ module miss_handler #( // here comes the refill portion of code if (miss_req_valid[i] && !miss_req_bypass[i]) begin state_d = MISS; + // we are taking another request so don't take the AMO + serve_amo_d = 1'b0; // save to MSHR mshr_d.valid = 1'b1; mshr_d.we = miss_req_we[i]; From 18ba836650b80ab2a726261085318061ed306c57 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Sun, 7 Oct 2018 09:10:05 +0200 Subject: [PATCH 72/94] :wrench: Add logging option to simc tests --- Makefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Makefile b/Makefile index 907ae79c6..3f77106a6 100755 --- a/Makefile +++ b/Makefile @@ -171,6 +171,13 @@ simc: build -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi -do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; run -all; exit" \ ${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$(riscv-test) ++$(target-options) +simc-log: build + vsim${questa_version} +permissive -64 -c -lib ${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \ + +BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ + $(QUESTASIM_FLAGS) \ + -gblso $(RISCV)/lib/libfesvr.so -sv_lib $(dpi-library)/ariane_dpi -do " set StdArithNoWarnings 1; set NumericStdNoWarnings 1; log -r /*; run -all; exit" \ + ${top_level}_optimized +permissive-off ++$(riscv-test-dir)/$(riscv-test) ++$(target-options) + $(riscv-asm-tests): build vsim${questa_version} +permissive -64 -c -lib 
${library} +max-cycles=$(max_cycles) +UVM_TESTNAME=${test_case} \ +BASEDIR=$(riscv-test-dir) $(uvm-flags) "+UVM_VERBOSITY=LOW" -coverage -classdebug +jtag_rbb_enable=0 \ From 80199beed13e802c732ca2ea15843e088f5fdb61 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Sun, 7 Oct 2018 09:12:03 +0200 Subject: [PATCH 73/94] :construction: Fix scalar float decoding and issue --- include/ariane_pkg.sv | 3 +- src/decoder.sv | 12 ++++++-- src/fpu | 2 +- src/fpu_wrap.sv | 58 ++++++++++++++++++++++++++++---------- src/issue_read_operands.sv | 3 +- 5 files changed, 58 insertions(+), 20 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index d68bbf02a..04b65d537 100644 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -291,10 +291,11 @@ package ariane_pkg; return 1'b0; endfunction; - // ternary operations encode the rs3 address in the imm field + // ternary operations encode the rs3 address in the imm field, also add/sub function automatic logic is_imm_fpr (input fu_op op); if (FP_PRESENT) begin // makes function static for non-fp case unique case (op) inside + [FADD:FSUB], // ADD/SUB need inputs as Operand B/C [FMADD:FNMADD] : return 1'b1; // Fused Computational Operations default : return 1'b0; // all other ops endcase diff --git a/src/decoder.sv b/src/decoder.sv index 77cfdc66f..a0de8dc79 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -733,8 +733,16 @@ module decoder ( check_fprm = 1'b1; // decode FP instruction unique case (instr.rftype.funct5) - 5'b00000: instruction_o.op = FADD; // fadd.fmt - FP Addition - 5'b00001: instruction_o.op = FSUB; // fsub.fmt - FP Subtraction + 5'b00000: begin + instruction_o.op = FADD; // fadd.fmt - FP Addition + instruction_o.rs2 = instr.rftype.rs1; // Operand B is set to rs1 + imm_select = IIMM; // Operand C is set to rs2 + end + 5'b00001: begin + instruction_o.op = FSUB; // fsub.fmt - FP Subtraction + instruction_o.rs2 = instr.rftype.rs1; // Operand B is set to rs1 + imm_select = IIMM; // Operand C is set to 
rs2 + end 5'b00010: instruction_o.op = FMUL; // fmul.fmt - FP Multiplication 5'b00011: instruction_o.op = FDIV; // fdiv.fmt - FP Division 5'b01011: begin diff --git a/src/fpu b/src/fpu index d79f013ee..1cfdf5d62 160000 --- a/src/fpu +++ b/src/fpu @@ -1 +1 @@ -Subproject commit d79f013ee03c3ef1fac62932ea132364952cb600 +Subproject commit 1cfdf5d62c3144001501aeb985c2e3ca95afda01 diff --git a/src/fpu_wrap.sv b/src/fpu_wrap.sv index 00902e1c7..ad1093e21 100644 --- a/src/fpu_wrap.sv +++ b/src/fpu_wrap.sv @@ -58,7 +58,8 @@ module fpu_wrap ( logic [OPBITS-1:0] OP_F2I; logic [OPBITS-1:0] OP_I2F; logic [OPBITS-1:0] OP_F2F; - logic [OPBITS-1:0] OP_CPK; + logic [OPBITS-1:0] OP_CPKAB; + logic [OPBITS-1:0] OP_CPKCD; logic [FMTBITS-1:0] FMT_FP32; logic [FMTBITS-1:0] FMT_FP64; @@ -90,7 +91,8 @@ module fpu_wrap ( .OP_F2I ( OP_F2I ), .OP_I2F ( OP_I2F ), .OP_F2F ( OP_F2F ), - .OP_CPK ( OP_CPK ), + .OP_CPKAB ( OP_CPKAB ), + .OP_CPKCD ( OP_CPKCD ), .FMT_NUMBITS ( FMT_NUMBITS ), .FMT_FP32 ( FMT_FP32 ), .FMT_FP64 ( FMT_FP64 ), @@ -146,6 +148,7 @@ module fpu_wrap ( always_comb begin : input_translation automatic logic vec_replication; // control honoring of replication flag + automatic logic check_ah; // Default Values operand_a_n = operand_a_i; @@ -160,6 +163,7 @@ module fpu_wrap ( fpu_vec_op_n = fu_i == FPU_VEC; fpu_tag_n = trans_id_i; vec_replication = fpu_rm_i[0]; // replication bit is sent via rm field + check_ah = 1'b0; // whether set AH encoding from MSB of rm_i // Scalar Rounding Modes - some ops encode inside RM but use smaller range if (!(fpu_rm_i inside {[3'b000:3'b100]})) @@ -187,8 +191,8 @@ module fpu_wrap ( endcase - // Operations (this can modify the rounding mode field!) - case (operator_i) + // Operations (this can modify the rounding mode field and format!) 
+ unique case (operator_i) // Addition FADD : fpu_op_n = OP_ADD; // Subtraction is modified ADD @@ -201,7 +205,11 @@ module fpu_wrap ( // Division FDIV : fpu_op_n = OP_DIV; // Min/Max - OP is encoded in rm (000-001) - FMIN_MAX : fpu_op_n = OP_MINMAX; + FMIN_MAX : begin + fpu_op_n = OP_MINMAX; + fpu_rm_n = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit + check_ah = 1'b1; // AH has RM MSB encoding + end // Square Root FSQRT : fpu_op_n = OP_SQRT; // Fused Multiply Add @@ -286,26 +294,40 @@ module fpu_wrap ( end end // Scalar Sign Injection - op encoded in rm (000-010) - FSGNJ : fpu_op_n = OP_SGNJ; + FSGNJ : begin + fpu_op_n = OP_SGNJ; + fpu_rm_n = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit + check_ah = 1'b1; // AH has RM MSB encoding + end // Move from FPR to GPR - mapped to SGNJ-passthrough since no recoding FMV_F2X : begin fpu_op_n = OP_SGNJ; fpu_rm_n = 3'b011; // passthrough without checking nan-box fpu_op_mod_n = 1'b1; // no NaN-Boxing - operand_b_n = operand_a_n; + check_ah = 1'b1; // AH has RM MSB encoding + // operand_b_n = operand_a_n; vec_replication = 1'b0; // no replication, we set second operand end // Move from GPR to FPR - mapped to NOP since no recoding FMV_X2F : begin fpu_op_n = OP_SGNJ; fpu_rm_n = 3'b011; // passthrough without checking nan-box - operand_b_n = operand_a_n; + check_ah = 1'b1; // AH has RM MSB encoding + // operand_b_n = operand_a_n; vec_replication = 1'b0; // no replication, we set second operand end // Scalar Comparisons - op encoded in rm (000-010) - FCMP : fpu_op_n = OP_CMP; + FCMP : begin + fpu_op_n = OP_CMP; + fpu_rm_n = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit + check_ah = 1'b1; // AH has RM MSB encoding + end // Classification - FCLASS : fpu_op_n = OP_CLASS; + FCLASS : begin + fpu_op_n = OP_CLASS; + fpu_rm_n = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit + check_ah = 1'b1; // AH has RM MSB encoding + end // Vectorial Minimum - set up scalar encoding in rm VFMIN : begin fpu_op_n = OP_MINMAX; @@ -371,9 
+393,15 @@ module fpu_wrap ( // VFCPKCD_D : // by default set opb = opa to have a sgnj nop - default : operand_b_n = operand_a_n; + // default : operand_b_n = operand_a_n; + default : ; //nothing endcase + // Scalar AH encoding fixing + if (!fpu_vec_op_n && check_ah) + if (fpu_rm_i[2]) + fpu_fmt_n = FMT_FP16ALT; + // Replication if (fpu_vec_op_n && vec_replication) case (fpu_fmt_n) @@ -384,10 +412,10 @@ module fpu_wrap ( endcase // fpu_fmt_n // Ugly but needs to be done: map additions to operands B and C - if (fpu_op_n == OP_ADD) begin - operand_c_n = operand_b_n; - operand_b_n = operand_a_n; - end + // if (fpu_op_n == OP_ADD) begin + // operand_c_n = operand_b_n; + // operand_b_n = operand_a_n; + // end end diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index 87e693c76..bf2fe44c6 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -228,7 +228,8 @@ module issue_read_operands #( operand_a_n = {52'b0, issue_instr_i.rs1[4:0]}; end // or is it an immediate (including PC), this is not the case for a store and control flow instructions - if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW)) begin + // also make sure the imm is not already used as an FP operand + if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW) && !is_imm_fpr(issue_instr_i.op)) begin operand_b_n = issue_instr_i.result; end end From 7b6087c0f33a03f2000d22dbc777a882736325f8 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Sun, 7 Oct 2018 09:56:28 +0200 Subject: [PATCH 74/94] :art: Fix instruction tracer for new FADD/FSUB --- src/util/instruction_trace_item.svh | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/util/instruction_trace_item.svh b/src/util/instruction_trace_item.svh index 95a0c6cec..d976179c6 100644 --- a/src/util/instruction_trace_item.svh +++ b/src/util/instruction_trace_item.svh @@ -240,8 +240,8 @@ class 
instruction_trace_item; INSTR_FNSMSUB: s = this.printR4Instr("fnmsub"); INSTR_FNMADD: s = this.printR4Instr("fnmadd"); - INSTR_FADD: s = this.printRFInstr("fadd", 1'b1); - INSTR_FSUB: s = this.printRFInstr("fsub", 1'b1); + INSTR_FADD: s = this.printRFBCInstr("fadd", 1'b1); + INSTR_FSUB: s = this.printRFBCInstr("fsub", 1'b1); INSTR_FMUL: s = this.printRFInstr("fmul", 1'b1); INSTR_FDIV: s = this.printRFInstr("fdiv", 1'b1); INSTR_FSQRT: s = this.printRFInstr1Op("fsqrt", 1'b1); @@ -366,6 +366,21 @@ class instruction_trace_item; return $sformatf("%-12s %4s, %s, %s", mnemonic, regAddrToStr(rd), regAddrToStr(rs1), regAddrToStr(rs2)); endfunction // printRInstr + function string printRFBCInstr(input string mnemonic, input bit use_rnd); + + result_regs.push_back(rd); + result_fpr.push_back(is_rd_fpr(sbe.op)); + read_regs.push_back(rs2); + read_fpr.push_back(is_rs2_fpr(sbe.op)); + read_regs.push_back(sbe.result[4:0]); + read_fpr.push_back(is_imm_fpr(sbe.op)); + + if (use_rnd && instr[14:12]!=3'b111) + return $sformatf("%-12s %4s, %s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(rd):regAddrToStr(rd), is_rs2_fpr(sbe.op)?fpRegAddrToStr(rs2):regAddrToStr(rs2), is_imm_fpr(sbe.op)?fpRegAddrToStr(sbe.result[4:0]):regAddrToStr(sbe.result[4:0]), fpRmToStr(instr[14:12])); + else + return $sformatf("%-12s %4s, %s, %s", $sformatf("%s.%s",mnemonic, fpFmtToStr(instr[26:25])), is_rd_fpr(sbe.op)?fpRegAddrToStr(rd):regAddrToStr(rd), is_rs2_fpr(sbe.op)?fpRegAddrToStr(rs2):regAddrToStr(rs2), is_imm_fpr(sbe.op)?fpRegAddrToStr(sbe.result[4:0]):regAddrToStr(sbe.result[4:0])); + endfunction // printRFInstr + function string printRFInstr(input string mnemonic, input bit use_rnd); result_regs.push_back(rd); From 7ececad95f76cd218198b4f96b46c83f68d637fb Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Sun, 7 Oct 2018 10:10:57 +0200 Subject: [PATCH 75/94] :art: Refactor FPU wrapper --- src/fpu_wrap.sv | 773 
+++++++++++++++++++++++------------------------- 1 file changed, 377 insertions(+), 396 deletions(-) diff --git a/src/fpu_wrap.sv b/src/fpu_wrap.sv index ad1093e21..185ad1fc9 100644 --- a/src/fpu_wrap.sv +++ b/src/fpu_wrap.sv @@ -118,18 +118,18 @@ module fpu_wrap ( //------------------------------------------------- // Inputs to the FPU and protocol inversion buffer //------------------------------------------------- - logic [FLEN-1:0] operand_a_n, operand_a_q, operand_a; - logic [FLEN-1:0] operand_b_n, operand_b_q, operand_b; - logic [FLEN-1:0] operand_c_n, operand_c_q, operand_c; - logic [OPBITS-1:0] fpu_op_n, fpu_op_q, fpu_op; - logic fpu_op_mod_n, fpu_op_mod_q, fpu_op_mod; - logic [FMTBITS-1:0] fpu_fmt_n, fpu_fmt_q, fpu_fmt; - logic [FMTBITS-1:0] fpu_fmt2_n, fpu_fmt2_q, fpu_fmt2; - logic [IFMTBITS-1:0] fpu_ifmt_n, fpu_ifmt_q, fpu_ifmt; - logic [2:0] fpu_rm_n, fpu_rm_q, fpu_rm; - logic fpu_vec_op_n, fpu_vec_op_q, fpu_vec_op; + logic [FLEN-1:0] operand_a_d, operand_a_q, operand_a; + logic [FLEN-1:0] operand_b_d, operand_b_q, operand_b; + logic [FLEN-1:0] operand_c_d, operand_c_q, operand_c; + logic [OPBITS-1:0] fpu_op_d, fpu_op_q, fpu_op; + logic fpu_op_mod_d, fpu_op_mod_q, fpu_op_mod; + logic [FMTBITS-1:0] fpu_fmt_d, fpu_fmt_q, fpu_fmt; + logic [FMTBITS-1:0] fpu_fmt2_d, fpu_fmt2_q, fpu_fmt2; + logic [IFMTBITS-1:0] fpu_ifmt_d, fpu_ifmt_q, fpu_ifmt; + logic [2:0] fpu_rm_d, fpu_rm_q, fpu_rm; + logic fpu_vec_op_d, fpu_vec_op_q, fpu_vec_op; - logic [TRANS_ID_BITS-1:0] fpu_tag_n, fpu_tag_q, fpu_tag; + logic [TRANS_ID_BITS-1:0] fpu_tag_d, fpu_tag_q, fpu_tag; logic fpu_in_ready, reg_in_ready; logic fpu_in_valid, reg_in_valid; @@ -139,422 +139,403 @@ module fpu_wrap ( logic [4:0] fpu_status; - // generate if (FP_PRESENT) begin : fpu_gen + //----------------------------- + // Translate inputs + //----------------------------- - //----------------------------- - // Translate inputs - //----------------------------- + always_comb begin : input_translation - always_comb 
begin : input_translation + automatic logic vec_replication; // control honoring of replication flag + automatic logic check_ah; - automatic logic vec_replication; // control honoring of replication flag - automatic logic check_ah; + // Default Values + operand_a_d = operand_a_i; + operand_b_d = operand_b_i; + operand_c_d = operand_c_i; + fpu_op_d = OP_SGNJ; // sign injection by default + fpu_op_mod_d = 1'b0; + fpu_fmt_d = FMT_FP32; + fpu_fmt2_d = FMT_FP32; + fpu_ifmt_d = IFMT_INT32; + fpu_rm_d = fpu_rm_i; + fpu_vec_op_d = fu_i == FPU_VEC; + fpu_tag_d = trans_id_i; + vec_replication = fpu_rm_i[0]; // replication bit is sent via rm field + check_ah = 1'b0; // whether set AH encoding from MSB of rm_i - // Default Values - operand_a_n = operand_a_i; - operand_b_n = operand_b_i; - operand_c_n = operand_c_i; - fpu_op_n = OP_SGNJ; // sign injection by default - fpu_op_mod_n = 1'b0; - fpu_fmt_n = FMT_FP32; - fpu_fmt2_n = FMT_FP32; - fpu_ifmt_n = IFMT_INT32; - fpu_rm_n = fpu_rm_i; - fpu_vec_op_n = fu_i == FPU_VEC; - fpu_tag_n = trans_id_i; - vec_replication = fpu_rm_i[0]; // replication bit is sent via rm field - check_ah = 1'b0; // whether set AH encoding from MSB of rm_i + // Scalar Rounding Modes - some ops encode inside RM but use smaller range + if (!(fpu_rm_i inside {[3'b000:3'b100]})) + fpu_rm_d = fpu_frm_i; - // Scalar Rounding Modes - some ops encode inside RM but use smaller range - if (!(fpu_rm_i inside {[3'b000:3'b100]})) - fpu_rm_n = fpu_frm_i; + // Vectorial ops always consult FRM + if (fpu_vec_op_d) + fpu_rm_d = fpu_frm_i; - // Vectorial ops always consult FRM - if (fpu_vec_op_n) - fpu_rm_n = fpu_frm_i; - - // Formats - unique case (fpu_fmt_i) - // FP32 - 2'b00 : fpu_fmt_n = FMT_FP32; - // FP64 or FP16ALT (vectorial) - 2'b01 : fpu_fmt_n = fpu_vec_op_n ? 
FMT_FP16ALT : FMT_FP64; - // FP16 or FP16ALT (scalar) - 2'b10 : begin - if (!fpu_vec_op_n && fpu_rm_i==3'b101) - fpu_fmt_n = FMT_FP16ALT; - else - fpu_fmt_n = FMT_FP16; - end - // FP8 - default : fpu_fmt_n = FMT_FP8; - endcase + // Formats + unique case (fpu_fmt_i) + // FP32 + 2'b00 : fpu_fmt_d = FMT_FP32; + // FP64 or FP16ALT (vectorial) + 2'b01 : fpu_fmt_d = fpu_vec_op_d ? FMT_FP16ALT : FMT_FP64; + // FP16 or FP16ALT (scalar) + 2'b10 : begin + if (!fpu_vec_op_d && fpu_rm_i==3'b101) + fpu_fmt_d = FMT_FP16ALT; + else + fpu_fmt_d = FMT_FP16; + end + // FP8 + default : fpu_fmt_d = FMT_FP8; + endcase - // Operations (this can modify the rounding mode field and format!) - unique case (operator_i) - // Addition - FADD : fpu_op_n = OP_ADD; - // Subtraction is modified ADD - FSUB : begin - fpu_op_n = OP_ADD; - fpu_op_mod_n = 1'b1; + // Operations (this can modify the rounding mode field and format!) + unique case (operator_i) + // Addition + FADD : fpu_op_d = OP_ADD; + // Subtraction is modified ADD + FSUB : begin + fpu_op_d = OP_ADD; + fpu_op_mod_d = 1'b1; + end + // Multiplication + FMUL : fpu_op_d = OP_MUL; + // Division + FDIV : fpu_op_d = OP_DIV; + // Min/Max - OP is encoded in rm (000-001) + FMIN_MAX : begin + fpu_op_d = OP_MINMAX; + fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit + check_ah = 1'b1; // AH has RM MSB encoding + end + // Square Root + FSQRT : fpu_op_d = OP_SQRT; + // Fused Multiply Add + FMADD : fpu_op_d = OP_FMADD; + // Fused Multiply Subtract is modified FMADD + FMSUB : begin + fpu_op_d = OP_FMADD; + fpu_op_mod_d = 1'b1; + end + // Fused Negated Multiply Subtract + FNMSUB : fpu_op_d = OP_FNMSUB; + // Fused Negated Multiply Add is modified FNMSUB + FNMADD : begin + fpu_op_d = OP_FNMSUB; + fpu_op_mod_d = 1'b1; + end + // Float to Int Cast - Op encoded in lowest two imm bits or rm + FCVT_F2I : begin + fpu_op_d = OP_F2I; + // Vectorial Ops encoded in rm (000-001) + if (fpu_vec_op_d) begin + fpu_op_mod_d = fpu_rm_i[0]; + vec_replication = 
1'b0; // no replication, R bit used for op + unique case (fpu_fmt_i) + 2'b00 : fpu_ifmt_d = IFMT_INT32; + 2'b01, + 2'b10 : fpu_ifmt_d = IFMT_INT16; + 2'b11 : fpu_ifmt_d = IFMT_INT8; + endcase + // Scalar casts encoded in imm + end else begin + fpu_op_mod_d = operand_c_i[0]; + if (operand_c_i[1]) + fpu_ifmt_d = IFMT_INT64; + else + fpu_ifmt_d = IFMT_INT32; end - // Multiplication - FMUL : fpu_op_n = OP_MUL; - // Division - FDIV : fpu_op_n = OP_DIV; - // Min/Max - OP is encoded in rm (000-001) - FMIN_MAX : begin - fpu_op_n = OP_MINMAX; - fpu_rm_n = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit - check_ah = 1'b1; // AH has RM MSB encoding + end + // Int to Float Cast - Op encoded in lowest two imm bits or rm + FCVT_I2F : begin + fpu_op_d = OP_I2F; + // Vectorial Ops encoded in rm (000-001) + if (fpu_vec_op_d) begin + fpu_op_mod_d = fpu_rm_i[0]; + vec_replication = 1'b0; // no replication, R bit used for op + unique case (fpu_fmt_i) + 2'b00 : fpu_ifmt_d = IFMT_INT32; + 2'b01, + 2'b10 : fpu_ifmt_d = IFMT_INT16; + 2'b11 : fpu_ifmt_d = IFMT_INT8; + endcase + // Scalar casts encoded in imm + end else begin + fpu_op_mod_d = operand_c_i[0]; + if (operand_c_i[1]) + fpu_ifmt_d = IFMT_INT64; + else + fpu_ifmt_d = IFMT_INT32; end - // Square Root - FSQRT : fpu_op_n = OP_SQRT; - // Fused Multiply Add - FMADD : fpu_op_n = OP_FMADD; - // Fused Multiply Subtract is modified FMADD - FMSUB : begin - fpu_op_n = OP_FMADD; - fpu_op_mod_n = 1'b1; + end + // Float to Float Cast - Source format encoded in lowest two/three imm bits + FCVT_F2F : begin + fpu_op_d = OP_F2F; + // Vectorial ops encoded in lowest two imm bits + if (fpu_vec_op_d) begin + vec_replication = 1'b0; // no replication for casts (not needed) + unique case (operand_c_i[1:0]) + 2'b00: fpu_fmt2_d = FMT_FP32; + 2'b01: fpu_fmt2_d = FMT_FP16ALT; + 2'b10: fpu_fmt2_d = FMT_FP16; + 2'b11: fpu_fmt2_d = FMT_FP8; + endcase + // Scalar ops encoded in lowest three imm bits + end else begin + unique case (operand_c_i[2:0]) + 
3'b000: fpu_fmt2_d = FMT_FP32; + 3'b001: fpu_fmt2_d = FMT_FP64; + 3'b010: fpu_fmt2_d = FMT_FP16; + 3'b110: fpu_fmt2_d = FMT_FP16ALT; + 3'b011: fpu_fmt2_d = FMT_FP8; + endcase end - // Fused Negated Multiply Subtract - FNMSUB : fpu_op_n = OP_FNMSUB; - // Fused Negated Multiply Add is modified FNMSUB - FNMADD : begin - fpu_op_n = OP_FNMSUB; - fpu_op_mod_n = 1'b1; - end - // Float to Int Cast - Op encoded in lowest two imm bits or rm - FCVT_F2I : begin - fpu_op_n = OP_F2I; - // Vectorial Ops encoded in rm (000-001) - if (fpu_vec_op_n) begin - fpu_op_mod_n = fpu_rm_i[0]; - vec_replication = 1'b0; // no replication, R bit used for op - unique case (fpu_fmt_i) - 2'b00 : fpu_ifmt_n = IFMT_INT32; - 2'b01, - 2'b10 : fpu_ifmt_n = IFMT_INT16; - 2'b11 : fpu_ifmt_n = IFMT_INT8; - endcase - // Scalar casts encoded in imm - end else begin - fpu_op_mod_n = operand_c_n[0]; - if (operand_c_n[1]) - fpu_ifmt_n = IFMT_INT64; - else - fpu_ifmt_n = IFMT_INT32; - end - end - // Int to Float Cast - Op encoded in lowest two imm bits or rm - FCVT_I2F : begin - fpu_op_n = OP_I2F; - // Vectorial Ops encoded in rm (000-001) - if (fpu_vec_op_n) begin - fpu_op_mod_n = fpu_rm_i[0]; - vec_replication = 1'b0; // no replication, R bit used for op - unique case (fpu_fmt_i) - 2'b00 : fpu_ifmt_n = IFMT_INT32; - 2'b01, - 2'b10 : fpu_ifmt_n = IFMT_INT16; - 2'b11 : fpu_ifmt_n = IFMT_INT8; - endcase - // Scalar casts encoded in imm - end else begin - fpu_op_mod_n = operand_c_n[0]; - if (operand_c_n[1]) - fpu_ifmt_n = IFMT_INT64; - else - fpu_ifmt_n = IFMT_INT32; - end - end - // Float to Float Cast - Source format encoded in lowest two/three imm bits - FCVT_F2F : begin - fpu_op_n = OP_F2F; - // Vectorial ops encoded in lowest two imm bits - if (fpu_vec_op_n) begin - vec_replication = 1'b0; // no replication for casts (not needed) - unique case (operand_c_n[1:0]) - 2'b00: fpu_fmt2_n = FMT_FP32; - 2'b01: fpu_fmt2_n = FMT_FP16ALT; - 2'b10: fpu_fmt2_n = FMT_FP16; - 2'b11: fpu_fmt2_n = FMT_FP8; - endcase - // 
Scalar ops encoded in lowest three imm bits - end else begin - unique case (operand_c_n[2:0]) - 3'b000: fpu_fmt2_n = FMT_FP32; - 3'b001: fpu_fmt2_n = FMT_FP64; - 3'b010: fpu_fmt2_n = FMT_FP16; - 3'b110: fpu_fmt2_n = FMT_FP16ALT; - 3'b011: fpu_fmt2_n = FMT_FP8; - endcase - end - end - // Scalar Sign Injection - op encoded in rm (000-010) - FSGNJ : begin - fpu_op_n = OP_SGNJ; - fpu_rm_n = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit - check_ah = 1'b1; // AH has RM MSB encoding - end - // Move from FPR to GPR - mapped to SGNJ-passthrough since no recoding - FMV_F2X : begin - fpu_op_n = OP_SGNJ; - fpu_rm_n = 3'b011; // passthrough without checking nan-box - fpu_op_mod_n = 1'b1; // no NaN-Boxing - check_ah = 1'b1; // AH has RM MSB encoding - // operand_b_n = operand_a_n; - vec_replication = 1'b0; // no replication, we set second operand - end - // Move from GPR to FPR - mapped to NOP since no recoding - FMV_X2F : begin - fpu_op_n = OP_SGNJ; - fpu_rm_n = 3'b011; // passthrough without checking nan-box - check_ah = 1'b1; // AH has RM MSB encoding - // operand_b_n = operand_a_n; - vec_replication = 1'b0; // no replication, we set second operand - end - // Scalar Comparisons - op encoded in rm (000-010) - FCMP : begin - fpu_op_n = OP_CMP; - fpu_rm_n = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit - check_ah = 1'b1; // AH has RM MSB encoding - end - // Classification - FCLASS : begin - fpu_op_n = OP_CLASS; - fpu_rm_n = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit - check_ah = 1'b1; // AH has RM MSB encoding - end - // Vectorial Minimum - set up scalar encoding in rm - VFMIN : begin - fpu_op_n = OP_MINMAX; - fpu_rm_n = 3'b000; // min - end - // Vectorial Maximum - set up scalar encoding in rm - VFMAX : begin - fpu_op_n = OP_MINMAX; - fpu_rm_n = 3'b001; // max - end - // Vectorial Sign Injection - set up scalar encoding in rm - VFSGNJ : begin - fpu_op_n = OP_SGNJ; - fpu_rm_n = 3'b000; // sgnj - end - // Vectorial Negated Sign Injection - set up scalar encoding 
in rm - VFSGNJN : begin - fpu_op_n = OP_SGNJ; - fpu_rm_n = 3'b001; // sgnjn - end - // Vectorial Xored Sign Injection - set up scalar encoding in rm - VFSGNJX : begin - fpu_op_n = OP_SGNJ; - fpu_rm_n = 3'b010; // sgnjx - end - // Vectorial Equals - set up scalar encoding in rm - VFEQ : begin - fpu_op_n = OP_CMP; - fpu_rm_n = 3'b010; // eq - end - // Vectorial Not Equals - set up scalar encoding in rm - VFNE : begin - fpu_op_n = OP_CMP; - fpu_op_mod_n = 1'b1; // invert output - fpu_rm_n = 3'b010; // eq - end - // Vectorial Less Than - set up scalar encoding in rm - VFLT : begin - fpu_op_n = OP_CMP; - fpu_rm_n = 3'b001; // lt - end - // Vectorial Greater or Equal - set up scalar encoding in rm - VFGE : begin - fpu_op_n = OP_CMP; - fpu_op_mod_n = 1'b1; // invert output - fpu_rm_n = 3'b001; // lt - end - // Vectorial Less or Equal - set up scalar encoding in rm - VFLE : begin - fpu_op_n = OP_CMP; - fpu_rm_n = 3'b000; // le - end - // Vectorial Greater Than - set up scalar encoding in rm - VFGT : begin - fpu_op_n = OP_CMP; - fpu_op_mod_n = 1'b1; // invert output - fpu_rm_n = 3'b000; // le + end + // Scalar Sign Injection - op encoded in rm (000-010) + FSGNJ : begin + fpu_op_d = OP_SGNJ; + fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit + check_ah = 1'b1; // AH has RM MSB encoding + end + // Move from FPR to GPR - mapped to SGNJ-passthrough since no recoding + FMV_F2X : begin + fpu_op_d = OP_SGNJ; + fpu_rm_d = 3'b011; // passthrough without checking nan-box + fpu_op_mod_d = 1'b1; // no NaN-Boxing + check_ah = 1'b1; // AH has RM MSB encoding + // operand_b_d = operand_a_d; + vec_replication = 1'b0; // no replication, we set second operand + end + // Move from GPR to FPR - mapped to NOP since no recoding + FMV_X2F : begin + fpu_op_d = OP_SGNJ; + fpu_rm_d = 3'b011; // passthrough without checking nan-box + check_ah = 1'b1; // AH has RM MSB encoding + // operand_b_d = operand_a_d; + vec_replication = 1'b0; // no replication, we set second operand + end + // 
Scalar Comparisons - op encoded in rm (000-010) + FCMP : begin + fpu_op_d = OP_CMP; + fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit + check_ah = 1'b1; // AH has RM MSB encoding + end + // Classification + FCLASS : begin + fpu_op_d = OP_CLASS; + fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit + check_ah = 1'b1; // AH has RM MSB encoding + end + // Vectorial Minimum - set up scalar encoding in rm + VFMIN : begin + fpu_op_d = OP_MINMAX; + fpu_rm_d = 3'b000; // min + end + // Vectorial Maximum - set up scalar encoding in rm + VFMAX : begin + fpu_op_d = OP_MINMAX; + fpu_rm_d = 3'b001; // max + end + // Vectorial Sign Injection - set up scalar encoding in rm + VFSGNJ : begin + fpu_op_d = OP_SGNJ; + fpu_rm_d = 3'b000; // sgnj + end + // Vectorial Negated Sign Injection - set up scalar encoding in rm + VFSGNJN : begin + fpu_op_d = OP_SGNJ; + fpu_rm_d = 3'b001; // sgnjn + end + // Vectorial Xored Sign Injection - set up scalar encoding in rm + VFSGNJX : begin + fpu_op_d = OP_SGNJ; + fpu_rm_d = 3'b010; // sgnjx + end + // Vectorial Equals - set up scalar encoding in rm + VFEQ : begin + fpu_op_d = OP_CMP; + fpu_rm_d = 3'b010; // eq + end + // Vectorial Not Equals - set up scalar encoding in rm + VFNE : begin + fpu_op_d = OP_CMP; + fpu_op_mod_d = 1'b1; // invert output + fpu_rm_d = 3'b010; // eq end + // Vectorial Less Than - set up scalar encoding in rm + VFLT : begin + fpu_op_d = OP_CMP; + fpu_rm_d = 3'b001; // lt + end + // Vectorial Greater or Equal - set up scalar encoding in rm + VFGE : begin + fpu_op_d = OP_CMP; + fpu_op_mod_d = 1'b1; // invert output + fpu_rm_d = 3'b001; // lt + end + // Vectorial Less or Equal - set up scalar encoding in rm + VFLE : begin + fpu_op_d = OP_CMP; + fpu_rm_d = 3'b000; // le + end + // Vectorial Greater Than - set up scalar encoding in rm + VFGT : begin + fpu_op_d = OP_CMP; + fpu_op_mod_d = 1'b1; // invert output + fpu_rm_d = 3'b000; // le + end - // VFCPKAB_S : - // VFCPKCD_S : - // VFCPKAB_D : - // VFCPKCD_D 
: + // VFCPKAB_S : + // VFCPKCD_S : + // VFCPKAB_D : + // VFCPKCD_D : - // by default set opb = opa to have a sgnj nop - // default : operand_b_n = operand_a_n; - default : ; //nothing - endcase + // by default set opb = opa to have a sgnj nop + // default : operand_b_d = operand_a_d; + default : ; //nothing + endcase - // Scalar AH encoding fixing - if (!fpu_vec_op_n && check_ah) - if (fpu_rm_i[2]) - fpu_fmt_n = FMT_FP16ALT; + // Scalar AH encoding fixing + if (!fpu_vec_op_d && check_ah) + if (fpu_rm_i[2]) + fpu_fmt_d = FMT_FP16ALT; - // Replication - if (fpu_vec_op_n && vec_replication) - case (fpu_fmt_n) - FMT_FP32 : operand_b_n = RVD ? {2{operand_b_n[31:0]}} : operand_b_n; - FMT_FP16, - FMT_FP16ALT : operand_b_n = RVD ? {4{operand_b_n[15:0]}} : {2{operand_b_n[15:0]}}; - FMT_FP8 : operand_b_n = RVD ? {8{operand_b_n[7:0]}} : {4{operand_b_n[7:0]}}; - endcase // fpu_fmt_n - - // Ugly but needs to be done: map additions to operands B and C - // if (fpu_op_n == OP_ADD) begin - // operand_c_n = operand_b_n; - // operand_b_n = operand_a_n; - // end - - end + // Replication + if (fpu_vec_op_d && vec_replication) + case (fpu_fmt_d) + FMT_FP32 : operand_b_d = RVD ? {2{operand_b_i[31:0]}} : operand_b_i; + FMT_FP16, + FMT_FP16ALT : operand_b_d = RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}}; + FMT_FP8 : operand_b_d = RVD ? 
{8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}}; + endcase // fpu_fmt_d + end - //--------------------------------------------------------- - // Upstream protocol inversion: InValid depends on InReady - //--------------------------------------------------------- + //--------------------------------------------------------- + // Upstream protocol inversion: InValid depends on InReady + //--------------------------------------------------------- - // Input is ready whenever the register is free to accept a potentially spilling instruction - assign fpu_ready_o = ~reg_in_valid & (~reg_out_valid | reg_out_ready); + // Input is ready whenever the register is free to accept a potentially spilling instruction + assign fpu_ready_o = ~reg_in_valid & (~reg_out_valid | reg_out_ready); - // Input data goes to the buffer register if the received instruction cannot be handled - assign reg_in_valid = fpu_valid_i & ~fpu_in_ready; + // Input data goes to the buffer register if the received instruction cannot be handled + assign reg_in_valid = fpu_valid_i & ~fpu_in_ready; - // Data being applied to unit is taken from the register if there's an instruction waiting - assign fpu_in_valid = reg_out_valid | fpu_valid_i; + // Data being applied to unit is taken from the register if there's an instruction waiting + assign fpu_in_valid = reg_out_valid | fpu_valid_i; - // The input register is ready to accept new data if: - // 1. The current instruction will be processed by the fpu - // 2. There is no instruction waiting in the register - assign reg_in_ready = reg_out_ready | ~reg_out_valid; + // The input register is ready to accept new data if: + // 1. The current instruction will be processed by the fpu + // 2. There is no instruction waiting in the register + assign reg_in_ready = reg_out_ready | ~reg_out_valid; - // Register output side is signalled ready if: - // 1. The operation held in the reg is valid and will be processed - // 2. 
The register doesn't hold a valid instructin - assign reg_out_ready = fpu_in_ready | ~reg_out_valid; + // Register output side is signalled ready if: + // 1. The operation held in the reg is valid and will be processed + // 2. The register doesn't hold a valid instructin + assign reg_out_ready = fpu_in_ready | ~reg_out_valid; - // Buffer register - always_ff @(posedge clk_i or negedge rst_ni) begin : fp_buffer_reg - if(~rst_ni) begin - reg_out_valid <= '0; - operand_a_q <= '0; - operand_b_q <= '0; - operand_c_q <= '0; - fpu_op_q <= '0; - fpu_op_mod_q <= '0; - fpu_fmt_q <= '0; - fpu_fmt2_q <= '0; - fpu_ifmt_q <= '0; - fpu_rm_q <= '0; - fpu_vec_op_q <= '0; - fpu_tag_q <= '0; - end else begin - if (reg_out_ready || flush_i) begin // Only advance pipeline if unit is ready for our op - reg_out_valid <= reg_in_valid & ~flush_i; - if (reg_in_valid) begin // clock gate data to save poer - operand_a_q <= operand_a_n; - operand_b_q <= operand_b_n; - operand_c_q <= operand_c_n; - fpu_op_q <= fpu_op_n; - fpu_op_mod_q <= fpu_op_mod_n; - fpu_fmt_q <= fpu_fmt_n; - fpu_fmt2_q <= fpu_fmt2_n; - fpu_ifmt_q <= fpu_ifmt_n; - fpu_rm_q <= fpu_rm_n; - fpu_vec_op_q <= fpu_vec_op_n; - fpu_tag_q <= fpu_tag_n; - end + // Buffer register + always_ff @(posedge clk_i or negedge rst_ni) begin : fp_buffer_reg + if(~rst_ni) begin + reg_out_valid <= '0; + operand_a_q <= '0; + operand_b_q <= '0; + operand_c_q <= '0; + fpu_op_q <= '0; + fpu_op_mod_q <= '0; + fpu_fmt_q <= '0; + fpu_fmt2_q <= '0; + fpu_ifmt_q <= '0; + fpu_rm_q <= '0; + fpu_vec_op_q <= '0; + fpu_tag_q <= '0; + end else begin + if (reg_out_ready || flush_i) begin // Only advance pipeline if unit is ready for our op + reg_out_valid <= reg_in_valid & ~flush_i; + if (reg_in_valid) begin // clock gate data to save poer + operand_a_q <= operand_a_d; + operand_b_q <= operand_b_d; + operand_c_q <= operand_c_d; + fpu_op_q <= fpu_op_d; + fpu_op_mod_q <= fpu_op_mod_d; + fpu_fmt_q <= fpu_fmt_d; + fpu_fmt2_q <= fpu_fmt2_d; + fpu_ifmt_q <= fpu_ifmt_d; 
+ fpu_rm_q <= fpu_rm_d; + fpu_vec_op_q <= fpu_vec_op_d; + fpu_tag_q <= fpu_tag_d; end end end + end - // Select FPU input data: from register if valid data in register, else directly vom input - assign operand_a = reg_out_valid ? operand_a_q : operand_a_n; - assign operand_b = reg_out_valid ? operand_b_q : operand_b_n; - assign operand_c = reg_out_valid ? operand_c_q : operand_c_n; - assign fpu_op = reg_out_valid ? fpu_op_q : fpu_op_n; - assign fpu_op_mod = reg_out_valid ? fpu_op_mod_q : fpu_op_mod_n; - assign fpu_fmt = reg_out_valid ? fpu_fmt_q : fpu_fmt_n; - assign fpu_fmt2 = reg_out_valid ? fpu_fmt2_q : fpu_fmt2_n; - assign fpu_ifmt = reg_out_valid ? fpu_ifmt_q : fpu_ifmt_n; - assign fpu_rm = reg_out_valid ? fpu_rm_q : fpu_rm_n; - assign fpu_vec_op = reg_out_valid ? fpu_vec_op_q : fpu_vec_op_n; - assign fpu_tag = reg_out_valid ? fpu_tag_q : fpu_tag_n; + // Select FPU input data: from register if valid data in register, else directly vom input + assign operand_a = reg_out_valid ? operand_a_q : operand_a_d; + assign operand_b = reg_out_valid ? operand_b_q : operand_b_d; + assign operand_c = reg_out_valid ? operand_c_q : operand_c_d; + assign fpu_op = reg_out_valid ? fpu_op_q : fpu_op_d; + assign fpu_op_mod = reg_out_valid ? fpu_op_mod_q : fpu_op_mod_d; + assign fpu_fmt = reg_out_valid ? fpu_fmt_q : fpu_fmt_d; + assign fpu_fmt2 = reg_out_valid ? fpu_fmt2_q : fpu_fmt2_d; + assign fpu_ifmt = reg_out_valid ? fpu_ifmt_q : fpu_ifmt_d; + assign fpu_rm = reg_out_valid ? fpu_rm_q : fpu_rm_d; + assign fpu_vec_op = reg_out_valid ? fpu_vec_op_q : fpu_vec_op_d; + assign fpu_tag = reg_out_valid ? 
fpu_tag_q : fpu_tag_d; - //--------------- - // FPU instance - //--------------- - fpnew_top #( - .WIDTH ( FLEN ), - .TAG_WIDTH ( TRANS_ID_BITS ), - .RV64 ( 1'b1 ), - .RVF ( RVF ), - .RVD ( RVD ), - .Xf16 ( XF16 ), - .Xf16alt ( XF16ALT ), - .Xf8 ( XF8 ), - .Xfvec ( XFVEC ), - // TODO MOVE THESE VALUES TO PACKAGE - .LATENCY_COMP_F ( 31'h2 ), - .LATENCY_COMP_D ( 31'h3 ), - .LATENCY_COMP_Xf16 ( 31'h2 ), - .LATENCY_COMP_Xf16alt ( 31'h2 ), - .LATENCY_COMP_Xf8 ( 31'h1 ), - .LATENCY_DIVSQRT ( 31'h1 ), - .LATENCY_NONCOMP ( 31'h0 ), - .LATENCY_CONV ( 31'h1 ) - ) fpnew_top_i ( - .Clk_CI ( clk_i ), - .Reset_RBI ( rst_ni ), - .A_DI ( operand_a ), - .B_DI ( operand_b ), - .C_DI ( operand_c ), - .RoundMode_SI ( fpu_rm ), - .Op_SI ( fpu_op ), - .OpMod_SI ( fpu_op_mod ), - .VectorialOp_SI ( fpu_vec_op ), - .FpFmt_SI ( fpu_fmt ), - .FpFmt2_SI ( fpu_fmt2 ), - .IntFmt_SI ( fpu_ifmt ), - .Tag_DI ( fpu_tag ), - .InValid_SI ( fpu_in_valid ), - .InReady_SO ( fpu_in_ready ), - .Flush_SI ( flush_i ), - .Z_DO ( result_o ), - .Status_DO ( fpu_status ), - .Tag_DO ( fpu_trans_id_o ), - .OutValid_SO ( fpu_out_valid ), - .OutReady_SI ( fpu_out_ready ) - ); + //--------------- + // FPU instance + //--------------- + fpnew_top #( + .WIDTH ( FLEN ), + .TAG_WIDTH ( TRANS_ID_BITS ), + .RV64 ( 1'b1 ), + .RVF ( RVF ), + .RVD ( RVD ), + .Xf16 ( XF16 ), + .Xf16alt ( XF16ALT ), + .Xf8 ( XF8 ), + .Xfvec ( XFVEC ), + // TODO MOVE THESE VALUES TO PACKAGE + .LATENCY_COMP_F ( 31'h2 ), + .LATENCY_COMP_D ( 31'h3 ), + .LATENCY_COMP_Xf16 ( 31'h2 ), + .LATENCY_COMP_Xf16alt ( 31'h2 ), + .LATENCY_COMP_Xf8 ( 31'h1 ), + .LATENCY_DIVSQRT ( 31'h1 ), + .LATENCY_NONCOMP ( 31'h0 ), + .LATENCY_CONV ( 31'h1 ) + ) fpnew_top_i ( + .Clk_CI ( clk_i ), + .Reset_RBI ( rst_ni ), + .A_DI ( operand_a ), + .B_DI ( operand_b ), + .C_DI ( operand_c ), + .RoundMode_SI ( fpu_rm ), + .Op_SI ( fpu_op ), + .OpMod_SI ( fpu_op_mod ), + .VectorialOp_SI ( fpu_vec_op ), + .FpFmt_SI ( fpu_fmt ), + .FpFmt2_SI ( fpu_fmt2 ), + .IntFmt_SI ( fpu_ifmt ), 
+ .Tag_DI ( fpu_tag ), + .InValid_SI ( fpu_in_valid ), + .InReady_SO ( fpu_in_ready ), + .Flush_SI ( flush_i ), + .Z_DO ( result_o ), + .Status_DO ( fpu_status ), + .Tag_DO ( fpu_trans_id_o ), + .OutValid_SO ( fpu_out_valid ), + .OutReady_SI ( fpu_out_ready ) + ); - // Pack status flag into exception cause, tval ignored in wb, exception is always invalid - assign fpu_exception_o.cause = {59'h0, fpu_status}; - assign fpu_exception_o.valid = 1'b0; + // Pack status flag into exception cause, tval ignored in wb, exception is always invalid + assign fpu_exception_o.cause = {59'h0, fpu_status}; + assign fpu_exception_o.valid = 1'b0; - // Donwstream write port is dedicated to FPU and always ready - assign fpu_out_ready = 1'b1; + // Donwstream write port is dedicated to FPU and always ready + assign fpu_out_ready = 1'b1; - // Downstream valid from unit - assign fpu_valid_o = fpu_out_valid; - - // end else begin : no_fpu_gen - - // assign fpu_ready_o = 1'b0; - // assign fpu_trans_id_o = '0; - // assign result_o = '0; - // assign fpu_valid_o = 1'b0; - // assign fpu_exception_o = '0; - // end - // endgenerate + // Downstream valid from unit + assign fpu_valid_o = fpu_out_valid; endmodule From 037f81bb64bfb6c9db62eb6672703b68ffca936f Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Sun, 7 Oct 2018 11:27:38 +0200 Subject: [PATCH 76/94] :bug: Fix F2X register selection --- include/ariane_pkg.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index 04b65d537..36971b725 100644 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -282,7 +282,7 @@ package ariane_pkg; [FSD:FSB], // FP Stores [FADD:FMIN_MAX], // Computational Operations (no sqrt) [FMADD:FNMADD], // Fused Computational Operations - FSGNJ, // Sign Injections + [FSGNJ:FMV_F2X], // Sign Injections and moves mapped to SGNJ FCMP, // Comparisons [VFMIN:VFCPKCD_D] : return 1'b1; // Additional Vectorial FP ops default : return 1'b0; // all other ops From 
ef2a2d26b90dc88789984c6c9e73c7a855c46610 Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Sun, 7 Oct 2018 14:28:28 +0200 Subject: [PATCH 77/94] :bug: Fix vectorial float decoding and issue --- include/ariane_pkg.sv | 27 ++++++------ src/decoder.sv | 38 +++++++++++----- src/fpu | 2 +- src/fpu_wrap.sv | 88 +++++++++++++++++++++++++++----------- src/issue_read_operands.sv | 7 +-- 5 files changed, 109 insertions(+), 53 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index 36971b725..f19307e37 100644 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -262,7 +262,7 @@ package ariane_pkg; function automatic logic is_rs1_fpr (input fu_op op); if (FP_PRESENT) begin // makes function static for non-fp case unique case (op) inside - [FADD:FNMADD], // Computational Operations + [FMUL:FNMADD], // Computational Operations (except ADD/SUB) FCVT_F2I, // Float-Int Casts FCVT_F2F, // Float-Float Casts FSGNJ, // Sign Injections @@ -282,6 +282,7 @@ package ariane_pkg; [FSD:FSB], // FP Stores [FADD:FMIN_MAX], // Computational Operations (no sqrt) [FMADD:FNMADD], // Fused Computational Operations + FCVT_F2F, // Vectorial F2F Conversions requrie target [FSGNJ:FMV_F2X], // Sign Injections and moves mapped to SGNJ FCMP, // Comparisons [VFMIN:VFCPKCD_D] : return 1'b1; // Additional Vectorial FP ops @@ -295,9 +296,10 @@ package ariane_pkg; function automatic logic is_imm_fpr (input fu_op op); if (FP_PRESENT) begin // makes function static for non-fp case unique case (op) inside - [FADD:FSUB], // ADD/SUB need inputs as Operand B/C - [FMADD:FNMADD] : return 1'b1; // Fused Computational Operations - default : return 1'b0; // all other ops + [FADD:FSUB], // ADD/SUB need inputs as Operand B/C + [FMADD:FNMADD], // Fused Computational Operations + [VFCPKAB_S:VFCPKCD_D] : return 1'b1; // Vectorial FP cast and pack ops + default : return 1'b0; // all other ops endcase end else return 1'b0; @@ -306,14 +308,15 @@ package ariane_pkg; function automatic logic is_rd_fpr 
(input fu_op op); if (FP_PRESENT) begin // makes function static for non-fp case unique case (op) inside - [FLD:FLB], // FP Loads - [FADD:FNMADD], // Computational Operations - FCVT_I2F, // Int-Float Casts - FCVT_F2F, // Float-Float Casts - FSGNJ, // Sign Injections - FMV_X2F, // GPR-FPR Moves - [VFMIN:VFCPKCD_D] : return 1'b1; // Additional Vectorial FP ops - default : return 1'b0; // all other ops + [FLD:FLB], // FP Loads + [FADD:FNMADD], // Computational Operations + FCVT_I2F, // Int-Float Casts + FCVT_F2F, // Float-Float Casts + FSGNJ, // Sign Injections + FMV_X2F, // GPR-FPR Moves + [VFMIN:VFSGNJX], // Vectorial MIN/MAX and SGNJ + [VFCPKAB_S:VFCPKCD_D] : return 1'b1; // Vectorial FP cast and pack ops + default : return 1'b0; // all other ops endcase end else return 1'b0; diff --git a/src/decoder.sv b/src/decoder.sv index a0de8dc79..22995c5f3 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -257,8 +257,18 @@ module decoder ( allow_replication = 1'b1; // decode vectorial FP instruction unique case (instr.rvftype.vecfltop) - 5'b00001 : instruction_o.op = FADD; // vfadd.vfmt - Vectorial FP Addition - 5'b00010 : instruction_o.op = FSUB; // vfsub.vfmt - Vectorial FP Subtraction + 5'b00001 : begin + instruction_o.op = FADD; // vfadd.vfmt - Vectorial FP Addition + instruction_o.rs1 = '0; // Operand A is set to 0 + instruction_o.rs2 = instr.rvftype.rs1; // Operand B is set to rs1 + imm_select = IIMM; // Operand C is set to rs2 + end + 5'b00010 : begin + instruction_o.op = FSUB; // vfsub.vfmt - Vectorial FP Subtraction + instruction_o.rs1 = '0; // Operand A is set to 0 + instruction_o.rs2 = instr.rvftype.rs1; // Operand B is set to rs1 + imm_select = IIMM; // Operand C is set to rs2 + end 5'b00011 : instruction_o.op = FMUL; // vfmul.vfmt - Vectorial FP Multiplication 5'b00100 : instruction_o.op = FDIV; // vfdiv.vfmt - Vectorial FP Division 5'b00101 : begin @@ -285,6 +295,7 @@ module decoder ( 5'b01100 : begin unique case (instr.rvftype.rs2) inside // operation 
encoded in rs2, `inside` for matching ? 5'b00000 : begin + instruction_o.rs2 = instr.rvftype.rs1; // set rs2 = rs1 so we can map FMV to SGNJ in the unit if (instr.rvftype.repl) instruction_o.op = FMV_F2X; // vfmv.x.vfmt - FPR to GPR Move else @@ -300,7 +311,9 @@ module decoder ( 5'b00011 : instruction_o.op = FCVT_I2F; // vfcvt.vfmt.x - Vectorial Int to FP Conversion 5'b001?? : begin instruction_o.op = FCVT_F2F; // vfcvt.vfmt.vfmt - Vectorial FP to FP Conversion - allow_replication = 1'b0; // R must not be set + instruction_o.rs2 = instr.rvftype.rd; // set rs2 = rd as target vector for conversion + imm_select = IIMM; // rs2 holds part of the intruction + // TODO CHECK R bit for valid fmt combinations // determine source format unique case (instr.rvftype.rs2[21:20]) // Only process instruction if corresponding extension is active (static) @@ -320,7 +333,7 @@ module decoder ( end 5'b01110 : begin check_fprm = 1'b0; // no rounding for sign-injection - instruction_o.op = VFSGNJN; // vfsgnjN.vfmt - Vectorial FP Negated Sign Injection + instruction_o.op = VFSGNJN; // vfsgnjn.vfmt - Vectorial FP Negated Sign Injection end 5'b01111 : begin check_fprm = 1'b0; // no rounding for sign-injection @@ -351,8 +364,8 @@ module decoder ( instruction_o.op = VFGT; // vfgt.vfmt - Vectorial FP Greater Than end 5'b11000 : begin - allow_replication = 1'b0; // no replication for cast-and-pack instruction_o.op = VFCPKAB_S; // vfcpka/b.vfmt.s - Vectorial FP Cast-and-Pack from 2x FP32, lowest 4 entries + imm_select = SIMM; // rd into result field (upper bits don't matter) if (~RVF) illegal_instr = 1'b1; // if we don't support RVF, we can't cast from FP32 // check destination format unique case (instr.rvftype.vfmt) @@ -374,8 +387,8 @@ module decoder ( endcase end 5'b11001 : begin - allow_replication = 1'b0; // no replication for cast-and-pack instruction_o.op = VFCPKCD_S; // vfcpkc/d.vfmt.s - Vectorial FP Cast-and-Pack from 2x FP32, second 4 entries + imm_select = SIMM; // rd into result field 
(upper bits don't matter) if (~RVF) illegal_instr = 1'b1; // if we don't support RVF, we can't cast from FP32 // check destination format unique case (instr.rvftype.vfmt) @@ -390,8 +403,8 @@ module decoder ( endcase end 5'b11010 : begin - allow_replication = 1'b0; // no replication for cast-and-pack instruction_o.op = VFCPKAB_D; // vfcpka/b.vfmt.d - Vectorial FP Cast-and-Pack from 2x FP64, lowest 4 entries + imm_select = SIMM; // rd into result field (upper bits don't matter) if (~RVD) illegal_instr = 1'b1; // if we don't support RVD, we can't cast from FP64 // check destination format unique case (instr.rvftype.vfmt) @@ -413,8 +426,8 @@ module decoder ( endcase end 5'b11011 : begin - allow_replication = 1'b0; // no replication for cast-and-pack instruction_o.op = VFCPKCD_D; // vfcpka/b.vfmt.d - Vectorial FP Cast-and-Pack from 2x FP64, second 4 entries + imm_select = SIMM; // rd into result field (upper bits don't matter) if (~RVD) illegal_instr = 1'b1; // if we don't support RVD, we can't cast from FP64 // check destination format unique case (instr.rvftype.vfmt) @@ -432,7 +445,7 @@ module decoder ( endcase // check format - unique case (instr.rftype.fmt) + unique case (instr.rvftype.vfmt) // Only process instruction if corresponding extension is active (static) 2'b00: if (~RVFVEC) illegal_instr = 1'b1; 2'b01: if (~XF16ALTVEC) illegal_instr = 1'b1; @@ -735,11 +748,13 @@ module decoder ( unique case (instr.rftype.funct5) 5'b00000: begin instruction_o.op = FADD; // fadd.fmt - FP Addition + instruction_o.rs1 = '0; // Operand A is set to 0 instruction_o.rs2 = instr.rftype.rs1; // Operand B is set to rs1 imm_select = IIMM; // Operand C is set to rs2 end 5'b00001: begin instruction_o.op = FSUB; // fsub.fmt - FP Subtraction + instruction_o.rs1 = '0; // Operand A is set to 0 instruction_o.rs2 = instr.rftype.rs1; // Operand B is set to rs1 imm_select = IIMM; // Operand C is set to rs2 end @@ -773,8 +788,9 @@ module decoder ( end end 5'b01000: begin - instruction_o.op = 
FCVT_F2F; // fcvt.fmt.fmt - FP to FP Conversion - imm_select = IIMM; // rs2 holds part of the intruction + instruction_o.op = FCVT_F2F; // fcvt.fmt.fmt - FP to FP Conversion + instruction_o.rs2 = instr.rvftype.rs1; // tie rs2 to rs1 to be safe (vectors use rs2) + imm_select = IIMM; // rs2 holds part of the intruction if (instr.rftype.rs2[24:23]) illegal_instr = 1'b1; // bits [22:20] used, other bits must be 0 // check source format unique case (instr.rftype.rs2[22:20]) diff --git a/src/fpu b/src/fpu index 1cfdf5d62..74c4bdc29 160000 --- a/src/fpu +++ b/src/fpu @@ -1 +1 @@ -Subproject commit 1cfdf5d62c3144001501aeb985c2e3ca95afda01 +Subproject commit 74c4bdc29ee871accc1fc81accfcfd8d8e164ff8 diff --git a/src/fpu_wrap.sv b/src/fpu_wrap.sv index 185ad1fc9..67686cd96 100644 --- a/src/fpu_wrap.sv +++ b/src/fpu_wrap.sv @@ -25,8 +25,8 @@ module fpu_wrap ( output logic fpu_ready_o, input fu_op operator_i, input logic [FLEN-1:0] operand_a_i, - input logic [FLEN-1:0] operand_b_i, - input logic [FLEN-1:0] operand_c_i, + input logic [FLEN-1:0] operand_b_i, // imm will be here unless used as operand + input logic [FLEN-1:0] operand_c_i, // imm will be here unless used as operand input logic [1:0] fpu_fmt_i, input logic [2:0] fpu_rm_i, input logic [2:0] fpu_frm_i, @@ -146,12 +146,13 @@ module fpu_wrap ( always_comb begin : input_translation automatic logic vec_replication; // control honoring of replication flag - automatic logic check_ah; + automatic logic replicate_c; // replicate operand C instead of B (for ADD/SUB) + automatic logic check_ah; // Decide for AH from RM field encoding // Default Values operand_a_d = operand_a_i; - operand_b_d = operand_b_i; - operand_c_d = operand_c_i; + operand_b_d = operand_b_i; // immediates come through this port unless used as operand + operand_c_d = operand_c_i; // immediates come through this port unless used as operand fpu_op_d = OP_SGNJ; // sign injection by default fpu_op_mod_d = 1'b0; fpu_fmt_d = FMT_FP32; @@ -161,7 +162,8 @@ module 
fpu_wrap ( fpu_vec_op_d = fu_i == FPU_VEC; fpu_tag_d = trans_id_i; vec_replication = fpu_rm_i[0]; // replication bit is sent via rm field - check_ah = 1'b0; // whether set AH encoding from MSB of rm_i + replicate_c = 1'b0; + check_ah = 1'b0; // whether set scalar AH encoding from MSB of rm_i // Scalar Rounding Modes - some ops encode inside RM but use smaller range if (!(fpu_rm_i inside {[3'b000:3'b100]})) @@ -192,11 +194,15 @@ module fpu_wrap ( // Operations (this can modify the rounding mode field and format!) unique case (operator_i) // Addition - FADD : fpu_op_d = OP_ADD; + FADD : begin + fpu_op_d = OP_ADD; + replicate_c = 1'b1; // second operand is in C + end // Subtraction is modified ADD FSUB : begin fpu_op_d = OP_ADD; fpu_op_mod_d = 1'b1; + replicate_c = 1'b1; // second operand is in C end // Multiplication FMUL : fpu_op_d = OP_MUL; @@ -227,7 +233,7 @@ module fpu_wrap ( // Float to Int Cast - Op encoded in lowest two imm bits or rm FCVT_F2I : begin fpu_op_d = OP_F2I; - // Vectorial Ops encoded in rm (000-001) + // Vectorial Ops encoded in R bit if (fpu_vec_op_d) begin fpu_op_mod_d = fpu_rm_i[0]; vec_replication = 1'b0; // no replication, R bit used for op @@ -249,7 +255,7 @@ module fpu_wrap ( // Int to Float Cast - Op encoded in lowest two imm bits or rm FCVT_I2F : begin fpu_op_d = OP_I2F; - // Vectorial Ops encoded in rm (000-001) + // Vectorial Ops encoded in R bit if (fpu_vec_op_d) begin fpu_op_mod_d = fpu_rm_i[0]; vec_replication = 1'b0; // no replication, R bit used for op @@ -303,7 +309,6 @@ module fpu_wrap ( fpu_rm_d = 3'b011; // passthrough without checking nan-box fpu_op_mod_d = 1'b1; // no NaN-Boxing check_ah = 1'b1; // AH has RM MSB encoding - // operand_b_d = operand_a_d; vec_replication = 1'b0; // no replication, we set second operand end // Move from GPR to FPR - mapped to NOP since no recoding @@ -311,7 +316,6 @@ module fpu_wrap ( fpu_op_d = OP_SGNJ; fpu_rm_d = 3'b011; // passthrough without checking nan-box check_ah = 1'b1; // AH has RM MSB 
encoding - // operand_b_d = operand_a_d; vec_replication = 1'b0; // no replication, we set second operand end // Scalar Comparisons - op encoded in rm (000-010) @@ -323,7 +327,7 @@ module fpu_wrap ( // Classification FCLASS : begin fpu_op_d = OP_CLASS; - fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit + fpu_rm_d = {1'b0, fpu_rm_i[1:0]}; // mask out AH encoding bit - CLASS doesn't care anyways check_ah = 1'b1; // AH has RM MSB encoding end // Vectorial Minimum - set up scalar encoding in rm @@ -384,14 +388,36 @@ module fpu_wrap ( fpu_op_mod_d = 1'b1; // invert output fpu_rm_d = 3'b000; // le end + // Vectorial Convert-and-Pack from FP32, lower 4 entries + VFCPKAB_S : begin + fpu_op_d = OP_CPKAB; + fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit + vec_replication = 1'b0; // no replication, R bit used for op + fpu_fmt2_d = FMT_FP32; // Cast from FP32 + end + // Vectorial Convert-and-Pack from FP32, upper 4 entries + VFCPKCD_S : begin + fpu_op_d = OP_CPKCD; + fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit + vec_replication = 1'b0; // no replication, R bit used for op + fpu_fmt2_d = FMT_FP32; // Cast from FP32 + end + // Vectorial Convert-and-Pack from FP64, lower 4 entries + VFCPKAB_D : begin + fpu_op_d = OP_CPKAB; + fpu_op_mod_d = fpu_rm_i[0]; // A/B selection from R bit + vec_replication = 1'b0; // no replication, R bit used for op + fpu_fmt2_d = FMT_FP64; // Cast from FP64 + end + // Vectorial Convert-and-Pack from FP64, upper 4 entries + VFCPKCD_D : begin + fpu_op_d = OP_CPKCD; + fpu_op_mod_d = fpu_rm_i[0]; // C/D selection from R bit + vec_replication = 1'b0; // no replication, R bit used for op + fpu_fmt2_d = FMT_FP64; // Cast from FP64 + end - // VFCPKAB_S : - // VFCPKCD_S : - // VFCPKAB_D : - // VFCPKCD_D : - - // by default set opb = opa to have a sgnj nop - // default : operand_b_d = operand_a_d; + // No changes per default default : ; //nothing endcase @@ -401,13 +427,23 @@ module fpu_wrap ( fpu_fmt_d = FMT_FP16ALT; //
Replication - if (fpu_vec_op_d && vec_replication) - case (fpu_fmt_d) - FMT_FP32 : operand_b_d = RVD ? {2{operand_b_i[31:0]}} : operand_b_i; - FMT_FP16, - FMT_FP16ALT : operand_b_d = RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}}; - FMT_FP8 : operand_b_d = RVD ? {8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}}; - endcase // fpu_fmt_d + if (fpu_vec_op_d && vec_replication) begin + if (replicate_c) begin + unique case (fpu_fmt_d) + FMT_FP32 : operand_c_d = RVD ? {2{operand_c_i[31:0]}} : operand_c_i; + FMT_FP16, + FMT_FP16ALT : operand_c_d = RVD ? {4{operand_c_i[15:0]}} : {2{operand_c_i[15:0]}}; + FMT_FP8 : operand_c_d = RVD ? {8{operand_c_i[7:0]}} : {4{operand_c_i[7:0]}}; + endcase // fpu_fmt_d + end else begin + unique case (fpu_fmt_d) + FMT_FP32 : operand_b_d = RVD ? {2{operand_b_i[31:0]}} : operand_b_i; + FMT_FP16, + FMT_FP16ALT : operand_b_d = RVD ? {4{operand_b_i[15:0]}} : {2{operand_b_i[15:0]}}; + FMT_FP8 : operand_b_d = RVD ? {8{operand_b_i[7:0]}} : {4{operand_b_i[7:0]}}; + endcase // fpu_fmt_d + end + end end diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index bf2fe44c6..f1bdeb933 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -199,7 +199,8 @@ module issue_read_operands #( // default is regfiles (gpr or fpr) operand_a_n = operand_a_regfile; operand_b_n = operand_b_regfile; - // immediates are the third operands in the store case or for certain fp operations + // immediates are the third operands in the store case + // for FP operations, the imm field can also be the third operand from the regfile imm_n = is_imm_fpr(issue_instr_i.op) ? 
operand_c_regfile : issue_instr_i.result; trans_id_n = issue_instr_i.trans_id; fu_n = issue_instr_i.fu; @@ -228,8 +229,8 @@ module issue_read_operands #( operand_a_n = {52'b0, issue_instr_i.rs1[4:0]}; end // or is it an immediate (including PC), this is not the case for a store and control flow instructions - // also make sure the imm is not already used as an FP operand - if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW) && !is_imm_fpr(issue_instr_i.op)) begin + // also make sure operand B is not already used as an FP operand + if (issue_instr_i.use_imm && (issue_instr_i.fu != STORE) && (issue_instr_i.fu != CTRL_FLOW) && !is_rs2_fpr(issue_instr_i.op)) begin operand_b_n = issue_instr_i.result; end end From 2c1db773aa006c4184138a95e5c7fe524ccf023d Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Sun, 7 Oct 2018 16:44:46 +0200 Subject: [PATCH 78/94] :bug: Fix commit log showing all fpr writes as gpr --- include/riscv_pkg.sv | 9 +++++---- src/util/instruction_tracer.svh | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/riscv_pkg.sv b/include/riscv_pkg.sv index 37e90f784..cd7a14e98 100644 --- a/include/riscv_pkg.sv +++ b/include/riscv_pkg.sv @@ -495,13 +495,14 @@ package riscv; // trace log compatible to spikes commit log feature // pragma translate_off - function string spikeCommitLog(logic [63:0] pc, priv_lvl_t priv_lvl, logic [31:0] instr, logic [4:0] rd, logic [63:0] result); + function string spikeCommitLog(logic [63:0] pc, priv_lvl_t priv_lvl, logic [31:0] instr, logic [4:0] rd, logic [63:0] result, logic rd_fpr); string rd_s; + automatic string rf_s = rd_fpr ? 
"f" : "x"; - if (rd < 10) rd_s = $sformatf("x %0d", rd); - else rd_s = $sformatf("x%0d", rd); + if (rd < 10) rd_s = $sformatf("%s %0d", rf_s, rd); + else rd_s = $sformatf("%s%0d", rf_s, rd); - if (rd != 0) begin + if (rd_fpr || rd != 0) begin // 0 0x0000000080000118 (0xeecf8f93) x31 0x0000000080004000 return $sformatf("%d 0x%h (0x%h) %s 0x%h\n", priv_lvl, pc, instr, rd_s, result); end else begin diff --git a/src/util/instruction_tracer.svh b/src/util/instruction_tracer.svh index 6cb0d9bee..87f5d730a 100644 --- a/src/util/instruction_tracer.svh +++ b/src/util/instruction_tracer.svh @@ -190,7 +190,7 @@ class instruction_tracer; // print instruction to console string print_instr = iti.printInstr(); if (ENABLE_SPIKE_COMMIT_LOG && !debug_mode) begin - $fwrite(this.commit_log, riscv::spikeCommitLog(sbe.pc, priv_lvl, instr, sbe.rd, result)); + $fwrite(this.commit_log, riscv::spikeCommitLog(sbe.pc, priv_lvl, instr, sbe.rd, result, is_rd_fpr(sbe.op))); end uvm_report_info( "Tracer", print_instr, UVM_HIGH); $fwrite(this.f, {print_instr, "\n"}); From e98d13d0482a274b4caca0b862514e80126f536e Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Mon, 8 Oct 2018 12:44:52 +0200 Subject: [PATCH 79/94] :bug: Fix various FP issues and freezes --- fpuwave.do | 802 ++++++++++++++++++++++++++++++++++++++++++ include/ariane_pkg.sv | 12 +- src/fpu | 2 +- src/fpu_div_sqrt_mvp | 2 +- src/fpu_wrap.sv | 139 +++++--- 5 files changed, 897 insertions(+), 60 deletions(-) create mode 100644 fpuwave.do diff --git a/fpuwave.do b/fpuwave.do new file mode 100644 index 000000000..f5380e36b --- /dev/null +++ b/fpuwave.do @@ -0,0 +1,802 @@ +onerror {resume} +quietly WaveActivateNextPane {} 0 +add wave -noupdate /ariane_tb/dut/i_ariane/ex_stage_i/pc_i +add wave -noupdate -divider {HandShake In} +add wave -noupdate /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/Op_SI +add wave -noupdate /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/InValid_SI +add wave -noupdate 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/InReady_SO +add wave -noupdate /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/A_DI +add wave -noupdate /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/B_DI +add wave -noupdate /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/C_DI +add wave -noupdate /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/Tag_DI +add wave -noupdate -divider {HandShake Out} +add wave -noupdate /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/OutValid_SO +add wave -noupdate /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/OutReady_SI +add wave -noupdate /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/result_o +add wave -noupdate /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/Tag_DO +add wave -noupdate -divider Unit +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/Clk_CI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/Reset_RBI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/A_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/B_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/C_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/ABox_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/BBox_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/CBox_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/RoundMode_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/Op_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/OpMod_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/VectorialOp_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/FpFmt_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/Tag_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/InValid_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/InReady_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/Flush_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/Z_DO +add wave -noupdate -expand -group Wrapper 
-expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/Status_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/Tag_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/Zext_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/OutValid_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/OutReady_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/FmtInReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/FmtOutResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/FmtOutStatus_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/FmtOutTags_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/FmtOutValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/FmtOutReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/FmtOutResult2d_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/ArbInResults_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/FmtOutTags2d_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/ArbInStatus_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/ArbInTags_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/ArbInValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/ArbInReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/OutValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/OutputProcessed_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/RoundRobin_SP +add wave -noupdate -expand -group Wrapper -expand -group Top -group AddMul /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_addmul_block/RoundRobin_SN +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/Clk_CI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/Reset_RBI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/A_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/ABox_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/RoundMode_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/SrcFmt_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/DstFmt_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/Tag_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/InValid_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/InReady_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/Flush_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/Z_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/Status_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/Tag_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/Zext_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/OutValid_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/OutReady_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/SrcFmt_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/DstFmt_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/Sign_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f -expand /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/FmtInputExp_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/InputExp_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f -expand /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/FmtInputMant_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/InputMant_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/InputMantZero_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/InputZero_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/InputInf_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/InputNan_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/SigNan_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/InputNormal_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/OFBeforeRound_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/OFAfterRound_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/UFAfterRound_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/SpecialRes_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/SpecialResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/SpecialStatus_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/MantLeadingZeroes_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/ExpNormShift_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/InternalExp_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/InternalMant_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f -radix decimal /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/DestExp_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/FinalExp_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/MantPreshift_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/ShiftedMant_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/FinalMant_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f -radix decimal /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/MantShamt_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/FmtPreRndRes_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/PreRndRes_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/RoundSticky_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/ResRounded_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/ResRoundedSignCorr_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/RegularStatus_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/Result_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -group f2f /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/g_f2f/i_fp_f2fcasts/Status_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 -divider +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/Clk_CI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/Reset_RBI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/A_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/ABox_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/RoundMode_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/Op_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/OpMod_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/FpFmt_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/FpFmt2_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/IntFmt_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/Tag_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/InValid_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/InReady_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/Flush_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/Z_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/Status_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/Tag_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/Zext_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/OutValid_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/OutReady_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/OutReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/F2IInValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/I2FInValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/F2FInValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/F2IInReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/I2FInReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/F2FInReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/F2IOutResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/I2FOutResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/F2FOutResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/F2IOutStatus_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/I2FOutStatus_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/F2FOutStatus_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/F2IResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/I2FResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/F2FResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/F2IOutTag_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/I2FOutTag_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/F2FOutTag_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/F2IZext_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/I2FZext_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/F2FZext_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/F2IOutValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/I2FOutValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/F2FOutValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/Result_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/Status_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/Zext_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/TagInt_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/TagIntPiped_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi -group Lane0 /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_conv_multi/OutValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/Clk_CI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/Reset_RBI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/A_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/B_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/C_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/ABox_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/BBox_SI +add wave -noupdate 
-expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/CBox_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/RoundMode_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/Op_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/OpMod_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/FpFmt_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/FpFmt2_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/IntFmt_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/VectorialOp_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/Tag_DI +add wave -noupdate -expand 
-group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/InValid_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/InReady_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/Flush_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/Z_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/Status_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/Tag_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/Zext_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/OutValid_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/OutReady_SI +add wave -noupdate -expand -group Wrapper 
-expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/Target_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/SrcFmtWidth_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/DstFmtSlv_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/IsDstFmtInt_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/SrcShift_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/DstShift_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/DstCPK_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/TagInt_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/VecTag_S +add wave -noupdate -expand -group Wrapper 
-expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/DstVecTag_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/VectorialOp_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/TargetInValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/TargetOutReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/FmtOpResults_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/IntFmtOpResults_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/LaneResults_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/ResultVectorial_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/IsResultFmtInt_S 
+add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/IsResultCPK_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/ResultShift_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/ResultFpFmt_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/ResultIntFmt_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/LaneStatus_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/LaneOutValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/LaneInReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/LaneZext_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/LaneTags_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/TargetDelayed_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv -expand -group Multi /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/g_mergedOps/i_conv_multifmt_slice/PackedResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/Clk_CI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/Reset_RBI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/A_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/B_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/C_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/ABox_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/BBox_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/CBox_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/RoundMode_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/Op_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/OpMod_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/FpFmt_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/FpFmt2_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/IntFmt_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/VectorialOp_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/Tag_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/InValid_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/InReady_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/Flush_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/Z_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/Status_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/Tag_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/Zext_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/OutValid_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/OutReady_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/FmtInReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/FmtOutResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/FmtOutStatus_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/FmtOutTags_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/FmtOutZext_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/FmtOutValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/FmtOutReady_S +add wave -noupdate -expand -group Wrapper -expand 
-group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/FmtOutResult2d_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/ArbInResults_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/FmtOutTags2d_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/ArbInStatus_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/ArbInTags_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/ArbInValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/ArbInReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/OutValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/OutputProcessed_S +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/ArbOutTag_D +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/RoundRobin_SP +add wave -noupdate -expand -group Wrapper -expand -group Top -group Conv /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_conv_block/RoundRobin_SN +add wave 
-noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Clk_CI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Rst_RBI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Div_start_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Sqrt_start_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Operand_a_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Operand_b_DI +add wave -noupdate -expand -group 
Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/RM_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Precision_ctl_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Format_sel_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Kill_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Result_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Fflags_SO +add wave -noupdate -expand -group Wrapper -expand -group Top 
-expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Ready_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Done_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Exp_a_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Exp_b_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Mant_a_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Mant_b_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper 
-expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Exp_z_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Mant_z_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Sign_z_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Start_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/RM_dly_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Div_enable_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Sqrt_enable_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Inf_a_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Inf_b_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Zero_a_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Zero_b_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/NaN_a_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/NaN_b_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/SNaN_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Special_case_SB +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Special_case_dly_SB +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/Full_precision_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/FP32_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/FP64_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/FP16_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -expand -group divsqrt_inst /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_divsqrt/FP16ALT_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/Clk_CI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/Reset_RBI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/A_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/B_DI +add wave -noupdate -expand 
-group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/ABox_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/BBox_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/RoundMode_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/Op_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/OpMod_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/FpFmt_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/Tag_DI +add wave -noupdate -expand -group 
Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/InValid_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/InReady_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/Flush_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/Z_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/Status_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/Tag_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/Zext_SO +add wave -noupdate -expand -group 
Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/OutValid_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/OutReady_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/InReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/IsInFP8_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/DivValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/SqrtValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/DivSqrtReady_S +add wave -noupdate 
-expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/A_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/B_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/Fmt_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/DivSqrtDone_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/DivSqrtResultPre_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/DivSqrtResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/DivSqrtStatusSlv_D +add 
wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/DivSqrtStatus_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/CurrentTag_DP +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/IsOutFP8_SP +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/HoldResult_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/HoldResult_DP +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/HoldStatus_DP +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/PipeInValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/PipeInReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/PipeInDataSel_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/PipeInResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/PipeInStatus_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/State_DP +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/State_DN +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group 
DivSqrt -expand -group Wrapper -group divPipe /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/Clk_CI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/Reset_RBI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/Result_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/Status_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/Tag_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/InValid_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/InReady_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/Flush_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/ResultPiped_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/StatusPiped_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/TagPiped_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/OutValid_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/OutReady_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/ResPipe_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/StatPipe_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe -expand /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/TagPipe_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/ValidPipe_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt -expand -group Wrapper -group divPipe /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/g_mergedOps/i_divsqrt_multifmt_slice/g_sliceLanes(0)/g_laneInst/i_fp_divsqrt_multi/i_fp_pipe/StageReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/Clk_CI +add wave -noupdate -expand -group Wrapper -expand -group Top 
-expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/Reset_RBI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/A_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/B_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/C_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/ABox_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/BBox_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/CBox_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/RoundMode_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/OpMod_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/FpFmt_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/VectorialOp_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/Tag_DI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/Flush_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/Z_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/Status_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/Tag_DO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/Zext_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/OutValid_SO +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/OutReady_SI +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/FmtInReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/FmtOutResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/FmtOutStatus_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/FmtOutTags_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/FmtOutZext_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/FmtOutValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/FmtOutReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/FmtOutResult2d_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/ArbInResults_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/FmtOutTags2d_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/ArbInStatus_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/ArbInTags_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/ArbInValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/ArbInReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top 
-expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/OutValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/OutputProcessed_S +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/ArbOutTag_D +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/RoundRobin_SP +add wave -noupdate -expand -group Wrapper -expand -group Top -expand -group DivSqrt /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/i_divsqrt_block/RoundRobin_SN +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/Clk_CI +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/Reset_RBI +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/A_DI +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/B_DI +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/C_DI +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/RoundMode_SI +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/Op_SI +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/OpMod_SI +add wave -noupdate -expand -group 
Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/VectorialOp_SI +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/FpFmt_SI +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/FpFmt2_SI +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/IntFmt_SI +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/Tag_DI +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/Flush_SI +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/InValid_SI +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/InReady_SO +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/Z_DO +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/Status_DO +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/OutValid_SO +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/OutReady_SI +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/Tag_DO +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/OpGrpInValid_S +add wave -noupdate -expand -group Wrapper -expand -group 
Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/OpGrpInReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/ABox_S +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/BBox_S +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/CBox_S +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/AddMulResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/DivSqrtResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/NonCompResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/ConvResult_D +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/OpGrpOutResults_D +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/OpGrpOutStatuses_D +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/OpGrpOutTags_D +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/OpGrpOutZext_S +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/OpGrpOutValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/OpGrpOutReady_S +add wave -noupdate -expand 
-group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/ArbInResults_D +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/ArbInStatuses_D +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/ArbInTags_D +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/ArbInValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/ArbInReady_S +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/OutValid_S +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/OutputProcessed_S +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/RoundRobin_SP +add wave -noupdate -expand -group Wrapper -expand -group Top /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpnew_top_i/i_fpnew/RoundRobin_SN +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/clk_i +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/rst_ni +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/flush_i +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/trans_id_i +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fu_i +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_valid_i +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_ready_o +add wave 
-noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/operator_i +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/operand_a_i +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/operand_b_i +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/operand_c_i +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_fmt_i +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_rm_i +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_frm_i +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_trans_id_o +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/result_o +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_valid_o +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_exception_o +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/operand_a_d +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/operand_a_q +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/operand_a +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/operand_b_d +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/operand_b_q +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/operand_b +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/operand_c_d +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/operand_c_q +add wave -noupdate -expand -group 
Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/operand_c +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_op_d +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_op_q +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_op +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_op_mod_d +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_op_mod_q +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_op_mod +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_fmt_d +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_fmt_q +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_fmt +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_fmt2_d +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_fmt2_q +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_fmt2 +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_ifmt_d +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_ifmt_q +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_ifmt +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_rm_d +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_rm_q +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_rm +add wave -noupdate -expand -group Wrapper 
/ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_vec_op_d +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_vec_op_q +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_vec_op +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_tag_d +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_tag_q +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_tag +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_in_ready +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_in_valid +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_out_ready +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_out_valid +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/fpu_status +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/state_q +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/state_d +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/hold_inputs +add wave -noupdate -expand -group Wrapper /ariane_tb/dut/i_ariane/ex_stage_i/fpu_gen/fpu_i/use_hold +add wave -noupdate -divider Ariane +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/clk_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/rst_ni +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/flush_i +add wave -noupdate -expand 
-group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/issue_instr_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/issue_instr_valid_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/issue_ack_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/rs1_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/rs1_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/rs1_valid_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/rs2_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/rs2_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/rs2_valid_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/rs3_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/rs3_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/rs3_valid_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/rd_clobber_gpr_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands 
/ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/rd_clobber_fpr_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fu_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/operator_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/operand_a_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/operand_b_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/imm_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/trans_id_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/pc_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/is_compressed_instr_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/alu_ready_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/alu_valid_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/branch_valid_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/branch_predict_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/lsu_ready_i 
+add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/lsu_valid_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/mult_ready_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/mult_valid_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fpu_ready_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fpu_valid_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fpu_fmt_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fpu_rm_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/csr_valid_o +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/waddr_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/wdata_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/we_gpr_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/we_fpr_i +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/stall +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands 
/ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fu_busy +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/operand_a_regfile +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/operand_b_regfile +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/operand_c_regfile +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/operand_a_n +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/operand_a_q +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/operand_b_n +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/operand_b_q +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/imm_n +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/imm_q +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/alu_valid_n +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/alu_valid_q +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/mult_valid_n +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/mult_valid_q 
+add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fpu_valid_n +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fpu_valid_q +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fpu_fmt_n +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fpu_fmt_q +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fpu_rm_n +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fpu_rm_q +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/lsu_valid_n +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/lsu_valid_q +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/csr_valid_n +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/csr_valid_q +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/branch_valid_n +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/branch_valid_q +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/trans_id_n +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands 
/ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/trans_id_q +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/operator_n +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/operator_q +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fu_n +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fu_q +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/forward_rs1 +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/forward_rs2 +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/forward_rs3 +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/orig_instr +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/rdata +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/raddr_pack +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/waddr_pack +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/wdata_pack +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/we_pack +add wave -noupdate -expand -group 
{Issue stage} -group Issue_Read_Operands -expand /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fprdata +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fp_raddr_pack +add wave -noupdate -expand -group {Issue stage} -group Issue_Read_Operands /ariane_tb/dut/i_ariane/issue_stage_i/i_issue_read_operands/fp_wdata_pack +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/clk_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/rst_ni +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/flush_unissued_instr_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/flush_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/decoded_instr_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/decoded_instr_valid_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/is_ctrl_flow_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/decoded_instr_ack_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/fu_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/operator_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/operand_a_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/operand_b_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/imm_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/trans_id_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/pc_o +add wave -noupdate -expand -group {Issue stage} 
/ariane_tb/dut/i_ariane/issue_stage_i/is_compressed_instr_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/alu_ready_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/alu_valid_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/resolve_branch_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/lsu_ready_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/lsu_valid_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/branch_valid_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/branch_predict_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/mult_ready_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/mult_valid_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/fpu_ready_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/fpu_valid_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/fpu_fmt_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/fpu_rm_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/csr_valid_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/trans_id_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/resolved_branch_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/wbdata_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/ex_ex_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/wb_valid_i +add wave -noupdate -expand -group {Issue stage} 
/ariane_tb/dut/i_ariane/issue_stage_i/waddr_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/wdata_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/we_gpr_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/we_fpr_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/commit_instr_o +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/commit_ack_i +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/rd_clobber_gpr_sb_iro +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/rd_clobber_fpr_sb_iro +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/rs1_iro_sb +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/rs1_sb_iro +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/rs1_valid_sb_iro +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/rs2_iro_sb +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/rs2_sb_iro +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/rs2_valid_iro_sb +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/rs3_iro_sb +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/rs3_sb_iro +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/rs3_valid_iro_sb +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/issue_instr_rename_sb +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/issue_instr_valid_rename_sb +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/issue_ack_sb_rename +add wave 
-noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/issue_instr_sb_iro +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/issue_instr_valid_sb_iro +add wave -noupdate -expand -group {Issue stage} /ariane_tb/dut/i_ariane/issue_stage_i/issue_ack_iro_sb +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/clk_i +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/rst_ni +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/flush_unissued_instr_i +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/flush_i +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/unresolved_branch_i +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/rd_clobber_gpr_o +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/rd_clobber_fpr_o +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/rs1_i +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/rs1_o +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/rs1_valid_o +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/rs2_i +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/rs2_o +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/rs2_valid_o +add 
wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/rs3_i +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/rs3_o +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/rs3_valid_o +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/commit_instr_o +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/commit_ack_i +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/decoded_instr_i +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/decoded_instr_valid_i +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/decoded_instr_ack_o +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/issue_instr_o +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/issue_instr_valid_o +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/issue_ack_i +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/trans_id_i +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/wbdata_i +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/ex_i +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/wb_valid_i +add wave -noupdate -expand -group {Issue 
stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/issue_cnt_n +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/issue_cnt_q +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/issue_pointer_n +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/issue_pointer_q +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/commit_pointer_n +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/commit_pointer_q +add wave -noupdate -expand -group {Issue stage} -group ScoreBoard /ariane_tb/dut/i_ariane/issue_stage_i/i_scoreboard/issue_full +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/clk_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/rst_ni +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/flush_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/fu_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/operator_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/operand_a_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/operand_b_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/imm_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/trans_id_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/pc_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/is_compressed_instr_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/alu_ready_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/alu_valid_i +add wave -noupdate -group {EX 
stage} /ariane_tb/dut/i_ariane/ex_stage_i/alu_valid_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/alu_result_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/alu_trans_id_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/alu_exception_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/branch_valid_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/branch_predict_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/resolved_branch_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/resolve_branch_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/csr_valid_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/csr_addr_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/csr_commit_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/lsu_ready_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/lsu_valid_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/lsu_valid_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/lsu_result_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/lsu_trans_id_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/lsu_commit_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/lsu_commit_ready_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/lsu_exception_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/no_st_pending_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/amo_valid_commit_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/mult_ready_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/mult_valid_i +add wave -noupdate 
-group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/mult_trans_id_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/mult_result_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/mult_valid_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/fpu_ready_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/fpu_valid_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/fpu_fmt_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/fpu_rm_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/fpu_frm_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/fpu_trans_id_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/fpu_result_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/fpu_valid_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/fpu_exception_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/enable_translation_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/en_ld_st_translation_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/flush_tlb_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/priv_lvl_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/ld_st_priv_lvl_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/sum_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/mxr_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/satp_ppn_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/asid_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/icache_areq_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/icache_areq_o +add wave -noupdate -group {EX stage} 
/ariane_tb/dut/i_ariane/ex_stage_i/dcache_req_ports_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/dcache_req_ports_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/amo_req_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/amo_resp_i +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/itlb_miss_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/dtlb_miss_o +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/alu_branch_res +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/alu_data +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/mult_data +add wave -noupdate -group {EX stage} /ariane_tb/dut/i_ariane/ex_stage_i/lsu_data +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/pc_i +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/is_compressed_i +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/is_illegal_i +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/instruction_i +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/branch_predict_i +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/ex_i +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/priv_lvl_i +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/debug_mode_i +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/fs_i +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/frm_i +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/tvm_i +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/tw_i +add wave 
-noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/tsr_i +add wave -noupdate -expand -group Decoder -expand /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/instruction_o +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/is_control_flow_instr_o +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/illegal_instr +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/ecall +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/ebreak +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/check_fprm +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/instr +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/imm_select +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/imm_i_type +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/imm_s_type +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/imm_sb_type +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/imm_u_type +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/imm_uj_type +add wave -noupdate -expand -group Decoder /ariane_tb/dut/i_ariane/id_stage_i/decoder_i/imm_bi_type +TreeUpdate [SetDefaultTree] +WaveRestoreCursors {{Cursor 6} {25911563 ns} 1} {{Cursor 3} {25812676 ns} 0} +quietly wave cursor active 2 +configure wave -namecolwidth 259 +configure wave -valuecolwidth 178 +configure wave -justifyvalue left +configure wave -signalnamewidth 1 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 +configure wave -gridoffset 0 +configure wave -gridperiod 1 +configure wave -griddelta 40 +configure wave 
-timeline 0 +configure wave -timelineunits ns +update +WaveRestoreZoom {25810978 ns} {25815752 ns} diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index f19307e37..23ada2890 100644 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -45,6 +45,16 @@ package ariane_pkg; localparam bit XF8 = 1'b1; // Is quarter-precision float extension (Xf8) enabled localparam bit XFVEC = 1'b1; // Is vectorial float extension (Xfvec) enabled + // Transprecision float unit + localparam logic [30:0] LAT_COMP_FP32 = 'd3; + localparam logic [30:0] LAT_COMP_FP64 = 'd4; + localparam logic [30:0] LAT_COMP_FP16 = 'd3; + localparam logic [30:0] LAT_COMP_FP16ALT = 'd3; + localparam logic [30:0] LAT_COMP_FP8 = 'd2; + localparam logic [30:0] LAT_DIVSQRT = 'd2; + localparam logic [30:0] LAT_NONCOMP = 'd1; + localparam logic [30:0] LAT_CONV = 'd2; + // -------------------------------------- // vvvv Don't change these by hand! vvvv localparam bit FP_PRESENT = RVF | RVD | XF16 | XF16ALT | XF8; @@ -92,7 +102,7 @@ package ariane_pkg; }; // enables a commit log which matches spikes commit log format for easier trace comparison - localparam bit ENABLE_SPIKE_COMMIT_LOG = 1'b0; + localparam bit ENABLE_SPIKE_COMMIT_LOG = 1'b1; // ------------- Dangerouse ------------- // if set to zero a flush will not invalidate the cache-lines, in a single core environment diff --git a/src/fpu b/src/fpu index 74c4bdc29..be1d311be 160000 --- a/src/fpu +++ b/src/fpu @@ -1 +1 @@ -Subproject commit 74c4bdc29ee871accc1fc81accfcfd8d8e164ff8 +Subproject commit be1d311be99ff5e64095f142631eb88d472dafc6 diff --git a/src/fpu_div_sqrt_mvp b/src/fpu_div_sqrt_mvp index 6d48ba414..962b37d46 160000 --- a/src/fpu_div_sqrt_mvp +++ b/src/fpu_div_sqrt_mvp @@ -1 +1 @@ -Subproject commit 6d48ba414973a6e920d895aae1b2976cf283196a +Subproject commit 962b37d464de4809b3e56b4c45451f2c459e338f diff --git a/src/fpu_wrap.sv b/src/fpu_wrap.sv index 67686cd96..dc3406bfd 100644 --- a/src/fpu_wrap.sv +++ b/src/fpu_wrap.sv @@ -131,13 
+131,15 @@ module fpu_wrap ( logic [TRANS_ID_BITS-1:0] fpu_tag_d, fpu_tag_q, fpu_tag; - logic fpu_in_ready, reg_in_ready; - logic fpu_in_valid, reg_in_valid; - logic fpu_out_ready, reg_out_ready; - logic fpu_out_valid, reg_out_valid; + logic fpu_in_ready, fpu_in_valid; + logic fpu_out_ready, fpu_out_valid; logic [4:0] fpu_status; + // FSM to handle protocol inversion + enum logic {READY, STALL} state_q, state_d; + logic hold_inputs; + logic use_hold; //----------------------------- // Translate inputs @@ -451,29 +453,53 @@ module fpu_wrap ( // Upstream protocol inversion: InValid depends on InReady //--------------------------------------------------------- - // Input is ready whenever the register is free to accept a potentially spilling instruction - assign fpu_ready_o = ~reg_in_valid & (~reg_out_valid | reg_out_ready); + always_comb begin : p_inputFSM + // Default Values + fpu_ready_o = 1'b0; + fpu_in_valid = 1'b0; + hold_inputs = 1'b0; // hold register disabled + use_hold = 1'b0; // inputs go directly to unit + state_d = state_q; // stay in the same state - // Input data goes to the buffer register if the received instruction cannot be handled - assign reg_in_valid = fpu_valid_i & ~fpu_in_ready; + // FSM + unique case (state_q) + // Default state, ready for instructions + READY : begin + fpu_ready_o = 1'b1; // Act as if FPU ready + fpu_in_valid = fpu_valid_i; // Forward input valid to FPU + // There is a transaction but the FPU can't handle it + if (fpu_valid_i & ~fpu_in_ready) begin + fpu_ready_o = 1'b0; // No token given to Issue + hold_inputs = 1'b1; // save inputs to the holding register + state_d = STALL; // stall future incoming requests + end + end + // We're stalling the upstream (ready=0) + STALL : begin + fpu_in_valid = 1'b1; // we have data for the FPU + use_hold = 1'b1; // the data comes from the hold reg + // Wait until it's consumed + if (fpu_in_ready) begin + fpu_ready_o = 1'b1; // Give a token to issue + state_d = READY; // accept future 
requests + end + end + // Default: emit default values + default : ; + endcase - // Data being applied to unit is taken from the register if there's an instruction waiting - assign fpu_in_valid = reg_out_valid | fpu_valid_i; + // Flushing will override issue and go back to idle + if (flush_i) begin + fpu_in_valid = 1'b0; + state_d = READY; + end - // The input register is ready to accept new data if: - // 1. The current instruction will be processed by the fpu - // 2. There is no instruction waiting in the register - assign reg_in_ready = reg_out_ready | ~reg_out_valid; + end - // Register output side is signalled ready if: - // 1. The operation held in the reg is valid and will be processed - // 2. The register doesn't hold a valid instructin - assign reg_out_ready = fpu_in_ready | ~reg_out_valid; - - // Buffer register - always_ff @(posedge clk_i or negedge rst_ni) begin : fp_buffer_reg + // Buffer register and FSM state holding + always_ff @(posedge clk_i or negedge rst_ni) begin : fp_hold_reg if(~rst_ni) begin - reg_out_valid <= '0; + state_q <= READY; operand_a_q <= '0; operand_b_q <= '0; operand_c_q <= '0; @@ -486,37 +512,36 @@ module fpu_wrap ( fpu_vec_op_q <= '0; fpu_tag_q <= '0; end else begin - if (reg_out_ready || flush_i) begin // Only advance pipeline if unit is ready for our op - reg_out_valid <= reg_in_valid & ~flush_i; - if (reg_in_valid) begin // clock gate data to save poer - operand_a_q <= operand_a_d; - operand_b_q <= operand_b_d; - operand_c_q <= operand_c_d; - fpu_op_q <= fpu_op_d; - fpu_op_mod_q <= fpu_op_mod_d; - fpu_fmt_q <= fpu_fmt_d; - fpu_fmt2_q <= fpu_fmt2_d; - fpu_ifmt_q <= fpu_ifmt_d; - fpu_rm_q <= fpu_rm_d; - fpu_vec_op_q <= fpu_vec_op_d; - fpu_tag_q <= fpu_tag_d; - end + state_q <= state_d; + // Hold register is [TRIGGERED] by FSM + if (hold_inputs) begin + operand_a_q <= operand_a_d; + operand_b_q <= operand_b_d; + operand_c_q <= operand_c_d; + fpu_op_q <= fpu_op_d; + fpu_op_mod_q <= fpu_op_mod_d; + fpu_fmt_q <= fpu_fmt_d; + 
fpu_fmt2_q <= fpu_fmt2_d; + fpu_ifmt_q <= fpu_ifmt_d; + fpu_rm_q <= fpu_rm_d; + fpu_vec_op_q <= fpu_vec_op_d; + fpu_tag_q <= fpu_tag_d; end end end - // Select FPU input data: from register if valid data in register, else directly vom input - assign operand_a = reg_out_valid ? operand_a_q : operand_a_d; - assign operand_b = reg_out_valid ? operand_b_q : operand_b_d; - assign operand_c = reg_out_valid ? operand_c_q : operand_c_d; - assign fpu_op = reg_out_valid ? fpu_op_q : fpu_op_d; - assign fpu_op_mod = reg_out_valid ? fpu_op_mod_q : fpu_op_mod_d; - assign fpu_fmt = reg_out_valid ? fpu_fmt_q : fpu_fmt_d; - assign fpu_fmt2 = reg_out_valid ? fpu_fmt2_q : fpu_fmt2_d; - assign fpu_ifmt = reg_out_valid ? fpu_ifmt_q : fpu_ifmt_d; - assign fpu_rm = reg_out_valid ? fpu_rm_q : fpu_rm_d; - assign fpu_vec_op = reg_out_valid ? fpu_vec_op_q : fpu_vec_op_d; - assign fpu_tag = reg_out_valid ? fpu_tag_q : fpu_tag_d; + // Select FPU input data: from register if valid data in register, else directly from input + assign operand_a = use_hold ? operand_a_q : operand_a_d; + assign operand_b = use_hold ? operand_b_q : operand_b_d; + assign operand_c = use_hold ? operand_c_q : operand_c_d; + assign fpu_op = use_hold ? fpu_op_q : fpu_op_d; + assign fpu_op_mod = use_hold ? fpu_op_mod_q : fpu_op_mod_d; + assign fpu_fmt = use_hold ? fpu_fmt_q : fpu_fmt_d; + assign fpu_fmt2 = use_hold ? fpu_fmt2_q : fpu_fmt2_d; + assign fpu_ifmt = use_hold ? fpu_ifmt_q : fpu_ifmt_d; + assign fpu_rm = use_hold ? fpu_rm_q : fpu_rm_d; + assign fpu_vec_op = use_hold ? fpu_vec_op_q : fpu_vec_op_d; + assign fpu_tag = use_hold ? 
fpu_tag_q : fpu_tag_d; //--------------- // FPU instance @@ -532,14 +557,14 @@ module fpu_wrap ( .Xf8 ( XF8 ), .Xfvec ( XFVEC ), // TODO MOVE THESE VALUES TO PACKAGE - .LATENCY_COMP_F ( 31'h2 ), - .LATENCY_COMP_D ( 31'h3 ), - .LATENCY_COMP_Xf16 ( 31'h2 ), - .LATENCY_COMP_Xf16alt ( 31'h2 ), - .LATENCY_COMP_Xf8 ( 31'h1 ), - .LATENCY_DIVSQRT ( 31'h1 ), - .LATENCY_NONCOMP ( 31'h0 ), - .LATENCY_CONV ( 31'h1 ) + .LATENCY_COMP_F ( LAT_COMP_FP32 ), + .LATENCY_COMP_D ( LAT_COMP_FP64 ), + .LATENCY_COMP_Xf16 ( LAT_COMP_FP16 ), + .LATENCY_COMP_Xf16alt ( LAT_COMP_FP16ALT ), + .LATENCY_COMP_Xf8 ( LAT_COMP_FP8 ), + .LATENCY_DIVSQRT ( LAT_DIVSQRT ), + .LATENCY_NONCOMP ( LAT_NONCOMP ), + .LATENCY_CONV ( LAT_CONV ) ) fpnew_top_i ( .Clk_CI ( clk_i ), .Reset_RBI ( rst_ni ), From 2549ecd683f97bb2552b2f511a26c1c2536e4688 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Mon, 8 Oct 2018 12:57:12 +0200 Subject: [PATCH 80/94] Make AMOs optional --- include/ariane_pkg.sv | 6 ++++-- src/commit_stage.sv | 2 +- src/csr_regfile.sv | 2 +- src/decoder.sv | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index 23ada2890..18ac4cf31 100644 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -37,7 +37,7 @@ package ariane_pkg; // Floating-point extensions configuration localparam bit RVF = 1'b1; // Is F extension enabled localparam bit RVD = 1'b1; // Is D extension enabled - + localparam bit RVA = 1'b0; // Is A extension enabled // Transprecision floating-point extensions configuration localparam bit XF16 = 1'b1; // Is half-precision float extension (Xf16) enabled @@ -76,7 +76,9 @@ package ariane_pkg; // ^^^^ until here ^^^^ // --------------------- - localparam logic [63:0] ISA_CODE = (1 << 0) // A - Atomic Instructions extension + localparam logic [63:0] ARIANE_MARCHID = 64'd3; + + localparam logic [63:0] ISA_CODE = (RVA << 0) // A - Atomic Instructions extension | (1 << 2) // C - Compressed extension | (RVD << 3) // D - 
Double precsision floating-point extension | (RVF << 5) // F - Single precsision floating-point extension diff --git a/src/commit_stage.sv b/src/commit_stage.sv index c7f8dfd76..635845f68 100644 --- a/src/commit_stage.sv +++ b/src/commit_stage.sv @@ -180,7 +180,7 @@ module commit_stage #( // ------------------ // AMO // ------------------ - if (instr_0_is_amo && !commit_instr_i[0].ex.valid) begin + if (RVA && instr_0_is_amo && !commit_instr_i[0].ex.valid) begin // AMO finished commit_ack_o[0] = amo_resp_i.ack; // flush the pipeline diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv index 0e71ea504..80e52fa1d 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -219,7 +219,7 @@ module csr_regfile #( riscv::CSR_PMPCFG0: csr_rdata = pmpcfg0_q; riscv::CSR_PMPADDR0: csr_rdata = pmpaddr0_q; riscv::CSR_MVENDORID: csr_rdata = 64'b0; // not implemented - riscv::CSR_MARCHID: csr_rdata = 64'b0; // PULP, anonymous source (no allocated ID yet) + riscv::CSR_MARCHID: csr_rdata = ARIANE_MARCHID; riscv::CSR_MIMPID: csr_rdata = 64'b0; // not implemented riscv::CSR_MHARTID: csr_rdata = {53'b0, cluster_id_i[5:0], 1'b0, core_id_i[3:0]}; riscv::CSR_MCYCLE: csr_rdata = cycle_q; diff --git a/src/decoder.sv b/src/decoder.sv index 22995c5f3..4362d551e 100644 --- a/src/decoder.sv +++ b/src/decoder.sv @@ -895,7 +895,7 @@ module decoder ( instruction_o.rd[4:0] = instr.atype.rd; // TODO(zarubaf): Ordering // words - if (instr.stype.funct3 == 3'h2) begin + if (RVA && instr.stype.funct3 == 3'h2) begin unique case (instr.instr[31:27]) 5'h0: instruction_o.op = AMO_ADDW; 5'h1: instruction_o.op = AMO_SWAPW; @@ -914,7 +914,7 @@ module decoder ( default: illegal_instr = 1'b1; endcase // double words - end else if (instr.stype.funct3 == 3'h3) begin + end else if (RVA && instr.stype.funct3 == 3'h3) begin unique case (instr.instr[31:27]) 5'h0: instruction_o.op = AMO_ADDD; 5'h1: instruction_o.op = AMO_SWAPD; From 00334ccac37e02ec8781049513b5bd1f7d3f4f13 Mon Sep 17 00:00:00 2001 From: Stefan Mach 
Date: Mon, 8 Oct 2018 15:10:58 +0200 Subject: [PATCH 81/94] :arrow_up: Bump FPU --- src/fpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fpu b/src/fpu index be1d311be..39626c97b 160000 --- a/src/fpu +++ b/src/fpu @@ -1 +1 @@ -Subproject commit be1d311be99ff5e64095f142631eb88d472dafc6 +Subproject commit 39626c97bca0d1245ca969b68654a19b65b0b9e6 From 0a2534aca17ecaa1c0d3eccb8ca77b991c072a5f Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Mon, 8 Oct 2018 16:48:57 +0200 Subject: [PATCH 82/94] :sparkles: Add precision control signal for div/sqrt --- include/riscv_pkg.sv | 7 ++++--- src/ariane.sv | 3 +++ src/csr_regfile.sv | 4 +++- src/ex_stage.sv | 1 + 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/include/riscv_pkg.sv b/include/riscv_pkg.sv index cd7a14e98..977d689df 100644 --- a/include/riscv_pkg.sv +++ b/include/riscv_pkg.sv @@ -413,9 +413,10 @@ package riscv; // Floating-Point control and status register (32-bit!) typedef struct packed { - logic [31:8] reserved; // reserved for L extension, return 0 otherwise - logic [2:0] frm; // float rounding mode - logic [4:0] fflags; // float exception flags + logic [31:15] reserved; // reserved for L extension, return 0 otherwise + logic [6:0] fprec; // div/sqrt precision control + logic [2:0] frm; // float rounding mode + logic [4:0] fflags; // float exception flags } fcsr_t; // ----- diff --git a/src/ariane.sv b/src/ariane.sv index f16f3b253..bfdf5f4b5 100644 --- a/src/ariane.sv +++ b/src/ariane.sv @@ -149,6 +149,7 @@ module ariane #( logic [4:0] fflags_csr_commit; riscv::xs_t fs; logic [2:0] frm_csr_id_issue_ex; + logic [6:0] fprec_csr_ex; logic enable_translation_csr_ex; logic en_ld_st_translation_csr_ex; riscv::priv_lvl_t ld_st_priv_lvl_csr_ex; @@ -383,6 +384,7 @@ module ariane #( .fpu_fmt_i ( fpu_fmt_id_ex ), .fpu_rm_i ( fpu_rm_id_ex ), .fpu_frm_i ( frm_csr_id_issue_ex ), + .fpu_prec_i ( fprec_csr_ex ), .fpu_trans_id_o ( fpu_trans_id_ex_id ), .fpu_result_o ( fpu_result_ex_id ), 
.fpu_valid_o ( fpu_valid_ex_id ), @@ -475,6 +477,7 @@ module ariane #( .fs_o ( fs ), .fflags_o ( fflags_csr_commit ), .frm_o ( frm_csr_id_issue_ex ), + .fprec_o ( fprec_csr_ex ), .ld_st_priv_lvl_o ( ld_st_priv_lvl_csr_ex ), .en_translation_o ( enable_translation_csr_ex ), .en_ld_st_translation_o ( en_ld_st_translation_csr_ex ), diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv index 80e52fa1d..4498d0632 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -53,6 +53,7 @@ module csr_regfile #( output riscv::xs_t fs_o, // Floating point extension status output logic [4:0] fflags_o, // Floating-Point Accured Exceptions output logic [2:0] frm_o, // Floating-Point Dynamic Rounding Mode + output logic [6:0] fprec_o, // Floating-Point Precision Control // MMU output logic en_translation_o, // enable VA translation output logic en_ld_st_translation_o, // enable VA translation for load and stores @@ -341,7 +342,7 @@ module csr_regfile #( update_access_exception = 1'b1; end else begin dirty_fp_state_csr = 1'b1; - fcsr_d[7:0] = csr_wdata[7:0]; // ignore writes to reserved space + fcsr_d[14:0] = csr_wdata[14:0]; // ignore writes to reserved space // this instruction has side-effects flush_o = 1'b1; end @@ -900,6 +901,7 @@ module csr_regfile #( // FPU outputs assign fflags_o = fcsr_q.fflags; assign frm_o = fcsr_q.frm; + assign fprec_o = fcsr_q.fprec; // MMU outputs assign satp_ppn_o = satp_q.ppn; assign asid_o = satp_q.asid[ASID_WIDTH-1:0]; diff --git a/src/ex_stage.sv b/src/ex_stage.sv index 04d0fe4e9..f78c827e9 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -70,6 +70,7 @@ module ex_stage #( input logic [1:0] fpu_fmt_i, // FP format input logic [2:0] fpu_rm_i, // FP rm input logic [2:0] fpu_frm_i, // FP frm csr + input logic [6:0] fpu_prec_i, // FP precision control output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o, output logic [63:0] fpu_result_o, output logic fpu_valid_o, From d3b275421dee02611d9b4b20019e8ba52753aabb Mon Sep 17 00:00:00 2001 From: Florian 
Zaruba Date: Tue, 9 Oct 2018 19:53:16 +0200 Subject: [PATCH 83/94] Update FPU --- src/fpu | 2 +- src/fpu_wrap.sv | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/fpu b/src/fpu index 39626c97b..9ca638cb9 160000 --- a/src/fpu +++ b/src/fpu @@ -1 +1 @@ -Subproject commit 39626c97bca0d1245ca969b68654a19b65b0b9e6 +Subproject commit 9ca638cb9001da830ef9698cac2652fdbaea1b91 diff --git a/src/fpu_wrap.sv b/src/fpu_wrap.sv index dc3406bfd..e9a33c0a2 100644 --- a/src/fpu_wrap.sv +++ b/src/fpu_wrap.sv @@ -490,7 +490,6 @@ module fpu_wrap ( // Flushing will override issue and go back to idle if (flush_i) begin - fpu_in_valid = 1'b0; state_d = READY; end From 160644ad9a5318bf0b06bee79ca1c6af05823b57 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Wed, 10 Oct 2018 17:19:09 +0200 Subject: [PATCH 84/94] :bug: Fix spurious PTW fault bubbling to frontend --- src/cache_subsystem/std_icache.sv | 39 ++++++++++++++++++------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/src/cache_subsystem/std_icache.sv b/src/cache_subsystem/std_icache.sv index a6dcc2def..5fbb295b5 100644 --- a/src/cache_subsystem/std_icache.sv +++ b/src/cache_subsystem/std_icache.sv @@ -50,8 +50,8 @@ module std_icache #( logic flushing_d, flushing_q; // signals - logic [ICACHE_SET_ASSOC-1:0] req; // request to data memory - logic [ICACHE_SET_ASSOC-1:0] vld_req; // request to valid/tag memory + logic [ICACHE_SET_ASSOC-1:0] req; // request to data memory + logic [ICACHE_SET_ASSOC-1:0] vld_req; // request to valid/tag memory logic [(ICACHE_LINE_WIDTH+7)/8-1:0] data_be; // byte enable for data memory logic [(2**NR_AXI_REFILLS-1):0][7:0] be; // byte enable logic [$clog2(ICACHE_NUM_WORD)-1:0] addr; // this is a cache-line address, to memory array @@ -109,24 +109,24 @@ module std_icache #( .rdata_o ( data_rdata[i] ) ); end - + // -------------------- // Tag Comparison and way select // -------------------- // cacheline selected by hit - logic 
[ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel; - + logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel; + assign idx = vaddr_q[ICACHE_BYTE_OFFSET-1:2]; - generate + generate for (genvar i=0;i compare the tag TAG_CMP, TAG_CMP_SAVED: begin areq_o.fetch_req = 1'b1; // request address translation - + // (speculatively) request the content of all arrays req = '1; vld_req = '1; @@ -255,7 +255,7 @@ module std_icache #( dreq_o.ready = 1'b1; dreq_o.valid = 1'b1; vaddr_d = dreq_i.vaddr; - + // we've got another request if (dreq_i.req) begin // save the index and stay in compare mode @@ -335,7 +335,7 @@ module std_icache #( req = evict_way_q; vld_req = evict_way_q; - + if (axi.r_valid) begin we = 1'b1; tag_wdata.tag = tag_q; @@ -380,7 +380,14 @@ module std_icache #( endcase // those are the states where we need to wait a little longer until we can safely exit - if (dreq_i.kill_s2 && !(state_q inside {REFILL, WAIT_AXI_R_RESP, WAIT_KILLED_REFILL, WAIT_KILLED_AXI_R_RESP}) && !dreq_o.ready) begin + if (dreq_i.kill_s2 && !(state_q inside { + REFILL, + WAIT_AXI_R_RESP, + WAIT_KILLED_AXI_R_RESP, + WAIT_KILLED_REFILL, + WAIT_ADDRESS_TRANSLATION, + WAIT_ADDRESS_TRANSLATION_KILLED}) + && !dreq_o.ready) begin state_d = IDLE; end @@ -443,14 +450,14 @@ module std_icache #( //pragma translate_off `ifndef VERILATOR initial begin - assert ($bits(axi.aw_addr) == 64) + assert ($bits(axi.aw_addr) == 64) else $fatal(1, "[icache] Ariane needs a 64-bit bus"); end // assert that cache only hits on one way onehot: assert property ( - @(posedge clk_i) disable iff (~rst_ni) $onehot0(hit)) + @(posedge clk_i) disable iff (~rst_ni) $onehot0(hit)) else $fatal(1, "[icache] Hit should be one-hot encoded"); `endif -//pragma translate_on +//pragma translate_on endmodule From 17e323088a51c067eb5b8a12fc0ef3b5f3fb516b Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Wed, 10 Oct 2018 20:52:51 +0200 Subject: [PATCH 85/94] Add transprecision FCSR --- include/riscv_pkg.sv | 1 + src/csr_regfile.sv | 22 
++++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/include/riscv_pkg.sv b/include/riscv_pkg.sv index 977d689df..ba5fdf34b 100644 --- a/include/riscv_pkg.sv +++ b/include/riscv_pkg.sv @@ -325,6 +325,7 @@ package riscv; CSR_FFLAGS = 12'h001, CSR_FRM = 12'h002, CSR_FCSR = 12'h003, + CSR_FTRAN = 12'h800, // Supervisor Mode CSRs CSR_SSTATUS = 12'h100, CSR_SIE = 12'h104, diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv index 4498d0632..541d7c2eb 100644 --- a/src/csr_regfile.sv +++ b/src/csr_regfile.sv @@ -174,7 +174,15 @@ module csr_regfile #( if (mstatus_q.fs == riscv::Off) begin read_access_exception = 1'b1; end else begin - csr_rdata = {32'b0, fcsr_q}; + csr_rdata = {56'b0, fcsr_q.frm, fcsr_q.fflags}; + end + end + // non-standard extension + riscv::CSR_FTRAN: begin + if (mstatus_q.fs == riscv::Off) begin + read_access_exception = 1'b1; + end else begin + csr_rdata = {57'b0, fcsr_q.fprec}; end end // debug registers @@ -342,7 +350,17 @@ module csr_regfile #( update_access_exception = 1'b1; end else begin dirty_fp_state_csr = 1'b1; - fcsr_d[14:0] = csr_wdata[14:0]; // ignore writes to reserved space + fcsr_d[7:0] = csr_wdata[7:0]; // ignore writes to reserved space + // this instruction has side-effects + flush_o = 1'b1; + end + end + riscv::CSR_FTRAN: begin + if (mstatus_q.fs == riscv::Off) begin + update_access_exception = 1'b1; + end else begin + dirty_fp_state_csr = 1'b1; + fcsr_d.fprec = csr_wdata[6:0]; // ignore writes to reserved space // this instruction has side-effects flush_o = 1'b1; end From 891579aaab3e819964a4840d752e3cd29cae852e Mon Sep 17 00:00:00 2001 From: Stefan Mach Date: Wed, 10 Oct 2018 21:16:33 +0200 Subject: [PATCH 86/94] :arrow_up: Bump FPU and add divsqrt CSR --- Bender.yml | 10 +++++----- src/ex_stage.sv | 1 + src/fpu | 2 +- src/fpu_wrap.sv | 2 ++ 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Bender.yml b/Bender.yml index 4be9be38b..5b169a76d 100644 --- a/Bender.yml +++ b/Bender.yml @@ 
-31,17 +31,17 @@ sources: - src/fpu/src/ops/fp_fma.vhd - src/fpu/src/ops/fp_divsqrt_multi.vhd - src/fpu/src/ops/fp_noncomp.vhd - - src/fpu/src/ops/fp_f2fcasts.vhd - - src/fpu/src/ops/fp_f2icasts.vhd - - src/fpu/src/ops/fp_i2fcasts.vhd - - src/fpu/src/ops/fp_conv_multi.vhd + - src/fpu/src/ops/fp_f2fcasts_fmt.vhd + - src/fpu/src/ops/fp_f2icasts_fmt.vhd + - src/fpu/src/ops/fp_i2fcasts_fmt.vhd - src/fpu/src/subunits/addmul_fmt_slice.vhd - src/fpu/src/subunits/addmul_block.vhd - src/fpu/src/subunits/divsqrt_multifmt_slice.vhd - src/fpu/src/subunits/divsqrt_block.vhd - src/fpu/src/subunits/noncomp_fmt_slice.vhd - src/fpu/src/subunits/noncomp_block.vhd - - src/fpu/src/subunits/conv_multifmt_slice.vhd + - src/fpu/src/subunits/conv_fmt_slice.vhd + - src/fpu/src/subunits/conv_ifmt_slice.vhd - src/fpu/src/subunits/conv_block.vhd - src/fpu/src/fpnew.vhd - src/fpu/src/fpnew_top.vhd diff --git a/src/ex_stage.sv b/src/ex_stage.sv index f78c827e9..acb41cc46 100644 --- a/src/ex_stage.sv +++ b/src/ex_stage.sv @@ -192,6 +192,7 @@ module ex_stage #( .fpu_fmt_i, .fpu_rm_i, .fpu_frm_i, + .fpu_prec_i, .fpu_trans_id_o, .result_o ( fpu_result_o ), .fpu_valid_o, diff --git a/src/fpu b/src/fpu index 9ca638cb9..86ca5b67f 160000 --- a/src/fpu +++ b/src/fpu @@ -1 +1 @@ -Subproject commit 9ca638cb9001da830ef9698cac2652fdbaea1b91 +Subproject commit 86ca5b67f50d87b83999881a01d2ee74b9cd6868 diff --git a/src/fpu_wrap.sv b/src/fpu_wrap.sv index e9a33c0a2..eb2775b2e 100644 --- a/src/fpu_wrap.sv +++ b/src/fpu_wrap.sv @@ -30,6 +30,7 @@ module fpu_wrap ( input logic [1:0] fpu_fmt_i, input logic [2:0] fpu_rm_i, input logic [2:0] fpu_frm_i, + input logic [6:0] fpu_prec_i, output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o, output logic [FLEN-1:0] result_o, output logic fpu_valid_o, @@ -578,6 +579,7 @@ module fpu_wrap ( .FpFmt2_SI ( fpu_fmt2 ), .IntFmt_SI ( fpu_ifmt ), .Tag_DI ( fpu_tag ), + .PrecCtl_SI ( fpu_prec_i ), .InValid_SI ( fpu_in_valid ), .InReady_SO ( fpu_in_ready ), .Flush_SI ( flush_i ), From 
ca51c8ed4829ef134d0356a1d1657b8a71cb1ed8 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sat, 13 Oct 2018 16:15:56 +0200 Subject: [PATCH 87/94] Merge fpnew branch Disable FPU for the verilator target by default --- Makefile | 33 +++++++++++++++++---------------- include/ariane_pkg.sv | 14 +++++++------- src/fpu_div_sqrt_mvp | 2 +- src/issue_read_operands.sv | 5 +++-- 4 files changed, 28 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index 3f77106a6..e29b71074 100755 --- a/Makefile +++ b/Makefile @@ -29,24 +29,23 @@ torture-logs := -log # Sources # Package files -> compile first -ariane_pkg := include/riscv_pkg.sv \ - src/debug/dm_pkg.sv \ - include/ariane_pkg.sv \ - include/std_cache_pkg.sv \ - src/axi/src/axi_pkg.sv \ - include/axi_intf.sv \ - src/fpu/src/pkg/fpnew_pkg.vhd \ - src/fpu/src/pkg/fpnew_fmts_pkg.vhd \ - src/fpu/src/pkg/fpnew_comps_pkg.vhd \ - src/fpu/src/pkg/fpnew_pkg_constants.vhd +ariane_pkg := include/riscv_pkg.sv \ + src/debug/dm_pkg.sv \ + include/ariane_pkg.sv \ + include/std_cache_pkg.sv \ + src/axi/src/axi_pkg.sv \ + include/axi_intf.sv \ + src/fpu/src/pkg/fpnew_pkg.vhd \ + src/fpu/src/pkg/fpnew_fmts_pkg.vhd \ + src/fpu/src/pkg/fpnew_comps_pkg.vhd \ + src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv \ + src/fpu/src/pkg/fpnew_pkg_constants.vhd # utility modules util := $(wildcard src/util/*.svh) \ src/util/instruction_tracer_pkg.sv \ src/util/instruction_tracer_if.sv \ src/tech_cells_generic/src/cluster_clock_gating.sv \ - src/tech_cells_generic/src/cluster_clock_inverter.sv \ - src/tech_cells_generic/src/pulp_clock_mux2.sv \ src/util/sram.sv # Test packages @@ -60,8 +59,8 @@ src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \ $(wildcard src/fpu/src/utils/*.vhd) \ $(wildcard src/fpu/src/ops/*.vhd) \ $(wildcard src/fpu/src/subunits/*.vhd) \ - src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv \ - $(wildcard src/fpu_div_sqrt_mvp/hdl/*.sv) \ + $(filter-out src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv, \ + $(wildcard 
src/fpu_div_sqrt_mvp/hdl/*.sv)) \ $(wildcard src/frontend/*.sv) \ $(wildcard src/cache_subsystem/*.sv) \ $(wildcard bootrom/*.sv) \ @@ -90,6 +89,8 @@ src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \ src/common_cells/src/lzc.sv \ src/common_cells/src/rrarbiter.sv \ src/common_cells/src/lfsr_8bit.sv \ + src/tech_cells_generic/src/cluster_clock_inverter.sv \ + src/tech_cells_generic/src/pulp_clock_mux2.sv \ tb/ariane_testharness.sv \ tb/common/SimDTM.sv \ tb/common/SimJTAG.sv @@ -210,8 +211,8 @@ check-benchmarks: # verilator-specific verilate_command := $(verilator) \ - $(ariane_pkg) \ - $(filter-out tb/ariane_bt.sv,$(src)) \ + $(filter-out %.vhd, $(ariane_pkg)) \ + $(filter-out src/fpu_wrap.sv, $(filter-out %.vhd, $(src))) \ +define+$(defines) \ src/util/sram.sv \ +incdir+src/axi_node \ diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv index 18ac4cf31..7db67cb16 100644 --- a/include/ariane_pkg.sv +++ b/include/ariane_pkg.sv @@ -35,15 +35,15 @@ package ariane_pkg; localparam ENABLE_RENAME = 1'b1; // Floating-point extensions configuration - localparam bit RVF = 1'b1; // Is F extension enabled - localparam bit RVD = 1'b1; // Is D extension enabled - localparam bit RVA = 1'b0; // Is A extension enabled + localparam bit RVF = 1'b0; // Is F extension enabled + localparam bit RVD = 1'b0; // Is D extension enabled + localparam bit RVA = 1'b1; // Is A extension enabled // Transprecision floating-point extensions configuration - localparam bit XF16 = 1'b1; // Is half-precision float extension (Xf16) enabled - localparam bit XF16ALT = 1'b1; // Is alternative half-precision float extension (Xf16alt) enabled - localparam bit XF8 = 1'b1; // Is quarter-precision float extension (Xf8) enabled - localparam bit XFVEC = 1'b1; // Is vectorial float extension (Xfvec) enabled + localparam bit XF16 = 1'b0; // Is half-precision float extension (Xf16) enabled + localparam bit XF16ALT = 1'b0; // Is alternative half-precision float extension (Xf16alt) enabled + 
localparam bit XF8 = 1'b0; // Is quarter-precision float extension (Xf8) enabled + localparam bit XFVEC = 1'b0; // Is vectorial float extension (Xfvec) enabled // Transprecision float unit localparam logic [30:0] LAT_COMP_FP32 = 'd3; diff --git a/src/fpu_div_sqrt_mvp b/src/fpu_div_sqrt_mvp index 962b37d46..3736c4c84 160000 --- a/src/fpu_div_sqrt_mvp +++ b/src/fpu_div_sqrt_mvp @@ -1 +1 @@ -Subproject commit 962b37d464de4809b3e56b4c45451f2c459e338f +Subproject commit 3736c4c844074bd64c3c505c017181db71b738b4 diff --git a/src/issue_read_operands.sv b/src/issue_read_operands.sv index f1bdeb933..492e2b305 100644 --- a/src/issue_read_operands.sv +++ b/src/issue_read_operands.sv @@ -365,11 +365,12 @@ module issue_read_operands #( // pack signals logic [2:0][4:0] fp_raddr_pack; logic [NR_COMMIT_PORTS-1:0][63:0] fp_wdata_pack; - assign fp_raddr_pack = {issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]}; - assign fp_wdata_pack = {wdata_i[1][FLEN-1:0], wdata_i[0][FLEN-1:0]}; generate if (FP_PRESENT) begin : float_regfile_gen + assign fp_raddr_pack = {issue_instr_i.result[4:0], issue_instr_i.rs2[4:0], issue_instr_i.rs1[4:0]}; + assign fp_wdata_pack = {wdata_i[1][FLEN-1:0], wdata_i[0][FLEN-1:0]}; + ariane_regfile #( .DATA_WIDTH ( FLEN ), .NR_READ_PORTS ( 3 ), From 00c55669de4e1d01c26d1e2ca5ddf213dd77f97b Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sat, 13 Oct 2018 16:17:01 +0200 Subject: [PATCH 88/94] Reset debug request FIFO --- src/debug/dmi_jtag.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/debug/dmi_jtag.sv b/src/debug/dmi_jtag.sv index 2b6e8a091..49df7ce5b 100644 --- a/src/debug/dmi_jtag.sv +++ b/src/debug/dmi_jtag.sv @@ -37,7 +37,7 @@ module dmi_jtag ( output logic td_o, // JTAG test data output pad output logic tdo_oe_o // Data out output enable ); - assign dmi_rst_no = 1'b1; + assign dmi_rst_no = rst_ni; logic test_logic_reset; logic shift_dr; From 90e075793c4073184a2163380353921603c2ac1a Mon Sep 17 00:00:00 2001 
From: Florian Zaruba Date: Sat, 13 Oct 2018 16:26:20 +0200 Subject: [PATCH 89/94] Disable floating point tests in CI update README --- README.md | 4 ++++ ci/riscv-asm-tests.list | 38 -------------------------------------- 2 files changed, 4 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index 72ec74dc7..766e0dcfd 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,10 @@ $ make simc riscv-test-dir=$RISCV/riscv64-unknown-elf/bin riscv-test=pk target-o > Be patient! RTL simulation is way slower than Spike. If you think that you ran into problems you can inspect the trace files. +### FPU Support + +> There is preliminary support for floating point extensions F and D. At the moment floating point support will only be available in QuestaSim as the FPU is written in VHDL. This is likely to change. The floating point extensions can be enabled by setting `RVF` and `RVD` to `1'b1` in the `include/ariane_pkg.sv` file. + ## FPGA Emulation Coming. diff --git a/ci/riscv-asm-tests.list b/ci/riscv-asm-tests.list index b007298f2..e03f7c392 100644 --- a/ci/riscv-asm-tests.list +++ b/ci/riscv-asm-tests.list @@ -127,44 +127,6 @@ rv64um-v-divw rv64um-v-divuw rv64um-v-remw rv64um-v-remuw -rv64uf-p-fadd -rv64uf-p-fclass -rv64uf-p-fcmp -rv64uf-p-fcvt -rv64uf-p-fcvt_w -rv64uf-p-fdiv -rv64uf-p-fmadd -rv64uf-p-fmin -rv64uf-p-ldst -rv64uf-p-move -rv64uf-p-recoding -rv64uf-v-fadd -rv64uf-v-fclass -rv64uf-v-fcmp -rv64uf-v-fcvt -rv64uf-v-fcvt_w -rv64uf-v-fdiv -rv64uf-v-fmadd -rv64uf-v-fmin -rv64uf-v-ldst -rv64uf-v-move -rv64uf-v-recoding -rv64ud-p-fadd -rv64ud-p-fclass -rv64ud-p-fcmp -rv64ud-p-fcvt -rv64ud-p-fcvt_w -rv64ud-p-fdiv -rv64ud-p-fmadd -rv64ud-p-fmin -rv64ud-v-fadd -rv64ud-v-fclass -rv64ud-v-fcmp -rv64ud-v-fcvt -rv64ud-v-fcvt_w -rv64ud-v-fdiv -rv64ud-v-fmadd -rv64ud-v-fmin rv64ua-p-amoadd_d rv64ua-p-amoadd_w rv64ua-p-amoor_d From f56e2c095ee8555196fa456ba40ae08bce0c0437 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sat, 13 Oct 2018 16:40:58 +0200 Subject: 
[PATCH 90/94] :green_heart: Fix duplicate entries in file list --- Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Makefile b/Makefile index e29b71074..1445cb108 100755 --- a/Makefile +++ b/Makefile @@ -71,8 +71,6 @@ src := $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv)) \ $(wildcard src/debug/debug_rom/*.sv) \ src/fpu/src/fpnew.vhd \ src/fpu/src/fpnew_top.vhd \ - src/fpu_div_sqrt_mvp/hdl/fpu_ff.sv \ - src/fpga-support/rtl/SyncSpRamBeNx64.sv \ src/common_cells/src/deprecated/generic_fifo.sv \ src/common_cells/src/deprecated/pulp_sync.sv \ src/common_cells/src/deprecated/find_first_one.sv \ From 2c0759e25623ca783615fb9991c3b6b3cf1ab49a Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sun, 14 Oct 2018 15:51:06 +0200 Subject: [PATCH 91/94] Add CODEOWNERS --- CODEOWNERS | 1 + 1 file changed, 1 insertion(+) create mode 100644 CODEOWNERS diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 000000000..e3285d0ad --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1 @@ +* @zarubaf @msfschaffner From d2327550cf4f9128d3e3284b5908bd6cfcc165cc Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Sun, 14 Oct 2018 17:30:33 +0200 Subject: [PATCH 92/94] Add newline to test list --- ci/riscv-asm-tests.list | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/riscv-asm-tests.list b/ci/riscv-asm-tests.list index 01e12844b..e03f7c392 100644 --- a/ci/riscv-asm-tests.list +++ b/ci/riscv-asm-tests.list @@ -164,4 +164,4 @@ rv64ua-v-amomin_d rv64ua-v-amomin_w rv64ua-v-amominu_d rv64ua-v-amominu_w -rv64ua-v-lrsc \ No newline at end of file +rv64ua-v-lrsc From 9941255339bac20349ed397c9613ee2db8858557 Mon Sep 17 00:00:00 2001 From: msfschaffner Date: Mon, 15 Oct 2018 10:01:49 +0200 Subject: [PATCH 93/94] Update ariane_regfile.sv Delete double semicolon in regfile --- src/ariane_regfile.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ariane_regfile.sv b/src/ariane_regfile.sv index f8a58dac3..8e54a8278 100644 --- 
a/src/ariane_regfile.sv +++ b/src/ariane_regfile.sv @@ -43,7 +43,7 @@ module ariane_regfile_lol #( input logic [NR_WRITE_PORTS-1:0] we_i ); - localparam ADDR_WIDTH = 5;; + localparam ADDR_WIDTH = 5; localparam NUM_WORDS = 2**ADDR_WIDTH; logic [NUM_WORDS-1:ZERO_REG_ZERO] mem_clocks; From 5045538d413a5a5ece3c816d7c824348ea741aa1 Mon Sep 17 00:00:00 2001 From: Florian Zaruba Date: Mon, 15 Oct 2018 11:55:27 +0200 Subject: [PATCH 94/94] :arrow_up: Bump FPU dependency --- src/fpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fpu b/src/fpu index 86ca5b67f..00e257917 160000 --- a/src/fpu +++ b/src/fpu @@ -1 +1 @@ -Subproject commit 86ca5b67f50d87b83999881a01d2ee74b9cd6868 +Subproject commit 00e2579173f1412f06d4eb95d6b98d0eb1cd2e94