decode optimization

This commit is contained in:
Blaise Tine 2021-06-28 05:06:30 -07:00
parent f84c8a0b5d
commit 6ae2f5199d
8 changed files with 132 additions and 165 deletions

View file

@ -25,7 +25,7 @@ module VX_alu_unit #(
wire stall_in, stall_out;
`UNUSED_VAR (alu_req_if.op_mod)
wire is_br_op = `IS_BR_MOD(alu_req_if.op_mod);
wire is_br_op = `ALU_IS_BR(alu_req_if.op_mod);
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op_type);
wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.op_type);
wire alu_signed = `ALU_SIGNED(alu_op);
@ -117,7 +117,7 @@ module VX_alu_unit #(
wire mul_wb;
wire [`NUM_THREADS-1:0][31:0] mul_data;
wire is_mul_op = `IS_MUL_MOD(alu_req_if.op_mod);
wire is_mul_op = `ALU_IS_MUL(alu_req_if.op_mod);
VX_muldiv muldiv (
.clk (clk),

View file

@ -49,8 +49,8 @@ module VX_csr_data #(
end
if (fpu_to_csr_if.write_enable) begin
fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0] <= fpu_to_csr_if.write_fflags
| fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0];
fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0] <= fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0]
| fpu_to_csr_if.write_fflags;
end
if (write_enable) begin

View file

@ -59,11 +59,11 @@ module VX_csr_unit #(
wire [31:0] csr_read_data_qual = write_hazard ? csr_updated_data_s1 : csr_read_data;
reg [31:0] csr_updated_data;
reg [31:0] csr_updated_data;
reg csr_we_s0_unqual;
always @(*) begin
csr_we_s0_unqual = (csr_req_data != 0);
case (csr_req_if.op_type)
`CSR_RW: begin
csr_updated_data = csr_req_data;
@ -71,15 +71,10 @@ module VX_csr_unit #(
end
`CSR_RS: begin
csr_updated_data = csr_read_data_qual | csr_req_data;
csr_we_s0_unqual = (csr_req_data != 0);
end
`CSR_RC: begin
csr_updated_data = csr_read_data_qual & ~csr_req_data;
csr_we_s0_unqual = (csr_req_data != 0);
end
//`CSR_RC
default: begin
csr_updated_data = 'x;
csr_we_s0_unqual = 0;
csr_updated_data = csr_read_data_qual & ~csr_req_data;
end
endcase
end

View file

@ -2,9 +2,17 @@
`include "VX_print_instr.vh"
// Helper macros used by the decoder to record register operands.
//   d : destination variable that receives the register index
//   f : extra 1-bit selector prepended to the index when EXT_F_ENABLE is set
//       (the visible call sites pass 1'b1 for floating-point operands)
//   s : register specifier taken from the instruction
// NOTE(review): this span comes from a diff view, so USED_REGS and the
// SET_REG/USED_REG pair appear side by side -- confirm which ones the
// final file keeps.
`ifdef EXT_F_ENABLE
// Marks register {f,r} in used_regs; does not assign any destination.
`define USED_REGS(f,r) used_regs[{f,r}] = 1
// Assigns the concatenated index {f, s} to d without touching used_regs.
`define SET_REG(d,f,s) \
d = {f, s}
// Assigns {f, s} to d AND marks that index in used_regs.
// Expands to two statements joined by ';' with no begin/end of its own,
// so a use under an if/else must be wrapped in begin ... end by the caller.
`define USED_REG(d,f,s) \
`SET_REG(d,f,s); \
used_regs[{f, s}] = 1
`else
// Without EXT_F_ENABLE the selector f is ignored and the index is just r/s.
`define USED_REGS(f,r) used_regs[r] = 1
// Assigns s to d; f is unused in this configuration.
`define SET_REG(d,f,s) \
d = s
// Assigns s to d AND marks it in used_regs (two statements, see note above
// about wrapping in begin ... end when used under an if/else).
`define USED_REG(d,f,s) \
`SET_REG(d,f,s); \
used_regs[s] = 1
`endif
module VX_decode #(
@ -28,10 +36,9 @@ module VX_decode #(
reg [`EX_BITS-1:0] ex_type;
reg [`OP_BITS-1:0] op_type;
reg [`MOD_BITS-1:0] op_mod;
reg [4:0] rd_r, rs1_r, rs2_r, rs3_r;
reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r;
reg [31:0] imm;
reg use_rd, use_PC, use_imm;
reg rd_fp, rs1_fp, rs2_fp;
reg is_join, is_wstall;
reg [`NUM_REGS-1:0] used_regs;
@ -56,20 +63,17 @@ module VX_decode #(
ex_type = 0;
op_type = 'x;
op_mod = 'x;
rd_r = 'x;
rs1_r = 'x;
rs2_r = 'x;
rs3_r = 'x;
imm = 'x;
use_imm = 'x;
use_PC = 'x;
use_rd = 0;
use_PC = 0;
use_imm = 0;
rd_fp = 0;
rs1_fp = 0;
rs2_fp = 0;
is_join = 0;
is_wstall = 0;
used_regs = 0;
rd_r = rd;
rs1_r = rs1;
rs2_r = rs2;
rs3_r = rs3;
used_regs = 0;
case (opcode)
`INST_I: begin
@ -86,11 +90,12 @@ module VX_decode #(
default:;
endcase
op_mod = 0;
imm = {{20{alu_imm[11]}}, alu_imm};
use_rd = 1;
use_imm = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b0, rs1);
use_PC = 0;
imm = {{20{alu_imm[11]}}, alu_imm};
`USED_REG (rd_r, 1'b0, rd);
`USED_REG (rs1_r, 1'b0, rs1);
end
`INST_R: begin
ex_type = `EX_ALU;
@ -123,54 +128,57 @@ module VX_decode #(
default:;
endcase
op_mod = 0;
end
use_rd = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b0, rs1);
`USED_REGS (1'b0, rs2);
end
use_rd = 1;
use_imm = 0;
use_PC = 0;
`USED_REG (rd_r, 1'b0, rd);
`USED_REG (rs1_r, 1'b0, rs1);
`USED_REG (rs2_r, 1'b0, rs2);
end
`INST_LUI: begin
ex_type = `EX_ALU;
op_type = `OP_BITS'(`ALU_LUI);
op_mod = 0;
rs1_r = 0;
imm = {upper_imm, 12'(0)};
use_rd = 1;
use_imm = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b0, 5'b0);
use_imm = 1;
use_PC = 0;
imm = {upper_imm, 12'(0)};
`USED_REG (rd_r, 1'b0, rd);
`USED_REG (rs1_r, 1'b0, 5'b0);
end
`INST_AUIPC: begin
ex_type = `EX_ALU;
op_type = `OP_BITS'(`ALU_AUIPC);
op_mod = 0;
imm = {upper_imm, 12'(0)};
use_rd = 1;
use_PC = 1;
use_imm = 1;
`USED_REGS (1'b0, rd);
use_PC = 1;
imm = {upper_imm, 12'(0)};
`USED_REG (rd_r, 1'b0, rd);
end
`INST_JAL: begin
ex_type = `EX_ALU;
op_type = `OP_BITS'(`BR_JAL);
op_mod = 1;
imm = {{11{jal_imm[20]}}, jal_imm};
use_rd = 1;
use_PC = 1;
use_imm = 1;
use_PC = 1;
is_wstall = 1;
`USED_REGS (1'b0, rd);
imm = {{11{jal_imm[20]}}, jal_imm};
`USED_REG (rd_r, 1'b0, rd);
end
`INST_JALR: begin
ex_type = `EX_ALU;
op_type = `OP_BITS'(`BR_JALR);
op_mod = 1;
imm = {{20{jalr_imm[11]}}, jalr_imm};
use_rd = 1;
use_imm = 1;
use_PC = 0;
is_wstall = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b0, rs1);
imm = {{20{jalr_imm[11]}}, jalr_imm};
`USED_REG (rd_r, 1'b0, rd);
`USED_REG (rs1_r, 1'b0, rs1);
end
`INST_B: begin
ex_type = `EX_ALU;
@ -184,12 +192,12 @@ module VX_decode #(
default:;
endcase
op_mod = 1;
imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
use_PC = 1;
use_imm = 1;
use_PC = 1;
is_wstall = 1;
`USED_REGS (1'b0, rs1);
`USED_REGS (1'b0, rs2);
imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
`USED_REG (rs1_r, 1'b0, rs1);
`USED_REG (rs2_r, 1'b0, rs2);
end
`INST_SYS : begin
if (func3 == 0) begin
@ -203,26 +211,28 @@ module VX_decode #(
default:;
endcase
op_mod = 1;
imm = 32'd4;
use_rd = 1;
use_PC = 1;
use_imm = 1;
`USED_REGS (1'b0, rd);
use_PC = 1;
imm = 32'd4;
`USED_REG (rd_r, 1'b0, rd);
end else begin
ex_type = `EX_CSR;
case (func3[1:0])
2'h0: op_type = `OP_BITS'(`CSR_RW);
2'h1: op_type = `OP_BITS'(`CSR_RW);
2'h2: op_type = `OP_BITS'(`CSR_RS);
2'h3: op_type = `OP_BITS'(`CSR_RC);
default:;
endcase
imm = 32'(u_12);
use_rd = 1;
use_imm = func3[2];
`USED_REGS (1'b0, rd);
if (!func3[2])
`USED_REGS (1'b0, rs1);
use_imm = func3[2];
imm = 32'(u_12); // addr
`USED_REG (rd_r, 1'b0, rd);
if (func3[2]) begin
`SET_REG(rs1_r, 1'b0, rs1); // imm
end else begin
`USED_REG (rs1_r, 1'b0, rs1);
end
end
end
`ifdef EXT_F_ENABLE
@ -231,13 +241,10 @@ module VX_decode #(
`INST_L: begin
ex_type = `EX_LSU;
op_type = `OP_BITS'({1'b0, func3});
use_rd = 1;
imm = {{20{u_12[11]}}, u_12};
use_rd = 1;
`USED_REGS (1'b0, rs1);
`USED_REGS ((opcode == `INST_FL), rd);
`ifdef EXT_F_ENABLE
rd_fp = (opcode == `INST_FL);
`endif
`USED_REG (rd_r, (opcode == `INST_FL), rd);
`USED_REG (rs1_r, 1'b0, rs1);
end
`ifdef EXT_F_ENABLE
`INST_FS,
@ -246,11 +253,8 @@ module VX_decode #(
ex_type = `EX_LSU;
op_type = `OP_BITS'({1'b1, func3});
imm = {{20{func7[6]}}, func7, rd};
`USED_REGS (1'b0, rs1);
`USED_REGS ((opcode == `INST_FS), rs2);
`ifdef EXT_F_ENABLE
rs2_fp = (opcode == `INST_FS);
`endif
`USED_REG (rs1_r, 1'b0, rs1);
`USED_REG (rs2_r, (opcode == `INST_FS), rs2);
end
`ifdef EXT_F_ENABLE
`INST_FMADD,
@ -261,80 +265,61 @@ module VX_decode #(
op_type = `OP_BITS'(opcode[3:0]);
op_mod = func3;
use_rd = 1;
rd_fp = 1;
rs1_fp = 1;
rs2_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
`USED_REGS (1'b1, rs3);
`USED_REG (rd_r, 1'b1, rd);
`USED_REG (rs1_r, 1'b1, rs1);
`USED_REG (rs2_r, 1'b1, rs2);
`USED_REG (rs3_r, 1'b1, rs3);
end
`INST_FCI: begin
ex_type = `EX_FPU;
op_mod = func3;
use_rd = 1;
op_mod = func3;
use_rd = 1;
case (func7)
7'h00, // FADD
7'h04, // FSUB
7'h08, // FMUL
7'h0C: // FDIV
begin
7'h00, // FADD
7'h04, // FSUB
7'h08, // FMUL
7'h0C: begin // FDIV
op_type = `OP_BITS'(func7[3:0]);
rd_fp = 1;
rs1_fp = 1;
rs2_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
`USED_REG (rd_r, 1'b1, rd);
`USED_REG (rs1_r, 1'b1, rs1);
`USED_REG (rs2_r, 1'b1, rs2);
end
7'h2C: begin
op_type = `OP_BITS'(`FPU_SQRT);
rd_fp = 1;
rs1_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REG (rd_r, 1'b1, rd);
`USED_REG (rs1_r, 1'b1, rs1);
end
7'h50: begin
op_type = `OP_BITS'(`FPU_CMP);
rs1_fp = 1;
rs2_fp = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
`USED_REG (rd_r, 1'b0, rd);
`USED_REG (rs1_r, 1'b1, rs1);
`USED_REG (rs2_r, 1'b1, rs2);
end
7'h60: begin
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTWUS) : `OP_BITS'(`FPU_CVTWS);
rs1_fp = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b1, rs1);
`USED_REG (rd_r, 1'b0, rd);
`USED_REG (rs1_r, 1'b1, rs1);
end
7'h68: begin
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTSWU) : `OP_BITS'(`FPU_CVTSW);
rd_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b0, rs1);
`USED_REG (rd_r, 1'b1, rd);
`USED_REG (rs1_r, 1'b0, rs1);
end
7'h10: begin
// FSGNJ=0, FSGNJN=1, FSGNJX=2
op_type = `OP_BITS'(`FPU_MISC);
op_mod = {1'b0, func3[1:0]};
rd_fp = 1;
rs1_fp = 1;
rs2_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
`USED_REG (rd_r, 1'b1, rd);
`USED_REG (rs1_r, 1'b1, rs1);
`USED_REG (rs2_r, 1'b1, rs2);
end
7'h14: begin
// FMIN=3, FMAX=4
op_type = `OP_BITS'(`FPU_MISC);
op_mod = func3[0] ? 4 : 3;
rd_fp = 1;
rs1_fp = 1;
rs2_fp = 1;
`USED_REGS (1'b1, rd);
`USED_REGS (1'b1, rs1);
`USED_REGS (1'b1, rs2);
`USED_REG (rd_r, 1'b1, rd);
`USED_REG (rs1_r, 1'b1, rs1);
`USED_REG (rs2_r, 1'b1, rs2);
end
7'h70: begin
if (func3[0]) begin
@ -344,17 +329,16 @@ module VX_decode #(
// FMV.X.W=5
op_type = `OP_BITS'(`FPU_MISC);
op_mod = 5;
end
rs1_fp = 1;
`USED_REGS (1'b0, rd);
`USED_REGS (1'b1, rs1);
end
`USED_REG (rd_r, 1'b0, rd);
`USED_REG (rs1_r, 1'b1, rs1);
end
7'h78: begin
// FMV.W.X=6
op_type = `OP_BITS'(`FPU_MISC);
op_mod = 6;
rd_fp = 1;
`USED_REGS (1'b1, rd);
op_mod = 6;
`USED_REG (rd_r, 1'b1, rd);
`USED_REG (rs1_r, 1'b0, rs1);
end
default:;
endcase
@ -366,17 +350,17 @@ module VX_decode #(
3'h0: begin
op_type = `OP_BITS'(`GPU_TMC);
is_wstall = 1;
`USED_REGS (1'b0, rs1);
`USED_REG (rs1_r, 1'b0, rs1);
end
3'h1: begin
op_type = `OP_BITS'(`GPU_WSPAWN);
`USED_REGS (1'b0, rs1);
`USED_REGS (1'b0, rs2);
`USED_REG (rs1_r, 1'b0, rs1);
`USED_REG (rs2_r, 1'b0, rs2);
end
3'h2: begin
op_type = `OP_BITS'(`GPU_SPLIT);
is_wstall = 1;
`USED_REGS (1'b0, rs1);
`USED_REG (rs1_r, 1'b0, rs1);
end
3'h3: begin
op_type = `OP_BITS'(`GPU_JOIN);
@ -385,8 +369,8 @@ module VX_decode #(
3'h4: begin
op_type = `OP_BITS'(`GPU_BAR);
is_wstall = 1;
`USED_REGS (1'b0, rs1);
`USED_REGS (1'b0, rs2);
`USED_REG (rs1_r, 1'b0, rs1);
`USED_REG (rs2_r, 1'b0, rs2);
end
default:;
endcase
@ -396,32 +380,20 @@ module VX_decode #(
end
// disable write to integer register r0
wire wb = use_rd && (rd_fp || (rd_r != 0));
assign decode_if.valid = ifetch_rsp_if.valid;
assign decode_if.wid = ifetch_rsp_if.wid;
assign decode_if.tmask = ifetch_rsp_if.tmask;
assign decode_if.PC = ifetch_rsp_if.PC;
assign decode_if.ex_type = ex_type;
assign decode_if.op_type = op_type;
assign decode_if.op_mod = op_mod;
assign decode_if.wb = wb;
`ifdef EXT_F_ENABLE
assign decode_if.rd = {rd_fp, rd_r};
assign decode_if.rs1 = {rs1_fp, rs1_r};
assign decode_if.rs2 = {rs2_fp, rs2_r};
assign decode_if.rs3 = {1'b1, rs3_r};
`else
`UNUSED_VAR (rd_fp)
`UNUSED_VAR (rs1_fp)
`UNUSED_VAR (rs2_fp)
assign decode_if.rd = rd_r;
assign decode_if.rs1 = rs1_r;
assign decode_if.rs2 = rs2_r;
assign decode_if.rs3 = rs3_r;
`endif
wire wb = use_rd && (| rd_r);
assign decode_if.valid = ifetch_rsp_if.valid;
assign decode_if.wid = ifetch_rsp_if.wid;
assign decode_if.tmask = ifetch_rsp_if.tmask;
assign decode_if.PC = ifetch_rsp_if.PC;
assign decode_if.ex_type = ex_type;
assign decode_if.op_type = op_type;
assign decode_if.op_mod = op_mod;
assign decode_if.wb = wb;
assign decode_if.rd = rd_r;
assign decode_if.rs1 = rs1_r;
assign decode_if.rs2 = rs2_r;
assign decode_if.rs3 = rs3_r;
assign decode_if.imm = imm;
assign decode_if.use_PC = use_PC;
assign decode_if.use_imm = use_imm;

View file

@ -99,6 +99,8 @@
// Bit-select helpers for ALU requests. Each macro is a plain part-select of
// its argument, so the argument must be a simple signal name.
// Selects the low `ALU_BITS bits of x.
`define ALU_OP(x) x[`ALU_BITS-1:0]
// Selects bits [3:2] of x.
`define ALU_OP_CLASS(x) x[3:2]
// Selects bit 0 of x.
`define ALU_SIGNED(x) x[0]
// Selects bit 0 of x. The visible call sites apply it to op_mod (not to the
// ALU opcode), which is why it can share bit 0 with ALU_SIGNED.
`define ALU_IS_BR(x) x[0]
// Selects bit 1 of x; the visible call sites apply it to op_mod.
`define ALU_IS_MUL(x) x[1]
`define BR_EQ 4'b0000
`define BR_NE 4'b0010
@ -119,7 +121,6 @@
`define BR_NEG(x) x[1]
`define BR_LESS(x) x[2]
`define BR_STATIC(x) x[3]
`define IS_BR_MOD(x) x[0]
`define MUL_MUL 3'h0
`define MUL_MULH 3'h1
@ -131,8 +132,7 @@
`define MUL_REMU 3'h7
`define MUL_BITS 3
`define MUL_OP(x) x[`MUL_BITS-1:0]
`define IS_DIV_OP(x) x[2]
`define IS_MUL_MOD(x) x[1]
`define MUL_IS_DIV(x) x[2]
`define FMT_B 3'b000
`define FMT_H 3'b001

View file

@ -128,7 +128,7 @@ module VX_execute #(
// special workaround to get RISC-V tests Pass/Fail status
wire ebreak /* verilator public */;
assign ebreak = alu_req_if.valid && alu_req_if.ready
&& `IS_BR_MOD(alu_req_if.op_mod)
&& `ALU_IS_BR(alu_req_if.op_mod)
&& (`BR_OP(alu_req_if.op_type) == `BR_EBREAK
|| `BR_OP(alu_req_if.op_type) == `BR_ECALL);

View file

@ -33,7 +33,7 @@ module VX_muldiv (
input wire ready_out
);
wire is_div_op = `IS_DIV_OP(alu_op);
wire is_div_op = `MUL_IS_DIV(alu_op);
wire [`NUM_THREADS-1:0][31:0] mul_result;
wire [`NW_BITS-1:0] mul_wid_out;

View file

@ -23,7 +23,7 @@ task print_ex_op (
);
case (ex_type)
`EX_ALU: begin
if (`IS_BR_MOD(op_mod)) begin
if (`ALU_IS_BR(op_mod)) begin
case (`BR_BITS'(op_type))
`BR_EQ: $write("BEQ");
`BR_NE: $write("BNE");
@ -40,7 +40,7 @@ task print_ex_op (
`BR_DRET: $write("DRET");
default: $write("?");
endcase
end else if (`IS_MUL_MOD(op_mod)) begin
end else if (`ALU_IS_MUL(op_mod)) begin
case (`MUL_BITS'(op_type))
`MUL_MUL: $write("MUL");
`MUL_MULH: $write("MULH");