Optimize shift operations

This commit is contained in:
Olof Kindgren 2021-01-16 16:39:33 +01:00
parent d5febe8f63
commit 5e4181d204
10 changed files with 40 additions and 72 deletions

View file

@ -38,11 +38,11 @@ serv_alu
.. image:: serv_alu.png
serv_alu handles alu and shift operations. The first input operand (A) comes from i_rs1 and the second operand (B) comes from i_rs2 or i_imm depending on the type of operation. The data passes through one or more of the add/sub, shift control or bool logic units and finally ends up in o_rd to be written to the destination register. The output o_cmp is used for conditional branches to decide whether or not to take the branch.
serv_alu handles alu and shift operations. The first input operand (A) comes from i_rs1 and the second operand (B) comes from i_rs2 or i_imm depending on the type of operation. The data passes through the add/sub or bool logic unit and finally ends up in o_rd to be written to the destination register. The output o_cmp is used for conditional branches to decide whether or not to take the branch.
The add/sub unit can do additions A+B or subtractions A-B by converting it to A+B̅+1. Subtraction mode (i_sub = 1) is also used for the comparisons in the slt* and conditional branch instructions. Finally, it is also used to negate the B operand for left shifts by clearing the A operand (i_shift_op = 1). The +1 used in subtraction mode is done by preloading the carry input with 1. Less-than comparisons are handled by converting the expression A<B to A-B<0 and checking the MSB, which will be set when the result is less than 0. This however requires sign-extending the operands to 33-bit inputs. For signed operands (when i_cmp_sig is set), the extra bit is the same as the MSB. For unsigned, the extra bit is always 0. Because the ALU is only active for 32 cycles, the 33rd bit must be calculated in parallel to the ordinary addition. The result from this operation is available in result_lt. For equality checks, result_eq checks that all bits are 0 from the subtraction.
The add/sub unit can do additions A+B or subtractions A-B by converting it to A+B̅+1. Subtraction mode (i_sub = 1) is also used for the comparisons in the slt* and conditional branch instructions. The +1 used in subtraction mode is done by preloading the carry input with 1. Less-than comparisons are handled by converting the expression A<B to A-B<0 and checking the MSB, which will be set when the result is less than 0. This however requires sign-extending the operands to 33-bit inputs. For signed operands (when i_cmp_sig is set), the extra bit is the same as the MSB. For unsigned, the extra bit is always 0. Because the ALU is only active for 32 cycles, the 33rd bit must be calculated in parallel to the ordinary addition. The result from this operation is available in result_lt. For equality checks, result_eq checks that all bits are 0 from the subtraction.
For shift operations, the data to be shifted resides in bufreg. The shift control unit in the ALU keeps track of how many steps to shift the bufreg and sign-extends/zero-pads the shifted data depending on the type (arithmetic/logical right/left) of shift operation.
For shift operations, the data to be shifted resides in bufreg. The shift control unit in the ALU keeps track of how many steps to shift the bufreg.
.. image:: serv_alu_int.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 49 KiB

After

Width:  |  Height:  |  Size: 58 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

After

Width:  |  Height:  |  Size: 28 KiB

Before After
Before After

View file

@ -3,21 +3,19 @@ module serv_alu
(
input wire clk,
//State
input wire i_init,
input wire i_en,
input wire i_cnt0,
input wire i_cnt_done,
input wire i_shamt_en,
output wire o_cmp,
output wire o_sh_done,
output wire o_sh_done_r,
//Control
input wire i_shift_op,
input wire i_op_b_rs2,
input wire i_sub,
input wire [1:0] i_bool_op,
input wire i_cmp_eq,
input wire i_cmp_sig,
input wire i_sh_right,
input wire i_sh_signed,
input wire [3:0] i_rd_sel,
//Data
input wire i_rs1,
@ -28,41 +26,26 @@ module serv_alu
wire result_add;
wire result_eq;
wire result_sh;
reg result_lt_r;
reg eq_r;
reg [4:0] shamt;
reg shamt_msb;
reg [5:0] shamt_r;
wire add_cy;
reg add_cy_r;
wire op_b = i_op_b_rs2 ? i_rs2 : i_imm;
serv_shift shift
(
.i_clk (clk),
.i_load (i_cnt_done),
.i_shamt (shamt),
.i_shamt_msb (shamt_msb),
.i_signbit (i_sh_signed & i_rs1),
.i_right (i_sh_right),
.o_done (o_sh_done),
.i_d (i_buf),
.o_q (result_sh));
//Sign-extended operands
wire rs1_sx = i_rs1 & i_cmp_sig;
wire op_b_sx = op_b & i_cmp_sig;
wire result_lt = rs1_sx + ~op_b_sx + add_cy;
wire add_a = i_rs1 & ~i_shift_op;
wire add_b = op_b^i_sub;
assign {add_cy,result_add} = add_a+add_b+add_cy_r;
assign {add_cy,result_add} = i_rs1+add_b+add_cy_r;
assign result_eq = !result_add & eq_r;
@ -72,11 +55,15 @@ module serv_alu
wire result_bool = BOOL_LUT[{i_bool_op, i_rs1, op_b}];
assign o_rd = (i_rd_sel[0] & result_add) |
(i_rd_sel[1] & result_sh) |
(i_rd_sel[1] & i_buf) |
(i_rd_sel[2] & result_lt_r & i_cnt0) |
(i_rd_sel[3] & result_bool);
wire [5:0] shamt = i_init ? {1'b0,op_b,shamt_r[4:1]} : shamt_r-1;
assign o_sh_done = shamt[5];
assign o_sh_done_r = shamt_r[5];
always @(posedge clk) begin
add_cy_r <= i_en ? add_cy : i_sub;
@ -85,10 +72,8 @@ module serv_alu
end
eq_r <= result_eq | ~i_en;
if (i_shamt_en) begin
shamt_msb <= add_cy;
shamt <= {result_add,shamt[4:1]};
end
if (i_shamt_en)
shamt_r <= shamt;
end
endmodule

View file

@ -11,6 +11,7 @@ module serv_bufreg
input wire i_rs1_en,
input wire i_imm_en,
input wire i_clr_lsb,
input wire i_sh_signed,
//Data
input wire i_rs1,
input wire i_imm,
@ -31,14 +32,14 @@ module serv_bufreg
c_r <= c & i_en;
if (i_en)
data <= {i_init ? q : o_q, data[31:3]};
data <= {i_init ? q : (data[31] & i_sh_signed), data[31:3]};
if (i_init ? (i_cnt0 | i_cnt1) : i_en)
o_lsb <= {i_init ? q : data[2],o_lsb[1]};
end
assign o_q = o_lsb[0];
assign o_q = o_lsb[0] & i_en;
assign o_dbus_adr = {data, 2'b00};
endmodule

View file

@ -131,10 +131,18 @@ module serv_decode
assign o_e_op = opcode[4] & opcode[2] & !op21 & !(|funct3);
//opcode & funct3 & imm30
//True for sub, sll*, b*, slt*
//False for add*, sr*
assign o_alu_sub = (!funct3[2] & (funct3[0] | (opcode[3] & imm30))) | funct3[1] | opcode[4];
/*
True for sub, b*, slt*
False for add*
op opcode f3 i30
b* 11000 xxx x t
addi 00100 000 x f
slt* 0x100 01x x t
add 01100 000 0 f
sub 01100 000 1 t
*/
assign o_alu_sub = funct3[1] | funct3[0] | (opcode[3] & imm30) | opcode[4];
/*
Bits 26, 22, 21 and 20 are enough to uniquely identify the eight supported CSR regs

View file

@ -1,30 +0,0 @@
`default_nettype none
// serv_shift: serial shift step counter and output selector.
//
// A 6-bit step counter advances by one on every rising edge of i_clk and is
// cleared while i_load is high. o_done is asserted whenever the low five
// counter bits equal i_shamt. o_q forwards either the serial input i_d or
// the fill bit captured at load time.
module serv_shift
  (
   input wire       i_clk,       // Clock
   input wire       i_load,      // Clears the step counter and captures the fill bit
   input wire [4:0] i_shamt,     // Value compared against the low counter bits
   input wire       i_shamt_msb, // Forces the wrap flag when i_right is low
   input wire       i_signbit,   // Fill-bit candidate, only kept when i_right is high
   input wire       i_right,     // Direction select
   output wire      o_done,      // High while step_cnt[4:0] == i_shamt
   input wire       i_d,         // Serial data in
   output wire      o_q);        // Serial data out

   reg [5:0] step_cnt;   // Free-running step counter
   reg       fill_bit;   // Bit driven on o_q when i_d is not passed through
   reg       wrapped_r;  // Registered wrap flag

   // i_d is forwarded when exactly one of i_right / wrapped_r is set;
   // otherwise the captured fill bit is driven.
   wire      pass_data = i_right ^ wrapped_r;

   always @(posedge i_clk) begin
      // The wrap flag samples the counter value from before this edge
      wrapped_r <= step_cnt[5] | (i_shamt_msb & !i_right);

      if (i_load) begin
         step_cnt <= 6'd0;
         fill_bit <= i_signbit & i_right;
      end else begin
         step_cnt <= step_cnt + 6'd1;
      end
   end

   assign o_done = (step_cnt[4:0] == i_shamt);

   assign o_q = pass_data ? i_d : fill_bit;

endmodule

View file

@ -19,6 +19,7 @@ module serv_state
input wire i_branch_op,
input wire i_mem_op,
input wire i_shift_op,
input wire i_sh_right,
input wire i_slt_op,
input wire i_e_op,
input wire i_rd_op,
@ -37,6 +38,7 @@ module serv_state
input wire i_ctrl_misalign,
output wire o_alu_shamt_en,
input wire i_alu_sh_done,
input wire i_alu_sh_done_r,
output wire o_dbus_cyc,
output wire [1:0] o_mem_bytecnt,
input wire i_mem_misalign,
@ -67,7 +69,8 @@ module serv_state
assign cnt4 = (o_cnt[4:2] == 3'd1) & o_cnt_r[0];
assign o_cnt7 = (o_cnt[4:2] == 3'd1) & o_cnt_r[3];
assign o_alu_shamt_en = (o_cnt0to3 | cnt4) & o_init;
assign o_alu_shamt_en = o_cnt0to3 | cnt4 | !o_init;
//Take branch for jump or branch instructions (opcode == 1x0xx) if
//a) It's an unconditional branch (opcode[0] == 1)
@ -89,7 +92,7 @@ module serv_state
assign o_rf_rreq = i_ibus_ack | (stage_two_req & trap_pending);
//Prepare RF for writes when everything is ready to enter stage two
assign o_rf_wreq = ((i_shift_op & i_alu_sh_done & init_done) | (i_mem_op & i_dbus_ack) | (stage_two_req & (i_slt_op | i_branch_op))) & !trap_pending;
assign o_rf_wreq = ((i_shift_op & (i_alu_sh_done | !i_sh_right) & init_done) | (i_mem_op & i_dbus_ack) | (stage_two_req & (i_slt_op | i_branch_op))) & !trap_pending;
assign o_rf_rd_en = i_rd_op & o_cnt_en & !o_init;
@ -104,7 +107,7 @@ module serv_state
shift : Shift in during phase 1. Continue shifting between phases (except
for the first cycle after init). Shift out during phase 2
*/
assign o_bufreg_en = (o_cnt_en & (o_init | o_ctrl_trap | i_branch_op)) | (!stage_two_req & i_shift_op);
assign o_bufreg_en = (o_cnt_en & (o_init | o_ctrl_trap | i_branch_op)) | (i_shift_op & !stage_two_req & (i_sh_right | i_alu_sh_done_r));
assign o_ibus_cyc = ibus_cyc & !i_rst;

View file

@ -117,6 +117,7 @@ module serv_top
wire alu_sh_signed;
wire alu_sh_right;
wire alu_sh_done;
wire alu_sh_done_r;
wire [3:0] alu_rd_sel;
wire rs1;
@ -180,6 +181,7 @@ module serv_top
.i_ctrl_misalign(lsb[1]),
.o_alu_shamt_en (alu_shamt_en),
.i_alu_sh_done (alu_sh_done),
.i_alu_sh_done_r (alu_sh_done_r),
.o_mem_bytecnt (mem_bytecnt),
.i_mem_misalign (mem_misalign),
//Control
@ -188,6 +190,7 @@ module serv_top
.i_branch_op (branch_op),
.i_mem_op (mem_op),
.i_shift_op (shift_op),
.i_sh_right (alu_sh_right),
.i_slt_op (slt_op),
.i_e_op (e_op),
.i_rd_op (rd_op),
@ -284,6 +287,7 @@ module serv_top
.i_init (init),
.o_lsb (lsb),
//Control
.i_sh_signed (alu_sh_signed),
.i_rs1_en (bufreg_rs1_en),
.i_imm_en (bufreg_imm_en),
.i_clr_lsb (bufreg_clr_lsb),
@ -328,20 +332,18 @@ module serv_top
.clk (clk),
//State
.i_en (cnt_en),
.i_init (init),
.i_cnt0 (cnt0),
.i_cnt_done (cnt_done),
.i_shamt_en (alu_shamt_en),
.o_cmp (alu_cmp),
.o_sh_done (alu_sh_done),
.o_sh_done_r (alu_sh_done_r),
//Control
.i_shift_op (shift_op),
.i_op_b_rs2 (op_b_source),
.i_sub (alu_sub),
.i_bool_op (alu_bool_op),
.i_cmp_eq (alu_cmp_eq),
.i_cmp_sig (alu_cmp_sig),
.i_sh_right (alu_sh_right),
.i_sh_signed (alu_sh_signed),
.i_rd_sel (alu_rd_sel),
//Data
.i_rs1 (rs1),

View file

@ -6,7 +6,6 @@ filesets:
core:
files:
- rtl/serv_params.vh : {is_include_file : true}
- rtl/serv_shift.v
- rtl/serv_bufreg.v
- rtl/serv_alu.v
- rtl/serv_csr.v