mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
Adding new serial multiplier for low-cost 64-bit integer multiplication
This commit is contained in:
parent
fdf82842b2
commit
6117fb48fe
8 changed files with 241 additions and 72 deletions
|
@ -250,7 +250,7 @@ CONFIGS="-DENABLE_DPI -DNUM_FPU_UNITS=2" ./ci/blackbox.sh --driver=rtlsim --app=
|
|||
CONFIGS="-DENABLE_DPI" AXI_BUS=1 ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=demo
|
||||
|
||||
# adjust l1 block size to match l2
|
||||
CONFIGS="-DENABLE_DPI DL1_LINE_SIZE=64" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr --args="-n1"
|
||||
CONFIGS="-DENABLE_DPI -DL1_LINE_SIZE=64" ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --app=io_addr --args="-n1"
|
||||
|
||||
# test cache banking
|
||||
CONFIGS="-DENABLE_DPI -DSMEM_NUM_BANKS=4 -DDCACHE_NUM_BANKS=1" ./ci/blackbox.sh --driver=rtlsim --app=sgemm
|
||||
|
|
|
@ -428,7 +428,7 @@ module VX_core_top #(
|
|||
`ifdef EXT_RASTER_ENABLE
|
||||
input wire raster_req_valid,
|
||||
input raster_stamp_t [`NUM_THREADS-1:0] raster_req_stamps,
|
||||
input wire raster_req_empty,
|
||||
input wire raster_req_done,
|
||||
output wire raster_req_ready,
|
||||
`endif
|
||||
|
||||
|
@ -568,7 +568,7 @@ module VX_core_top #(
|
|||
|
||||
assign raster_req_if.valid = raster_req_valid;
|
||||
assign raster_req_if.stamps = raster_req_stamps;
|
||||
assign raster_req_if.empty=raster_req_empty;
|
||||
assign raster_req_if.done = raster_req_done;
|
||||
assign raster_req_ready = raster_req_if.ready;
|
||||
`endif
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ module VX_muldiv (
|
|||
);
|
||||
localparam UUID_WIDTH = `UP(`UUID_BITS);
|
||||
localparam NW_WIDTH = `UP(`NW_BITS);
|
||||
localparam TAGW = UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `NR_BITS + 1;
|
||||
|
||||
`UNUSED_VAR (alu_op)
|
||||
`UNUSED_VAR (op_mod)
|
||||
|
@ -57,8 +58,7 @@ module VX_muldiv (
|
|||
|
||||
wire mul_valid_out;
|
||||
wire mul_valid_in = valid_in && is_mulx_op;
|
||||
wire mul_ready_in = ~stall_out || ~mul_valid_out;
|
||||
|
||||
|
||||
wire is_mulh_in = `INST_M_IS_MULH(alu_op);
|
||||
wire is_signed_mul_a = `INST_M_SIGNED_A(alu_op);
|
||||
wire is_signed_mul_b = is_signed_op;
|
||||
|
@ -67,6 +67,7 @@ module VX_muldiv (
|
|||
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] mul_result_tmp;
|
||||
|
||||
wire mul_ready_in = ~stall_out || ~mul_valid_out;
|
||||
wire mul_fire_in = mul_valid_in && mul_ready_in;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
|
@ -97,14 +98,54 @@ module VX_muldiv (
|
|||
wire is_mulh_out;
|
||||
wire is_mul_w_out;
|
||||
|
||||
`ifdef XLEN_64
|
||||
|
||||
wire [`NUM_THREADS-1:0][`XLEN:0] mul_in1;
|
||||
wire [`NUM_THREADS-1:0][`XLEN:0] mul_in2;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign mul_in1[i] = is_alu_w ? {{(`XLEN-31){alu_in1[i][31]}}, alu_in1[i][31:0]} : {is_signed_mul_a && alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
assign mul_in2[i] = is_alu_w ? {{(`XLEN-31){alu_in2[i][31]}}, alu_in2[i][31:0]} : {is_signed_mul_b && alu_in2[i][`XLEN-1], alu_in2[i]};
|
||||
end
|
||||
|
||||
wire mul_ready_in;
|
||||
wire mul_ready_out = ~stall_out;
|
||||
|
||||
VX_serial_mul #(
|
||||
.A_WIDTH (`XLEN+1),
|
||||
.LANES (`NUM_THREADS),
|
||||
.SIGNED (1)
|
||||
) multiplier (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (mul_valid_in),
|
||||
.ready_in (mul_ready_in),
|
||||
.valid_out (mul_valid_out),
|
||||
.ready_out (mul_ready_out),
|
||||
.dataa (mul_in1),
|
||||
.datab (mul_in2),
|
||||
.result (mul_result_tmp)
|
||||
);
|
||||
|
||||
reg [TAGW+2-1:0] mul_tag_r;
|
||||
always @(posedge clk) begin
|
||||
if (mul_valid_in && mul_ready_in) begin
|
||||
mul_tag_r <= {uuid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, is_mulh_in, is_alu_w};
|
||||
end
|
||||
end
|
||||
|
||||
assign {mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out, is_mul_w_out} = mul_tag_r;
|
||||
|
||||
`else
|
||||
|
||||
wire mul_ready_in = ~stall_out || ~mul_valid_out;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire [`XLEN:0] mul_in1 = is_alu_w ? {{(`XLEN-31){alu_in1[i][31]}}, alu_in1[i][31:0]} : {is_signed_mul_a && alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
wire [`XLEN:0] mul_in2 = is_alu_w ? {{(`XLEN-31){alu_in2[i][31]}}, alu_in2[i][31:0]} : {is_signed_mul_b && alu_in2[i][`XLEN-1], alu_in2[i]};
|
||||
|
||||
VX_multiplier #(
|
||||
.A_WIDTH (`XLEN+1),
|
||||
.B_WIDTH (`XLEN+1),
|
||||
.R_WIDTH (2*(`XLEN+1)),
|
||||
.SIGNED (1),
|
||||
.LATENCY (`LATENCY_IMUL)
|
||||
) multiplier (
|
||||
|
@ -117,7 +158,7 @@ module VX_muldiv (
|
|||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `NR_BITS + 1 + 1 + 1),
|
||||
.DATAW (1 + TAGW + 1 + 1),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) mul_shift_reg (
|
||||
|
@ -128,6 +169,8 @@ module VX_muldiv (
|
|||
.data_out ({mul_valid_out, mul_uuid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out, is_mul_w_out})
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
`ifdef XLEN_64
|
||||
assign mul_result_out[i] = is_mulh_out ? mul_result_tmp[i][2*(`XLEN)-1:`XLEN] :
|
||||
|
@ -181,7 +224,7 @@ module VX_muldiv (
|
|||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW (1 + UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `NR_BITS + 1 + (`NUM_THREADS * `XLEN)),
|
||||
.DATAW (1 + TAGW + (`NUM_THREADS * `XLEN)),
|
||||
.DEPTH (`LATENCY_IMUL),
|
||||
.RESETW (1)
|
||||
) div_shift_reg (
|
||||
|
@ -205,19 +248,16 @@ module VX_muldiv (
|
|||
.WIDTHD (`XLEN),
|
||||
.WIDTHQ (`XLEN),
|
||||
.WIDTHR (`XLEN),
|
||||
.LANES (`NUM_THREADS),
|
||||
.TAGW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + `XLEN + `NR_BITS + 1 + 1 + 1)
|
||||
.LANES (`NUM_THREADS)
|
||||
) divide (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.valid_in (div_valid_in),
|
||||
.ready_in (div_ready_in),
|
||||
.tag_in ({uuid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, is_rem_op, is_alu_w}),
|
||||
|
||||
.ready_out (div_ready_out),
|
||||
.valid_out (div_valid_out),
|
||||
.tag_out ({div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out, is_div_w_out}),
|
||||
.ready_out (div_ready_out),
|
||||
|
||||
.is_signed (is_signed_op),
|
||||
.numer (div_in1),
|
||||
|
@ -227,6 +267,15 @@ module VX_muldiv (
|
|||
.remainder (div_remainder)
|
||||
);
|
||||
|
||||
reg [TAGW+2-1:0] div_tag_r;
|
||||
always @(posedge clk) begin
|
||||
if (div_valid_in && div_ready_in) begin
|
||||
div_tag_r <= {uuid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, is_rem_op, is_alu_w};
|
||||
end
|
||||
end
|
||||
|
||||
assign {div_uuid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out, is_div_w_out} = div_tag_r;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
`ifdef XLEN_64
|
||||
assign div_result_out[i] = is_rem_op_out ? (is_div_w_out ? `XLEN'($signed(div_remainder[i][31:0])) : div_remainder[i]) :
|
||||
|
@ -236,6 +285,7 @@ module VX_muldiv (
|
|||
`UNUSED_VAR (is_div_w_out)
|
||||
`endif
|
||||
end
|
||||
|
||||
`endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -2,9 +2,9 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_multiplier #(
|
||||
parameter A_WIDTH = 1,
|
||||
parameter B_WIDTH = 1,
|
||||
parameter R_WIDTH = 1,
|
||||
parameter A_WIDTH = 1,
|
||||
parameter B_WIDTH = A_WIDTH,
|
||||
parameter R_WIDTH = A_WIDTH + B_WIDTH,
|
||||
parameter SIGNED = 0,
|
||||
parameter LATENCY = 0
|
||||
) (
|
||||
|
@ -16,31 +16,51 @@ module VX_multiplier #(
|
|||
);
|
||||
`STATIC_ASSERT ((LATENCY <= 3), ("invalid parameter"))
|
||||
|
||||
wire [R_WIDTH-1:0] result_unqual;
|
||||
wire [A_WIDTH-1:0] dataa_w;
|
||||
wire [B_WIDTH-1:0] datab_w;
|
||||
wire [R_WIDTH-1:0] result_w;
|
||||
|
||||
if (SIGNED != 0) begin
|
||||
assign result_unqual = $signed(dataa) * $signed(datab);
|
||||
assign result_w = $signed(dataa_w) * $signed(datab_w);
|
||||
end else begin
|
||||
assign result_unqual = dataa * datab;
|
||||
assign result_w = dataa_w * datab_w;
|
||||
end
|
||||
|
||||
if (LATENCY == 0) begin
|
||||
assign result = result_unqual;
|
||||
assign dataa_w = dataa;
|
||||
assign datab_w = datab;
|
||||
assign result = result_w;
|
||||
end else begin
|
||||
reg [R_WIDTH-1:0] result_pipe [LATENCY-1:0];
|
||||
always @(posedge clk) begin
|
||||
if (enable) begin
|
||||
result_pipe[0] <= result_unqual;
|
||||
end
|
||||
end
|
||||
for (genvar i = 1; i < LATENCY; ++i) begin
|
||||
if (LATENCY >= 2) begin
|
||||
reg [A_WIDTH-1:0] dataa_p [LATENCY-2:0];
|
||||
reg [B_WIDTH-1:0] datab_p [LATENCY-2:0];
|
||||
always @(posedge clk) begin
|
||||
if (enable) begin
|
||||
result_pipe[i] <= result_pipe[i-1];
|
||||
dataa_p[0] <= dataa;
|
||||
datab_p[0] <= datab;
|
||||
end
|
||||
end
|
||||
for (genvar i = 2; i < LATENCY; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (enable) begin
|
||||
dataa_p[i-1] <= dataa_p[i-2];
|
||||
datab_p[i-1] <= datab_p[i-2];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign dataa_w = dataa_p[LATENCY-2];
|
||||
assign datab_w = datab_p[LATENCY-2];
|
||||
end else begin
|
||||
assign dataa_w = dataa;
|
||||
assign datab_w = datab;
|
||||
end
|
||||
assign result = result_pipe[LATENCY-1];
|
||||
reg [R_WIDTH-1:0] result_r;
|
||||
always @(posedge clk) begin
|
||||
if (enable) begin
|
||||
result_r <= result_w;
|
||||
end
|
||||
end
|
||||
assign result = result_r;
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
//`TRACING_OFF
|
||||
`TRACING_OFF
|
||||
module VX_scope_switch #(
|
||||
parameter N = 0
|
||||
) (
|
||||
|
@ -47,4 +47,4 @@ module VX_scope_switch #(
|
|||
end
|
||||
|
||||
endmodule
|
||||
//`TRACING_ON
|
||||
`TRACING_ON
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
//`TRACING_OFF
|
||||
`TRACING_OFF
|
||||
module VX_scope_tap #(
|
||||
parameter SCOPE_ID = 0, // scope identifier
|
||||
parameter SCOPE_IDW = 8, // scope identifier width
|
||||
|
@ -297,4 +297,4 @@ module VX_scope_tap #(
|
|||
assign bus_out = bus_out_r;
|
||||
|
||||
endmodule
|
||||
//`TRACING_ON
|
||||
`TRACING_ON
|
||||
|
|
|
@ -6,19 +6,16 @@ module VX_serial_div #(
|
|||
parameter WIDTHD = 1,
|
||||
parameter WIDTHQ = 1,
|
||||
parameter WIDTHR = 1,
|
||||
parameter LANES = 1,
|
||||
parameter TAGW = 1
|
||||
parameter LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire valid_in,
|
||||
output wire ready_in,
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire ready_out,
|
||||
output wire valid_out,
|
||||
output wire [TAGW-1:0] tag_out,
|
||||
|
||||
input wire is_signed,
|
||||
input wire [LANES-1:0][WIDTHN-1:0] numer,
|
||||
|
@ -28,7 +25,7 @@ module VX_serial_div #(
|
|||
output wire [LANES-1:0][WIDTHR-1:0] remainder
|
||||
);
|
||||
localparam MIN_ND = (WIDTHN < WIDTHD) ? WIDTHN : WIDTHD;
|
||||
localparam CNTRW = $clog2(WIDTHN+1);
|
||||
localparam CNTRW = $clog2(WIDTHN);
|
||||
|
||||
reg [LANES-1:0][WIDTHN + MIN_ND:0] working;
|
||||
reg [LANES-1:0][WIDTHD-1:0] denom_r;
|
||||
|
@ -40,12 +37,7 @@ module VX_serial_div #(
|
|||
reg [LANES-1:0] inv_quot, inv_rem;
|
||||
|
||||
reg [CNTRW-1:0] cntr;
|
||||
reg loaded;
|
||||
|
||||
reg [TAGW-1:0] tag_r;
|
||||
|
||||
wire push = valid_in && ready_in;
|
||||
wire pop = valid_out && ready_out;
|
||||
reg busy, done;
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
wire negate_numer = is_signed && numer[i][WIDTHN-1];
|
||||
|
@ -54,51 +46,52 @@ module VX_serial_div #(
|
|||
assign denom_qual[i] = negate_denom ? -$signed(denom[i]) : denom[i];
|
||||
assign sub_result[i] = working[i][WIDTHN + MIN_ND : WIDTHN] - denom_r[i];
|
||||
end
|
||||
|
||||
wire busy = (cntr != 0);
|
||||
|
||||
wire push = valid_in && ready_in;
|
||||
wire pop = valid_out && ready_out;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
cntr <= '0;
|
||||
loaded <= 0;
|
||||
busy <= 0;
|
||||
done <= 0;
|
||||
end else begin
|
||||
if (push) begin
|
||||
cntr <= WIDTHN;
|
||||
loaded <= 1;
|
||||
end else if (busy) begin
|
||||
cntr <= cntr - CNTRW'(1);
|
||||
if (push) begin
|
||||
busy <= 1;
|
||||
end
|
||||
if (pop) begin
|
||||
loaded <= 0;
|
||||
if (busy && cntr == 0) begin
|
||||
busy <= 0;
|
||||
done <= 1;
|
||||
end
|
||||
if (pop) begin
|
||||
done <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
cntr <= cntr - CNTRW'(1);
|
||||
if (push) begin
|
||||
for (integer i = 0; i < LANES; ++i) begin
|
||||
working[i] <= {{WIDTHD{1'b0}}, numer_qual[i], 1'b0};
|
||||
denom_r[i] <= denom_qual[i];
|
||||
inv_quot[i] <= (denom[i] != 0) && is_signed && (numer[i][31] ^ denom[i][31]);
|
||||
inv_rem[i] <= is_signed && numer[i][31];
|
||||
end
|
||||
tag_r <= tag_in;
|
||||
end else if (busy) begin
|
||||
for (integer i = 0; i < LANES; ++i) begin
|
||||
working[i] <= sub_result[i][WIDTHD] ? {working[i][WIDTHN+MIN_ND-1:0], 1'b0} :
|
||||
{sub_result[i][WIDTHD-1:0], working[i][WIDTHN-1:0], 1'b1};
|
||||
end
|
||||
cntr <= CNTRW'(WIDTHN-1);
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < LANES; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
working[i] <= {{WIDTHD{1'b0}}, numer_qual[i], 1'b0};
|
||||
denom_r[i] <= denom_qual[i];
|
||||
inv_quot[i] <= (denom[i] != 0) && is_signed && (numer[i][31] ^ denom[i][31]);
|
||||
inv_rem[i] <= is_signed && numer[i][31];
|
||||
end else if (busy) begin
|
||||
working[i] <= sub_result[i][WIDTHD] ? {working[i][WIDTHN+MIN_ND-1:0], 1'b0} :
|
||||
{sub_result[i][WIDTHD-1:0], working[i][WIDTHN-1:0], 1'b1};
|
||||
end
|
||||
end
|
||||
wire [WIDTHQ-1:0] q = working[i][WIDTHQ-1:0];
|
||||
wire [WIDTHR-1:0] r = working[i][WIDTHN+WIDTHR:WIDTHN+1];
|
||||
assign quotient[i] = inv_quot[i] ? -$signed(q) : q;
|
||||
assign remainder[i] = inv_rem[i] ? -$signed(r) : r;
|
||||
end
|
||||
|
||||
assign ready_in = ~loaded;
|
||||
assign tag_out = tag_r;
|
||||
assign valid_out = loaded && ~busy;
|
||||
assign ready_in = ~busy && ~done;
|
||||
assign valid_out = done;
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
|
|
106
hw/rtl/libs/VX_serial_mul.sv
Normal file
106
hw/rtl/libs/VX_serial_mul.sv
Normal file
|
@ -0,0 +1,106 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
// Iterative integer multiplier
|
||||
// An adaptation of the ZipCPU algorithm for a multi-lane elastic architecture.
|
||||
// https://zipcpu.com/zipcpu/2021/07/03/slowmpy.html
|
||||
|
||||
`TRACING_OFF
|
||||
// Multi-lane iterative (shift-and-add) integer multiplier with a valid/ready
// handshake on both the input and the output side.
//
// Parameters:
//   A_WIDTH : width of each dataa lane (the value that is accumulated).
//   B_WIDTH : width of each datab lane (consumed one bit per busy cycle).
//   R_WIDTH : width of each result lane; the internal product is cast to it.
//   SIGNED  : non-zero selects the signed datapath; both operands are then
//             sign-extended to MAX(A_WIDTH, B_WIDTH) bits.
//   LANES   : number of lanes multiplied in lock-step under one controller.
//
// Handshake:
//   ready_in  is high only when no operation is running and no result is
//             waiting (~busy && ~done).
//   valid_out is high from the cycle after the last iteration until the
//             result is accepted (valid_out && ready_out).
//
// Latency: one cycle to load the operands plus Y_WIDTH busy cycles.
module VX_serial_mul #(
    parameter A_WIDTH = 1,
    parameter B_WIDTH = A_WIDTH,
    parameter R_WIDTH = A_WIDTH + B_WIDTH,
    parameter SIGNED  = 0,
    parameter LANES   = 1
) (
    input wire clk,
    input wire reset,

    input wire  valid_in,
    output wire ready_in,

    input wire  ready_out,
    output wire valid_out,

    input wire [LANES-1:0][A_WIDTH-1:0]  dataa,
    input wire [LANES-1:0][B_WIDTH-1:0]  datab,
    output wire [LANES-1:0][R_WIDTH-1:0] result
);
    // Internal operand widths: the signed datapath works on equal-width operands.
    localparam X_WIDTH = SIGNED ? `MAX(A_WIDTH, B_WIDTH) : A_WIDTH;
    localparam Y_WIDTH = SIGNED ? `MAX(A_WIDTH, B_WIDTH) : B_WIDTH;
    localparam P_WIDTH = X_WIDTH + Y_WIDTH;

    // One bit of b is consumed per iteration, so the iteration count is
    // Y_WIDTH. (Identical to X_WIDTH whenever the two widths match, which is
    // always the case when SIGNED is set.)
    localparam CNTRW = $clog2(Y_WIDTH);

    reg [LANES-1:0][X_WIDTH-1:0] a;   // held addend
    reg [LANES-1:0][Y_WIDTH-1:0] b;   // shifted right each iteration; bit 0 selects the addend
    reg [LANES-1:0][P_WIDTH-1:0] p;   // partial product accumulator

    reg [CNTRW-1:0] cntr;             // iterations remaining minus one
    reg busy, done;

    wire push = valid_in && ready_in;
    wire pop  = valid_out && ready_out;

    // Control FSM: idle -> busy (Y_WIDTH cycles) -> done (until popped) -> idle.
    always @(posedge clk) begin
        if (reset) begin
            busy <= 0;
            done <= 0;
        end else begin
            if (push) begin
                busy <= 1;
            end
            if (busy && cntr == 0) begin
                // Last iteration is executing this cycle: stop iterating now
                // so that p is held stable while the consumer stalls
                // (ready_out low). Leaving busy set until pop would keep
                // shifting p and corrupt the pending result.
                busy <= 0;
                done <= 1;
            end
            if (pop) begin
                done <= 0;
            end
        end
        // Free-running down-counter, reloaded on every accepted request.
        cntr <= cntr - CNTRW'(1);
        if (push) begin
            cntr <= CNTRW'(Y_WIDTH-1);
        end
    end

    for (genvar i = 0; i < LANES; ++i) begin
        // Addend for this step: a when the current multiplier bit is set, else 0.
        wire [X_WIDTH-1:0] axb = b[i][0] ? a[i] : '0;

        always @(posedge clk) begin
            if (push) begin
                // Load operands and clear the accumulator.
                if (SIGNED) begin
                    a[i] <= X_WIDTH'($signed(dataa[i]));
                    b[i] <= Y_WIDTH'($signed(datab[i]));
                end else begin
                    a[i] <= dataa[i];
                    b[i] <= datab[i];
                end
                p[i] <= 0;
            end else if (busy) begin
                // Shift the multiplier and the low part of the accumulator
                // right by one, then add the step's addend into the high part.
                b[i] <= (b[i] >> 1);
                p[i][Y_WIDTH-2:0] <= p[i][Y_WIDTH-1:1];
                if (SIGNED) begin
                    if (cntr == 0) begin
                        // Final step: the addend's low bits are inverted, its top bit is kept.
                        p[i][P_WIDTH-1:Y_WIDTH-1] <= {1'b0, p[i][P_WIDTH-1:Y_WIDTH]} + {1'b0, axb[X_WIDTH-1], ~axb[X_WIDTH-2:0]};
                    end else begin
                        // Other steps: only the addend's top bit is inverted.
                        p[i][P_WIDTH-1:Y_WIDTH-1] <= {1'b0, p[i][P_WIDTH-1:Y_WIDTH]} + {1'b0, ~axb[X_WIDTH-1], axb[X_WIDTH-2:0]};
                    end
                end else begin
                    p[i][P_WIDTH-1:Y_WIDTH-1] <= {1'b0, p[i][P_WIDTH-1:Y_WIDTH]} + ((b[i][0]) ? {1'b0, a[i]} : 0);
                end
            end
        end

        if (SIGNED) begin
            // Constant correction term that completes the signed product.
            assign result[i] = R_WIDTH'(p[i][P_WIDTH-1:0] + {1'b1, {(X_WIDTH-2){1'b0}}, 1'b1, {(Y_WIDTH){1'b0}}});
        end else begin
            assign result[i] = R_WIDTH'(p[i]);
        end
    end
    `UNUSED_VAR (p)

    assign ready_in  = ~busy && ~done;
    assign valid_out = done;

endmodule
|
||||
`TRACING_ON
|
Loading…
Add table
Add a link
Reference in a new issue