GPR optimization: disable the BRAM's read-during-write bypass block.

This commit is contained in:
Blaise Tine 2021-08-28 15:34:36 -07:00
parent 12b8b4af24
commit f3ba27b138
2 changed files with 108 additions and 106 deletions

View file

@ -17,71 +17,100 @@ module VX_gpr_stage #(
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (reset)
localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS;
// ensure r0 never gets written, which can happen before the reset
wire write_enable = writeback_if.valid && (writeback_if.rd != 0);
`ifdef EXT_F_ENABLE
localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS;
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3;
wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2, raddr3;
assign waddr = {writeback_if.wid, writeback_if.rd};
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
wire [(`NUM_THREADS * 4)-1:0] wren;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign wren [i * 4 +: 4] = {4{write_enable && writeback_if.tmask[i]}};
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
VX_dp_ram #(
.RD_PORTS (3),
.DATAW (32),
.SIZE (RAM_SIZE),
.INIT_ENABLE (1),
.INIT_VALUE (0)
) dp_ram (
.clk (clk),
.wren (write_enable && writeback_if.tmask[i]),
.waddr (waddr),
.wdata (writeback_if.data[i]),
.rden (3'b111),
.raddr ({raddr3, raddr2, raddr1}),
.rdata ({rdata3[i], rdata2[i], rdata1[i]})
);
reg [`NUM_THREADS-1:0][31:0] last_wdata;
reg [$clog2(RAM_SIZE)-1:0] last_waddr;
reg [`NUM_THREADS-1:0] last_wmask;
always @(posedge clk) begin
last_wdata <= writeback_if.data;
last_wmask <= {`NUM_THREADS{write_enable}} & writeback_if.tmask;
last_waddr <= waddr;
end
assign gpr_rsp_if.rs1_data = rdata1;
assign gpr_rsp_if.rs2_data = rdata2;
assign gpr_rsp_if.rs3_data = rdata3;
`else
localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS;
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2;
wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2;
assign waddr = {writeback_if.wid, writeback_if.rd};
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
`UNUSED_VAR (gpr_req_if.rs3)
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
for (genvar i = 0; i < `NUM_THREADS; i++) begin
VX_dp_ram #(
.RD_PORTS (2),
.DATAW (32),
.SIZE (RAM_SIZE),
.INIT_ENABLE (1),
.INIT_VALUE (0)
) dp_ram (
.clk (clk),
.wren (write_enable && writeback_if.tmask[i]),
.waddr (waddr),
.wdata (writeback_if.data[i]),
.rden (2'b11),
.raddr ({raddr2, raddr1}),
.rdata ({rdata2[i], rdata1[i]})
);
VX_dp_ram #(
.DATAW (32 * `NUM_THREADS),
.SIZE (RAM_SIZE),
.BYTEENW (`NUM_THREADS * 4),
.INIT_ENABLE (1),
.INIT_VALUE (0),
.NO_RWCHECK (1)
) dp_ram1 (
.clk (clk),
.wren (wren),
.waddr (waddr),
.wdata (writeback_if.data),
.rden (1'b1),
.raddr (raddr1),
.rdata (rdata1)
);
VX_dp_ram #(
.DATAW (32 * `NUM_THREADS),
.SIZE (RAM_SIZE),
.BYTEENW (`NUM_THREADS * 4),
.INIT_ENABLE (1),
.INIT_VALUE (0),
.NO_RWCHECK (1)
) dp_ram2 (
.clk (clk),
.wren (wren),
.waddr (waddr),
.wdata (writeback_if.data),
.rden (1'b1),
.raddr (raddr2),
.rdata (rdata2)
);
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign gpr_rsp_if.rs1_data[i] = (last_wmask[i] && (raddr1 == last_waddr)) ? last_wdata[i] : rdata1[i];
assign gpr_rsp_if.rs2_data[i] = (last_wmask[i] && (raddr2 == last_waddr)) ? last_wdata[i] : rdata2[i];
end
`ifdef EXT_F_ENABLE
wire [`NUM_THREADS-1:0][31:0] rdata3;
wire [$clog2(RAM_SIZE)-1:0] raddr3;
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
assign gpr_rsp_if.rs1_data = rdata1;
assign gpr_rsp_if.rs2_data = rdata2;
assign gpr_rsp_if.rs3_data = 0;
VX_dp_ram #(
.DATAW (32 * `NUM_THREADS),
.SIZE (RAM_SIZE),
.BYTEENW (`NUM_THREADS * 4),
.INIT_ENABLE (1),
.INIT_VALUE (0),
.NO_RWCHECK (1)
) dp_ram3 (
.clk (clk),
.wren (wren),
.waddr (waddr),
.wdata (writeback_if.data),
.rden (1'b1),
.raddr (raddr3),
.rdata (rdata3)
);
for (genvar i = 0; i < `NUM_THREADS; i++) begin
assign gpr_rsp_if.rs3_data[i] = (last_wmask[i] && (raddr3 == last_waddr)) ? last_wdata[i] : rdata3[i];
end
`else
`UNUSED_VAR (gpr_req_if.rs3)
assign gpr_rsp_if.rs3_data = 'x;
`endif
assign writeback_if.ready = 1'b1;

View file

@ -2,7 +2,6 @@
`TRACING_OFF
module VX_dp_ram #(
parameter RD_PORTS = 1,
parameter DATAW = 1,
parameter SIZE = 1,
parameter BYTEENW = 1,
@ -14,18 +13,16 @@ module VX_dp_ram #(
parameter INIT_FILE = "",
parameter [DATAW-1:0] INIT_VALUE = 0
) (
input wire clk,
input wire [BYTEENW-1:0] wren,
input wire [ADDRW-1:0] waddr,
input wire [DATAW-1:0] wdata,
input wire [RD_PORTS-1:0] rden,
input wire [RD_PORTS-1:0][ADDRW-1:0] raddr,
output wire [RD_PORTS-1:0][DATAW-1:0] rdata
input wire clk,
input wire [BYTEENW-1:0] wren,
input wire [ADDRW-1:0] waddr,
input wire [DATAW-1:0] wdata,
input wire rden,
input wire [ADDRW-1:0] raddr,
output wire [DATAW-1:0] rdata
);
`STATIC_ASSERT((1 == BYTEENW) || ((BYTEENW > 1) && 0 == (BYTEENW % 4)), ("invalid parameter"))
`STATIC_ASSERT(!LUTRAM || (RD_PORTS == 1), ("multi-porting not supported on LUTRAM"))
`define RAM_INITIALIZATION \
if (INIT_ENABLE) begin \
@ -94,7 +91,7 @@ module VX_dp_ram #(
end
end else begin
if (OUTPUT_REG) begin
reg [RD_PORTS-1:0][DATAW-1:0] rdata_r;
reg [DATAW-1:0] rdata_r;
if (BYTEENW > 1) begin
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
@ -106,10 +103,8 @@ module VX_dp_ram #(
if (wren[i])
ram[waddr][i] <= wdata[i * 8 +: 8];
end
for (integer i = 0; i < RD_PORTS; ++i) begin
if (rden[i])
rdata_r[i] <= ram[raddr[i]];
end
if (rden)
rdata_r <= ram[raddr];
end
end else begin
reg [DATAW-1:0] ram [SIZE-1:0];
@ -119,10 +114,8 @@ module VX_dp_ram #(
always @(posedge clk) begin
if (wren)
ram[waddr] <= wdata;
for (integer i = 0; i < RD_PORTS; ++i) begin
if (rden[i])
rdata_r[i] <= ram[raddr[i]];
end
if (rden)
rdata_r <= ram[raddr];
end
end
assign rdata = rdata_r;
@ -140,9 +133,7 @@ module VX_dp_ram #(
ram[waddr][i] <= wdata[i * 8 +: 8];
end
end
for (genvar i = 0; i < RD_PORTS; ++i) begin
assign rdata[i] = ram[raddr[i]];
end
assign rdata = ram[raddr];
end else begin
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0];
@ -152,9 +143,7 @@ module VX_dp_ram #(
if (wren)
ram[waddr] <= wdata;
end
for (genvar i = 0; i < RD_PORTS; ++i) begin
assign rdata[i] = ram[raddr[i]];
end
assign rdata = ram[raddr];
end
end else begin
if (BYTEENW > 1) begin
@ -168,9 +157,7 @@ module VX_dp_ram #(
ram[waddr][i] <= wdata[i * 8 +: 8];
end
end
for (genvar i = 0; i < RD_PORTS; ++i) begin
assign rdata[i] = ram[raddr[i]];
end
assign rdata = ram[raddr];
end else begin
reg [DATAW-1:0] ram [SIZE-1:0];
@ -180,16 +167,14 @@ module VX_dp_ram #(
if (wren)
ram[waddr] <= wdata;
end
for (genvar i = 0; i < RD_PORTS; ++i) begin
assign rdata[i] = ram[raddr[i]];
end
assign rdata = ram[raddr];
end
end
end
end
`else
if (OUTPUT_REG) begin
reg [RD_PORTS-1:0][DATAW-1:0] rdata_r;
reg [DATAW-1:0] rdata_r;
if (BYTEENW > 1) begin
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
@ -200,10 +185,8 @@ module VX_dp_ram #(
if (wren[i])
ram[waddr][i] <= wdata[i * 8 +: 8];
end
for (integer i = 0; i < RD_PORTS; ++i) begin
if (rden[i])
rdata_r[i] <= ram[raddr[i]];
end
if (rden)
rdata_r <= ram[raddr];
end
end else begin
reg [DATAW-1:0] ram [SIZE-1:0];
@ -213,10 +196,8 @@ module VX_dp_ram #(
always @(posedge clk) begin
if (wren)
ram[waddr] <= wdata;
for (integer i = 0; i < RD_PORTS; ++i) begin
if (rden[i])
rdata_r[i] <= ram[raddr[i]];
end
if (rden)
rdata_r <= ram[raddr];
end
end
assign rdata = rdata_r;
@ -244,13 +225,9 @@ module VX_dp_ram #(
`UNUSED_VAR (prev_write)
`UNUSED_VAR (prev_data)
`UNUSED_VAR (prev_waddr)
for (genvar i = 0; i < RD_PORTS; ++i) begin
assign rdata[i] = ram[raddr[i]];
end
assign rdata = ram[raddr];
end else begin
for (genvar i = 0; i < RD_PORTS; ++i) begin
assign rdata[i] = (prev_write && (prev_waddr == raddr[i])) ? prev_data : ram[raddr[i]];
end
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
end
end else begin
reg [DATAW-1:0] ram [SIZE-1:0];
@ -271,17 +248,13 @@ module VX_dp_ram #(
`UNUSED_VAR (prev_write)
`UNUSED_VAR (prev_data)
`UNUSED_VAR (prev_waddr)
for (genvar i = 0; i < RD_PORTS; ++i) begin
assign rdata[i] = ram[raddr[i]];
end
assign rdata = ram[raddr];
end else begin
for (genvar i = 0; i < RD_PORTS; ++i) begin
assign rdata[i] = (prev_write && (prev_waddr == raddr[i])) ? prev_data : ram[raddr[i]];
end
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
end
end
end
`endif
`endif
endmodule
`TRACING_ON