mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
GPRs optimization - disabling BRAM's read-during-write bypass block.
This commit is contained in:
parent
12b8b4af24
commit
f3ba27b138
2 changed files with 108 additions and 106 deletions
|
@ -17,71 +17,100 @@ module VX_gpr_stage #(
|
|||
`UNUSED_PARAM (CORE_ID)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS;
|
||||
|
||||
// ensure r0 never gets written, which can happen before the reset
|
||||
wire write_enable = writeback_if.valid && (writeback_if.rd != 0);
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS;
|
||||
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3;
|
||||
wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2, raddr3;
|
||||
|
||||
assign waddr = {writeback_if.wid, writeback_if.rd};
|
||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
|
||||
wire [(`NUM_THREADS * 4)-1:0] wren;
|
||||
// Build the RAM byte-enable vector: thread i's write enable
// (write_enable && tmask[i]) is replicated across the four bits
// wren[4*i +: 4], i.e. one enable bit per byte of that thread's 32-bit value.
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign wren [i * 4 +: 4] = {4{write_enable && writeback_if.tmask[i]}};
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
VX_dp_ram #(
|
||||
.RD_PORTS (3),
|
||||
.DATAW (32),
|
||||
.SIZE (RAM_SIZE),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0)
|
||||
) dp_ram (
|
||||
.clk (clk),
|
||||
.wren (write_enable && writeback_if.tmask[i]),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data[i]),
|
||||
.rden (3'b111),
|
||||
.raddr ({raddr3, raddr2, raddr1}),
|
||||
.rdata ({rdata3[i], rdata2[i], rdata1[i]})
|
||||
);
|
||||
// Registered copy of the previous cycle's writeback: data for every thread,
// the RAM address that was written, and the per-thread mask of lanes that
// were actually written.
reg [`NUM_THREADS-1:0][31:0] last_wdata;
|
||||
reg [$clog2(RAM_SIZE)-1:0] last_waddr;
|
||||
reg [`NUM_THREADS-1:0] last_wmask;
|
||||
|
||||
// Captured on every rising clock edge (no reset); last_wmask is all zeros
// whenever write_enable is low, so stale data/address values are never selected
// by logic that qualifies on last_wmask.
always @(posedge clk) begin
|
||||
last_wdata <= writeback_if.data;
|
||||
last_wmask <= {`NUM_THREADS{write_enable}} & writeback_if.tmask;
|
||||
last_waddr <= waddr;
|
||||
end
|
||||
|
||||
assign gpr_rsp_if.rs1_data = rdata1;
|
||||
assign gpr_rsp_if.rs2_data = rdata2;
|
||||
assign gpr_rsp_if.rs3_data = rdata3;
|
||||
`else
|
||||
localparam RAM_SIZE = `NUM_WARPS * `NUM_REGS;
|
||||
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2;
|
||||
wire [$clog2(RAM_SIZE)-1:0] waddr, raddr1, raddr2;
|
||||
|
||||
|
||||
assign waddr = {writeback_if.wid, writeback_if.rd};
|
||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||
`UNUSED_VAR (gpr_req_if.rs3)
|
||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
VX_dp_ram #(
|
||||
.RD_PORTS (2),
|
||||
.DATAW (32),
|
||||
.SIZE (RAM_SIZE),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0)
|
||||
) dp_ram (
|
||||
.clk (clk),
|
||||
.wren (write_enable && writeback_if.tmask[i]),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data[i]),
|
||||
.rden (2'b11),
|
||||
.raddr ({raddr2, raddr1}),
|
||||
.rdata ({rdata2[i], rdata1[i]})
|
||||
);
|
||||
// RAM copy serving the rs1 read port. Each row holds all threads' 32-bit
// values packed together (DATAW = 32 * NUM_THREADS); rows are addressed by
// waddr / raddr1 and written with the per-byte enables in wren.
// The read enable is tied high, so raddr1 is read every cycle.
// NOTE(review): NO_RWCHECK (1) presumably turns off the RAM's own
// read-during-write handling — the parameter's definition is not visible
// here; confirm against VX_dp_ram.
VX_dp_ram #(
|
||||
.DATAW (32 * `NUM_THREADS),
|
||||
.SIZE (RAM_SIZE),
|
||||
.BYTEENW (`NUM_THREADS * 4),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0),
|
||||
.NO_RWCHECK (1)
|
||||
) dp_ram1 (
|
||||
.clk (clk),
|
||||
.wren (wren),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data),
|
||||
.rden (1'b1),
|
||||
.raddr (raddr1),
|
||||
.rdata (rdata1)
|
||||
);
|
||||
|
||||
// RAM copy serving the rs2 read port. It receives the same write enables,
// write address and write data as dp_ram1, and differs only in its read
// address (raddr2) and read data (rdata2).
// NOTE(review): NO_RWCHECK (1) presumably turns off the RAM's own
// read-during-write handling — the parameter's definition is not visible
// here; confirm against VX_dp_ram.
VX_dp_ram #(
|
||||
.DATAW (32 * `NUM_THREADS),
|
||||
.SIZE (RAM_SIZE),
|
||||
.BYTEENW (`NUM_THREADS * 4),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0),
|
||||
.NO_RWCHECK (1)
|
||||
) dp_ram2 (
|
||||
.clk (clk),
|
||||
.wren (wren),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data),
|
||||
.rden (1'b1),
|
||||
.raddr (raddr2),
|
||||
.rdata (rdata2)
|
||||
);
|
||||
|
||||
// Per-thread forwarding for rs1 and rs2: when the registered last write
// covered thread i (last_wmask[i]) and targeted the address now being read,
// return the registered write data instead of the RAM read data.
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign gpr_rsp_if.rs1_data[i] = (last_wmask[i] && (raddr1 == last_waddr)) ? last_wdata[i] : rdata1[i];
|
||||
assign gpr_rsp_if.rs2_data[i] = (last_wmask[i] && (raddr2 == last_waddr)) ? last_wdata[i] : rdata2[i];
|
||||
end
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire [`NUM_THREADS-1:0][31:0] rdata3;
|
||||
wire [$clog2(RAM_SIZE)-1:0] raddr3;
|
||||
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
|
||||
|
||||
assign gpr_rsp_if.rs1_data = rdata1;
|
||||
assign gpr_rsp_if.rs2_data = rdata2;
|
||||
assign gpr_rsp_if.rs3_data = 0;
|
||||
// RAM copy serving the rs3 read port. Write-side connections (wren, waddr,
// wdata) match the other register-file RAM instances; reads use raddr3 and
// drive rdata3.
// NOTE(review): NO_RWCHECK (1) presumably turns off the RAM's own
// read-during-write handling — the parameter's definition is not visible
// here; confirm against VX_dp_ram.
VX_dp_ram #(
|
||||
.DATAW (32 * `NUM_THREADS),
|
||||
.SIZE (RAM_SIZE),
|
||||
.BYTEENW (`NUM_THREADS * 4),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0),
|
||||
.NO_RWCHECK (1)
|
||||
) dp_ram3 (
|
||||
.clk (clk),
|
||||
.wren (wren),
|
||||
.waddr (waddr),
|
||||
.wdata (writeback_if.data),
|
||||
.rden (1'b1),
|
||||
.raddr (raddr3),
|
||||
.rdata (rdata3)
|
||||
);
|
||||
|
||||
// Per-thread forwarding for rs3: select the registered last-write data when
// that write covered thread i and its address equals raddr3; otherwise use
// the RAM read data.
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign gpr_rsp_if.rs3_data[i] = (last_wmask[i] && (raddr3 == last_waddr)) ? last_wdata[i] : rdata3[i];
|
||||
end
|
||||
`else
|
||||
`UNUSED_VAR (gpr_req_if.rs3)
|
||||
assign gpr_rsp_if.rs3_data = 'x;
|
||||
`endif
|
||||
|
||||
assign writeback_if.ready = 1'b1;
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
`TRACING_OFF
|
||||
module VX_dp_ram #(
|
||||
parameter RD_PORTS = 1,
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter BYTEENW = 1,
|
||||
|
@ -14,18 +13,16 @@ module VX_dp_ram #(
|
|||
parameter INIT_FILE = "",
|
||||
parameter [DATAW-1:0] INIT_VALUE = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire [BYTEENW-1:0] wren,
|
||||
input wire [ADDRW-1:0] waddr,
|
||||
input wire [DATAW-1:0] wdata,
|
||||
input wire [RD_PORTS-1:0] rden,
|
||||
input wire [RD_PORTS-1:0][ADDRW-1:0] raddr,
|
||||
output wire [RD_PORTS-1:0][DATAW-1:0] rdata
|
||||
input wire clk,
|
||||
input wire [BYTEENW-1:0] wren,
|
||||
input wire [ADDRW-1:0] waddr,
|
||||
input wire [DATAW-1:0] wdata,
|
||||
input wire rden,
|
||||
input wire [ADDRW-1:0] raddr,
|
||||
output wire [DATAW-1:0] rdata
|
||||
);
|
||||
|
||||
`STATIC_ASSERT((1 == BYTEENW) || ((BYTEENW > 1) && 0 == (BYTEENW % 4)), ("invalid parameter"))
|
||||
`STATIC_ASSERT(!LUTRAM || (RD_PORTS == 1), ("multi-porting not supported on LUTRAM"))
|
||||
|
||||
|
||||
`define RAM_INITIALIZATION \
|
||||
if (INIT_ENABLE) begin \
|
||||
|
@ -94,7 +91,7 @@ module VX_dp_ram #(
|
|||
end
|
||||
end else begin
|
||||
if (OUTPUT_REG) begin
|
||||
reg [RD_PORTS-1:0][DATAW-1:0] rdata_r;
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||
|
@ -106,10 +103,8 @@ module VX_dp_ram #(
|
|||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
for (integer i = 0; i < RD_PORTS; ++i) begin
|
||||
if (rden[i])
|
||||
rdata_r[i] <= ram[raddr[i]];
|
||||
end
|
||||
if (rden)
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end else begin
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
@ -119,10 +114,8 @@ module VX_dp_ram #(
|
|||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
ram[waddr] <= wdata;
|
||||
for (integer i = 0; i < RD_PORTS; ++i) begin
|
||||
if (rden[i])
|
||||
rdata_r[i] <= ram[raddr[i]];
|
||||
end
|
||||
if (rden)
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
|
@ -140,9 +133,7 @@ module VX_dp_ram #(
|
|||
ram[waddr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
for (genvar i = 0; i < RD_PORTS; ++i) begin
|
||||
assign rdata[i] = ram[raddr[i]];
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
`NO_RW_RAM_CHECK reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
||||
|
@ -152,9 +143,7 @@ module VX_dp_ram #(
|
|||
if (wren)
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
for (genvar i = 0; i < RD_PORTS; ++i) begin
|
||||
assign rdata[i] = ram[raddr[i]];
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
end else begin
|
||||
if (BYTEENW > 1) begin
|
||||
|
@ -168,9 +157,7 @@ module VX_dp_ram #(
|
|||
ram[waddr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
for (genvar i = 0; i < RD_PORTS; ++i) begin
|
||||
assign rdata[i] = ram[raddr[i]];
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
||||
|
@ -180,16 +167,14 @@ module VX_dp_ram #(
|
|||
if (wren)
|
||||
ram[waddr] <= wdata;
|
||||
end
|
||||
for (genvar i = 0; i < RD_PORTS; ++i) begin
|
||||
assign rdata[i] = ram[raddr[i]];
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
`else
|
||||
if (OUTPUT_REG) begin
|
||||
reg [RD_PORTS-1:0][DATAW-1:0] rdata_r;
|
||||
reg [DATAW-1:0] rdata_r;
|
||||
if (BYTEENW > 1) begin
|
||||
reg [BYTEENW-1:0][7:0] ram [SIZE-1:0];
|
||||
|
||||
|
@ -200,10 +185,8 @@ module VX_dp_ram #(
|
|||
if (wren[i])
|
||||
ram[waddr][i] <= wdata[i * 8 +: 8];
|
||||
end
|
||||
for (integer i = 0; i < RD_PORTS; ++i) begin
|
||||
if (rden[i])
|
||||
rdata_r[i] <= ram[raddr[i]];
|
||||
end
|
||||
if (rden)
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end else begin
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
@ -213,10 +196,8 @@ module VX_dp_ram #(
|
|||
always @(posedge clk) begin
|
||||
if (wren)
|
||||
ram[waddr] <= wdata;
|
||||
for (integer i = 0; i < RD_PORTS; ++i) begin
|
||||
if (rden[i])
|
||||
rdata_r[i] <= ram[raddr[i]];
|
||||
end
|
||||
if (rden)
|
||||
rdata_r <= ram[raddr];
|
||||
end
|
||||
end
|
||||
assign rdata = rdata_r;
|
||||
|
@ -244,13 +225,9 @@ module VX_dp_ram #(
|
|||
`UNUSED_VAR (prev_write)
|
||||
`UNUSED_VAR (prev_data)
|
||||
`UNUSED_VAR (prev_waddr)
|
||||
for (genvar i = 0; i < RD_PORTS; ++i) begin
|
||||
assign rdata[i] = ram[raddr[i]];
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
for (genvar i = 0; i < RD_PORTS; ++i) begin
|
||||
assign rdata[i] = (prev_write && (prev_waddr == raddr[i])) ? prev_data : ram[raddr[i]];
|
||||
end
|
||||
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||
end
|
||||
end else begin
|
||||
reg [DATAW-1:0] ram [SIZE-1:0];
|
||||
|
@ -271,17 +248,13 @@ module VX_dp_ram #(
|
|||
`UNUSED_VAR (prev_write)
|
||||
`UNUSED_VAR (prev_data)
|
||||
`UNUSED_VAR (prev_waddr)
|
||||
for (genvar i = 0; i < RD_PORTS; ++i) begin
|
||||
assign rdata[i] = ram[raddr[i]];
|
||||
end
|
||||
assign rdata = ram[raddr];
|
||||
end else begin
|
||||
for (genvar i = 0; i < RD_PORTS; ++i) begin
|
||||
assign rdata[i] = (prev_write && (prev_waddr == raddr[i])) ? prev_data : ram[raddr[i]];
|
||||
end
|
||||
assign rdata = (prev_write && (prev_waddr == raddr)) ? prev_data : ram[raddr];
|
||||
end
|
||||
end
|
||||
end
|
||||
`endif
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
Loading…
Add table
Add a link
Reference in a new issue