mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
local memory area optimization
This commit is contained in:
parent
52c5f1ff6b
commit
4c1b3fd88d
1 changed files with 34 additions and 19 deletions
|
@ -94,7 +94,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
wire [NUM_BANKS-1:0][REQ_SEL_WIDTH-1:0] per_bank_req_idx;
|
||||
wire [NUM_BANKS-1:0] per_bank_req_ready;
|
||||
|
||||
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_all;
|
||||
wire [NUM_BANKS-1:0][REQ_DATAW-1:0] per_bank_req_data_aos;
|
||||
|
||||
wire [NUM_REQS-1:0] req_valid_in;
|
||||
wire [NUM_REQS-1:0][REQ_DATAW-1:0] req_data_in;
|
||||
|
@ -111,7 +111,8 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
req_bank_addr[i],
|
||||
mem_bus_if[i].req_data.byteen,
|
||||
mem_bus_if[i].req_data.data,
|
||||
mem_bus_if[i].req_data.tag};
|
||||
mem_bus_if[i].req_data.tag
|
||||
};
|
||||
assign mem_bus_if[i].req_ready = req_ready_in[i];
|
||||
end
|
||||
|
||||
|
@ -135,7 +136,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
.sel_in (req_bank_idx),
|
||||
.ready_in (req_ready_in),
|
||||
.valid_out (per_bank_req_valid),
|
||||
.data_out (per_bank_req_data_all),
|
||||
.data_out (per_bank_req_data_aos),
|
||||
.sel_out (per_bank_req_idx),
|
||||
.ready_out (per_bank_req_ready)
|
||||
);
|
||||
|
@ -146,7 +147,8 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
per_bank_req_addr[i],
|
||||
per_bank_req_byteen[i],
|
||||
per_bank_req_data[i],
|
||||
per_bank_req_tag[i]} = per_bank_req_data_all[i];
|
||||
per_bank_req_tag[i]
|
||||
} = per_bank_req_data_aos[i];
|
||||
end
|
||||
|
||||
// banks access
|
||||
|
@ -160,17 +162,18 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (bank_reset, reset);
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
wire bank_req_valid, bank_req_ready;
|
||||
wire bank_rsp_valid, bank_rsp_ready;
|
||||
wire [WORD_WIDTH-1:0] bank_rsp_data;
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (WORD_WIDTH),
|
||||
.SIZE (WORDS_PER_BANK),
|
||||
.WRENW (WORD_SIZE)
|
||||
.WRENW (WORD_SIZE),
|
||||
.NO_RWCHECK (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.reset (1'b0),
|
||||
.read (1'b1),
|
||||
.reset (reset),
|
||||
.read (per_bank_req_valid[i] && per_bank_req_ready[i] && ~per_bank_req_rw[i]),
|
||||
.write (per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i]),
|
||||
.wren (per_bank_req_byteen[i]),
|
||||
.addr (per_bank_req_addr[i]),
|
||||
|
@ -178,19 +181,31 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
.rdata (bank_rsp_data)
|
||||
);
|
||||
|
||||
// drop write response
|
||||
assign bank_req_valid = per_bank_req_valid[i] && ~per_bank_req_rw[i];
|
||||
assign per_bank_req_ready[i] = bank_req_ready || per_bank_req_rw[i];
|
||||
// read-during-write hazard detection
|
||||
reg [BANK_ADDR_WIDTH-1:0] last_wr_addr;
|
||||
reg last_wr_valid;
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
last_wr_valid <= 0;
|
||||
end else begin
|
||||
last_wr_valid <= per_bank_req_valid[i] && per_bank_req_ready[i] && per_bank_req_rw[i];
|
||||
end
|
||||
last_wr_addr <= per_bank_req_addr[i];
|
||||
end
|
||||
wire is_rdw_hazard = last_wr_valid && ~per_bank_req_rw[i] && (per_bank_req_addr[i] == last_wr_addr);
|
||||
|
||||
// drop write response and stall on read-during-write hazard
|
||||
assign bank_rsp_valid = per_bank_req_valid[i] && ~per_bank_req_rw[i] && ~is_rdw_hazard;
|
||||
assign per_bank_req_ready[i] = (bank_rsp_ready || per_bank_req_rw[i]) && ~is_rdw_hazard;
|
||||
|
||||
// register BRAM output
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (REQ_SEL_WIDTH + WORD_WIDTH + TAG_WIDTH),
|
||||
.SIZE (1)
|
||||
VX_pipe_buffer #(
|
||||
.DATAW (REQ_SEL_WIDTH + WORD_WIDTH + TAG_WIDTH)
|
||||
) bank_buf (
|
||||
.clk (clk),
|
||||
.reset (bank_reset),
|
||||
.valid_in (bank_req_valid),
|
||||
.ready_in (bank_req_ready),
|
||||
.valid_in (bank_rsp_valid),
|
||||
.ready_in (bank_rsp_ready),
|
||||
.data_in ({per_bank_req_idx[i], bank_rsp_data, per_bank_req_tag[i]}),
|
||||
.data_out ({per_bank_rsp_idx[i], per_bank_rsp_data[i], per_bank_rsp_tag[i]}),
|
||||
.valid_out (per_bank_rsp_valid[i]),
|
||||
|
@ -200,10 +215,10 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
|
||||
// bank responses gather
|
||||
|
||||
wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data2;
|
||||
wire [NUM_BANKS-1:0][RSP_DATAW-1:0] per_bank_rsp_data_aos;
|
||||
|
||||
for (genvar i = 0; i < NUM_BANKS; ++i) begin
|
||||
assign per_bank_rsp_data2[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
|
||||
assign per_bank_rsp_data_aos[i] = {per_bank_rsp_data[i], per_bank_rsp_tag[i]};
|
||||
end
|
||||
|
||||
wire [NUM_REQS-1:0] rsp_valid_out;
|
||||
|
@ -222,7 +237,7 @@ module VX_local_mem import VX_gpu_pkg::*; #(
|
|||
`UNUSED_PIN (collisions),
|
||||
.sel_in (per_bank_rsp_idx),
|
||||
.valid_in (per_bank_rsp_valid),
|
||||
.data_in (per_bank_rsp_data2),
|
||||
.data_in (per_bank_rsp_data_aos),
|
||||
.ready_in (per_bank_rsp_ready),
|
||||
.valid_out (rsp_valid_out),
|
||||
.data_out (rsp_data_out),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue