mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
removed dup address detection, replaced with coalescing
This commit is contained in:
parent
4766787478
commit
454b9e7444
2 changed files with 33 additions and 62 deletions
|
@ -275,13 +275,6 @@
|
|||
`define LSU_LINE_SIZE `MIN(`NUM_LSU_LANES * (`XLEN / 8), `L1_LINE_SIZE)
|
||||
`endif
|
||||
|
||||
// LSU Duplicate Address Check
|
||||
`ifdef LSU_DUP_ENABLE
|
||||
`define LSU_DUP_ENABLED 1
|
||||
`else
|
||||
`define LSU_DUP_ENABLED 0
|
||||
`endif
|
||||
|
||||
// Size of LSU Core Request Queue
|
||||
`ifndef LSUQ_IN_SIZE
|
||||
`define LSUQ_IN_SIZE (2 * (`NUM_THREADS / `NUM_LSU_LANES))
|
||||
|
|
|
@ -38,8 +38,8 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
localparam LSUQ_SIZEW = `LOG2UP(`LSUQ_IN_SIZE);
|
||||
localparam REQ_ASHIFT = `CLOG2(WORD_SIZE);
|
||||
|
||||
// tag_id = wid + PC + tmask + rd + op_type + align + is_dup + pid + pkt_addr
|
||||
localparam TAG_ID_WIDTH = `NW_WIDTH + `XLEN + NUM_LANES + `NR_BITS + `INST_LSU_BITS + (NUM_LANES * (REQ_ASHIFT)) + `LSU_DUP_ENABLED + PID_WIDTH + LSUQ_SIZEW;
|
||||
// tag_id = wid + PC + rd + op_type + align + pid + pkt_addr
|
||||
localparam TAG_ID_WIDTH = `NW_WIDTH + `XLEN + `NR_BITS + `INST_LSU_BITS + (NUM_LANES * (REQ_ASHIFT)) + PID_WIDTH + LSUQ_SIZEW;
|
||||
|
||||
// tag = uuid + tag_id
|
||||
localparam TAG_WIDTH = `UUID_WIDTH + TAG_ID_WIDTH;
|
||||
|
@ -88,23 +88,6 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
assign full_addr[i] = execute_if[block_idx].data.rs1_data[i][`XLEN-1:0] + execute_if[block_idx].data.imm;
|
||||
end
|
||||
|
||||
// detect duplicate addresses
|
||||
|
||||
wire lsu_is_dup;
|
||||
`ifdef LSU_DUP_ENABLE
|
||||
if (NUM_LANES > 1) begin
|
||||
wire [NUM_LANES-2:0] addr_matches;
|
||||
for (genvar i = 0; i < (NUM_LANES-1); ++i) begin
|
||||
assign addr_matches[i] = (execute_if[block_idx].data.rs1_data[i+1] == execute_if[block_idx].data.rs1_data[0]) || ~execute_if[block_idx].data.tmask[i+1];
|
||||
end
|
||||
assign lsu_is_dup = execute_if[block_idx].data.tmask[0] && (& addr_matches);
|
||||
end else begin
|
||||
assign lsu_is_dup = 0;
|
||||
end
|
||||
`else
|
||||
assign lsu_is_dup = 0;
|
||||
`endif
|
||||
|
||||
wire mem_req_empty;
|
||||
wire st_rsp_ready;
|
||||
wire lsu_valid, lsu_ready;
|
||||
|
@ -139,10 +122,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
assign lsu_ready = mem_req_ready
|
||||
&& (~mem_req_rw || st_rsp_ready); // writes commit directly
|
||||
|
||||
for (genvar i = 0; i < NUM_LANES; ++i) begin
|
||||
assign mem_req_mask[i] = execute_if[block_idx].data.tmask[i] && (~lsu_is_dup || (i == 0));
|
||||
end
|
||||
|
||||
assign mem_req_mask = execute_if[block_idx].data.tmask;
|
||||
assign mem_req_rw = ~execute_if[block_idx].data.wb;
|
||||
|
||||
wire mem_req_fire = mem_req_valid && mem_req_ready;
|
||||
|
@ -282,11 +262,15 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
`UNUSED_VAR (pkt_raddr)
|
||||
end
|
||||
|
||||
// pack memory request tag
|
||||
assign mem_req_tag = {
|
||||
execute_if[block_idx].data.uuid, execute_if[block_idx].data.wid, execute_if[block_idx].data.tmask, execute_if[block_idx].data.PC, execute_if[block_idx].data.rd, execute_if[block_idx].data.op_type, req_align, execute_if[block_idx].data.pid, pkt_waddr
|
||||
`ifdef LSU_DUP_ENABLE
|
||||
, lsu_is_dup
|
||||
`endif
|
||||
execute_if[block_idx].data.uuid,
|
||||
execute_if[block_idx].data.wid,
|
||||
execute_if[block_idx].data.PC,
|
||||
execute_if[block_idx].data.rd,
|
||||
execute_if[block_idx].data.op_type,
|
||||
req_align, execute_if[block_idx].data.pid,
|
||||
pkt_waddr
|
||||
};
|
||||
|
||||
wire [DCACHE_CHANNELS-1:0] cache_req_valid;
|
||||
|
@ -304,7 +288,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
`RESET_RELAY (mem_scheduler_reset, reset);
|
||||
|
||||
VX_mem_scheduler #(
|
||||
.INSTANCE_ID ($sformatf("core%0d-lsu-memsched", CORE_ID)),
|
||||
.INSTANCE_ID ($sformatf("core%0d-lsu-memsched%0d", CORE_ID, block_idx)),
|
||||
.CORE_REQS (`NUM_LSU_LANES),
|
||||
.MEM_CHANNELS(DCACHE_CHANNELS),
|
||||
.WORD_SIZE (WORD_SIZE),
|
||||
|
@ -374,30 +358,27 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
wire [`UUID_WIDTH-1:0] rsp_uuid;
|
||||
wire [`NW_WIDTH-1:0] rsp_wid;
|
||||
wire [NUM_LANES-1:0] rsp_tmask_uq;
|
||||
wire [`XLEN-1:0] rsp_pc;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire [`INST_LSU_BITS-1:0] rsp_op_type;
|
||||
wire [NUM_LANES-1:0][REQ_ASHIFT-1:0] rsp_align;
|
||||
wire [PID_WIDTH-1:0] rsp_pid;
|
||||
wire rsp_is_dup;
|
||||
|
||||
`ifndef LSU_DUP_ENABLE
|
||||
assign rsp_is_dup = 0;
|
||||
`endif
|
||||
|
||||
assign {
|
||||
rsp_uuid, rsp_wid, rsp_tmask_uq, rsp_pc, rsp_rd, rsp_op_type, rsp_align, rsp_pid, pkt_raddr
|
||||
`ifdef LSU_DUP_ENABLE
|
||||
, rsp_is_dup
|
||||
`endif
|
||||
} = mem_rsp_tag;
|
||||
`UNUSED_VAR (rsp_op_type)
|
||||
|
||||
// unpack memory response tag
|
||||
assign {
|
||||
rsp_uuid,
|
||||
rsp_wid,
|
||||
rsp_pc, rsp_rd,
|
||||
rsp_op_type,
|
||||
rsp_align,
|
||||
rsp_pid,
|
||||
pkt_raddr
|
||||
} = mem_rsp_tag;
|
||||
|
||||
// load response formatting
|
||||
|
||||
reg [NUM_LANES-1:0][`XLEN-1:0] rsp_data;
|
||||
wire [NUM_LANES-1:0] rsp_tmask;
|
||||
|
||||
`ifdef XLEN_64
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
@ -410,11 +391,10 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
|
||||
for (genvar i = 0; i < NUM_LANES; i++) begin
|
||||
`ifdef XLEN_64
|
||||
wire [63:0] rsp_data64 = (i == 0 || rsp_is_dup) ? mem_rsp_data[0] : mem_rsp_data[i];
|
||||
wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? (rsp_align[0][2] ? mem_rsp_data[0][63:32] : mem_rsp_data[0][31:0]) :
|
||||
(rsp_align[i][2] ? mem_rsp_data[i][63:32] : mem_rsp_data[i][31:0]);
|
||||
wire [63:0] rsp_data64 = mem_rsp_data[i];
|
||||
wire [31:0] rsp_data32 = (rsp_align[i][2] ? mem_rsp_data[i][63:32] : mem_rsp_data[i][31:0]);
|
||||
`else
|
||||
wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? mem_rsp_data[0] : mem_rsp_data[i];
|
||||
wire [31:0] rsp_data32 = mem_rsp_data[i];
|
||||
`endif
|
||||
wire [15:0] rsp_data16 = rsp_align[i][1] ? rsp_data32[31:16] : rsp_data32[15:0];
|
||||
wire [7:0] rsp_data8 = rsp_align[i][0] ? rsp_data16[15:8] : rsp_data16[7:0];
|
||||
|
@ -435,9 +415,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
default: rsp_data[i] = 'x;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
assign rsp_tmask = rsp_is_dup ? rsp_tmask_uq : mem_rsp_mask;
|
||||
end
|
||||
|
||||
// load commit
|
||||
|
||||
|
@ -449,7 +427,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
.reset (reset),
|
||||
.valid_in (mem_rsp_valid),
|
||||
.ready_in (mem_rsp_ready),
|
||||
.data_in ({rsp_uuid, rsp_wid, rsp_tmask, rsp_pc, rsp_rd, rsp_data, rsp_pid, mem_rsp_sop_pkt, mem_rsp_eop_pkt}),
|
||||
.data_in ({rsp_uuid, rsp_wid, mem_rsp_mask, rsp_pc, rsp_rd, rsp_data, rsp_pid, mem_rsp_sop_pkt, mem_rsp_eop_pkt}),
|
||||
.data_out ({commit_ld_if.data.uuid, commit_ld_if.data.wid, commit_ld_if.data.tmask, commit_ld_if.data.PC, commit_ld_if.data.rd, commit_ld_if.data.data, commit_ld_if.data.pid, commit_ld_if.data.sop, commit_ld_if.data.eop}),
|
||||
.valid_out (commit_ld_if.valid),
|
||||
.ready_out (commit_ld_if.ready)
|
||||
|
@ -507,24 +485,24 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h, byteen=0x%0h, data=", mem_req_tag, mem_req_byteen));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_req_data, NUM_LANES);
|
||||
`TRACE(1, (", is_dup=%b (#%0d)\n", lsu_is_dup, execute_if[block_idx].data.uuid));
|
||||
`TRACE(1, (" (#%0d)\n", execute_if[block_idx].data.uuid));
|
||||
end else begin
|
||||
`TRACE(1, ("%d: D$%0d Rd Req: wid=%0d, PC=0x%0h, tmask=%b, addr=", $time, CORE_ID, execute_if[block_idx].data.wid, execute_if[block_idx].data.PC, mem_req_mask));
|
||||
`TRACE_ARRAY1D(1, "0x%h", full_addr, NUM_LANES);
|
||||
`TRACE(1, (", tag=0x%0h, byteen=0x%0h, rd=%0d, is_dup=%b (#%0d)\n", mem_req_tag, mem_req_byteen, execute_if[block_idx].data.rd, lsu_is_dup, execute_if[block_idx].data.uuid));
|
||||
`TRACE(1, (", tag=0x%0h, byteen=0x%0h, rd=%0d (#%0d)\n", mem_req_tag, mem_req_byteen, execute_if[block_idx].data.rd, execute_if[block_idx].data.uuid));
|
||||
end
|
||||
end
|
||||
if (mem_rsp_fire) begin
|
||||
`TRACE(1, ("%d: D$%0d Rsp: wid=%0d, PC=0x%0h, tmask=%b, tag=0x%0h, rd=%0d, sop=%b, eop=%b, data=",
|
||||
$time, CORE_ID, rsp_wid, rsp_pc, mem_rsp_mask, mem_rsp_tag, rsp_rd, mem_rsp_sop, mem_rsp_eop));
|
||||
`TRACE_ARRAY1D(1, "0x%0h", mem_rsp_data, NUM_LANES);
|
||||
`TRACE(1, (", is_dup=%b (#%0d)\n", rsp_is_dup, rsp_uuid));
|
||||
`TRACE(1, (" (#%0d)\n", rsp_uuid));
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
`ifdef DBG_SCOPE_LSU
|
||||
if (CORE_ID == 0) begin
|
||||
if (CORE_ID == 0 && block_idx == 0) begin
|
||||
`ifdef SCOPE
|
||||
VX_scope_tap #(
|
||||
.SCOPE_ID (3),
|
||||
|
@ -548,7 +526,7 @@ module VX_lsu_unit import VX_gpu_pkg::*; #(
|
|||
ila_lsu ila_lsu_inst (
|
||||
.clk (clk),
|
||||
.probe0 ({mem_req_data_0, execute_if[0].data.uuid, execute_if[0].data.wid, execute_if[0].data.PC, mem_req_mask, full_addr_0, mem_req_byteen, mem_req_rw, mem_req_ready, mem_req_valid}),
|
||||
.probe1 ({rsp_data_0, rsp_uuid, mem_rsp_eop, rsp_pc, rsp_rd, rsp_tmask, rsp_wid, mem_rsp_ready, mem_rsp_valid}),
|
||||
.probe1 ({rsp_data_0, rsp_uuid, mem_rsp_eop, rsp_pc, rsp_rd, mem_rsp_mask, rsp_wid, mem_rsp_ready, mem_rsp_valid}),
|
||||
.probe2 ({cache_bus_if.req_data.data, cache_bus_if.req_data.tag, cache_bus_if.req_data.byteen, cache_bus_if.req_data.addr, cache_bus_if.req_data.rw, cache_bus_if.req_ready, cache_bus_if.req_valid}),
|
||||
.probe3 ({cache_bus_if.rsp_data.data, cache_bus_if.rsp_data.tag, cache_bus_if.rsp_ready, cache_bus_if.rsp_valid})
|
||||
);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue