mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
async bram optimization
This commit is contained in:
parent
f49084b298
commit
a5381fd788
9 changed files with 135 additions and 156 deletions
50
hw/rtl/cache/VX_cache_bank.sv
vendored
50
hw/rtl/cache/VX_cache_bank.sv
vendored
|
@ -167,7 +167,6 @@ module VX_cache_bank #(
|
|||
wire [NUM_WAYS-1:0] way_idx_st0, way_idx_st1;
|
||||
wire [NUM_WAYS-1:0] tag_matches_st0;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
|
||||
wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1;
|
||||
wire mshr_pending_st0, mshr_pending_st1;
|
||||
wire mshr_empty;
|
||||
|
||||
|
@ -380,14 +379,14 @@ module VX_cache_bank #(
|
|||
assign line_tag2_st0 = (is_fill_st0 || is_flush2_st0) ? evict_tag_st0 : line_tag_st0;
|
||||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
|
||||
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
|
||||
.RESETW (1)
|
||||
) pipe_reg1 (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.enable (~pipe_stall),
|
||||
.data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, rw_st0, flags_st0, line_tag2_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_idx_st0, evict_dirty_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, rw_st1, flags_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_idx_st1, evict_dirty_st1, mshr_pending_st1})
|
||||
.data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, rw_st0, flags_st0, line_tag2_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, way_idx_st0, evict_dirty_st0, mshr_pending_st0}),
|
||||
.data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, rw_st1, flags_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, way_idx_st1, evict_dirty_st1, mshr_pending_st1})
|
||||
);
|
||||
|
||||
// we have a tag hit
|
||||
|
@ -473,25 +472,20 @@ module VX_cache_bank #(
|
|||
.dirty_byteen(dirty_byteen_st1)
|
||||
);
|
||||
|
||||
wire [MSHR_SIZE-1:0] mshr_lookup_pending_st0;
|
||||
wire [MSHR_SIZE-1:0] mshr_lookup_rw_st0;
|
||||
wire mshr_allocate_st0 = valid_st0 && is_creq_st0;
|
||||
wire mshr_lookup_st0 = mshr_allocate_st0;
|
||||
|
||||
wire mshr_finalize_st1 = valid_st1 && is_creq_st1;
|
||||
|
||||
// release allocated mshr entry if we had a hit
|
||||
wire mshr_release_st1;
|
||||
if (WRITEBACK) begin : g_mshr_release
|
||||
assign mshr_release_st1 = is_hit_st1;
|
||||
assign mshr_release_st1 = valid_st1 && is_creq_st1 && is_hit_st1;
|
||||
end else begin : g_mshr_release_ro
|
||||
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address
|
||||
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content
|
||||
// this can happen when writes are sent late, when the fill was already in flight.
|
||||
assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
|
||||
assign mshr_release_st1 = valid_st1 && is_creq_st1 && (is_hit_st1 || (rw_st1 && ~mshr_pending_st1));
|
||||
end
|
||||
|
||||
wire mshr_dequeue = mshr_finalize_st1 && mshr_release_st1 && ~pipe_stall;
|
||||
wire mshr_dequeue = mshr_release_st1 && ~pipe_stall;
|
||||
|
||||
VX_pending_size #(
|
||||
.SIZE (MSHR_SIZE)
|
||||
|
@ -513,6 +507,8 @@ module VX_cache_bank #(
|
|||
.LINE_SIZE (LINE_SIZE),
|
||||
.NUM_BANKS (NUM_BANKS),
|
||||
.MSHR_SIZE (MSHR_SIZE),
|
||||
.WRITEBACK (WRITEBACK),
|
||||
.RDW_STALL (1),
|
||||
.UUID_WIDTH (UUID_WIDTH),
|
||||
.DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH)
|
||||
) cache_mshr (
|
||||
|
@ -520,8 +516,8 @@ module VX_cache_bank #(
|
|||
.reset (reset),
|
||||
|
||||
.deq_req_uuid (req_uuid_sel),
|
||||
.lkp_req_uuid (req_uuid_st0),
|
||||
.fin_req_uuid (req_uuid_st1),
|
||||
.alc_req_uuid (req_uuid_st0),
|
||||
.rel_req_uuid (req_uuid_st1),
|
||||
|
||||
// memory fill
|
||||
.fill_valid (mem_rsp_fire),
|
||||
|
@ -542,32 +538,14 @@ module VX_cache_bank #(
|
|||
.allocate_rw (rw_st0),
|
||||
.allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}),
|
||||
.allocate_id (mshr_alloc_id_st0),
|
||||
.allocate_prev (mshr_prev_st0),
|
||||
.allocate_pending(mshr_pending_st0),
|
||||
`UNUSED_PIN (allocate_ready),
|
||||
|
||||
// lookup
|
||||
.lookup_valid (mshr_lookup_st0 && ~pipe_stall),
|
||||
.lookup_addr (addr_st0),
|
||||
.lookup_pending (mshr_lookup_pending_st0),
|
||||
.lookup_rw (mshr_lookup_rw_st0),
|
||||
|
||||
// finalize
|
||||
.finalize_valid (mshr_finalize_st1 && ~pipe_stall),
|
||||
.finalize_release(mshr_release_st1),
|
||||
.finalize_pending(mshr_pending_st1),
|
||||
.finalize_id (mshr_id_st1),
|
||||
.finalize_prev (mshr_prev_st1)
|
||||
// release
|
||||
.release_valid (mshr_release_st1 && ~pipe_stall),
|
||||
.release_id (mshr_id_st1)
|
||||
);
|
||||
|
||||
// check if there are pending requests to same line in the MSHR
|
||||
wire [MSHR_SIZE-1:0] lookup_matches;
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_lookup_matches
|
||||
assign lookup_matches[i] = mshr_lookup_pending_st0[i]
|
||||
&& (i != mshr_id_st0) // exclude current mshr id
|
||||
&& (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough
|
||||
end
|
||||
assign mshr_pending_st0 = (| lookup_matches);
|
||||
|
||||
// schedule core response
|
||||
|
||||
wire crsp_queue_valid, crsp_queue_ready;
|
||||
|
|
8
hw/rtl/cache/VX_cache_data.sv
vendored
8
hw/rtl/cache/VX_cache_data.sv
vendored
|
@ -193,16 +193,16 @@ module VX_cache_data #(
|
|||
`ifdef DBG_TRACE_CACHE
|
||||
always @(posedge clk) begin
|
||||
if (fill) begin
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data))
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, line=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data))
|
||||
end
|
||||
if (flush) begin
|
||||
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data))
|
||||
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, line=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data))
|
||||
end
|
||||
if (read) begin
|
||||
`TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid))
|
||||
`TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid))
|
||||
end
|
||||
if (write) begin
|
||||
`TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid))
|
||||
`TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid))
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
134
hw/rtl/cache/VX_cache_mshr.sv
vendored
134
hw/rtl/cache/VX_cache_mshr.sv
vendored
|
@ -24,36 +24,22 @@
|
|||
// arrival and are dequeued in the same order.
|
||||
// Each entry has a next pointer to the next entry pending for the same cache line.
|
||||
//
|
||||
// During the fill operation, the MSHR will release the MSHR entry at fill_id
|
||||
// During the fill request, the MSHR will release the MSHR entry at fill_id
|
||||
// which represents the first request in the pending list that initiated the memory fill.
|
||||
//
|
||||
// The dequeue operation directly follows the fill operation and will release
|
||||
// The dequeue response directly follows the fill request and will release
|
||||
// all the subsequent entries linked to fill_id (pending the same cache line).
|
||||
//
|
||||
// During the allocation operation, the MSHR will allocate the next free slot
|
||||
// During the allocation request, the MSHR will allocate the next free slot
|
||||
// for the incoming core request. We return the allocated slot id as well as
|
||||
// the slot id of the previous entry for the same cache line. This is used to
|
||||
// link the new entry to the pending list during finalization.
|
||||
// link the new entry to the pending list.
|
||||
//
|
||||
// The lookup operation is used to find all pending entries for a given cache line.
|
||||
// This is used to by the cache bank to determine if a cache miss is already pending
|
||||
// and therefore avoid issuing a memory fill request.
|
||||
//
|
||||
// The finalize operation is used to release the allocated MSHR entry if we had a hit.
|
||||
// If we had a miss and finalize_pending is true, we link the allocated entry to
|
||||
// its corresponding pending list (via finalize_prev).
|
||||
// The release request is used to invalidate the allocated MSHR entry if we had a cache hit.
|
||||
//
|
||||
// Warning: This MSHR implementation is strongly coupled with the bank pipeline
|
||||
// and as such changes to either module requires careful evaluation.
|
||||
//
|
||||
// This architecture implements three pipeline stages:
|
||||
// - Arbitration: cache bank arbitration before entering pipeline.
|
||||
// fill and dequeue operations are executed at this stage.
|
||||
// - stage 0: cache bank tag access stage.
|
||||
// allocate and lookup operations are executed at this stage.
|
||||
// - stage 1: cache bank tdatag access stage.
|
||||
// finalize operation is executed at this stage.
|
||||
//
|
||||
|
||||
module VX_cache_mshr #(
|
||||
parameter `STRING INSTANCE_ID= "",
|
||||
|
@ -68,6 +54,11 @@ module VX_cache_mshr #(
|
|||
parameter UUID_WIDTH = 0,
|
||||
// MSHR parameters
|
||||
parameter DATA_WIDTH = 1,
|
||||
// Enable cache writeback
|
||||
parameter WRITEBACK = 0,
|
||||
// Cache stall on read during write
|
||||
RDW_STALL = 0,
|
||||
|
||||
parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -75,8 +66,8 @@ module VX_cache_mshr #(
|
|||
|
||||
`IGNORE_UNUSED_BEGIN
|
||||
input wire[`UP(UUID_WIDTH)-1:0] deq_req_uuid,
|
||||
input wire[`UP(UUID_WIDTH)-1:0] lkp_req_uuid,
|
||||
input wire[`UP(UUID_WIDTH)-1:0] fin_req_uuid,
|
||||
input wire[`UP(UUID_WIDTH)-1:0] alc_req_uuid,
|
||||
input wire[`UP(UUID_WIDTH)-1:0] rel_req_uuid,
|
||||
`IGNORE_UNUSED_END
|
||||
|
||||
// memory fill
|
||||
|
@ -98,21 +89,12 @@ module VX_cache_mshr #(
|
|||
input wire allocate_rw,
|
||||
input wire [DATA_WIDTH-1:0] allocate_data,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] allocate_id,
|
||||
output wire [MSHR_ADDR_WIDTH-1:0] allocate_prev,
|
||||
output wire allocate_pending,
|
||||
output wire allocate_ready,
|
||||
|
||||
// lookup
|
||||
input wire lookup_valid,
|
||||
input wire [`CS_LINE_ADDR_WIDTH-1:0] lookup_addr,
|
||||
output wire [MSHR_SIZE-1:0] lookup_pending,
|
||||
output wire [MSHR_SIZE-1:0] lookup_rw,
|
||||
|
||||
// finalize
|
||||
input wire finalize_valid,
|
||||
input wire finalize_release,
|
||||
input wire finalize_pending,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] finalize_id,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] finalize_prev
|
||||
// release
|
||||
input wire release_valid,
|
||||
input wire [MSHR_ADDR_WIDTH-1:0] release_id
|
||||
);
|
||||
`UNUSED_PARAM (BANK_ID)
|
||||
|
||||
|
@ -130,13 +112,15 @@ module VX_cache_mshr #(
|
|||
reg [MSHR_ADDR_WIDTH-1:0] dequeue_id_r, dequeue_id_n;
|
||||
|
||||
wire [MSHR_ADDR_WIDTH-1:0] prev_idx;
|
||||
reg [MSHR_ADDR_WIDTH-1:0] post_alloc_id, post_alloc_previd;
|
||||
reg post_alloc_val;
|
||||
|
||||
wire allocate_fire = allocate_valid && allocate_ready;
|
||||
wire dequeue_fire = dequeue_valid && dequeue_ready;
|
||||
|
||||
wire [MSHR_SIZE-1:0] addr_matches;
|
||||
for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_addr_matches
|
||||
assign addr_matches[i] = valid_table[i] && (addr_table[i] == lookup_addr);
|
||||
assign addr_matches[i] = valid_table[i] && (addr_table[i] == allocate_addr);
|
||||
end
|
||||
|
||||
VX_lzc #(
|
||||
|
@ -148,6 +132,7 @@ module VX_cache_mshr #(
|
|||
.valid_out (allocate_rdy_n)
|
||||
);
|
||||
|
||||
// find matching tail-entry
|
||||
VX_priority_encoder #(
|
||||
.N (MSHR_SIZE)
|
||||
) prev_sel (
|
||||
|
@ -172,18 +157,19 @@ module VX_cache_mshr #(
|
|||
valid_table_n[dequeue_id] = 0;
|
||||
if (next_table[dequeue_id]) begin
|
||||
dequeue_id_n = next_index[dequeue_id];
|
||||
end else if (!RDW_STALL && post_alloc_val && (post_alloc_previd == dequeue_id)) begin
|
||||
dequeue_id_n = post_alloc_id;
|
||||
end else begin
|
||||
dequeue_val_n = 0;
|
||||
end
|
||||
end
|
||||
|
||||
if (finalize_valid) begin
|
||||
if (finalize_release) begin
|
||||
valid_table_n[finalize_id] = 0;
|
||||
end
|
||||
if (finalize_pending) begin
|
||||
next_table_x[finalize_prev] = 1;
|
||||
end
|
||||
if (release_valid) begin
|
||||
valid_table_n[release_id] = 0;
|
||||
end
|
||||
|
||||
if (post_alloc_val) begin
|
||||
next_table_x[post_alloc_previd] = 1;
|
||||
end
|
||||
|
||||
next_table_n = next_table_x;
|
||||
|
@ -198,39 +184,43 @@ module VX_cache_mshr #(
|
|||
valid_table <= '0;
|
||||
allocate_rdy <= 0;
|
||||
dequeue_val <= 0;
|
||||
post_alloc_val <= 0;
|
||||
end else begin
|
||||
valid_table <= valid_table_n;
|
||||
allocate_rdy <= allocate_rdy_n;
|
||||
dequeue_val <= dequeue_val_n;
|
||||
post_alloc_val <= allocate_fire && allocate_pending;
|
||||
end
|
||||
|
||||
if (allocate_fire) begin
|
||||
addr_table[allocate_id] <= allocate_addr;
|
||||
addr_table[allocate_id] <= allocate_addr;
|
||||
write_table[allocate_id] <= allocate_rw;
|
||||
end
|
||||
|
||||
if (finalize_valid && finalize_pending) begin
|
||||
next_index[finalize_prev] <= finalize_id;
|
||||
if (post_alloc_val) begin
|
||||
next_index[post_alloc_previd] <= post_alloc_id;
|
||||
end
|
||||
|
||||
dequeue_id_r <= dequeue_id_n;
|
||||
allocate_id_r <= allocate_id_n;
|
||||
next_table <= next_table_n;
|
||||
post_alloc_id <= allocate_id;
|
||||
post_alloc_previd <= prev_idx;
|
||||
end
|
||||
|
||||
`RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, lkp_req_uuid))
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, alc_req_uuid))
|
||||
|
||||
`RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid))
|
||||
`RUNTIME_ASSERT((~release_valid || valid_table[release_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[release_id], BANK_ID), release_id, rel_req_uuid))
|
||||
|
||||
`RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id))
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (DATA_WIDTH),
|
||||
.SIZE (MSHR_SIZE),
|
||||
.LUTRAM (1)
|
||||
.DATAW (DATA_WIDTH),
|
||||
.SIZE (MSHR_SIZE),
|
||||
.RADDR_REG (1)
|
||||
) entries (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -239,7 +229,7 @@ module VX_cache_mshr #(
|
|||
.wren (1'b1),
|
||||
.waddr (allocate_id_r),
|
||||
.wdata (allocate_data),
|
||||
.raddr (dequeue_id_r),
|
||||
.raddr (dequeue_id_n),
|
||||
.rdata (dequeue_data)
|
||||
);
|
||||
|
||||
|
@ -247,18 +237,17 @@ module VX_cache_mshr #(
|
|||
|
||||
assign allocate_ready = allocate_rdy;
|
||||
assign allocate_id = allocate_id_r;
|
||||
assign allocate_prev = prev_idx;
|
||||
if (WRITEBACK) begin : g_pending_wb
|
||||
assign allocate_pending = |addr_matches;
|
||||
end else begin : g_pending_wt
|
||||
// exclude write requests if writethrough
|
||||
assign allocate_pending = |(addr_matches & ~write_table);
|
||||
end
|
||||
|
||||
assign dequeue_valid = dequeue_val;
|
||||
assign dequeue_addr = addr_table[dequeue_id_r];
|
||||
assign dequeue_rw = write_table[dequeue_id_r];
|
||||
assign dequeue_id = dequeue_id_r;
|
||||
|
||||
// return pending entries for the given cache line
|
||||
assign lookup_pending = addr_matches;
|
||||
assign lookup_rw = write_table;
|
||||
|
||||
`UNUSED_VAR (lookup_valid)
|
||||
assign dequeue_valid = dequeue_val;
|
||||
assign dequeue_addr = addr_table[dequeue_id_r];
|
||||
assign dequeue_rw = write_table[dequeue_id_r];
|
||||
assign dequeue_id = dequeue_id_r;
|
||||
|
||||
`ifdef DBG_TRACE_CACHE
|
||||
reg show_table;
|
||||
|
@ -266,23 +255,18 @@ module VX_cache_mshr #(
|
|||
if (reset) begin
|
||||
show_table <= 0;
|
||||
end else begin
|
||||
show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire;
|
||||
show_table <= allocate_fire || post_alloc_val || release_valid || fill_valid || dequeue_fire;
|
||||
end
|
||||
if (allocate_fire) begin
|
||||
`TRACE(3, ("%t: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid))
|
||||
`TRACE(3, ("%t: %s allocate: addr=0x%0h, id=%0d, pending=%b, prev=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id, allocate_pending, prev_idx, alc_req_uuid))
|
||||
end
|
||||
if (lookup_valid) begin
|
||||
`TRACE(3, ("%t: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid))
|
||||
end
|
||||
if (finalize_valid) begin
|
||||
`TRACE(3, ("%t: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid))
|
||||
if (release_valid) begin
|
||||
`TRACE(3, ("%t: %s release: id=%0d (#%0d)\n", $time, INSTANCE_ID, release_id, rel_req_uuid))
|
||||
end
|
||||
if (fill_valid) begin
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id))
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,
|
||||
`CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id))
|
||||
end
|
||||
if (dequeue_fire) begin
|
||||
`TRACE(3, ("%t: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID,
|
||||
|
|
14
hw/rtl/cache/VX_cache_tags.sv
vendored
14
hw/rtl/cache/VX_cache_tags.sv
vendored
|
@ -142,26 +142,26 @@ module VX_cache_tags #(
|
|||
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_idx};
|
||||
always @(posedge clk) begin
|
||||
if (fill) begin
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)))
|
||||
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)))
|
||||
end
|
||||
if (init) begin
|
||||
`TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx))
|
||||
`TRACE(3, ("%t: %s init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx))
|
||||
end
|
||||
if (flush) begin
|
||||
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty))
|
||||
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, line=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty))
|
||||
end
|
||||
if (lookup) begin
|
||||
if (tag_matches != 0) begin
|
||||
if (write) begin
|
||||
`TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid))
|
||||
`TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid))
|
||||
end else begin
|
||||
`TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid))
|
||||
`TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid))
|
||||
end
|
||||
end else begin
|
||||
if (write) begin
|
||||
`TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid))
|
||||
`TRACE(3, ("%t: %s write-miss: addr=0x%0h, line=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid))
|
||||
end else begin
|
||||
`TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid))
|
||||
`TRACE(3, ("%t: %s read-miss: addr=0x%0h, line=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid))
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -51,9 +51,8 @@ module VX_fetch import VX_gpu_pkg::*; #(
|
|||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (`PC_BITS + `NUM_THREADS),
|
||||
.SIZE (`NUM_WARPS),
|
||||
.LUTRAM (1)
|
||||
.DATAW (`PC_BITS + `NUM_THREADS),
|
||||
.SIZE (`NUM_WARPS)
|
||||
) tag_store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
module VX_ipdom_stack #(
|
||||
parameter WIDTH = 1,
|
||||
parameter DEPTH = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter ADDRW = `LOG2UP(DEPTH)
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -33,7 +32,7 @@ module VX_ipdom_stack #(
|
|||
);
|
||||
reg slot_set [DEPTH-1:0];
|
||||
|
||||
reg [ADDRW-1:0] rd_ptr, wr_ptr;
|
||||
reg [ADDRW-1:0] rd_ptr, rd_ptr_n, wr_ptr;
|
||||
|
||||
reg empty_r, full_r;
|
||||
|
||||
|
@ -41,35 +40,42 @@ module VX_ipdom_stack #(
|
|||
|
||||
wire d_set_n = slot_set[rd_ptr];
|
||||
|
||||
always @(*) begin
|
||||
rd_ptr_n = rd_ptr;
|
||||
if (push) begin
|
||||
rd_ptr_n = wr_ptr;
|
||||
end else if (pop) begin
|
||||
rd_ptr_n = rd_ptr - ADDRW'(d_set_n);
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rd_ptr <= '0;
|
||||
wr_ptr <= '0;
|
||||
empty_r <= 1;
|
||||
full_r <= 0;
|
||||
rd_ptr <= '0;
|
||||
end else begin
|
||||
`ASSERT(~push || ~full, ("%t: runtime error: writing to a full stack!", $time));
|
||||
`ASSERT(~pop || ~empty, ("%t: runtime error: reading an empty stack!", $time));
|
||||
`ASSERT(~push || ~pop, ("%t: runtime error: push and pop in same cycle not supported!", $time));
|
||||
if (push) begin
|
||||
rd_ptr <= wr_ptr;
|
||||
wr_ptr <= wr_ptr + ADDRW'(1);
|
||||
empty_r <= 0;
|
||||
full_r <= (ADDRW'(DEPTH-1) == wr_ptr);
|
||||
end else if (pop) begin
|
||||
wr_ptr <= wr_ptr - ADDRW'(d_set_n);
|
||||
rd_ptr <= rd_ptr - ADDRW'(d_set_n);
|
||||
empty_r <= (rd_ptr == 0) && (d_set_n == 1);
|
||||
full_r <= 0;
|
||||
end
|
||||
rd_ptr <= rd_ptr_n;
|
||||
end
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (WIDTH * 2),
|
||||
.SIZE (DEPTH),
|
||||
.OUT_REG (OUT_REG ? 1 : 0),
|
||||
.LUTRAM (OUT_REG ? 0 : 1)
|
||||
.DATAW (WIDTH * 2),
|
||||
.SIZE (DEPTH),
|
||||
.RADDR_REG (1)
|
||||
) store (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -78,7 +84,7 @@ module VX_ipdom_stack #(
|
|||
.wren (1'b1),
|
||||
.waddr (wr_ptr),
|
||||
.wdata ({q1, q0}),
|
||||
.raddr (rd_ptr),
|
||||
.raddr (rd_ptr_n),
|
||||
.rdata ({d1, d0})
|
||||
);
|
||||
|
||||
|
@ -94,7 +100,7 @@ module VX_ipdom_stack #(
|
|||
|
||||
VX_pipe_register #(
|
||||
.DATAW (1),
|
||||
.DEPTH (OUT_REG)
|
||||
.DEPTH (0)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -48,8 +48,7 @@ module VX_split_join import VX_gpu_pkg::*; #(
|
|||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_ipdom_stacks
|
||||
VX_ipdom_stack #(
|
||||
.WIDTH (`NUM_THREADS+`PC_BITS),
|
||||
.DEPTH (`DV_STACK_SIZE),
|
||||
.OUT_REG (0)
|
||||
.DEPTH (`DV_STACK_SIZE)
|
||||
) ipdom_stack (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -20,7 +20,7 @@ module VX_fifo_queue #(
|
|||
parameter ALM_FULL = (DEPTH - 1),
|
||||
parameter ALM_EMPTY = 1,
|
||||
parameter OUT_REG = 0,
|
||||
parameter LUTRAM = 1,
|
||||
parameter LUTRAM = 0,
|
||||
parameter SIZEW = `CLOG2(DEPTH+1)
|
||||
) (
|
||||
input wire clk,
|
||||
|
@ -80,30 +80,38 @@ module VX_fifo_queue #(
|
|||
reg [DATAW-1:0] dout_r;
|
||||
reg [ADDRW-1:0] wr_ptr_r;
|
||||
reg [ADDRW-1:0] rd_ptr_r;
|
||||
reg [ADDRW-1:0] rd_ptr_n_r;
|
||||
reg [ADDRW-1:0] rd_ptr_n_r, rd_ptr_n_n;
|
||||
|
||||
always @(*) begin
|
||||
rd_ptr_n_n = rd_ptr_r;
|
||||
if (pop) begin
|
||||
if (DEPTH > 2) begin
|
||||
rd_ptr_n_n = rd_ptr_r + ADDRW'(2);
|
||||
end else begin // (DEPTH == 2);
|
||||
rd_ptr_n_n = ~rd_ptr_n_r;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wr_ptr_r <= '0;
|
||||
rd_ptr_r <= '0;
|
||||
rd_ptr_n_r <= 1;
|
||||
wr_ptr_r <= '0;
|
||||
rd_ptr_r <= '0;
|
||||
rd_ptr_n_r <= '0;
|
||||
end else begin
|
||||
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
|
||||
if (pop) begin
|
||||
rd_ptr_r <= rd_ptr_n_r;
|
||||
if (DEPTH > 2) begin
|
||||
rd_ptr_n_r <= rd_ptr_r + ADDRW'(2);
|
||||
end else begin // (DEPTH == 2);
|
||||
rd_ptr_n_r <= ~rd_ptr_n_r;
|
||||
end
|
||||
end
|
||||
rd_ptr_n_r <= rd_ptr_n_n;
|
||||
end
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (DEPTH),
|
||||
.LUTRAM (LUTRAM)
|
||||
.LUTRAM (LUTRAM),
|
||||
.RADDR_REG (1)
|
||||
) dp_ram (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -112,7 +120,7 @@ module VX_fifo_queue #(
|
|||
.wren (1'b1),
|
||||
.waddr (wr_ptr_r),
|
||||
.wdata (data_in),
|
||||
.raddr (rd_ptr_n_r),
|
||||
.raddr (rd_ptr_n_n),
|
||||
.rdata (dout)
|
||||
);
|
||||
|
||||
|
@ -130,23 +138,28 @@ module VX_fifo_queue #(
|
|||
|
||||
end else begin : g_no_out_reg
|
||||
|
||||
reg [ADDRW-1:0] rd_ptr_r;
|
||||
reg [ADDRW-1:0] rd_ptr_r, rd_ptr_n;
|
||||
reg [ADDRW-1:0] wr_ptr_r;
|
||||
|
||||
always @(*) begin
|
||||
rd_ptr_n = rd_ptr_r + ADDRW'(pop);
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rd_ptr_r <= '0;
|
||||
wr_ptr_r <= '0;
|
||||
rd_ptr_r <= '0;
|
||||
end else begin
|
||||
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
|
||||
rd_ptr_r <= rd_ptr_r + ADDRW'(pop);
|
||||
rd_ptr_r <= rd_ptr_n;
|
||||
end
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (DEPTH),
|
||||
.LUTRAM (LUTRAM)
|
||||
.LUTRAM (LUTRAM),
|
||||
.RADDR_REG (1)
|
||||
) dp_ram (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -155,7 +168,7 @@ module VX_fifo_queue #(
|
|||
.wren (1'b1),
|
||||
.waddr (wr_ptr_r),
|
||||
.wdata (data_in),
|
||||
.raddr (rd_ptr_r),
|
||||
.raddr (rd_ptr_n),
|
||||
.rdata (data_out)
|
||||
);
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
module VX_index_buffer #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter LUTRAM = 1,
|
||||
parameter LUTRAM = 0,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue