async bram optimization

This commit is contained in:
Blaise Tine 2024-10-09 04:14:15 -07:00
parent f49084b298
commit a5381fd788
9 changed files with 135 additions and 156 deletions

View file

@ -167,7 +167,6 @@ module VX_cache_bank #(
wire [NUM_WAYS-1:0] way_idx_st0, way_idx_st1;
wire [NUM_WAYS-1:0] tag_matches_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_alloc_id_st0;
wire [MSHR_ADDR_WIDTH-1:0] mshr_prev_st0, mshr_prev_st1;
wire mshr_pending_st0, mshr_pending_st1;
wire mshr_empty;
@ -380,14 +379,14 @@ module VX_cache_bank #(
assign line_tag2_st0 = (is_fill_st0 || is_flush2_st0) ? evict_tag_st0 : line_tag_st0;
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + `UP(FLAGS_WIDTH) + `CS_TAG_SEL_BITS + `CS_LINE_SEL_BITS + `CS_LINE_WIDTH + WORD_SIZE + WORD_SEL_WIDTH + REQ_SEL_WIDTH + TAG_WIDTH + MSHR_ADDR_WIDTH + NUM_WAYS + 1 + 1),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (~pipe_stall),
.data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, rw_st0, flags_st0, line_tag2_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, mshr_prev_st0, way_idx_st0, evict_dirty_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, rw_st1, flags_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, mshr_prev_st1, way_idx_st1, evict_dirty_st1, mshr_pending_st1})
.data_in ({valid_st0, is_init_st0, is_replay_st0, is_fill_st0, is_flush2_st0, is_creq_st0, rw_st0, flags_st0, line_tag2_st0, line_idx_st0, data_st0, byteen_st0, word_idx_st0, req_idx_st0, tag_st0, mshr_id_st0, way_idx_st0, evict_dirty_st0, mshr_pending_st0}),
.data_out ({valid_st1, is_init_st1, is_replay_st1, is_fill_st1, is_flush_st1, is_creq_st1, rw_st1, flags_st1, line_tag_st1, line_idx_st1, data_st1, byteen_st1, word_idx_st1, req_idx_st1, tag_st1, mshr_id_st1, way_idx_st1, evict_dirty_st1, mshr_pending_st1})
);
// we have a tag hit
@ -473,25 +472,20 @@ module VX_cache_bank #(
.dirty_byteen(dirty_byteen_st1)
);
wire [MSHR_SIZE-1:0] mshr_lookup_pending_st0;
wire [MSHR_SIZE-1:0] mshr_lookup_rw_st0;
wire mshr_allocate_st0 = valid_st0 && is_creq_st0;
wire mshr_lookup_st0 = mshr_allocate_st0;
wire mshr_finalize_st1 = valid_st1 && is_creq_st1;
// release allocated mshr entry if we had a hit
wire mshr_release_st1;
if (WRITEBACK) begin : g_mshr_release
assign mshr_release_st1 = is_hit_st1;
assign mshr_release_st1 = valid_st1 && is_creq_st1 && is_hit_st1;
end else begin : g_mshr_release_ro
// we need to keep missed write requests in MSHR if there is already a pending entry to the same address
// this ensures that missed write requests are replayed locally in case a pending fill arrives without the write content
// this can happen when writes are sent late, when the fill was already in flight.
assign mshr_release_st1 = is_hit_st1 || (rw_st1 && ~mshr_pending_st1);
assign mshr_release_st1 = valid_st1 && is_creq_st1 && (is_hit_st1 || (rw_st1 && ~mshr_pending_st1));
end
wire mshr_dequeue = mshr_finalize_st1 && mshr_release_st1 && ~pipe_stall;
wire mshr_dequeue = mshr_release_st1 && ~pipe_stall;
VX_pending_size #(
.SIZE (MSHR_SIZE)
@ -513,6 +507,8 @@ module VX_cache_bank #(
.LINE_SIZE (LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.MSHR_SIZE (MSHR_SIZE),
.WRITEBACK (WRITEBACK),
.RDW_STALL (1),
.UUID_WIDTH (UUID_WIDTH),
.DATA_WIDTH (WORD_SEL_WIDTH + WORD_SIZE + `CS_WORD_WIDTH + TAG_WIDTH + REQ_SEL_WIDTH)
) cache_mshr (
@ -520,8 +516,8 @@ module VX_cache_bank #(
.reset (reset),
.deq_req_uuid (req_uuid_sel),
.lkp_req_uuid (req_uuid_st0),
.fin_req_uuid (req_uuid_st1),
.alc_req_uuid (req_uuid_st0),
.rel_req_uuid (req_uuid_st1),
// memory fill
.fill_valid (mem_rsp_fire),
@ -542,32 +538,14 @@ module VX_cache_bank #(
.allocate_rw (rw_st0),
.allocate_data ({word_idx_st0, byteen_st0, write_data_st0, tag_st0, req_idx_st0}),
.allocate_id (mshr_alloc_id_st0),
.allocate_prev (mshr_prev_st0),
.allocate_pending(mshr_pending_st0),
`UNUSED_PIN (allocate_ready),
// lookup
.lookup_valid (mshr_lookup_st0 && ~pipe_stall),
.lookup_addr (addr_st0),
.lookup_pending (mshr_lookup_pending_st0),
.lookup_rw (mshr_lookup_rw_st0),
// finalize
.finalize_valid (mshr_finalize_st1 && ~pipe_stall),
.finalize_release(mshr_release_st1),
.finalize_pending(mshr_pending_st1),
.finalize_id (mshr_id_st1),
.finalize_prev (mshr_prev_st1)
// release
.release_valid (mshr_release_st1 && ~pipe_stall),
.release_id (mshr_id_st1)
);
// check if there are pending requests to same line in the MSHR
wire [MSHR_SIZE-1:0] lookup_matches;
for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_lookup_matches
assign lookup_matches[i] = mshr_lookup_pending_st0[i]
&& (i != mshr_id_st0) // exclude current mshr id
&& (WRITEBACK || ~mshr_lookup_rw_st0[i]); // exclude write requests if writethrough
end
assign mshr_pending_st0 = (| lookup_matches);
// schedule core response
wire crsp_queue_valid, crsp_queue_ready;

View file

@ -193,16 +193,16 @@ module VX_cache_data #(
`ifdef DBG_TRACE_CACHE
always @(posedge clk) begin
if (fill) begin
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data))
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, line=%0d, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, fill_data))
end
if (flush) begin
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data))
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, line=%0d, byteen=0x%h, data=0x%h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, dirty_byteen, dirty_data))
end
if (read) begin
`TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid))
`TRACE(3, ("%t: %s read: addr=0x%0h, way=%b, line=%0d, wsel=%0d, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, read_data, req_uuid))
end
if (write) begin
`TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, blk_addr=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid))
`TRACE(3, ("%t: %s write: addr=0x%0h, way=%b, line=%0d, wsel=%0d, byteen=0x%h, data=0x%h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), way_idx, line_idx, word_idx, write_byteen, write_data, req_uuid))
end
end
`endif

View file

@ -24,36 +24,22 @@
// arrival and are dequeued in the same order.
// Each entry has a next pointer to the next entry pending for the same cache line.
//
// During the fill operation, the MSHR will release the MSHR entry at fill_id
// During the fill request, the MSHR will release the MSHR entry at fill_id
// which represents the first request in the pending list that initiated the memory fill.
//
// The dequeue operation directly follows the fill operation and will release
// The dequeue response directly follows the fill request and will release
// all the subsequent entries linked to fill_id (pending the same cache line).
//
// During the allocation operation, the MSHR will allocate the next free slot
// During the allocation request, the MSHR will allocate the next free slot
// for the incoming core request. We return the allocated slot id and whether another
// entry is already pending for the same cache line. The previous entry's slot id is used to
// link the new entry to the pending list during finalization.
// link the new entry to the pending list.
//
// The lookup operation is used to find all pending entries for a given cache line.
// This is used by the cache bank to determine if a cache miss is already pending
// and therefore avoid issuing a memory fill request.
//
// The finalize operation is used to release the allocated MSHR entry if we had a hit.
// If we had a miss and finalize_pending is true, we link the allocated entry to
// its corresponding pending list (via finalize_prev).
// The release request is used to invalidate the allocated MSHR entry if we had a cache hit.
//
// Warning: This MSHR implementation is strongly coupled with the bank pipeline
// and as such changes to either module requires careful evaluation.
//
// This architecture implements three pipeline stages:
// - Arbitration: cache bank arbitration before entering pipeline.
// fill and dequeue operations are executed at this stage.
// - stage 0: cache bank tag access stage.
// allocate and lookup operations are executed at this stage.
// - stage 1: cache bank data access stage.
// finalize operation is executed at this stage.
//
module VX_cache_mshr #(
parameter `STRING INSTANCE_ID= "",
@ -68,6 +54,11 @@ module VX_cache_mshr #(
parameter UUID_WIDTH = 0,
// MSHR parameters
parameter DATA_WIDTH = 1,
// Enable cache writeback
parameter WRITEBACK = 0,
// Cache stall on read during write
// (declared with an explicit `parameter` keyword, like every other entry in this
// list, so it does not depend on continuing the preceding declaration)
parameter RDW_STALL = 0,
parameter MSHR_ADDR_WIDTH = `LOG2UP(MSHR_SIZE)
) (
input wire clk,
@ -75,8 +66,8 @@ module VX_cache_mshr #(
`IGNORE_UNUSED_BEGIN
input wire[`UP(UUID_WIDTH)-1:0] deq_req_uuid,
input wire[`UP(UUID_WIDTH)-1:0] lkp_req_uuid,
input wire[`UP(UUID_WIDTH)-1:0] fin_req_uuid,
input wire[`UP(UUID_WIDTH)-1:0] alc_req_uuid,
input wire[`UP(UUID_WIDTH)-1:0] rel_req_uuid,
`IGNORE_UNUSED_END
// memory fill
@ -98,21 +89,12 @@ module VX_cache_mshr #(
input wire allocate_rw,
input wire [DATA_WIDTH-1:0] allocate_data,
output wire [MSHR_ADDR_WIDTH-1:0] allocate_id,
output wire [MSHR_ADDR_WIDTH-1:0] allocate_prev,
output wire allocate_pending,
output wire allocate_ready,
// lookup
input wire lookup_valid,
input wire [`CS_LINE_ADDR_WIDTH-1:0] lookup_addr,
output wire [MSHR_SIZE-1:0] lookup_pending,
output wire [MSHR_SIZE-1:0] lookup_rw,
// finalize
input wire finalize_valid,
input wire finalize_release,
input wire finalize_pending,
input wire [MSHR_ADDR_WIDTH-1:0] finalize_id,
input wire [MSHR_ADDR_WIDTH-1:0] finalize_prev
// release
input wire release_valid,
input wire [MSHR_ADDR_WIDTH-1:0] release_id
);
`UNUSED_PARAM (BANK_ID)
@ -130,13 +112,15 @@ module VX_cache_mshr #(
reg [MSHR_ADDR_WIDTH-1:0] dequeue_id_r, dequeue_id_n;
wire [MSHR_ADDR_WIDTH-1:0] prev_idx;
reg [MSHR_ADDR_WIDTH-1:0] post_alloc_id, post_alloc_previd;
reg post_alloc_val;
wire allocate_fire = allocate_valid && allocate_ready;
wire dequeue_fire = dequeue_valid && dequeue_ready;
wire [MSHR_SIZE-1:0] addr_matches;
for (genvar i = 0; i < MSHR_SIZE; ++i) begin : g_addr_matches
assign addr_matches[i] = valid_table[i] && (addr_table[i] == lookup_addr);
assign addr_matches[i] = valid_table[i] && (addr_table[i] == allocate_addr);
end
VX_lzc #(
@ -148,6 +132,7 @@ module VX_cache_mshr #(
.valid_out (allocate_rdy_n)
);
// find matching tail-entry
VX_priority_encoder #(
.N (MSHR_SIZE)
) prev_sel (
@ -172,18 +157,19 @@ module VX_cache_mshr #(
valid_table_n[dequeue_id] = 0;
if (next_table[dequeue_id]) begin
dequeue_id_n = next_index[dequeue_id];
end else if (!RDW_STALL && post_alloc_val && (post_alloc_previd == dequeue_id)) begin
dequeue_id_n = post_alloc_id;
end else begin
dequeue_val_n = 0;
end
end
if (finalize_valid) begin
if (finalize_release) begin
valid_table_n[finalize_id] = 0;
end
if (finalize_pending) begin
next_table_x[finalize_prev] = 1;
end
// Release request: invalidate the MSHR entry at release_id.
if (release_valid) begin
valid_table_n[release_id] = 0;
end
// post_alloc_val is the registered (allocate_fire && allocate_pending), i.e. it is
// set the cycle after an allocation that found a pending entry for the same line.
// Mark the previous entry captured at allocation time (post_alloc_previd) as
// having a successor, so that dequeue follows next_index from it.
if (post_alloc_val) begin
next_table_x[post_alloc_previd] = 1;
end
next_table_n = next_table_x;
@ -198,39 +184,43 @@ module VX_cache_mshr #(
valid_table <= '0;
allocate_rdy <= 0;
dequeue_val <= 0;
post_alloc_val <= 0;
end else begin
valid_table <= valid_table_n;
allocate_rdy <= allocate_rdy_n;
dequeue_val <= dequeue_val_n;
post_alloc_val <= allocate_fire && allocate_pending;
end
if (allocate_fire) begin
addr_table[allocate_id] <= allocate_addr;
addr_table[allocate_id] <= allocate_addr;
write_table[allocate_id] <= allocate_rw;
end
if (finalize_valid && finalize_pending) begin
next_index[finalize_prev] <= finalize_id;
if (post_alloc_val) begin
next_index[post_alloc_previd] <= post_alloc_id;
end
dequeue_id_r <= dequeue_id_n;
allocate_id_r <= allocate_id_n;
next_table <= next_table_n;
post_alloc_id <= allocate_id;
post_alloc_previd <= prev_idx;
end
`RUNTIME_ASSERT((~allocate_fire || ~valid_table[allocate_id_r]), ("%t: *** %s inuse allocation: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, lkp_req_uuid))
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id_r, alc_req_uuid))
`RUNTIME_ASSERT((~finalize_valid || valid_table[finalize_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[finalize_id], BANK_ID), finalize_id, fin_req_uuid))
`RUNTIME_ASSERT((~release_valid || valid_table[release_id]), ("%t: *** %s invalid release: addr=0x%0h, id=%0d (#%0d)", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[release_id], BANK_ID), release_id, rel_req_uuid))
`RUNTIME_ASSERT((~fill_valid || valid_table[fill_id]), ("%t: *** %s invalid fill: addr=0x%0h, id=%0d", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), fill_id))
VX_dp_ram #(
.DATAW (DATA_WIDTH),
.SIZE (MSHR_SIZE),
.LUTRAM (1)
.DATAW (DATA_WIDTH),
.SIZE (MSHR_SIZE),
.RADDR_REG (1)
) entries (
.clk (clk),
.reset (reset),
@ -239,7 +229,7 @@ module VX_cache_mshr #(
.wren (1'b1),
.waddr (allocate_id_r),
.wdata (allocate_data),
.raddr (dequeue_id_r),
.raddr (dequeue_id_n),
.rdata (dequeue_data)
);
@ -247,18 +237,17 @@ module VX_cache_mshr #(
assign allocate_ready = allocate_rdy;
assign allocate_id = allocate_id_r;
assign allocate_prev = prev_idx;
// allocate_pending: reports whether any valid MSHR entry already matches the
// address being allocated (see addr_matches).
if (WRITEBACK) begin : g_pending_wb
// writeback: every matching entry counts as pending, reads and writes alike
assign allocate_pending = |addr_matches;
end else begin : g_pending_wt
// exclude write requests if writethrough
assign allocate_pending = |(addr_matches & ~write_table);
end
assign dequeue_valid = dequeue_val;
assign dequeue_addr = addr_table[dequeue_id_r];
assign dequeue_rw = write_table[dequeue_id_r];
assign dequeue_id = dequeue_id_r;
// return pending entries for the given cache line
assign lookup_pending = addr_matches;
assign lookup_rw = write_table;
`UNUSED_VAR (lookup_valid)
assign dequeue_valid = dequeue_val;
assign dequeue_addr = addr_table[dequeue_id_r];
assign dequeue_rw = write_table[dequeue_id_r];
assign dequeue_id = dequeue_id_r;
`ifdef DBG_TRACE_CACHE
reg show_table;
@ -266,23 +255,18 @@ module VX_cache_mshr #(
if (reset) begin
show_table <= 0;
end else begin
show_table <= allocate_fire || lookup_valid || finalize_valid || fill_valid || dequeue_fire;
show_table <= allocate_fire || post_alloc_val || release_valid || fill_valid || dequeue_fire;
end
if (allocate_fire) begin
`TRACE(3, ("%t: %s allocate: addr=0x%0h, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_prev, allocate_id, lkp_req_uuid))
`TRACE(3, ("%t: %s allocate: addr=0x%0h, id=%0d, pending=%b, prev=%0d (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(allocate_addr, BANK_ID), allocate_id, allocate_pending, prev_idx, alc_req_uuid))
end
if (lookup_valid) begin
`TRACE(3, ("%t: %s lookup: addr=0x%0h, matches=%b (#%0d)\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(lookup_addr, BANK_ID), lookup_pending, lkp_req_uuid))
end
if (finalize_valid) begin
`TRACE(3, ("%t: %s finalize release=%b, pending=%b, prev=%0d, id=%0d (#%0d)\n", $time, INSTANCE_ID,
finalize_release, finalize_pending, finalize_prev, finalize_id, fin_req_uuid))
if (release_valid) begin
`TRACE(3, ("%t: %s release: id=%0d (#%0d)\n", $time, INSTANCE_ID, release_id, rel_req_uuid))
end
if (fill_valid) begin
`TRACE(3, ("%t: %s fill: addr=0x%0h, addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(addr_table[fill_id], BANK_ID), `CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id))
`TRACE(3, ("%t: %s fill: addr=0x%0h, id=%0d\n", $time, INSTANCE_ID,
`CS_LINE_TO_FULL_ADDR(fill_addr, BANK_ID), fill_id))
end
if (dequeue_fire) begin
`TRACE(3, ("%t: %s dequeue: addr=0x%0h, id=%0d (#%0d)\n", $time, INSTANCE_ID,

View file

@ -142,26 +142,26 @@ module VX_cache_tags #(
wire [`CS_LINE_ADDR_WIDTH-1:0] evict_line_addr = {evict_tag, line_idx};
always @(posedge clk) begin
if (fill) begin
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)))
`TRACE(3, ("%t: %s fill: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h, dirty=%b, evict_addr=0x%0h\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), evict_way, line_idx, line_tag, evict_dirty, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID)))
end
if (init) begin
`TRACE(3, ("%t: %s init: addr=0x%0h, blk_addr=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx))
`TRACE(3, ("%t: %s init: addr=0x%0h, line=%0d\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx))
end
if (flush) begin
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, blk_addr=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty))
`TRACE(3, ("%t: %s flush: addr=0x%0h, way=%b, line=%0d, dirty=%b\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(evict_line_addr, BANK_ID), way_idx, line_idx, evict_dirty))
end
if (lookup) begin
if (tag_matches != 0) begin
if (write) begin
`TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid))
`TRACE(3, ("%t: %s write-hit: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid))
end else begin
`TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, blk_addr=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid))
`TRACE(3, ("%t: %s read-hit: addr=0x%0h, way=%b, line=%0d, tag_id=0x%0h (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), tag_matches, line_idx, line_tag, req_uuid))
end
end else begin
if (write) begin
`TRACE(3, ("%t: %s write-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid))
`TRACE(3, ("%t: %s write-miss: addr=0x%0h, line=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid))
end else begin
`TRACE(3, ("%t: %s read-miss: addr=0x%0h, blk_addr=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid))
`TRACE(3, ("%t: %s read-miss: addr=0x%0h, line=%0d, tag_id=0x%0h, (#%0d)\n", $time, INSTANCE_ID, `CS_LINE_TO_FULL_ADDR(line_addr, BANK_ID), line_idx, line_tag, req_uuid))
end
end
end

View file

@ -51,9 +51,8 @@ module VX_fetch import VX_gpu_pkg::*; #(
wire [`NUM_THREADS-1:0] rsp_tmask;
VX_dp_ram #(
.DATAW (`PC_BITS + `NUM_THREADS),
.SIZE (`NUM_WARPS),
.LUTRAM (1)
.DATAW (`PC_BITS + `NUM_THREADS),
.SIZE (`NUM_WARPS)
) tag_store (
.clk (clk),
.reset (reset),

View file

@ -16,7 +16,6 @@
module VX_ipdom_stack #(
parameter WIDTH = 1,
parameter DEPTH = 1,
parameter OUT_REG = 0,
parameter ADDRW = `LOG2UP(DEPTH)
) (
input wire clk,
@ -33,7 +32,7 @@ module VX_ipdom_stack #(
);
reg slot_set [DEPTH-1:0];
reg [ADDRW-1:0] rd_ptr, wr_ptr;
reg [ADDRW-1:0] rd_ptr, rd_ptr_n, wr_ptr;
reg empty_r, full_r;
@ -41,35 +40,42 @@ module VX_ipdom_stack #(
wire d_set_n = slot_set[rd_ptr];
// Next read pointer, computed combinationally: it is registered into rd_ptr and
// also drives the RAM read address.
//  - push: the read pointer moves to the slot being written (wr_ptr)
//  - pop : the read pointer steps back by d_set_n
//  - idle: the read pointer holds
always @(*) begin
    if (push) begin
        rd_ptr_n = wr_ptr;
    end else if (pop) begin
        rd_ptr_n = rd_ptr - ADDRW'(d_set_n);
    end else begin
        rd_ptr_n = rd_ptr;
    end
end
always @(posedge clk) begin
if (reset) begin
rd_ptr <= '0;
wr_ptr <= '0;
empty_r <= 1;
full_r <= 0;
rd_ptr <= '0;
end else begin
`ASSERT(~push || ~full, ("%t: runtime error: writing to a full stack!", $time));
`ASSERT(~pop || ~empty, ("%t: runtime error: reading an empty stack!", $time));
`ASSERT(~push || ~pop, ("%t: runtime error: push and pop in same cycle not supported!", $time));
if (push) begin
rd_ptr <= wr_ptr;
wr_ptr <= wr_ptr + ADDRW'(1);
empty_r <= 0;
full_r <= (ADDRW'(DEPTH-1) == wr_ptr);
end else if (pop) begin
wr_ptr <= wr_ptr - ADDRW'(d_set_n);
rd_ptr <= rd_ptr - ADDRW'(d_set_n);
empty_r <= (rd_ptr == 0) && (d_set_n == 1);
full_r <= 0;
end
rd_ptr <= rd_ptr_n;
end
end
VX_dp_ram #(
.DATAW (WIDTH * 2),
.SIZE (DEPTH),
.OUT_REG (OUT_REG ? 1 : 0),
.LUTRAM (OUT_REG ? 0 : 1)
.DATAW (WIDTH * 2),
.SIZE (DEPTH),
.RADDR_REG (1)
) store (
.clk (clk),
.reset (reset),
@ -78,7 +84,7 @@ module VX_ipdom_stack #(
.wren (1'b1),
.waddr (wr_ptr),
.wdata ({q1, q0}),
.raddr (rd_ptr),
.raddr (rd_ptr_n),
.rdata ({d1, d0})
);
@ -94,7 +100,7 @@ module VX_ipdom_stack #(
VX_pipe_register #(
.DATAW (1),
.DEPTH (OUT_REG)
.DEPTH (0)
) pipe_reg (
.clk (clk),
.reset (reset),

View file

@ -48,8 +48,7 @@ module VX_split_join import VX_gpu_pkg::*; #(
for (genvar i = 0; i < `NUM_WARPS; ++i) begin : g_ipdom_stacks
VX_ipdom_stack #(
.WIDTH (`NUM_THREADS+`PC_BITS),
.DEPTH (`DV_STACK_SIZE),
.OUT_REG (0)
.DEPTH (`DV_STACK_SIZE)
) ipdom_stack (
.clk (clk),
.reset (reset),

View file

@ -20,7 +20,7 @@ module VX_fifo_queue #(
parameter ALM_FULL = (DEPTH - 1),
parameter ALM_EMPTY = 1,
parameter OUT_REG = 0,
parameter LUTRAM = 1,
parameter LUTRAM = 0,
parameter SIZEW = `CLOG2(DEPTH+1)
) (
input wire clk,
@ -80,30 +80,38 @@ module VX_fifo_queue #(
reg [DATAW-1:0] dout_r;
reg [ADDRW-1:0] wr_ptr_r;
reg [ADDRW-1:0] rd_ptr_r;
reg [ADDRW-1:0] rd_ptr_n_r;
reg [ADDRW-1:0] rd_ptr_n_r, rd_ptr_n_n;
// Next value of the look-ahead read pointer (rd_ptr_n_r), also used as the RAM
// read address. rd_ptr_n_r is loaded into rd_ptr_r on pop, so it must always
// sit one entry past the head:
//  - no pop: the head stays at rd_ptr_r     -> look-ahead is rd_ptr_r + 1
//  - pop   : the head moves to rd_ptr_r + 1 -> look-ahead is rd_ptr_r + 2
// The previous default of rd_ptr_r collapsed the look-ahead pointer onto the
// head after every idle cycle, so the following pop did not advance rd_ptr_r.
// NOTE(review): presumably RADDR_REG=1 registers the read address inside
// VX_dp_ram, which is why the *next* pointer drives raddr - confirm.
always @(*) begin
    rd_ptr_n_n = rd_ptr_r + ADDRW'(1);
    if (pop) begin
        if (DEPTH > 2) begin
            rd_ptr_n_n = rd_ptr_r + ADDRW'(2);
        end else begin // (DEPTH == 2);
            rd_ptr_n_n = ~rd_ptr_n_r;
        end
    end
end
always @(posedge clk) begin
if (reset) begin
wr_ptr_r <= '0;
rd_ptr_r <= '0;
rd_ptr_n_r <= 1;
wr_ptr_r <= '0;
rd_ptr_r <= '0;
rd_ptr_n_r <= '0;
end else begin
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
if (pop) begin
rd_ptr_r <= rd_ptr_n_r;
if (DEPTH > 2) begin
rd_ptr_n_r <= rd_ptr_r + ADDRW'(2);
end else begin // (DEPTH == 2);
rd_ptr_n_r <= ~rd_ptr_n_r;
end
end
rd_ptr_n_r <= rd_ptr_n_n;
end
end
VX_dp_ram #(
.DATAW (DATAW),
.SIZE (DEPTH),
.LUTRAM (LUTRAM)
.LUTRAM (LUTRAM),
.RADDR_REG (1)
) dp_ram (
.clk (clk),
.reset (reset),
@ -112,7 +120,7 @@ module VX_fifo_queue #(
.wren (1'b1),
.waddr (wr_ptr_r),
.wdata (data_in),
.raddr (rd_ptr_n_r),
.raddr (rd_ptr_n_n),
.rdata (dout)
);
@ -130,23 +138,28 @@ module VX_fifo_queue #(
end else begin : g_no_out_reg
reg [ADDRW-1:0] rd_ptr_r;
reg [ADDRW-1:0] rd_ptr_r, rd_ptr_n;
reg [ADDRW-1:0] wr_ptr_r;
// Next read pointer: advances by one on pop, otherwise holds. It is computed
// combinationally so the same value can be registered into rd_ptr_r and used
// as the RAM read address.
always @(*) begin
rd_ptr_n = rd_ptr_r + ADDRW'(pop);
end
always @(posedge clk) begin
if (reset) begin
rd_ptr_r <= '0;
wr_ptr_r <= '0;
rd_ptr_r <= '0;
end else begin
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
rd_ptr_r <= rd_ptr_r + ADDRW'(pop);
rd_ptr_r <= rd_ptr_n;
end
end
VX_dp_ram #(
.DATAW (DATAW),
.SIZE (DEPTH),
.LUTRAM (LUTRAM)
.LUTRAM (LUTRAM),
.RADDR_REG (1)
) dp_ram (
.clk (clk),
.reset (reset),
@ -155,7 +168,7 @@ module VX_fifo_queue #(
.wren (1'b1),
.waddr (wr_ptr_r),
.wdata (data_in),
.raddr (rd_ptr_r),
.raddr (rd_ptr_n),
.rdata (data_out)
);

View file

@ -17,7 +17,7 @@
module VX_index_buffer #(
parameter DATAW = 1,
parameter SIZE = 1,
parameter LUTRAM = 1,
parameter LUTRAM = 0,
parameter ADDRW = `LOG2UP(SIZE)
) (
input wire clk,