minor updates

This commit is contained in:
Blaise Tine 2023-06-29 13:58:22 -04:00
parent be1f265d62
commit fb4f62bab9
6 changed files with 89 additions and 99 deletions

View file

@ -295,8 +295,8 @@ module VX_cache_bank #(
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, is_read_st1, is_write_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, pmask_st1, tag_st1, mshr_id_st1, tag_matches_st1, way_sel_st1, mshr_pending_st1})
);
// we have a tag match
wire is_hit_st1 = | tag_matches_st1;
// we have a tag hit
wire is_hit_st1 = (| tag_matches_st1);
if (UUID_WIDTH != 0) begin
assign req_uuid_st1 = tag_st1[0][TAG_WIDTH-1 -: UUID_WIDTH];

View file

@ -19,7 +19,7 @@ module VX_cache_cluster #(
// Number of ports per banks
parameter NUM_PORTS = 1,
// Number of associative ways
parameter NUM_WAYS = 1,
parameter NUM_WAYS = 4,
// Size of a word in bytes
parameter WORD_SIZE = 4,
@ -179,8 +179,8 @@ endmodule
module VX_cache_cluster_top #(
parameter `STRING INSTANCE_ID = "",
parameter NUM_UNITS = 8,
parameter NUM_INPUTS = 16,
parameter NUM_UNITS = 2,
parameter NUM_INPUTS = 4,
parameter TAG_SEL_IDX = 0,
// Number of Word requests per cycle

View file

@ -49,44 +49,46 @@ module VX_cache_data #(
`UNUSED_VAR (addr)
`UNUSED_VAR (read)
localparam BYTEENW = WRITE_ENABLE ? LINE_SIZE : 1;
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] rdata;
wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] wdata;
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata;
wire [BYTEENW-1:0] wren;
wire [`CS_LINE_SEL_BITS-1:0] line_addr = addr[`CS_LINE_SEL_BITS-1:0];
if (WRITE_ENABLE != 0) begin
if (`CS_WORDS_PER_LINE > 1) begin
reg [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] wdata_r;
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_r;
if (NUM_PORTS > 1) begin
always @(*) begin
wdata_r = 'x;
wren_r = '0;
for (integer i = 0; i < NUM_PORTS; ++i) begin
if (pmask[i]) begin
wdata_r[wsel[i]] = write_data[i];
wren_r[wsel[i]] = byteen[i];
end
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
reg [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] wdata_r;
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_r;
if (NUM_PORTS > 1) begin
always @(*) begin
wdata_r = 'x;
wren_r = '0;
for (integer i = 0; i < NUM_PORTS; ++i) begin
if (pmask[i]) begin
wdata_r[wsel[i]] = write_data[i];
wren_r[wsel[i]] = byteen[i];
end
end
end else begin
`UNUSED_VAR (pmask)
always @(*) begin
wdata_r = {`CS_WORDS_PER_LINE{write_data}};
wren_r = '0;
wren_r[wsel] = byteen;
end
end
assign wdata = fill ? fill_data : wdata_r;
assign wren = fill ? {BYTEENW{fill}} : wren_r;
end else begin
`UNUSED_VAR (wsel)
`UNUSED_VAR (pmask)
assign wdata = fill ? fill_data : write_data;
assign wren = fill ? {BYTEENW{fill}} : byteen;
always @(*) begin
wdata_r = {`CS_WORDS_PER_LINE{write_data}};
wren_r = '0;
wren_r[wsel] = byteen;
end
end
// order the data layout to perform ways multiplexing last
// this allows performing onehot encoding of the way index in parallel with BRAM read.
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
assign wdata[i] = fill ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{wdata_r[i]}};
for (genvar j = 0; j < NUM_WAYS; ++j) begin
assign wren_w[i][j] = (fill ? {WORD_SIZE{1'b1}} : wren_r[i])
& {WORD_SIZE{((NUM_WAYS == 1) || way_sel[j])}};
end
end
assign wren = wren_w;
end else begin
`UNUSED_VAR (write)
`UNUSED_VAR (byteen)
@ -95,44 +97,51 @@ module VX_cache_data #(
assign wdata = fill_data;
assign wren = fill;
end
wire [`CLOG2(NUM_WAYS)-1:0] way_idx;
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
for (genvar i = 0; i < NUM_WAYS; ++i) begin
VX_sp_ram #(
.DATAW (`CS_LINE_WIDTH),
.SIZE (`CS_LINES_PER_BANK),
.WRENW (BYTEENW),
.NO_RWCHECK (1)
) data_store (
.clk (clk),
.write ((write || fill) && way_sel[i]),
.wren (wren),
.addr (line_addr),
.wdata (wdata),
.rdata (per_way_rdata[i])
);
end
VX_onehot_mux #(
.DATAW (`CS_WORDS_PER_LINE * `CS_WORD_WIDTH),
.N (NUM_WAYS)
) rdata_select (
.data_in (per_way_rdata),
.sel_in (way_sel),
.data_out (rdata)
VX_onehot_encoder #(
.N (NUM_WAYS)
) way_enc (
.data_in (way_sel),
.data_out (way_idx),
`UNUSED_PIN (valid_out)
);
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] rdata;
wire [`CS_LINE_SEL_BITS-1:0] line_addr = addr[`CS_LINE_SEL_BITS-1:0];
VX_sp_ram #(
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
.SIZE (`CS_LINES_PER_BANK),
.WRENW (BYTEENW),
.NO_RWCHECK (1)
) data_store (
.clk (clk),
.write (write || fill),
.wren (wren),
.addr (line_addr),
.wdata (wdata),
.rdata (rdata)
);
wire [NUM_PORTS-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
if (`CS_WORDS_PER_LINE > 1) begin
for (genvar i = 0; i < NUM_PORTS; ++i) begin
assign read_data[i] = rdata[wsel[i]];
assign per_way_rdata[i] = rdata[wsel[i]];
end
end else begin
`UNUSED_VAR (wsel)
assign read_data = rdata;
assign per_way_rdata = rdata;
end
for (genvar i = 0; i < NUM_PORTS; ++i) begin
assign read_data[i] = per_way_rdata[i][way_idx];
end
`UNUSED_VAR (stall)
`UNUSED_VAR (stall)
`ifdef DBG_TRACE_CACHE_DATA
always @(posedge clk) begin

View file

@ -1,5 +1,11 @@
`include "VX_cache_define.vh"
// this is a design for a pipelined cache architecture
// we allocate a free slot from the MSHR at the entry of the bank pipeline
// and only release the slot when we get a cache hit.
// during a memory fill response we initiate the replay sequence
// and dequeue all pending entries with that fill address.
module VX_cache_mshr #(
parameter `STRING INSTANCE_ID= "",
parameter BANK_ID = 0,

View file

@ -16,52 +16,27 @@ module VX_multiplier #(
);
`STATIC_ASSERT ((LATENCY <= 3), ("invalid parameter"))
wire [A_WIDTH-1:0] dataa_w;
wire [B_WIDTH-1:0] datab_w;
wire [A_WIDTH+B_WIDTH-1:0] result_w;
`UNUSED_VAR (result_w)
wire [R_WIDTH-1:0] prod_w;
if (SIGNED != 0) begin
assign result_w = $signed(dataa_w) * $signed(datab_w);
assign prod_w = R_WIDTH'($signed(dataa) * $signed(datab));
end else begin
assign result_w = dataa_w * datab_w;
assign prod_w = R_WIDTH'(dataa * datab);
end
if (LATENCY == 0) begin
assign dataa_w = dataa;
assign datab_w = datab;
assign result = R_WIDTH'(result_w);
assign result = prod_w;
end else begin
if (LATENCY >= 2) begin
reg [A_WIDTH-1:0] dataa_p [LATENCY-2:0];
reg [B_WIDTH-1:0] datab_p [LATENCY-2:0];
always @(posedge clk) begin
if (enable) begin
dataa_p[0] <= dataa;
datab_p[0] <= datab;
end
end
for (genvar i = 2; i < LATENCY; ++i) begin
always @(posedge clk) begin
if (enable) begin
dataa_p[i-1] <= dataa_p[i-2];
datab_p[i-1] <= datab_p[i-2];
end
end
end
assign dataa_w = dataa_p[LATENCY-2];
assign datab_w = datab_p[LATENCY-2];
end else begin
assign dataa_w = dataa;
assign datab_w = datab;
end
reg [R_WIDTH-1:0] result_r;
reg [R_WIDTH-1:0] prod_r [LATENCY-1:0];
always @(posedge clk) begin
if (enable) begin
result_r <= R_WIDTH'(result_w);
prod_r[0] <= prod_w;
for (integer i = 1; i < LATENCY; ++i) begin
prod_r[i] <= prod_r[i-1];
end
end
end
assign result = result_r;
end
assign result = prod_r[LATENCY-1];
end
endmodule

View file

@ -176,7 +176,7 @@ $(BIN_DIR)/emconfig.json:
report: $(XCLBIN_CONTAINER)
ifeq ($(TARGET),$(findstring $(TARGET), hw))
cp $(BUILD_DIR)/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log $(BUILD_DIR)/bin/runme.log
cp $(BUILD_DIR)/_x/reports/link/syn/ulp_vortex_afu_1_0_synth_1_ulp_vortex_afu_1_0_utilization_synth.rpt $(BUILD_DIR)/bin/synthesis.log
cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_full_util_routed.rpt $(BUILD_DIR)/bin/synthesis.log
cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin/timing.log
endif