mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-23 21:39:10 -04:00
minor updates
This commit is contained in:
parent
be1f265d62
commit
fb4f62bab9
6 changed files with 89 additions and 99 deletions
4
hw/rtl/cache/VX_cache_bank.sv
vendored
4
hw/rtl/cache/VX_cache_bank.sv
vendored
|
@ -295,8 +295,8 @@ module VX_cache_bank #(
|
|||
.data_out ({valid_st1, is_mshr_st1, is_fill_st1, is_read_st1, is_write_st1, addr_st1, data_st1, byteen_st1, wsel_st1, req_idx_st1, pmask_st1, tag_st1, mshr_id_st1, tag_matches_st1, way_sel_st1, mshr_pending_st1})
|
||||
);
|
||||
|
||||
// we have a tag match
|
||||
wire is_hit_st1 = | tag_matches_st1;
|
||||
// we have a tag hit
|
||||
wire is_hit_st1 = (| tag_matches_st1);
|
||||
|
||||
if (UUID_WIDTH != 0) begin
|
||||
assign req_uuid_st1 = tag_st1[0][TAG_WIDTH-1 -: UUID_WIDTH];
|
||||
|
|
6
hw/rtl/cache/VX_cache_cluster.sv
vendored
6
hw/rtl/cache/VX_cache_cluster.sv
vendored
|
@ -19,7 +19,7 @@ module VX_cache_cluster #(
|
|||
// Number of ports per banks
|
||||
parameter NUM_PORTS = 1,
|
||||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
parameter NUM_WAYS = 4,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
|
||||
|
@ -179,8 +179,8 @@ endmodule
|
|||
module VX_cache_cluster_top #(
|
||||
parameter `STRING INSTANCE_ID = "",
|
||||
|
||||
parameter NUM_UNITS = 8,
|
||||
parameter NUM_INPUTS = 16,
|
||||
parameter NUM_UNITS = 2,
|
||||
parameter NUM_INPUTS = 4,
|
||||
parameter TAG_SEL_IDX = 0,
|
||||
|
||||
// Number of Word requests per cycle
|
||||
|
|
123
hw/rtl/cache/VX_cache_data.sv
vendored
123
hw/rtl/cache/VX_cache_data.sv
vendored
|
@ -49,44 +49,46 @@ module VX_cache_data #(
|
|||
`UNUSED_VAR (addr)
|
||||
`UNUSED_VAR (read)
|
||||
|
||||
localparam BYTEENW = WRITE_ENABLE ? LINE_SIZE : 1;
|
||||
localparam BYTEENW = (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) ? (LINE_SIZE * NUM_WAYS) : 1;
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] rdata;
|
||||
wire [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] wdata;
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] wdata;
|
||||
wire [BYTEENW-1:0] wren;
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_addr = addr[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
if (WRITE_ENABLE != 0) begin
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
reg [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] wdata_r;
|
||||
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_r;
|
||||
if (NUM_PORTS > 1) begin
|
||||
always @(*) begin
|
||||
wdata_r = 'x;
|
||||
wren_r = '0;
|
||||
for (integer i = 0; i < NUM_PORTS; ++i) begin
|
||||
if (pmask[i]) begin
|
||||
wdata_r[wsel[i]] = write_data[i];
|
||||
wren_r[wsel[i]] = byteen[i];
|
||||
end
|
||||
if (WRITE_ENABLE != 0 || (NUM_WAYS > 1)) begin
|
||||
reg [`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] wdata_r;
|
||||
reg [`CS_WORDS_PER_LINE-1:0][WORD_SIZE-1:0] wren_r;
|
||||
|
||||
if (NUM_PORTS > 1) begin
|
||||
always @(*) begin
|
||||
wdata_r = 'x;
|
||||
wren_r = '0;
|
||||
for (integer i = 0; i < NUM_PORTS; ++i) begin
|
||||
if (pmask[i]) begin
|
||||
wdata_r[wsel[i]] = write_data[i];
|
||||
wren_r[wsel[i]] = byteen[i];
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (pmask)
|
||||
always @(*) begin
|
||||
wdata_r = {`CS_WORDS_PER_LINE{write_data}};
|
||||
wren_r = '0;
|
||||
wren_r[wsel] = byteen;
|
||||
end
|
||||
end
|
||||
assign wdata = fill ? fill_data : wdata_r;
|
||||
assign wren = fill ? {BYTEENW{fill}} : wren_r;
|
||||
end else begin
|
||||
`UNUSED_VAR (wsel)
|
||||
`UNUSED_VAR (pmask)
|
||||
assign wdata = fill ? fill_data : write_data;
|
||||
assign wren = fill ? {BYTEENW{fill}} : byteen;
|
||||
always @(*) begin
|
||||
wdata_r = {`CS_WORDS_PER_LINE{write_data}};
|
||||
wren_r = '0;
|
||||
wren_r[wsel] = byteen;
|
||||
end
|
||||
end
|
||||
|
||||
// order the data layout to perform ways multiplexing last
|
||||
// this allows performing onehot encoding of the way index in parallel with BRAM read.
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][WORD_SIZE-1:0] wren_w;
|
||||
for (genvar i = 0; i < `CS_WORDS_PER_LINE; ++i) begin
|
||||
assign wdata[i] = fill ? {NUM_WAYS{fill_data[i]}} : {NUM_WAYS{wdata_r[i]}};
|
||||
for (genvar j = 0; j < NUM_WAYS; ++j) begin
|
||||
assign wren_w[i][j] = (fill ? {WORD_SIZE{1'b1}} : wren_r[i])
|
||||
& {WORD_SIZE{((NUM_WAYS == 1) || way_sel[j])}};
|
||||
end
|
||||
end
|
||||
assign wren = wren_w;
|
||||
end else begin
|
||||
`UNUSED_VAR (write)
|
||||
`UNUSED_VAR (byteen)
|
||||
|
@ -95,44 +97,51 @@ module VX_cache_data #(
|
|||
assign wdata = fill_data;
|
||||
assign wren = fill;
|
||||
end
|
||||
|
||||
wire [`CLOG2(NUM_WAYS)-1:0] way_idx;
|
||||
|
||||
wire [NUM_WAYS-1:0][`CS_WORDS_PER_LINE-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
|
||||
|
||||
for (genvar i = 0; i < NUM_WAYS; ++i) begin
|
||||
VX_sp_ram #(
|
||||
.DATAW (`CS_LINE_WIDTH),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.WRENW (BYTEENW),
|
||||
.NO_RWCHECK (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.write ((write || fill) && way_sel[i]),
|
||||
.wren (wren),
|
||||
.addr (line_addr),
|
||||
.wdata (wdata),
|
||||
.rdata (per_way_rdata[i])
|
||||
);
|
||||
end
|
||||
|
||||
VX_onehot_mux #(
|
||||
.DATAW (`CS_WORDS_PER_LINE * `CS_WORD_WIDTH),
|
||||
.N (NUM_WAYS)
|
||||
) rdata_select (
|
||||
.data_in (per_way_rdata),
|
||||
.sel_in (way_sel),
|
||||
.data_out (rdata)
|
||||
VX_onehot_encoder #(
|
||||
.N (NUM_WAYS)
|
||||
) way_enc (
|
||||
.data_in (way_sel),
|
||||
.data_out (way_idx),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
wire [`CS_WORDS_PER_LINE-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] rdata;
|
||||
|
||||
wire [`CS_LINE_SEL_BITS-1:0] line_addr = addr[`CS_LINE_SEL_BITS-1:0];
|
||||
|
||||
VX_sp_ram #(
|
||||
.DATAW (`CS_LINE_WIDTH * NUM_WAYS),
|
||||
.SIZE (`CS_LINES_PER_BANK),
|
||||
.WRENW (BYTEENW),
|
||||
.NO_RWCHECK (1)
|
||||
) data_store (
|
||||
.clk (clk),
|
||||
.write (write || fill),
|
||||
.wren (wren),
|
||||
.addr (line_addr),
|
||||
.wdata (wdata),
|
||||
.rdata (rdata)
|
||||
);
|
||||
|
||||
wire [NUM_PORTS-1:0][NUM_WAYS-1:0][`CS_WORD_WIDTH-1:0] per_way_rdata;
|
||||
|
||||
if (`CS_WORDS_PER_LINE > 1) begin
|
||||
for (genvar i = 0; i < NUM_PORTS; ++i) begin
|
||||
assign read_data[i] = rdata[wsel[i]];
|
||||
assign per_way_rdata[i] = rdata[wsel[i]];
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (wsel)
|
||||
assign read_data = rdata;
|
||||
assign per_way_rdata = rdata;
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NUM_PORTS; ++i) begin
|
||||
assign read_data[i] = per_way_rdata[i][way_idx];
|
||||
end
|
||||
|
||||
`UNUSED_VAR (stall)
|
||||
`UNUSED_VAR (stall)
|
||||
|
||||
`ifdef DBG_TRACE_CACHE_DATA
|
||||
always @(posedge clk) begin
|
||||
|
|
6
hw/rtl/cache/VX_cache_mshr.sv
vendored
6
hw/rtl/cache/VX_cache_mshr.sv
vendored
|
@ -1,5 +1,11 @@
|
|||
`include "VX_cache_define.vh"
|
||||
|
||||
// this is a design for a pipelined cache architecture
|
||||
// we allocate a free slot from the MSHR at the entry of the bank pipeline
|
||||
// and only release the slot when we get a cache hit.
|
||||
// during a memory fill response we initiate the replay sequence
|
||||
// and dequeue all pending entries with that fill address.
|
||||
|
||||
module VX_cache_mshr #(
|
||||
parameter `STRING INSTANCE_ID= "",
|
||||
parameter BANK_ID = 0,
|
||||
|
|
|
@ -16,52 +16,27 @@ module VX_multiplier #(
|
|||
);
|
||||
`STATIC_ASSERT ((LATENCY <= 3), ("invalid parameter"))
|
||||
|
||||
wire [A_WIDTH-1:0] dataa_w;
|
||||
wire [B_WIDTH-1:0] datab_w;
|
||||
wire [A_WIDTH+B_WIDTH-1:0] result_w;
|
||||
`UNUSED_VAR (result_w)
|
||||
wire [R_WIDTH-1:0] prod_w;
|
||||
|
||||
if (SIGNED != 0) begin
|
||||
assign result_w = $signed(dataa_w) * $signed(datab_w);
|
||||
assign prod_w = R_WIDTH'($signed(dataa) * $signed(datab));
|
||||
end else begin
|
||||
assign result_w = dataa_w * datab_w;
|
||||
assign prod_w = R_WIDTH'(dataa * datab);
|
||||
end
|
||||
|
||||
if (LATENCY == 0) begin
|
||||
assign dataa_w = dataa;
|
||||
assign datab_w = datab;
|
||||
assign result = R_WIDTH'(result_w);
|
||||
assign result = prod_w;
|
||||
end else begin
|
||||
if (LATENCY >= 2) begin
|
||||
reg [A_WIDTH-1:0] dataa_p [LATENCY-2:0];
|
||||
reg [B_WIDTH-1:0] datab_p [LATENCY-2:0];
|
||||
always @(posedge clk) begin
|
||||
if (enable) begin
|
||||
dataa_p[0] <= dataa;
|
||||
datab_p[0] <= datab;
|
||||
end
|
||||
end
|
||||
for (genvar i = 2; i < LATENCY; ++i) begin
|
||||
always @(posedge clk) begin
|
||||
if (enable) begin
|
||||
dataa_p[i-1] <= dataa_p[i-2];
|
||||
datab_p[i-1] <= datab_p[i-2];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign dataa_w = dataa_p[LATENCY-2];
|
||||
assign datab_w = datab_p[LATENCY-2];
|
||||
end else begin
|
||||
assign dataa_w = dataa;
|
||||
assign datab_w = datab;
|
||||
end
|
||||
reg [R_WIDTH-1:0] result_r;
|
||||
reg [R_WIDTH-1:0] prod_r [LATENCY-1:0];
|
||||
always @(posedge clk) begin
|
||||
if (enable) begin
|
||||
result_r <= R_WIDTH'(result_w);
|
||||
prod_r[0] <= prod_w;
|
||||
for (integer i = 1; i < LATENCY; ++i) begin
|
||||
prod_r[i] <= prod_r[i-1];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign result = result_r;
|
||||
end
|
||||
assign result = prod_r[LATENCY-1];
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -176,7 +176,7 @@ $(BIN_DIR)/emconfig.json:
|
|||
report: $(XCLBIN_CONTAINER)
|
||||
ifeq ($(TARGET),$(findstring $(TARGET), hw))
|
||||
cp $(BUILD_DIR)/_x/logs/link/syn/ulp_vortex_afu_1_0_synth_1_runme.log $(BUILD_DIR)/bin/runme.log
|
||||
cp $(BUILD_DIR)/_x/reports/link/syn/ulp_vortex_afu_1_0_synth_1_ulp_vortex_afu_1_0_utilization_synth.rpt $(BUILD_DIR)/bin/synthesis.log
|
||||
cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_full_util_routed.rpt $(BUILD_DIR)/bin/synthesis.log
|
||||
cp $(BUILD_DIR)/_x/reports/link/imp/impl_1_hw_bb_locked_timing_summary_routed.rpt $(BUILD_DIR)/bin/timing.log
|
||||
endif
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue