Merge pull request #184 from vortexgpgpu/develop

Develop
This commit is contained in:
Jaewon Lee 2024-10-02 15:41:35 -04:00 committed by GitHub
commit 6c725978b4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 58 additions and 30 deletions

View file

@ -50,9 +50,11 @@
`define PERF_CTR_BITS 44
`ifndef NDEBUG
`define UUID_ENABLE
`define UUID_WIDTH 44
`else
`ifdef SCOPE
`define UUID_ENABLE
`define UUID_WIDTH 44
`else
`define UUID_WIDTH 1

View file

@ -91,29 +91,47 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
`ifdef SCOPE
`ifdef DBG_SCOPE_ISSUE
`SCOPE_IO_SWITCH (1);
wire decode_fire = decode_if.valid && decode_if.ready;
wire operands_fire = operands_if.valid && operands_if.ready;
`NEG_EDGE (reset_negedge, reset);
`SCOPE_TAP_EX (0, 2, 2, 2, (
`UUID_WIDTH + `NUM_THREADS + `EX_BITS + `INST_OP_BITS +
1 + `NR_BITS + (`NUM_THREADS * 3 * `XLEN) +
`UUID_WIDTH + `NUM_THREADS + `NR_BITS + (`NUM_THREADS*`XLEN) + 1
`SCOPE_TAP_EX (0, 2, 4, 3, (
`UUID_WIDTH + `NW_WIDTH + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS * 4 +
`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `PC_BITS + `EX_BITS + `INST_OP_BITS + 1 + `NR_BITS + (3 * `XLEN) +
`UUID_WIDTH + ISSUE_WIS_W + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * `XLEN) + 1
), {
decode_if.valid,
decode_if.ready,
operands_if.valid,
operands_if.ready
}, {
decode_fire,
operands_fire,
writeback_if.valid // ack-free
}, {
decode_if.data.uuid,
decode_if.data.wid,
decode_if.data.tmask,
decode_if.data.PC,
decode_if.data.ex_type,
decode_if.data.op_type,
decode_if.data.wb,
decode_if.data.rd,
decode_if.data.rs1,
decode_if.data.rs2,
decode_if.data.rs3,
operands_if.data.uuid,
operands_if.data.wis,
operands_if.data.tmask,
operands_if.data.PC,
operands_if.data.ex_type,
operands_if.data.op_type,
operands_if.data.wb,
operands_if.data.rd,
operands_if.data.rs1_data,
operands_if.data.rs2_data,
operands_if.data.rs3_data,
operands_if.data.rs1_data[0],
operands_if.data.rs2_data[0],
operands_if.data.rs3_data[0],
writeback_if.data.uuid,
writeback_if.data.wis,
writeback_if.data.tmask,
writeback_if.data.rd,
writeback_if.data.data,

View file

@ -310,7 +310,7 @@ module VX_lsu_slice import VX_gpu_pkg::*; #(
wire lsu_mem_rsp_ready;
VX_mem_scheduler #(
.INSTANCE_ID ($sformatf("%s-scheduler", INSTANCE_ID)),
.INSTANCE_ID ($sformatf("%s-memsched", INSTANCE_ID)),
.CORE_REQS (NUM_LANES),
.MEM_CHANNELS(NUM_LANES),
.WORD_SIZE (LSU_WORD_SIZE),

View file

@ -97,8 +97,10 @@ module VX_elastic_buffer #(
wire [DATAW-1:0] data_out_t;
wire ready_out_t;
wire valid_out_t = ~empty;
wire push = valid_in && ready_in;
wire pop = ~empty && ready_out_t;
wire pop = valid_out_t && ready_out_t;
VX_fifo_queue #(
.DATAW (DATAW),
@ -127,7 +129,7 @@ module VX_elastic_buffer #(
) out_buf (
.clk (clk),
.reset (reset),
.valid_in (~empty),
.valid_in (valid_out_t),
.data_in (data_out_t),
.ready_in (ready_out_t),
.valid_out (valid_out),

View file

@ -459,16 +459,21 @@ module VX_mem_scheduler #(
end else begin : g_rsp_full
reg [(CORE_BATCHES * CORE_CHANNELS * WORD_WIDTH)-1:0] rsp_store [CORE_QUEUE_SIZE-1:0];
reg [CORE_BATCHES-1:0][CORE_CHANNELS-1:0][WORD_WIDTH-1:0] rsp_store_n;
wire [CORE_CHANNELS-1:0][CORE_BATCHES-1:0][WORD_WIDTH-1:0] rsp_store_n;
reg [CORE_REQS-1:0] rsp_orig_mask [CORE_QUEUE_SIZE-1:0];
always @(*) begin
rsp_store_n = rsp_store[ibuf_raddr];
for (integer i = 0; i < CORE_CHANNELS; ++i) begin
if ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]) begin
rsp_store_n[rsp_batch_idx][i] = mem_rsp_data_s[i];
// Response storage, generated once per (i, j) pair with i in
// [0, CORE_CHANNELS) and j in [0, CORE_BATCHES).
for (genvar i = 0; i < CORE_CHANNELS; ++i) begin : g_rsp_store
for (genvar j = 0; j < CORE_BATCHES; ++j) begin : g_j
// One array per (i, j): CORE_QUEUE_SIZE entries, each WORD_WIDTH bits wide.
reg [WORD_WIDTH-1:0] rsp_store [CORE_QUEUE_SIZE-1:0];
// Write only when mem_rsp_fire_s is set, the current rsp_batch_idx equals j,
// and mem_rsp_mask_s[i] is set (the mask is ignored when CORE_CHANNELS == 1).
wire rsp_wren = mem_rsp_fire_s
&& (BATCH_SEL_WIDTH'(j) == rsp_batch_idx)
&& ((CORE_CHANNELS == 1) || mem_rsp_mask_s[i]);
always @(posedge clk) begin
if (rsp_wren) begin
rsp_store[ibuf_raddr] <= mem_rsp_data_s[i];
end
end
// Write bypass: while rsp_wren is high, forward the incoming word instead of
// the stored entry, since the nonblocking write above is not visible until
// after the clock edge.
assign rsp_store_n[i][j] = rsp_wren ? mem_rsp_data_s[i] : rsp_store[ibuf_raddr];
end
end
@ -476,9 +481,6 @@ module VX_mem_scheduler #(
if (ibuf_push) begin
rsp_orig_mask[ibuf_waddr] <= core_req_mask;
end
if (mem_rsp_valid_s) begin
rsp_store[ibuf_raddr] <= rsp_store_n;
end
end
assign crsp_valid = mem_rsp_valid_s && rsp_complete;
@ -488,7 +490,7 @@ module VX_mem_scheduler #(
// Map each request index r onto an element of rsp_store_n.
for (genvar r = 0; r < CORE_REQS; ++r) begin : g_crsp_data
// i is the quotient and j the remainder of r divided by CORE_CHANNELS.
localparam i = r / CORE_CHANNELS;
localparam j = r % CORE_CHANNELS;
// NOTE(review): the two assigns below drive the same crsp_data[r] with
// transposed indices. This text looks like a diff with its -/+ markers
// stripped, so presumably only one of them exists in the real file - confirm
// which one matches the rsp_store_n declaration in use.
assign crsp_data[r] = rsp_store_n[i][j];
assign crsp_data[r] = rsp_store_n[j][i];
end
assign mem_rsp_ready_s = crsp_ready || ~rsp_complete;

View file

@ -45,7 +45,7 @@ module VX_stream_buffer #(
assign valid_out = valid_in;
assign data_out = data_in;
end else if (OUT_REG != 0) begin : g_with_reg
end else if (OUT_REG != 0) begin : g_out_reg
reg [DATAW-1:0] data_out_r;
reg [DATAW-1:0] buffer;
@ -84,23 +84,27 @@ module VX_stream_buffer #(
assign valid_out = valid_out_r;
assign data_out = data_out_r;
end else begin : g_no_reg
end else begin : g_no_out_reg
reg [1:0][DATAW-1:0] shift_reg;
reg [1:0] fifo_state;
reg [1:0] fifo_state, fifo_state_n;
wire fire_in = valid_in && ready_in;
wire fire_in = valid_in && ready_in;
wire fire_out = valid_out && ready_out;
// Combinational next-state logic: derives fifo_state_n from fifo_state,
// selected by {fire_in, fire_out}.
always @(*) begin
case ({fire_in, fire_out})
// input handshake only: shift left and insert a 1 at bit 0
2'b10: fifo_state_n = {fifo_state[0], 1'b1}; // 00 -> 01, 01 -> 11
// output handshake only: shift right and insert a 0 at bit 1
2'b01: fifo_state_n = {1'b0, fifo_state[1]}; // 11 -> 01, 01 -> 00
// neither handshake, or both in the same cycle: state is held
default: fifo_state_n = fifo_state;
endcase
end
// State register for fifo_state; cleared to 2'b00 while reset is asserted.
// NOTE(review): both the case statement and the final assignment below drive
// fifo_state. This text looks like a diff with its -/+ markers stripped, so
// presumably the case is the removed version and the fifo_state_n assignment
// is its replacement - confirm against the real file.
always @(posedge clk) begin
if (reset) begin
fifo_state <= 2'b00;
end else begin
case ({fire_in, fire_out})
// input handshake only: shift left and insert a 1 at bit 0
2'b10: fifo_state <= {fifo_state[0], 1'b1}; // 00 -> 01, 01 -> 11
// output handshake only: shift right and insert a 0 at bit 1
2'b01: fifo_state <= {1'b0, fifo_state[1]}; // 11 -> 01, 01 -> 00
default: fifo_state <= fifo_state;
endcase
fifo_state <= fifo_state_n;
end
end