Merge branch 'develop' of https://github.com/vortexgpgpu/vortex into develop

This commit is contained in:
Blaise Tine 2024-01-31 12:14:49 -08:00
commit 0a38312527
12 changed files with 358 additions and 148 deletions

View file

@ -291,6 +291,13 @@
///////////////////////////////////////////////////////////////////////////////
`ifdef ICACHE_ENABLE
`define L1_ENABLE
`endif
`ifdef DCACHE_ENABLE
`define L1_ENABLE
`endif
`ifdef L2_ENABLE
`define L2_LINE_SIZE `MEM_BLOCK_SIZE
`else

View file

@ -533,8 +533,9 @@ module VX_decode #(
assign decode_sched_if.valid = fetch_fire;
assign decode_sched_if.wid = fetch_if.data.wid;
assign decode_sched_if.is_wstall = is_wstall;
`ifndef L1_ENABLE
assign fetch_if.ibuf_pop = decode_if.ibuf_pop;
`endif
`ifdef DBG_TRACE_CORE_PIPELINE
always @(posedge clk) begin

View file

@ -32,7 +32,6 @@ module VX_fetch import VX_gpu_pkg::*; #(
);
`UNUSED_PARAM (CORE_ID)
`UNUSED_VAR (reset)
localparam ISW_WIDTH = `LOG2UP(`ISSUE_WIDTH);
wire icache_req_valid;
wire [ICACHE_ADDR_WIDTH-1:0] icache_req_addr;
@ -44,8 +43,6 @@ module VX_fetch import VX_gpu_pkg::*; #(
wire icache_req_fire = icache_req_valid && icache_req_ready;
wire [ISW_WIDTH-1:0] schedule_isw = wid_to_isw(schedule_if.data.wid);
assign req_tag = schedule_if.data.wid;
assign {rsp_uuid, rsp_tag} = icache_bus_if.rsp_data.tag;
@ -68,9 +65,12 @@ module VX_fetch import VX_gpu_pkg::*; #(
.rdata ({rsp_PC, rsp_tmask})
);
`ifndef L1_ENABLE
// Ensure that the ibuffer doesn't fill up.
// This resolves potential deadlock if ibuffer fills and the LSU stalls the execute stage due to pending dcache request.
// This issue is particularly prevalent when the icache and dcache are disabled and both requests share the same bus.
wire [ISSUE_ISW-1:0] schedule_isw = wid_to_isw(schedule_if.data.wid);
wire [`ISSUE_WIDTH-1:0] pending_ibuf_full;
for (genvar i = 0; i < `ISSUE_WIDTH; ++i) begin
VX_pending_size #(
@ -85,13 +85,16 @@ module VX_fetch import VX_gpu_pkg::*; #(
`UNUSED_PIN (empty)
);
end
wire ibuf_ready = ~pending_ibuf_full[schedule_isw];
`else
wire ibuf_ready = 1'b1;
`endif
`RUNTIME_ASSERT((!schedule_if.valid || schedule_if.data.PC != 0),
("%t: *** invalid PC=0x%0h, wid=%0d, tmask=%b (#%0d)", $time, schedule_if.data.PC, schedule_if.data.wid, schedule_if.data.tmask, schedule_if.data.uuid))
// Icache Request
wire ibuf_ready = ~pending_ibuf_full[schedule_isw];
assign icache_req_valid = schedule_if.valid && ibuf_ready;
assign icache_req_addr = schedule_if.data.PC[`MEM_ADDR_WIDTH-1:2];
assign icache_req_tag = {schedule_if.data.uuid, req_tag};

View file

@ -66,8 +66,9 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
.valid_out (ibuffer_if[i].valid),
.ready_out(ibuffer_if[i].ready)
);
`ifndef L1_ENABLE
assign decode_if.ibuf_pop[i] = ibuffer_if[i].valid && ibuffer_if[i].ready;
`endif
end
endmodule

View file

@ -36,21 +36,26 @@ interface VX_decode_if ();
logic valid;
data_t data;
logic ready;
wire [`ISSUE_WIDTH-1:0] ibuf_pop;
`ifndef L1_ENABLE
logic [`ISSUE_WIDTH-1:0] ibuf_pop;
`endif
modport master (
output valid,
output data,
input ibuf_pop,
input ready
`ifndef L1_ENABLE
, input ibuf_pop
`endif
);
modport slave (
input valid,
input data,
output ibuf_pop,
output ready
`ifndef L1_ENABLE
, output ibuf_pop
`endif
);
endinterface

View file

@ -26,21 +26,26 @@ interface VX_fetch_if ();
logic valid;
data_t data;
logic ready;
`ifndef L1_ENABLE
logic [`ISSUE_WIDTH-1:0] ibuf_pop;
`endif
modport master (
output valid,
output data,
input ibuf_pop,
input ready
`ifndef L1_ENABLE
, input ibuf_pop
`endif
);
modport slave (
input valid,
input data,
output ibuf_pop,
output ready
`ifndef L1_ENABLE
, output ibuf_pop
`endif
);
endinterface

View file

@ -11,6 +11,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// A bypass elastic buffer operates at full bandwidth where pop can happen if the buffer is empty but is going full
// It has the following benefits:
// + Full-bandwidth throughput
// + use only one register for storage
// It has the following limitations:
// + data_out is not registered
// + ready_in and ready_out are coupled
`include "VX_platform.vh"
`TRACING_OFF
@ -35,30 +43,27 @@ module VX_bypass_buffer #(
assign data_out = data_in;
end else begin
reg [DATAW-1:0] buffer;
reg buffer_valid;
reg has_data;
always @(posedge clk) begin
if (reset) begin
buffer_valid <= 0;
has_data <= 0;
end else begin
if (ready_out) begin
buffer_valid <= 0;
end
if (valid_in && ~ready_out) begin
`ASSERT(!buffer_valid, ("runtime error"));
buffer_valid <= 1;
has_data <= 0;
end else if (~has_data) begin
has_data <= valid_in;
end
end
if (valid_in && ~ready_out) begin
if (~has_data) begin
buffer <= data_in;
end
end
assign ready_in = ready_out || !buffer_valid;
assign data_out = buffer_valid ? buffer : data_in;
assign valid_out = valid_in || buffer_valid;
assign ready_in = ready_out || ~has_data;
assign data_out = has_data ? buffer : data_in;
assign valid_out = valid_in || has_data;
end
endmodule
`TRACING_ON
`TRACING_ON

View file

@ -42,34 +42,33 @@ module VX_elastic_buffer #(
end else if (SIZE == 1) begin
wire stall = valid_out && ~ready_out;
VX_pipe_register #(
.DATAW (1 + DATAW),
.RESETW (1)
) pipe_register (
.clk (clk),
.reset (reset),
.enable (~stall),
.data_in ({valid_in, data_in}),
.data_out ({valid_out, data_out})
VX_pipe_buffer #(
.DATAW (DATAW)
) pipe_buffer (
.clk (clk),
.reset (reset),
.valid_in (valid_in),
.data_in (data_in),
.ready_in (ready_in),
.valid_out (valid_out),
.data_out (data_out),
.ready_out (ready_out)
);
assign ready_in = ~stall;
end else if (SIZE == 2) begin
VX_skid_buffer #(
.DATAW (DATAW),
.FULL_BW (OUT_REG != 2),
.OUT_REG (OUT_REG)
) skid_buffer (
.clk (clk),
.reset (reset),
.valid_in (valid_in),
.valid_in (valid_in),
.data_in (data_in),
.ready_in (ready_in),
.data_in (data_in),
.data_out (data_out),
.valid_out (valid_out),
.data_out (data_out),
.ready_out (ready_out)
);
@ -111,10 +110,10 @@ module VX_elastic_buffer #(
.clk (clk),
.reset (reset),
.valid_in (~empty),
.ready_in (ready_out_t),
.data_in (data_out_t),
.data_out (data_out),
.ready_in (ready_out_t),
.valid_out (valid_out),
.data_out (data_out),
.ready_out (ready_out)
);

View file

@ -0,0 +1,63 @@
// Copyright 2024 blaise
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// A pipelined elastic buffer operates at full bandwidth where push can happen if the buffer is not empty but is going empty
// It has the following benefits:
// + Full-bandwidth throughput
// + use only one register for storage
// + data_out is fully registered
// It has the following limitations:
// + ready_in and ready_out are coupled
`include "VX_platform.vh"
`TRACING_OFF
// Single-stage valid/ready buffer built around VX_pipe_register.
//
// Parameters:
//   DATAW    - width of the data payload in bits.
//   PASSTHRU - when non-zero, the module degenerates to plain wires.
//
// Handshake:
//   valid_in/ready_in  - producer side.
//   valid_out/ready_out - consumer side.
// ready_in is derived combinationally from ready_out, so the two ready
// signals are coupled.
module VX_pipe_buffer #(
    parameter DATAW    = 1,
    parameter PASSTHRU = 0
) (
    input  wire             clk,
    input  wire             reset,
    input  wire             valid_in,
    output wire             ready_in,
    input  wire [DATAW-1:0] data_in,
    output wire [DATAW-1:0] data_out,
    input  wire             ready_out,
    output wire             valid_out
);
    if (PASSTHRU == 0) begin
        // The stage may load a new entry whenever it is not holding a
        // valid entry that the consumer refuses this cycle, i.e. when the
        // consumer is ready or the stage currently holds nothing valid.
        wire accept = ready_out || ~valid_out;

        assign ready_in = accept;

        // The valid bit travels alongside the payload through the register.
        // NOTE(review): RESETW=1 presumably limits reset to the valid bit;
        // confirm against VX_pipe_register.
        VX_pipe_register #(
            .DATAW  (1 + DATAW),
            .RESETW (1)
        ) pipe_register (
            .clk      (clk),
            .reset    (reset),
            .enable   (accept),
            .data_in  ({valid_in, data_in}),
            .data_out ({valid_out, data_out})
        );
    end else begin
        // Pass-through: no storage, every signal is forwarded directly.
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)
        assign data_out  = data_in;
        assign valid_out = valid_in;
        assign ready_in  = ready_out;
    end
endmodule
`TRACING_ON

View file

@ -17,6 +17,7 @@
module VX_skid_buffer #(
parameter DATAW = 32,
parameter PASSTHRU = 0,
parameter FULL_BW = 0,
parameter OUT_REG = 0
) (
input wire clk,
@ -30,8 +31,6 @@ module VX_skid_buffer #(
input wire ready_out,
output wire valid_out
);
`STATIC_ASSERT ((OUT_REG <= 2), ("invalid parameter"))
if (PASSTHRU != 0) begin
`UNUSED_VAR (clk)
@ -41,112 +40,36 @@ module VX_skid_buffer #(
assign data_out = data_in;
assign ready_in = ready_out;
end else if (OUT_REG == 0) begin
end else if (FULL_BW != 0) begin
reg [1:0][DATAW-1:0] shift_reg;
reg valid_out_r, ready_in_r, rd_ptr_r;
wire push = valid_in && ready_in;
wire pop = valid_out_r && ready_out;
always @(posedge clk) begin
if (reset) begin
valid_out_r <= 0;
ready_in_r <= 1;
rd_ptr_r <= 1;
end else begin
if (push) begin
if (!pop) begin
ready_in_r <= rd_ptr_r;
valid_out_r <= 1;
end
end else if (pop) begin
ready_in_r <= 1;
valid_out_r <= rd_ptr_r;
end
rd_ptr_r <= rd_ptr_r ^ (push ^ pop);
end
end
always @(posedge clk) begin
if (push) begin
shift_reg[1] <= shift_reg[0];
shift_reg[0] <= data_in;
end
end
assign ready_in = ready_in_r;
assign valid_out = valid_out_r;
assign data_out = shift_reg[rd_ptr_r];
end else if (OUT_REG == 1) begin
// Full-bandwidth operation: input is consumed every cycle.
// However, data_out register has an additional multiplexer.
reg [DATAW-1:0] data_out_r;
reg [DATAW-1:0] buffer;
reg valid_out_r;
reg use_buffer;
wire push = valid_in && ready_in;
wire stall_out = valid_out_r && ~ready_out;
always @(posedge clk) begin
if (reset) begin
valid_out_r <= 0;
use_buffer <= 0;
end else begin
if (ready_out) begin
use_buffer <= 0;
end else if (valid_in && valid_out) begin
use_buffer <= 1;
end
if (~stall_out) begin
valid_out_r <= valid_in || use_buffer;
end
end
end
always @(posedge clk) begin
if (push) begin
buffer <= data_in;
end
if (~stall_out) begin
data_out_r <= use_buffer ? buffer : data_in;
end
end
assign ready_in = ~use_buffer;
assign valid_out = valid_out_r;
assign data_out = data_out_r;
VX_stream_buffer #(
.DATAW (DATAW),
.OUT_REG (OUT_REG)
) stream_buffer (
.clk (clk),
.reset (reset),
.valid_in (valid_in),
.data_in (data_in),
.ready_in (ready_in),
.valid_out (valid_out),
.data_out (data_out),
.ready_out (ready_out)
);
end else begin
// Half-bandwidth operation: input is consumed every other cycle.
// However, data_out register has no additional multiplexer.
reg [DATAW-1:0] data_out_r;
reg has_data;
always @(posedge clk) begin
if (reset) begin
has_data <= 0;
end else begin
if (~has_data) begin
has_data <= valid_in;
end else if (ready_out) begin
has_data <= 0;
end
end
if (~has_data) begin
data_out_r <= data_in;
end
end
assign ready_in = ~has_data;
assign valid_out = has_data;
assign data_out = data_out_r;
VX_toggle_buffer #(
.DATAW (DATAW)
) toggle_buffer (
.clk (clk),
.reset (reset),
.valid_in (valid_in),
.data_in (data_in),
.ready_in (ready_in),
.valid_out (valid_out),
.data_out (data_out),
.ready_out (ready_out)
);
end

View file

@ -0,0 +1,128 @@
// Copyright 2024 blaise
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// A stream elastic buffer operates at full-bandwidth where push and pop can happen simultaneously
// It has the following benefits:
// + full-bandwidth throughput
// + ready_in and ready_out are decoupled
// + data_out can be fully registered
// It has the following limitations:
// - requires two registers for storage
`include "VX_platform.vh"
`TRACING_OFF
// Two-entry valid/ready buffer with two selectable implementations.
//
// Parameters:
//   DATAW    - width of the data payload in bits.
//   OUT_REG  - non-zero selects the variant that drives data_out from a
//              dedicated output register; zero selects the two-slot
//              shift-register variant.
//   PASSTHRU - non-zero bypasses all storage and wires input to output.
//
// Ports:
//   valid_in/ready_in/data_in    - producer-side handshake and payload.
//   valid_out/ready_out/data_out - consumer-side handshake and payload.
// In both storage variants ready_in is driven from a register, not from
// ready_out combinationally.
module VX_stream_buffer #(
parameter DATAW = 1,
parameter OUT_REG = 0,
parameter PASSTHRU = 0
) (
input wire clk,
input wire reset,
input wire valid_in,
output wire ready_in,
input wire [DATAW-1:0] data_in,
output wire [DATAW-1:0] data_out,
input wire ready_out,
output wire valid_out
);
if (PASSTHRU != 0) begin
// Pass-through: no storage, signals are forwarded directly.
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign ready_in = ready_out;
assign valid_out = valid_in;
assign data_out = data_in;
end else begin
if (OUT_REG != 0) begin
// Variant with a registered output (data_out_r) plus one side buffer.
// data_out_r/valid_out_r form the output stage; 'buffer' keeps the entry
// accepted while the output stage is stalled; use_buffer marks it occupied.
reg [DATAW-1:0] data_out_r;
reg [DATAW-1:0] buffer;
reg valid_out_r;
reg use_buffer;
// push: an input entry is accepted this cycle.
wire push = valid_in && ready_in;
// stall_out: the output stage holds a valid entry the consumer is not taking.
wire stall_out = valid_out_r && ~ready_out;
always @(posedge clk) begin
if (reset) begin
valid_out_r <= 0;
use_buffer <= 0;
end else begin
// The side buffer is released whenever the consumer is ready; it becomes
// occupied when an input arrives while the output is valid and not ready.
if (ready_out) begin
use_buffer <= 0;
end else if (valid_in && valid_out) begin
use_buffer <= 1;
end
// When the output stage is free to advance, it becomes valid if there is
// either a fresh input or a pending entry in the side buffer.
if (~stall_out) begin
valid_out_r <= valid_in || use_buffer;
end
end
end
// Data registers are not reset; only the valid/occupancy flags are.
always @(posedge clk) begin
// Every accepted input is captured in the side buffer, even when it is
// also loaded straight into the output register below.
if (push) begin
buffer <= data_in;
end
// The output register refills from the side buffer first, else from the input.
if (~stall_out) begin
data_out_r <= use_buffer ? buffer : data_in;
end
end
// Input is accepted only while the side buffer is free.
assign ready_in = ~use_buffer;
assign valid_out = valid_out_r;
assign data_out = data_out_r;
end else begin
// Variant with two storage slots and a one-bit read pointer.
// New entries always enter slot 0 and the previous slot 0 moves to slot 1;
// rd_ptr_r selects which slot is presented on data_out.
reg [1:0][DATAW-1:0] shift_reg;
reg valid_out_r, ready_in_r, rd_ptr_r;
wire push = valid_in && ready_in;
wire pop = valid_out_r && ready_out;
always @(posedge clk) begin
if (reset) begin
// After reset: nothing valid, input accepted, read pointer at 1.
valid_out_r <= 0;
ready_in_r <= 1;
rd_ptr_r <= 1;
end else begin
if (push) begin
// Push without pop: occupancy grows. ready_in_r takes the current
// rd_ptr_r value (1 right after reset, 0 once a single push has
// toggled it), so input stays open after the first entry and
// closes after the second.
if (!pop) begin
ready_in_r <= rd_ptr_r;
valid_out_r <= 1;
end
end else if (pop) begin
// Pop without push: occupancy shrinks. Input reopens, and the output
// remains valid only if rd_ptr_r is 1 (the value it has after two
// un-popped pushes).
ready_in_r <= 1;
valid_out_r <= rd_ptr_r;
end
// The pointer toggles when exactly one of push/pop happens; a
// simultaneous push and pop leaves it unchanged.
rd_ptr_r <= rd_ptr_r ^ (push ^ pop);
end
end
// Storage is not reset; it shifts on every accepted input.
always @(posedge clk) begin
if (push) begin
shift_reg[1] <= shift_reg[0];
shift_reg[0] <= data_in;
end
end
assign ready_in = ready_in_r;
assign valid_out = valid_out_r;
// data_out is selected between the two slots by the read pointer.
assign data_out = shift_reg[rd_ptr_r];
end
end
endmodule
`TRACING_ON

View file

@ -0,0 +1,70 @@
// Copyright 2024 blaise
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// A toggle elastic buffer operates at half-bandwidth where push can only trigger after pop
// It has the following benefits:
// + use only one register for storage
// + ready_in and ready_out are decoupled
// + data_out is fully registered
// It has the following limitations:
// - Half-bandwidth throughput
`include "VX_platform.vh"
`TRACING_OFF
// One-entry valid/ready buffer that alternates between filling and draining.
//
// Parameters:
//   DATAW    - width of the data payload in bits.
//   PASSTHRU - when non-zero, the module degenerates to plain wires.
//
// Ports:
//   valid_in/ready_in/data_in    - producer-side handshake and payload.
//   valid_out/ready_out/data_out - consumer-side handshake and payload.
// ready_in and valid_out are both driven from the occupancy flag, so an
// entry can only be accepted while the slot is empty.
module VX_toggle_buffer #(
    parameter DATAW    = 1,
    parameter PASSTHRU = 0
) (
    input  wire             clk,
    input  wire             reset,
    input  wire             valid_in,
    output wire             ready_in,
    input  wire [DATAW-1:0] data_in,
    output wire [DATAW-1:0] data_out,
    input  wire             ready_out,
    output wire             valid_out
);
    if (PASSTHRU == 0) begin
        reg [DATAW-1:0] slot_r;  // the single storage entry
        reg             full_r;  // set while slot_r holds an unconsumed entry

        // Occupancy flag: while empty it follows valid_in; once full it is
        // cleared when the consumer asserts ready_out.
        always @(posedge clk) begin
            if (reset) begin
                full_r <= 1'b0;
            end else if (~full_r) begin
                full_r <= valid_in;
            end else if (ready_out) begin
                full_r <= 1'b0;
            end
        end

        // Payload capture: the slot tracks data_in whenever it is empty.
        // It is not affected by reset.
        always @(posedge clk) begin
            if (~full_r) begin
                slot_r <= data_in;
            end
        end

        assign data_out  = slot_r;
        assign valid_out = full_r;
        assign ready_in  = ~full_r;
    end else begin
        // Pass-through: no storage, every signal is forwarded directly.
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)
        assign data_out  = data_in;
        assign valid_out = valid_in;
        assign ready_in  = ready_out;
    end
endmodule
`TRACING_ON