using onehot multiplexer to reduce critical path

This commit is contained in:
Blaise Tine 2021-07-08 00:26:59 -07:00
parent dc34c5c5bd
commit 10e9ee124b
11 changed files with 161 additions and 207 deletions

View file

@ -51,20 +51,17 @@ module VX_alu_unit #(
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [32:0] shr_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
`IGNORE_WARNINGS_BEGIN
wire [32:0] shr_value = $signed(shr_in1) >>> alu_in2_imm[i][4:0];
`IGNORE_WARNINGS_END
assign shr_result[i] = shr_value[31:0];
assign shr_result[i] = 32'($signed(shr_in1) >>> alu_in2_imm[i][4:0]);
end
for (genvar i = 0; i < `NUM_THREADS; i++) begin
always @(*) begin
case (alu_op)
`ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
`ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
`ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
`ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
`ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
`ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
//`ALU_SLL,
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
endcase
end
end

View file

@ -53,6 +53,8 @@ module VX_decode #(
wire [19:0] upper_imm = {func7, rs2, rs1, func3};
wire [11:0] alu_imm = (func3[0] && ~func3[1]) ? {{7{1'b0}}, rs2} : u_12;
wire [11:0] s_imm = {func7, rd};
wire [12:0] b_imm = {instr[31], instr[7], instr[30:25], instr[11:8], 1'b0};
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
wire [11:0] jalr_imm = {func7, rs2};
@ -70,7 +72,7 @@ module VX_decode #(
use_PC = 0;
use_rd = 0;
is_join = 0;
is_wstall = 0;
is_wstall = 0;
used_regs = 0;
case (opcode)
@ -184,7 +186,7 @@ module VX_decode #(
use_imm = 1;
use_PC = 1;
is_wstall = 1;
imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
imm = {{19{b_imm[12]}}, b_imm};
`USED_IREG (rs1);
`USED_IREG (rs2);
end
@ -245,7 +247,7 @@ module VX_decode #(
`INST_S: begin
ex_type = `EX_LSU;
op_type = `OP_BITS'({1'b1, func3});
imm = {{20{func7[6]}}, func7, rd};
imm = {{20{s_imm[6]}}, s_imm};
`USED_IREG (rs1);
`ifdef EXT_F_ENABLE
if (opcode[2]) begin

View file

@ -168,7 +168,7 @@ module VX_fpu_unit #(
fflags_t rsp_fflags;
always @(*) begin
rsp_fflags = 0;
rsp_fflags = '0;
for (integer i = 0; i < `NUM_THREADS; i++) begin
if (rsp_tmask[i]) begin
rsp_fflags.NX |= fflags[i].NX;

View file

@ -95,7 +95,9 @@ module VX_to_mem #(
always @(*) begin
mem_rsp_data_out_n = mem_rsp_data_out_r;
mem_rsp_data_out_n[rsp_ctr] = mem_rsp_data_in;
if (mem_rsp_in_fire) begin
mem_rsp_data_out_n[rsp_ctr] = mem_rsp_data_in;
end
end
always @(posedge clk) begin
@ -108,9 +110,9 @@ module VX_to_mem #(
end
if (mem_rsp_in_fire) begin
rsp_ctr <= rsp_ctr + 1;
mem_rsp_data_out_r <= mem_rsp_data_out_n;
end
end
mem_rsp_data_out_r <= mem_rsp_data_out_n;
end
reg [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_r;

View file

@ -70,7 +70,7 @@ localparam MMIO_STATUS = `AFU_IMAGE_MMIO_STATUS;
localparam COUT_TID_WIDTH = $clog2(`IO_COUT_SIZE);
localparam COUT_QUEUE_DATAW = COUT_TID_WIDTH + 8;
localparam COUT_QUEUE_SIZE = 256;
localparam COUT_QUEUE_SIZE = 64;
localparam MMIO_SCOPE_READ = `AFU_IMAGE_MMIO_SCOPE_READ;
localparam MMIO_SCOPE_WRITE = `AFU_IMAGE_MMIO_SCOPE_WRITE;
@ -470,9 +470,7 @@ wire vx_mem_is_cout;
wire vx_mem_req_valid_qual;
wire vx_mem_req_ready_qual;
assign vx_mem_req_valid_qual = vx_mem_req_valid
&& vx_started
&& ~vx_mem_is_cout;
assign vx_mem_req_valid_qual = vx_mem_req_valid && vx_started;
assign vx_mem_req_ready = vx_mem_is_cout ? ~cout_q_full : vx_mem_req_ready_qual;
@ -534,8 +532,9 @@ VX_mem_arb #(
.DATA_WIDTH (LMEM_LINE_WIDTH),
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
.TAG_IN_WIDTH (AVS_REQ_TAGW),
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
.BUFFERED_REQ (0),
.BUFFERED_RSP (0),
.TYPE ("X")
) mem_arb (
.clk (clk),
.reset (reset),
@ -918,7 +917,7 @@ Vortex #() vortex (
// COUT HANDLING //////////////////////////////////////////////////////////////
wire [COUT_TID_WIDTH-1:0] cout_tid;
wire [7:0] cout_char;
reg [7:0] cout_char;
VX_onehot_encoder #(
.N (`VX_MEM_BYTEEN_WIDTH)
@ -928,8 +927,14 @@ VX_onehot_encoder #(
`UNUSED_PIN (valid)
);
wire [`VX_MEM_BYTEEN_WIDTH-1:0][7:0] vx_mem_req_data_ar = vx_mem_req_data;
assign cout_char = vx_mem_req_data_ar[cout_tid];
VX_onehot_mux #(
.DATAW (8),
.COUNT (`VX_MEM_BYTEEN_WIDTH)
) cout_char_mux (
.data_in (vx_mem_req_data),
.sel_in (vx_mem_req_byteen),
.data_out (cout_char)
);
assign vx_mem_is_cout = (vx_mem_req_addr == `VX_MEM_ADDR_WIDTH'(`IO_COUT_ADDR >> (32 - `VX_MEM_ADDR_WIDTH)));
@ -943,8 +948,8 @@ wire cout_q_pop = cp2af_sRxPort.c0.mmioRdValid
&& ~cout_q_empty;
VX_fifo_queue #(
.DATAW (COUT_QUEUE_DATAW),
.SIZE (COUT_QUEUE_SIZE)
.DATAW (COUT_QUEUE_DATAW),
.SIZE (COUT_QUEUE_SIZE)
) cout_queue (
.clk (clk),
.reset (reset),

View file

@ -188,6 +188,7 @@ module VX_bank #(
wire creq_pop_unqual = !mshr_pop_unqual && !mrsq_pop_unqual && !creq_empty && !flush_enable;
wire is_miss_st1 = valid_st1 && (miss_st1 || force_miss_st1);
assign mshr_pop = mshr_pop_unqual
&& !(is_miss_st1 && is_mshr_st1) // do not schedule another mshr request if the previous one missed
&& !crsq_in_stall; // ensure core response ready

View file

@ -93,7 +93,6 @@ module VX_nc_bypass #(
// core request handling
reg [NUM_REQS-1:0] core_req_ready_in_r;
wire [NUM_REQS-1:0] core_req_valid_in_nc;
wire [NUM_REQS-1:0] core_req_nc_sel;
wire [NUM_REQS-1:0] core_req_nc_tids;
@ -115,210 +114,130 @@ module VX_nc_bypass #(
.valid_out (core_req_nc_valid)
);
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_tids;
`UNUSED_VAR (core_req_nc_sel)
if (NUM_REQS > 1) begin
always @(*) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
if (core_req_valid_in_nc[i]) begin
core_req_ready_in_r[i] = ~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i];
end else begin
core_req_ready_in_r[i] = core_req_ready_out[i];
end
end
end
end else begin
`UNUSED_VAR (core_req_nc_tid)
always @(*) begin
if (core_req_valid_in_nc) begin
core_req_ready_in_r = ~mem_req_valid_in && mem_req_ready_out;
end else begin
core_req_ready_in_r = core_req_ready_out;
end
end
end
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_tids;
assign core_req_rw_out = core_req_rw_in;
assign core_req_addr_out = core_req_addr_in;
assign core_req_byteen_out = core_req_byteen_in;
assign core_req_data_out = core_req_data_in;
assign core_req_tag_out = core_req_tag_in;
assign core_req_ready_in = core_req_ready_in_r;
if (NUM_REQS > 1) begin
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ?
(~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i]) : core_req_ready_out[i];
end
end else begin
`UNUSED_VAR (core_req_nc_sel)
assign core_req_ready_in = core_req_valid_in_nc ? (~mem_req_valid_in && mem_req_ready_out) : core_req_ready_out;
end
// memory request handling
reg mem_req_valid_out_r;
reg mem_req_rw_out_r;
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_out_r;
reg [MEM_ADDR_WIDTH-1:0] mem_req_addr_out_r;
reg [MEM_DATA_WIDTH-1:0] mem_req_data_out_r;
reg [MEM_TAG_WIDTH-1:0] mem_req_tag_out_r;
reg mem_req_ready_in_r;
always @(*) begin
if (mem_req_valid_in) begin
mem_req_valid_out_r = 1;
mem_req_ready_in_r = mem_req_ready_out;
end else begin
mem_req_valid_out_r = core_req_nc_valid;
mem_req_ready_in_r = 0;
end
end
assign mem_req_valid_out = mem_req_valid_in || core_req_nc_valid;
assign mem_req_ready_in = mem_req_valid_in && mem_req_ready_out;
if (NUM_REQS > 1) begin
always @(*) begin
if (mem_req_valid_in) begin
mem_req_rw_out_r = mem_req_rw_in;
mem_req_addr_out_r = mem_req_addr_in;
mem_req_data_out_r = mem_req_data_in;
end else begin
mem_req_rw_out_r = core_req_rw_in[core_req_nc_tid];
mem_req_addr_out_r = core_req_addr_in[core_req_nc_tid][D +: MEM_ADDR_WIDTH];
for (integer i = 0; i < P; ++i) begin
mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in[core_req_nc_tid];
end
end
wire [CORE_TAG_WIDTH-1:0] core_req_tag_in_sel;
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
wire core_req_rw_in_sel;
wire [NUM_REQS-1:0][(CORE_TAG_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1)-1:0] core_req_nc_mux_in;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_nc_mux_in[i] = {core_req_tag_in[i], core_req_data_in[i], core_req_byteen_in[i], core_req_addr_in[i], core_req_rw_in[i]};
end
VX_onehot_mux #(
.DATAW (CORE_TAG_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1),
.COUNT (NUM_REQS)
) core_req_nc_mux (
.data_in (core_req_nc_mux_in),
.sel_in (core_req_nc_sel),
.data_out ({core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel})
);
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[D +: MEM_ADDR_WIDTH];
for (genvar i = 0; i < P; ++i) begin
assign mem_req_data_out[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = mem_req_valid_in ?
mem_req_data_in[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_req_data_in_sel;
end
if (D != 0) begin
wire [D-1:0] req_addr_idx = core_req_addr_in[core_req_nc_tid][D-1:0];
always @(*) begin
if (mem_req_valid_in) begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end else begin
mem_req_byteen_out_r = 0;
mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in[core_req_nc_tid];
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in[core_req_nc_tid]});
end
wire [D-1:0] req_addr_idx = core_req_addr_in_sel[D-1:0];
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_in_r;
always @(*) begin
mem_req_byteen_in_r = 0;
mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in_sel;
end
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel});
end else begin
always @(*) begin
if (mem_req_valid_in) begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end else begin
mem_req_byteen_out_r = core_req_byteen_in[core_req_nc_tid];
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in[core_req_nc_tid]});
end
end
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in_sel});
end
end else begin
always @(*) begin
if (mem_req_valid_in) begin
mem_req_rw_out_r = mem_req_rw_in;
mem_req_addr_out_r = mem_req_addr_in;
mem_req_data_out_r = mem_req_data_in;
end else begin
mem_req_rw_out_r = core_req_rw_in;
mem_req_addr_out_r = core_req_addr_in[0][D +: MEM_ADDR_WIDTH];
for (integer i = 0; i < P; ++i) begin
mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in;
end
end
end else begin
`UNUSED_VAR (core_req_nc_tid)
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in;
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in[0][D +: MEM_ADDR_WIDTH];
for (genvar i = 0; i < P; ++i) begin
assign mem_req_data_out[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = mem_req_valid_in ?
mem_req_data_in[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_req_data_in;
end
if (D != 0) begin
wire [D-1:0] req_addr_idx = core_req_addr_in[0][D-1:0];
always @(*) begin
if (mem_req_valid_in) begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end else begin
mem_req_byteen_out_r = 0;
mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in;
mem_req_tag_out_r = MEM_TAG_WIDTH'({req_addr_idx, core_req_tag_in});
end
wire [D-1:0] req_addr_idx = core_req_addr_in[0][D-1:0];
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_in_r;
always @(*) begin
mem_req_byteen_in_r = 0;
mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in;
end
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'({req_addr_idx, core_req_tag_in});
end else begin
always @(*) begin
if (mem_req_valid_in) begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end else begin
mem_req_byteen_out_r = core_req_byteen_in;
mem_req_tag_out_r = MEM_TAG_WIDTH'(core_req_tag_in);
end
end
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in;
assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'(core_req_tag_in);
end
end
assign mem_req_valid_out = mem_req_valid_out_r;
assign mem_req_rw_out = mem_req_rw_out_r;
assign mem_req_addr_out = mem_req_addr_out_r;
assign mem_req_byteen_out = mem_req_byteen_out_r;
assign mem_req_data_out = mem_req_data_out_r;
assign mem_req_tag_out = mem_req_tag_out_r;
assign mem_req_ready_in = mem_req_ready_in_r;
// core response handling
reg [NUM_REQS-1:0] core_rsp_valid_out_r;
reg [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_out_r;
reg [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_out_r;
reg [NUM_RSP_TAGS-1:0] core_rsp_ready_in_r;
wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];
if (NUM_REQS > 1) begin
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
reg [NUM_REQS-1:0] core_rsp_valid_in_r;
always @(*) begin
if (is_mem_rsp_nc) begin
core_rsp_valid_out_r = 0;
core_rsp_valid_out_r[rsp_tid] = 1;
for (integer i = 0; i < NUM_RSP_TAGS; ++i) begin
core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
end
core_rsp_ready_in_r = 0;
end else begin
core_rsp_valid_out_r = core_rsp_valid_in;
core_rsp_tag_out_r = core_rsp_tag_in;
core_rsp_ready_in_r = core_rsp_ready_out;
end
core_rsp_valid_in_r = 0;
core_rsp_valid_in_r[rsp_tid] = 1;
end
assign core_rsp_valid_out = is_mem_rsp_nc ? core_rsp_valid_in_r : core_rsp_valid_in;
assign core_rsp_ready_in = is_mem_rsp_nc ? '0 : core_rsp_ready_out;
end else begin
assign core_rsp_valid_out = is_mem_rsp_nc || core_rsp_valid_in;
assign core_rsp_ready_in = ~is_mem_rsp_nc && core_rsp_ready_out;
end
if (D != 0) begin
wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D];
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = is_mem_rsp_nc ?
mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_rsp_data_in[i];
end
end else begin
always @(*) begin
if (is_mem_rsp_nc) begin
core_rsp_valid_out_r = 1;
core_rsp_tag_out_r = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
core_rsp_ready_in_r = 0;
end else begin
core_rsp_valid_out_r = core_rsp_valid_in;
core_rsp_tag_out_r = core_rsp_tag_in;
core_rsp_ready_in_r = core_rsp_ready_out;
end
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_rsp_data_out[i] = is_mem_rsp_nc ? mem_rsp_data_in : core_rsp_data_in[i];
end
end
if (D != 0) begin
wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D];
always @(*) begin
if (is_mem_rsp_nc) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
core_rsp_data_out_r[i] = mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
end
end else begin
core_rsp_data_out_r = core_rsp_data_in;
end
end
end else begin
always @(*) begin
if (is_mem_rsp_nc) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
core_rsp_data_out_r[i] = mem_rsp_data_in;
end
end else begin
core_rsp_data_out_r = core_rsp_data_in;
end
end
for (genvar i = 0; i < NUM_RSP_TAGS; ++i) begin
assign core_rsp_tag_out[i] = is_mem_rsp_nc ? mem_rsp_tag_in[CORE_TAG_WIDTH-1:0] : core_rsp_tag_in[i];
end
assign core_rsp_valid_out = core_rsp_valid_out_r;
assign core_rsp_data_out = core_rsp_data_out_r;
assign core_rsp_tag_out = core_rsp_tag_out_r;
assign core_rsp_ready_in = core_rsp_ready_in_r;
// memory response handling

View file

@ -67,7 +67,6 @@ module VX_onehot_encoder #(
reg [LN-1:0] index_r;
if (REVERSE) begin
always @(*) begin
index_r = 'x;
for (integer i = N-1; i >= 0; --i) begin
@ -76,7 +75,6 @@ module VX_onehot_encoder #(
end
end
end
end else begin
always @(*) begin
index_r = 'x;

View file

@ -0,0 +1,20 @@
`include "VX_platform.vh"
// One-hot multiplexer.
//
// Selects one of COUNT data words using a one-hot select vector: when
// sel_in[i] is the only bit set, data_out equals data_in[i].
//
// Parameters:
//   DATAW - width in bits of each data word
//   COUNT - number of data words (and number of select bits)
//
// Ports:
//   data_in  - COUNT packed words of DATAW bits each
//   sel_in   - select vector, one bit per input word
//   data_out - the selected word
//
// Implementation note: the selection is an AND-OR reduction rather than a
// shared net driven with 'z by every deselected input. This keeps the output
// fully driven for every select pattern: all-zero select yields 0, and if
// more than one select bit is set the output is the bitwise OR of the
// selected words instead of driver contention.
module VX_onehot_mux #(
    parameter DATAW = 1,
    parameter COUNT = 1
) (
    input wire [COUNT-1:0][DATAW-1:0] data_in,
    input wire [COUNT-1:0]            sel_in,
    output wire [DATAW-1:0]           data_out
);
    if (COUNT > 1) begin
        reg [DATAW-1:0] data_out_r;
        always @(*) begin
            data_out_r = '0;
            for (integer i = 0; i < COUNT; ++i) begin
                // gate each input with its select bit, then OR into the result
                data_out_r = data_out_r | ({DATAW{sel_in[i]}} & data_in[i]);
            end
        end
        assign data_out = data_out_r;
    end else begin
        // single input: the select bit carries no information
        `UNUSED_VAR (sel_in)
        assign data_out = data_in;
    end
endmodule

View file

@ -24,8 +24,7 @@ module VX_stream_arbiter #(
if (NUM_REQS > 1) begin
wire sel_valid;
wire sel_ready;
wire [LOG_NUM_REQS-1:0] sel_idx;
wire [NUM_REQS-1:0] sel_1hot;
wire [NUM_REQS-1:0] sel_1hot;
if (TYPE == "X") begin
VX_fixed_arbiter #(
@ -37,8 +36,8 @@ module VX_stream_arbiter #(
.requests (valid_in),
.enable (sel_ready),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot (sel_1hot)
.grant_onehot (sel_1hot),
`UNUSED_PIN (grant_index)
);
end else if (TYPE == "R") begin
VX_rr_arbiter #(
@ -50,8 +49,8 @@ module VX_stream_arbiter #(
.requests (valid_in),
.enable (sel_ready),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot (sel_1hot)
.grant_onehot (sel_1hot),
`UNUSED_PIN (grant_index)
);
end else if (TYPE == "F") begin
VX_fair_arbiter #(
@ -63,8 +62,8 @@ module VX_stream_arbiter #(
.requests (valid_in),
.enable (sel_ready),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot (sel_1hot)
.grant_onehot (sel_1hot),
`UNUSED_PIN (grant_index)
);
end else if (TYPE == "M") begin
VX_matrix_arbiter #(
@ -76,13 +75,24 @@ module VX_stream_arbiter #(
.requests (valid_in),
.enable (sel_ready),
.grant_valid (sel_valid),
.grant_index (sel_idx),
.grant_onehot (sel_1hot)
.grant_onehot (sel_1hot),
`UNUSED_PIN (grant_index)
);
end else begin
$error ("invalid parameter");
end
wire [DATAW-1:0] data_in_sel;
VX_onehot_mux #(
.DATAW (DATAW),
.COUNT (NUM_REQS)
) data_in_mux (
.data_in (data_in),
.sel_in (sel_1hot),
.data_out (data_in_sel)
);
VX_skid_buffer #(
.DATAW (DATAW),
.PASSTHRU (!BUFFERED)
@ -90,7 +100,7 @@ module VX_stream_arbiter #(
.clk (clk),
.reset (reset),
.valid_in (sel_valid),
.data_in (data_in[sel_idx]),
.data_in (data_in_sel),
.ready_in (sel_ready),
.valid_out (valid_out),
.data_out (data_out),

View file

@ -189,7 +189,7 @@ int main (int argc, char **argv) {
for (int i = 0; i < size; ++i) {
float ref = h_a[i];
int pos = 0;
for (uint32_t j = 0; j < size; ++j) {
for (int j = 0; j < size; ++j) {
float cur = h_a[j];
pos += (cur < ref) || (cur == ref && j < i);
}