mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
using onehot multiplexer to reduce critical path
This commit is contained in:
parent
dc34c5c5bd
commit
10e9ee124b
11 changed files with 161 additions and 207 deletions
|
@ -51,20 +51,17 @@ module VX_alu_unit #(
|
|||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [32:0] shr_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [32:0] shr_value = $signed(shr_in1) >>> alu_in2_imm[i][4:0];
|
||||
`IGNORE_WARNINGS_END
|
||||
assign shr_result[i] = shr_value[31:0];
|
||||
assign shr_result[i] = 32'($signed(shr_in1) >>> alu_in2_imm[i][4:0]);
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
case (alu_op)
|
||||
`ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
|
||||
`ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
|
||||
`ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
|
||||
`ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
|
||||
`ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
|
||||
`ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
|
||||
//`ALU_SLL,
|
||||
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
|
||||
default: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
|
|
@ -53,6 +53,8 @@ module VX_decode #(
|
|||
|
||||
wire [19:0] upper_imm = {func7, rs2, rs1, func3};
|
||||
wire [11:0] alu_imm = (func3[0] && ~func3[1]) ? {{7{1'b0}}, rs2} : u_12;
|
||||
wire [11:0] s_imm = {func7, rd};
|
||||
wire [12:0] b_imm = {instr[31], instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
|
||||
wire [11:0] jalr_imm = {func7, rs2};
|
||||
|
||||
|
@ -70,7 +72,7 @@ module VX_decode #(
|
|||
use_PC = 0;
|
||||
use_rd = 0;
|
||||
is_join = 0;
|
||||
is_wstall = 0;
|
||||
is_wstall = 0;
|
||||
used_regs = 0;
|
||||
|
||||
case (opcode)
|
||||
|
@ -184,7 +186,7 @@ module VX_decode #(
|
|||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
is_wstall = 1;
|
||||
imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
imm = {{19{b_imm[12]}}, b_imm};
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
|
@ -245,7 +247,7 @@ module VX_decode #(
|
|||
`INST_S: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `OP_BITS'({1'b1, func3});
|
||||
imm = {{20{func7[6]}}, func7, rd};
|
||||
imm = {{20{s_imm[6]}}, s_imm};
|
||||
`USED_IREG (rs1);
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (opcode[2]) begin
|
||||
|
|
|
@ -168,7 +168,7 @@ module VX_fpu_unit #(
|
|||
|
||||
fflags_t rsp_fflags;
|
||||
always @(*) begin
|
||||
rsp_fflags = 0;
|
||||
rsp_fflags = '0;
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
if (rsp_tmask[i]) begin
|
||||
rsp_fflags.NX |= fflags[i].NX;
|
||||
|
|
|
@ -95,7 +95,9 @@ module VX_to_mem #(
|
|||
|
||||
always @(*) begin
|
||||
mem_rsp_data_out_n = mem_rsp_data_out_r;
|
||||
mem_rsp_data_out_n[rsp_ctr] = mem_rsp_data_in;
|
||||
if (mem_rsp_in_fire) begin
|
||||
mem_rsp_data_out_n[rsp_ctr] = mem_rsp_data_in;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
|
@ -108,9 +110,9 @@ module VX_to_mem #(
|
|||
end
|
||||
if (mem_rsp_in_fire) begin
|
||||
rsp_ctr <= rsp_ctr + 1;
|
||||
mem_rsp_data_out_r <= mem_rsp_data_out_n;
|
||||
end
|
||||
end
|
||||
mem_rsp_data_out_r <= mem_rsp_data_out_n;
|
||||
end
|
||||
|
||||
reg [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_r;
|
||||
|
|
|
@ -70,7 +70,7 @@ localparam MMIO_STATUS = `AFU_IMAGE_MMIO_STATUS;
|
|||
|
||||
localparam COUT_TID_WIDTH = $clog2(`IO_COUT_SIZE);
|
||||
localparam COUT_QUEUE_DATAW = COUT_TID_WIDTH + 8;
|
||||
localparam COUT_QUEUE_SIZE = 256;
|
||||
localparam COUT_QUEUE_SIZE = 64;
|
||||
|
||||
localparam MMIO_SCOPE_READ = `AFU_IMAGE_MMIO_SCOPE_READ;
|
||||
localparam MMIO_SCOPE_WRITE = `AFU_IMAGE_MMIO_SCOPE_WRITE;
|
||||
|
@ -470,9 +470,7 @@ wire vx_mem_is_cout;
|
|||
wire vx_mem_req_valid_qual;
|
||||
wire vx_mem_req_ready_qual;
|
||||
|
||||
assign vx_mem_req_valid_qual = vx_mem_req_valid
|
||||
&& vx_started
|
||||
&& ~vx_mem_is_cout;
|
||||
assign vx_mem_req_valid_qual = vx_mem_req_valid && vx_started;
|
||||
|
||||
assign vx_mem_req_ready = vx_mem_is_cout ? ~cout_q_full : vx_mem_req_ready_qual;
|
||||
|
||||
|
@ -534,8 +532,9 @@ VX_mem_arb #(
|
|||
.DATA_WIDTH (LMEM_LINE_WIDTH),
|
||||
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
|
||||
.TAG_IN_WIDTH (AVS_REQ_TAGW),
|
||||
.BUFFERED_REQ (1),
|
||||
.BUFFERED_RSP (1)
|
||||
.BUFFERED_REQ (0),
|
||||
.BUFFERED_RSP (0),
|
||||
.TYPE ("X")
|
||||
) mem_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
@ -918,7 +917,7 @@ Vortex #() vortex (
|
|||
// COUT HANDLING //////////////////////////////////////////////////////////////
|
||||
|
||||
wire [COUT_TID_WIDTH-1:0] cout_tid;
|
||||
wire [7:0] cout_char;
|
||||
reg [7:0] cout_char;
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.N (`VX_MEM_BYTEEN_WIDTH)
|
||||
|
@ -928,8 +927,14 @@ VX_onehot_encoder #(
|
|||
`UNUSED_PIN (valid)
|
||||
);
|
||||
|
||||
wire [`VX_MEM_BYTEEN_WIDTH-1:0][7:0] vx_mem_req_data_ar = vx_mem_req_data;
|
||||
assign cout_char = vx_mem_req_data_ar[cout_tid];
|
||||
VX_onehot_mux #(
|
||||
.DATAW (8),
|
||||
.COUNT (`VX_MEM_BYTEEN_WIDTH)
|
||||
) cout_char_mux (
|
||||
.data_in (vx_mem_req_data),
|
||||
.sel_in (vx_mem_req_byteen),
|
||||
.data_out (cout_char)
|
||||
);
|
||||
|
||||
assign vx_mem_is_cout = (vx_mem_req_addr == `VX_MEM_ADDR_WIDTH'(`IO_COUT_ADDR >> (32 - `VX_MEM_ADDR_WIDTH)));
|
||||
|
||||
|
@ -943,8 +948,8 @@ wire cout_q_pop = cp2af_sRxPort.c0.mmioRdValid
|
|||
&& ~cout_q_empty;
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (COUT_QUEUE_DATAW),
|
||||
.SIZE (COUT_QUEUE_SIZE)
|
||||
.DATAW (COUT_QUEUE_DATAW),
|
||||
.SIZE (COUT_QUEUE_SIZE)
|
||||
) cout_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
1
hw/rtl/cache/VX_bank.v
vendored
1
hw/rtl/cache/VX_bank.v
vendored
|
@ -188,6 +188,7 @@ module VX_bank #(
|
|||
wire creq_pop_unqual = !mshr_pop_unqual && !mrsq_pop_unqual && !creq_empty && !flush_enable;
|
||||
|
||||
wire is_miss_st1 = valid_st1 && (miss_st1 || force_miss_st1);
|
||||
|
||||
assign mshr_pop = mshr_pop_unqual
|
||||
&& !(is_miss_st1 && is_mshr_st1) // do not schedule another mshr request if the previous one missed
|
||||
&& !crsq_in_stall; // ensure core response ready
|
||||
|
|
255
hw/rtl/cache/VX_nc_bypass.v
vendored
255
hw/rtl/cache/VX_nc_bypass.v
vendored
|
@ -93,7 +93,6 @@ module VX_nc_bypass #(
|
|||
|
||||
// core request handling
|
||||
|
||||
reg [NUM_REQS-1:0] core_req_ready_in_r;
|
||||
wire [NUM_REQS-1:0] core_req_valid_in_nc;
|
||||
wire [NUM_REQS-1:0] core_req_nc_sel;
|
||||
wire [NUM_REQS-1:0] core_req_nc_tids;
|
||||
|
@ -115,210 +114,130 @@ module VX_nc_bypass #(
|
|||
.valid_out (core_req_nc_valid)
|
||||
);
|
||||
|
||||
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_tids;
|
||||
|
||||
`UNUSED_VAR (core_req_nc_sel)
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
always @(*) begin
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
if (core_req_valid_in_nc[i]) begin
|
||||
core_req_ready_in_r[i] = ~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i];
|
||||
end else begin
|
||||
core_req_ready_in_r[i] = core_req_ready_out[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (core_req_nc_tid)
|
||||
always @(*) begin
|
||||
if (core_req_valid_in_nc) begin
|
||||
core_req_ready_in_r = ~mem_req_valid_in && mem_req_ready_out;
|
||||
end else begin
|
||||
core_req_ready_in_r = core_req_ready_out;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_tids;
|
||||
assign core_req_rw_out = core_req_rw_in;
|
||||
assign core_req_addr_out = core_req_addr_in;
|
||||
assign core_req_byteen_out = core_req_byteen_in;
|
||||
assign core_req_data_out = core_req_data_in;
|
||||
assign core_req_tag_out = core_req_tag_in;
|
||||
assign core_req_ready_in = core_req_ready_in_r;
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_ready_in[i] = core_req_valid_in_nc[i] ?
|
||||
(~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i]) : core_req_ready_out[i];
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (core_req_nc_sel)
|
||||
assign core_req_ready_in = core_req_valid_in_nc ? (~mem_req_valid_in && mem_req_ready_out) : core_req_ready_out;
|
||||
end
|
||||
|
||||
// memory request handling
|
||||
|
||||
reg mem_req_valid_out_r;
|
||||
reg mem_req_rw_out_r;
|
||||
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_out_r;
|
||||
reg [MEM_ADDR_WIDTH-1:0] mem_req_addr_out_r;
|
||||
reg [MEM_DATA_WIDTH-1:0] mem_req_data_out_r;
|
||||
reg [MEM_TAG_WIDTH-1:0] mem_req_tag_out_r;
|
||||
reg mem_req_ready_in_r;
|
||||
|
||||
always @(*) begin
|
||||
if (mem_req_valid_in) begin
|
||||
mem_req_valid_out_r = 1;
|
||||
mem_req_ready_in_r = mem_req_ready_out;
|
||||
end else begin
|
||||
mem_req_valid_out_r = core_req_nc_valid;
|
||||
mem_req_ready_in_r = 0;
|
||||
end
|
||||
end
|
||||
assign mem_req_valid_out = mem_req_valid_in || core_req_nc_valid;
|
||||
assign mem_req_ready_in = mem_req_valid_in && mem_req_ready_out;
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
always @(*) begin
|
||||
if (mem_req_valid_in) begin
|
||||
mem_req_rw_out_r = mem_req_rw_in;
|
||||
mem_req_addr_out_r = mem_req_addr_in;
|
||||
mem_req_data_out_r = mem_req_data_in;
|
||||
end else begin
|
||||
mem_req_rw_out_r = core_req_rw_in[core_req_nc_tid];
|
||||
mem_req_addr_out_r = core_req_addr_in[core_req_nc_tid][D +: MEM_ADDR_WIDTH];
|
||||
for (integer i = 0; i < P; ++i) begin
|
||||
mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in[core_req_nc_tid];
|
||||
end
|
||||
end
|
||||
|
||||
wire [CORE_TAG_WIDTH-1:0] core_req_tag_in_sel;
|
||||
wire [CORE_DATA_WIDTH-1:0] core_req_data_in_sel;
|
||||
wire [CORE_DATA_SIZE-1:0] core_req_byteen_in_sel;
|
||||
wire [CORE_ADDR_WIDTH-1:0] core_req_addr_in_sel;
|
||||
wire core_req_rw_in_sel;
|
||||
|
||||
wire [NUM_REQS-1:0][(CORE_TAG_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1)-1:0] core_req_nc_mux_in;
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_req_nc_mux_in[i] = {core_req_tag_in[i], core_req_data_in[i], core_req_byteen_in[i], core_req_addr_in[i], core_req_rw_in[i]};
|
||||
end
|
||||
|
||||
VX_onehot_mux #(
|
||||
.DATAW (CORE_TAG_WIDTH + CORE_DATA_WIDTH + CORE_DATA_SIZE + CORE_ADDR_WIDTH + 1),
|
||||
.COUNT (NUM_REQS)
|
||||
) core_req_nc_mux (
|
||||
.data_in (core_req_nc_mux_in),
|
||||
.sel_in (core_req_nc_sel),
|
||||
.data_out ({core_req_tag_in_sel, core_req_data_in_sel, core_req_byteen_in_sel, core_req_addr_in_sel, core_req_rw_in_sel})
|
||||
);
|
||||
|
||||
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in_sel;
|
||||
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in_sel[D +: MEM_ADDR_WIDTH];
|
||||
|
||||
for (genvar i = 0; i < P; ++i) begin
|
||||
assign mem_req_data_out[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = mem_req_valid_in ?
|
||||
mem_req_data_in[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_req_data_in_sel;
|
||||
end
|
||||
|
||||
if (D != 0) begin
|
||||
wire [D-1:0] req_addr_idx = core_req_addr_in[core_req_nc_tid][D-1:0];
|
||||
always @(*) begin
|
||||
if (mem_req_valid_in) begin
|
||||
mem_req_byteen_out_r = mem_req_byteen_in;
|
||||
mem_req_tag_out_r = mem_req_tag_in;
|
||||
end else begin
|
||||
mem_req_byteen_out_r = 0;
|
||||
mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in[core_req_nc_tid];
|
||||
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in[core_req_nc_tid]});
|
||||
end
|
||||
wire [D-1:0] req_addr_idx = core_req_addr_in_sel[D-1:0];
|
||||
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_in_r;
|
||||
always @(*) begin
|
||||
mem_req_byteen_in_r = 0;
|
||||
mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in_sel;
|
||||
end
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in_sel});
|
||||
end else begin
|
||||
always @(*) begin
|
||||
if (mem_req_valid_in) begin
|
||||
mem_req_byteen_out_r = mem_req_byteen_in;
|
||||
mem_req_tag_out_r = mem_req_tag_in;
|
||||
end else begin
|
||||
mem_req_byteen_out_r = core_req_byteen_in[core_req_nc_tid];
|
||||
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in[core_req_nc_tid]});
|
||||
end
|
||||
end
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in_sel;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in_sel});
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
if (mem_req_valid_in) begin
|
||||
mem_req_rw_out_r = mem_req_rw_in;
|
||||
mem_req_addr_out_r = mem_req_addr_in;
|
||||
mem_req_data_out_r = mem_req_data_in;
|
||||
end else begin
|
||||
mem_req_rw_out_r = core_req_rw_in;
|
||||
mem_req_addr_out_r = core_req_addr_in[0][D +: MEM_ADDR_WIDTH];
|
||||
for (integer i = 0; i < P; ++i) begin
|
||||
mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
`UNUSED_VAR (core_req_nc_tid)
|
||||
|
||||
assign mem_req_rw_out = mem_req_valid_in ? mem_req_rw_in : core_req_rw_in;
|
||||
assign mem_req_addr_out = mem_req_valid_in ? mem_req_addr_in : core_req_addr_in[0][D +: MEM_ADDR_WIDTH];
|
||||
|
||||
for (genvar i = 0; i < P; ++i) begin
|
||||
assign mem_req_data_out[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = mem_req_valid_in ?
|
||||
mem_req_data_in[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_req_data_in;
|
||||
end
|
||||
|
||||
if (D != 0) begin
|
||||
wire [D-1:0] req_addr_idx = core_req_addr_in[0][D-1:0];
|
||||
always @(*) begin
|
||||
if (mem_req_valid_in) begin
|
||||
mem_req_byteen_out_r = mem_req_byteen_in;
|
||||
mem_req_tag_out_r = mem_req_tag_in;
|
||||
end else begin
|
||||
mem_req_byteen_out_r = 0;
|
||||
mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in;
|
||||
mem_req_tag_out_r = MEM_TAG_WIDTH'({req_addr_idx, core_req_tag_in});
|
||||
end
|
||||
wire [D-1:0] req_addr_idx = core_req_addr_in[0][D-1:0];
|
||||
reg [MEM_DATA_SIZE-1:0] mem_req_byteen_in_r;
|
||||
always @(*) begin
|
||||
mem_req_byteen_in_r = 0;
|
||||
mem_req_byteen_in_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in;
|
||||
end
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : mem_req_byteen_in_r;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'({req_addr_idx, core_req_tag_in});
|
||||
end else begin
|
||||
always @(*) begin
|
||||
if (mem_req_valid_in) begin
|
||||
mem_req_byteen_out_r = mem_req_byteen_in;
|
||||
mem_req_tag_out_r = mem_req_tag_in;
|
||||
end else begin
|
||||
mem_req_byteen_out_r = core_req_byteen_in;
|
||||
mem_req_tag_out_r = MEM_TAG_WIDTH'(core_req_tag_in);
|
||||
end
|
||||
end
|
||||
assign mem_req_byteen_out = mem_req_valid_in ? mem_req_byteen_in : core_req_byteen_in;
|
||||
assign mem_req_tag_out = mem_req_valid_in ? mem_req_tag_in : MEM_TAG_WIDTH'(core_req_tag_in);
|
||||
end
|
||||
end
|
||||
|
||||
assign mem_req_valid_out = mem_req_valid_out_r;
|
||||
assign mem_req_rw_out = mem_req_rw_out_r;
|
||||
assign mem_req_addr_out = mem_req_addr_out_r;
|
||||
assign mem_req_byteen_out = mem_req_byteen_out_r;
|
||||
assign mem_req_data_out = mem_req_data_out_r;
|
||||
assign mem_req_tag_out = mem_req_tag_out_r;
|
||||
assign mem_req_ready_in = mem_req_ready_in_r;
|
||||
|
||||
// core response handling
|
||||
|
||||
reg [NUM_REQS-1:0] core_rsp_valid_out_r;
|
||||
reg [NUM_REQS-1:0][CORE_DATA_WIDTH-1:0] core_rsp_data_out_r;
|
||||
reg [NUM_RSP_TAGS-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag_out_r;
|
||||
reg [NUM_RSP_TAGS-1:0] core_rsp_ready_in_r;
|
||||
|
||||
wire is_mem_rsp_nc = mem_rsp_valid_in && mem_rsp_tag_in[NC_TAG_BIT];
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
|
||||
reg [NUM_REQS-1:0] core_rsp_valid_in_r;
|
||||
always @(*) begin
|
||||
if (is_mem_rsp_nc) begin
|
||||
core_rsp_valid_out_r = 0;
|
||||
core_rsp_valid_out_r[rsp_tid] = 1;
|
||||
for (integer i = 0; i < NUM_RSP_TAGS; ++i) begin
|
||||
core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
|
||||
end
|
||||
core_rsp_ready_in_r = 0;
|
||||
end else begin
|
||||
core_rsp_valid_out_r = core_rsp_valid_in;
|
||||
core_rsp_tag_out_r = core_rsp_tag_in;
|
||||
core_rsp_ready_in_r = core_rsp_ready_out;
|
||||
end
|
||||
core_rsp_valid_in_r = 0;
|
||||
core_rsp_valid_in_r[rsp_tid] = 1;
|
||||
end
|
||||
assign core_rsp_valid_out = is_mem_rsp_nc ? core_rsp_valid_in_r : core_rsp_valid_in;
|
||||
assign core_rsp_ready_in = is_mem_rsp_nc ? '0 : core_rsp_ready_out;
|
||||
end else begin
|
||||
assign core_rsp_valid_out = is_mem_rsp_nc || core_rsp_valid_in;
|
||||
assign core_rsp_ready_in = ~is_mem_rsp_nc && core_rsp_ready_out;
|
||||
end
|
||||
|
||||
if (D != 0) begin
|
||||
wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D];
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_data_out[i] = is_mem_rsp_nc ?
|
||||
mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] : core_rsp_data_in[i];
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
if (is_mem_rsp_nc) begin
|
||||
core_rsp_valid_out_r = 1;
|
||||
core_rsp_tag_out_r = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
|
||||
core_rsp_ready_in_r = 0;
|
||||
end else begin
|
||||
core_rsp_valid_out_r = core_rsp_valid_in;
|
||||
core_rsp_tag_out_r = core_rsp_tag_in;
|
||||
core_rsp_ready_in_r = core_rsp_ready_out;
|
||||
end
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
assign core_rsp_data_out[i] = is_mem_rsp_nc ? mem_rsp_data_in : core_rsp_data_in[i];
|
||||
end
|
||||
end
|
||||
|
||||
if (D != 0) begin
|
||||
wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D];
|
||||
always @(*) begin
|
||||
if (is_mem_rsp_nc) begin
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
core_rsp_data_out_r[i] = mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
|
||||
end
|
||||
end else begin
|
||||
core_rsp_data_out_r = core_rsp_data_in;
|
||||
end
|
||||
end
|
||||
end else begin
|
||||
always @(*) begin
|
||||
if (is_mem_rsp_nc) begin
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
core_rsp_data_out_r[i] = mem_rsp_data_in;
|
||||
end
|
||||
end else begin
|
||||
core_rsp_data_out_r = core_rsp_data_in;
|
||||
end
|
||||
end
|
||||
for (genvar i = 0; i < NUM_RSP_TAGS; ++i) begin
|
||||
assign core_rsp_tag_out[i] = is_mem_rsp_nc ? mem_rsp_tag_in[CORE_TAG_WIDTH-1:0] : core_rsp_tag_in[i];
|
||||
end
|
||||
|
||||
assign core_rsp_valid_out = core_rsp_valid_out_r;
|
||||
assign core_rsp_data_out = core_rsp_data_out_r;
|
||||
assign core_rsp_tag_out = core_rsp_tag_out_r;
|
||||
assign core_rsp_ready_in = core_rsp_ready_in_r;
|
||||
|
||||
// memory response handling
|
||||
|
||||
|
|
|
@ -67,7 +67,6 @@ module VX_onehot_encoder #(
|
|||
reg [LN-1:0] index_r;
|
||||
|
||||
if (REVERSE) begin
|
||||
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
for (integer i = N-1; i >= 0; --i) begin
|
||||
|
@ -76,7 +75,6 @@ module VX_onehot_encoder #(
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
end else begin
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
|
|
20
hw/rtl/libs/VX_onehot_mux.v
Normal file
20
hw/rtl/libs/VX_onehot_mux.v
Normal file
|
@ -0,0 +1,20 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
// One-hot multiplexer.
//
// Forwards to data_out the data_in entry whose sel_in bit is set. Each entry
// gets its own continuous assignment that drives data_out only while its
// select bit is high and is high-impedance otherwise, so the selected entry
// wins through net resolution. When no select bit is set, every driver is
// high-impedance. NOTE(review): sel_in is presumably one-hot (per the module
// name); nothing in this module enforces that.
//
// Parameters:
//   DATAW - width in bits of each data entry
//   COUNT - number of entries to choose from
module VX_onehot_mux #(
    parameter DATAW = 1,
    parameter COUNT = 1
) (
    input wire [COUNT-1:0][DATAW-1:0] data_in,   // COUNT packed entries of DATAW bits
    input wire [COUNT-1:0]            sel_in,    // one select bit per entry
    output wire [DATAW-1:0]           data_out   // selected entry
);
    if (COUNT <= 1) begin
        // A single entry needs no selection: pass it straight through and
        // mark the select input as intentionally unused.
        `UNUSED_VAR (sel_in)
        assign data_out = data_in[0];
    end else begin
        for (genvar k = 0; k < COUNT; k = k + 1) begin
            // One gated driver per entry, all sharing the data_out net.
            assign data_out = sel_in[k] ? data_in[k] : {DATAW{1'bz}};
        end
    end

endmodule
|
|
@ -24,8 +24,7 @@ module VX_stream_arbiter #(
|
|||
if (NUM_REQS > 1) begin
|
||||
wire sel_valid;
|
||||
wire sel_ready;
|
||||
wire [LOG_NUM_REQS-1:0] sel_idx;
|
||||
wire [NUM_REQS-1:0] sel_1hot;
|
||||
wire [NUM_REQS-1:0] sel_1hot;
|
||||
|
||||
if (TYPE == "X") begin
|
||||
VX_fixed_arbiter #(
|
||||
|
@ -37,8 +36,8 @@ module VX_stream_arbiter #(
|
|||
.requests (valid_in),
|
||||
.enable (sel_ready),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_index (sel_idx),
|
||||
.grant_onehot (sel_1hot)
|
||||
.grant_onehot (sel_1hot),
|
||||
`UNUSED_PIN (grant_index)
|
||||
);
|
||||
end else if (TYPE == "R") begin
|
||||
VX_rr_arbiter #(
|
||||
|
@ -50,8 +49,8 @@ module VX_stream_arbiter #(
|
|||
.requests (valid_in),
|
||||
.enable (sel_ready),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_index (sel_idx),
|
||||
.grant_onehot (sel_1hot)
|
||||
.grant_onehot (sel_1hot),
|
||||
`UNUSED_PIN (grant_index)
|
||||
);
|
||||
end else if (TYPE == "F") begin
|
||||
VX_fair_arbiter #(
|
||||
|
@ -63,8 +62,8 @@ module VX_stream_arbiter #(
|
|||
.requests (valid_in),
|
||||
.enable (sel_ready),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_index (sel_idx),
|
||||
.grant_onehot (sel_1hot)
|
||||
.grant_onehot (sel_1hot),
|
||||
`UNUSED_PIN (grant_index)
|
||||
);
|
||||
end else if (TYPE == "M") begin
|
||||
VX_matrix_arbiter #(
|
||||
|
@ -76,13 +75,24 @@ module VX_stream_arbiter #(
|
|||
.requests (valid_in),
|
||||
.enable (sel_ready),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_index (sel_idx),
|
||||
.grant_onehot (sel_1hot)
|
||||
.grant_onehot (sel_1hot),
|
||||
`UNUSED_PIN (grant_index)
|
||||
);
|
||||
end else begin
|
||||
$error ("invalid parameter");
|
||||
end
|
||||
|
||||
wire [DATAW-1:0] data_in_sel;
|
||||
|
||||
VX_onehot_mux #(
|
||||
.DATAW (DATAW),
|
||||
.COUNT (NUM_REQS)
|
||||
) data_in_mux (
|
||||
.data_in (data_in),
|
||||
.sel_in (sel_1hot),
|
||||
.data_out (data_in_sel)
|
||||
);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.PASSTHRU (!BUFFERED)
|
||||
|
@ -90,7 +100,7 @@ module VX_stream_arbiter #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (sel_valid),
|
||||
.data_in (data_in[sel_idx]),
|
||||
.data_in (data_in_sel),
|
||||
.ready_in (sel_ready),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
|
|
|
@ -189,7 +189,7 @@ int main (int argc, char **argv) {
|
|||
for (int i = 0; i < size; ++i) {
|
||||
float ref = h_a[i];
|
||||
int pos = 0;
|
||||
for (uint32_t j = 0; j < size; ++j) {
|
||||
for (int j = 0; j < size; ++j) {
|
||||
float cur = h_a[j];
|
||||
pos += (cur < ref) || (cur == ref && j < i);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue