This commit is contained in:
Blaise Tine 2021-06-28 05:50:32 -04:00
commit 714dfd3627
47 changed files with 1377 additions and 14807 deletions

View file

@ -1,4 +1,4 @@
[![Build Status](https://travis-ci.org/vortexgpgpu/vortex.svg?branch=master)](https://travis-ci.org/vortexgpgpu/vortex)
[![Build Status](https://travis-ci.com/vortexgpgpu/vortex.svg?branch=master)](https://travis-ci.com/vortexgpgpu/vortex)
[![codecov](https://codecov.io/gh/vortexgpgpu/vortex/branch/master/graph/badge.svg)](https://codecov.io/gh/vortexgpgpu/vortex)
# Vortex RISC-V GPGPU

View file

@ -11,6 +11,9 @@ make -C tests/riscv/isa run
make -C tests/opencl run
make -C simX run-tests
# basic pipeline stress
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=sgemm --args="-n128"
# warp/threads configurations
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=2 --app=demo
./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=1 --warps=2 --threads=8 --app=demo
@ -41,7 +44,10 @@ CONFIGS=-DEXT_F_DISABLE make -C hw/simulate
# disable shared memory
CONFIGS=-DSM_ENABLE=0 make -C hw/simulate
# using FPNEW core
# using Default FPU core
FPU_CORE=FPU_DEFAULT ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# using FPNEW FPU core
FPU_CORE=FPU_FPNEW ./ci/blackbox.sh --driver=rtlsim --cores=1 --app=dogfood
# test 128-bit MEM block
@ -54,7 +60,7 @@ CONFIGS="-DMEM_BLOCK_SIZE=16 -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=128 -DPLAT
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=27" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
# test 128-bit DRAM block
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1 -DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=128 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=28" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
CONFIGS="-DPLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH=128 -DPLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH=28 -DPLATFORM_PARAM_LOCAL_MEMORY_BANKS=1" ./ci/blackbox.sh --driver=vlsim --cores=1 --app=demo
# test verilator reset values
CONFIGS="-DVERILATOR_RESET_VALUE=0" ./ci/blackbox.sh --driver=vlsim --cores=4 --app=sgemm

View file

@ -75,6 +75,10 @@ endif
VL_FLAGS += -DNOPAE
CFLAGS += -DNOPAE
# ALU backend
VL_FLAGS += -DIMUL_DPI
VL_FLAGS += -DIDIV_DPI
# FPU backend
FPU_CORE ?= FPU_DPI
VL_FLAGS += -D$(FPU_CORE)

View file

@ -154,12 +154,14 @@ void opae_sim::write_mmio64(uint32_t mmio_num, uint64_t offset, uint64_t value)
void opae_sim::reset() {
cci_reads_.clear();
cci_writes_.clear();
vortex_afu_->vcp2af_sRxPort_c0_mmioRdValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_mmioWrValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_rspValid = 0;
vortex_afu_->vcp2af_sRxPort_c1_rspValid = 0;
vortex_afu_->vcp2af_sRxPort_c0_TxAlmFull = 0;
vortex_afu_->vcp2af_sRxPort_c1_TxAlmFull = 0;
for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) {
for (int b = 0; b < MEMORY_BANKS; ++b) {
mem_reads_[b].clear();
vortex_afu_->avs_readdatavalid[b] = 0;
vortex_afu_->avs_waitrequest[b] = 0;
@ -284,7 +286,7 @@ void opae_sim::sTxPort_bus() {
}
void opae_sim::avs_bus() {
for (int b = 0; b < PLATFORM_PARAM_LOCAL_MEMORY_BANKS; ++b) {
for (int b = 0; b < MEMORY_BANKS; ++b) {
// update memory responses schedule
for (auto& rsp : mem_reads_[b]) {
if (rsp.cycles_left > 0)

View file

@ -18,6 +18,14 @@
#include <list>
#include <unordered_map>
#ifndef MEMORY_BANKS
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#else
#define MEMORY_BANKS 2
#endif
#endif
#undef MEM_BLOCK_SIZE
#define MEM_BLOCK_SIZE (PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH / 8)
@ -81,7 +89,7 @@ private:
std::unordered_map<int64_t, host_buffer_t> host_buffers_;
int64_t host_buffer_ids_;
std::list<mem_rd_req_t> mem_reads_ [PLATFORM_PARAM_LOCAL_MEMORY_BANKS];
std::list<mem_rd_req_t> mem_reads_ [MEMORY_BANKS];
std::list<cci_rd_req_t> cci_reads_;

View file

@ -64,6 +64,10 @@ ifdef PERF
CFLAGS += -DPERF_ENABLE
endif
# ALU backend
VL_FLAGS += -DIMUL_DPI
VL_FLAGS += -DIDIV_DPI
# FPU backend
FPU_CORE ?= FPU_DPI
VL_FLAGS += -D$(FPU_CORE)

View file

@ -9,6 +9,9 @@
#include "VX_config.h"
extern "C" {
void dpi_imul(int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth);
void dpi_idiv(int a, int b, bool is_signed, int* quotient, int* remainder);
int dpi_register();
void dpi_assert(int inst, bool cond, int delay);
}
@ -81,4 +84,53 @@ void dpi_assert(int inst, bool cond, int delay) {
printf("delayed assertion at %s!\n", svGetNameFromScope(svGetScope()));
std::abort();
}
}
void dpi_imul(int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth) {
uint64_t first = a;
uint64_t second = b;
if (is_signed_a && (a & 0x80000000)) {
first |= 0xFFFFFFFF00000000;
}
if (is_signed_b && (b & 0x80000000)) {
second |= 0xFFFFFFFF00000000;
}
uint64_t result;
if (is_signed_a || is_signed_b) {
result = (int64_t)first * (int64_t)second;
} else {
result = first * second;
}
*resultl = result & 0xFFFFFFFF;
*resulth = (result >> 32) & 0xFFFFFFFF;
}
void dpi_idiv(int a, int b, bool is_signed, int* quotient, int* remainder) {
uint32_t dividen = a;
uint32_t divisor = b;
if (is_signed) {
if (b == 0) {
*quotient = -1;
*remainder = dividen;
} else if (dividen == 0x80000000 && divisor == 0xffffffff) {
*remainder = 0;
*quotient = dividen;
} else {
*quotient = (int32_t)dividen / (int32_t)divisor;
*remainder = (int32_t)dividen % (int32_t)divisor;
}
} else {
if (b == 0) {
*quotient = -1;
*remainder = dividen;
} else {
*quotient = dividen / divisor;
*remainder = dividen % divisor;
}
}
}

View file

@ -1,6 +1,9 @@
`ifndef UTIL_DPI
`define UTIL_DPI
import "DPI-C" context function void dpi_imul(input int a, input int b, input logic is_signed_a, input logic is_signed_b, output int resultl, output int resulth);
import "DPI-C" context function void dpi_idiv(input int a, input int b, input logic is_signed, output int quotient, output int remainder);
import "DPI-C" context function int dpi_register();
import "DPI-C" context function void dpi_assert(int inst, input logic cond, input int delay);

View file

@ -117,7 +117,7 @@
`endif
`ifndef LATENCY_FCVT
`define LATENCY_FCVT 4
`define LATENCY_FCVT 5
`endif
// CSR Addresses //////////////////////////////////////////////////////////////
@ -231,7 +231,7 @@
// Size of LSU Request Queue
`ifndef LSUQ_SIZE
`define LSUQ_SIZE 8
`define LSUQ_SIZE (`NUM_WARPS * 2)
`endif
// Size of FPU Request Queue
@ -300,7 +300,7 @@
// Memory Response Queue Size
`ifndef DMRSQ_SIZE
`define DMRSQ_SIZE `MAX(4, (`DNUM_BANKS * 2))
`define DMRSQ_SIZE `MAX(4, `DNUM_BANKS)
`endif
// SM Configurable Knobs //////////////////////////////////////////////////////
@ -329,7 +329,7 @@
// Size of cache in bytes
`ifndef L2CACHE_SIZE
`define L2CACHE_SIZE 65536
`define L2CACHE_SIZE 131072
`endif
// Number of banks
@ -361,7 +361,7 @@
// Size of cache in bytes
`ifndef L3CACHE_SIZE
`define L3CACHE_SIZE 131072
`define L3CACHE_SIZE 1048576
`endif
// Number of banks

View file

@ -16,7 +16,7 @@ module VX_execute #(
VX_cmt_to_csr_if cmt_to_csr_if,
`ifdef PERF_ENABLE
VX_perf_memsys_if perf_memsys_if,
VX_perf_memsys_if perf_memsys_if,
VX_perf_pipeline_if perf_pipeline_if,
`endif

View file

@ -7,10 +7,10 @@ module VX_ibuffer #(
input wire reset,
// inputs
VX_decode_if ibuf_enq_if,
VX_decode_if decode_if,
// outputs
VX_decode_if ibuf_deq_if
VX_ibuffer_if ibuffer_if
);
`UNUSED_PARAM (CORE_ID)
@ -28,27 +28,24 @@ module VX_ibuffer #(
wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev;
reg [`NUM_WARPS-1:0][DATAW-1:0] q_data_out;
wire enq_fire = ibuf_enq_if.valid && ibuf_enq_if.ready;
wire deq_fire = ibuf_deq_if.valid && ibuf_deq_if.ready;
wire enq_fire = decode_if.valid && decode_if.ready;
wire deq_fire = ibuffer_if.valid && ibuffer_if.ready;
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
wire writing = enq_fire && (i == ibuf_enq_if.wid);
wire reading = deq_fire && (i == ibuf_deq_if.wid);
wire writing = enq_fire && (i == decode_if.wid);
wire reading = deq_fire && (i == ibuffer_if.wid);
wire is_slot0 = empty_r[i] || (alm_empty_r[i] && reading);
wire push = writing && !is_slot0;
wire pop = reading && !alm_empty_r[i];
wire is_head_ptr = empty_r[i] || (alm_empty_r[i] && reading);
VX_skid_buffer #(
.DATAW (DATAW)
) queue (
.clk (clk),
.reset (reset),
.valid_in (push),
.valid_in (writing && !is_head_ptr),
.data_in (q_data_in),
.ready_out(pop),
.ready_out(reading),
.data_out (q_data_prev[i]),
`UNUSED_PIN (ready_in),
`UNUSED_PIN (valid_out)
@ -79,9 +76,9 @@ module VX_ibuffer #(
used_r[i] <= used_r[i] + ADDRW'($signed(2'(writing) - 2'(reading)));
end
if (writing && is_slot0) begin
if (writing && is_head_ptr) begin
q_data_out[i] <= q_data_in;
end else if (pop) begin
end else if (reading) begin
q_data_out[i] <= q_data_prev[i];
end
end
@ -100,67 +97,63 @@ module VX_ibuffer #(
reg [DATAW-1:0] deq_instr, deq_instr_n;
reg [NWARPSW-1:0] num_warps;
// calculate valid table
always @(*) begin
valid_table_n = valid_table;
if (deq_fire) begin
valid_table_n[deq_wid] = !q_alm_empty[deq_wid];
end
if (enq_fire) begin
valid_table_n[ibuf_enq_if.wid] = 1;
valid_table_n[decode_if.wid] = 1;
end
end
// schedule the next instruction to issue
// do round-robin when multiple warps are active
always @(*) begin
deq_valid_n = 0;
deq_wid_n = 'x;
deq_instr_n = 'x;
schedule_table_n = 'x;
always @(*) begin
if (num_warps > 1) begin
deq_valid_n = (| schedule_table);
schedule_table_n = schedule_table;
for (integer i = 0; i < `NUM_WARPS; i++) begin
deq_valid_n = 1;
deq_wid_n = 'x;
deq_instr_n = 'x;
for (integer i = `NUM_WARPS-1; i >= 0; --i) begin
if (schedule_table[i]) begin
deq_wid_n = `NW_BITS'(i);
deq_wid_n = `NW_BITS'(i);
deq_instr_n = q_data_out[i];
schedule_table_n[i] = 0;
break;
end
end
end else if (1 == num_warps && !(deq_fire && q_alm_empty[deq_wid])) begin
deq_valid_n = 1;
deq_valid_n = 1;
deq_wid_n = deq_wid;
deq_instr_n = deq_fire ? q_data_prev[deq_wid] : q_data_out[deq_wid];
end else begin
deq_valid_n = enq_fire;
deq_wid_n = ibuf_enq_if.wid;
deq_wid_n = decode_if.wid;
deq_instr_n = q_data_in;
end
end
wire warp_added = enq_fire && q_empty[ibuf_enq_if.wid];
wire warp_removed = deq_fire && ~(enq_fire && ibuf_enq_if.wid == deq_wid) && q_alm_empty[deq_wid];
// do round-robin scheduling with multiple active warps
always @(*) begin
if (1 == $countones(schedule_table)
|| (num_warps < 2)) begin
schedule_table_n = valid_table_n;
end else begin
schedule_table_n = schedule_table;
end
schedule_table_n[deq_wid_n] = 0;
end
wire warp_added = enq_fire && q_empty[decode_if.wid];
wire warp_removed = deq_fire && ~(enq_fire && decode_if.wid == deq_wid) && q_alm_empty[deq_wid];
always @(posedge clk) begin
if (reset) begin
valid_table <= 0;
schedule_table <= 0;
deq_valid <= 0;
num_warps <= 0;
end else begin
valid_table <= valid_table_n;
if (0 == (| schedule_table_n)
|| (num_warps < 2)) begin
schedule_table <= valid_table_n;
schedule_table[deq_wid_n] <= 0;
end else begin
schedule_table <= schedule_table_n;
end
deq_valid <= deq_valid_n;
valid_table <= valid_table_n;
deq_valid <= deq_valid_n;
schedule_table <= schedule_table_n;
if (warp_added && !warp_removed) begin
num_warps <= num_warps + NWARPSW'(1);
@ -173,37 +166,38 @@ module VX_ibuffer #(
deq_instr <= deq_instr_n;
end
assign ibuf_enq_if.ready = ~q_full[ibuf_enq_if.wid];
assign q_data_in = {ibuf_enq_if.tmask,
ibuf_enq_if.PC,
ibuf_enq_if.ex_type,
ibuf_enq_if.op_type,
ibuf_enq_if.op_mod,
ibuf_enq_if.wb,
ibuf_enq_if.rd,
ibuf_enq_if.rs1,
ibuf_enq_if.rs2,
ibuf_enq_if.rs3,
ibuf_enq_if.imm,
ibuf_enq_if.use_PC,
ibuf_enq_if.use_imm,
ibuf_enq_if.used_regs};
assign decode_if.ready = ~q_full[decode_if.wid];
assign q_data_in = {decode_if.tmask,
decode_if.PC,
decode_if.ex_type,
decode_if.op_type,
decode_if.op_mod,
decode_if.wb,
decode_if.rd,
decode_if.rs1,
decode_if.rs2,
decode_if.rs3,
decode_if.imm,
decode_if.use_PC,
decode_if.use_imm,
decode_if.used_regs};
assign ibuf_deq_if.valid = deq_valid;
assign ibuf_deq_if.wid = deq_wid;
assign {ibuf_deq_if.tmask,
ibuf_deq_if.PC,
ibuf_deq_if.ex_type,
ibuf_deq_if.op_type,
ibuf_deq_if.op_mod,
ibuf_deq_if.wb,
ibuf_deq_if.rd,
ibuf_deq_if.rs1,
ibuf_deq_if.rs2,
ibuf_deq_if.rs3,
ibuf_deq_if.imm,
ibuf_deq_if.use_PC,
ibuf_deq_if.use_imm,
ibuf_deq_if.used_regs} = deq_instr;
assign ibuffer_if.valid = deq_valid;
assign ibuffer_if.wid = deq_wid;
assign ibuffer_if.wid_n = deq_wid_n;
assign {ibuffer_if.tmask,
ibuffer_if.PC,
ibuffer_if.ex_type,
ibuffer_if.op_type,
ibuffer_if.op_mod,
ibuffer_if.wb,
ibuffer_if.rd,
ibuffer_if.rs1,
ibuffer_if.rs2,
ibuffer_if.rs3,
ibuffer_if.imm,
ibuffer_if.use_PC,
ibuffer_if.use_imm,
ibuffer_if.used_regs} = deq_instr;
endmodule

View file

@ -5,7 +5,7 @@ module VX_instr_demux (
input wire reset,
// inputs
VX_decode_if execute_if,
VX_ibuffer_if ibuffer_if,
VX_gpr_rsp_if gpr_rsp_if,
// outputs
@ -25,27 +25,26 @@ module VX_instr_demux (
VX_priority_encoder #(
.N (`NUM_THREADS)
) tid_select (
.data_in (execute_if.tmask),
.data_in (ibuffer_if.tmask),
.index (tid),
`UNUSED_PIN (onehot),
`UNUSED_PIN (valid_out)
);
wire [31:0] next_PC = execute_if.PC + 4;
wire [31:0] next_PC = ibuffer_if.PC + 4;
// ALU unit
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
wire alu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_ALU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
.USE_FASTREG (1)
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32))
) alu_buffer (
.clk (clk),
.reset (reset),
.valid_in (alu_req_valid),
.ready_in (alu_req_ready),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_OP(execute_if.op_type), execute_if.op_mod, execute_if.imm, execute_if.use_PC, execute_if.use_imm, execute_if.rd, execute_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, `ALU_OP(ibuffer_if.op_type), ibuffer_if.op_mod, ibuffer_if.imm, ibuffer_if.use_PC, ibuffer_if.use_imm, ibuffer_if.rd, ibuffer_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.op_mod, alu_req_if.imm, alu_req_if.use_PC, alu_req_if.use_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
.valid_out (alu_req_if.valid),
.ready_out (alu_req_if.ready)
@ -53,17 +52,16 @@ module VX_instr_demux (
// lsu unit
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
wire lsu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_LSU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.USE_FASTREG (1)
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32))
) lsu_buffer (
.clk (clk),
.reset (reset),
.valid_in (lsu_req_valid),
.ready_in (lsu_req_ready),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `LSU_OP(execute_if.op_type), execute_if.imm, execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `LSU_OP(ibuffer_if.op_type), ibuffer_if.imm, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
.valid_out (lsu_req_if.valid),
.ready_out (lsu_req_if.ready)
@ -71,17 +69,16 @@ module VX_instr_demux (
// csr unit
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
wire csr_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_CSR);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
.USE_FASTREG (1)
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32)
) csr_buffer (
.clk (clk),
.reset (reset),
.valid_in (csr_req_valid),
.ready_in (csr_req_ready),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `CSR_OP(execute_if.op_type), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, execute_if.use_imm, execute_if.rs1, gpr_rsp_if.rs1_data[0]}),
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `CSR_OP(ibuffer_if.op_type), ibuffer_if.imm[`CSR_ADDR_BITS-1:0], ibuffer_if.rd, ibuffer_if.wb, ibuffer_if.use_imm, ibuffer_if.rs1, gpr_rsp_if.rs1_data[0]}),
.data_out ({csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.op_type, csr_req_if.addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.use_imm, csr_req_if.rs1, csr_req_if.rs1_data}),
.valid_out (csr_req_if.valid),
.ready_out (csr_req_if.ready)
@ -90,17 +87,16 @@ module VX_instr_demux (
// fpu unit
`ifdef EXT_F_ENABLE
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
wire fpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_FPU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
.USE_FASTREG (1)
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32))
) fpu_buffer (
.clk (clk),
.reset (reset),
.valid_in (fpu_req_valid),
.ready_in (fpu_req_ready),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `FPU_OP(execute_if.op_type), execute_if.op_mod, execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, `FPU_OP(ibuffer_if.op_type), ibuffer_if.op_mod, ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data, gpr_rsp_if.rs3_data}),
.data_out ({fpu_req_if.wid, fpu_req_if.tmask, fpu_req_if.PC, fpu_req_if.op_type, fpu_req_if.op_mod, fpu_req_if.rd, fpu_req_if.wb, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
.valid_out (fpu_req_if.valid),
.ready_out (fpu_req_if.ready)
@ -112,17 +108,16 @@ module VX_instr_demux (
// gpu unit
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
wire gpu_req_valid = ibuffer_if.valid && (ibuffer_if.ex_type == `EX_GPU);
VX_skid_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
.USE_FASTREG (1)
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32))
) gpu_buffer (
.clk (clk),
.reset (reset),
.valid_in (gpu_req_valid),
.ready_in (gpu_req_ready),
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `GPU_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
.data_in ({ibuffer_if.wid, ibuffer_if.tmask, ibuffer_if.PC, next_PC, `GPU_OP(ibuffer_if.op_type), ibuffer_if.rd, ibuffer_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data[0]}),
.data_out ({gpu_req_if.wid, gpu_req_if.tmask, gpu_req_if.PC, gpu_req_if.next_PC, gpu_req_if.op_type, gpu_req_if.rd, gpu_req_if.wb, gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
.valid_out (gpu_req_if.valid),
.ready_out (gpu_req_if.ready)
@ -131,7 +126,7 @@ module VX_instr_demux (
// can take next request?
reg ready_r;
always @(*) begin
case (execute_if.ex_type)
case (ibuffer_if.ex_type)
`EX_ALU: ready_r = alu_req_ready;
`EX_LSU: ready_r = lsu_req_ready;
`EX_CSR: ready_r = csr_req_ready;
@ -140,6 +135,6 @@ module VX_instr_demux (
default: ready_r = 1'b1; // ignore NOPs
endcase
end
assign execute_if.ready = ready_r;
assign ibuffer_if.ready = ready_r;
endmodule

View file

@ -21,8 +21,8 @@ module VX_issue #(
VX_fpu_req_if fpu_req_if,
VX_gpu_req_if gpu_req_if
);
VX_decode_if ibuf_deq_if();
VX_decode_if execute_if();
VX_ibuffer_if ibuffer_if();
VX_ibuffer_if execute_if();
VX_gpr_req_if gpr_req_if();
VX_gpr_rsp_if gpr_rsp_if();
@ -31,26 +31,26 @@ module VX_issue #(
VX_ibuffer #(
.CORE_ID(CORE_ID)
) ibuffer (
.clk (clk),
.reset (reset),
.ibuf_enq_if (decode_if),
.ibuf_deq_if (ibuf_deq_if)
.clk (clk),
.reset (reset),
.decode_if (decode_if),
.ibuffer_if (ibuffer_if)
);
VX_scoreboard #(
.CORE_ID(CORE_ID)
) scoreboard (
.clk (clk),
.reset (reset),
.ibuf_deq_if (ibuf_deq_if),
.writeback_if (writeback_if),
.delay (scoreboard_delay)
.clk (clk),
.reset (reset),
.ibuffer_if (ibuffer_if),
.writeback_if(writeback_if),
.delay (scoreboard_delay)
);
assign gpr_req_if.wid = ibuf_deq_if.wid;
assign gpr_req_if.rs1 = ibuf_deq_if.rs1;
assign gpr_req_if.rs2 = ibuf_deq_if.rs2;
assign gpr_req_if.rs3 = ibuf_deq_if.rs3;
assign gpr_req_if.wid = ibuffer_if.wid;
assign gpr_req_if.rs1 = ibuffer_if.rs1;
assign gpr_req_if.rs2 = ibuffer_if.rs2;
assign gpr_req_if.rs3 = ibuffer_if.rs3;
VX_gpr_stage #(
.CORE_ID(CORE_ID)
@ -62,24 +62,24 @@ module VX_issue #(
.gpr_rsp_if (gpr_rsp_if)
);
assign execute_if.valid = ibuf_deq_if.valid && ~scoreboard_delay;
assign execute_if.wid = ibuf_deq_if.wid;
assign execute_if.tmask = ibuf_deq_if.tmask;
assign execute_if.PC = ibuf_deq_if.PC;
assign execute_if.ex_type = ibuf_deq_if.ex_type;
assign execute_if.op_type = ibuf_deq_if.op_type;
assign execute_if.op_mod = ibuf_deq_if.op_mod;
assign execute_if.wb = ibuf_deq_if.wb;
assign execute_if.rd = ibuf_deq_if.rd;
assign execute_if.rs1 = ibuf_deq_if.rs1;
assign execute_if.imm = ibuf_deq_if.imm;
assign execute_if.use_PC = ibuf_deq_if.use_PC;
assign execute_if.use_imm = ibuf_deq_if.use_imm;
assign execute_if.valid = ibuffer_if.valid && ~scoreboard_delay;
assign execute_if.wid = ibuffer_if.wid;
assign execute_if.tmask = ibuffer_if.tmask;
assign execute_if.PC = ibuffer_if.PC;
assign execute_if.ex_type = ibuffer_if.ex_type;
assign execute_if.op_type = ibuffer_if.op_type;
assign execute_if.op_mod = ibuffer_if.op_mod;
assign execute_if.wb = ibuffer_if.wb;
assign execute_if.rd = ibuffer_if.rd;
assign execute_if.rs1 = ibuffer_if.rs1;
assign execute_if.imm = ibuffer_if.imm;
assign execute_if.use_PC = ibuffer_if.use_PC;
assign execute_if.use_imm = ibuffer_if.use_imm;
VX_instr_demux instr_demux (
.clk (clk),
.reset (reset),
.execute_if (execute_if),
.ibuffer_if (execute_if),
.gpr_rsp_if (gpr_rsp_if),
.alu_req_if (alu_req_if),
.lsu_req_if (lsu_req_if),
@ -89,23 +89,23 @@ module VX_issue #(
);
// issue the instruction
assign ibuf_deq_if.ready = !scoreboard_delay && execute_if.ready;
assign ibuffer_if.ready = !scoreboard_delay && execute_if.ready;
`SCOPE_ASSIGN (issue_fire, ibuf_deq_if.valid && ibuf_deq_if.ready);
`SCOPE_ASSIGN (issue_wid, ibuf_deq_if.wid);
`SCOPE_ASSIGN (issue_tmask, ibuf_deq_if.tmask);
`SCOPE_ASSIGN (issue_pc, ibuf_deq_if.PC);
`SCOPE_ASSIGN (issue_ex_type, ibuf_deq_if.ex_type);
`SCOPE_ASSIGN (issue_op_type, ibuf_deq_if.op_type);
`SCOPE_ASSIGN (issue_op_mod, ibuf_deq_if.op_mod);
`SCOPE_ASSIGN (issue_wb, ibuf_deq_if.wb);
`SCOPE_ASSIGN (issue_rd, ibuf_deq_if.rd);
`SCOPE_ASSIGN (issue_rs1, ibuf_deq_if.rs1);
`SCOPE_ASSIGN (issue_rs2, ibuf_deq_if.rs2);
`SCOPE_ASSIGN (issue_rs3, ibuf_deq_if.rs3);
`SCOPE_ASSIGN (issue_imm, ibuf_deq_if.imm);
`SCOPE_ASSIGN (issue_use_pc, ibuf_deq_if.use_PC);
`SCOPE_ASSIGN (issue_use_imm, ibuf_deq_if.use_imm);
`SCOPE_ASSIGN (issue_fire, ibuffer_if.valid && ibuffer_if.ready);
`SCOPE_ASSIGN (issue_wid, ibuffer_if.wid);
`SCOPE_ASSIGN (issue_tmask, ibuffer_if.tmask);
`SCOPE_ASSIGN (issue_pc, ibuffer_if.PC);
`SCOPE_ASSIGN (issue_ex_type, ibuffer_if.ex_type);
`SCOPE_ASSIGN (issue_op_type, ibuffer_if.op_type);
`SCOPE_ASSIGN (issue_op_mod, ibuffer_if.op_mod);
`SCOPE_ASSIGN (issue_wb, ibuffer_if.wb);
`SCOPE_ASSIGN (issue_rd, ibuffer_if.rd);
`SCOPE_ASSIGN (issue_rs1, ibuffer_if.rs1);
`SCOPE_ASSIGN (issue_rs2, ibuffer_if.rs2);
`SCOPE_ASSIGN (issue_rs3, ibuffer_if.rs3);
`SCOPE_ASSIGN (issue_imm, ibuffer_if.imm);
`SCOPE_ASSIGN (issue_use_pc, ibuffer_if.use_PC);
`SCOPE_ASSIGN (issue_use_imm, ibuffer_if.use_imm);
`SCOPE_ASSIGN (scoreboard_delay, scoreboard_delay);
`SCOPE_ASSIGN (execute_delay, ~execute_if.ready);
`SCOPE_ASSIGN (gpr_rsp_a, gpr_rsp_if.rs1_data);
@ -145,7 +145,7 @@ module VX_issue #(
if (decode_if.valid & !decode_if.ready) begin
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'd1;
end
if (ibuf_deq_if.valid & scoreboard_delay) begin
if (ibuffer_if.valid & scoreboard_delay) begin
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'd1;
end
if (alu_req_if.valid & !alu_req_if.ready) begin

View file

@ -282,7 +282,7 @@ module VX_mem_unit # (
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
.TAG_IN_WIDTH (`DMEM_TAG_WIDTH),
.BUFFERED_REQ (1),
.BUFFERED_RSP (0)
.BUFFERED_RSP (1)
) mem_arb (
.clk (clk),
.reset (reset),

View file

@ -1,5 +1,9 @@
`include "VX_define.vh"
`ifndef SYNTHESIS
`include "util_dpi.vh"
`endif
module VX_muldiv (
input wire clk,
input wire reset,
@ -43,13 +47,42 @@ module VX_muldiv (
wire mul_valid_out;
wire mul_valid_in = valid_in && !is_div_op;
wire mul_ready_in = ~stall_out || ~mul_valid_out;
wire is_mulh_in = (alu_op != `MUL_MUL);
wire is_mulh_in = (alu_op != `MUL_MUL);
wire is_signed_mul_a = (alu_op != `MUL_MULHU);
wire is_signed_mul_b = (alu_op != `MUL_MULHU && alu_op != `MUL_MULHSU);
`ifdef IMUL_DPI
wire [`NUM_THREADS-1:0][31:0] mul_result_tmp;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [31:0] mul_resultl, mul_resulth;
always @(*) begin
dpi_imul (alu_in1[i], alu_in2[i], is_signed_mul_a, is_signed_mul_b, mul_resultl, mul_resulth);
end
assign mul_result_tmp[i] = is_mulh_in ? mul_resulth : mul_resultl;
end
VX_shift_register #(
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.DEPTH (`LATENCY_IMUL),
.RESETW (1)
) mul_shift_reg (
.clk(clk),
.reset (reset),
.enable (mul_ready_in),
.data_in ({mul_valid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, mul_result_tmp}),
.data_out ({mul_valid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_result})
);
`else
wire is_mulh_out;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [32:0] mul_in1 = {(alu_op != `MUL_MULHU) & alu_in1[i][31], alu_in1[i]};
wire [32:0] mul_in2 = {(alu_op != `MUL_MULHU && alu_op != `MUL_MULHSU) & alu_in2[i][31], alu_in2[i]};
wire [32:0] mul_in1 = {is_signed_mul_a & alu_in1[i][31], alu_in1[i]};
wire [32:0] mul_in2 = {is_signed_mul_b & alu_in2[i][31], alu_in2[i]};
`IGNORE_WARNINGS_BEGIN
wire [65:0] mul_result_tmp;
`IGNORE_WARNINGS_END
@ -83,9 +116,11 @@ module VX_muldiv (
.data_out ({mul_valid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out})
);
`endif
///////////////////////////////////////////////////////////////////////////
wire [`NUM_THREADS-1:0][31:0] div_result_tmp, rem_result_tmp;
wire [`NUM_THREADS-1:0][31:0] div_result;
wire [`NW_BITS-1:0] div_wid_out;
wire [`NUM_THREADS-1:0] div_tmask_out;
wire [31:0] div_PC_out;
@ -98,6 +133,36 @@ module VX_muldiv (
wire div_ready_out = ~stall_out && ~mul_valid_out; // arbitration prioritizes MUL
wire div_ready_in;
wire div_valid_out;
`ifdef IDIV_DPI
wire [`NUM_THREADS-1:0][31:0] div_result_tmp;
for (genvar i = 0; i < `NUM_THREADS; i++) begin
wire [31:0] div_quotient, div_remainder;
always @(*) begin
dpi_idiv (alu_in1[i], alu_in2[i], is_signed_div, div_quotient, div_remainder);
end
assign div_result_tmp[i] = is_rem_op_in ? div_remainder : div_quotient;
end
VX_shift_register #(
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
.DEPTH (`LATENCY_IMUL),
.RESETW (1)
) div_shift_reg (
.clk(clk),
.reset (reset),
.enable (div_ready_in),
.data_in ({div_valid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, div_result_tmp}),
.data_out ({div_valid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, div_result})
);
assign div_ready_in = div_ready_out || ~div_valid_out;
`else
wire [`NUM_THREADS-1:0][31:0] div_result_tmp, rem_result_tmp;
wire is_rem_op_out;
VX_serial_div #(
@ -123,7 +188,9 @@ module VX_muldiv (
.tag_out ({div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out})
);
wire [`NUM_THREADS-1:0][31:0] div_result = is_rem_op_out ? rem_result_tmp : div_result_tmp;
assign div_result = is_rem_op_out ? rem_result_tmp : div_result_tmp;
`endif
///////////////////////////////////////////////////////////////////////////

View file

@ -6,59 +6,65 @@ module VX_scoreboard #(
input wire clk,
input wire reset,
VX_decode_if ibuf_deq_if,
VX_ibuffer_if ibuffer_if,
VX_writeback_if writeback_if,
output wire delay
);
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_regs;
wire [`NUM_REGS-1:0] deq_inuse_regs;
assign deq_inuse_regs = inuse_regs[ibuf_deq_if.wid] & ibuf_deq_if.used_regs;
reg [`NUM_WARPS-1:0][`NUM_REGS-1:0] inuse_regs, inuse_regs_n;
assign delay = (| deq_inuse_regs);
wire reserve_reg = ibuf_deq_if.valid && ibuf_deq_if.ready && (ibuf_deq_if.wb != 0);
reg [`NUM_REGS-1:0] deq_inuse_regs;
assign delay = |(deq_inuse_regs & ibuffer_if.used_regs);
wire reserve_reg = ibuffer_if.valid && ibuffer_if.ready && ibuffer_if.wb;
wire release_reg = writeback_if.valid && writeback_if.ready && writeback_if.eop;
always @(posedge clk) begin
if (reset) begin
inuse_regs <= (`NUM_WARPS*`NUM_REGS)'(0);
end else begin
if (reserve_reg) begin
inuse_regs[ibuf_deq_if.wid][ibuf_deq_if.rd] <= 1;
end
if (release_reg) begin
inuse_regs[writeback_if.wid][writeback_if.rd] <= 0;
assert(inuse_regs[writeback_if.wid][writeback_if.rd] != 0)
else $error("%t: *** core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d",
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd);
end
always @(*) begin
inuse_regs_n = inuse_regs;
if (reserve_reg) begin
inuse_regs_n[ibuffer_if.wid][ibuffer_if.rd] = 1;
end
if (release_reg) begin
inuse_regs_n[writeback_if.wid][writeback_if.rd] = 0;
end
end
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
$display("%t: *** core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
deq_inuse_regs[ibuf_deq_if.rd], deq_inuse_regs[ibuf_deq_if.rs1], deq_inuse_regs[ibuf_deq_if.rs2], deq_inuse_regs[ibuf_deq_if.rs3]);
if (reset) begin
inuse_regs <= '0;
end else begin
inuse_regs <= inuse_regs_n;
end
end
`endif
deq_inuse_regs <= inuse_regs_n[ibuffer_if.wid_n];
end
reg [31:0] deadlock_ctr;
wire [31:0] deadlock_timeout = 1000 * (10 ** (`L2_ENABLE + `L3_ENABLE));
always @(posedge clk) begin
if (reset) begin
deadlock_ctr <= 0;
end else if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
deadlock_ctr <= deadlock_ctr + 1;
assert(deadlock_ctr < deadlock_timeout) else $error("%t: *** core%0d-deadlock: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
deq_inuse_regs[ibuf_deq_if.rd], deq_inuse_regs[ibuf_deq_if.rs1], deq_inuse_regs[ibuf_deq_if.rs2], deq_inuse_regs[ibuf_deq_if.rs3]);
end else if (ibuf_deq_if.valid && ibuf_deq_if.ready) begin
deadlock_ctr <= 0;
end else begin
`ifdef DBG_PRINT_PIPELINE
if (ibuffer_if.valid && ~ibuffer_if.ready) begin
$display("%t: *** core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
$time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.rd, ibuffer_if.wb,
deq_inuse_regs[ibuffer_if.rd], deq_inuse_regs[ibuffer_if.rs1], deq_inuse_regs[ibuffer_if.rs2], deq_inuse_regs[ibuffer_if.rs3]);
end
`endif
if (release_reg) begin
assert(inuse_regs[writeback_if.wid][writeback_if.rd] != 0)
else $error("%t: *** core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d",
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd);
end
if (ibuffer_if.valid && ~ibuffer_if.ready) begin
deadlock_ctr <= deadlock_ctr + 1;
assert(deadlock_ctr < deadlock_timeout) else $error("%t: *** core%0d-deadlock: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
$time, CORE_ID, ibuffer_if.wid, ibuffer_if.PC, ibuffer_if.rd, ibuffer_if.wb,
deq_inuse_regs[ibuffer_if.rd], deq_inuse_regs[ibuffer_if.rs1], deq_inuse_regs[ibuffer_if.rs2], deq_inuse_regs[ibuffer_if.rs3]);
end else if (ibuffer_if.valid && ibuffer_if.ready) begin
deadlock_ctr <= 0;
end
end
end

View file

@ -32,8 +32,8 @@ module VX_smem_arb (
VX_stream_demux #(
.NUM_REQS (2),
.DATAW (REQ_DATAW),
.BUFFERED (0)
) rsp_demux (
.BUFFERED (1)
) req_demux (
.clk (clk),
.reset (reset),
.sel (core_req_if.tag[i][0]),

View file

@ -51,8 +51,13 @@ module VX_avs_wrapper #(
wire [NUM_BANKS-1:0] req_queue_going_full;
wire [NUM_BANKS-1:0][RD_QUEUE_ADDR_WIDTH-1:0] req_queue_size;
wire [NUM_BANKS-1:0][REQ_TAG_WIDTH-1:0] avs_reqq_data_out;
wire [BANK_ADDRW-1:0] req_bank_sel = (NUM_BANKS >= 2) ? mem_req_addr[BANK_ADDRW-1:0] : '0;
wire [BANK_ADDRW-1:0] req_bank_sel;
if (NUM_BANKS >= 2) begin
assign req_bank_sel = mem_req_addr[BANK_ADDRW-1:0];
end else begin
assign req_bank_sel = 0;
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign avs_reqq_ready[i] = !req_queue_going_full[i] && !avs_waitrequest[i];
@ -100,7 +105,11 @@ module VX_avs_wrapper #(
assign avs_burstcount[i] = AVS_BURST_WIDTH'(1);
end
assign mem_req_ready = avs_reqq_ready[req_bank_sel];
if (NUM_BANKS >= 2) begin
assign mem_req_ready = avs_reqq_ready[req_bank_sel];
end else begin
assign mem_req_ready = avs_reqq_ready;
end
// Responses handling

View file

@ -53,7 +53,6 @@ localparam AVS_REQ_TAGW_CCI = `MAX(CCI_ADDR_WIDTH, CCI_ADDR_WIDTH + $clog2(LME
localparam AVS_REQ_TAGW = `MAX(AVS_REQ_TAGW_VX, AVS_REQ_TAGW_CCI);
localparam CCI_RD_WINDOW_SIZE = 8;
localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE;
localparam CCI_RW_PENDING_SIZE= 256;
localparam AFU_ID_L = 16'h0002; // AFU ID Lower
@ -78,15 +77,15 @@ localparam MMIO_SCOPE_WRITE = `AFU_IMAGE_MMIO_SCOPE_WRITE;
localparam MMIO_DEV_CAPS = `AFU_IMAGE_MMIO_DEV_CAPS;
localparam CCI_RD_RQ_TAGW = $clog2(CCI_RD_WINDOW_SIZE);
localparam CCI_RD_RQ_DATAW = CCI_LINE_WIDTH + CCI_RD_RQ_TAGW;
localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE;
localparam CCI_RD_QUEUE_TAGW = $clog2(CCI_RD_WINDOW_SIZE);
localparam CCI_RD_QUEUE_DATAW = CCI_LINE_WIDTH + CCI_ADDR_WIDTH;
localparam STATE_IDLE = 0;
localparam STATE_READ = 1;
localparam STATE_WRITE = 2;
localparam STATE_WRITE = 1;
localparam STATE_READ = 2;
localparam STATE_START = 3;
localparam STATE_RUN = 4;
localparam STATE_MAX_VALUE = 5;
localparam STATE_MAX_VALUE = 4;
localparam STATE_WIDTH = $clog2(STATE_MAX_VALUE);
`ifdef SCOPE
@ -114,11 +113,9 @@ wire [`VX_MEM_LINE_WIDTH-1:0] vx_mem_rsp_data;
wire [`VX_MEM_TAG_WIDTH-1:0] vx_mem_rsp_tag;
wire vx_mem_rsp_ready;
reg vx_reset;
wire vx_busy;
reg vx_reset;
reg vx_mem_en;
// CMD variables //////////////////////////////////////////////////////////////
t_ccip_clAddr cmd_io_addr;
@ -292,8 +289,9 @@ end
// COMMAND FSM ////////////////////////////////////////////////////////////////
wire cmd_read_done;
wire cmd_write_done;
reg cmd_write_done;
wire cmd_run_done;
reg vx_started;
reg [$clog2(RESET_DELAY+1)-1:0] vx_reset_ctr;
always @(posedge clk) begin
@ -306,9 +304,9 @@ end
always @(posedge clk) begin
if (reset) begin
state <= STATE_IDLE;
state <= STATE_IDLE;
vx_started <= 0;
vx_reset <= 0;
vx_mem_en <= 0;
end else begin
case (state)
STATE_IDLE: begin
@ -358,21 +356,20 @@ always @(posedge clk) begin
STATE_START: begin
// vortex reset cycles
if (vx_reset_ctr == $bits(vx_reset_ctr)'(RESET_DELAY)) begin
vx_reset <= 0;
vx_mem_en <= 1;
state <= STATE_RUN;
end
end
STATE_RUN: begin
if (cmd_run_done) begin
vx_mem_en <= 0;
state <= STATE_IDLE;
`ifdef DBG_PRINT_OPAE
$display("%t: STATE IDLE", $time);
`endif
end
if (vx_started) begin
if (cmd_run_done) begin
vx_started <= 0;
state <= STATE_IDLE;
`ifdef DBG_PRINT_OPAE
$display("%t: STATE IDLE", $time);
`endif
end
end else begin
if (vx_reset_ctr == $bits(vx_reset_ctr)'(RESET_DELAY)) begin
vx_started <= 1;
vx_reset <= 0;
end
end
end
default: begin
@ -387,11 +384,12 @@ end
wire cci_mem_rd_req_valid;
wire cci_mem_wr_req_valid;
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout;
wire [CCI_RD_QUEUE_DATAW-1:0] cci_rdq_dout;
wire cci_mem_req_valid;
wire cci_mem_req_rw;
wire [CCI_ADDR_WIDTH-1:0] cci_mem_req_addr;
wire [CCI_LINE_WIDTH-1:0] cci_mem_req_data;
wire [CCI_ADDR_WIDTH-1:0] cci_mem_req_tag;
wire cci_mem_req_ready;
@ -430,7 +428,7 @@ VX_to_mem #(
.mem_req_addr_in (cci_mem_req_addr),
.mem_req_rw_in (cci_mem_req_rw),
.mem_req_byteen_in ({CCI_LINE_SIZE{1'b1}}),
.mem_req_data_in (cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW]),
.mem_req_data_in (cci_mem_req_data),
.mem_req_tag_in (cci_mem_req_tag),
.mem_req_ready_in (cci_mem_req_ready),
@ -473,7 +471,7 @@ wire vx_mem_req_valid_qual;
wire vx_mem_req_ready_qual;
assign vx_mem_req_valid_qual = vx_mem_req_valid
&& vx_mem_en
&& vx_started
&& ~vx_mem_is_cout;
assign vx_mem_req_ready = vx_mem_is_cout ? ~cout_q_full : vx_mem_req_ready_qual;
@ -536,7 +534,8 @@ VX_mem_arb #(
.DATA_WIDTH (LMEM_LINE_WIDTH),
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
.TAG_IN_WIDTH (AVS_REQ_TAGW),
.TYPE ("X")
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
) mem_arb (
.clk (clk),
.reset (reset),
@ -617,19 +616,20 @@ VX_avs_wrapper #(
reg [CCI_ADDR_WIDTH-1:0] cci_mem_wr_req_ctr;
wire [CCI_ADDR_WIDTH-1:0] cci_mem_wr_req_addr;
reg [CCI_ADDR_WIDTH-1:0] cci_mem_wr_req_addr_unqual;
reg [CCI_ADDR_WIDTH-1:0] cci_rd_req_ctr;
wire [CCI_ADDR_WIDTH-1:0] cci_rd_req_ctr_next;
wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag;
wire [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_tag;
reg [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_ctr;
reg [CCI_ADDR_WIDTH-1:0] cci_mem_wr_req_addr_base;
wire cci_rd_req_fire;
t_ccip_clAddr cci_rd_req_addr;
reg cci_rd_req_valid, cci_rd_req_wait;
reg [CCI_ADDR_WIDTH-1:0] cci_rd_req_ctr;
wire [CCI_ADDR_WIDTH-1:0] cci_rd_req_ctr_next;
wire [CCI_RD_QUEUE_TAGW-1:0] cci_rd_req_tag;
wire [CCI_RD_QUEUE_TAGW-1:0] cci_rd_rsp_tag;
reg [CCI_RD_QUEUE_TAGW-1:0] cci_rd_rsp_ctr;
wire cci_rdq_push, cci_rdq_pop;
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_din;
wire [CCI_RD_QUEUE_DATAW-1:0] cci_rdq_din;
wire cci_rdq_empty;
always @(*) begin
@ -641,16 +641,15 @@ end
wire cci_mem_wr_req_fire = cci_mem_wr_req_valid && cci_mem_req_ready;
wire cci_rd_rsp_fire = (STATE_WRITE == state)
&& cp2af_sRxPort.c0.rspValid
wire cci_rd_rsp_fire = cp2af_sRxPort.c0.rspValid
&& (cp2af_sRxPort.c0.hdr.resp_type == eRSP_RDLINE);
assign cci_rd_req_tag = CCI_RD_RQ_TAGW'(cci_rd_req_ctr);
assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata);
assign cci_rd_req_tag = CCI_RD_QUEUE_TAGW'(cci_rd_req_ctr);
assign cci_rd_rsp_tag = CCI_RD_QUEUE_TAGW'(cp2af_sRxPort.c0.hdr.mdata);
assign cci_rdq_push = cci_rd_rsp_fire;
assign cci_rdq_pop = cci_mem_wr_req_fire;
assign cci_rdq_din = {cp2af_sRxPort.c0.data, cci_rd_rsp_tag};
assign cci_rdq_din = {cp2af_sRxPort.c0.data, cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(cci_rd_rsp_tag)};
wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads;
wire cci_pending_reads_full;
@ -673,9 +672,7 @@ assign cci_rd_req_fire = cci_rd_req_valid && !(cci_rd_req_wait || cci_pending_re
assign cci_mem_wr_req_valid = !cci_rdq_empty;
assign cci_mem_wr_req_addr = cci_mem_wr_req_addr_unqual + (CCI_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout)));
assign cmd_write_done = (cci_mem_wr_req_ctr == cmd_data_size);
assign cci_mem_wr_req_addr = cci_rdq_dout[CCI_ADDR_WIDTH-1:0];
// Send read requests to CCI
always @(posedge clk) begin
@ -693,11 +690,11 @@ always @(posedge clk) begin
&& (cci_rd_req_ctr_next != cmd_data_size)
&& !cp2af_sRxPort.c0TxAlmFull;
if (cci_rd_req_fire && (cci_rd_req_tag == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1))) begin
if (cci_rd_req_fire && (cci_rd_req_tag == CCI_RD_QUEUE_TAGW'(CCI_RD_WINDOW_SIZE-1))) begin
cci_rd_req_wait <= 1; // end current request batch
end
if (cci_rd_rsp_fire && (cci_rd_rsp_ctr == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1))) begin
if (cci_rd_rsp_fire && (cci_rd_rsp_ctr == CCI_RD_QUEUE_TAGW'(CCI_RD_WINDOW_SIZE-1))) begin
cci_rd_req_wait <= 0; // begin new request batch
end
end
@ -708,7 +705,8 @@ always @(posedge clk) begin
cci_rd_req_ctr <= 0;
cci_rd_rsp_ctr <= 0;
cci_mem_wr_req_ctr <= 0;
cci_mem_wr_req_addr_unqual <= cmd_mem_addr;
cci_mem_wr_req_addr_base <= cmd_mem_addr;
cmd_write_done <= 0;
end
if (cci_rd_req_fire) begin
@ -720,7 +718,10 @@ always @(posedge clk) begin
end
if (cci_rd_rsp_fire) begin
cci_rd_rsp_ctr <= cci_rd_rsp_ctr + CCI_RD_RQ_TAGW'(1);
cci_rd_rsp_ctr <= cci_rd_rsp_ctr + CCI_RD_QUEUE_TAGW'(1);
if (CCI_RD_QUEUE_TAGW'(cci_rd_rsp_ctr) == CCI_RD_QUEUE_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin
cci_mem_wr_req_addr_base <= cci_mem_wr_req_addr_base + CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE);
end
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d, data=%0h", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data);
`endif
@ -732,14 +733,16 @@ always @(posedge clk) begin
`endif
end
if (cci_mem_wr_req_fire) begin
cci_mem_wr_req_addr_unqual <= cci_mem_wr_req_addr_unqual + ((CCI_RD_RQ_TAGW'(cci_mem_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : CCI_ADDR_WIDTH'(0));
cci_mem_wr_req_ctr <= cci_mem_wr_req_ctr + CCI_ADDR_WIDTH'(1);
if (cci_mem_wr_req_fire) begin
cci_mem_wr_req_ctr <= cci_mem_wr_req_ctr + CCI_ADDR_WIDTH'(1);
if (cci_mem_wr_req_ctr == (cmd_data_size-1)) begin
cmd_write_done <= 1;
end
end
end
VX_fifo_queue #(
.DATAW (CCI_RD_RQ_DATAW),
.DATAW (CCI_RD_QUEUE_DATAW),
.SIZE (CCI_RD_QUEUE_SIZE)
) cci_rd_req_queue (
.clk (clk),
@ -779,11 +782,13 @@ VX_fifo_queue #(
reg [CCI_ADDR_WIDTH-1:0] cci_mem_rd_req_ctr;
reg [CCI_ADDR_WIDTH-1:0] cci_mem_rd_req_addr;
reg [CCI_ADDR_WIDTH-1:0] cci_wr_req_ctr;
reg cci_mem_rd_req_done;
reg [CCI_ADDR_WIDTH-1:0] cci_wr_req_ctr;
reg cci_wr_req_fire;
t_ccip_clAddr cci_wr_req_addr;
t_ccip_clData cci_wr_req_data;
reg cci_wr_req_done;
always @(*) begin
af2cp_sTxPort.c1.valid = cci_wr_req_fire;
@ -818,12 +823,12 @@ VX_pending_size #(
`UNUSED_VAR (cci_pending_writes)
assign cci_mem_rd_req_valid = (STATE_READ == state)
&& (cci_mem_rd_req_ctr != cmd_data_size);
&& !cci_mem_rd_req_done;
assign cci_mem_rsp_ready = !cp2af_sRxPort.c1TxAlmFull
&& !cci_pending_writes_full;
assign cmd_read_done = (0 == cci_wr_req_ctr)
assign cmd_read_done = cci_wr_req_done
&& cci_pending_writes_empty;
// Send write requests to CCI
@ -839,12 +844,17 @@ begin
&& (CMD_MEM_READ == cmd_type)) begin
cci_mem_rd_req_ctr <= 0;
cci_mem_rd_req_addr <= cmd_mem_addr;
cci_mem_rd_req_done <= 0;
cci_wr_req_ctr <= cmd_data_size;
cci_wr_req_done <= 0;
end
if (cci_mem_rd_req_fire) begin
cci_mem_rd_req_addr <= cci_mem_rd_req_addr + CCI_ADDR_WIDTH'(1);
cci_mem_rd_req_ctr <= cci_mem_rd_req_ctr + CCI_ADDR_WIDTH'(1);
if (cci_mem_rd_req_ctr == (cmd_data_size-1)) begin
cci_mem_rd_req_done <= 1;
end
end
cci_wr_req_addr <= cmd_io_addr + t_ccip_clAddr'(cci_mem_rsp_tag);
@ -853,6 +863,9 @@ begin
if (cci_wr_req_fire) begin
assert(cci_wr_req_ctr != 0);
cci_wr_req_ctr <= cci_wr_req_ctr - CCI_ADDR_WIDTH'(1);
if (cci_wr_req_ctr == CCI_ADDR_WIDTH'(1)) begin
cci_wr_req_done <= 1;
end
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data);
`endif
@ -867,9 +880,10 @@ end
//--
assign cci_mem_req_rw = (CMD_MEM_WRITE == state);
assign cci_mem_req_rw = state[0]; // STATE_WRITE=00, STATE_WRITE=01
assign cci_mem_req_valid = cci_mem_req_rw ? cci_mem_wr_req_valid : cci_mem_rd_req_valid;
assign cci_mem_req_addr = cci_mem_req_rw ? cci_mem_wr_req_addr : cci_mem_rd_req_addr;
assign cci_mem_req_data = cci_rdq_dout[CCI_RD_QUEUE_DATAW-1:CCI_ADDR_WIDTH];
assign cci_mem_req_tag = cci_mem_req_rw ? cci_mem_wr_req_ctr : cci_mem_rd_req_ctr;
// Vortex /////////////////////////////////////////////////////////////////////
@ -920,7 +934,7 @@ assign cout_char = vx_mem_req_data_ar[cout_tid];
assign vx_mem_is_cout = (vx_mem_req_addr == `VX_MEM_ADDR_WIDTH'(`IO_COUT_ADDR >> (32 - `VX_MEM_ADDR_WIDTH)));
wire cout_q_push = vx_mem_req_valid
&& vx_mem_en
&& vx_started
&& vx_mem_is_cout
&& ~cout_q_full;

View file

@ -437,10 +437,12 @@ module VX_bank #(
`UNUSED_PIN (enqueue_almfull),
`UNUSED_PIN (enqueue_full),
// lookup
.lookup_ready (mrsq_pop),
// lookup
.lookup_addr (lookup_addr),
.lookup_match (mshr_pending),
// fill update
.fill_update (mrsq_pop),
// schedule
.schedule (mshr_pop),
@ -475,8 +477,7 @@ module VX_bank #(
end
VX_skid_buffer #(
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
.USE_FASTREG (NUM_BANKS == 1)
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS)
) core_rsp_req (
.clk (clk),
.reset (reset),

View file

@ -106,8 +106,7 @@ module VX_cache_core_rsp_merge #(
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
VX_skid_buffer #(
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)),
.USE_FASTREG (1)
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH))
) pipe_reg (
.clk (clk),
.reset (reset),
@ -155,8 +154,7 @@ module VX_cache_core_rsp_merge #(
for (genvar i = 0; i < NUM_REQS; i++) begin
VX_skid_buffer #(
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH),
.USE_FASTREG (1)
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH)
) pipe_reg (
.clk (clk),
.reset (reset),

View file

@ -42,11 +42,13 @@ module VX_miss_resrv #(
output wire enqueue_full,
output wire enqueue_almfull,
// lookup
input wire lookup_ready,
// lookup
input wire [`LINE_ADDR_WIDTH-1:0] lookup_addr,
output wire lookup_match,
// fill update
input wire fill_update,
// schedule
input wire schedule,
output wire schedule_valid,
@ -74,8 +76,6 @@ module VX_miss_resrv #(
assign valid_address_match[i] = valid_table[i] && (addr_table[i] == lookup_addr);
end
assign lookup_match = (| valid_address_match);
wire push_new = enqueue && !enqueue_is_mshr;
wire restore = enqueue && enqueue_is_mshr;
@ -93,10 +93,9 @@ module VX_miss_resrv #(
used_r <= 0;
alm_full_r <= 0;
full_r <= 0;
end else begin
end else begin
// WARNING: lookup should happen enqueue for ready_table's correct update
if (lookup_ready) begin
if (fill_update) begin
// unlock pending requests for scheduling
ready_table <= ready_table | valid_address_match;
end
@ -170,6 +169,7 @@ module VX_miss_resrv #(
.dout(schedule_data)
);
assign lookup_match = (| valid_address_match);
assign schedule_valid = ready_table[schedule_ptr];
assign schedule_addr = addr_table[schedule_ptr];
assign enqueue_almfull = alm_full_r;
@ -177,7 +177,7 @@ module VX_miss_resrv #(
`ifdef DBG_PRINT_CACHE_MSHR
always @(posedge clk) begin
if (lookup_ready || schedule || enqueue || dequeue) begin
if (fill_update || schedule || enqueue || dequeue) begin
if (schedule)
$display("%t: cache%0d:%0d mshr-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(schedule_addr, BANK_ID), deq_debug_wid, deq_debug_pc);
if (enqueue) begin

View file

@ -93,42 +93,37 @@ module VX_nc_bypass #(
// core request handling
reg [NUM_REQS-1:0] core_req_valid_out_r;
reg [NUM_REQS-1:0] core_req_ready_in_r;
wire [NUM_REQS-1:0] core_req_valid_in_nc;
wire [NUM_REQS-1:0] core_req_nc_sel;
wire [NUM_REQS-1:0] core_req_nc_tids;
wire [`UP(CORE_REQ_TIDW)-1:0] core_req_nc_tid;
wire core_req_nc_valid;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_valid_in_nc[i] = core_req_valid_in[i] && core_req_tag_in[i][NC_TAG_BIT];
assign core_req_nc_tids[i] = core_req_tag_in[i][NC_TAG_BIT];
end
always @(*) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
if (core_req_valid_in_nc[i]) begin
core_req_valid_out_r[i] = 0;
end else begin
core_req_valid_out_r[i] = core_req_valid_in[i];
end
end
end
wire [`UP(CORE_REQ_TIDW)-1:0] core_req_nc_tid;
wire core_req_nc_valid;
assign core_req_valid_in_nc = core_req_valid_in & core_req_nc_tids;
VX_priority_encoder #(
.N (NUM_REQS)
) core_req_sel (
.data_in (core_req_valid_in_nc),
.index (core_req_nc_tid),
`UNUSED_PIN (onehot),
.onehot (core_req_nc_sel),
.valid_out (core_req_nc_valid)
);
assign core_req_valid_out = core_req_valid_in & ~core_req_nc_tids;
`UNUSED_VAR (core_req_nc_sel)
if (NUM_REQS > 1) begin
always @(*) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
if (core_req_valid_in_nc[i]) begin
core_req_ready_in_r[i] = mem_req_ready_out && (core_req_nc_tid == CORE_REQ_TIDW'(i));
core_req_ready_in_r[i] = ~mem_req_valid_in && mem_req_ready_out && core_req_nc_sel[i];
end else begin
core_req_ready_in_r[i] = core_req_ready_out[i];
end
@ -138,14 +133,13 @@ module VX_nc_bypass #(
`UNUSED_VAR (core_req_nc_tid)
always @(*) begin
if (core_req_valid_in_nc) begin
core_req_ready_in_r = mem_req_ready_out;
core_req_ready_in_r = ~mem_req_valid_in && mem_req_ready_out;
end else begin
core_req_ready_in_r = core_req_ready_out;
end
end
end
assign core_req_valid_out = core_req_valid_out_r;
assign core_req_rw_out = core_req_rw_in;
assign core_req_addr_out = core_req_addr_in;
assign core_req_byteen_out = core_req_byteen_in;
@ -161,91 +155,91 @@ module VX_nc_bypass #(
reg [MEM_ADDR_WIDTH-1:0] mem_req_addr_out_r;
reg [MEM_DATA_WIDTH-1:0] mem_req_data_out_r;
reg [MEM_TAG_WIDTH-1:0] mem_req_tag_out_r;
reg mem_req_ready_in_r;
reg mem_req_ready_in_r;
always @(*) begin
if (core_req_nc_valid) begin
if (mem_req_valid_in) begin
mem_req_valid_out_r = 1;
mem_req_ready_in_r = 0;
end else begin
mem_req_valid_out_r = mem_req_valid_in;
mem_req_ready_in_r = mem_req_ready_out;
end else begin
mem_req_valid_out_r = core_req_nc_valid;
mem_req_ready_in_r = 0;
end
end
if (NUM_REQS > 1) begin
always @(*) begin
if (core_req_nc_valid) begin
if (mem_req_valid_in) begin
mem_req_rw_out_r = mem_req_rw_in;
mem_req_addr_out_r = mem_req_addr_in;
mem_req_data_out_r = mem_req_data_in;
end else begin
mem_req_rw_out_r = core_req_rw_in[core_req_nc_tid];
mem_req_addr_out_r = core_req_addr_in[core_req_nc_tid][D +: MEM_ADDR_WIDTH];
for (integer i = 0; i < P; ++i) begin
mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in[core_req_nc_tid];
end
end else begin
mem_req_rw_out_r = mem_req_rw_in;
mem_req_addr_out_r = mem_req_addr_in;
mem_req_data_out_r = mem_req_data_in;
end
end
if (D != 0) begin
wire [D-1:0] req_addr_idx = core_req_addr_in[core_req_nc_tid][D-1:0];
always @(*) begin
if (core_req_nc_valid) begin
if (mem_req_valid_in) begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end else begin
mem_req_byteen_out_r = 0;
mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in[core_req_nc_tid];
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, req_addr_idx, core_req_tag_in[core_req_nc_tid]});
end else begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end
end
end else begin
always @(*) begin
if (core_req_nc_valid) begin
mem_req_byteen_out_r = core_req_byteen_in[core_req_nc_tid];
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in[core_req_nc_tid]});
end else begin
if (mem_req_valid_in) begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end else begin
mem_req_byteen_out_r = core_req_byteen_in[core_req_nc_tid];
mem_req_tag_out_r = MEM_TAG_WIDTH'({core_req_nc_tid, core_req_tag_in[core_req_nc_tid]});
end
end
end
end else begin
always @(*) begin
if (core_req_nc_valid) begin
if (mem_req_valid_in) begin
mem_req_rw_out_r = mem_req_rw_in;
mem_req_addr_out_r = mem_req_addr_in;
mem_req_data_out_r = mem_req_data_in;
end else begin
mem_req_rw_out_r = core_req_rw_in;
mem_req_addr_out_r = core_req_addr_in[0][D +: MEM_ADDR_WIDTH];
for (integer i = 0; i < P; ++i) begin
mem_req_data_out_r[i * CORE_DATA_WIDTH +: CORE_DATA_WIDTH] = core_req_data_in;
end
end else begin
mem_req_rw_out_r = mem_req_rw_in;
mem_req_addr_out_r = mem_req_addr_in;
mem_req_data_out_r = mem_req_data_in;
end
end
if (D != 0) begin
wire [D-1:0] req_addr_idx = core_req_addr_in[0][D-1:0];
always @(*) begin
if (core_req_nc_valid) begin
if (mem_req_valid_in) begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end else begin
mem_req_byteen_out_r = 0;
mem_req_byteen_out_r[req_addr_idx * CORE_DATA_SIZE +: CORE_DATA_SIZE] = core_req_byteen_in;
mem_req_tag_out_r = MEM_TAG_WIDTH'({req_addr_idx, core_req_tag_in});
end else begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end
end
end else begin
always @(*) begin
if (core_req_nc_valid) begin
mem_req_byteen_out_r = core_req_byteen_in;
mem_req_tag_out_r = MEM_TAG_WIDTH'(core_req_tag_in);
end else begin
if (mem_req_valid_in) begin
mem_req_byteen_out_r = mem_req_byteen_in;
mem_req_tag_out_r = mem_req_tag_in;
end else begin
mem_req_byteen_out_r = core_req_byteen_in;
mem_req_tag_out_r = MEM_TAG_WIDTH'(core_req_tag_in);
end
end
end
@ -272,26 +266,25 @@ module VX_nc_bypass #(
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
if (NUM_RSP_TAGS > 1) begin
always @(*) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
if (is_mem_rsp_nc && (rsp_tid == CORE_REQ_TIDW'(i))) begin
core_rsp_valid_out_r[i] = 1;
core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
core_rsp_ready_in_r[i] = 0;
end else begin
core_rsp_valid_out_r[i] = core_rsp_valid_in[i];
core_rsp_tag_out_r[i] = core_rsp_tag_in[i];
core_rsp_ready_in_r[i] = core_rsp_ready_out[i];
if (is_mem_rsp_nc) begin
core_rsp_valid_out_r = 0;
core_rsp_valid_out_r[rsp_tid] = 1;
for (integer i = 0; i < NUM_REQS; ++i) begin
core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
end
end
core_rsp_ready_in_r = 0;
end else begin
core_rsp_valid_out_r = core_rsp_valid_in;
core_rsp_tag_out_r = core_rsp_tag_in;
core_rsp_ready_in_r = core_rsp_ready_out;
end
end
end else begin
always @(*) begin
if (is_mem_rsp_nc) begin
core_rsp_valid_out_r = 0;
core_rsp_valid_out_r[rsp_tid] = 1;
for (integer i = 0; i < NUM_RSP_TAGS; ++i) begin
core_rsp_tag_out_r[i] = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
end
core_rsp_tag_out_r = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
core_rsp_ready_in_r = 0;
end else begin
core_rsp_valid_out_r = core_rsp_valid_in;
@ -301,7 +294,7 @@ module VX_nc_bypass #(
end
end
end else begin
always @(*) begin
always @(*) begin
if (is_mem_rsp_nc) begin
core_rsp_valid_out_r = 1;
core_rsp_tag_out_r = mem_rsp_tag_in[CORE_TAG_WIDTH-1:0];
@ -316,49 +309,23 @@ module VX_nc_bypass #(
if (D != 0) begin
wire [D-1:0] rsp_addr_idx = mem_rsp_tag_in[CORE_TAG_WIDTH +: D];
if (NUM_RSP_TAGS > 1) begin
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
always @(*) begin
always @(*) begin
if (is_mem_rsp_nc) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
if (is_mem_rsp_nc && (rsp_tid == CORE_REQ_TIDW'(i))) begin
core_rsp_data_out_r[i] = mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
end else begin
core_rsp_data_out_r[i] = core_rsp_data_in[i];
end
end
end
end else begin
always @(*) begin
if (is_mem_rsp_nc) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
core_rsp_data_out_r[i] = mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
end
end else begin
core_rsp_data_out_r = core_rsp_data_in;
core_rsp_data_out_r[i] = mem_rsp_data_in[rsp_addr_idx * CORE_DATA_WIDTH +: CORE_DATA_WIDTH];
end
end else begin
core_rsp_data_out_r = core_rsp_data_in;
end
end
end else begin
if (NUM_RSP_TAGS > 1) begin
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
always @(*) begin
end else begin
always @(*) begin
if (is_mem_rsp_nc) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
if (is_mem_rsp_nc && (rsp_tid == CORE_REQ_TIDW'(i))) begin
core_rsp_data_out_r[i] = mem_rsp_data_in;
end else begin
core_rsp_data_out_r[i] = core_rsp_data_in[i];
end
end
end
end else begin
always @(*) begin
if (is_mem_rsp_nc) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
core_rsp_data_out_r[i] = mem_rsp_data_in;
end
end else begin
core_rsp_data_out_r = core_rsp_data_in;
core_rsp_data_out_r[i] = mem_rsp_data_in;
end
end else begin
core_rsp_data_out_r = core_rsp_data_in;
end
end
end
@ -370,39 +337,16 @@ module VX_nc_bypass #(
// memory response handling
reg mem_rsp_valid_out_r;
reg mem_rsp_ready_in_r;
always @(*) begin
if (is_mem_rsp_nc) begin
mem_rsp_valid_out_r = 0;
end else begin
mem_rsp_valid_out_r = mem_rsp_valid_in;
end
end
assign mem_rsp_valid_out = mem_rsp_valid_in && ~mem_rsp_tag_in[NC_TAG_BIT];
if (NUM_RSP_TAGS > 1) begin
wire [CORE_REQ_TIDW-1:0] rsp_tid = mem_rsp_tag_in[(CORE_TAG_WIDTH + D) +: CORE_REQ_TIDW];
always @(*) begin
if (is_mem_rsp_nc) begin
mem_rsp_ready_in_r = core_rsp_ready_out[rsp_tid];
end else begin
mem_rsp_ready_in_r = mem_rsp_ready_out;
end
end
assign mem_rsp_ready_in = is_mem_rsp_nc ? core_rsp_ready_out[rsp_tid] : mem_rsp_ready_out;
end else begin
always @(*) begin
if (is_mem_rsp_nc) begin
mem_rsp_ready_in_r = core_rsp_ready_out;
end else begin
mem_rsp_ready_in_r = mem_rsp_ready_out;
end
end
assign mem_rsp_ready_in = is_mem_rsp_nc ? core_rsp_ready_out : mem_rsp_ready_out;
end
assign mem_rsp_valid_out = mem_rsp_valid_out_r;
assign mem_rsp_data_out = mem_rsp_data_in;
assign mem_rsp_tag_out = mem_rsp_tag_in;
assign mem_rsp_ready_in = mem_rsp_ready_in_r;
assign mem_rsp_data_out = mem_rsp_data_in;
assign mem_rsp_tag_out = mem_rsp_tag_in;
endmodule

View file

@ -86,22 +86,6 @@ module VX_fp_cvt #(
assign input_sign[i] = is_itof ? int_sign : fmt_sign;
end
wire [LANES-1:0][LZC_RESULT_WIDTH-1:0] renorm_shamt; // renormalization shift amount
wire [LANES-1:0] mant_is_zero; // for integer zeroes
for (genvar i = 0; i < LANES; ++i) begin
wire mant_is_nonzero;
VX_lzc #(
.WIDTH (INT_MAN_WIDTH),
.MODE (1)
) lzc (
.in_i (encoded_mant[i]),
.cnt_o (renorm_shamt[i]),
.valid_o (mant_is_nonzero)
);
assign mant_is_zero[i] = ~mant_is_nonzero;
end
// Pipeline stage0
wire valid_in_s0;
@ -112,29 +96,42 @@ module VX_fp_cvt #(
fp_type_t [LANES-1:0] in_a_type_s0;
wire [LANES-1:0] input_sign_s0;
wire [LANES-1:0][INT_EXP_WIDTH-1:0] fmt_exponent_s0;
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0;
wire [LANES-1:0][LZC_RESULT_WIDTH-1:0] renorm_shamt_s0;
wire [LANES-1:0] mant_is_zero_s0;
wire [LANES-1:0][INT_MAN_WIDTH-1:0] encoded_mant_s0;
wire stall;
VX_pipe_register #(
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 +INT_EXP_WIDTH + INT_MAN_WIDTH + LZC_RESULT_WIDTH + 1)),
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + INT_EXP_WIDTH + INT_MAN_WIDTH)),
.RESETW (1)
) pipe_reg0 (
.clk (clk),
.reset (reset),
.enable (~stall),
.data_in ({valid_in, tag_in, is_itof, !is_signed, frm, in_a_type, input_sign, fmt_exponent, encoded_mant, renorm_shamt, mant_is_zero}),
.data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0, renorm_shamt_s0, mant_is_zero_s0})
.data_in ({valid_in, tag_in, is_itof, !is_signed, frm, in_a_type, input_sign, fmt_exponent, encoded_mant}),
.data_out ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, fmt_exponent_s0, encoded_mant_s0})
);
// Normalization
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant; // normalized input mantissa
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp; // unbiased true exponent
wire [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
wire [LANES-1:0][LZC_RESULT_WIDTH-1:0] renorm_shamt_s0; // renormalization shift amount
wire [LANES-1:0] mant_is_zero_s0; // for integer zeroes
for (genvar i = 0; i < LANES; ++i) begin
wire mant_is_nonzero;
VX_lzc #(
.WIDTH (INT_MAN_WIDTH),
.MODE (1)
) lzc (
.in_i (encoded_mant_s0[i]),
.cnt_o (renorm_shamt_s0[i]),
.valid_o (mant_is_nonzero)
);
assign mant_is_zero_s0[i] = ~mant_is_nonzero;
end
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant_s0; // normalized input mantissa
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s0; // unbiased true exponent
for (genvar i = 0; i < LANES; ++i) begin
`IGNORE_WARNINGS_BEGIN
// Input mantissa needs to be normalized
@ -142,7 +139,7 @@ module VX_fp_cvt #(
wire [INT_EXP_WIDTH-1:0] int_input_exp;
// Realign input mantissa, append zeroes if destination is wider
assign input_mant[i] = encoded_mant_s0[i] << renorm_shamt_s0[i];
assign input_mant_s0[i] = encoded_mant_s0[i] << renorm_shamt_s0[i];
// Unbias exponent and compensate for shift
assign fp_input_exp = fmt_exponent_s0[i] +
@ -152,96 +149,118 @@ module VX_fp_cvt #(
assign int_input_exp = (INT_MAN_WIDTH-1) - {1'b0, renorm_shamt_s0[i]};
assign input_exp[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
// Rebias the exponent
assign destination_exp[i] = input_exp[i] + EXP_BIAS;
assign input_exp_s0[i] = is_itof_s0 ? int_input_exp : fp_input_exp;
`IGNORE_WARNINGS_END
end
// Perform adjustments to mantissa and exponent
wire [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant_s0;
wire [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt_s0;
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s0;
wire [LANES-1:0] of_before_round_s0;
for (genvar i = 0; i < LANES; ++i) begin
reg [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
reg [SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
reg [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
reg of_before_round;
always @(*) begin
`IGNORE_WARNINGS_BEGIN
// Default assignment
final_exp = destination_exp[i]; // take exponent as is, only look at lower bits
preshift_mant = {input_mant[i], 33'b0}; // Place mantissa to the left of the shifter
denorm_shamt = 0; // right of mantissa
of_before_round = 1'b0;
// Handle INT casts
if (is_itof_s0) begin
if ($signed(destination_exp[i]) >= $signed(2**EXP_BITS-1)) begin
// Overflow or infinities (for proper rounding)
final_exp = (2**EXP_BITS-2); // largest normal value
preshift_mant = ~0; // largest normal value and RS bits set
of_before_round = 1'b1;
end else if ($signed(destination_exp[i]) < $signed(-MAN_BITS)) begin
// Limit the shift to retain sticky bits
final_exp = 0; // denormal result
denorm_shamt = denorm_shamt + (2 + MAN_BITS); // to sticky
end else if ($signed(destination_exp[i]) < $signed(1)) begin
// Denormalize underflowing values
final_exp = 0; // denormal result
denorm_shamt = denorm_shamt + 1 - destination_exp[i]; // adjust right shifting
end
end else begin
if ($signed(input_exp[i]) >= $signed((MAX_INT_WIDTH-1) + unsigned_s0)) begin
// overflow: when converting to unsigned the range is larger by one
denorm_shamt = SHAMT_BITS'(0); // prevent shifting
of_before_round = 1'b1;
end else if ($signed(input_exp[i]) < $signed(-1)) begin
// underflow
denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
end else begin
// By default right shift mantissa to be an integer
denorm_shamt = (MAX_INT_WIDTH-1) - input_exp[i];
end
end
`IGNORE_WARNINGS_END
end
assign preshift_mant_s0[i] = preshift_mant;
assign denorm_shamt_s0[i] = denorm_shamt;
assign final_exp_s0[i] = final_exp;
assign of_before_round_s0[i] = of_before_round;
end
// Pipeline stage1
wire valid_in_s1;
wire [TAGW-1:0] tag_in_s1;
wire is_itof_s1;
wire unsigned_s1;
wire [2:0] rnd_mode_s1;
fp_type_t [LANES-1:0] in_a_type_s1;
fp_type_t [LANES-1:0] in_a_type_s1;
wire [LANES-1:0] input_sign_s1;
wire [LANES-1:0] mant_is_zero_s1;
wire [LANES-1:0] input_sign_s1;
wire [LANES-1:0][2*INT_MAN_WIDTH:0] preshift_mant_s1;
wire [LANES-1:0][SHAMT_BITS-1:0] denorm_shamt_s1;
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s1;
wire [LANES-1:0] of_before_round_s1;
wire [LANES-1:0][INT_MAN_WIDTH-1:0] input_mant_s1;
wire [LANES-1:0][INT_EXP_WIDTH-1:0] input_exp_s1;
VX_pipe_register #(
.DATAW (1 + TAGW + 1 + 1 + `FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + SHAMT_BITS + INT_EXP_WIDTH + 1)),
.DATAW (1 + TAGW + 1 + `FRM_BITS + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + INT_MAN_WIDTH + INT_EXP_WIDTH)),
.RESETW (1)
) pipe_reg1 (
.clk (clk),
.reset (reset),
.enable (~stall),
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, mant_is_zero_s0, input_sign_s0, preshift_mant_s0, denorm_shamt_s0, final_exp_s0, of_before_round_s0}),
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, preshift_mant_s1, denorm_shamt_s1, final_exp_s1, of_before_round_s1})
.data_in ({valid_in_s0, tag_in_s0, is_itof_s0, unsigned_s0, rnd_mode_s0, in_a_type_s0, input_sign_s0, mant_is_zero_s0, input_mant_s0, input_exp_s0}),
.data_out ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, input_sign_s1, mant_is_zero_s1, input_mant_s1, input_exp_s1})
);
// Perform adjustments to mantissa and exponent
wire [LANES-1:0][2*INT_MAN_WIDTH:0] destination_mant_s1;
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s1;
wire [LANES-1:0] of_before_round_s1;
for (genvar i = 0; i < LANES; ++i) begin
wire [INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination
reg [2*INT_MAN_WIDTH:0] preshift_mant; // mantissa before final shift
reg [SHAMT_BITS-1:0] denorm_shamt; // shift amount for denormalization
reg [INT_EXP_WIDTH-1:0] final_exp; // after eventual adjustments
reg of_before_round;
// Rebias the exponent
assign destination_exp = input_exp_s1[i] + EXP_BIAS;
always @(*) begin
`IGNORE_WARNINGS_BEGIN
// Default assignment
final_exp = destination_exp; // take exponent as is, only look at lower bits
preshift_mant = {input_mant_s1[i], 33'b0}; // Place mantissa to the left of the shifter
denorm_shamt = 0; // right of mantissa
of_before_round = 1'b0;
// Handle INT casts
if (is_itof_s1) begin
if ($signed(destination_exp) >= $signed(2**EXP_BITS-1)) begin
// Overflow or infinities (for proper rounding)
final_exp = (2**EXP_BITS-2); // largest normal value
preshift_mant = ~0; // largest normal value and RS bits set
of_before_round = 1'b1;
end else if ($signed(destination_exp) < $signed(-MAN_BITS)) begin
// Limit the shift to retain sticky bits
final_exp = 0; // denormal result
denorm_shamt = denorm_shamt + (2 + MAN_BITS); // to sticky
end else if ($signed(destination_exp) < $signed(1)) begin
// Denormalize underflowing values
final_exp = 0; // denormal result
denorm_shamt = denorm_shamt + 1 - destination_exp; // adjust right shifting
end
end else begin
if ($signed(input_exp_s1[i]) >= $signed((MAX_INT_WIDTH-1) + unsigned_s1)) begin
// overflow: when converting to unsigned the range is larger by one
denorm_shamt = SHAMT_BITS'(0); // prevent shifting
of_before_round = 1'b1;
end else if ($signed(input_exp_s1[i]) < $signed(-1)) begin
// underflow
denorm_shamt = MAX_INT_WIDTH + 1; // all bits go to the sticky
end else begin
// By default right shift mantissa to be an integer
denorm_shamt = (MAX_INT_WIDTH-1) - input_exp_s1[i];
end
end
`IGNORE_WARNINGS_END
end
assign destination_mant_s1[i] = preshift_mant >> denorm_shamt;
assign final_exp_s1[i] = final_exp;
assign of_before_round_s1[i] = of_before_round;
end
// Pipeline stage2
wire valid_in_s2;
wire [TAGW-1:0] tag_in_s2;
wire is_itof_s2;
wire unsigned_s2;
wire [2:0] rnd_mode_s2;
fp_type_t [LANES-1:0] in_a_type_s2;
wire [LANES-1:0] mant_is_zero_s2;
wire [LANES-1:0] input_sign_s2;
wire [LANES-1:0][2*INT_MAN_WIDTH:0] destination_mant_s2;
wire [LANES-1:0][INT_EXP_WIDTH-1:0] final_exp_s2;
wire [LANES-1:0] of_before_round_s2;
VX_pipe_register #(
.DATAW (1 + TAGW + 1 + 1 + `FRM_BITS + LANES * ($bits(fp_type_t) + 1 + 1 + (2*INT_MAN_WIDTH+1) + INT_EXP_WIDTH + 1)),
.RESETW (1)
) pipe_reg2 (
.clk (clk),
.reset (reset),
.enable (~stall),
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, rnd_mode_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, destination_mant_s1, final_exp_s1, of_before_round_s1}),
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, rnd_mode_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, destination_mant_s2, final_exp_s2, of_before_round_s2})
);
wire [LANES-1:0] rounded_sign;
@ -251,41 +270,37 @@ module VX_fp_cvt #(
// Rouding and classification
for (genvar i = 0; i < LANES; ++i) begin
wire [2*INT_MAN_WIDTH:0] destination_mant;
wire [MAN_BITS-1:0] final_mant; // mantissa after adjustments
wire [MAX_INT_WIDTH-1:0] final_int; // integer shifted in position
wire [1:0] round_sticky_bits;
wire [31:0] fmt_pre_round_abs;
wire [31:0] pre_round_abs;
// Mantissa adjustment shift
assign destination_mant = preshift_mant_s1[i] >> denorm_shamt_s1[i];
// Extract final mantissa and round bit, discard the normal bit (for FP)
assign {final_mant, fp_round_sticky_bits[i][1]} = destination_mant[2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1];
assign {final_int, int_round_sticky_bits[i][1]} = destination_mant[2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1];
assign {final_mant, fp_round_sticky_bits[i][1]} = destination_mant_s2[i][2*INT_MAN_WIDTH-1 : 2*INT_MAN_WIDTH-1 - (MAN_BITS+1) + 1];
assign {final_int, int_round_sticky_bits[i][1]} = destination_mant_s2[i][2*INT_MAN_WIDTH : 2*INT_MAN_WIDTH - (MAX_INT_WIDTH+1) + 1];
// Collapse sticky bits
assign fp_round_sticky_bits[i][0] = (| destination_mant[NUM_FP_STICKY-1:0]);
assign int_round_sticky_bits[i][0] = (| destination_mant[NUM_INT_STICKY-1:0]);
assign fp_round_sticky_bits[i][0] = (| destination_mant_s2[i][NUM_FP_STICKY-1:0]);
assign int_round_sticky_bits[i][0] = (| destination_mant_s2[i][NUM_INT_STICKY-1:0]);
// select RS bits for destination operation
assign round_sticky_bits = is_itof_s1 ? fp_round_sticky_bits[i] : int_round_sticky_bits[i];
assign round_sticky_bits = is_itof_s2 ? fp_round_sticky_bits[i] : int_round_sticky_bits[i];
// Pack exponent and mantissa into proper rounding form
assign fmt_pre_round_abs = {1'b0, final_exp_s1[i][EXP_BITS-1:0], final_mant[MAN_BITS-1:0]};
assign fmt_pre_round_abs = {1'b0, final_exp_s2[i][EXP_BITS-1:0], final_mant[MAN_BITS-1:0]};
// Select output with destination format and operation
assign pre_round_abs = is_itof_s1 ? fmt_pre_round_abs : final_int;
assign pre_round_abs = is_itof_s2 ? fmt_pre_round_abs : final_int;
// Perform the rounding
VX_fp_rounding #(
.DAT_WIDTH (32)
) fp_rounding (
.abs_value_i (pre_round_abs),
.sign_i (input_sign_s1[i]),
.sign_i (input_sign_s2[i]),
.round_sticky_bits_i(round_sticky_bits),
.rnd_mode_i (rnd_mode_s1),
.rnd_mode_i (rnd_mode_s2),
.effective_subtraction_i(1'b0),
.abs_rounded_o (rounded_abs[i]),
.sign_o (rounded_sign[i]),
@ -293,28 +308,28 @@ module VX_fp_cvt #(
);
end
// Pipeline stage2
// Pipeline stage3
wire valid_in_s2;
wire [TAGW-1:0] tag_in_s2;
wire is_itof_s2;
wire unsigned_s2;
fp_type_t [LANES-1:0] in_a_type_s2;
wire [LANES-1:0] mant_is_zero_s2;
wire [LANES-1:0] input_sign_s2;
wire [LANES-1:0] rounded_sign_s2;
wire [LANES-1:0][31:0] rounded_abs_s2;
wire [LANES-1:0] of_before_round_s2;
wire valid_in_s3;
wire [TAGW-1:0] tag_in_s3;
wire is_itof_s3;
wire unsigned_s3;
fp_type_t [LANES-1:0] in_a_type_s3;
wire [LANES-1:0] mant_is_zero_s3;
wire [LANES-1:0] input_sign_s3;
wire [LANES-1:0] rounded_sign_s3;
wire [LANES-1:0][31:0] rounded_abs_s3;
wire [LANES-1:0] of_before_round_s3;
VX_pipe_register #(
.DATAW (1 + TAGW + 1 + 1 + LANES * ($bits(fp_type_t) + 1 + 1 + 32 + 1 + 1)),
.RESETW (1)
) pipe_reg2 (
) pipe_reg3 (
.clk (clk),
.reset (reset),
.enable (~stall),
.data_in ({valid_in_s1, tag_in_s1, is_itof_s1, unsigned_s1, in_a_type_s1, mant_is_zero_s1, input_sign_s1, rounded_abs, rounded_sign, of_before_round_s1}),
.data_out ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs_s2, rounded_sign_s2, of_before_round_s2})
.data_in ({valid_in_s2, tag_in_s2, is_itof_s2, unsigned_s2, in_a_type_s2, mant_is_zero_s2, input_sign_s2, rounded_abs, rounded_sign, of_before_round_s2}),
.data_out ({valid_in_s3, tag_in_s3, is_itof_s3, unsigned_s3, in_a_type_s3, mant_is_zero_s3, input_sign_s3, rounded_abs_s3, rounded_sign_s3, of_before_round_s3})
);
wire [LANES-1:0] of_after_round;
@ -325,14 +340,14 @@ module VX_fp_cvt #(
for (genvar i = 0; i < LANES; ++i) begin
// Assemble regular result, nan box short ones. Int zeroes need to be detected
assign fmt_result[i] = (is_itof_s2 & mant_is_zero_s2[i]) ? 0 : {rounded_sign_s2[i], rounded_abs_s2[i][EXP_BITS+MAN_BITS-1:0]};
assign fmt_result[i] = (is_itof_s3 & mant_is_zero_s3[i]) ? 0 : {rounded_sign_s3[i], rounded_abs_s3[i][EXP_BITS+MAN_BITS-1:0]};
// Classification after rounding select by destination format
assign uf_after_round[i] = (rounded_abs_s2[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == 0); // denormal
assign of_after_round[i] = (rounded_abs_s2[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == ~0); // inf exp.
assign uf_after_round[i] = (rounded_abs_s3[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == 0); // denormal
assign of_after_round[i] = (rounded_abs_s3[i][EXP_BITS+MAN_BITS-1:MAN_BITS] == ~0); // inf exp.
// Negative integer result needs to be brought into two's complement
assign rounded_int_res[i] = rounded_sign_s2[i] ? (-rounded_abs_s2[i]) : rounded_abs_s2[i];
assign rounded_int_res[i] = rounded_sign_s3[i] ? (-rounded_abs_s3[i]) : rounded_abs_s3[i];
assign rounded_int_res_zero[i] = (rounded_int_res[i] == 0);
end
@ -347,13 +362,13 @@ module VX_fp_cvt #(
for (genvar i = 0; i < LANES; ++i) begin
// Detect special case from source format, I2F casts don't produce a special result
assign fp_result_is_special[i] = ~is_itof_s2 & (in_a_type_s2[i].is_zero | in_a_type_s2[i].is_nan);
assign fp_result_is_special[i] = ~is_itof_s3 & (in_a_type_s3[i].is_zero | in_a_type_s3[i].is_nan);
// Signalling input NaNs raise invalid flag, otherwise no flags set
assign fp_special_status[i] = in_a_type_s2[i].is_signaling ? {1'b1, 4'h0} : 5'h0; // invalid operation
assign fp_special_status[i] = in_a_type_s3[i].is_signaling ? {1'b1, 4'h0} : 5'h0; // invalid operation
// Assemble result according to destination format
assign fp_special_result[i] = in_a_type_s2[i].is_zero ? (32'(input_sign_s2) << 31) // signed zero
assign fp_special_result[i] = in_a_type_s3[i].is_zero ? (32'(input_sign_s3) << 31) // signed zero
: {1'b0, QNAN_EXPONENT, QNAN_MANTISSA}; // qNaN
end
@ -366,20 +381,20 @@ module VX_fp_cvt #(
for (genvar i = 0; i < LANES; ++i) begin
// Assemble result according to destination format
always @(*) begin
if (input_sign_s2[i] && !in_a_type_s2[i].is_nan) begin
if (input_sign_s3[i] && !in_a_type_s3[i].is_nan) begin
int_special_result[i][30:0] = 0; // alone yields 2**(31)-1
int_special_result[i][31] = ~unsigned_s2; // for unsigned casts yields 2**31
int_special_result[i][31] = ~unsigned_s3; // for unsigned casts yields 2**31
end else begin
int_special_result[i][30:0] = 2**(31) - 1; // alone yields 2**(31)-1
int_special_result[i][31] = unsigned_s2; // for unsigned casts yields 2**31
int_special_result[i][31] = unsigned_s3; // for unsigned casts yields 2**31
end
end
// Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
assign int_result_is_special[i] = in_a_type_s2[i].is_nan
| in_a_type_s2[i].is_inf
| of_before_round_s2[i]
| (input_sign_s2[i] & unsigned_s2 & ~rounded_int_res_zero[i]);
assign int_result_is_special[i] = in_a_type_s3[i].is_nan
| in_a_type_s3[i].is_inf
| of_before_round_s3[i]
| (input_sign_s3[i] & unsigned_s3 & ~rounded_int_res_zero[i]);
// All integer special cases are invalid
assign int_special_status[i] = {1'b1, 4'h0};
@ -395,12 +410,12 @@ module VX_fp_cvt #(
fflags_t fp_status, int_status;
wire [31:0] fp_result, int_result;
wire inexact = is_itof_s2 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f;
: (| fp_round_sticky_bits[i]) | (~in_a_type_s2[i].is_inf & (of_before_round_s2[i] | of_after_round[i]));
wire inexact = is_itof_s3 ? (| fp_round_sticky_bits[i]) // overflow is invalid in i2f;
: (| fp_round_sticky_bits[i]) | (~in_a_type_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i]));
assign fp_regular_status.NV = is_itof_s2 & (of_before_round_s2[i] | of_after_round[i]); // overflow is invalid for I2F casts
assign fp_regular_status.NV = is_itof_s3 & (of_before_round_s3[i] | of_after_round[i]); // overflow is invalid for I2F casts
assign fp_regular_status.DZ = 1'b0; // no divisions
assign fp_regular_status.OF = ~is_itof_s2 & (~in_a_type_s2[i].is_inf & (of_before_round_s2[i] | of_after_round[i])); // inf casts no OF
assign fp_regular_status.OF = ~is_itof_s3 & (~in_a_type_s3[i].is_inf & (of_before_round_s3[i] | of_after_round[i])); // inf casts no OF
assign fp_regular_status.UF = uf_after_round[i] & inexact;
assign fp_regular_status.NX = inexact;
@ -413,8 +428,8 @@ module VX_fp_cvt #(
assign int_status = int_result_is_special[i] ? int_special_status[i] : int_regular_status;
// Select output depending on special case detection
assign tmp_result[i] = is_itof_s2 ? fp_result : int_result;
assign tmp_fflags[i] = is_itof_s2 ? fp_status : int_status;
assign tmp_result[i] = is_itof_s3 ? fp_result : int_result;
assign tmp_fflags[i] = is_itof_s3 ? fp_status : int_status;
end
assign stall = ~ready_out && valid_out;
@ -422,11 +437,11 @@ module VX_fp_cvt #(
VX_pipe_register #(
.DATAW (1 + TAGW + (LANES * 32) + (LANES * `FFG_BITS)),
.RESETW (1)
) pipe_reg3 (
) pipe_reg4 (
.clk (clk),
.reset (reset),
.enable (!stall),
.data_in ({valid_in_s2, tag_in_s2, tmp_result, tmp_fflags}),
.data_in ({valid_in_s3, tag_in_s3, tmp_result, tmp_fflags}),
.data_out ({valid_out, tag_out, result, fflags})
);
@ -434,4 +449,4 @@ module VX_fp_cvt #(
assign has_fflags = 1'b1;
endmodule
endmodule

View file

@ -0,0 +1,29 @@
`ifndef VX_IBUFFER_IF
`define VX_IBUFFER_IF
`include "VX_define.vh"
interface VX_ibuffer_if ();
wire valid;
wire [`NW_BITS-1:0] wid;
wire [`NW_BITS-1:0] wid_n;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
wire [`EX_BITS-1:0] ex_type;
wire [`OP_BITS-1:0] op_type;
wire [`MOD_BITS-1:0] op_mod;
wire wb;
wire [`NR_BITS-1:0] rd;
wire [`NR_BITS-1:0] rs1;
wire [`NR_BITS-1:0] rs2;
wire [`NR_BITS-1:0] rs3;
wire [31:0] imm;
wire use_PC;
wire use_imm;
wire [`NUM_REGS-1:0] used_regs;
wire ready;
endinterface
`endif

View file

@ -1,18 +1,29 @@
`include "VX_platform.vh"
// Fast encoder using parallel prefix computation
// Adapter from BaseJump STL: http://bjump.org/index.html
// Adapter from BaseJump STL: http://bjump.org/data_out.html
module VX_onehot_encoder #(
parameter N = 1,
parameter N = 1,
parameter REVERSE = 0,
parameter FAST = 1
parameter FAST = 1,
parameter LN = `LOG2UP(N)
) (
input wire [N-1:0] data_in,
output wire [`LOG2UP(N)-1:0] data_out,
output wire valid
input wire [N-1:0] data_in,
output wire [LN-1:0] data_out,
output wire valid
);
if (FAST) begin
if (N == 1) begin
assign data_out = data_in;
assign valid = data_in;
end else if (N == 2) begin
assign data_out = data_in[!REVERSE];
assign valid = (| data_in);
end else if (FAST) begin
`IGNORE_WARNINGS_BEGIN
localparam levels_lp = $clog2(N);
localparam aligned_width_lp = 1 << $clog2(N);
@ -51,23 +62,34 @@ module VX_onehot_encoder #(
assign data_out = addr[levels_lp][`LOG2UP(N)-1:0];
assign valid = v[levels_lp][0];
`IGNORE_WARNINGS_END
end else begin
end else begin
reg [`LOG2UP(N)-1:0] data_out_r;
reg valid_r;
reg [LN-1:0] index_r;
always @(*) begin
data_out_r = 'x;
for (integer i = 0; i < N; i++) begin
if (data_in[i]) begin
data_out_r = `LOG2UP(N)'(i);
if (REVERSE) begin
always @(*) begin
index_r = 'x;
for (integer i = N-1; i >= 0; --i) begin
if (data_in[i]) begin
index_r = `LOG2UP(N)'(i);
end
end
end
end else begin
always @(*) begin
index_r = 'x;
for (integer i = 0; i < N; i++) begin
if (data_in[i]) begin
index_r = `LOG2UP(N)'(i);
end
end
end
end
assign data_out = data_out_r;
assign data_out = index_r;
assign valid = (| data_in);
end
endmodule

View file

@ -1,7 +1,7 @@
`include "VX_platform.vh"
module VX_priority_encoder #(
parameter N = 1,
parameter N = 1,
parameter REVERSE = 0,
parameter FAST = 1,
parameter LN = `LOG2UP(N)
@ -20,8 +20,8 @@ module VX_priority_encoder #(
end else if (N == 2) begin
assign onehot = {!data_in[REVERSE], data_in[REVERSE]};
assign index = !data_in[REVERSE];
assign onehot = {~data_in[REVERSE], data_in[REVERSE]};
assign index = ~data_in[REVERSE];
assign valid_out = (| data_in);
end else if (FAST) begin
@ -48,7 +48,7 @@ module VX_priority_encoder #(
VX_onehot_encoder #(
.N (N),
.REVERSE (REVERSE)
) b (
) onehot_encoder (
.data_in (onehot),
.data_out (index),
`UNUSED_PIN (valid)
@ -56,35 +56,39 @@ module VX_priority_encoder #(
end else begin
reg [N-1:0] onehot_r;
reg [LN-1:0] index_r;
always @(*) begin
index_r = 'x;
onehot_r = 0;
if (REVERSE) begin
for (integer i = N-1; i >= 0; i--) begin
reg [N-1:0] onehot_r;
if (REVERSE) begin
always @(*) begin
index_r = 'x;
onehot_r = 'x;
for (integer i = 0; i < N; ++i) begin
if (data_in[i]) begin
index_r = LN'(i);
onehot_r = 0;
onehot_r[i] = 1'b1;
break;
end
end
end else begin
for (integer i = 0; i < N; i++) begin
if (data_in[i]) begin
index_r = LN'(i);
onehot_r[i] = 1'b1;
break;
end
end
end
end
end else begin
always @(*) begin
index_r = 'x;
onehot_r = 'x;
for (integer i = N-1; i >= 0; --i) begin
if (data_in[i]) begin
index_r = LN'(i);
onehot_r = 0;
onehot_r[i] = 1'b1;
end
end
end
end
assign index = index_r;
assign onehot = onehot_r;
assign valid_out = (| data_in);
end
assign index = index_r;
assign onehot = onehot_r;
assign valid_out = (| data_in);
end
endmodule

View file

@ -24,39 +24,403 @@ module VX_rr_arbiter #(
assign grant_valid = requests[0];
end else begin
reg [LOG_NUM_REQS-1:0] grant_table [NUM_REQS-1:0];
reg [LOG_NUM_REQS-1:0] state;
always @(*) begin
for (integer i = 0; i < NUM_REQS; i++) begin
grant_table[i] = LOG_NUM_REQS'(i);
for (integer j = 0; j < NUM_REQS; j++) begin
if (requests[(i+j) % NUM_REQS]) begin
grant_table[i] = LOG_NUM_REQS'((i+j) % NUM_REQS);
reg [LOG_NUM_REQS-1:0] grant_index_r;
reg [NUM_REQS-1:0] grant_onehot_r;
reg [LOG_NUM_REQS-1:0] state;
if (NUM_REQS == 2) begin
always @(*) begin
casez ({state, requests})
3'b0_1?: begin grant_onehot_r = 2'b10; grant_index_r = LOG_NUM_REQS'(1); end
3'b0_01: begin grant_onehot_r = 2'b01; grant_index_r = LOG_NUM_REQS'(0); end
3'b1_?1: begin grant_onehot_r = 2'b01; grant_index_r = LOG_NUM_REQS'(0); end
3'b1_10: begin grant_onehot_r = 2'b10; grant_index_r = LOG_NUM_REQS'(1); end
default: begin grant_onehot_r = 'x; grant_index_r = 'x; end
endcase
end
end else if (NUM_REQS == 4) begin
always @(*) begin
casez ({state, requests})
6'b00_??1?: begin grant_onehot_r = 4'b0010; grant_index_r = LOG_NUM_REQS'(1); end
6'b00_?10?: begin grant_onehot_r = 4'b0100; grant_index_r = LOG_NUM_REQS'(2); end
6'b00_100?: begin grant_onehot_r = 4'b1000; grant_index_r = LOG_NUM_REQS'(3); end
6'b00_0001: begin grant_onehot_r = 4'b0001; grant_index_r = LOG_NUM_REQS'(0); end
6'b01_?1??: begin grant_onehot_r = 4'b0100; grant_index_r = LOG_NUM_REQS'(2); end
6'b01_10??: begin grant_onehot_r = 4'b1000; grant_index_r = LOG_NUM_REQS'(3); end
6'b01_00?1: begin grant_onehot_r = 4'b0001; grant_index_r = LOG_NUM_REQS'(0); end
6'b01_0010: begin grant_onehot_r = 4'b0010; grant_index_r = LOG_NUM_REQS'(1); end
6'b10_1???: begin grant_onehot_r = 4'b1000; grant_index_r = LOG_NUM_REQS'(3); end
6'b10_0??1: begin grant_onehot_r = 4'b0001; grant_index_r = LOG_NUM_REQS'(0); end
6'b10_0?10: begin grant_onehot_r = 4'b0010; grant_index_r = LOG_NUM_REQS'(1); end
6'b10_0100: begin grant_onehot_r = 4'b0100; grant_index_r = LOG_NUM_REQS'(2); end
6'b11_???1: begin grant_onehot_r = 4'b0001; grant_index_r = LOG_NUM_REQS'(0); end
6'b11_??10: begin grant_onehot_r = 4'b0010; grant_index_r = LOG_NUM_REQS'(1); end
6'b11_?100: begin grant_onehot_r = 4'b0100; grant_index_r = LOG_NUM_REQS'(2); end
6'b11_1000: begin grant_onehot_r = 4'b1000; grant_index_r = LOG_NUM_REQS'(3); end
default: begin grant_onehot_r = 'x; grant_index_r = 'x; end
endcase
end
end else if (NUM_REQS == 8) begin
always @(*) begin
casez ({state, requests})
11'b000_??????1?: begin grant_onehot_r = 8'b00000010; grant_index_r = LOG_NUM_REQS'(1); end
11'b000_?????10?: begin grant_onehot_r = 8'b00000100; grant_index_r = LOG_NUM_REQS'(2); end
11'b000_????100?: begin grant_onehot_r = 8'b00001000; grant_index_r = LOG_NUM_REQS'(3); end
11'b000_???1000?: begin grant_onehot_r = 8'b00010000; grant_index_r = LOG_NUM_REQS'(4); end
11'b000_??10000?: begin grant_onehot_r = 8'b00100000; grant_index_r = LOG_NUM_REQS'(5); end
11'b000_?100000?: begin grant_onehot_r = 8'b01000000; grant_index_r = LOG_NUM_REQS'(6); end
11'b000_1000000?: begin grant_onehot_r = 8'b10000000; grant_index_r = LOG_NUM_REQS'(7); end
11'b000_00000001: begin grant_onehot_r = 8'b00000001; grant_index_r = LOG_NUM_REQS'(0); end
11'b001_?????1??: begin grant_onehot_r = 8'b00000100; grant_index_r = LOG_NUM_REQS'(2); end
11'b001_????10??: begin grant_onehot_r = 8'b00001000; grant_index_r = LOG_NUM_REQS'(3); end
11'b001_???100??: begin grant_onehot_r = 8'b00010000; grant_index_r = LOG_NUM_REQS'(4); end
11'b001_??1000??: begin grant_onehot_r = 8'b00100000; grant_index_r = LOG_NUM_REQS'(5); end
11'b001_?10000??: begin grant_onehot_r = 8'b01000000; grant_index_r = LOG_NUM_REQS'(6); end
11'b001_100000??: begin grant_onehot_r = 8'b10000000; grant_index_r = LOG_NUM_REQS'(7); end
11'b001_000000?1: begin grant_onehot_r = 8'b00000001; grant_index_r = LOG_NUM_REQS'(0); end
11'b001_00000010: begin grant_onehot_r = 8'b00000010; grant_index_r = LOG_NUM_REQS'(1); end
11'b010_????1???: begin grant_onehot_r = 8'b00001000; grant_index_r = LOG_NUM_REQS'(3); end
11'b010_???10???: begin grant_onehot_r = 8'b00010000; grant_index_r = LOG_NUM_REQS'(4); end
11'b010_??100???: begin grant_onehot_r = 8'b00100000; grant_index_r = LOG_NUM_REQS'(5); end
11'b010_?1000???: begin grant_onehot_r = 8'b01000000; grant_index_r = LOG_NUM_REQS'(6); end
11'b010_10000???: begin grant_onehot_r = 8'b10000000; grant_index_r = LOG_NUM_REQS'(7); end
11'b010_00000??1: begin grant_onehot_r = 8'b00000001; grant_index_r = LOG_NUM_REQS'(0); end
11'b010_00000?10: begin grant_onehot_r = 8'b00000010; grant_index_r = LOG_NUM_REQS'(1); end
11'b010_00000100: begin grant_onehot_r = 8'b00000100; grant_index_r = LOG_NUM_REQS'(2); end
11'b011_???1????: begin grant_onehot_r = 8'b00010000; grant_index_r = LOG_NUM_REQS'(4); end
11'b011_??10????: begin grant_onehot_r = 8'b00100000; grant_index_r = LOG_NUM_REQS'(5); end
11'b011_?100????: begin grant_onehot_r = 8'b01000000; grant_index_r = LOG_NUM_REQS'(6); end
11'b011_1000????: begin grant_onehot_r = 8'b10000000; grant_index_r = LOG_NUM_REQS'(7); end
11'b011_0000???1: begin grant_onehot_r = 8'b00000001; grant_index_r = LOG_NUM_REQS'(0); end
11'b011_0000??10: begin grant_onehot_r = 8'b00000010; grant_index_r = LOG_NUM_REQS'(1); end
11'b011_0000?100: begin grant_onehot_r = 8'b00000100; grant_index_r = LOG_NUM_REQS'(2); end
11'b011_00001000: begin grant_onehot_r = 8'b00001000; grant_index_r = LOG_NUM_REQS'(3); end
11'b100_??1?????: begin grant_onehot_r = 8'b00100000; grant_index_r = LOG_NUM_REQS'(5); end
11'b100_?10?????: begin grant_onehot_r = 8'b01000000; grant_index_r = LOG_NUM_REQS'(6); end
11'b100_100?????: begin grant_onehot_r = 8'b10000000; grant_index_r = LOG_NUM_REQS'(7); end
11'b100_000????1: begin grant_onehot_r = 8'b00000001; grant_index_r = LOG_NUM_REQS'(0); end
11'b100_000???10: begin grant_onehot_r = 8'b00000010; grant_index_r = LOG_NUM_REQS'(1); end
11'b100_000??100: begin grant_onehot_r = 8'b00000100; grant_index_r = LOG_NUM_REQS'(2); end
11'b100_000?1000: begin grant_onehot_r = 8'b00001000; grant_index_r = LOG_NUM_REQS'(3); end
11'b100_00010000: begin grant_onehot_r = 8'b00010000; grant_index_r = LOG_NUM_REQS'(4); end
11'b101_?1??????: begin grant_onehot_r = 8'b01000000; grant_index_r = LOG_NUM_REQS'(6); end
11'b101_10??????: begin grant_onehot_r = 8'b10000000; grant_index_r = LOG_NUM_REQS'(7); end
11'b101_00?????1: begin grant_onehot_r = 8'b00000001; grant_index_r = LOG_NUM_REQS'(0); end
11'b101_00????10: begin grant_onehot_r = 8'b00000010; grant_index_r = LOG_NUM_REQS'(1); end
11'b101_00???100: begin grant_onehot_r = 8'b00000100; grant_index_r = LOG_NUM_REQS'(2); end
11'b101_00??1000: begin grant_onehot_r = 8'b00001000; grant_index_r = LOG_NUM_REQS'(3); end
11'b101_00?10000: begin grant_onehot_r = 8'b00010000; grant_index_r = LOG_NUM_REQS'(4); end
11'b101_00100000: begin grant_onehot_r = 8'b00100000; grant_index_r = LOG_NUM_REQS'(5); end
11'b110_1???????: begin grant_onehot_r = 8'b10000000; grant_index_r = LOG_NUM_REQS'(7); end
11'b110_0??????1: begin grant_onehot_r = 8'b00000001; grant_index_r = LOG_NUM_REQS'(0); end
11'b110_0?????10: begin grant_onehot_r = 8'b00000010; grant_index_r = LOG_NUM_REQS'(1); end
11'b110_0????100: begin grant_onehot_r = 8'b00000100; grant_index_r = LOG_NUM_REQS'(2); end
11'b110_0???1000: begin grant_onehot_r = 8'b00001000; grant_index_r = LOG_NUM_REQS'(3); end
11'b110_0??10000: begin grant_onehot_r = 8'b00010000; grant_index_r = LOG_NUM_REQS'(4); end
11'b110_0?100000: begin grant_onehot_r = 8'b00100000; grant_index_r = LOG_NUM_REQS'(5); end
11'b110_01000000: begin grant_onehot_r = 8'b01000000; grant_index_r = LOG_NUM_REQS'(6); end
11'b111_???????1: begin grant_onehot_r = 8'b00000001; grant_index_r = LOG_NUM_REQS'(0); end
11'b111_??????10: begin grant_onehot_r = 8'b00000010; grant_index_r = LOG_NUM_REQS'(1); end
11'b111_?????100: begin grant_onehot_r = 8'b00000100; grant_index_r = LOG_NUM_REQS'(2); end
11'b111_????1000: begin grant_onehot_r = 8'b00001000; grant_index_r = LOG_NUM_REQS'(3); end
11'b111_???10000: begin grant_onehot_r = 8'b00010000; grant_index_r = LOG_NUM_REQS'(4); end
11'b111_??100000: begin grant_onehot_r = 8'b00100000; grant_index_r = LOG_NUM_REQS'(5); end
11'b111_?1000000: begin grant_onehot_r = 8'b01000000; grant_index_r = LOG_NUM_REQS'(6); end
11'b111_10000000: begin grant_onehot_r = 8'b10000000; grant_index_r = LOG_NUM_REQS'(7); end
default: begin grant_onehot_r = 'x; grant_index_r = 'x; end
endcase
end
end else if (NUM_REQS == 16) begin
always @(*) begin
casez ({state, requests})
20'b0000_??????????????1?: begin grant_onehot_r = 16'b0000000000000010; grant_index_r = LOG_NUM_REQS'(1); end
20'b0000_?????????????10?: begin grant_onehot_r = 16'b0000000000000100; grant_index_r = LOG_NUM_REQS'(2); end
20'b0000_????????????100?: begin grant_onehot_r = 16'b0000000000001000; grant_index_r = LOG_NUM_REQS'(3); end
20'b0000_???????????1000?: begin grant_onehot_r = 16'b0000000000010000; grant_index_r = LOG_NUM_REQS'(4); end
20'b0000_??????????10000?: begin grant_onehot_r = 16'b0000000000100000; grant_index_r = LOG_NUM_REQS'(5); end
20'b0000_?????????100000?: begin grant_onehot_r = 16'b0000000001000000; grant_index_r = LOG_NUM_REQS'(6); end
20'b0000_????????1000000?: begin grant_onehot_r = 16'b0000000010000000; grant_index_r = LOG_NUM_REQS'(7); end
20'b0000_???????10000000?: begin grant_onehot_r = 16'b0000000100000000; grant_index_r = LOG_NUM_REQS'(8); end
20'b0000_??????100000000?: begin grant_onehot_r = 16'b0000001000000000; grant_index_r = LOG_NUM_REQS'(9); end
20'b0000_?????1000000000?: begin grant_onehot_r = 16'b0000010000000000; grant_index_r = LOG_NUM_REQS'(10); end
20'b0000_????10000000000?: begin grant_onehot_r = 16'b0000100000000000; grant_index_r = LOG_NUM_REQS'(11); end
20'b0000_???100000000000?: begin grant_onehot_r = 16'b0001000000000000; grant_index_r = LOG_NUM_REQS'(12); end
20'b0000_??1000000000000?: begin grant_onehot_r = 16'b0010000000000000; grant_index_r = LOG_NUM_REQS'(13); end
20'b0000_?10000000000000?: begin grant_onehot_r = 16'b0100000000000000; grant_index_r = LOG_NUM_REQS'(14); end
20'b0000_100000000000000?: begin grant_onehot_r = 16'b1000000000000000; grant_index_r = LOG_NUM_REQS'(15); end
20'b0000_0000000000000001: begin grant_onehot_r = 16'b0000000000000001; grant_index_r = LOG_NUM_REQS'(0); end
20'b0001_?????????????1??: begin grant_onehot_r = 16'b0000000000000100; grant_index_r = LOG_NUM_REQS'(2); end
20'b0001_????????????10??: begin grant_onehot_r = 16'b0000000000001000; grant_index_r = LOG_NUM_REQS'(3); end
20'b0001_???????????100??: begin grant_onehot_r = 16'b0000000000010000; grant_index_r = LOG_NUM_REQS'(4); end
20'b0001_??????????1000??: begin grant_onehot_r = 16'b0000000000100000; grant_index_r = LOG_NUM_REQS'(5); end
20'b0001_?????????10000??: begin grant_onehot_r = 16'b0000000001000000; grant_index_r = LOG_NUM_REQS'(6); end
20'b0001_????????100000??: begin grant_onehot_r = 16'b0000000010000000; grant_index_r = LOG_NUM_REQS'(7); end
20'b0001_???????1000000??: begin grant_onehot_r = 16'b0000000100000000; grant_index_r = LOG_NUM_REQS'(8); end
20'b0001_??????10000000??: begin grant_onehot_r = 16'b0000001000000000; grant_index_r = LOG_NUM_REQS'(9); end
20'b0001_?????100000000??: begin grant_onehot_r = 16'b0000010000000000; grant_index_r = LOG_NUM_REQS'(10); end
20'b0001_????1000000000??: begin grant_onehot_r = 16'b0000100000000000; grant_index_r = LOG_NUM_REQS'(11); end
20'b0001_???10000000000??: begin grant_onehot_r = 16'b0001000000000000; grant_index_r = LOG_NUM_REQS'(12); end
20'b0001_??100000000000??: begin grant_onehot_r = 16'b0010000000000000; grant_index_r = LOG_NUM_REQS'(13); end
20'b0001_?1000000000000??: begin grant_onehot_r = 16'b0100000000000000; grant_index_r = LOG_NUM_REQS'(14); end
20'b0001_10000000000000??: begin grant_onehot_r = 16'b1000000000000000; grant_index_r = LOG_NUM_REQS'(15); end
20'b0001_00000000000000?1: begin grant_onehot_r = 16'b0000000000000001; grant_index_r = LOG_NUM_REQS'(0); end
20'b0001_0000000000000010: begin grant_onehot_r = 16'b0000000000000010; grant_index_r = LOG_NUM_REQS'(1); end
20'b0010_????????????1???: begin grant_onehot_r = 16'b0000000000001000; grant_index_r = LOG_NUM_REQS'(3); end
20'b0010_???????????10???: begin grant_onehot_r = 16'b0000000000010000; grant_index_r = LOG_NUM_REQS'(4); end
20'b0010_??????????100???: begin grant_onehot_r = 16'b0000000000100000; grant_index_r = LOG_NUM_REQS'(5); end
20'b0010_?????????1000???: begin grant_onehot_r = 16'b0000000001000000; grant_index_r = LOG_NUM_REQS'(6); end
20'b0010_????????10000???: begin grant_onehot_r = 16'b0000000010000000; grant_index_r = LOG_NUM_REQS'(7); end
20'b0010_???????100000???: begin grant_onehot_r = 16'b0000000100000000; grant_index_r = LOG_NUM_REQS'(8); end
20'b0010_??????1000000???: begin grant_onehot_r = 16'b0000001000000000; grant_index_r = LOG_NUM_REQS'(9); end
20'b0010_?????10000000???: begin grant_onehot_r = 16'b0000010000000000; grant_index_r = LOG_NUM_REQS'(10); end
20'b0010_????100000000???: begin grant_onehot_r = 16'b0000100000000000; grant_index_r = LOG_NUM_REQS'(11); end
20'b0010_???1000000000???: begin grant_onehot_r = 16'b0001000000000000; grant_index_r = LOG_NUM_REQS'(12); end
20'b0010_??10000000000???: begin grant_onehot_r = 16'b0010000000000000; grant_index_r = LOG_NUM_REQS'(13); end
20'b0010_?100000000000???: begin grant_onehot_r = 16'b0100000000000000; grant_index_r = LOG_NUM_REQS'(14); end
20'b0010_1000000000000???: begin grant_onehot_r = 16'b1000000000000000; grant_index_r = LOG_NUM_REQS'(15); end
20'b0010_0000000000000??1: begin grant_onehot_r = 16'b0000000000000001; grant_index_r = LOG_NUM_REQS'(0); end
20'b0010_0000000000000?10: begin grant_onehot_r = 16'b0000000000000010; grant_index_r = LOG_NUM_REQS'(1); end
20'b0010_0000000000000100: begin grant_onehot_r = 16'b0000000000000100; grant_index_r = LOG_NUM_REQS'(2); end
20'b0011_???????????1????: begin grant_onehot_r = 16'b0000000000010000; grant_index_r = LOG_NUM_REQS'(4); end
20'b0011_??????????10????: begin grant_onehot_r = 16'b0000000000100000; grant_index_r = LOG_NUM_REQS'(5); end
20'b0011_?????????100????: begin grant_onehot_r = 16'b0000000001000000; grant_index_r = LOG_NUM_REQS'(6); end
20'b0011_????????1000????: begin grant_onehot_r = 16'b0000000010000000; grant_index_r = LOG_NUM_REQS'(7); end
20'b0011_???????10000????: begin grant_onehot_r = 16'b0000000100000000; grant_index_r = LOG_NUM_REQS'(8); end
20'b0011_??????100000????: begin grant_onehot_r = 16'b0000001000000000; grant_index_r = LOG_NUM_REQS'(9); end
20'b0011_?????1000000????: begin grant_onehot_r = 16'b0000010000000000; grant_index_r = LOG_NUM_REQS'(10); end
20'b0011_????10000000????: begin grant_onehot_r = 16'b0000100000000000; grant_index_r = LOG_NUM_REQS'(11); end
20'b0011_???100000000????: begin grant_onehot_r = 16'b0001000000000000; grant_index_r = LOG_NUM_REQS'(12); end
20'b0011_??1000000000????: begin grant_onehot_r = 16'b0010000000000000; grant_index_r = LOG_NUM_REQS'(13); end
20'b0011_?10000000000????: begin grant_onehot_r = 16'b0100000000000000; grant_index_r = LOG_NUM_REQS'(14); end
20'b0011_100000000000????: begin grant_onehot_r = 16'b1000000000000000; grant_index_r = LOG_NUM_REQS'(15); end
20'b0011_000000000000???1: begin grant_onehot_r = 16'b0000000000000001; grant_index_r = LOG_NUM_REQS'(0); end
20'b0011_000000000000??10: begin grant_onehot_r = 16'b0000000000000010; grant_index_r = LOG_NUM_REQS'(1); end
20'b0011_000000000000?100: begin grant_onehot_r = 16'b0000000000000100; grant_index_r = LOG_NUM_REQS'(2); end
20'b0011_0000000000001000: begin grant_onehot_r = 16'b0000000000001000; grant_index_r = LOG_NUM_REQS'(3); end
20'b0100_??????????1?????: begin grant_onehot_r = 16'b0000000000100000; grant_index_r = LOG_NUM_REQS'(5); end
20'b0100_?????????10?????: begin grant_onehot_r = 16'b0000000001000000; grant_index_r = LOG_NUM_REQS'(6); end
20'b0100_????????100?????: begin grant_onehot_r = 16'b0000000010000000; grant_index_r = LOG_NUM_REQS'(7); end
20'b0100_???????1000?????: begin grant_onehot_r = 16'b0000000100000000; grant_index_r = LOG_NUM_REQS'(8); end
20'b0100_??????10000?????: begin grant_onehot_r = 16'b0000001000000000; grant_index_r = LOG_NUM_REQS'(9); end
20'b0100_?????100000?????: begin grant_onehot_r = 16'b0000010000000000; grant_index_r = LOG_NUM_REQS'(10); end
20'b0100_????1000000?????: begin grant_onehot_r = 16'b0000100000000000; grant_index_r = LOG_NUM_REQS'(11); end
20'b0100_???10000000?????: begin grant_onehot_r = 16'b0001000000000000; grant_index_r = LOG_NUM_REQS'(12); end
20'b0100_??100000000?????: begin grant_onehot_r = 16'b0010000000000000; grant_index_r = LOG_NUM_REQS'(13); end
20'b0100_?1000000000?????: begin grant_onehot_r = 16'b0100000000000000; grant_index_r = LOG_NUM_REQS'(14); end
20'b0100_10000000000?????: begin grant_onehot_r = 16'b1000000000000000; grant_index_r = LOG_NUM_REQS'(15); end
20'b0100_00000000000????1: begin grant_onehot_r = 16'b0000000000000001; grant_index_r = LOG_NUM_REQS'(0); end
20'b0100_00000000000???10: begin grant_onehot_r = 16'b0000000000000010; grant_index_r = LOG_NUM_REQS'(1); end
20'b0100_00000000000??100: begin grant_onehot_r = 16'b0000000000000100; grant_index_r = LOG_NUM_REQS'(2); end
20'b0100_00000000000?1000: begin grant_onehot_r = 16'b0000000000001000; grant_index_r = LOG_NUM_REQS'(3); end
20'b0100_0000000000010000: begin grant_onehot_r = 16'b0000000000010000; grant_index_r = LOG_NUM_REQS'(4); end
20'b0101_?????????1??????: begin grant_onehot_r = 16'b0000000001000000; grant_index_r = LOG_NUM_REQS'(6); end
20'b0101_????????10??????: begin grant_onehot_r = 16'b0000000010000000; grant_index_r = LOG_NUM_REQS'(7); end
20'b0101_???????100??????: begin grant_onehot_r = 16'b0000000100000000; grant_index_r = LOG_NUM_REQS'(8); end
20'b0101_??????1000??????: begin grant_onehot_r = 16'b0000001000000000; grant_index_r = LOG_NUM_REQS'(9); end
20'b0101_?????10000??????: begin grant_onehot_r = 16'b0000010000000000; grant_index_r = LOG_NUM_REQS'(10); end
20'b0101_????100000??????: begin grant_onehot_r = 16'b0000100000000000; grant_index_r = LOG_NUM_REQS'(11); end
20'b0101_???1000000??????: begin grant_onehot_r = 16'b0001000000000000; grant_index_r = LOG_NUM_REQS'(12); end
20'b0101_??10000000??????: begin grant_onehot_r = 16'b0010000000000000; grant_index_r = LOG_NUM_REQS'(13); end
20'b0101_?100000000??????: begin grant_onehot_r = 16'b0100000000000000; grant_index_r = LOG_NUM_REQS'(14); end
20'b0101_1000000000??????: begin grant_onehot_r = 16'b1000000000000000; grant_index_r = LOG_NUM_REQS'(15); end
20'b0101_0000000000?????1: begin grant_onehot_r = 16'b0000000000000001; grant_index_r = LOG_NUM_REQS'(0); end
20'b0101_0000000000????10: begin grant_onehot_r = 16'b0000000000000010; grant_index_r = LOG_NUM_REQS'(1); end
20'b0101_0000000000???100: begin grant_onehot_r = 16'b0000000000000100; grant_index_r = LOG_NUM_REQS'(2); end
20'b0101_0000000000??1000: begin grant_onehot_r = 16'b0000000000001000; grant_index_r = LOG_NUM_REQS'(3); end
20'b0101_0000000000?10000: begin grant_onehot_r = 16'b0000000000010000; grant_index_r = LOG_NUM_REQS'(4); end
20'b0101_0000000000100000: begin grant_onehot_r = 16'b0000000000100000; grant_index_r = LOG_NUM_REQS'(5); end
20'b0110_????????1???????: begin grant_onehot_r = 16'b0000000010000000; grant_index_r = LOG_NUM_REQS'(7); end
20'b0110_???????10???????: begin grant_onehot_r = 16'b0000000100000000; grant_index_r = LOG_NUM_REQS'(8); end
20'b0110_??????100???????: begin grant_onehot_r = 16'b0000001000000000; grant_index_r = LOG_NUM_REQS'(9); end
20'b0110_?????1000???????: begin grant_onehot_r = 16'b0000010000000000; grant_index_r = LOG_NUM_REQS'(10); end
20'b0110_????10000???????: begin grant_onehot_r = 16'b0000100000000000; grant_index_r = LOG_NUM_REQS'(11); end
20'b0110_???100000???????: begin grant_onehot_r = 16'b0001000000000000; grant_index_r = LOG_NUM_REQS'(12); end
20'b0110_??1000000???????: begin grant_onehot_r = 16'b0010000000000000; grant_index_r = LOG_NUM_REQS'(13); end
20'b0110_?10000000???????: begin grant_onehot_r = 16'b0100000000000000; grant_index_r = LOG_NUM_REQS'(14); end
20'b0110_100000000???????: begin grant_onehot_r = 16'b1000000000000000; grant_index_r = LOG_NUM_REQS'(15); end
20'b0110_000000000??????1: begin grant_onehot_r = 16'b0000000000000001; grant_index_r = LOG_NUM_REQS'(0); end
20'b0110_000000000?????10: begin grant_onehot_r = 16'b0000000000000010; grant_index_r = LOG_NUM_REQS'(1); end
20'b0110_000000000????100: begin grant_onehot_r = 16'b0000000000000100; grant_index_r = LOG_NUM_REQS'(2); end
20'b0110_000000000???1000: begin grant_onehot_r = 16'b0000000000001000; grant_index_r = LOG_NUM_REQS'(3); end
20'b0110_000000000??10000: begin grant_onehot_r = 16'b0000000000010000; grant_index_r = LOG_NUM_REQS'(4); end
20'b0110_000000000?100000: begin grant_onehot_r = 16'b0000000000100000; grant_index_r = LOG_NUM_REQS'(5); end
20'b0110_0000000001000000: begin grant_onehot_r = 16'b0000000001000000; grant_index_r = LOG_NUM_REQS'(6); end
20'b0111_???????1????????: begin grant_onehot_r = 16'b0000000100000000; grant_index_r = LOG_NUM_REQS'(8); end
20'b0111_??????10????????: begin grant_onehot_r = 16'b0000001000000000; grant_index_r = LOG_NUM_REQS'(9); end
20'b0111_?????100????????: begin grant_onehot_r = 16'b0000010000000000; grant_index_r = LOG_NUM_REQS'(10); end
20'b0111_????1000????????: begin grant_onehot_r = 16'b0000100000000000; grant_index_r = LOG_NUM_REQS'(11); end
20'b0111_???10000????????: begin grant_onehot_r = 16'b0001000000000000; grant_index_r = LOG_NUM_REQS'(12); end
20'b0111_??100000????????: begin grant_onehot_r = 16'b0010000000000000; grant_index_r = LOG_NUM_REQS'(13); end
20'b0111_?1000000????????: begin grant_onehot_r = 16'b0100000000000000; grant_index_r = LOG_NUM_REQS'(14); end
20'b0111_10000000????????: begin grant_onehot_r = 16'b1000000000000000; grant_index_r = LOG_NUM_REQS'(15); end
20'b0111_00000000???????1: begin grant_onehot_r = 16'b0000000000000001; grant_index_r = LOG_NUM_REQS'(0); end
20'b0111_00000000??????10: begin grant_onehot_r = 16'b0000000000000010; grant_index_r = LOG_NUM_REQS'(1); end
20'b0111_00000000?????100: begin grant_onehot_r = 16'b0000000000000100; grant_index_r = LOG_NUM_REQS'(2); end
20'b0111_00000000????1000: begin grant_onehot_r = 16'b0000000000001000; grant_index_r = LOG_NUM_REQS'(3); end
20'b0111_00000000???10000: begin grant_onehot_r = 16'b0000000000010000; grant_index_r = LOG_NUM_REQS'(4); end
20'b0111_00000000??100000: begin grant_onehot_r = 16'b0000000000100000; grant_index_r = LOG_NUM_REQS'(5); end
20'b0111_00000000?1000000: begin grant_onehot_r = 16'b0000000001000000; grant_index_r = LOG_NUM_REQS'(6); end
20'b0111_0000000010000000: begin grant_onehot_r = 16'b0000000010000000; grant_index_r = LOG_NUM_REQS'(7); end
20'b1000_??????1?????????: begin grant_onehot_r = 16'b0000001000000000; grant_index_r = LOG_NUM_REQS'(9); end
20'b1000_?????10?????????: begin grant_onehot_r = 16'b0000010000000000; grant_index_r = LOG_NUM_REQS'(10); end
20'b1000_????100?????????: begin grant_onehot_r = 16'b0000100000000000; grant_index_r = LOG_NUM_REQS'(11); end
20'b1000_???1000?????????: begin grant_onehot_r = 16'b0001000000000000; grant_index_r = LOG_NUM_REQS'(12); end
20'b1000_??10000?????????: begin grant_onehot_r = 16'b0010000000000000; grant_index_r = LOG_NUM_REQS'(13); end
20'b1000_?100000?????????: begin grant_onehot_r = 16'b0100000000000000; grant_index_r = LOG_NUM_REQS'(14); end
20'b1000_1000000?????????: begin grant_onehot_r = 16'b1000000000000000; grant_index_r = LOG_NUM_REQS'(15); end
20'b1000_0000000????????1: begin grant_onehot_r = 16'b0000000000000001; grant_index_r = LOG_NUM_REQS'(0); end
20'b1000_0000000???????10: begin grant_onehot_r = 16'b0000000000000010; grant_index_r = LOG_NUM_REQS'(1); end
20'b1000_0000000??????100: begin grant_onehot_r = 16'b0000000000000100; grant_index_r = LOG_NUM_REQS'(2); end
20'b1000_0000000?????1000: begin grant_onehot_r = 16'b0000000000001000; grant_index_r = LOG_NUM_REQS'(3); end
20'b1000_0000000????10000: begin grant_onehot_r = 16'b0000000000010000; grant_index_r = LOG_NUM_REQS'(4); end
20'b1000_0000000???100000: begin grant_onehot_r = 16'b0000000000100000; grant_index_r = LOG_NUM_REQS'(5); end
20'b1000_0000000??1000000: begin grant_onehot_r = 16'b0000000001000000; grant_index_r = LOG_NUM_REQS'(6); end
20'b1000_0000000?10000000: begin grant_onehot_r = 16'b0000000010000000; grant_index_r = LOG_NUM_REQS'(7); end
20'b1000_0000000100000000: begin grant_onehot_r = 16'b0000000100000000; grant_index_r = LOG_NUM_REQS'(8); end
20'b1001_?????1??????????: begin grant_onehot_r = 16'b0000010000000000; grant_index_r = LOG_NUM_REQS'(10); end
20'b1001_????10??????????: begin grant_onehot_r = 16'b0000100000000000; grant_index_r = LOG_NUM_REQS'(11); end
20'b1001_???100??????????: begin grant_onehot_r = 16'b0001000000000000; grant_index_r = LOG_NUM_REQS'(12); end
20'b1001_??1000??????????: begin grant_onehot_r = 16'b0010000000000000; grant_index_r = LOG_NUM_REQS'(13); end
20'b1001_?10000??????????: begin grant_onehot_r = 16'b0100000000000000; grant_index_r = LOG_NUM_REQS'(14); end
20'b1001_100000??????????: begin grant_onehot_r = 16'b1000000000000000; grant_index_r = LOG_NUM_REQS'(15); end
20'b1001_000000?????????1: begin grant_onehot_r = 16'b0000000000000001; grant_index_r = LOG_NUM_REQS'(0); end
20'b1001_000000????????10: begin grant_onehot_r = 16'b0000000000000010; grant_index_r = LOG_NUM_REQS'(1); end
20'b1001_000000???????100: begin grant_onehot_r = 16'b0000000000000100; grant_index_r = LOG_NUM_REQS'(2); end
20'b1001_000000??????1000: begin grant_onehot_r = 16'b0000000000001000; grant_index_r = LOG_NUM_REQS'(3); end
20'b1001_000000?????10000: begin grant_onehot_r = 16'b0000000000010000; grant_index_r = LOG_NUM_REQS'(4); end
20'b1001_000000????100000: begin grant_onehot_r = 16'b0000000000100000; grant_index_r = LOG_NUM_REQS'(5); end
20'b1001_000000???1000000: begin grant_onehot_r = 16'b0000000001000000; grant_index_r = LOG_NUM_REQS'(6); end
20'b1001_000000??10000000: begin grant_onehot_r = 16'b0000000010000000; grant_index_r = LOG_NUM_REQS'(7); end
20'b1001_000000?100000000: begin grant_onehot_r = 16'b0000000100000000; grant_index_r = LOG_NUM_REQS'(8); end
20'b1001_0000001000000000: begin grant_onehot_r = 16'b0000001000000000; grant_index_r = LOG_NUM_REQS'(9); end
20'b1010_????1???????????: begin grant_onehot_r = 16'b0000100000000000; grant_index_r = LOG_NUM_REQS'(11); end
20'b1010_???10???????????: begin grant_onehot_r = 16'b0001000000000000; grant_index_r = LOG_NUM_REQS'(12); end
20'b1010_??100???????????: begin grant_onehot_r = 16'b0010000000000000; grant_index_r = LOG_NUM_REQS'(13); end
20'b1010_?1000???????????: begin grant_onehot_r = 16'b0100000000000000; grant_index_r = LOG_NUM_REQS'(14); end
20'b1010_10000???????????: begin grant_onehot_r = 16'b1000000000000000; grant_index_r = LOG_NUM_REQS'(15); end
20'b1010_00000??????????1: begin grant_onehot_r = 16'b0000000000000001; grant_index_r = LOG_NUM_REQS'(0); end
20'b1010_00000?????????10: begin grant_onehot_r = 16'b0000000000000010; grant_index_r = LOG_NUM_REQS'(1); end
20'b1010_00000????????100: begin grant_onehot_r = 16'b0000000000000100; grant_index_r = LOG_NUM_REQS'(2); end
20'b1010_00000???????1000: begin grant_onehot_r = 16'b0000000000001000; grant_index_r = LOG_NUM_REQS'(3); end
20'b1010_00000??????10000: begin grant_onehot_r = 16'b0000000000010000; grant_index_r = LOG_NUM_REQS'(4); end
20'b1010_00000?????100000: begin grant_onehot_r = 16'b0000000000100000; grant_index_r = LOG_NUM_REQS'(5); end
20'b1010_00000????1000000: begin grant_onehot_r = 16'b0000000001000000; grant_index_r = LOG_NUM_REQS'(6); end
20'b1010_00000???10000000: begin grant_onehot_r = 16'b0000000010000000; grant_index_r = LOG_NUM_REQS'(7); end
20'b1010_00000??100000000: begin grant_onehot_r = 16'b0000000100000000; grant_index_r = LOG_NUM_REQS'(8); end
20'b1010_00000?1000000000: begin grant_onehot_r = 16'b0000001000000000; grant_index_r = LOG_NUM_REQS'(9); end
20'b1010_0000010000000000: begin grant_onehot_r = 16'b0000010000000000; grant_index_r = LOG_NUM_REQS'(10); end
20'b1011_???1????????????: begin grant_onehot_r = 16'b0001000000000000; grant_index_r = LOG_NUM_REQS'(12); end
20'b1011_??10????????????: begin grant_onehot_r = 16'b0010000000000000; grant_index_r = LOG_NUM_REQS'(13); end
20'b1011_?100????????????: begin grant_onehot_r = 16'b0100000000000000; grant_index_r = LOG_NUM_REQS'(14); end
20'b1011_1000????????????: begin grant_onehot_r = 16'b1000000000000000; grant_index_r = LOG_NUM_REQS'(15); end
20'b1011_0000???????????1: begin grant_onehot_r = 16'b0000000000000001; grant_index_r = LOG_NUM_REQS'(0); end
20'b1011_0000??????????10: begin grant_onehot_r = 16'b0000000000000010; grant_index_r = LOG_NUM_REQS'(1); end
20'b1011_0000?????????100: begin grant_onehot_r = 16'b0000000000000100; grant_index_r = LOG_NUM_REQS'(2); end
20'b1011_0000????????1000: begin grant_onehot_r = 16'b0000000000001000; grant_index_r = LOG_NUM_REQS'(3); end
20'b1011_0000???????10000: begin grant_onehot_r = 16'b0000000000010000; grant_index_r = LOG_NUM_REQS'(4); end
20'b1011_0000??????100000: begin grant_onehot_r = 16'b0000000000100000; grant_index_r = LOG_NUM_REQS'(5); end
20'b1011_0000?????1000000: begin grant_onehot_r = 16'b0000000001000000; grant_index_r = LOG_NUM_REQS'(6); end
20'b1011_0000????10000000: begin grant_onehot_r = 16'b0000000010000000; grant_index_r = LOG_NUM_REQS'(7); end
20'b1011_0000???100000000: begin grant_onehot_r = 16'b0000000100000000; grant_index_r = LOG_NUM_REQS'(8); end
20'b1011_0000??1000000000: begin grant_onehot_r = 16'b0000001000000000; grant_index_r = LOG_NUM_REQS'(9); end
20'b1011_0000?10000000000: begin grant_onehot_r = 16'b0000010000000000; grant_index_r = LOG_NUM_REQS'(10); end
20'b1011_0000100000000000: begin grant_onehot_r = 16'b0000100000000000; grant_index_r = LOG_NUM_REQS'(11); end
20'b1100_??1?????????????: begin grant_onehot_r = 16'b0010000000000000; grant_index_r = LOG_NUM_REQS'(13); end
20'b1100_?10?????????????: begin grant_onehot_r = 16'b0100000000000000; grant_index_r = LOG_NUM_REQS'(14); end
20'b1100_100?????????????: begin grant_onehot_r = 16'b1000000000000000; grant_index_r = LOG_NUM_REQS'(15); end
20'b1100_000????????????1: begin grant_onehot_r = 16'b0000000000000001; grant_index_r = LOG_NUM_REQS'(0); end
20'b1100_000???????????10: begin grant_onehot_r = 16'b0000000000000010; grant_index_r = LOG_NUM_REQS'(1); end
20'b1100_000??????????100: begin grant_onehot_r = 16'b0000000000000100; grant_index_r = LOG_NUM_REQS'(2); end
20'b1100_000?????????1000: begin grant_onehot_r = 16'b0000000000001000; grant_index_r = LOG_NUM_REQS'(3); end
20'b1100_000????????10000: begin grant_onehot_r = 16'b0000000000010000; grant_index_r = LOG_NUM_REQS'(4); end
20'b1100_000???????100000: begin grant_onehot_r = 16'b0000000000100000; grant_index_r = LOG_NUM_REQS'(5); end
20'b1100_000??????1000000: begin grant_onehot_r = 16'b0000000001000000; grant_index_r = LOG_NUM_REQS'(6); end
20'b1100_000?????10000000: begin grant_onehot_r = 16'b0000000010000000; grant_index_r = LOG_NUM_REQS'(7); end
20'b1100_000????100000000: begin grant_onehot_r = 16'b0000000100000000; grant_index_r = LOG_NUM_REQS'(8); end
20'b1100_000???1000000000: begin grant_onehot_r = 16'b0000001000000000; grant_index_r = LOG_NUM_REQS'(9); end
20'b1100_000??10000000000: begin grant_onehot_r = 16'b0000010000000000; grant_index_r = LOG_NUM_REQS'(10); end
20'b1100_000?100000000000: begin grant_onehot_r = 16'b0000100000000000; grant_index_r = LOG_NUM_REQS'(11); end
20'b1100_0001000000000000: begin grant_onehot_r = 16'b0001000000000000; grant_index_r = LOG_NUM_REQS'(12); end
20'b1101_?1??????????????: begin grant_onehot_r = 16'b0100000000000000; grant_index_r = LOG_NUM_REQS'(14); end
20'b1101_10??????????????: begin grant_onehot_r = 16'b1000000000000000; grant_index_r = LOG_NUM_REQS'(15); end
20'b1101_00?????????????1: begin grant_onehot_r = 16'b0000000000000001; grant_index_r = LOG_NUM_REQS'(0); end
20'b1101_00????????????10: begin grant_onehot_r = 16'b0000000000000010; grant_index_r = LOG_NUM_REQS'(1); end
20'b1101_00???????????100: begin grant_onehot_r = 16'b0000000000000100; grant_index_r = LOG_NUM_REQS'(2); end
20'b1101_00??????????1000: begin grant_onehot_r = 16'b0000000000001000; grant_index_r = LOG_NUM_REQS'(3); end
20'b1101_00?????????10000: begin grant_onehot_r = 16'b0000000000010000; grant_index_r = LOG_NUM_REQS'(4); end
20'b1101_00????????100000: begin grant_onehot_r = 16'b0000000000100000; grant_index_r = LOG_NUM_REQS'(5); end
20'b1101_00???????1000000: begin grant_onehot_r = 16'b0000000001000000; grant_index_r = LOG_NUM_REQS'(6); end
20'b1101_00??????10000000: begin grant_onehot_r = 16'b0000000010000000; grant_index_r = LOG_NUM_REQS'(7); end
20'b1101_00?????100000000: begin grant_onehot_r = 16'b0000000100000000; grant_index_r = LOG_NUM_REQS'(8); end
20'b1101_00????1000000000: begin grant_onehot_r = 16'b0000001000000000; grant_index_r = LOG_NUM_REQS'(9); end
20'b1101_00???10000000000: begin grant_onehot_r = 16'b0000010000000000; grant_index_r = LOG_NUM_REQS'(10); end
20'b1101_00??100000000000: begin grant_onehot_r = 16'b0000100000000000; grant_index_r = LOG_NUM_REQS'(11); end
20'b1101_00?1000000000000: begin grant_onehot_r = 16'b0001000000000000; grant_index_r = LOG_NUM_REQS'(12); end
20'b1101_0010000000000000: begin grant_onehot_r = 16'b0010000000000000; grant_index_r = LOG_NUM_REQS'(13); end
20'b1110_1???????????????: begin grant_onehot_r = 16'b1000000000000000; grant_index_r = LOG_NUM_REQS'(15); end
20'b1110_0??????????????1: begin grant_onehot_r = 16'b0000000000000001; grant_index_r = LOG_NUM_REQS'(0); end
20'b1110_0?????????????10: begin grant_onehot_r = 16'b0000000000000010; grant_index_r = LOG_NUM_REQS'(1); end
20'b1110_0????????????100: begin grant_onehot_r = 16'b0000000000000100; grant_index_r = LOG_NUM_REQS'(2); end
20'b1110_0???????????1000: begin grant_onehot_r = 16'b0000000000001000; grant_index_r = LOG_NUM_REQS'(3); end
20'b1110_0??????????10000: begin grant_onehot_r = 16'b0000000000010000; grant_index_r = LOG_NUM_REQS'(4); end
20'b1110_0?????????100000: begin grant_onehot_r = 16'b0000000000100000; grant_index_r = LOG_NUM_REQS'(5); end
20'b1110_0????????1000000: begin grant_onehot_r = 16'b0000000001000000; grant_index_r = LOG_NUM_REQS'(6); end
20'b1110_0???????10000000: begin grant_onehot_r = 16'b0000000010000000; grant_index_r = LOG_NUM_REQS'(7); end
20'b1110_0??????100000000: begin grant_onehot_r = 16'b0000000100000000; grant_index_r = LOG_NUM_REQS'(8); end
20'b1110_0?????1000000000: begin grant_onehot_r = 16'b0000001000000000; grant_index_r = LOG_NUM_REQS'(9); end
20'b1110_0????10000000000: begin grant_onehot_r = 16'b0000010000000000; grant_index_r = LOG_NUM_REQS'(10); end
20'b1110_0???100000000000: begin grant_onehot_r = 16'b0000100000000000; grant_index_r = LOG_NUM_REQS'(11); end
20'b1110_0??1000000000000: begin grant_onehot_r = 16'b0001000000000000; grant_index_r = LOG_NUM_REQS'(12); end
20'b1110_0?10000000000000: begin grant_onehot_r = 16'b0010000000000000; grant_index_r = LOG_NUM_REQS'(13); end
20'b1110_0100000000000000: begin grant_onehot_r = 16'b0100000000000000; grant_index_r = LOG_NUM_REQS'(14); end
20'b1111_???????????????1: begin grant_onehot_r = 16'b0000000000000001; grant_index_r = LOG_NUM_REQS'(0); end
20'b1111_??????????????10: begin grant_onehot_r = 16'b0000000000000010; grant_index_r = LOG_NUM_REQS'(1); end
20'b1111_?????????????100: begin grant_onehot_r = 16'b0000000000000100; grant_index_r = LOG_NUM_REQS'(2); end
20'b1111_????????????1000: begin grant_onehot_r = 16'b0000000000001000; grant_index_r = LOG_NUM_REQS'(3); end
20'b1111_???????????10000: begin grant_onehot_r = 16'b0000000000010000; grant_index_r = LOG_NUM_REQS'(4); end
20'b1111_??????????100000: begin grant_onehot_r = 16'b0000000000100000; grant_index_r = LOG_NUM_REQS'(5); end
20'b1111_?????????1000000: begin grant_onehot_r = 16'b0000000001000000; grant_index_r = LOG_NUM_REQS'(6); end
20'b1111_????????10000000: begin grant_onehot_r = 16'b0000000010000000; grant_index_r = LOG_NUM_REQS'(7); end
20'b1111_???????100000000: begin grant_onehot_r = 16'b0000000100000000; grant_index_r = LOG_NUM_REQS'(8); end
20'b1111_??????1000000000: begin grant_onehot_r = 16'b0000001000000000; grant_index_r = LOG_NUM_REQS'(9); end
20'b1111_?????10000000000: begin grant_onehot_r = 16'b0000010000000000; grant_index_r = LOG_NUM_REQS'(10); end
20'b1111_????100000000000: begin grant_onehot_r = 16'b0000100000000000; grant_index_r = LOG_NUM_REQS'(11); end
20'b1111_???1000000000000: begin grant_onehot_r = 16'b0001000000000000; grant_index_r = LOG_NUM_REQS'(12); end
20'b1111_??10000000000000: begin grant_onehot_r = 16'b0010000000000000; grant_index_r = LOG_NUM_REQS'(13); end
20'b1111_?100000000000000: begin grant_onehot_r = 16'b0100000000000000; grant_index_r = LOG_NUM_REQS'(14); end
20'b1111_1000000000000000: begin grant_onehot_r = 16'b1000000000000000; grant_index_r = LOG_NUM_REQS'(15); end
default: begin grant_onehot_r = 'x; grant_index_r = 'x; end
endcase
end
end else begin
always @(*) begin
grant_index_r = 'x;
grant_onehot_r = 'x;
for (integer i = 0; i < NUM_REQS; ++i) begin
for (integer j = 0; j < NUM_REQS; ++j) begin
if (state == LOG_NUM_REQS'(i)
&& requests[(j + 1) % NUM_REQS]) begin
grant_index_r = LOG_NUM_REQS'((j + 1) % NUM_REQS);
grant_onehot_r = '0;
grant_onehot_r[(j + 1) % NUM_REQS] = 1;
end
end
end
end
end
end
always @(posedge clk) begin
if (reset) begin
state <= 0;
end else if (!LOCK_ENABLE || enable) begin
state <= grant_table[state];
state <= grant_index;
end
end
reg [NUM_REQS-1:0] grant_onehot_r;
always @(*) begin
grant_onehot_r = NUM_REQS'(0);
grant_onehot_r[grant_table[state]] = 1;
end
assign grant_index = grant_table[state];
assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests);
assign grant_index = grant_index_r;
assign grant_onehot = grant_onehot_r;
assign grant_valid = (| requests);
end
endmodule

View file

@ -83,7 +83,7 @@ module VX_skid_buffer #(
end
if (pop && !use_buffer) begin
data_out_r <= data_in;
end else if (pop) begin
end else if (ready_out) begin
data_out_r <= buffer;
end
end

View file

@ -74,7 +74,7 @@
"taps": {
"afu": {
"!cmd_type":3,
"!state":3,
"!state":2,
"?cci_sRxPort_c0_mmioRdValid":1,
"?cci_sRxPort_c0_mmioWrValid":1,
"mmio_hdr_address":16,

View file

@ -69,12 +69,18 @@ Simulator::~Simulator() {
void Simulator::attach_ram(RAM* ram) {
ram_ = ram;
mem_rsp_vec_.clear();
for (int b = 0; b < MEMORY_BANKS; ++b) {
mem_rsp_vec_[b].clear();
}
last_mem_rsp_bank_ = 0;
}
void Simulator::reset() {
print_bufs_.clear();
mem_rsp_vec_.clear();
for (int b = 0; b < MEMORY_BANKS; ++b) {
mem_rsp_vec_[b].clear();
}
last_mem_rsp_bank_ = 0;
mem_rsp_active_ = false;
@ -128,42 +134,54 @@ void Simulator::eval_mem_bus() {
}
// update memory responses schedule
for (auto& rsp : mem_rsp_vec_) {
if (rsp.cycles_left > 0)
rsp.cycles_left -= 1;
for (int b = 0; b < MEMORY_BANKS; ++b) {
for (auto& rsp : mem_rsp_vec_[b]) {
if (rsp.cycles_left > 0)
rsp.cycles_left -= 1;
}
}
bool has_response = false;
// schedule memory responses in FIFO order
std::list<mem_req_t>::iterator mem_rsp_it(mem_rsp_vec_.end());
if (!mem_rsp_vec_.empty()
&& (0 == mem_rsp_vec_.begin()->cycles_left)) {
mem_rsp_it = mem_rsp_vec_.begin();
for (int i = 0; i < MEMORY_BANKS; ++i) {
uint32_t b = (i + last_mem_rsp_bank_ + 1) % MEMORY_BANKS;
if (!mem_rsp_vec_[b].empty()
&& (0 == mem_rsp_vec_[b].begin()->cycles_left)) {
has_response = true;
last_mem_rsp_bank_ = b;
break;
}
}
// send memory response
if (mem_rsp_active_
&& vortex_->mem_rsp_valid && mem_rsp_ready_) {
&& vortex_->mem_rsp_valid && mem_rsp_ready_) {
mem_rsp_active_ = false;
}
if (!mem_rsp_active_) {
if (mem_rsp_it != mem_rsp_vec_.end()) {
vortex_->mem_rsp_valid = 1;
if (has_response) {
vortex_->mem_rsp_valid = 1;
std::list<mem_req_t>::iterator mem_rsp_it = mem_rsp_vec_[last_mem_rsp_bank_].begin();
memcpy((uint8_t*)vortex_->mem_rsp_data, mem_rsp_it->block.data(), MEM_BLOCK_SIZE);
vortex_->mem_rsp_tag = mem_rsp_it->tag;
mem_rsp_vec_.erase(mem_rsp_it);
mem_rsp_vec_[last_mem_rsp_bank_].erase(mem_rsp_it);
mem_rsp_active_ = true;
} else {
vortex_->mem_rsp_valid = 0;
}
}
// select the memory bank
uint32_t req_bank = (MEMORY_BANKS >= 2) ? (vortex_->mem_req_addr % MEMORY_BANKS) : 0;
// handle memory stalls
bool mem_stalled = false;
#ifdef ENABLE_MEM_STALLS
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
mem_stalled = true;
} else
if (mem_rsp_vec_.size() >= MEM_RQ_SIZE) {
if (mem_rsp_vec_[req_bank].size() >= MEM_RQ_SIZE) {
mem_stalled = true;
}
#endif
@ -201,13 +219,13 @@ void Simulator::eval_mem_bus() {
mem_req.addr = vortex_->mem_req_addr;
ram_->read(vortex_->mem_req_addr * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.block.data());
mem_req.cycles_left = MEM_LATENCY;
for (auto& rsp : mem_rsp_vec_) {
for (auto& rsp : mem_rsp_vec_[req_bank]) {
if (mem_req.addr == rsp.addr) {
mem_req.cycles_left = rsp.cycles_left;
break;
}
}
mem_rsp_vec_.emplace_back(mem_req);
mem_rsp_vec_[req_bank].emplace_back(mem_req);
}
}
}

View file

@ -17,6 +17,14 @@
#include <sstream>
#include <unordered_map>
#ifndef MEMORY_BANKS
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#else
#define MEMORY_BANKS 2
#endif
#endif
class Simulator {
public:
@ -57,7 +65,9 @@ private:
void eval_mem_bus();
std::list<mem_req_t> mem_rsp_vec_;
std::list<mem_req_t> mem_rsp_vec_ [MEMORY_BANKS];
uint32_t last_mem_rsp_bank_;
bool mem_rsp_active_;
bool mem_rsp_ready_;

View file

@ -76,16 +76,16 @@ $(FPGA_BUILD_DIR)_4c/build/dcp.qpf:
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_4c
$(FPGA_BUILD_DIR)_8c/build/dcp.qpf:
afu_synth_setup -s setup8.cfg $(FPGA_BUILD_DIR)_8c
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_8c
$(FPGA_BUILD_DIR)_16c/build/dcp.qpf:
afu_synth_setup -s setup16.cfg $(FPGA_BUILD_DIR)_16c
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_16c
$(FPGA_BUILD_DIR)_32c/build/dcp.qpf:
afu_synth_setup -s setup16.cfg $(FPGA_BUILD_DIR)_32c
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_32c
$(FPGA_BUILD_DIR)_64c/build/dcp.qpf:
afu_synth_setup -s setup16.cfg $(FPGA_BUILD_DIR)_64c
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_64c
gen-sources-1c:
./gen_sources.sh $(CFLAGS) $(CONFIG1) > sources.txt

View file

@ -1,7 +0,0 @@
+define+SYNTHESIS
+define+QUARTUS
vortex_afu16.json
QI:vortex_afu.qsf
C:sources.txt

View file

@ -1,7 +0,0 @@
+define+SYNTHESIS
+define+QUARTUS
vortex_afu8.json
QI:vortex_afu.qsf
C:sources.txt

View file

@ -2,8 +2,8 @@
"version": 1,
"afu-image": {
"power": 0,
"clock-frequency-high": "auto-220",
"clock-frequency-low": "auto-220",
"clock-frequency-high": "auto",
"clock-frequency-low": "auto",
"cmd-mem-read": 1,
"cmd-mem-write": 2,

View file

@ -1,53 +0,0 @@
{
"version": 1,
"afu-image": {
"power": 0,
"clock-frequency-high": "auto-200",
"clock-frequency-low": "auto-200",
"cmd-mem-read": 1,
"cmd-mem-write": 2,
"cmd-run": 3,
"cmd-csr-read": 4,
"cmd-csr-write": 5,
"mmio-cmd-type": 10,
"mmio-io-addr": 12,
"mmio-mem-addr": 14,
"mmio-data-size": 16,
"mmio-status": 18,
"mmio-scope-read": 20,
"mmio-scope-write": 22,
"mmio-dev-caps": 24,
"afu-top-interface":
{
"class": "ccip_std_afu_avalon_mm",
"module-ports" :
[
{
"class": "cci-p",
"params":
{
"clock": "uClk_usr"
}
},
{
"class": "local-memory",
"params":
{
"clock": "uClk_usr"
}
}
]
},
"accelerator-clusters":
[
{
"name": "vortex_afu",
"total-contexts": 1,
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
}
]
}
}

View file

@ -1,54 +0,0 @@
{
"version": 1,
"afu-image": {
"power": 0,
"clock-frequency-high": "auto-210",
"clock-frequency-low": "auto-210",
"cmd-mem-read": 1,
"cmd-mem-write": 2,
"cmd-run": 3,
"cmd-csr-read": 4,
"cmd-csr-write": 5,
"mmio-cmd-type": 10,
"mmio-io-addr": 12,
"mmio-mem-addr": 14,
"mmio-data-size": 16,
"mmio-status": 18,
"mmio-scope-read": 20,
"mmio-scope-write": 22,
"mmio-dev-caps": 24,
"afu-top-interface":
{
"class": "ccip_std_afu_avalon_mm",
"module-ports" :
[
{
"class": "cci-p",
"params":
{
"clock": "uClk_usr"
}
},
{
"class": "local-memory",
"params":
{
"clock": "uClk_usr"
}
}
]
},
"accelerator-clusters":
[
{
"name": "vortex_afu",
"total-contexts": 1,
"accelerator-type-uuid": "35f9452b-25c2-434c-93d5-6f8c60db361c"
}
]
}
}

View file

@ -5,7 +5,7 @@ BUILDIR ?= build
unittest:
mkdir -p unittest/$(BUILDIR)
cp unittest/Makefile unittest/$(BUILDIR)
$(MAKE) -C unittest/$(BUILDIR) clean && $(MAKE) -C unittest/$(BUILDIR) > unittest//$(BUILDIR)build.log 2>&1 &
$(MAKE) -C unittest/$(BUILDIR) clean && $(MAKE) -C unittest/$(BUILDIR) > unittest/$(BUILDIR)/build.log 2>&1 &
pipeline:
mkdir -p pipeline/$(BUILDIR)
@ -55,7 +55,7 @@ top8:
top16:
mkdir -p top16/$(BUILDIR)
cp top16/Makefile top16/$(BUILDIR)
$(MAKE) -C top16/$(BUILDIR) clean && $(MAKE) -C top16/$(BUILDIR) > top16/$(BUILDIR)build.log 2>&1 &
$(MAKE) -C top16/$(BUILDIR) clean && $(MAKE) -C top16/$(BUILDIR) > top16/$(BUILDIR)/build.log 2>&1 &
top32:
mkdir -p top32/$(BUILDIR)

View file

@ -40,30 +40,13 @@ set_global_assignment -name VERILOG_MACRO SYNTHESIS
set_global_assignment -name VERILOG_MACRO NDEBUG
set_global_assignment -name MESSAGE_DISABLE 16818
set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON
set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
set_global_assignment -name OPTIMIZATION_TECHNIQUE AREA
#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED
#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE"
#set_global_assignment -name FINAL_PLACEMENT_OPTIMIZATION ALWAYS
#set_global_assignment -name PLACEMENT_EFFORT_MULTIPLIER 2.0
#set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
#set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS"
#set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON
#set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM
#set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON
#set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON
#set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON
#set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON
#set_global_assignment -name USE_HIGH_SPEED_ADDER ON
#set_global_assignment -name MUX_RESTRUCTURE ON
#set_global_assignment -name ADV_NETLIST_OPT_SYNTH_WYSIWYG_REMAP ON
#set_global_assignment -name PROGRAMMABLE_POWER_TECHNOLOGY_SETTING "FORCE ALL TILES WITH FAILING TIMING PATHS TO HIGH SPEED"
#set_global_assignment -name PHYSICAL_SYNTHESIS_COMBO_LOGIC ON
#set_global_assignment -name PHYSICAL_SYNTHESIS_REGISTER_RETIMING ON
#set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
#set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
#set_global_assignment -name SEED 1
# Power estimation
set_global_assignment -name POWER_PRESET_COOLING_SOLUTION "23 MM HEAT SINK WITH 200 LFPM AIRFLOW"
set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"
switch $opts(family) {
"Arria 10" {

File diff suppressed because it is too large Load diff

View file

@ -4,8 +4,10 @@ SYSROOT ?= $(RISCV_TOOLCHAIN_PATH)/riscv32-unknown-elf
POCL_CC_PATH ?= /opt/pocl/compiler
POCL_RT_PATH ?= /opt/pocl/runtime
OPTS ?= filelist.txt
VORTEX_DRV_PATH ?= $(realpath ../../../driver)
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
VORTEX_RT_PATH ?= $(realpath ../../../runtime)
K_LLCFLAGS += "-O3 -march=riscv32 -target-abi=ilp32f -mcpu=generic-rv32 -mattr=+m,+f -float-abi=hard -code-model=small"
K_CFLAGS += "-v -O3 --sysroot=$(SYSROOT) --gcc-toolchain=$(RISCV_TOOLCHAIN_PATH) -march=rv32imf -mabi=ilp32f -I$(VORTEX_RT_PATH)/include -fno-rtti -fno-exceptions -ffreestanding -nostartfiles -fdata-sections -ffunction-sections"
@ -34,19 +36,19 @@ $(PROJECT): $(SRCS)
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
run-fpga: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-asesim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-vlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/opae/vlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-simx: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
run-rtlsim: $(PROJECT) kernel.pocl
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT)
LD_LIBRARY_PATH=$(POCL_RT_PATH)/lib:$(VORTEX_DRV_PATH)/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS)
.depend: $(SRCS)
$(CXX) $(CXXFLAGS) -MM $^ > .depend;

View file

@ -0,0 +1,5 @@
hurricanegen: hurricanegen.c
gcc -std=c99 -o $@ $<
clean:
rm hurricanegen

View file

@ -0,0 +1,12 @@
#!/bin/bash
./hurricanegen 65536 1
./hurricanegen 131072 1
./hurricanegen 262144 1
./hurricanegen 524288 1
./hurricanegen 1048576 1
./hurricanegen 2097152 1
./hurricanegen 4194304 1
./hurricanegen 8388608 1
./hurricanegen 16777216 1
./hurricanegen 33554432 1

View file

@ -0,0 +1,13 @@
#!/bin/bash
./hurricanegen 10240 1
./hurricanegen 20480 2
./hurricanegen 40960 4
./hurricanegen 81920 8
./hurricanegen 163840 16
./hurricanegen 327680 32
./hurricanegen 655360 64
./hurricanegen 1310720 128
./hurricanegen 2621440 256
./hurricanegen 5242880 512

View file

@ -0,0 +1,105 @@
/*
* hurricanegen.c
* Original author unknown
* Modified by Sam Kauffman - University of Virginia
*
* Generates datasets of "hurricanes" to be used by Rodinia's Nearest Neighbor (nn)
* Also generates lists of the files in the dataset. These lists are passed to nn.
*
* Usage: hurricanegen <num_hurricanes> <num_files>
* The number of hurricanes should be a multiple of both 1024 and the number of files.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
// 641986 gets you ~30 MB of data
int main(int argc, char **argv)
{
FILE *fp;
int i = 0, total_canes = 0, canes = 0, num_files = 0, j = 0;
int year, month, date, hour, num, speed, press;
float lat, lon;
int hours[4] =
{ 0, 6, 12, 18 };
char *name, fname[30];
char names[21][10] =
{ "ALBERTO", "BERYL", "CHRIS", "DEBBY", "ERNESTO", "FLORENCE", "GORDON",
"HELENE", "ISAAC", "JOYCE", "KIRK", "LESLIE", "MICHAEL", "NADINE",
"OSCAR", "PATTY", "RAFAEL", "SANDY", "TONY", "VALERIE", "WILLIAM" };
if (argc < 3)
{
fprintf(stderr, "Error: Enter a number of hurricanes and a number of files.\n");
fprintf(stderr, "The number of hurricanes should be a multiple of both 1024\nand the number of files.\n");
exit(0);
}
total_canes = atoi(argv[1]);
num_files = atoi(argv[2]);
total_canes = ((total_canes+1023)/1024) * 1024; // round up to multiple of 1024
canes = (total_canes + num_files - 1) / num_files; // round up (ceiling division)
total_canes = canes * num_files;
srand(time(NULL));
for (j = 0; j < num_files; j++)
{
if (num_files == 1)
sprintf(fname, "cane%dk.db", total_canes / 1024);
else
sprintf(fname, "cane%dk_%d_%d.db", total_canes / 1024, num_files, j);
if ((fp = fopen(fname, "w")) == NULL)
{
fprintf(stderr, "Failed to open output file '%s'!\n", fname);
return -1;
}
for (i = 0; i < canes; i++)
{
year = 1950 + rand() % 55;
month = 1 + rand() % 12;
date = 1 + rand() % 28;
hour = hours[rand() % 4];
num = 1 + rand() % 28;
name = names[rand() % 21];
lat = ((float) (7 + rand() % 63))
+ ((float) rand() / (float) 0x7fffffff);
lon = ((float) (rand() % 358))
+ ((float) rand() / (float) 0x7fffffff);
speed = 10 + rand() % 155;
press = rand() % 900;
fprintf(fp, "%4d %2d %2d %2d %2d %-9s %5.1f %5.1f %4d %4d\n",
year, month, date, hour, num, name, lat, lon, speed, press);
}
fclose(fp);
}
printf("Generated %d hurricanes in %d file(s).\n", total_canes, num_files);
if (num_files == 1)
{
sprintf(fname, "list%dk.txt", total_canes / 1024);
fp = fopen(fname, "w");
fprintf(fp, "../../data/nn/cane%dk.db\n", total_canes / 1024);
}
else
{
sprintf(fname, "list%dk_%d.txt", total_canes / 1024, num_files);
fp = fopen(fname, "w");
for (int i = 0; i < num_files; i++)
fprintf(fp, "../../data/nn/cane%dk_%d_%d.db\n", total_canes / 1024, num_files, i);
}
fclose(fp);
printf( "File list written to %s.\n", fname );
return 0;
}

View file

@ -237,12 +237,12 @@ int loadData(char *filename, std::vector<Record> &records,
locations.push_back(latLong);
records.push_back(record);
recNum++;
if (0 == (recNum % 500))
break;
/*if (0 == (recNum % 500))
break;*/
}
if (++q == 3)
break;
/*if (++q == 3)
break;*/
fclose(fp);
}
fclose(flist);
@ -281,8 +281,8 @@ void findLowest(std::vector<Record> &records, float *distances, int numRecords,
int parseCommandline(int argc, char *argv[], char *filename, int *r, float *lat,
float *lng, int *q, int *t, int *p, int *d) {
int i;
// if (argc < 2) return 1; // error
strncpy(filename, "filelist.txt", 100);
if (argc < 2) return 1; // error
strncpy(filename,argv[1],100);
char flag;
for (i = 1; i < argc; i++) {