mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 05:47:35 -04:00
ibuffer addition
This commit is contained in:
parent
6c12391338
commit
0b355f228e
80 changed files with 1811 additions and 1528 deletions
|
@ -89,4 +89,26 @@ extern int vx_upload_kernel_file(vx_device_h device, const char* filename) {
|
|||
delete[] content;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
extern int vx_get_perf(vx_device_h device, uint64_t* cycles, uint64_t* instrs) {
|
||||
int ret = 0;
|
||||
|
||||
unsigned value;
|
||||
|
||||
if (cycles) {
|
||||
ret |= vx_csr_get(device, 0, CSR_CYCLE_H, &value);
|
||||
*cycles = value;
|
||||
ret |= vx_csr_get(device, 0, CSR_CYCLE, &value);
|
||||
*cycles = (*cycles << 32) | value;
|
||||
}
|
||||
|
||||
if (instrs) {
|
||||
ret |= vx_csr_get(device, 0, CSR_INSTRET_H, &value);
|
||||
*instrs = value;
|
||||
ret |= vx_csr_get(device, 0, CSR_INSTRET, &value);
|
||||
*instrs = (*instrs << 32) | value;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
|
@ -71,6 +71,9 @@ int vx_upload_kernel_bytes(vx_device_h device, const void* content, size_t size)
|
|||
// upload kernel file to device
|
||||
int vx_upload_kernel_file(vx_device_h device, const char* filename);
|
||||
|
||||
// get performance counters
// Argument order is (device, cycles, instrs). Either output pointer may be
// null, in which case that counter is not read. Each counter is assembled
// from a high/low pair of 32-bit CSR reads (CSR_CYCLE_H/CSR_CYCLE and
// CSR_INSTRET_H/CSR_INSTRET). The return value is the OR of the vx_csr_get
// status codes; callers treat 0 as success.
|
||||
int vx_get_perf(vx_device_h device, uint64_t* cycles, uint64_t* instrs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -212,25 +212,11 @@ extern int vx_dev_close(vx_device_h hdevice) {
|
|||
#endif
|
||||
|
||||
{
|
||||
// Dump performance stats
|
||||
// Dump perf stats
|
||||
uint64_t instrs, cycles;
|
||||
unsigned value;
|
||||
|
||||
int ret = 0;
|
||||
ret |= vx_csr_get(hdevice, 0, CSR_INSTRET_H, &value);
|
||||
instrs = value;
|
||||
ret |= vx_csr_get(hdevice, 0, CSR_INSTRET, &value);
|
||||
instrs = (instrs << 32) | value;
|
||||
|
||||
ret |= vx_csr_get(hdevice, 0, CSR_CYCLE_H, &value);
|
||||
cycles = value;
|
||||
ret |= vx_csr_get(hdevice, 0, CSR_CYCLE, &value);
|
||||
cycles = (cycles << 32) | value;
|
||||
|
||||
int ret = vx_get_perf(hdevice, &instrs, &cycles);
|
||||
float IPC = (float)(double(instrs) / double(cycles));
|
||||
|
||||
fprintf(stdout, "PERF: instrs=%ld, cycles=%ld, IPC=%f\n", instrs, cycles, IPC);
|
||||
|
||||
assert(ret == 0);
|
||||
}
|
||||
|
||||
|
|
|
@ -68,7 +68,8 @@ public:
|
|||
simulator_.attach_ram(&ram_);
|
||||
}
|
||||
|
||||
~vx_device() {
|
||||
~vx_device() {
|
||||
simulator_.print_stats(std::cout);
|
||||
if (future_.valid()) {
|
||||
future_.wait();
|
||||
}
|
||||
|
|
|
@ -155,7 +155,7 @@ int run_kernel_test(const kernel_arg_t& kernel_arg,
|
|||
int32_t curr = ((int32_t*)vx_host_ptr(buffer))[i];
|
||||
int32_t ref = i;
|
||||
if (curr != ref) {
|
||||
std::cout << "error at value " << i
|
||||
std::cout << "error at result #" << i
|
||||
<< ": actual 0x" << curr << ", expected 0x" << ref << std::endl;
|
||||
++errors;
|
||||
}
|
||||
|
@ -238,7 +238,7 @@ int main(int argc, char *argv[]) {
|
|||
std::cout << "cleanup" << std::endl;
|
||||
cleanup();
|
||||
|
||||
std::cout << "Test PASSED" << std::endl;
|
||||
|
||||
std::cout << "Test PASSED" << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -86,7 +86,7 @@ int run_test(const kernel_arg_t& kernel_arg,
|
|||
int ref = i + i;
|
||||
int cur = buf_ptr[i];
|
||||
if (cur != ref) {
|
||||
std::cout << "error at value " << i
|
||||
std::cout << "error at result #" << i
|
||||
<< ": actual 0x" << cur << ", expected 0x" << ref << std::endl;
|
||||
++errors;
|
||||
}
|
||||
|
|
|
@ -57,7 +57,7 @@ public:
|
|||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] + b[i];
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -85,7 +85,7 @@ public:
|
|||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] * b[i];
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -113,7 +113,7 @@ public:
|
|||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] / b[i];
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -143,7 +143,7 @@ public:
|
|||
auto y = a[i] * b[i];
|
||||
auto ref = x + y;
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -171,7 +171,7 @@ public:
|
|||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] + b[i];
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -199,7 +199,7 @@ public:
|
|||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] - b[i];
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -227,7 +227,7 @@ public:
|
|||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] * b[i];
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -255,7 +255,7 @@ public:
|
|||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] * b[i] + 0.5f;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -283,7 +283,7 @@ public:
|
|||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] * b[i] - 0.5f;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -311,7 +311,7 @@ public:
|
|||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = -a[i] * b[i] - 0.5f;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -339,7 +339,7 @@ public:
|
|||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = -a[i] * b[i] + 0.5f;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -369,7 +369,7 @@ public:
|
|||
auto y = a[i] * b[i] + 0.5f;
|
||||
auto ref = x + y;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -397,7 +397,7 @@ public:
|
|||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = a[i] / b[i];
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -427,7 +427,7 @@ public:
|
|||
auto y = b[i] / a[i];
|
||||
auto ref = x + y;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -456,7 +456,7 @@ public:
|
|||
for (int i = 0; i < n; ++i) {
|
||||
auto ref = sqrt(a[i] * b[i]);
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -485,7 +485,7 @@ public:
|
|||
auto x = a[i] + b[i];
|
||||
auto ref = (int32_t)x;
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -514,7 +514,7 @@ public:
|
|||
auto x = a[i] + b[i];
|
||||
auto ref = (uint32_t)x;
|
||||
if (c[i] != ref) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -543,7 +543,7 @@ public:
|
|||
auto x = a[i] + b[i];
|
||||
auto ref = (float)x;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
@ -572,7 +572,7 @@ public:
|
|||
auto x = a[i] + b[i];
|
||||
auto ref = (float)x;
|
||||
if (!almost_equal(c[i], ref)) {
|
||||
std::cout << "error at value " << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
std::cout << "error at result #" << i << ": expected " << ref << ", actual " << c[i] << ", a=" << a[i] << ", b=" << b[i] << std::endl;
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -75,6 +75,9 @@ tar -zcvf output_files_1c.tar.gz `find ./build_fpga_1c -type f \( -iname \*.rpt
|
|||
|
||||
# compress VCD trace
|
||||
tar -zcvf vortex.vcd.tar.gz ./build_ase_1c/work/vortex.vcd
|
||||
tar -zcvf trace.vcd.tar.gz obj_dir/trace.vcd
|
||||
tar -zcvf trace.vcd.tar.gz trace.vcd
|
||||
tar -zcvf run.log.tar.gz run.log
|
||||
|
||||
# decompress VCD trace
|
||||
tar -zxvf /mnt/c/Users/Blaise/Downloads/vortex.vcd.tar.gz
|
||||
|
|
|
@ -3,90 +3,143 @@
|
|||
module VX_alu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_alu_req_if alu_req_if,
|
||||
|
||||
// Outputs
|
||||
VX_exu_to_cmt_if alu_commit_if
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_exu_to_cmt_if alu_commit_if
|
||||
);
|
||||
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] addsub_result;
|
||||
wire [`NUM_THREADS-1:0] less_result;
|
||||
wire [`NUM_THREADS-1:0][31:0] shift_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] misc_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] add_result;
|
||||
reg [`NUM_THREADS-1:0][32:0] sub_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] shift_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] misc_result;
|
||||
|
||||
wire valid_r;
|
||||
wire [`NW_BITS-1:0] wid_r;
|
||||
wire [`NUM_THREADS-1:0] thread_mask_r;
|
||||
wire [31:0] curr_PC_r;
|
||||
wire [`NR_BITS-1:0] rd_r;
|
||||
wire wb_r;
|
||||
wire [`NT_BITS-1:0] tid_r;
|
||||
wire is_sub_r;
|
||||
wire [`BR_BITS-1:0] br_op_r;
|
||||
wire is_br_op_r, is_br_op_s;
|
||||
wire [1:0] alu_op_class_r;
|
||||
wire [31:0] next_PC_r;
|
||||
|
||||
wire is_br_op = `IS_BR_OP(alu_req_if.op);
|
||||
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op);
|
||||
wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.op);
|
||||
wire alu_signed = `ALU_SIGNED(alu_op);
|
||||
wire [1:0] alu_op_class = `ALU_OP_CLASS(alu_op);
|
||||
wire is_sub = (alu_op == `ALU_SUB);
|
||||
|
||||
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op);
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.rs1_is_PC ? {`NUM_THREADS{alu_req_if.curr_PC}} : alu_in1;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2_imm = alu_req_if.rs2_is_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||
|
||||
wire negate_add = (alu_op == `ALU_SUB);
|
||||
wire signed_less = (alu_op == `ALU_SLT);
|
||||
wire signed_shift = (alu_op == `ALU_SRA);
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.rs1_is_PC ? {`NUM_THREADS{alu_req_if.curr_PC}} : alu_in1;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2_imm = alu_req_if.rs2_is_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2_less = (alu_req_if.rs2_is_imm && ~is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [32:0] addsub_in1 = {alu_in1_PC[i], 1'b1};
|
||||
wire [32:0] addsub_in2 = {alu_in2_imm[i], 1'b0} ^ {33{negate_add}};
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [32:0] addsub_addd = addsub_in1 + addsub_in2;
|
||||
`IGNORE_WARNINGS_END
|
||||
assign addsub_result[i] = addsub_addd[32:1];
|
||||
always @(posedge clk) begin
|
||||
add_result[i] <= alu_in1_PC[i] + alu_in2_imm[i];
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [32:0] less_in1 = {signed_less & alu_in1[i][31], alu_in1[i]};
|
||||
wire [32:0] less_in2 = {signed_less & alu_in2_imm[i][31], alu_in2_imm[i]};
|
||||
assign less_result[i] = $signed(less_in1) < $signed(less_in2);
|
||||
wire [32:0] sub_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
|
||||
wire [32:0] sub_in2 = {alu_signed & alu_in2_less[i][31], alu_in2_less[i]};
|
||||
always @(posedge clk) begin
|
||||
sub_result[i] <= $signed(sub_in1) - $signed(sub_in2);
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [32:0] shift_in1 = {signed_shift & alu_in1[i][31], alu_in1[i]};
|
||||
wire [32:0] shift_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [32:0] shift_value = $signed(shift_in1) >>> alu_in2_imm[i][4:0];
|
||||
`IGNORE_WARNINGS_END
|
||||
assign shift_result[i] = shift_value[31:0];
|
||||
always @(posedge clk) begin
|
||||
shift_result[i] <= shift_value[31:0];
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
always @(posedge clk) begin
|
||||
case (alu_op)
|
||||
`ALU_AND: misc_result[i] = alu_in1[i] & alu_in2_imm[i];
|
||||
`ALU_OR: misc_result[i] = alu_in1[i] | alu_in2_imm[i];
|
||||
`ALU_XOR: misc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
|
||||
`ALU_AND: misc_result[i] <= alu_in1[i] & alu_in2_imm[i];
|
||||
`ALU_OR: misc_result[i] <= alu_in1[i] | alu_in2_imm[i];
|
||||
`ALU_XOR: misc_result[i] <= alu_in1[i] ^ alu_in2_imm[i];
|
||||
//`ALU_SLL,
|
||||
default: misc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
|
||||
default: misc_result[i] <= alu_in1[i] << alu_in2_imm[i][4:0];
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
reg [31:0] next_PC = alu_req_if.curr_PC + 4;
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `NT_BITS + 1 + 1 + `BR_BITS + 2 + 32),
|
||||
.DEPTH(1)
|
||||
) alu_shift_reg (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(alu_req_if.ready),
|
||||
.in({alu_req_if.valid, alu_req_if.wid, alu_req_if.thread_mask, alu_req_if.curr_PC, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, is_sub, is_br_op, br_op, alu_op_class, next_PC}),
|
||||
.out({valid_r, wid_r, thread_mask_r, curr_PC_r, rd_r, wb_r, tid_r, is_sub_r, is_br_op_r, br_op_r, alu_op_class_r, next_PC_r})
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
always @(*) begin
|
||||
case (`ALU_OP_CLASS(alu_op))
|
||||
0: alu_result[i] = addsub_result[i];
|
||||
1: alu_result[i] = {31'b0, less_result[i]};
|
||||
case (alu_op_class_r)
|
||||
0: alu_result[i] = is_sub_r ? sub_result[i][31:0] : add_result[i];
|
||||
1: alu_result[i] = {31'b0, sub_result[i][32]};
|
||||
2: alu_result[i] = shift_result[i];
|
||||
default: alu_result[i] = misc_result[i];
|
||||
endcase
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// branch handling
|
||||
|
||||
wire br_neg = `BR_NEG(br_op_r);
|
||||
wire br_less = `BR_LESS(br_op_r);
|
||||
wire br_static = `BR_STATIC(br_op_r);
|
||||
wire is_jal = is_br_op_r && (br_op_r == `BR_JAL || br_op_r == `BR_JALR);
|
||||
|
||||
wire [31:0] br_dest = add_result[tid_r];
|
||||
wire [32:0] cmp_result = sub_result[tid_r];
|
||||
wire is_less = cmp_result[32];
|
||||
wire is_equal = ~(| cmp_result[31:0]);
|
||||
wire br_taken = ((br_less ? is_less : is_equal) ^ br_neg) | br_static;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{next_PC_r}} : alu_result;
|
||||
|
||||
// output
|
||||
|
||||
wire stall_out = ~alu_commit_if.ready && alu_commit_if.valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + (`NUM_THREADS * 32))
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + 1 + 32)
|
||||
) alu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (0),
|
||||
.stall (stall_out),
|
||||
.flush (0),
|
||||
.in ({alu_req_if.valid, alu_req_if.issue_tag, alu_result}),
|
||||
.out ({alu_commit_if.valid, alu_commit_if.issue_tag, alu_commit_if.data})
|
||||
.in ({valid_r, wid_r, thread_mask_r, curr_PC_r, rd_r, wb_r, alu_jal_result, is_br_op_r, br_taken, br_dest}),
|
||||
.out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.thread_mask, alu_commit_if.curr_PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_s, branch_ctl_if.taken, branch_ctl_if.dest})
|
||||
);
|
||||
|
||||
assign alu_req_if.ready = 1'b1;
|
||||
assign branch_ctl_if.valid = alu_commit_if.valid && alu_commit_if.ready && is_br_op_s;
|
||||
assign branch_ctl_if.wid = alu_commit_if.wid;
|
||||
|
||||
// can accept new request?
|
||||
assign alu_req_if.ready = ~stall_out;
|
||||
|
||||
endmodule
|
|
@ -1,56 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// Branch unit.
// Evaluates the branch condition from the request's rs1/rs2 operands,
// computes the branch destination (base + offset) and the link value
// (curr_PC + 4), registers all of them through bru_reg, and then drives
// branch_ctl_if (valid/wid/taken/dest) and bru_commit_if (valid/issue_tag/data).
module VX_bru_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_bru_req_if bru_req_if,
|
||||
|
||||
// Outputs
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_exu_to_cmt_if bru_commit_if
|
||||
);
|
||||
// Decode the branch opcode into condition-control flags. The BRU_* macros
// are defined outside this module (presumably in VX_define.vh); how each
// flag is used is visible in the 'taken' expression below.
wire [`BRU_BITS-1:0] bru_op = bru_req_if.op;
|
||||
wire bru_neg = `BRU_NEG(bru_op);
|
||||
wire bru_less = `BRU_LESS(bru_op);
|
||||
wire bru_signed = `BRU_SIGNED(bru_op);
|
||||
wire bru_static = `BRU_STATIC(bru_op);
|
||||
|
||||
wire [31:0] rs1_data = bru_req_if.rs1_data;
|
||||
wire [31:0] rs2_data = bru_req_if.rs2_data;
|
||||
|
||||
// Extend both operands to 33 bits: bit 32 replicates the operand's sign bit
// only when bru_signed is set and is 0 otherwise, so the single $signed
// comparison below serves both signed and unsigned less-than.
wire [32:0] signed_in1 = {bru_signed & rs1_data[31], rs1_data};
|
||||
wire [32:0] signed_in2 = {bru_signed & rs2_data[31], rs2_data};
|
||||
wire is_less = $signed(signed_in1) < $signed(signed_in2);
|
||||
|
||||
wire is_equal = (rs1_data == rs2_data);
|
||||
|
||||
// Select the less-than or equality result, optionally invert it (bru_neg),
// and force 'taken' when bru_static is set.
wire taken = ((bru_less ? is_less : is_equal) ^ bru_neg) | bru_static;
|
||||
|
||||
// Destination = base + offset, where the base is either the current PC or
// rs1 depending on rs1_is_PC.
wire [31:0] base_addr = bru_req_if.rs1_is_PC ? bru_req_if.curr_PC : rs1_data;
|
||||
wire [31:0] dest = base_addr + bru_req_if.offset;
|
||||
|
||||
// Address of the instruction following the branch (curr_PC + 4); its
// registered copy is what gets sent to commit as the result data.
wire [31:0] jal_result = bru_req_if.curr_PC + 4;
|
||||
wire [31:0] jal_result_r;
|
||||
|
||||
// Register stage carrying {valid, wid, issue_tag, taken, dest, jal_result};
// the terms of N are listed in the same order as the .in/.out concatenations.
// stall and flush are tied to 0.
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `ISTAG_BITS + 1 + 32 + 32)
|
||||
) bru_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (0),
|
||||
.flush (0),
|
||||
.in ({bru_req_if.valid, bru_req_if.wid, bru_req_if.issue_tag, taken, dest, jal_result}),
|
||||
.out ({bru_commit_if.valid, branch_ctl_if.wid, bru_commit_if.issue_tag, branch_ctl_if.taken, branch_ctl_if.dest, jal_result_r})
|
||||
);
|
||||
|
||||
// The branch-control valid mirrors the registered commit valid.
assign branch_ctl_if.valid = bru_commit_if.valid;
|
||||
|
||||
// Replicate the registered link value across all NUM_THREADS data lanes.
assign bru_commit_if.data = {`NUM_THREADS{jal_result_r}};
|
||||
|
||||
// Requests are always accepted: ready is constantly asserted.
assign bru_req_if.ready = 1'b1;
|
||||
|
||||
endmodule
|
|
@ -8,7 +8,6 @@ module VX_commit #(
|
|||
|
||||
// inputs
|
||||
VX_exu_to_cmt_if alu_commit_if,
|
||||
VX_exu_to_cmt_if bru_commit_if,
|
||||
VX_exu_to_cmt_if lsu_commit_if,
|
||||
VX_exu_to_cmt_if mul_commit_if,
|
||||
VX_exu_to_cmt_if csr_commit_if,
|
||||
|
@ -16,15 +15,13 @@ module VX_commit #(
|
|||
VX_exu_to_cmt_if gpu_commit_if,
|
||||
|
||||
// outputs
|
||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||
VX_wb_if writeback_if,
|
||||
VX_writeback_if writeback_if,
|
||||
VX_cmt_to_csr_if cmt_to_csr_if
|
||||
);
|
||||
// update CRSs
|
||||
// CSRs update
|
||||
|
||||
wire [`NUM_EXS-1:0] commited_mask;
|
||||
assign commited_mask = {alu_commit_if.valid,
|
||||
bru_commit_if.valid,
|
||||
assign commited_mask = {alu_commit_if.valid,
|
||||
lsu_commit_if.valid,
|
||||
csr_commit_if.valid,
|
||||
mul_commit_if.valid,
|
||||
|
@ -44,7 +41,7 @@ module VX_commit #(
|
|||
always @(*) begin
|
||||
fflags = 0;
|
||||
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||
if (cmt_to_issue_if.fpu_data.thread_mask[i]) begin
|
||||
if (fpu_commit_if.thread_mask[i]) begin
|
||||
fflags.NX |= fpu_commit_if.fflags[i].NX;
|
||||
fflags.UF |= fpu_commit_if.fflags[i].UF;
|
||||
fflags.OF |= fpu_commit_if.fflags[i].OF;
|
||||
|
@ -64,7 +61,7 @@ module VX_commit #(
|
|||
csr_update_r <= (| commited_mask);
|
||||
fflags_r <= fflags;
|
||||
has_fflags_r <= fpu_commit_if.valid && fpu_commit_if.has_fflags;
|
||||
wid_r <= cmt_to_issue_if.fpu_data.wid;
|
||||
wid_r <= fpu_commit_if.wid;
|
||||
num_commits_r <= num_commits;
|
||||
end
|
||||
|
||||
|
@ -74,23 +71,7 @@ module VX_commit #(
|
|||
assign cmt_to_csr_if.has_fflags = has_fflags_r;
|
||||
assign cmt_to_csr_if.fflags = fflags_r;
|
||||
|
||||
// Notify issue stage
|
||||
|
||||
assign cmt_to_issue_if.alu_valid = alu_commit_if.valid;
|
||||
assign cmt_to_issue_if.bru_valid = bru_commit_if.valid;
|
||||
assign cmt_to_issue_if.lsu_valid = lsu_commit_if.valid;
|
||||
assign cmt_to_issue_if.csr_valid = csr_commit_if.valid;
|
||||
assign cmt_to_issue_if.mul_valid = mul_commit_if.valid;
|
||||
assign cmt_to_issue_if.fpu_valid = fpu_commit_if.valid;
|
||||
assign cmt_to_issue_if.gpu_valid = gpu_commit_if.valid;
|
||||
|
||||
assign cmt_to_issue_if.alu_tag = alu_commit_if.issue_tag;
|
||||
assign cmt_to_issue_if.bru_tag = bru_commit_if.issue_tag;
|
||||
assign cmt_to_issue_if.lsu_tag = lsu_commit_if.issue_tag;
|
||||
assign cmt_to_issue_if.csr_tag = csr_commit_if.issue_tag;
|
||||
assign cmt_to_issue_if.mul_tag = mul_commit_if.issue_tag;
|
||||
assign cmt_to_issue_if.fpu_tag = fpu_commit_if.issue_tag;
|
||||
assign cmt_to_issue_if.gpu_tag = gpu_commit_if.issue_tag;
|
||||
// Writeback
|
||||
|
||||
VX_writeback #(
|
||||
.CORE_ID(CORE_ID)
|
||||
|
@ -99,41 +80,38 @@ module VX_commit #(
|
|||
.reset (reset),
|
||||
|
||||
.alu_commit_if (alu_commit_if),
|
||||
.bru_commit_if (bru_commit_if),
|
||||
.lsu_commit_if (lsu_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.mul_commit_if (mul_commit_if),
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
.cmt_to_issue_if(cmt_to_issue_if),
|
||||
|
||||
.writeback_if (writeback_if)
|
||||
);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (alu_commit_if.valid) begin
|
||||
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.alu_data.wid, cmt_to_issue_if.alu_data.curr_PC, alu_commit_if.issue_tag, cmt_to_issue_if.alu_data.thread_mask, cmt_to_issue_if.alu_data.wb, cmt_to_issue_if.alu_data.rd, alu_commit_if.data);
|
||||
if (alu_commit_if.valid && alu_commit_if.ready) begin
|
||||
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=ALU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, alu_commit_if.wid, alu_commit_if.curr_PC, alu_commit_if.thread_mask, alu_commit_if.wb, alu_commit_if.rd, alu_commit_if.data);
|
||||
end
|
||||
if (bru_commit_if.valid) begin
|
||||
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=BRU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.bru_data.wid, cmt_to_issue_if.bru_data.curr_PC, bru_commit_if.issue_tag, cmt_to_issue_if.bru_data.thread_mask, cmt_to_issue_if.bru_data.wb, cmt_to_issue_if.bru_data.rd, bru_commit_if.data);
|
||||
if (lsu_commit_if.valid && lsu_commit_if.ready) begin
|
||||
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=LSU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, lsu_commit_if.wid, lsu_commit_if.curr_PC, lsu_commit_if.thread_mask, lsu_commit_if.wb, lsu_commit_if.rd, lsu_commit_if.data);
|
||||
end
|
||||
if (lsu_commit_if.valid) begin
|
||||
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.lsu_data.wid, cmt_to_issue_if.lsu_data.curr_PC, lsu_commit_if.issue_tag, cmt_to_issue_if.lsu_data.thread_mask, cmt_to_issue_if.lsu_data.wb, cmt_to_issue_if.lsu_data.rd, lsu_commit_if.data);
|
||||
end
|
||||
if (csr_commit_if.valid) begin
|
||||
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.csr_data.wid, cmt_to_issue_if.csr_data.curr_PC, csr_commit_if.issue_tag, cmt_to_issue_if.csr_data.thread_mask, cmt_to_issue_if.csr_data.wb, cmt_to_issue_if.csr_data.rd, csr_commit_if.data);
|
||||
if (csr_commit_if.valid && csr_commit_if.ready) begin
|
||||
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.curr_PC, csr_commit_if.thread_mask, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data);
|
||||
end
|
||||
if (mul_commit_if.validy) begin
|
||||
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.mul_data.wid, cmt_to_issue_if.mul_data.curr_PC, mul_commit_if.issue_tag, cmt_to_issue_if.mul_data.thread_mask, cmt_to_issue_if.mul_data.wb, cmt_to_issue_if.mul_data.rd, mul_commit_if.data);
|
||||
if (mul_commit_if.valid && mul_commit_if.ready) begin
|
||||
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=MUL, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.wid, mul_commit_if.curr_PC, mul_commit_if.thread_mask, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data);
|
||||
end
|
||||
if (fpu_commit_if.valid) begin
|
||||
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.fpu_data.wid, cmt_to_issue_if.fpu_data.curr_PC, fpu_commit_if.issue_tag, cmt_to_issue_if.fpu_data.thread_mask, cmt_to_issue_if.fpu_data.wb, cmt_to_issue_if.fpu_data.rd, fpu_commit_if.data);
|
||||
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
|
||||
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, fpu_commit_if.wid, fpu_commit_if.curr_PC, fpu_commit_if.thread_mask, fpu_commit_if.wb, fpu_commit_if.rd, fpu_commit_if.data);
|
||||
end
|
||||
if (gpu_commit_if.valid) begin
|
||||
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.gpu_data.wid, cmt_to_issue_if.gpu_data.curr_PC, gpu_commit_if.issue_tag, cmt_to_issue_if.gpu_data.thread_mask, cmt_to_issue_if.gpu_data.wb, cmt_to_issue_if.gpu_data.rd, gpu_commit_if.data);
|
||||
if (gpu_commit_if.valid && gpu_commit_if.ready) begin
|
||||
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=GPU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, gpu_commit_if.wid, gpu_commit_if.curr_PC, gpu_commit_if.thread_mask, gpu_commit_if.wb, gpu_commit_if.rd, gpu_commit_if.data);
|
||||
end
|
||||
end
|
||||
`else
|
||||
`UNUSED_FIELD(fpu_commit_if, curr_PC)
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -60,16 +60,6 @@
|
|||
`define ARCHITECTURE_ID 0
|
||||
`define IMPLEMENTATION_ID 0
|
||||
|
||||
// Size of MUL Request Queue Size
|
||||
`ifndef MULRQ_SIZE
|
||||
`define MULRQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Size of issue queue
|
||||
`ifndef ISSUEQ_SIZE
|
||||
`define ISSUEQ_SIZE (8 + `NUM_WARPS)
|
||||
`endif
|
||||
|
||||
// CSR Addresses //////////////////////////////////////////////////////////////
|
||||
|
||||
`define CSR_FFLAGS 12'h001
|
||||
|
@ -109,6 +99,28 @@
|
|||
`define CSR_MIMPID 12'hF13
|
||||
`define CSR_MHARTID 12'hF14
|
||||
|
||||
// Pipeline Queues ============================================================
|
||||
|
||||
// Size of instruction queue
|
||||
`ifndef IBUF_SIZE
|
||||
`define IBUF_SIZE 8
|
||||
`endif
|
||||
|
||||
// Size of LSU Request Queue
|
||||
`ifndef LSUQ_SIZE
|
||||
`define LSUQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Size of MUL Request Queue
|
||||
`ifndef MULQ_SIZE
|
||||
`define MULQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Size of FPU Request Queue
|
||||
`ifndef FPUQ_SIZE
|
||||
`define FPUQ_SIZE 8
|
||||
`endif
|
||||
|
||||
// Dcache Configurable Knobs ==================================================
|
||||
|
||||
// Size of cache in bytes
|
||||
|
|
|
@ -1,9 +1,6 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_csr_arb (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
module VX_csr_arb (
|
||||
// inputs
|
||||
VX_csr_req_if csr_core_req_if,
|
||||
VX_csr_io_req_if csr_io_req_if,
|
||||
|
@ -12,7 +9,7 @@ module VX_csr_arb (
|
|||
VX_csr_req_if csr_req_if,
|
||||
|
||||
// input
|
||||
VX_csr_rsp_if csr_rsp_if,
|
||||
VX_exu_to_cmt_if csr_rsp_if,
|
||||
|
||||
// outputs
|
||||
VX_exu_to_cmt_if csr_commit_if,
|
||||
|
@ -21,33 +18,33 @@ module VX_csr_arb (
|
|||
input wire select_io_req,
|
||||
input wire select_io_rsp
|
||||
);
|
||||
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
// requests
|
||||
assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid;
|
||||
assign csr_req_if.issue_tag = (~select_io_req) ? csr_core_req_if.issue_tag : 0;
|
||||
assign csr_req_if.wid = (~select_io_req) ? csr_core_req_if.wid : 0;
|
||||
assign csr_req_if.curr_PC = (~select_io_req) ? csr_core_req_if.curr_PC : 0;
|
||||
assign csr_req_if.op = (~select_io_req) ? csr_core_req_if.op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
|
||||
assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
|
||||
assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
|
||||
assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;
|
||||
assign csr_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0;
|
||||
assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid;
|
||||
assign csr_req_if.wid = (~select_io_req) ? csr_core_req_if.wid : 0;
|
||||
assign csr_req_if.thread_mask = (~select_io_req) ? csr_core_req_if.thread_mask : 0;
|
||||
assign csr_req_if.curr_PC = (~select_io_req) ? csr_core_req_if.curr_PC : 0;
|
||||
assign csr_req_if.op = (~select_io_req) ? csr_core_req_if.op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
|
||||
assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
|
||||
assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
|
||||
assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;
|
||||
assign csr_req_if.wb = (~select_io_req) ? csr_core_req_if.wb : 0;
|
||||
assign csr_req_if.is_io = select_io_req;
|
||||
|
||||
assign csr_core_req_if.ready = csr_req_if.ready && (~select_io_req);
|
||||
assign csr_io_req_if.ready = csr_req_if.ready && select_io_req;
|
||||
assign csr_io_req_if.ready = csr_req_if.ready && select_io_req;
|
||||
|
||||
// responses
|
||||
assign csr_io_rsp_if.valid = csr_rsp_if.valid & select_io_rsp;
|
||||
assign csr_io_rsp_if.data = csr_rsp_if.data[0];
|
||||
|
||||
assign csr_commit_if.valid = csr_rsp_if.valid & ~select_io_rsp;
|
||||
assign csr_commit_if.issue_tag= csr_rsp_if.issue_tag;
|
||||
assign csr_commit_if.data = csr_rsp_if.data;
|
||||
assign csr_commit_if.valid = csr_rsp_if.valid & ~select_io_rsp;
|
||||
assign csr_commit_if.wid = csr_rsp_if.wid;
|
||||
assign csr_commit_if.thread_mask = csr_rsp_if.thread_mask;
|
||||
assign csr_commit_if.curr_PC = csr_rsp_if.curr_PC;
|
||||
assign csr_commit_if.rd = csr_rsp_if.rd;
|
||||
assign csr_commit_if.wb = csr_rsp_if.wb;
|
||||
assign csr_commit_if.data = csr_rsp_if.data;
|
||||
|
||||
assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : 1'b1;
|
||||
assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready;
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -7,7 +7,7 @@ module VX_csr_data #(
|
|||
input wire reset,
|
||||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_csr_to_fpu_if csr_to_fpu_if,
|
||||
VX_csr_to_issue_if csr_to_issue_if,
|
||||
|
||||
input wire[`NW_BITS-1:0] wid,
|
||||
|
||||
|
@ -129,11 +129,11 @@ module VX_csr_data #(
|
|||
`CSR_MIMPID : read_data = `IMPLEMENTATION_ID;
|
||||
|
||||
default: begin
|
||||
assert(~read_enable) else $error("%t: invalid CSR read address: %0h", $time, read_addr);
|
||||
end
|
||||
assert(~read_enable) else $error("%t: invalid CSR read address: %0h", $time, read_addr);
|
||||
end
|
||||
endcase
|
||||
end
|
||||
|
||||
assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.wid];
|
||||
assign csr_to_issue_if.frm = csr_frm[csr_to_issue_if.wid];
|
||||
|
||||
endmodule
|
|
@ -7,7 +7,7 @@ module VX_csr_unit #(
|
|||
input wire reset,
|
||||
|
||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||
VX_csr_to_fpu_if csr_to_fpu_if,
|
||||
VX_csr_to_issue_if csr_to_issue_if,
|
||||
|
||||
VX_csr_io_req_if csr_io_req_if,
|
||||
VX_csr_io_rsp_if csr_io_rsp_if,
|
||||
|
@ -15,16 +15,13 @@ module VX_csr_unit #(
|
|||
VX_csr_req_if csr_req_if,
|
||||
VX_exu_to_cmt_if csr_commit_if
|
||||
);
|
||||
VX_csr_req_if csr_pipe_req_if();
|
||||
VX_csr_rsp_if csr_pipe_rsp_if();
|
||||
VX_csr_req_if csr_pipe_req_if();
|
||||
VX_exu_to_cmt_if csr_pipe_rsp_if();
|
||||
|
||||
wire select_io_req = csr_io_req_if.valid;
|
||||
wire select_io_rsp;
|
||||
|
||||
VX_csr_arb csr_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.csr_core_req_if (csr_req_if),
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_req_if (csr_pipe_req_if),
|
||||
|
@ -41,7 +38,6 @@ module VX_csr_unit #(
|
|||
wire [`CSR_ADDR_BITS-1:0] csr_addr_s1;
|
||||
wire [31:0] csr_read_data, csr_read_data_s1;
|
||||
wire [31:0] csr_updated_data_s1;
|
||||
wire [`NW_BITS-1:0] wid_s1;
|
||||
|
||||
VX_csr_data #(
|
||||
.CORE_ID(CORE_ID)
|
||||
|
@ -49,7 +45,7 @@ module VX_csr_unit #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.csr_to_fpu_if (csr_to_fpu_if),
|
||||
.csr_to_issue_if (csr_to_issue_if),
|
||||
.read_enable (csr_pipe_req_if.valid),
|
||||
.read_addr (csr_pipe_req_if.csr_addr),
|
||||
.read_data (csr_read_data),
|
||||
|
@ -60,7 +56,7 @@ module VX_csr_unit #(
|
|||
);
|
||||
|
||||
wire csr_hazard = (csr_addr_s1 == csr_pipe_req_if.csr_addr)
|
||||
&& (wid_s1 == csr_pipe_req_if.wid)
|
||||
&& (csr_pipe_rsp_if.wid == csr_pipe_req_if.wid)
|
||||
&& csr_pipe_rsp_if.valid;
|
||||
|
||||
wire [31:0] csr_read_data_qual = csr_hazard ? csr_updated_data_s1 : csr_read_data;
|
||||
|
@ -86,21 +82,21 @@ module VX_csr_unit #(
|
|||
end
|
||||
default: csr_updated_data = 32'hdeadbeef;
|
||||
endcase
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
wire csr_we_s0 = csr_we_s0_unqual && csr_pipe_req_if.valid;
|
||||
|
||||
wire stall = ~csr_pipe_rsp_if.ready && csr_pipe_rsp_if.valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + 1 + `CSR_ADDR_BITS + 1 + 32 + 32)
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32)
|
||||
) csr_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.issue_tag, csr_pipe_req_if.wid, csr_we_s0, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
|
||||
.out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.issue_tag, wid_s1, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1})
|
||||
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.wid, csr_pipe_req_if.thread_mask, csr_pipe_req_if.curr_PC, csr_pipe_req_if.rd, csr_pipe_req_if.wb, csr_we_s0, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
|
||||
.out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.wid, csr_pipe_rsp_if.thread_mask, csr_pipe_rsp_if.curr_PC, csr_pipe_rsp_if.rd, csr_pipe_rsp_if.wb, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1})
|
||||
);
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
|
@ -109,6 +105,7 @@ module VX_csr_unit #(
|
|||
csr_read_data_s1;
|
||||
end
|
||||
|
||||
// can accept new request?
|
||||
assign csr_pipe_req_if.ready = ~stall;
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -15,11 +15,13 @@ module VX_decode #(
|
|||
VX_wstall_if wstall_if,
|
||||
VX_join_if join_if
|
||||
);
|
||||
wire valid_in = ifetch_rsp_if.valid;
|
||||
wire [31:0] instr = ifetch_rsp_if.instr;
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
wire [31:0] instr = ifetch_rsp_if.instr;
|
||||
|
||||
reg [`ALU_BITS-1:0] alu_op;
|
||||
reg [`BRU_BITS-1:0] br_op;
|
||||
reg [`BR_BITS-1:0] br_op;
|
||||
reg [`LSU_BITS-1:0] lsu_op;
|
||||
reg [`CSR_BITS-1:0] csr_op;
|
||||
reg [`MUL_BITS-1:0] mul_op;
|
||||
|
@ -100,27 +102,27 @@ module VX_decode #(
|
|||
wire is_br = (is_btype || is_jal || is_jalr || is_jals);
|
||||
|
||||
always @(*) begin
|
||||
br_op = `BRU_OTHER;
|
||||
br_op = `BR_OTHER;
|
||||
case (opcode)
|
||||
`INST_B: begin
|
||||
case (func3)
|
||||
3'h0: br_op = `BRU_EQ;
|
||||
3'h1: br_op = `BRU_NE;
|
||||
3'h4: br_op = `BRU_LT;
|
||||
3'h5: br_op = `BRU_GE;
|
||||
3'h6: br_op = `BRU_LTU;
|
||||
3'h7: br_op = `BRU_GEU;
|
||||
3'h0: br_op = `BR_EQ;
|
||||
3'h1: br_op = `BR_NE;
|
||||
3'h4: br_op = `BR_LT;
|
||||
3'h5: br_op = `BR_GE;
|
||||
3'h6: br_op = `BR_LTU;
|
||||
3'h7: br_op = `BR_GEU;
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
`INST_JAL: br_op = `BRU_JAL;
|
||||
`INST_JALR: br_op = `BRU_JALR;
|
||||
`INST_JAL: br_op = `BR_JAL;
|
||||
`INST_JALR: br_op = `BR_JALR;
|
||||
`INST_SYS: begin
|
||||
if (is_jals && u_12 == 12'h000) br_op = `BRU_ECALL;
|
||||
if (is_jals && u_12 == 12'h001) br_op = `BRU_EBREAK;
|
||||
if (is_jals && u_12 == 12'h302) br_op = `BRU_MRET;
|
||||
if (is_jals && u_12 == 12'h102) br_op = `BRU_SRET;
|
||||
if (is_jals && u_12 == 12'h7B2) br_op = `BRU_DRET;
|
||||
if (is_jals && u_12 == 12'h000) br_op = `BR_ECALL;
|
||||
if (is_jals && u_12 == 12'h001) br_op = `BR_EBREAK;
|
||||
if (is_jals && u_12 == 12'h302) br_op = `BR_MRET;
|
||||
if (is_jals && u_12 == 12'h102) br_op = `BR_SRET;
|
||||
if (is_jals && u_12 == 12'h7B2) br_op = `BR_DRET;
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
|
@ -290,104 +292,93 @@ module VX_decode #(
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
VX_decode_if decode_tmp_if();
|
||||
assign decode_if.valid = ifetch_rsp_if.valid
|
||||
&& (decode_if.ex_type != `EX_NOP); // skip noop
|
||||
|
||||
assign decode_tmp_if.valid = ifetch_rsp_if.valid;
|
||||
assign decode_tmp_if.wid = ifetch_rsp_if.wid;
|
||||
assign decode_tmp_if.thread_mask = ifetch_rsp_if.thread_mask;
|
||||
assign decode_tmp_if.curr_PC = ifetch_rsp_if.curr_PC;
|
||||
assign decode_if.wid = ifetch_rsp_if.wid;
|
||||
assign decode_if.thread_mask = ifetch_rsp_if.thread_mask;
|
||||
assign decode_if.curr_PC = ifetch_rsp_if.curr_PC;
|
||||
|
||||
assign decode_tmp_if.ex_type = is_lsu ? `EX_LSU :
|
||||
is_csr ? `EX_CSR :
|
||||
is_mul ? `EX_MUL :
|
||||
is_fpu ? `EX_FPU :
|
||||
is_gpu ? `EX_GPU :
|
||||
is_br ? `EX_BRU :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
|
||||
`EX_NOP;
|
||||
assign decode_if.ex_type = is_lsu ? `EX_LSU :
|
||||
is_csr ? `EX_CSR :
|
||||
is_mul ? `EX_MUL :
|
||||
is_fpu ? `EX_FPU :
|
||||
is_gpu ? `EX_GPU :
|
||||
is_br ? `EX_ALU :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
|
||||
`EX_NOP;
|
||||
|
||||
assign decode_tmp_if.ex_op = is_lsu ? `OP_BITS'(lsu_op) :
|
||||
is_csr ? `OP_BITS'(csr_op) :
|
||||
is_mul ? `OP_BITS'(mul_op) :
|
||||
is_fpu ? `OP_BITS'(fpu_op) :
|
||||
is_gpu ? `OP_BITS'(gpu_op) :
|
||||
is_br ? `OP_BITS'(br_op) :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
|
||||
0;
|
||||
assign decode_if.ex_op = is_lsu ? `OP_BITS'(lsu_op) :
|
||||
is_csr ? `OP_BITS'(csr_op) :
|
||||
is_mul ? `OP_BITS'(mul_op) :
|
||||
is_fpu ? `OP_BITS'(fpu_op) :
|
||||
is_gpu ? `OP_BITS'(gpu_op) :
|
||||
is_br ? `OP_BITS'({1'b1, br_op}) :
|
||||
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'({1'b0, alu_op}) :
|
||||
0;
|
||||
|
||||
assign decode_tmp_if.wb = use_rd;
|
||||
assign decode_if.wb = use_rd;
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
||||
wire rd_is_fp = is_fpu && ~(is_fcmp || is_fcvti || (fpu_op == `FPU_MVXW || fpu_op == `FPU_CLASS));
|
||||
wire rs1_is_fp = is_fr4 || (is_fci && ~(is_fcvtf || (fpu_op == `FPU_MVWX)));
|
||||
wire rs2_is_fp = is_fs || is_fr4 || is_fci;
|
||||
|
||||
assign decode_tmp_if.rd = {rd_is_fp, rd};
|
||||
assign decode_tmp_if.rs1 = {rs1_is_fp, rs1_qual};
|
||||
assign decode_tmp_if.rs2 = {rs2_is_fp, rs2};
|
||||
assign decode_tmp_if.rs3 = {1'b1, rs3};
|
||||
assign decode_if.rd = {rd_is_fp, rd};
|
||||
assign decode_if.rs1 = {rs1_is_fp, rs1_qual};
|
||||
assign decode_if.rs2 = {rs2_is_fp, rs2};
|
||||
assign decode_if.rs3 = {1'b1, rs3};
|
||||
`else
|
||||
assign decode_tmp_if.rd = rd;
|
||||
assign decode_tmp_if.rs1 = rs1_qual;
|
||||
assign decode_tmp_if.rs2 = rs2;
|
||||
assign decode_tmp_if.rs3 = rs3;
|
||||
assign decode_if.rd = rd;
|
||||
assign decode_if.rs1 = rs1_qual;
|
||||
assign decode_if.rs2 = rs2;
|
||||
assign decode_if.rs3 = rs3;
|
||||
`endif
|
||||
|
||||
assign decode_tmp_if.use_rs3 = use_rs3;
|
||||
assign decode_if.use_rs3 = use_rs3;
|
||||
|
||||
assign decode_tmp_if.reg_use_mask = ((`NUM_REGS)'(use_rd) << decode_tmp_if.rd)
|
||||
| ((`NUM_REGS)'(use_rs1) << decode_tmp_if.rs1)
|
||||
| ((`NUM_REGS)'(use_rs2) << decode_tmp_if.rs2)
|
||||
| ((`NUM_REGS)'(use_rs3) << decode_tmp_if.rs3);
|
||||
assign decode_if.used_regs = ((`NUM_REGS)'(use_rd) << decode_if.rd)
|
||||
| ((`NUM_REGS)'(use_rs1) << decode_if.rs1)
|
||||
| ((`NUM_REGS)'(use_rs2) << decode_if.rs2)
|
||||
| ((`NUM_REGS)'(use_rs3) << decode_if.rs3);
|
||||
|
||||
assign decode_tmp_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} :
|
||||
(is_jal || is_jalr || is_jals) ? jalx_offset :
|
||||
is_csr ? 32'(u_12) :
|
||||
src2_imm;
|
||||
assign decode_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} :
|
||||
(is_jal || is_jalr || is_jals) ? jalx_offset :
|
||||
is_csr ? 32'(u_12) :
|
||||
src2_imm;
|
||||
|
||||
assign decode_tmp_if.rs1_is_PC = is_auipc || is_btype || is_jal || is_jals;
|
||||
assign decode_tmp_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm;
|
||||
assign decode_if.rs1_is_PC = is_auipc || is_btype || is_jal || is_jals;
|
||||
assign decode_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm || is_br;
|
||||
|
||||
assign decode_tmp_if.frm = func3;
|
||||
assign decode_if.frm = func3;
|
||||
|
||||
assign join_if.is_join = valid_in && is_gpu && (gpu_op == `GPU_JOIN);
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire decode_fire = decode_if.valid && decode_if.ready;
|
||||
|
||||
assign join_if.is_join = decode_fire && is_gpu && (gpu_op == `GPU_JOIN);
|
||||
assign join_if.wid = ifetch_rsp_if.wid;
|
||||
|
||||
assign wstall_if.wstall = valid_in && (is_btype || is_jal || is_jalr || (is_gpu && (gpu_op == `GPU_TMC || gpu_op == `GPU_SPLIT || gpu_op == `GPU_BAR)));
|
||||
assign wstall_if.wstall = decode_fire && (is_btype || is_jal || is_jalr
|
||||
|| (is_gpu && (gpu_op == `GPU_TMC
|
||||
|| gpu_op == `GPU_SPLIT
|
||||
|| gpu_op == `GPU_BAR)));
|
||||
assign wstall_if.wid = ifetch_rsp_if.wid;
|
||||
|
||||
wire stall = ~decode_if.ready && decode_if.valid;
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + 1 + `FRM_BITS + `NUM_REGS)
|
||||
) decode_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({decode_tmp_if.valid, decode_tmp_if.wid, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.frm, decode_tmp_if.reg_use_mask}),
|
||||
.out ({decode_if.valid, decode_if.wid, decode_if.thread_mask, decode_if.curr_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.frm, decode_if.reg_use_mask})
|
||||
);
|
||||
|
||||
assign ifetch_rsp_if.ready = ~stall;
|
||||
assign ifetch_rsp_if.ready = decode_if.ready;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (decode_tmp_if.valid && ~stall) begin
|
||||
$write("%t: Core%0d-Decode: wid=%0d, PC=%0h, ex=", $time, CORE_ID, decode_tmp_if.wid, decode_tmp_if.curr_PC);
|
||||
print_ex_type(decode_tmp_if.ex_type);
|
||||
if (decode_if.valid && decode_if.ready) begin
|
||||
$write("%t: core%0d-decode: wid=%0d, PC=%0h, ex=", $time, CORE_ID, decode_if.wid, decode_if.curr_PC);
|
||||
print_ex_type(decode_if.ex_type);
|
||||
$write(", op=");
|
||||
print_ex_op(decode_tmp_if.ex_type, decode_tmp_if.ex_op);
|
||||
$write(", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b, frm=", decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.rs3, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm);
|
||||
print_frm(decode_tmp_if.frm);
|
||||
$write("\n");
|
||||
|
||||
// trap unsupported instructions
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_ALU) && `ALU_OP(decode_tmp_if.ex_op) == `ALU_OTHER));
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_BRU) && `BRU_OP(decode_tmp_if.ex_op) == `BRU_OTHER));
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_CSR) && `CSR_OP(decode_tmp_if.ex_op) == `CSR_OTHER));
|
||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_GPU) && `GPU_OP(decode_tmp_if.ex_op) == `GPU_OTHER));
|
||||
print_ex_op(decode_if.ex_type, decode_if.ex_op);
|
||||
$write(", tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b, frm=", decode_if.thread_mask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm);
|
||||
print_frm(decode_if.frm);
|
||||
$write("\n");
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -33,8 +33,6 @@
|
|||
|
||||
`define CSR_WIDTH 12
|
||||
|
||||
`define ISTAG_BITS `LOG2UP(`ISSUEQ_SIZE)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define LATENCY_IDIV 33
|
||||
|
@ -98,15 +96,14 @@
|
|||
|
||||
`define EX_NOP 3'h0
|
||||
`define EX_ALU 3'h1
|
||||
`define EX_BRU 3'h2
|
||||
`define EX_LSU 3'h3
|
||||
`define EX_CSR 3'h4
|
||||
`define EX_MUL 3'h5
|
||||
`define EX_FPU 3'h6
|
||||
`define EX_GPU 3'h7
|
||||
`define EX_LSU 3'h2
|
||||
`define EX_CSR 3'h3
|
||||
`define EX_MUL 3'h4
|
||||
`define EX_FPU 3'h5
|
||||
`define EX_GPU 3'h6
|
||||
`define EX_BITS 3
|
||||
|
||||
`define NUM_EXS 7
|
||||
`define NUM_EXS 6
|
||||
`define NE_BITS `LOG2UP(`NUM_EXS)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
@ -117,8 +114,8 @@
|
|||
`define ALU_SUB 4'b0001
|
||||
`define ALU_LUI 4'b0010
|
||||
`define ALU_AUIPC 4'b0011
|
||||
`define ALU_SLT 4'b0100
|
||||
`define ALU_SLTU 4'b0101
|
||||
`define ALU_SLTU 4'b0100
|
||||
`define ALU_SLT 4'b0101
|
||||
`define ALU_SRL 4'b1000
|
||||
`define ALU_SRA 4'b1001
|
||||
`define ALU_AND 4'b1100
|
||||
|
@ -129,27 +126,31 @@
|
|||
`define ALU_BITS 4
|
||||
`define ALU_OP(x) x[`ALU_BITS-1:0]
|
||||
`define ALU_OP_CLASS(x) x[3:2]
|
||||
`define ALU_SIGNED(x) x[0]
|
||||
|
||||
`define BRU_EQ 4'b0000
|
||||
`define BRU_NE 4'b0001
|
||||
`define BRU_LTU 4'b0010
|
||||
`define BRU_GEU 4'b0011
|
||||
`define BRU_LT 4'b0110
|
||||
`define BRU_GE 4'b0111
|
||||
`define BRU_JAL 4'b1000
|
||||
`define BRU_JALR 4'b1001
|
||||
`define BRU_ECALL 4'b1010
|
||||
`define BRU_EBREAK 4'b1011
|
||||
`define BRU_MRET 4'b1100
|
||||
`define BRU_SRET 4'b1101
|
||||
`define BRU_DRET 4'b1110
|
||||
`define BRU_OTHER 4'b1111
|
||||
`define BRU_BITS 4
|
||||
`define BRU_OP(x) x[`BRU_BITS-1:0]
|
||||
`define BRU_NEG(x) x[0]
|
||||
`define BRU_LESS(x) x[1]
|
||||
`define BRU_SIGNED(x) x[2]
|
||||
`define BRU_STATIC(x) x[3]
|
||||
`define BR_EQ 4'b0000
|
||||
`define BR_NE 4'b0010
|
||||
`define BR_LTU 4'b0100
|
||||
`define BR_GEU 4'b0110
|
||||
`define BR_LT 4'b0101
|
||||
`define BR_GE 4'b0111
|
||||
`define BR_JAL 4'b1000
|
||||
`define BR_JALR 4'b1001
|
||||
`define BR_ECALL 4'b1010
|
||||
`define BR_EBREAK 4'b1011
|
||||
`define BR_MRET 4'b1100
|
||||
`define BR_SRET 4'b1101
|
||||
`define BR_DRET 4'b1110
|
||||
`define BR_OTHER 4'b1111
|
||||
`define BR_BITS 4
|
||||
`define BR_OP(x) x[`BR_BITS-1:0]
|
||||
`define BR_NEG(x) x[1]
|
||||
`define BR_LESS(x) x[2]
|
||||
`define BR_STATIC(x) x[3]
|
||||
|
||||
`define ALU_BR_BITS 5
|
||||
`define ALU_BR_OP(x) x[`ALU_BR_BITS-1:0]
|
||||
`define IS_BR_OP(x) x[4]
|
||||
|
||||
`define LSU_LB {1'b0, `BYTEEN_SB}
|
||||
`define LSU_LH {1'b0, `BYTEEN_SH}
|
||||
|
@ -262,10 +263,10 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO // pc, wb, rd, wid
|
||||
`define DEBUG_CORE_REQ_MDATA_WIDTH (32 + 1 + `NR_BITS + `NW_BITS)
|
||||
`ifdef DBG_CORE_REQ_INFO // pc, rd, wid
|
||||
`define DBG_CORE_REQ_MDATAW (32 + `NR_BITS + `NW_BITS)
|
||||
`else
|
||||
`define DEBUG_CORE_REQ_MDATA_WIDTH 0
|
||||
`define DBG_CORE_REQ_MDATAW 0
|
||||
`endif
|
||||
|
||||
////////////////////////// Dcache Configurable Knobs //////////////////////////
|
||||
|
@ -274,10 +275,10 @@
|
|||
`define DCACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 0)
|
||||
|
||||
// TAG sharing enable
|
||||
`define DCORE_TAG_ID_BITS `ISTAG_BITS
|
||||
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
|
||||
|
||||
// Core request tag bits
|
||||
`define DCORE_TAG_WIDTH (`DEBUG_CORE_REQ_MDATA_WIDTH + `DCORE_TAG_ID_BITS)
|
||||
`define DCORE_TAG_WIDTH (`DBG_CORE_REQ_MDATAW + `DCORE_TAG_ID_BITS)
|
||||
|
||||
// DRAM request data bits
|
||||
`define DDRAM_LINE_WIDTH (`DBANK_LINE_SIZE * 8)
|
||||
|
@ -312,7 +313,7 @@
|
|||
`define ICORE_TAG_ID_BITS `NW_BITS
|
||||
|
||||
// Core request tag bits
|
||||
`define ICORE_TAG_WIDTH (`DEBUG_CORE_REQ_MDATA_WIDTH + `ICORE_TAG_ID_BITS)
|
||||
`define ICORE_TAG_WIDTH (`DBG_CORE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
|
||||
|
||||
// DRAM request data bits
|
||||
`define IDRAM_LINE_WIDTH (`IBANK_LINE_SIZE * 8)
|
||||
|
|
|
@ -22,7 +22,6 @@ module VX_execute #(
|
|||
|
||||
// inputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_bru_req_if bru_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
|
@ -30,10 +29,10 @@ module VX_execute #(
|
|||
VX_gpu_req_if gpu_req_if,
|
||||
|
||||
// outputs
|
||||
VX_csr_to_issue_if csr_to_issue_if,
|
||||
VX_branch_ctl_if branch_ctl_if,
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_exu_to_cmt_if alu_commit_if,
|
||||
VX_exu_to_cmt_if bru_commit_if,
|
||||
VX_exu_to_cmt_if lsu_commit_if,
|
||||
VX_exu_to_cmt_if csr_commit_if,
|
||||
VX_exu_to_cmt_if mul_commit_if,
|
||||
|
@ -43,25 +42,14 @@ module VX_execute #(
|
|||
output wire ebreak
|
||||
);
|
||||
|
||||
VX_csr_to_fpu_if csr_to_fpu_if();
|
||||
|
||||
VX_alu_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) alu_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.alu_req_if (alu_req_if),
|
||||
.alu_commit_if (alu_commit_if)
|
||||
);
|
||||
|
||||
VX_bru_unit #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) bru_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.bru_req_if (bru_req_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
.bru_commit_if (bru_commit_if)
|
||||
.alu_commit_if (alu_commit_if)
|
||||
);
|
||||
|
||||
VX_lsu_unit #(
|
||||
|
@ -82,7 +70,7 @@ module VX_execute #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
.csr_to_fpu_if (csr_to_fpu_if),
|
||||
.csr_to_issue_if (csr_to_issue_if),
|
||||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
|
@ -95,8 +83,8 @@ module VX_execute #(
|
|||
) mul_unit (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.alu_req_if (mul_req_if),
|
||||
.alu_commit_if (mul_commit_if)
|
||||
.mul_req_if (mul_req_if),
|
||||
.mul_commit_if (mul_commit_if)
|
||||
);
|
||||
`else
|
||||
assign mul_req_if.ready = 0;
|
||||
|
@ -112,7 +100,6 @@ module VX_execute #(
|
|||
.clk (clk),
|
||||
.reset (reset),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.csr_to_fpu_if (csr_to_fpu_if),
|
||||
.fpu_commit_if (fpu_commit_if)
|
||||
);
|
||||
`else
|
||||
|
@ -134,9 +121,10 @@ module VX_execute #(
|
|||
.gpu_commit_if (gpu_commit_if)
|
||||
);
|
||||
|
||||
assign ebreak = bru_req_if.valid
|
||||
&& (bru_req_if.op == `BRU_EBREAK
|
||||
|| bru_req_if.op == `BRU_ECALL);
|
||||
assign ebreak = alu_req_if.valid
|
||||
&& `IS_BR_OP(alu_req_if.op)
|
||||
&& (`BR_OP(alu_req_if.op) == `BR_EBREAK
|
||||
|| `BR_OP(alu_req_if.op) == `BR_ECALL);
|
||||
|
||||
`SCOPE_ASSIGN (scope_decode_valid, decode_if.valid);
|
||||
`SCOPE_ASSIGN (scope_decode_wid, decode_if.wid);
|
||||
|
|
|
@ -9,59 +9,81 @@ module VX_fpu_unit #(
|
|||
|
||||
// inputs
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_csr_to_fpu_if csr_to_fpu_if,
|
||||
|
||||
|
||||
// outputs
|
||||
VX_fpu_to_cmt_if fpu_commit_if
|
||||
);
|
||||
VX_fpu_req_if fpu_req_tmp_if();
|
||||
);
|
||||
localparam FPUQ_BITS = `LOG2UP(`FPUQ_SIZE);
|
||||
|
||||
// resolve dynamic FRM
|
||||
wire [`FRM_BITS-1:0] frm, frm_tmp;
|
||||
assign csr_to_fpu_if.wid = fpu_req_if.wid;
|
||||
assign frm = (fpu_req_if.frm == `FRM_DYN) ? csr_to_fpu_if.frm : fpu_req_if.frm;
|
||||
wire ready_in;
|
||||
wire valid_out;
|
||||
wire ready_out;
|
||||
|
||||
// use a skid buffer since fpcore has realtime backpressure
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`ISTAG_BITS + `NW_BITS + 32 + `FPU_BITS + `FRM_BITS + (3 * `NUM_THREADS * 32)),
|
||||
.SIZE (0)
|
||||
) input_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (fpu_req_if.valid),
|
||||
.ready_in (fpu_req_if.ready),
|
||||
.data_in ({fpu_req_if.issue_tag, fpu_req_if.wid, fpu_req_if.curr_PC, fpu_req_if.op, frm, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
|
||||
.data_out ({fpu_req_tmp_if.issue_tag, fpu_req_tmp_if.wid, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.op, frm_tmp, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data}),
|
||||
.ready_out (fpu_req_tmp_if.ready),
|
||||
.valid_out (fpu_req_tmp_if.valid)
|
||||
wire [`NW_BITS-1:0] rsp_wid;
|
||||
wire [`NUM_THREADS-1:0] rsp_thread_mask;
|
||||
wire [31:0] rsp_curr_PC;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire rsp_wb;
|
||||
|
||||
wire has_fflags;
|
||||
fflags_t [`NUM_THREADS-1:0] fflags;
|
||||
wire [`NUM_THREADS-1:0][31:0] result;
|
||||
|
||||
wire [FPUQ_BITS-1:0] tag_in, tag_out;
|
||||
wire fpuq_full;
|
||||
|
||||
wire fpuq_push = fpu_req_if.valid && fpu_req_if.ready;
|
||||
wire fpuq_pop = valid_out && ready_out;
|
||||
|
||||
VX_cam_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
|
||||
.SIZE (`FPUQ_SIZE)
|
||||
) mul_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.acquire_slot (fpuq_push),
|
||||
.write_addr (tag_in),
|
||||
.read_addr (tag_out),
|
||||
.release_addr (tag_out),
|
||||
.write_data ({fpu_req_if.wid, fpu_req_if.thread_mask, fpu_req_if.curr_PC, fpu_req_if.rd, fpu_req_if.wb}),
|
||||
.read_data ({rsp_wid, rsp_thread_mask, rsp_curr_PC, rsp_rd, rsp_wb}),
|
||||
.release_slot (fpuq_pop),
|
||||
.full (fpuq_full)
|
||||
);
|
||||
|
||||
wire valid_in = fpu_req_if.valid && ~fpuq_full;
|
||||
|
||||
// can accept new request?
|
||||
assign fpu_req_if.ready = ready_in && ~fpuq_full;
|
||||
|
||||
`ifdef SYNTHESIS
|
||||
|
||||
VX_fp_fpga fp_core (
|
||||
VX_fp_fpga #(
|
||||
.TAGW (FPUQ_BITS)
|
||||
) fp_core (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.valid_in (fpu_req_tmp_if.valid),
|
||||
.ready_in (fpu_req_tmp_if.ready),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
|
||||
.tag_in (fpu_req_tmp_if.issue_tag),
|
||||
.tag_in (tag_in),
|
||||
|
||||
.op (fpu_req_tmp_if.op),
|
||||
.frm (frm_tmp),
|
||||
.op (fpu_req_if.op),
|
||||
.frm (fpu_req_if.frm),
|
||||
|
||||
.dataa (fpu_req_tmp_if.rs1_data),
|
||||
.datab (fpu_req_tmp_if.rs2_data),
|
||||
.datac (fpu_req_tmp_if.rs3_data),
|
||||
.result (fpu_commit_if.data),
|
||||
.dataa (fpu_req_if.rs1_data),
|
||||
.datab (fpu_req_if.rs2_data),
|
||||
.datac (fpu_req_if.rs3_data),
|
||||
.result (result),
|
||||
|
||||
.has_fflags (fpu_commit_if.has_fflags),
|
||||
.fflags (fpu_commit_if.fflags),
|
||||
.has_fflags (has_fflags),
|
||||
.fflags (fflags),
|
||||
|
||||
.tag_out (fpu_commit_if.issue_tag),
|
||||
.tag_out (tag_out),
|
||||
|
||||
.ready_out (1'b1),
|
||||
.valid_out (fpu_commit_if.valid)
|
||||
.ready_out (ready_out),
|
||||
.valid_out (valid_out)
|
||||
);
|
||||
|
||||
`else
|
||||
|
@ -70,33 +92,49 @@ module VX_fpu_unit #(
|
|||
.FMULADD (1),
|
||||
.FDIVSQRT (1),
|
||||
.FNONCOMP (1),
|
||||
.FCONV (1)
|
||||
.FCONV (1),
|
||||
.TAGW (FPUQ_BITS)
|
||||
) fp_core (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
.valid_in (fpu_req_tmp_if.valid),
|
||||
.ready_in (fpu_req_tmp_if.ready),
|
||||
.valid_in (valid_in),
|
||||
.ready_in (ready_in),
|
||||
|
||||
.tag_in (fpu_req_tmp_if.issue_tag),
|
||||
.tag_in (tag_in),
|
||||
|
||||
.op (fpu_req_tmp_if.op),
|
||||
.frm (frm_tmp),
|
||||
.op (fpu_req_if.op),
|
||||
.frm (fpu_req_if.frm),
|
||||
|
||||
.dataa (fpu_req_tmp_if.rs1_data),
|
||||
.datab (fpu_req_tmp_if.rs2_data),
|
||||
.datac (fpu_req_tmp_if.rs3_data),
|
||||
.result (fpu_commit_if.data),
|
||||
.dataa (fpu_req_if.rs1_data),
|
||||
.datab (fpu_req_if.rs2_data),
|
||||
.datac (fpu_req_if.rs3_data),
|
||||
.result (result),
|
||||
|
||||
.has_fflags (fpu_commit_if.has_fflags),
|
||||
.fflags (fpu_commit_if.fflags),
|
||||
.has_fflags (has_fflags),
|
||||
.fflags (fflags),
|
||||
|
||||
.tag_out (fpu_commit_if.issue_tag),
|
||||
.tag_out (tag_out),
|
||||
|
||||
.ready_out (1'b1),
|
||||
.valid_out (fpu_commit_if.valid)
|
||||
.ready_out (ready_out),
|
||||
.valid_out (valid_out)
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
wire stall_out = ~fpu_commit_if.ready && fpu_commit_if.valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS))
|
||||
) fpu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
.flush (1'b0),
|
||||
.in ({valid_out, rsp_wid, rsp_thread_mask, rsp_curr_PC, rsp_rd, rsp_wb, result, has_fflags, fflags}),
|
||||
.out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.thread_mask, fpu_commit_if.curr_PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, fpu_commit_if.has_fflags, fpu_commit_if.fflags})
|
||||
);
|
||||
|
||||
assign ready_out = ~stall_out;
|
||||
|
||||
endmodule
|
53
hw/rtl/VX_gpr_bypass.v
Normal file
53
hw/rtl/VX_gpr_bypass.v
Normal file
|
@ -0,0 +1,53 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
// VX_gpr_bypass
//
// Small holding buffer (up to two DATAW-wide entries) with a combinational
// bypass path from data_in to data_out.
//
// Parameters:
//   DATAW    - width in bits of data_in / data_out and of each buffer entry.
//   BUFFERED - not referenced anywhere in this module; kept only so that
//              existing instantiations that override it still elaborate.
//
// Ports:
//   clk, reset - clock and synchronous reset. Reset clears the occupancy
//                flags and the delayed push; the data registers themselves
//                are not reset.
//   push       - registered for one cycle into delayed_push. data_in is
//                sampled on the cycle where delayed_push is high, i.e. one
//                cycle after push was asserted.
//                NOTE(review): presumably this matches a one-cycle latency
//                of whatever drives data_in - confirm against the
//                instantiating module.
//   pop        - removes the head entry when the primary buffer is occupied.
//   data_in    - incoming word.
//   data_out   - head of the buffer when occupied, otherwise data_in
//                (bypass).
module VX_gpr_bypass #(
    parameter DATAW    = 1,
    parameter BUFFERED = 1
) (
    input wire clk,
    input wire reset,
    input wire push,
    // Declared as a net like every other input: the module never assigns
    // it, and Verilog-2001 does not permit `reg` on an input port.
    input wire pop,
    input wire [DATAW-1:0] data_in,
    output wire [DATAW-1:0] data_out
);
    // buffer is the head entry, buffer2 the overflow entry behind it.
    reg [DATAW-1:0] buffer, buffer2;
    // Occupancy flags for buffer / buffer2.
    reg use_buffer, use_buffer2;
    // push delayed by one clock; qualifies data_in as valid.
    reg delayed_push;

    always @(posedge clk) begin
        if (reset) begin
            delayed_push <= 0;
            use_buffer   <= 0;
            use_buffer2  <= 0;
        end else begin
            delayed_push <= push;

            // Invariant: the overflow entry is only ever occupied while the
            // head entry is occupied too.
            assert(!use_buffer2 || use_buffer);

            // Pop: shift the overflow entry (if any) into the head slot.
            // NOTE: the push handling below is written after this block on
            // purpose - when both fire in the same cycle its nonblocking
            // assignments override the ones made here.
            if (pop) begin
                if (use_buffer) begin
                    buffer      <= buffer2;
                    use_buffer  <= use_buffer2;
                    use_buffer2 <= 0;
                end
            end

            // Delayed push: capture data_in unless it is bypassed.
            if (delayed_push) begin
                if (use_buffer) begin
                    assert(!use_buffer2); // queue full!
                    if (pop) begin
                        // Head is leaving this cycle: the new word takes
                        // its place directly.
                        buffer <= data_in;
                    end else begin
                        // Head stays: park the new word in the overflow slot.
                        buffer2     <= data_in;
                        use_buffer2 <= 1;
                    end
                    use_buffer <= 1;
                end else if (!pop) begin
                    // Buffer empty and nothing popped: hold the word.
                    buffer     <= data_in;
                    use_buffer <= 1;
                end
                // Buffer empty and pop asserted: nothing is stored; data_in
                // is already visible on data_out through the bypass mux.
            end
        end
    end

    // Bypass mux: present the buffered head when there is one, otherwise
    // pass data_in straight through.
    assign data_out = use_buffer ? buffer : data_in;

endmodule
|
|
@ -16,25 +16,30 @@ module VX_gpr_fp_ctrl (
|
|||
|
||||
reg [`NUM_THREADS-1:0][31:0] rs1_tmp_data, rs2_tmp_data, rs3_tmp_data;
|
||||
reg read_rs3;
|
||||
reg [`NW_BITS-1:0] rs3_wid;
|
||||
|
||||
wire rs3_delay = gpr_read_if.valid && gpr_read_if.use_rs3 && ~read_rs3;
|
||||
wire read_fire = gpr_read_if.valid && read_rs3;
|
||||
wire read_fire = gpr_read_if.valid && gpr_read_if.ready_out;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
read_rs3 <= 0;
|
||||
rs3_wid <= 0;
|
||||
end else begin
|
||||
if (rs3_delay) begin
|
||||
read_rs3 <= 1;
|
||||
rs3_wid <= gpr_read_if.wid;
|
||||
end else if (read_fire) begin
|
||||
read_rs3 <= 0;
|
||||
end
|
||||
if (read_rs3) begin
|
||||
assert(rs3_wid == gpr_read_if.wid);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// backup original rs1 data
|
||||
always @(posedge clk) begin
|
||||
if (~gpr_read_if.use_rs3 || rs3_delay) begin
|
||||
always @(posedge clk) begin
|
||||
if (~read_rs3) begin
|
||||
rs1_tmp_data <= rs1_data;
|
||||
end
|
||||
rs2_tmp_data <= rs2_data;
|
||||
|
@ -44,7 +49,7 @@ module VX_gpr_fp_ctrl (
|
|||
// outputs
|
||||
wire [`NR_BITS-1:0] rs1 = read_rs3 ? gpr_read_if.rs3 : gpr_read_if.rs1;
|
||||
assign raddr1 = {gpr_read_if.wid, rs1};
|
||||
assign gpr_read_if.ready = ~rs3_delay;
|
||||
assign gpr_read_if.ready_in = ~rs3_delay;
|
||||
assign gpr_read_if.rs1_data = rs1_tmp_data;
|
||||
assign gpr_read_if.rs2_data = rs2_tmp_data;
|
||||
assign gpr_read_if.rs3_data = rs3_tmp_data;
|
||||
|
|
|
@ -7,7 +7,7 @@ module VX_gpr_stage #(
|
|||
input wire reset,
|
||||
|
||||
// inputs
|
||||
VX_wb_if writeback_if,
|
||||
VX_writeback_if writeback_if,
|
||||
|
||||
// outputs
|
||||
VX_gpr_read_if gpr_read_if
|
||||
|
@ -50,14 +50,14 @@ module VX_gpr_stage #(
|
|||
assign gpr_read_if.rs1_data = rs1_tmp_data;
|
||||
assign gpr_read_if.rs2_data = rs2_tmp_data;
|
||||
assign gpr_read_if.rs3_data = 0;
|
||||
assign gpr_read_if.ready = 1;
|
||||
assign gpr_read_if.ready_in = 1;
|
||||
|
||||
wire valid = gpr_read_if.valid;
|
||||
wire use_rs3 = gpr_read_if.use_rs3;
|
||||
wire [`NR_BITS-1:0] rs3 = gpr_read_if.rs3;
|
||||
`UNUSED_VAR (valid);
|
||||
`UNUSED_VAR (use_rs3);
|
||||
`UNUSED_VAR (rs3);
|
||||
`UNUSED_FIELD (gpr_read_if, valid);
|
||||
`UNUSED_FIELD (gpr_read_if, use_rs3);
|
||||
`UNUSED_FIELD (gpr_read_if, rs3);
|
||||
`UNUSED_FIELD (gpr_read_if, ready_out);
|
||||
`endif
|
||||
|
||||
assign writeback_if.ready = 1'b1;
|
||||
|
||||
endmodule
|
||||
|
|
|
@ -3,15 +3,15 @@
|
|||
module VX_gpu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
// Input
|
||||
VX_gpu_req_if gpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if,
|
||||
|
||||
// Output
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_exu_to_cmt_if gpu_commit_if
|
||||
VX_warp_ctl_if warp_ctl_if,
|
||||
VX_exu_to_cmt_if gpu_commit_if
|
||||
);
|
||||
gpu_tmc_t tmc;
|
||||
gpu_wspawn_t wspawn;
|
||||
|
@ -23,15 +23,13 @@ module VX_gpu_unit #(
|
|||
wire is_split = (gpu_req_if.op == `GPU_SPLIT);
|
||||
wire is_bar = (gpu_req_if.op == `GPU_BAR);
|
||||
|
||||
wire gpu_req_fire = gpu_req_if.valid;
|
||||
|
||||
// tmc
|
||||
|
||||
wire [`NUM_THREADS-1:0] tmc_new_mask;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
assign tmc_new_mask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||
end
|
||||
assign tmc.valid = gpu_req_fire && is_tmc;
|
||||
assign tmc.valid = is_tmc;
|
||||
assign tmc.thread_mask = tmc_new_mask;
|
||||
|
||||
// wspawn
|
||||
|
@ -41,7 +39,7 @@ module VX_gpu_unit #(
|
|||
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||
assign wspawn_wmask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||
end
|
||||
assign wspawn.valid = gpu_req_fire && is_wspawn;
|
||||
assign wspawn.valid = is_wspawn;
|
||||
assign wspawn.wmask = wspawn_wmask;
|
||||
assign wspawn.pc = wspawn_pc;
|
||||
|
||||
|
@ -56,7 +54,7 @@ module VX_gpu_unit #(
|
|||
assign split_else_mask[i] = gpu_req_if.thread_mask[i] & ~taken;
|
||||
end
|
||||
|
||||
assign split.valid = gpu_req_fire && is_split;
|
||||
assign split.valid = is_split;
|
||||
assign split.diverged = (| split_then_mask) && (| split_else_mask);
|
||||
assign split.then_mask = split_then_mask;
|
||||
assign split.else_mask = split_else_mask;
|
||||
|
@ -64,23 +62,29 @@ module VX_gpu_unit #(
|
|||
|
||||
// barrier
|
||||
|
||||
assign barrier.valid = is_bar && gpu_req_fire;
|
||||
assign barrier.id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
|
||||
assign barrier.num_warps = (`NW_BITS+1)'(gpu_req_if.rs2_data - 1);
|
||||
assign barrier.valid = is_bar;
|
||||
assign barrier.id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
|
||||
assign barrier.size_m1 = (`NW_BITS)'(gpu_req_if.rs2_data - 1);
|
||||
|
||||
// output
|
||||
|
||||
wire stall = ~gpu_commit_if.ready && gpu_commit_if.valid;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + $bits(gpu_tmc_t) + $bits(gpu_wspawn_t) + $bits(gpu_split_t) + $bits(gpu_barrier_t))
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + $bits(gpu_tmc_t) + $bits(gpu_wspawn_t) + $bits(gpu_split_t) + $bits(gpu_barrier_t))
|
||||
) gpu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (0),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({gpu_req_if.valid, gpu_req_if.issue_tag, gpu_req_if.wid, tmc, wspawn, split, barrier}),
|
||||
.out ({gpu_commit_if.valid, gpu_commit_if.issue_tag, warp_ctl_if.wid, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
|
||||
.in ({gpu_req_if.valid, gpu_req_if.wid, gpu_req_if.thread_mask, gpu_req_if.curr_PC, gpu_req_if.rd, gpu_req_if.wb, tmc, wspawn, split, barrier}),
|
||||
.out ({gpu_commit_if.valid, gpu_commit_if.wid, gpu_commit_if.thread_mask, gpu_commit_if.curr_PC, gpu_commit_if.rd, gpu_commit_if.wb, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
|
||||
);
|
||||
|
||||
assign gpu_req_if.ready = 1'b1;
|
||||
assign warp_ctl_if.valid = gpu_commit_if.valid && gpu_commit_if.ready;
|
||||
assign warp_ctl_if.wid = gpu_commit_if.wid;
|
||||
|
||||
// can accept new request?
|
||||
assign gpu_req_if.ready = ~stall;
|
||||
|
||||
endmodule
|
187
hw/rtl/VX_ibuffer.v
Normal file
187
hw/rtl/VX_ibuffer.v
Normal file
|
@ -0,0 +1,187 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// VX_ibuffer
//
// Per-warp instruction buffer placed between decode (`ibuf_enq_if`) and the
// issue logic (`ibuf_deq_if`).
//
// Storage: every warp owns a queue of up to SIZE instructions. The oldest
// instruction of a warp lives in `q_data_out[warp]`; any further instructions
// are kept in `entries[warp][...]`, addressed by `rd_ptr_r` / `wr_ptr_r`.
// `size_r[warp]` counts all instructions of the warp (head included).
//
// Scheduling: `valid_table` marks warps that have at least one instruction,
// `ready_table` marks warps still to be served in the current pass. Each
// cycle the lowest-numbered warp set in `ready_table_n` is selected and its
// bit cleared; once no bit is left the table is reloaded from `valid_table_n`.
// While `freeze` is asserted the registered dequeue output is not replaced by
// the scheduler's choice. When no warp is active, an enqueued instruction is
// forwarded straight to the dequeue registers.
module VX_ibuffer #(
    parameter CORE_ID = 0
) (
    input wire clk,
    input wire reset,

    // inputs
    input wire   freeze,        // do not switch to another warp
    VX_decode_if ibuf_enq_if,

    // outputs
    VX_decode_if ibuf_deq_if
);
    localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + 1 + `NUM_REGS;
    localparam SIZE  = `IBUF_SIZE;

    `USE_FAST_BRAM reg [DATAW-1:0] entries [`NUM_WARPS-1:0][SIZE-1:0];
    reg [`LOG2UP(SIZE+1)-1:0] size_r [`NUM_WARPS-1:0];
    reg [`LOG2UP(SIZE):0] rd_ptr_r [`NUM_WARPS-1:0];
    reg [`LOG2UP(SIZE):0] wr_ptr_r [`NUM_WARPS-1:0];

    wire [`NUM_WARPS-1:0] q_full;
    wire [`NUM_WARPS-1:0][`LOG2UP(SIZE+1)-1:0] q_size;
    wire [DATAW-1:0] q_data_in;
    wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev;   // next head candidate per warp
    reg  [`NUM_WARPS-1:0][DATAW-1:0] q_data_out;    // current head per warp

    wire enq_fire = ibuf_enq_if.valid && ibuf_enq_if.ready;
    wire deq_fire = ibuf_deq_if.valid && ibuf_deq_if.ready;

    for (genvar i = 0; i < `NUM_WARPS; ++i) begin

        wire writing = enq_fire && (i == ibuf_enq_if.wid);
        wire reading = deq_fire && (i == ibuf_deq_if.wid);

        wire [`LOG2UP(SIZE-1)-1:0] rd_ptr_a = rd_ptr_r[i][`LOG2UP(SIZE-1)-1:0];
        wire [`LOG2UP(SIZE-1)-1:0] wr_ptr_a = wr_ptr_r[i][`LOG2UP(SIZE-1)-1:0];

        always @(posedge clk) begin
            if (reset) begin
                rd_ptr_r[i] <= 0;
                wr_ptr_r[i] <= 0;
                size_r[i]   <= 0;
            end else begin
                if (writing) begin
                    if ((0 == size_r[i]) || ((1 == size_r[i]) && reading)) begin
                        // queue is (or becomes) empty: the new instruction
                        // is the head
                        q_data_out[i] <= q_data_in;
                    end else begin
                        entries[i][wr_ptr_a] <= q_data_in;
                        wr_ptr_r[i] <= wr_ptr_r[i] + 1;
                    end
                    if (!reading) begin
                        size_r[i] <= size_r[i] + 1;
                    end
                end
                if (reading) begin
                    if (size_r[i] != 1) begin
                        // promote the next stored instruction to head
                        q_data_out[i] <= q_data_prev[i];
                        rd_ptr_r[i]   <= rd_ptr_r[i] + 1;
                    end
                    if (!writing) begin
                        size_r[i] <= size_r[i] - 1;
                    end
                end
            end
        end

        // Compare THIS warp's pointers. The previous expression compared the
        // complete pointer arrays of all warps, so the result depended on
        // the state of unrelated warps.
        assign q_data_prev[i] = (wr_ptr_r[i] != rd_ptr_r[i]) ? entries[i][rd_ptr_a] : q_data_in;
        assign q_full[i]      = (size_r[i] == SIZE);
        assign q_size[i]      = size_r[i];
    end

    ///////////////////////////////////////////////////////////////////////////

    reg [`NUM_WARPS-1:0] valid_table, valid_table_n;
    reg [`NUM_WARPS-1:0] ready_table, ready_table_n;
    reg [`LOG2UP(`NUM_WARPS+1)-1:0] active_warps;
    reg [`NW_BITS-1:0] deq_wid, deq_wid_n;
    reg deq_valid, deq_valid_n;
    reg [DATAW-1:0] deq_instr, deq_instr_n;

    // warps holding at least one instruction after this cycle
    always @(*) begin
        valid_table_n = valid_table;
        if (deq_fire) begin
            valid_table_n[ibuf_deq_if.wid] = (q_size[ibuf_deq_if.wid] != 1);
        end
        if (enq_fire) begin
            valid_table_n[ibuf_enq_if.wid] = 1;
        end
    end

    // pick the next warp to present on the dequeue interface
    always @(*) begin
        deq_wid_n     = 0;
        deq_valid_n   = 0;
        // Default value so that this combinational block assigns
        // deq_instr_n on every path (no latch); the value is only
        // meaningful when deq_valid_n is set.
        deq_instr_n   = deq_instr;
        ready_table_n = ready_table;
        if (deq_fire) begin
            ready_table_n[ibuf_deq_if.wid] = (q_size[ibuf_deq_if.wid] != 1);
        end
        for (integer i = 0; i < `NUM_WARPS; i++) begin
            if (ready_table_n[i]) begin
                deq_wid_n   = `NW_BITS'(i);
                deq_valid_n = 1;
                // the warp being dequeued right now exposes its next entry
                deq_instr_n = (deq_fire && (ibuf_deq_if.wid == `NW_BITS'(i))) ? q_data_prev[i] : q_data_out[i];
                ready_table_n[i] = 0;
                break;
            end
        end
    end

    wire warp_added   = enq_fire && (0 == q_size[ibuf_enq_if.wid]) && (!deq_fire || ibuf_enq_if.wid != ibuf_deq_if.wid);
    wire warp_removed = deq_fire && (1 == q_size[ibuf_deq_if.wid]) && (!enq_fire || ibuf_enq_if.wid != ibuf_deq_if.wid);

    always @(posedge clk) begin
        if (reset) begin
            valid_table  <= 0;
            ready_table  <= 0;
            deq_valid    <= 0;
            active_warps <= 0;
        end else begin
            valid_table <= valid_table_n;
            // start a new pass over all valid warps once the current one is done
            ready_table <= (| ready_table_n) ? ready_table_n : valid_table_n;

            if (enq_fire && (0 == active_warps)) begin
                // nothing buffered: forward the incoming instruction directly
                deq_valid <= 1;
                deq_wid   <= ibuf_enq_if.wid;
                deq_instr <= q_data_in;
            end else if (!freeze) begin
                deq_valid <= deq_valid_n;
                deq_wid   <= deq_wid_n;
                deq_instr <= deq_instr_n;
            end

            if (warp_added && !warp_removed) begin
                active_warps <= active_warps + 1;
            end else if (warp_removed && !warp_added) begin
                active_warps <= active_warps - 1;
            end

            // consistency checks (simulation)
            begin
                integer k;
                // Restart the count on every clock. A declaration
                // initializer inside a procedural block is not guaranteed
                // to be re-evaluated each time the block executes.
                k = 0;
                for (integer i = 0; i < `NUM_WARPS; i++) begin
                    k += 32'(q_size[i] != 0);
                end
                assert(k == 32'(active_warps));
                assert(~deq_fire || active_warps != 0);
            end
        end
    end

    assign ibuf_enq_if.ready = ~q_full[ibuf_enq_if.wid];

    assign q_data_in = {ibuf_enq_if.thread_mask,
                        ibuf_enq_if.curr_PC,
                        ibuf_enq_if.ex_type,
                        ibuf_enq_if.ex_op,
                        ibuf_enq_if.frm,
                        ibuf_enq_if.wb,
                        ibuf_enq_if.rd,
                        ibuf_enq_if.rs1,
                        ibuf_enq_if.rs2,
                        ibuf_enq_if.rs3,
                        ibuf_enq_if.imm,
                        ibuf_enq_if.rs1_is_PC,
                        ibuf_enq_if.rs2_is_imm,
                        ibuf_enq_if.use_rs3,
                        ibuf_enq_if.used_regs};

    assign ibuf_deq_if.valid = deq_valid;
    assign ibuf_deq_if.wid   = deq_wid;
    assign {ibuf_deq_if.thread_mask,
            ibuf_deq_if.curr_PC,
            ibuf_deq_if.ex_type,
            ibuf_deq_if.ex_op,
            ibuf_deq_if.frm,
            ibuf_deq_if.wb,
            ibuf_deq_if.rd,
            ibuf_deq_if.rs1,
            ibuf_deq_if.rs2,
            ibuf_deq_if.rs3,
            ibuf_deq_if.imm,
            ibuf_deq_if.rs1_is_PC,
            ibuf_deq_if.rs2_is_imm,
            ibuf_deq_if.use_rs3,
            ibuf_deq_if.used_regs} = deq_instr;

endmodule
|
|
@ -46,7 +46,7 @@ module VX_icache_stage #(
|
|||
assign ifetch_req_if.ready = icache_req_if.ready;
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 1'b0, `NR_BITS'(0), ifetch_req_if.wid, req_tag};
|
||||
assign icache_req_if.tag = {ifetch_req_if.curr_PC, `NR_BITS'(0), ifetch_req_if.wid, req_tag};
|
||||
`else
|
||||
assign icache_req_if.tag = req_tag;
|
||||
`endif
|
||||
|
|
233
hw/rtl/VX_instr_demux.v
Normal file
233
hw/rtl/VX_instr_demux.v
Normal file
|
@ -0,0 +1,233 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// VX_instr_demux
//
// Routes the instruction presented on `execute_if` to the execution unit
// selected by `execute_if.ex_type`. For every unit there is
//   * a VX_skid_buffer carrying the decoded control fields, and
//   * a VX_gpr_bypass carrying the register operands read through
//     `gpr_read_if`, pushed when the skid buffer accepts the instruction and
//     popped when the unit accepts the request.
// `execute_if.ready` reflects the input-side ready of the selected unit.
module VX_instr_demux (
    input wire clk,
    input wire reset,

    // inputs
    VX_decode_if        execute_if,
    VX_gpr_read_if      gpr_read_if,
    VX_csr_to_issue_if  csr_to_issue_if,

    // outputs
    VX_alu_req_if       alu_req_if,
    VX_lsu_req_if       lsu_req_if,
    VX_csr_req_if       csr_req_if,
    VX_mul_req_if       mul_req_if,
    VX_fpu_req_if       fpu_req_if,
    VX_gpu_req_if       gpu_req_if
);
    // operand payload shared by the two-source units
    localparam RS2_DATAW = 2 * `NUM_THREADS * 32;

    // target unit decode
    wire sel_alu = (execute_if.ex_type == `EX_ALU);
    wire sel_lsu = (execute_if.ex_type == `EX_LSU);
    wire sel_csr = (execute_if.ex_type == `EX_CSR);
    wire sel_gpu = (execute_if.ex_type == `EX_GPU);

    ///////////////////////////////////////////////////////////////////////////
    // ALU unit

    localparam ALU_REQ_DATAW = `NW_BITS + `NUM_THREADS + 32 + `ALU_BR_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS;

    wire alu_in_valid = execute_if.valid && sel_alu;
    wire alu_in_ready;
    wire alu_push = alu_in_valid && alu_in_ready;
    wire alu_pop  = alu_req_if.valid && alu_req_if.ready;

    // thread index derived from the thread mask
    wire [`NT_BITS-1:0] sel_tid;

    VX_priority_encoder #(
        .N (`NUM_THREADS)
    ) tid_select (
        .data_in  (execute_if.thread_mask),
        .data_out (sel_tid),
        `UNUSED_PIN (valid_out)
    );

    VX_skid_buffer #(
        .DATAW (ALU_REQ_DATAW)
    ) alu_reg (
        .clk       (clk),
        .reset     (reset),
        .ready_in  (alu_in_ready),
        .valid_in  (alu_in_valid),
        .data_in   ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `ALU_BR_OP(execute_if.ex_op), execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, sel_tid}),
        .data_out  ({alu_req_if.wid, alu_req_if.thread_mask, alu_req_if.curr_PC, alu_req_if.op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid}),
        .ready_out (alu_req_if.ready),
        .valid_out (alu_req_if.valid)
    );

    VX_gpr_bypass #(
        .DATAW (RS2_DATAW)
    ) alu_bypass (
        .clk      (clk),
        .reset    (reset),
        .push     (alu_push),
        .data_in  ({gpr_read_if.rs1_data, gpr_read_if.rs2_data}),
        .data_out ({alu_req_if.rs1_data, alu_req_if.rs2_data}),
        .pop      (alu_pop)
    );

    ///////////////////////////////////////////////////////////////////////////
    // LSU unit

    localparam LSU_REQ_DATAW = `NW_BITS + `NUM_THREADS + 32 + 1 + `BYTEEN_BITS + 32 + `NR_BITS + 1;

    wire lsu_in_valid = execute_if.valid && sel_lsu;
    wire lsu_in_ready;
    wire lsu_push = lsu_in_valid && lsu_in_ready;
    wire lsu_pop  = lsu_req_if.valid && lsu_req_if.ready;

    VX_skid_buffer #(
        .DATAW (LSU_REQ_DATAW)
    ) lsu_reg (
        .clk       (clk),
        .reset     (reset),
        .ready_in  (lsu_in_ready),
        .valid_in  (lsu_in_valid),
        .data_in   ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `LSU_RW(execute_if.ex_op), `LSU_BE(execute_if.ex_op), execute_if.imm, execute_if.rd, execute_if.wb}),
        .data_out  ({lsu_req_if.wid, lsu_req_if.thread_mask, lsu_req_if.curr_PC, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb}),
        .ready_out (lsu_req_if.ready),
        .valid_out (lsu_req_if.valid)
    );

    // rs1 supplies the base address, rs2 the store data
    VX_gpr_bypass #(
        .DATAW (RS2_DATAW)
    ) lsu_bypass (
        .clk      (clk),
        .reset    (reset),
        .push     (lsu_push),
        .data_in  ({gpr_read_if.rs1_data, gpr_read_if.rs2_data}),
        .data_out ({lsu_req_if.base_addr, lsu_req_if.store_data}),
        .pop      (lsu_pop)
    );

    ///////////////////////////////////////////////////////////////////////////
    // CSR unit

    localparam CSR_REQ_DATAW = `NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1;

    wire csr_in_valid = execute_if.valid && sel_csr;
    wire csr_in_ready;
    wire csr_push = csr_in_valid && csr_in_ready;
    wire csr_pop  = csr_req_if.valid && csr_req_if.ready;

    // is_io is driven with a constant zero for requests coming from here
    VX_skid_buffer #(
        .DATAW (CSR_REQ_DATAW)
    ) csr_reg (
        .clk       (clk),
        .reset     (reset),
        .ready_in  (csr_in_ready),
        .valid_in  (csr_in_valid),
        .data_in   ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `CSR_OP(execute_if.ex_op), execute_if.imm[`CSR_ADDR_BITS-1:0], execute_if.rd, execute_if.wb, 1'b0}),
        .data_out  ({csr_req_if.wid, csr_req_if.thread_mask, csr_req_if.curr_PC, csr_req_if.op, csr_req_if.csr_addr, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_io}),
        .ready_out (csr_req_if.ready),
        .valid_out (csr_req_if.valid)
    );

    // One-cycle delayed copies of the fields that choose the CSR mask source,
    // so they line up with the register data on gpr_read_if.
    reg                csr_use_imm_r;
    reg [`NR_BITS-1:0] csr_rs1_r;

    always @(posedge clk) begin
        csr_rs1_r     <= execute_if.rs1;
        csr_use_imm_r <= execute_if.rs2_is_imm;
    end

    // immediate form uses the rs1 field itself, otherwise thread 0's rs1 value
    wire [31:0] csr_mask_in = csr_use_imm_r ? 32'(csr_rs1_r) : gpr_read_if.rs1_data[0];

    VX_gpr_bypass #(
        .DATAW (32)
    ) csr_bypass (
        .clk      (clk),
        .reset    (reset),
        .push     (csr_push),
        .data_in  (csr_mask_in),
        .data_out (csr_req_if.csr_mask),
        .pop      (csr_pop)
    );

    ///////////////////////////////////////////////////////////////////////////
    // MUL unit

`ifdef EXT_M_ENABLE
    localparam MUL_REQ_DATAW = `NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1;

    wire sel_mul      = (execute_if.ex_type == `EX_MUL);
    wire mul_in_valid = execute_if.valid && sel_mul;
    wire mul_in_ready;
    wire mul_push = mul_in_valid && mul_in_ready;
    wire mul_pop  = mul_req_if.valid && mul_req_if.ready;

    VX_skid_buffer #(
        .DATAW (MUL_REQ_DATAW)
    ) mul_reg (
        .clk       (clk),
        .reset     (reset),
        .ready_in  (mul_in_ready),
        .valid_in  (mul_in_valid),
        .data_in   ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `MUL_OP(execute_if.ex_op), execute_if.rd, execute_if.wb}),
        .data_out  ({mul_req_if.wid, mul_req_if.thread_mask, mul_req_if.curr_PC, mul_req_if.op, mul_req_if.rd, mul_req_if.wb}),
        .ready_out (mul_req_if.ready),
        .valid_out (mul_req_if.valid)
    );

    VX_gpr_bypass #(
        .DATAW (RS2_DATAW)
    ) mul_bypass (
        .clk      (clk),
        .reset    (reset),
        .push     (mul_push),
        .data_in  ({gpr_read_if.rs1_data, gpr_read_if.rs2_data}),
        .data_out ({mul_req_if.rs1_data, mul_req_if.rs2_data}),
        .pop      (mul_pop)
    );
`endif

    ///////////////////////////////////////////////////////////////////////////
    // FPU unit

`ifdef EXT_F_ENABLE
    localparam FPU_REQ_DATAW = `NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `FRM_BITS + `NR_BITS + 1;

    wire sel_fpu      = (execute_if.ex_type == `EX_FPU);
    wire fpu_in_valid = execute_if.valid && sel_fpu;
    wire fpu_in_ready;
    wire fpu_push = fpu_in_valid && fpu_in_ready;
    wire fpu_pop  = fpu_req_if.valid && fpu_req_if.ready;

    // A dynamic rounding mode is replaced by the value supplied through
    // csr_to_issue_if for the issuing warp.
    assign csr_to_issue_if.wid = execute_if.wid;
    wire [`FRM_BITS-1:0] resolved_frm = (execute_if.frm == `FRM_DYN) ? csr_to_issue_if.frm : execute_if.frm;

    VX_skid_buffer #(
        .DATAW (FPU_REQ_DATAW)
    ) fpu_reg (
        .clk       (clk),
        .reset     (reset),
        .ready_in  (fpu_in_ready),
        .valid_in  (fpu_in_valid),
        .data_in   ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `FPU_OP(execute_if.ex_op), resolved_frm, execute_if.rd, execute_if.wb}),
        .data_out  ({fpu_req_if.wid, fpu_req_if.thread_mask, fpu_req_if.curr_PC, fpu_req_if.op, fpu_req_if.frm, fpu_req_if.rd, fpu_req_if.wb}),
        .ready_out (fpu_req_if.ready),
        .valid_out (fpu_req_if.valid)
    );

    // three source operands for the FPU
    VX_gpr_bypass #(
        .DATAW ((3 * `NUM_THREADS * 32))
    ) fpu_bypass (
        .clk      (clk),
        .reset    (reset),
        .push     (fpu_push),
        .data_in  ({gpr_read_if.rs1_data, gpr_read_if.rs2_data, gpr_read_if.rs3_data}),
        .data_out ({fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
        .pop      (fpu_pop)
    );
`endif

    ///////////////////////////////////////////////////////////////////////////
    // GPU unit

    localparam GPU_REQ_DATAW = `NW_BITS + `NUM_THREADS + 32 + `GPU_BITS + `NR_BITS + 1;

    wire gpu_in_valid = execute_if.valid && sel_gpu;
    wire gpu_in_ready;
    wire gpu_push = gpu_in_valid && gpu_in_ready;
    wire gpu_pop  = gpu_req_if.valid && gpu_req_if.ready;

    VX_skid_buffer #(
        .DATAW (GPU_REQ_DATAW)
    ) gpu_reg (
        .clk       (clk),
        .reset     (reset),
        .ready_in  (gpu_in_ready),
        .valid_in  (gpu_in_valid),
        .data_in   ({execute_if.wid, execute_if.thread_mask, execute_if.curr_PC, `GPU_OP(execute_if.ex_op), execute_if.rd, execute_if.wb}),
        .data_out  ({gpu_req_if.wid, gpu_req_if.thread_mask, gpu_req_if.curr_PC, gpu_req_if.op, gpu_req_if.rd, gpu_req_if.wb}),
        .ready_out (gpu_req_if.ready),
        .valid_out (gpu_req_if.valid)
    );

    // all threads' rs1 values, but only thread 0's rs2 value
    VX_gpr_bypass #(
        .DATAW ((`NUM_THREADS * 32) + 32)
    ) gpu_bypass (
        .clk      (clk),
        .reset    (reset),
        .push     (gpu_push),
        .data_in  ({gpr_read_if.rs1_data, gpr_read_if.rs2_data[0]}),
        .data_out ({gpu_req_if.rs1_data, gpu_req_if.rs2_data}),
        .pop      (gpu_pop)
    );

    ///////////////////////////////////////////////////////////////////////////
    // can take next request? (ready of whichever unit the instruction targets)

    assign execute_if.ready = (sel_alu && alu_in_ready)
                           || (sel_lsu && lsu_in_ready)
                           || (sel_csr && csr_in_ready)
                       `ifdef EXT_M_ENABLE
                           || (sel_mul && mul_in_ready)
                       `endif
                       `ifdef EXT_F_ENABLE
                           || (sel_fpu && fpu_in_ready)
                       `endif
                           || (sel_gpu && gpu_in_ready);

endmodule
|
|
@ -7,53 +7,51 @@ module VX_issue #(
|
|||
input wire reset,
|
||||
|
||||
VX_decode_if decode_if,
|
||||
VX_wb_if writeback_if,
|
||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||
VX_writeback_if writeback_if,
|
||||
VX_csr_to_issue_if csr_to_issue_if,
|
||||
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_bru_req_if bru_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
);
|
||||
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire schedule_delay;
|
||||
|
||||
VX_decode_if ibuf_deq_if();
|
||||
VX_decode_if execute_if();
|
||||
VX_gpr_read_if gpr_read_if();
|
||||
assign gpr_read_if.valid = decode_if.valid && ~schedule_delay;
|
||||
assign gpr_read_if.wid = decode_if.wid;
|
||||
assign gpr_read_if.rs1 = decode_if.rs1;
|
||||
assign gpr_read_if.rs2 = decode_if.rs2;
|
||||
assign gpr_read_if.rs3 = decode_if.rs3;
|
||||
assign gpr_read_if.use_rs3 = decode_if.use_rs3;
|
||||
|
||||
wire ex_busy = (~alu_req_if.ready && (decode_if.ex_type == `EX_ALU))
|
||||
|| (~bru_req_if.ready && (decode_if.ex_type == `EX_BRU))
|
||||
|| (~lsu_req_if.ready && (decode_if.ex_type == `EX_LSU))
|
||||
|| (~csr_req_if.ready && (decode_if.ex_type == `EX_CSR))
|
||||
`ifdef EXT_M_ENABLE
|
||||
|| (~mul_req_if.ready && (decode_if.ex_type == `EX_MUL))
|
||||
`endif
|
||||
`ifdef EXT_F_ENABLE
|
||||
|| (~fpu_req_if.ready && (decode_if.ex_type == `EX_FPU))
|
||||
`endif
|
||||
|| (~gpu_req_if.ready && (decode_if.ex_type == `EX_GPU));
|
||||
wire scoreboard_delay;
|
||||
|
||||
VX_ibuffer #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) ibuffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.ibuf_enq_if (decode_if),
|
||||
.ibuf_deq_if (ibuf_deq_if),
|
||||
.freeze (~gpr_read_if.ready_in)
|
||||
);
|
||||
|
||||
VX_scoreboard #(
|
||||
.CORE_ID(CORE_ID)
|
||||
) scoreboard (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.decode_if (decode_if),
|
||||
.ibuf_deq_if (ibuf_deq_if),
|
||||
.writeback_if (writeback_if),
|
||||
.cmt_to_issue_if(cmt_to_issue_if),
|
||||
.ex_busy (ex_busy),
|
||||
.issue_tag (issue_tag),
|
||||
.schedule_delay (schedule_delay)
|
||||
.exe_delay (~execute_if.ready),
|
||||
.gpr_delay (~gpr_read_if.ready_in),
|
||||
.delay (scoreboard_delay)
|
||||
);
|
||||
|
||||
assign gpr_read_if.valid = ibuf_deq_if.valid && ~scoreboard_delay;
|
||||
assign gpr_read_if.wid = ibuf_deq_if.wid;
|
||||
assign gpr_read_if.rs1 = ibuf_deq_if.rs1;
|
||||
assign gpr_read_if.rs2 = ibuf_deq_if.rs2;
|
||||
assign gpr_read_if.rs3 = ibuf_deq_if.rs3;
|
||||
assign gpr_read_if.use_rs3 = ibuf_deq_if.use_rs3;
|
||||
assign gpr_read_if.ready_out = execute_if.ready;
|
||||
|
||||
VX_gpr_stage #(
|
||||
.CORE_ID(CORE_ID)
|
||||
|
@ -63,72 +61,54 @@ module VX_issue #(
|
|||
.writeback_if (writeback_if),
|
||||
.gpr_read_if (gpr_read_if)
|
||||
);
|
||||
|
||||
VX_issue_if issue_if();
|
||||
|
||||
assign issue_if.rs1_data = gpr_read_if.rs1_data;
|
||||
assign issue_if.rs2_data = gpr_read_if.rs2_data;
|
||||
assign issue_if.rs3_data = gpr_read_if.rs3_data;
|
||||
|
||||
wire [`NT_BITS-1:0] tid;
|
||||
VX_priority_encoder #(
|
||||
.N(`NUM_THREADS)
|
||||
) sel_src (
|
||||
.data_in (decode_if.thread_mask),
|
||||
.data_out (tid),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
wire stall = schedule_delay || ~gpr_read_if.ready;
|
||||
wire flush = stall; // clear output on stall
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `FRM_BITS + `NT_BITS)
|
||||
) issue_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (flush),
|
||||
.in ({decode_if.valid, issue_tag, decode_if.wid, decode_if.thread_mask, decode_if.curr_PC, decode_if.rd, decode_if.rs1, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.frm, tid}),
|
||||
.out ({issue_if.valid, issue_if.issue_tag, issue_if.wid, issue_if.thread_mask, issue_if.curr_PC, issue_if.rd, issue_if.rs1, issue_if.imm, issue_if.rs1_is_PC, issue_if.rs2_is_imm, issue_if.ex_type, issue_if.ex_op, issue_if.wb, issue_if.frm, issue_if.tid})
|
||||
);
|
||||
|
||||
assign decode_if.ready = issue_if.ready;
|
||||
assign issue_if.ready = ~stall;
|
||||
|
||||
VX_issue_demux issue_demux (
|
||||
.issue_if (issue_if),
|
||||
.alu_req_if (alu_req_if),
|
||||
.bru_req_if (bru_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
assign execute_if.valid = ibuf_deq_if.valid && gpr_read_if.ready_in && ~scoreboard_delay;
|
||||
assign execute_if.wid = ibuf_deq_if.wid;
|
||||
assign execute_if.thread_mask = ibuf_deq_if.thread_mask;
|
||||
assign execute_if.curr_PC = ibuf_deq_if.curr_PC;
|
||||
assign execute_if.ex_type = ibuf_deq_if.ex_type;
|
||||
assign execute_if.ex_op = ibuf_deq_if.ex_op;
|
||||
assign execute_if.frm = ibuf_deq_if.frm;
|
||||
assign execute_if.wb = ibuf_deq_if.wb;
|
||||
assign execute_if.rd = ibuf_deq_if.rd;
|
||||
assign execute_if.rs1 = ibuf_deq_if.rs1;
|
||||
assign execute_if.imm = ibuf_deq_if.imm;
|
||||
assign execute_if.rs1_is_PC = ibuf_deq_if.rs1_is_PC;
|
||||
assign execute_if.rs2_is_imm = ibuf_deq_if.rs2_is_imm;
|
||||
|
||||
VX_instr_demux instr_demux (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.execute_if (execute_if),
|
||||
.gpr_read_if (gpr_read_if),
|
||||
.csr_to_issue_if(csr_to_issue_if),
|
||||
.alu_req_if (alu_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
.fpu_req_if (fpu_req_if),
|
||||
.gpu_req_if (gpu_req_if)
|
||||
);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (alu_req_if.valid && alu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.curr_PC, alu_req_if.issue_tag, alu_req_if.thread_mask, alu_req_if.rs1_data, alu_req_if.rs2_data);
|
||||
end
|
||||
if (bru_req_if.valid && bru_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=BRU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h, offset=%0h", $time, CORE_ID, bru_req_if.wid, bru_req_if.curr_PC, bru_req_if.issue_tag, bru_req_if.thread_mask, bru_req_if.rs1_data, bru_req_if.rs2_data, bru_req_if.offset);
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=ALU, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.curr_PC, alu_req_if.thread_mask, alu_req_if.rs1_data, alu_req_if.rs2_data);
|
||||
end
|
||||
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.curr_PC, lsu_req_if.issue_tag, lsu_req_if.thread_mask, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=LSU, tmask=%b, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.curr_PC, lsu_req_if.thread_mask, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
|
||||
end
|
||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.curr_PC, csr_req_if.issue_tag, csr_req_if.thread_mask, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.curr_PC, csr_req_if.thread_mask, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
||||
end
|
||||
if (mul_req_if.valid && mul_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.curr_PC, mul_req_if.issue_tag, mul_req_if.thread_mask, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=MUL, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.curr_PC, mul_req_if.thread_mask, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
||||
end
|
||||
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.curr_PC, fpu_req_if.issue_tag, fpu_req_if.thread_mask, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.curr_PC, fpu_req_if.thread_mask, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||
end
|
||||
if (gpu_req_if.valid && gpu_req_if.ready) begin
|
||||
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.curr_PC, gpu_req_if.issue_tag, gpu_req_if.thread_mask, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=GPU, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.curr_PC, gpu_req_if.thread_mask, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -1,102 +0,0 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
// VX_issue_demux: fans the single issue_if out to one request interface per
// execution unit (ALU, BRU, LSU, CSR, MUL, FPU, GPU).
//  - Each unit's valid is issue_if.valid qualified by issue_if.ex_type matching
//    that unit's EX_* code.
//  - issue_tag, wid, thread_mask and curr_PC are forwarded unchanged to every unit.
//  - The unit-specific opcode is extracted from issue_if.ex_op with the
//    matching *_OP macro.
// The module consists of continuous assignments only and drives no ready signal.
module VX_issue_demux (
|
||||
// inputs
|
||||
VX_issue_if issue_if,
|
||||
|
||||
// outputs
|
||||
VX_alu_req_if alu_req_if,
|
||||
VX_bru_req_if bru_req_if,
|
||||
VX_lsu_req_if lsu_req_if,
|
||||
VX_csr_req_if csr_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
VX_fpu_req_if fpu_req_if,
|
||||
VX_gpu_req_if gpu_req_if
|
||||
);
|
||||
// ALU unit: gets the un-indexed rs1_data/rs2_data operands, imm, and the
// rs1_is_PC / rs2_is_imm flags.
|
||||
assign alu_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_ALU);
|
||||
assign alu_req_if.issue_tag = issue_if.issue_tag;
|
||||
assign alu_req_if.wid = issue_if.wid;
|
||||
assign alu_req_if.thread_mask = issue_if.thread_mask;
|
||||
assign alu_req_if.curr_PC = issue_if.curr_PC;
|
||||
assign alu_req_if.op = `ALU_OP(issue_if.ex_op);
|
||||
assign alu_req_if.rs1_is_PC = issue_if.rs1_is_PC;
|
||||
assign alu_req_if.rs2_is_imm = issue_if.rs2_is_imm;
|
||||
assign alu_req_if.imm = issue_if.imm;
|
||||
assign alu_req_if.rs1_data = issue_if.rs1_data;
|
||||
assign alu_req_if.rs2_data = issue_if.rs2_data;
|
||||
|
||||
// BRU unit: operands are the single rs1_data/rs2_data element selected by
// issue_if.tid; imm is forwarded as the offset.
|
||||
assign bru_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_BRU);
|
||||
assign bru_req_if.issue_tag = issue_if.issue_tag;
|
||||
assign bru_req_if.wid = issue_if.wid;
|
||||
assign bru_req_if.thread_mask = issue_if.thread_mask;
|
||||
assign bru_req_if.curr_PC = issue_if.curr_PC;
|
||||
assign bru_req_if.op = `BRU_OP(issue_if.ex_op);
|
||||
assign bru_req_if.rs1_is_PC = issue_if.rs1_is_PC;
|
||||
assign bru_req_if.rs1_data = issue_if.rs1_data[issue_if.tid];
|
||||
assign bru_req_if.rs2_data = issue_if.rs2_data[issue_if.tid];
|
||||
assign bru_req_if.offset = issue_if.imm;
|
||||
|
||||
// LSU unit: rw and byteen are derived from ex_op via the LSU_RW / LSU_BE
// macros; rs1_data is the base address, rs2_data the store data and imm the
// offset. rd and wb are forwarded as well.
|
||||
assign lsu_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_LSU);
|
||||
assign lsu_req_if.issue_tag = issue_if.issue_tag;
|
||||
assign lsu_req_if.wid = issue_if.wid;
|
||||
assign lsu_req_if.thread_mask = issue_if.thread_mask;
|
||||
assign lsu_req_if.curr_PC = issue_if.curr_PC;
|
||||
assign lsu_req_if.rw = `LSU_RW(issue_if.ex_op);
|
||||
assign lsu_req_if.byteen = `LSU_BE(issue_if.ex_op);
|
||||
assign lsu_req_if.base_addr = issue_if.rs1_data;
|
||||
assign lsu_req_if.store_data = issue_if.rs2_data;
|
||||
assign lsu_req_if.offset = issue_if.imm;
|
||||
assign lsu_req_if.rd = issue_if.rd;
|
||||
assign lsu_req_if.wb = issue_if.wb;
|
||||
|
||||
// CSR unit: csr_addr is the low CSR_ADDR_BITS bits of imm. csr_mask is the
// rs1 field cast to 32 bits when rs2_is_imm is set, otherwise element 0 of
// rs1_data. is_io is tied low here.
|
||||
assign csr_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_CSR);
|
||||
assign csr_req_if.issue_tag = issue_if.issue_tag;
|
||||
assign csr_req_if.wid = issue_if.wid;
|
||||
assign csr_req_if.thread_mask = issue_if.thread_mask;
|
||||
assign csr_req_if.curr_PC = issue_if.curr_PC;
|
||||
assign csr_req_if.op = `CSR_OP(issue_if.ex_op);
|
||||
assign csr_req_if.csr_addr = issue_if.imm[`CSR_ADDR_BITS-1:0];
|
||||
assign csr_req_if.csr_mask = issue_if.rs2_is_imm ? 32'(issue_if.rs1) : issue_if.rs1_data[0];
|
||||
assign csr_req_if.is_io = 1'b0;
|
||||
|
||||
// MUL unit: only driven when EXT_M_ENABLE is defined.
|
||||
`ifdef EXT_M_ENABLE
|
||||
assign mul_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_MUL);
|
||||
assign mul_req_if.issue_tag = issue_if.issue_tag;
|
||||
assign mul_req_if.wid = issue_if.wid;
|
||||
assign mul_req_if.thread_mask = issue_if.thread_mask;
|
||||
assign mul_req_if.curr_PC = issue_if.curr_PC;
|
||||
assign mul_req_if.op = `MUL_OP(issue_if.ex_op);
|
||||
assign mul_req_if.rs1_data = issue_if.rs1_data;
|
||||
assign mul_req_if.rs2_data = issue_if.rs2_data;
|
||||
`endif
|
||||
|
||||
// FPU unit: only driven when EXT_F_ENABLE is defined; additionally forwards
// frm and the third operand rs3_data.
|
||||
`ifdef EXT_F_ENABLE
|
||||
assign fpu_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_FPU);
|
||||
assign fpu_req_if.issue_tag = issue_if.issue_tag;
|
||||
assign fpu_req_if.wid = issue_if.wid;
|
||||
assign fpu_req_if.thread_mask = issue_if.thread_mask;
|
||||
assign fpu_req_if.curr_PC = issue_if.curr_PC;
|
||||
assign fpu_req_if.op = `FPU_OP(issue_if.ex_op);
|
||||
assign fpu_req_if.frm = issue_if.frm;
|
||||
assign fpu_req_if.rs1_data = issue_if.rs1_data;
|
||||
assign fpu_req_if.rs2_data = issue_if.rs2_data;
|
||||
assign fpu_req_if.rs3_data = issue_if.rs3_data;
|
||||
`endif
|
||||
|
||||
// GPU unit: rs1_data is forwarded whole, rs2_data only as element 0.
|
||||
assign gpu_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_GPU);
|
||||
assign gpu_req_if.issue_tag = issue_if.issue_tag;
|
||||
assign gpu_req_if.wid = issue_if.wid;
|
||||
assign gpu_req_if.thread_mask = issue_if.thread_mask;
|
||||
assign gpu_req_if.curr_PC = issue_if.curr_PC;
|
||||
assign gpu_req_if.op = `GPU_OP(issue_if.ex_op);
|
||||
assign gpu_req_if.rs1_data = issue_if.rs1_data;
|
||||
assign gpu_req_if.rs2_data = issue_if.rs2_data[0];
|
||||
|
||||
endmodule
|
|
@ -18,10 +18,6 @@ module VX_lsu_unit #(
|
|||
// outputs
|
||||
VX_exu_to_cmt_if lsu_commit_if
|
||||
);
|
||||
|
||||
wire valid_in;
|
||||
wire ready_in;
|
||||
|
||||
wire [`NUM_THREADS-1:0] req_thread_mask;
|
||||
wire req_rw;
|
||||
wire [`NUM_THREADS-1:0][29:0] req_addr;
|
||||
|
@ -30,10 +26,9 @@ module VX_lsu_unit #(
|
|||
wire [`NUM_THREADS-1:0][31:0] req_data;
|
||||
wire [1:0] req_sext;
|
||||
wire [`NR_BITS-1:0] req_rd;
|
||||
wire [`NW_BITS-1:0] req_wid;
|
||||
wire [`ISTAG_BITS-1:0] req_issue_tag;
|
||||
wire req_wb;
|
||||
wire [31:0] req_pc;
|
||||
wire [`NW_BITS-1:0] req_wid;
|
||||
wire [31:0] req_curr_PC;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] full_address;
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
|
@ -74,121 +69,127 @@ module VX_lsu_unit #(
|
|||
wire [`NUM_THREADS-1:0][31:0] req_address;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
// use a skid buffer because the dcache's ready signal is combinational
|
||||
// use buffer size of two for stall-free execution
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + `ISTAG_BITS + (`NUM_THREADS * 32) + 2 + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + `NR_BITS + 1 + 32),
|
||||
.SIZE (2)
|
||||
) input_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (lsu_req_if.valid),
|
||||
.ready_in (lsu_req_if.ready),
|
||||
.data_in ({lsu_req_if.wid, lsu_req_if.thread_mask, lsu_req_if.issue_tag, full_address, mem_req_sext, lsu_req_if.rw, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.curr_PC}),
|
||||
.data_out ({req_wid, req_thread_mask, req_issue_tag, req_address, req_sext, req_rw, req_addr, req_offset, req_byteen, req_data, req_rd, req_wb, req_pc}),
|
||||
.ready_out (ready_in),
|
||||
.valid_out (valid_in)
|
||||
wire valid_in;
|
||||
wire stall_in;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32)))
|
||||
) lsu_req_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_in),
|
||||
.flush (0),
|
||||
.in ({lsu_req_if.valid, lsu_req_if.wid, lsu_req_if.thread_mask, lsu_req_if.curr_PC, lsu_req_if.rw, lsu_req_if.rd, lsu_req_if.wb, full_address, mem_req_sext, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data}),
|
||||
.out ({valid_in, req_wid, req_thread_mask, req_curr_PC, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data})
|
||||
);
|
||||
|
||||
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0] mem_rsp_mask_buf;
|
||||
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0][31:0] mem_rsp_data_prev_buf;
|
||||
wire [`NW_BITS-1:0] rsp_wid;
|
||||
wire [31:0] rsp_curr_PC;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire rsp_wb;
|
||||
wire [`NUM_THREADS-1:0][1:0] rsp_offset;
|
||||
wire [1:0] rsp_sext;
|
||||
reg [`NUM_THREADS-1:0][31:0] rsp_data;
|
||||
|
||||
reg [`NUM_THREADS-1:0][1:0] mem_rsp_offset_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [1:0] mem_rsp_sext_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NW_BITS-1:0] mem_rsp_wid_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [31:0] mem_rsp_curr_PC_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NR_BITS-1:0] mem_rsp_rd_buf [`ISSUEQ_SIZE-1:0];
|
||||
reg [`NUM_THREADS-1:0] mem_rsp_mask[`LSUQ_SIZE-1:0];
|
||||
|
||||
reg [`NUM_THREADS-1:0][31:0] mem_rsp_data_curr;
|
||||
wire [`DCORE_TAG_ID_BITS-1:0] req_tag, rsp_tag;
|
||||
wire lsuq_full;
|
||||
|
||||
wire [`ISTAG_BITS-1:0] rsp_issue_tag = dcache_rsp_if.tag[0][`ISTAG_BITS-1:0];
|
||||
wire lsuq_push = (| dcache_req_if.valid) && dcache_req_if.ready
|
||||
&& (0 == req_rw); // only loads
|
||||
|
||||
wire [`NUM_THREADS-1:0] mem_rsp_mask = mem_rsp_mask_buf [rsp_issue_tag];
|
||||
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset = mem_rsp_offset_buf [rsp_issue_tag];
|
||||
wire [1:0] mem_rsp_sext = mem_rsp_sext_buf [rsp_issue_tag];
|
||||
wire [`NUM_THREADS-1:0][31:0] mem_rsp_data_prev= mem_rsp_data_prev_buf [rsp_issue_tag];
|
||||
wire [`NW_BITS-1:0] mem_rsp_wid = mem_rsp_wid_buf [rsp_issue_tag];
|
||||
wire [31:0] mem_rsp_curr_PC = mem_rsp_curr_PC_buf [rsp_issue_tag];
|
||||
wire [`NR_BITS-1:0] mem_rsp_rd = mem_rsp_rd_buf [rsp_issue_tag];
|
||||
wire lsuq_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
|
||||
|
||||
assign rsp_tag = dcache_rsp_if.tag[0][`DCORE_TAG_ID_BITS-1:0];
|
||||
|
||||
wire dcache_req_fire = (| dcache_req_if.valid) && dcache_req_if.ready;
|
||||
wire dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
|
||||
wire [`NUM_THREADS-1:0] mem_rsp_mask_upd = mem_rsp_mask[rsp_tag] & ~dcache_rsp_if.valid;
|
||||
|
||||
wire [`NUM_THREADS-1:0] mem_rsp_mask_n = mem_rsp_mask & ~dcache_rsp_if.valid;
|
||||
wire lsuq_pop = lsuq_pop_part && (0 == mem_rsp_mask_upd);
|
||||
|
||||
VX_cam_buffer #(
|
||||
.DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2),
|
||||
.SIZE (`LSUQ_SIZE)
|
||||
) lsu_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_addr (req_tag),
|
||||
.acquire_slot (lsuq_push),
|
||||
.read_addr (rsp_tag),
|
||||
.write_data ({req_wid, req_curr_PC, req_rd, req_wb, req_offset, req_sext}),
|
||||
.read_data ({rsp_wid, rsp_curr_PC, rsp_rd, rsp_wb, rsp_offset, rsp_sext}),
|
||||
.release_addr (rsp_tag),
|
||||
.release_slot (lsuq_pop),
|
||||
.full (lsuq_full)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (dcache_req_fire && (0 == req_rw)) begin
|
||||
mem_rsp_mask_buf [req_issue_tag] <= req_thread_mask;
|
||||
mem_rsp_data_prev_buf [req_issue_tag] <= 0;
|
||||
if (lsuq_push) begin
|
||||
mem_rsp_mask[req_tag] <= req_thread_mask;
|
||||
end
|
||||
if (dcache_rsp_fire) begin
|
||||
mem_rsp_mask_buf [rsp_issue_tag] <= mem_rsp_mask_n;
|
||||
mem_rsp_data_prev_buf [rsp_issue_tag] <= mem_rsp_data_curr | mem_rsp_data_prev;
|
||||
if (lsuq_pop_part) begin
|
||||
mem_rsp_mask[rsp_tag] <= mem_rsp_mask_upd;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (dcache_req_fire && (0 == req_rw)) begin
|
||||
mem_rsp_offset_buf [req_issue_tag] <= req_offset;
|
||||
mem_rsp_sext_buf [req_issue_tag] <= req_sext;
|
||||
mem_rsp_wid_buf [req_issue_tag] <= req_wid;
|
||||
mem_rsp_curr_PC_buf [req_issue_tag] <= req_pc;
|
||||
mem_rsp_rd_buf [req_issue_tag] <= req_rd;
|
||||
end
|
||||
end
|
||||
|
||||
wire stall_in;
|
||||
wire store_stall = valid_in && req_rw && stall_out;
|
||||
|
||||
// Core Request
|
||||
assign dcache_req_if.valid = {`NUM_THREADS{valid_in && ~stall_in}} & req_thread_mask;
|
||||
assign dcache_req_if.valid = {`NUM_THREADS{valid_in && ~lsuq_full && ~store_stall}} & req_thread_mask;
|
||||
assign dcache_req_if.rw = {`NUM_THREADS{req_rw}};
|
||||
assign dcache_req_if.byteen = req_byteen;
|
||||
assign dcache_req_if.addr = req_addr;
|
||||
assign dcache_req_if.data = req_data;
|
||||
|
||||
assign ready_in = dcache_req_if.ready && ~stall_in;
|
||||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
assign dcache_req_if.tag = {req_pc, req_wb, req_rd, req_wid, req_issue_tag};
|
||||
assign dcache_req_if.tag = {req_curr_PC, req_rd, req_wid, req_tag};
|
||||
`else
|
||||
assign dcache_req_if.tag = req_issue_tag;
|
||||
assign dcache_req_if.tag = req_tag;
|
||||
`endif
|
||||
|
||||
assign stall_in = ~dcache_req_if.ready || lsuq_full || store_stall;
|
||||
|
||||
// Can accept new request?
|
||||
assign lsu_req_if.ready = ~stall_in;
|
||||
|
||||
// Core Response
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
wire [31:0] rsp_data_shifted = dcache_rsp_if.data[i] >> {mem_rsp_offset[i], 3'b0};
|
||||
wire [31:0] rsp_data_shifted = dcache_rsp_if.data[i] >> {rsp_offset[i], 3'b0};
|
||||
always @(*) begin
|
||||
case (mem_rsp_sext)
|
||||
1: mem_rsp_data_curr[i] = {{24{rsp_data_shifted[7]}}, rsp_data_shifted[7:0]};
|
||||
2: mem_rsp_data_curr[i] = {{16{rsp_data_shifted[15]}}, rsp_data_shifted[15:0]};
|
||||
default: mem_rsp_data_curr[i] = rsp_data_shifted;
|
||||
case (rsp_sext)
|
||||
1: rsp_data[i] = {{24{rsp_data_shifted[7]}}, rsp_data_shifted[7:0]};
|
||||
2: rsp_data[i] = {{16{rsp_data_shifted[15]}}, rsp_data_shifted[15:0]};
|
||||
default: rsp_data[i] = rsp_data_shifted;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
reg is_load_rsp;
|
||||
reg [`NUM_THREADS-1:0][31:0] load_data;
|
||||
reg [`ISTAG_BITS-1:0] rsp_issue_tag_r;
|
||||
wire is_store_req = valid_in && ~lsuq_full && req_rw && dcache_req_if.ready;
|
||||
wire is_load_rsp = (| dcache_rsp_if.valid);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
is_load_rsp <= 0;
|
||||
end else begin
|
||||
is_load_rsp <= dcache_rsp_fire && (0 == mem_rsp_mask_n);
|
||||
load_data <= mem_rsp_data_curr | mem_rsp_data_prev;
|
||||
rsp_issue_tag_r <= rsp_issue_tag;
|
||||
end
|
||||
end
|
||||
wire stall_out = ~lsu_commit_if.ready && lsu_commit_if.valid;
|
||||
wire mem_rsp_stall = is_load_rsp && is_store_req; // arbitration prioritizes stores
|
||||
|
||||
wire is_store_req = dcache_req_fire && req_rw;
|
||||
assign stall_in = is_load_rsp && valid_in && req_rw; // LOAD has priority
|
||||
wire arb_valid = is_store_req || is_load_rsp;
|
||||
wire [`NW_BITS-1:0] arb_wid = is_store_req ? req_wid : rsp_wid;
|
||||
wire [`NUM_THREADS-1:0] arb_thread_mask = is_store_req ? req_thread_mask : dcache_rsp_if.valid;
|
||||
wire [31:0] arb_curr_PC = is_store_req ? req_curr_PC : rsp_curr_PC;
|
||||
wire [`NR_BITS-1:0] arb_rd = is_store_req ? 0 : rsp_rd;
|
||||
wire arb_wb = is_store_req ? 0 : rsp_wb;
|
||||
|
||||
assign lsu_commit_if.valid = is_load_rsp || is_store_req;
|
||||
assign lsu_commit_if.issue_tag = is_load_rsp ? rsp_issue_tag_r : req_issue_tag;
|
||||
assign lsu_commit_if.data = load_data;
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
|
||||
) lsu_rsp_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
.flush (1'b0),
|
||||
.in ({arb_valid, arb_wid, arb_thread_mask, arb_curr_PC, arb_rd, arb_wb, rsp_data}),
|
||||
.out ({lsu_commit_if.valid, lsu_commit_if.wid, lsu_commit_if.thread_mask, lsu_commit_if.curr_PC, lsu_commit_if.rd, lsu_commit_if.wb, lsu_commit_if.data})
|
||||
);
|
||||
|
||||
// Can accept new cache response?
|
||||
assign dcache_rsp_if.ready = 1'b1;
|
||||
assign dcache_rsp_if.ready = ~(stall_out || mem_rsp_stall);
|
||||
|
||||
// scope registration
|
||||
`SCOPE_ASSIGN (scope_dcache_req_valid, dcache_req_if.valid);
|
||||
|
@ -198,28 +199,23 @@ module VX_lsu_unit #(
|
|||
`SCOPE_ASSIGN (scope_dcache_req_data, dcache_req_if.data);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_tag, dcache_req_if.tag);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_ready, dcache_req_if.ready);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_wid, req_wid);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_wid, req_wid);
|
||||
`SCOPE_ASSIGN (scope_dcache_req_curr_PC, req_pc);
|
||||
|
||||
`SCOPE_ASSIGN (scope_dcache_rsp_valid, dcache_rsp_if.valid);
|
||||
`SCOPE_ASSIGN (scope_dcache_rsp_data, dcache_rsp_if.data);
|
||||
`SCOPE_ASSIGN (scope_dcache_rsp_tag, dcache_rsp_if.tag);
|
||||
`SCOPE_ASSIGN (scope_dcache_rsp_ready, dcache_rsp_if.ready);
|
||||
|
||||
`UNUSED_VAR (mem_rsp_wid)
|
||||
`UNUSED_VAR (mem_rsp_curr_PC)
|
||||
`UNUSED_VAR (mem_rsp_rd)
|
||||
`UNUSED_VAR (req_wb)
|
||||
|
||||
`ifdef DBG_PRINT_CORE_DCACHE
|
||||
always @(posedge clk) begin
|
||||
if ((| dcache_req_if.valid) && dcache_req_if.ready) begin
|
||||
$display("%t: D$%0d req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, rd=%0d, rw=%0b, byteen=%0h, data=%0h",
|
||||
$time, CORE_ID, req_wid, req_pc, dcache_req_if.valid, req_address, dcache_req_if.tag, req_rd, dcache_req_if.rw, dcache_req_if.byteen, dcache_req_if.data);
|
||||
$time, CORE_ID, req_wid, req_curr_PC, dcache_req_if.valid, req_address, dcache_req_if.tag, req_rd, dcache_req_if.rw, dcache_req_if.byteen, dcache_req_if.data);
|
||||
end
|
||||
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
|
||||
$display("%t: D$%0d rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
|
||||
$time, CORE_ID, dcache_rsp_if.valid, mem_rsp_wid, mem_rsp_curr_PC, dcache_rsp_if.tag, mem_rsp_rd, dcache_rsp_if.data);
|
||||
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_curr_PC, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -7,34 +7,52 @@ module VX_mul_unit #(
|
|||
input wire reset,
|
||||
|
||||
// Inputs
|
||||
VX_mul_req_if alu_req_if,
|
||||
VX_mul_req_if mul_req_if,
|
||||
|
||||
// Outputs
|
||||
VX_exu_to_cmt_if alu_commit_if
|
||||
VX_exu_to_cmt_if mul_commit_if
|
||||
);
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`MUL_BITS-1:0] alu_op;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1, alu_in2;
|
||||
wire valid_in, ready_in;
|
||||
|
||||
// use a skid buffer due to MUL/DIV output arbitration adding realtime backpressure
|
||||
VX_elastic_buffer #(
|
||||
.DATAW (`ISTAG_BITS + `MUL_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.SIZE (0)
|
||||
) input_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (alu_req_if.valid),
|
||||
.ready_in (alu_req_if.ready),
|
||||
.data_in ({alu_req_if.issue_tag, alu_req_if.op, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||
.data_out ({issue_tag, alu_op, alu_in1, alu_in2}),
|
||||
.ready_out (ready_in),
|
||||
.valid_out (valid_in)
|
||||
);
|
||||
localparam MULQ_BITS = `LOG2UP(`MULQ_SIZE);
|
||||
|
||||
wire [`MUL_BITS-1:0] alu_op = mul_req_if.op;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = mul_req_if.rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = mul_req_if.rs2_data;
|
||||
|
||||
wire [`NW_BITS-1:0] rsp_wid;
|
||||
wire [`NUM_THREADS-1:0] rsp_thread_mask;
|
||||
wire [31:0] rsp_curr_PC;
|
||||
wire [`NR_BITS-1:0] rsp_rd;
|
||||
wire rsp_wb;
|
||||
wire [MULQ_BITS-1:0] tag_in, tag_out;
|
||||
wire valid_out;
|
||||
wire stall_out;
|
||||
wire mulq_full;
|
||||
|
||||
wire mulq_push = mul_req_if.valid && mul_req_if.ready;
|
||||
wire mulq_pop = valid_out && ~stall_out;
|
||||
|
||||
VX_cam_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
|
||||
.SIZE (`MULQ_SIZE)
|
||||
) mul_queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.acquire_slot (mulq_push),
|
||||
.write_addr (tag_in),
|
||||
.read_addr (tag_out),
|
||||
.release_addr (tag_out),
|
||||
.write_data ({mul_req_if.wid, mul_req_if.thread_mask, mul_req_if.curr_PC, mul_req_if.rd, mul_req_if.wb}),
|
||||
.read_data ({rsp_wid, rsp_thread_mask, rsp_curr_PC, rsp_rd, rsp_wb}),
|
||||
.release_slot (mulq_pop),
|
||||
.full (mulq_full)
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_result;
|
||||
wire is_mulw = (alu_op == `MUL_MUL);
|
||||
wire is_mulw_out;
|
||||
wire stall_mul;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
|
||||
|
@ -51,7 +69,7 @@ module VX_mul_unit #(
|
|||
) multiplier (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.clk_en(1'b1),
|
||||
.clk_en(~stall_mul),
|
||||
.dataa(mul_in1),
|
||||
.datab(mul_in2),
|
||||
.result(mul_result_tmp)
|
||||
|
@ -60,20 +78,20 @@ module VX_mul_unit #(
|
|||
assign mul_result[i] = is_mulw_out ? mul_result_tmp[31:0] : mul_result_tmp[63:32];
|
||||
end
|
||||
|
||||
wire [`ISTAG_BITS-1:0] mul_issue_tag;
|
||||
wire [MULQ_BITS-1:0] mul_tag;
|
||||
wire mul_valid_out;
|
||||
|
||||
wire mul_fire = valid_in && ready_in && ~`IS_DIV_OP(alu_op);
|
||||
wire mul_fire = mul_req_if.valid && mul_req_if.ready && ~`IS_DIV_OP(alu_op);
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(1 + `ISTAG_BITS + 1),
|
||||
.DATAW(1 + MULQ_BITS + 1),
|
||||
.DEPTH(`LATENCY_IMUL)
|
||||
) mul_shift_reg (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(1'b1),
|
||||
.in({mul_fire, issue_tag, is_mulw}),
|
||||
.out({mul_valid_out, mul_issue_tag, is_mulw_out})
|
||||
.enable(~stall_mul),
|
||||
.in({mul_fire, tag_in, is_mulw}),
|
||||
.out({mul_valid_out, mul_tag, is_mulw_out})
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
@ -81,8 +99,8 @@ module VX_mul_unit #(
|
|||
wire [`NUM_THREADS-1:0][31:0] div_result;
|
||||
wire is_div = (alu_op == `MUL_DIV || alu_op == `MUL_DIVU);
|
||||
wire is_signed_div = (alu_op == `MUL_DIV || alu_op == `MUL_REM);
|
||||
reg [`NUM_THREADS-1:0] is_div_qual;
|
||||
wire [`NUM_THREADS-1:0] is_div_out;
|
||||
reg [`NUM_THREADS-1:0] is_div_qual;
|
||||
wire is_div_out;
|
||||
wire stall_div;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
|
@ -95,8 +113,8 @@ module VX_mul_unit #(
|
|||
always @(*) begin
|
||||
if (~stall_div) begin
|
||||
is_div_qual[i] = is_div;
|
||||
div_in1_qual = alu_in1[i];
|
||||
div_in2_qual = alu_in2[i];
|
||||
div_in1_qual = alu_in1[i];
|
||||
div_in2_qual = alu_in2[i];
|
||||
if (0 == alu_in2[i]) begin
|
||||
div_in2_qual = 1;
|
||||
if (is_div) begin
|
||||
|
@ -134,34 +152,52 @@ module VX_mul_unit #(
|
|||
.remainder(rem_result_tmp)
|
||||
);
|
||||
|
||||
assign div_result[i] = is_div_out[i] ? div_result_tmp : rem_result_tmp;
|
||||
assign div_result[i] = is_div_out ? div_result_tmp : rem_result_tmp;
|
||||
end
|
||||
|
||||
wire [`ISTAG_BITS-1:0] div_issue_tag;
|
||||
wire [MULQ_BITS-1:0] div_tag;
|
||||
wire div_valid_out;
|
||||
|
||||
wire div_fire = valid_in && ready_in && `IS_DIV_OP(alu_op);
|
||||
wire div_fire = mul_req_if.valid && mul_req_if.ready && `IS_DIV_OP(alu_op);
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(1 + `ISTAG_BITS + `NUM_THREADS),
|
||||
.DATAW(1 + MULQ_BITS + 1),
|
||||
.DEPTH(`LATENCY_IDIV + 1)
|
||||
) div_shift_reg (
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
.enable(~stall_div),
|
||||
.in({div_fire, issue_tag, is_div_qual}),
|
||||
.out({div_valid_out, div_issue_tag, is_div_out})
|
||||
.in({div_fire, tag_in, (| is_div_qual)}),
|
||||
.out({div_valid_out, div_tag, is_div_out})
|
||||
);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign stall_div = mul_valid_out && div_valid_out; // arbitration prioritizes MUL
|
||||
wire arbiter_hazard = mul_valid_out && div_valid_out;
|
||||
|
||||
assign stall_out = ~mul_commit_if.ready && mul_commit_if.valid;
|
||||
assign stall_mul = stall_out || mulq_full;
|
||||
assign stall_div = stall_out || mulq_full
|
||||
|| arbiter_hazard; // arbitration prioritizes MUL
|
||||
wire stall_in = stall_mul || stall_div;
|
||||
|
||||
assign valid_out = mul_valid_out || div_valid_out;
|
||||
assign tag_out = mul_valid_out ? mul_tag : div_tag;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] result = mul_valid_out ? mul_result : div_result;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
|
||||
) alu_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
.flush (0),
|
||||
.in ({valid_out, rsp_wid, rsp_thread_mask, rsp_curr_PC, rsp_rd, rsp_wb, result}),
|
||||
.out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.thread_mask, mul_commit_if.curr_PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
|
||||
);
|
||||
|
||||
// can accept new request?
|
||||
assign ready_in = ~stall_div;
|
||||
|
||||
assign alu_commit_if.valid = mul_valid_out || div_valid_out;
|
||||
assign alu_commit_if.issue_tag = mul_valid_out ? mul_issue_tag : div_issue_tag;
|
||||
assign alu_commit_if.data = mul_valid_out ? mul_result : div_result;
|
||||
assign mul_req_if.ready = ~stall_in;
|
||||
|
||||
endmodule
|
|
@ -101,24 +101,22 @@ module VX_pipeline #(
|
|||
assign csr_io_rsp_data = csr_io_rsp_if.data;
|
||||
assign csr_io_rsp_if.ready = csr_io_rsp_ready;
|
||||
|
||||
VX_csr_to_issue_if csr_to_issue_if();
|
||||
VX_cmt_to_csr_if cmt_to_csr_if();
|
||||
VX_decode_if decode_if();
|
||||
VX_branch_ctl_if branch_ctl_if();
|
||||
VX_warp_ctl_if warp_ctl_if();
|
||||
VX_ifetch_rsp_if ifetch_rsp_if();
|
||||
VX_alu_req_if alu_req_if();
|
||||
VX_bru_req_if bru_req_if();
|
||||
VX_lsu_req_if lsu_req_if();
|
||||
VX_csr_req_if csr_req_if();
|
||||
VX_mul_req_if mul_req_if();
|
||||
VX_fpu_req_if fpu_req_if();
|
||||
VX_gpu_req_if gpu_req_if();
|
||||
VX_wb_if writeback_if();
|
||||
VX_cmt_to_issue_if cmt_to_issue_if();
|
||||
VX_writeback_if writeback_if();
|
||||
VX_wstall_if wstall_if();
|
||||
VX_join_if join_if();
|
||||
VX_exu_to_cmt_if alu_commit_if();
|
||||
VX_exu_to_cmt_if bru_commit_if();
|
||||
VX_exu_to_cmt_if lsu_commit_if();
|
||||
VX_exu_to_cmt_if csr_commit_if();
|
||||
VX_exu_to_cmt_if mul_commit_if();
|
||||
|
@ -159,10 +157,9 @@ module VX_pipeline #(
|
|||
|
||||
.decode_if (decode_if),
|
||||
.writeback_if (writeback_if),
|
||||
.cmt_to_issue_if(cmt_to_issue_if),
|
||||
.csr_to_issue_if(csr_to_issue_if),
|
||||
|
||||
.alu_req_if (alu_req_if),
|
||||
.bru_req_if (bru_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
|
@ -183,10 +180,10 @@ module VX_pipeline #(
|
|||
.csr_io_req_if (csr_io_req_if),
|
||||
.csr_io_rsp_if (csr_io_rsp_if),
|
||||
|
||||
.csr_to_issue_if(csr_to_issue_if),
|
||||
.cmt_to_csr_if (cmt_to_csr_if),
|
||||
|
||||
.alu_req_if (alu_req_if),
|
||||
.bru_req_if (bru_req_if),
|
||||
.lsu_req_if (lsu_req_if),
|
||||
.csr_req_if (csr_req_if),
|
||||
.mul_req_if (mul_req_if),
|
||||
|
@ -196,7 +193,6 @@ module VX_pipeline #(
|
|||
.warp_ctl_if (warp_ctl_if),
|
||||
.branch_ctl_if (branch_ctl_if),
|
||||
.alu_commit_if (alu_commit_if),
|
||||
.bru_commit_if (bru_commit_if),
|
||||
.lsu_commit_if (lsu_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.mul_commit_if (mul_commit_if),
|
||||
|
@ -213,14 +209,12 @@ module VX_pipeline #(
|
|||
.reset (reset),
|
||||
|
||||
.alu_commit_if (alu_commit_if),
|
||||
.bru_commit_if (bru_commit_if),
|
||||
.lsu_commit_if (lsu_commit_if),
|
||||
.csr_commit_if (csr_commit_if),
|
||||
.mul_commit_if (mul_commit_if),
|
||||
.fpu_commit_if (fpu_commit_if),
|
||||
.gpu_commit_if (gpu_commit_if),
|
||||
|
||||
.cmt_to_issue_if(cmt_to_issue_if),
|
||||
.writeback_if (writeback_if),
|
||||
.cmt_to_csr_if (cmt_to_csr_if)
|
||||
);
|
||||
|
|
|
@ -35,6 +35,10 @@
|
|||
wire [$bits(x)-1:0] __``x``__ = x; \
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
// UNUSED_FIELD(x,y): declares a dummy wire named __y__, $bits(x.y) wide and
// driven by x.y, with Verilator's UNUSED lint switched off around the
// declaration and back on afterwards. Presumably used to reference a field
// that is otherwise unread so the linter stays quiet.
`define UNUSED_FIELD(x,y) /* verilator lint_off UNUSED */ \
|
||||
wire [$bits(x.y)-1:0] __``y``__ = x.y; \
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
// UNUSED_PIN(x): expands to an empty named port connection for port x, with
// Verilator's PINCONNECTEMPTY lint switched off around it and back on afterwards.
`define UNUSED_PIN(x) /* verilator lint_off PINCONNECTEMPTY */ \
|
||||
. x () \
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
|
|
|
@ -24,39 +24,40 @@ task print_ex_op;
|
|||
begin
|
||||
case (ex)
|
||||
`EX_ALU: begin
|
||||
case (`ALU_BITS'(op))
|
||||
`ALU_ADD: $write("ADD");
|
||||
`ALU_SUB: $write("SUB");
|
||||
`ALU_SLL: $write("SLL");
|
||||
`ALU_SRL: $write("SRL");
|
||||
`ALU_SRA: $write("SRA");
|
||||
`ALU_SLT: $write("SLT");
|
||||
`ALU_SLTU: $write("SLTU");
|
||||
`ALU_XOR: $write("XOR");
|
||||
`ALU_OR: $write("OR");
|
||||
`ALU_AND: $write("AND");
|
||||
`ALU_LUI: $write("LUI");
|
||||
`ALU_AUIPC: $write("AUIPC");
|
||||
default: $write("?");
|
||||
endcase
|
||||
end
|
||||
`EX_BRU: begin
|
||||
case (`BRU_BITS'(op))
|
||||
`BRU_EQ: $write("BEQ");
|
||||
`BRU_NE: $write("BNE");
|
||||
`BRU_LT: $write("BLT");
|
||||
`BRU_GE: $write("BGE");
|
||||
`BRU_LTU: $write("BLTU");
|
||||
`BRU_GEU: $write("BGEU");
|
||||
`BRU_JAL: $write("JAL");
|
||||
`BRU_JALR: $write("JALR");
|
||||
`BRU_ECALL: $write("ECALL");
|
||||
`BRU_EBREAK:$write("EBREAK");
|
||||
`BRU_MRET: $write("MRET");
|
||||
`BRU_SRET: $write("SRET");
|
||||
`BRU_DRET: $write("DRET");
|
||||
default: $write("?");
|
||||
endcase
|
||||
if (`IS_BR_OP(op)) begin
|
||||
case (`BR_BITS'(op))
|
||||
`BR_EQ: $write("BEQ");
|
||||
`BR_NE: $write("BNE");
|
||||
`BR_LT: $write("BLT");
|
||||
`BR_GE: $write("BGE");
|
||||
`BR_LTU: $write("BLTU");
|
||||
`BR_GEU: $write("BGEU");
|
||||
`BR_JAL: $write("JAL");
|
||||
`BR_JALR: $write("JALR");
|
||||
`BR_ECALL: $write("ECALL");
|
||||
`BR_EBREAK:$write("EBREAK");
|
||||
`BR_MRET: $write("MRET");
|
||||
`BR_SRET: $write("SRET");
|
||||
`BR_DRET: $write("DRET");
|
||||
default: $write("?");
|
||||
endcase
|
||||
end else begin
|
||||
case (`ALU_BITS'(op))
|
||||
`ALU_ADD: $write("ADD");
|
||||
`ALU_SUB: $write("SUB");
|
||||
`ALU_SLL: $write("SLL");
|
||||
`ALU_SRL: $write("SRL");
|
||||
`ALU_SRA: $write("SRA");
|
||||
`ALU_SLT: $write("SLT");
|
||||
`ALU_SLTU: $write("SLTU");
|
||||
`ALU_XOR: $write("XOR");
|
||||
`ALU_OR: $write("OR");
|
||||
`ALU_AND: $write("AND");
|
||||
`ALU_LUI: $write("LUI");
|
||||
`ALU_AUIPC: $write("AUIPC");
|
||||
default: $write("?");
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`EX_LSU: begin
|
||||
case (`LSU_BITS'(op))
|
||||
|
|
|
@ -6,66 +6,56 @@ module VX_scoreboard #(
|
|||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
VX_decode_if decode_if,
|
||||
VX_wb_if writeback_if,
|
||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||
input wire ex_busy,
|
||||
output wire [`ISTAG_BITS-1:0] issue_tag,
|
||||
output wire schedule_delay
|
||||
VX_decode_if ibuf_deq_if,
|
||||
VX_writeback_if writeback_if,
|
||||
input wire exe_delay,
|
||||
input wire gpr_delay,
|
||||
|
||||
output wire delay
|
||||
);
|
||||
reg [`NUM_REGS-1:0] inuse_reg_mask [`NUM_WARPS-1:0];
|
||||
reg [`NUM_THREADS-1:0] inuse_registers [(`NUM_WARPS * `NUM_REGS)-1:0];
|
||||
reg [`NUM_REGS-1:0] inuse_reg_mask [`NUM_WARPS-1:0];
|
||||
|
||||
wire [`NUM_REGS-1:0] inuse_mask = inuse_reg_mask[decode_if.wid] & decode_if.reg_use_mask;
|
||||
wire inuse_hazard = (inuse_mask != 0);
|
||||
wire [`NUM_REGS-1:0] inuse_mask = inuse_reg_mask[ibuf_deq_if.wid] & ibuf_deq_if.used_regs;
|
||||
|
||||
wire issue_buf_full;
|
||||
|
||||
assign schedule_delay = ex_busy || inuse_hazard || issue_buf_full;
|
||||
|
||||
wire issue_fire = decode_if.valid && decode_if.ready;
|
||||
assign delay = (| inuse_mask);
|
||||
|
||||
wire reserve_rd = issue_fire && (decode_if.wb != 0);
|
||||
wire reserve_reg = ibuf_deq_if.valid && ibuf_deq_if.ready && (ibuf_deq_if.wb != 0);
|
||||
|
||||
wire release_rd = writeback_if.valid;
|
||||
wire release_reg = writeback_if.valid && writeback_if.ready;
|
||||
|
||||
wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[{writeback_if.wid, writeback_if.rd}] & ~writeback_if.thread_mask;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
for (integer i = 0; i < `NUM_WARPS; i++) begin
|
||||
inuse_reg_mask[i] <= `NUM_REGS'(0);
|
||||
for (integer w = 0; w < `NUM_WARPS; w++) begin
|
||||
for (integer i = 0; i < `NUM_REGS; i++) begin
|
||||
inuse_registers[w * `NUM_REGS + i] <= 0;
|
||||
end
|
||||
inuse_reg_mask [w] <= `NUM_REGS'(0);
|
||||
end
|
||||
end else begin
|
||||
if (reserve_rd) begin
|
||||
inuse_reg_mask[decode_if.wid][decode_if.rd] <= 1;
|
||||
if (reserve_reg) begin
|
||||
inuse_registers[{ibuf_deq_if.wid, ibuf_deq_if.rd}] <= ibuf_deq_if.thread_mask;
|
||||
inuse_reg_mask[ibuf_deq_if.wid][ibuf_deq_if.rd] <= 1;
|
||||
end
|
||||
if (release_rd) begin
|
||||
if (release_reg) begin
|
||||
assert(inuse_reg_mask[writeback_if.wid][writeback_if.rd] != 0);
|
||||
inuse_reg_mask[writeback_if.wid][writeback_if.rd] <= 0;
|
||||
inuse_registers[{writeback_if.wid, writeback_if.rd}] <= inuse_registers_n;
|
||||
inuse_reg_mask[writeback_if.wid][writeback_if.rd] <= (| inuse_registers_n);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_cam_buffer #(
|
||||
.DATAW ($bits(issue_data_t)),
|
||||
.SIZE (`ISSUEQ_SIZE),
|
||||
.RPORTS (`NUM_EXS)
|
||||
) issue_table (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({decode_if.wid, decode_if.thread_mask, decode_if.curr_PC, decode_if.rd, decode_if.wb}),
|
||||
.write_addr (issue_tag),
|
||||
.acquire_slot (issue_fire),
|
||||
.release_slot ({cmt_to_issue_if.alu_valid, cmt_to_issue_if.bru_valid, cmt_to_issue_if.lsu_valid, cmt_to_issue_if.csr_valid, cmt_to_issue_if.mul_valid, cmt_to_issue_if.fpu_valid, cmt_to_issue_if.gpu_valid}),
|
||||
.read_addr ({cmt_to_issue_if.alu_tag, cmt_to_issue_if.bru_tag, cmt_to_issue_if.lsu_tag, cmt_to_issue_if.csr_tag, cmt_to_issue_if.mul_tag, cmt_to_issue_if.fpu_tag, cmt_to_issue_if.gpu_tag}),
|
||||
.read_data ({cmt_to_issue_if.alu_data, cmt_to_issue_if.bru_data, cmt_to_issue_if.lsu_data, cmt_to_issue_if.csr_data, cmt_to_issue_if.mul_data, cmt_to_issue_if.fpu_data, cmt_to_issue_if.gpu_data}),
|
||||
.full (issue_buf_full)
|
||||
);
|
||||
// issue the instruction
|
||||
assign ibuf_deq_if.ready = ~(delay || exe_delay || gpr_delay);
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
always @(posedge clk) begin
|
||||
if (decode_if.valid && ~decode_if.ready) begin
|
||||
$display("%t: Core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, ex_busy=%b",
|
||||
$time, CORE_ID, decode_if.wid, decode_if.curr_PC, decode_if.rd, decode_if.wb, issue_buf_full,
|
||||
inuse_mask[decode_if.rd], inuse_mask[decode_if.rs1], inuse_mask[decode_if.rs2], inuse_mask[decode_if.rs3], ex_busy);
|
||||
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
|
||||
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b, exe=%b, gpr=%b",
|
||||
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.curr_PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
|
||||
inuse_mask[ibuf_deq_if.rd], inuse_mask[ibuf_deq_if.rs1], inuse_mask[ibuf_deq_if.rs2], inuse_mask[ibuf_deq_if.rs3], exe_delay, gpr_delay);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
|
@ -3,14 +3,6 @@
|
|||
|
||||
`include "VX_define.vh"
|
||||
|
||||
typedef struct packed {
|
||||
logic [`NW_BITS-1:0] wid;
|
||||
logic [`NUM_THREADS-1:0] thread_mask;
|
||||
logic [31:0] curr_PC;
|
||||
logic [`NR_BITS-1:0] rd;
|
||||
logic wb;
|
||||
} issue_data_t;
|
||||
|
||||
typedef struct packed {
|
||||
logic is_normal;
|
||||
logic is_zero;
|
||||
|
@ -53,7 +45,7 @@ typedef struct packed {
|
|||
typedef struct packed {
|
||||
logic valid;
|
||||
logic [`NB_BITS-1:0] id;
|
||||
logic [`NW_BITS:0] num_warps;
|
||||
logic [`NW_BITS-1:0] size_m1;
|
||||
} gpu_barrier_t;
|
||||
|
||||
`endif
|
|
@ -20,36 +20,46 @@ module VX_warp_sched #(
|
|||
wire [31:0] join_pc;
|
||||
wire [`NUM_THREADS-1:0] join_tm;
|
||||
|
||||
reg [`NUM_WARPS-1:0] warp_active;
|
||||
reg [`NUM_WARPS-1:0] warp_stalled;
|
||||
reg [`NUM_WARPS-1:0] visible_active;
|
||||
wire update_visible_active;
|
||||
reg [`NUM_WARPS-1:0] warp_active; // real active warps (updated when a warp is activated or disabled)
|
||||
reg [`NUM_WARPS-1:0] warp_stalled; // asserted when a branch/gpgpu instructions are issued
|
||||
reg [`NUM_WARPS-1:0] warp_ready, warp_ready_n; // enforces round-robin, barrier, and non-speculating branches
|
||||
|
||||
reg [`NUM_WARPS-1:0] warp_lock;
|
||||
// Lock warp until instruction decode to resolve branches
|
||||
reg [`NUM_WARPS-1:0] fetch_lock;
|
||||
|
||||
reg [`NUM_THREADS-1:0] thread_masks[`NUM_WARPS-1:0];
|
||||
reg [31:0] warp_pcs[`NUM_WARPS-1:0];
|
||||
|
||||
// barriers
|
||||
reg [`NUM_WARPS-1:0] barrier_stall_mask[`NUM_BARRIERS-1:0];
|
||||
wire reached_barrier_limit;
|
||||
reg [`NUM_WARPS-1:0] total_barrier_stall;
|
||||
|
||||
reg [`NUM_WARPS-1:0] barrier_stall_mask[`NUM_BARRIERS-1:0]; // warps waiting on barrier
|
||||
wire reached_barrier_limit; // the expected number of warps reached the barrier
|
||||
|
||||
// wspawn
|
||||
reg [31:0] use_wspawn_pc;
|
||||
reg [`NUM_WARPS-1:0] use_wspawn;
|
||||
|
||||
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [31:0] warp_pc;
|
||||
wire [`NW_BITS-1:0] warp_to_schedule;
|
||||
wire scheduled_warp;
|
||||
|
||||
wire [`NUM_WARPS-1:0] total_warp_stalled;
|
||||
|
||||
wire stall_out;
|
||||
wire global_stall;
|
||||
wire real_schedule;
|
||||
reg didnt_split;
|
||||
|
||||
reg didnt_split;
|
||||
always @(*) begin
|
||||
warp_ready_n = warp_ready;
|
||||
if (warp_ctl_if.valid
|
||||
&& warp_ctl_if.tmc.valid
|
||||
&& (0 == warp_ctl_if.tmc.thread_mask)) begin
|
||||
warp_ready_n[warp_ctl_if.wid] = 0;
|
||||
end
|
||||
if (wstall_if.wstall) begin
|
||||
warp_ready_n[wstall_if.wid] = 0;
|
||||
end
|
||||
if (scheduled_warp) begin
|
||||
warp_ready_n[warp_to_schedule] = 0;
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
|
@ -57,42 +67,41 @@ module VX_warp_sched #(
|
|||
barrier_stall_mask[i] <= 0;
|
||||
end
|
||||
|
||||
use_wspawn_pc <= 0;
|
||||
use_wspawn <= 0;
|
||||
warp_pcs[0] <= `STARTUP_ADDR;
|
||||
warp_active[0] <= 1; // Activating first warp
|
||||
visible_active[0] <= 1; // Activating first warp
|
||||
thread_masks[0] <= 1; // Activating first thread in first warp
|
||||
warp_stalled <= 0;
|
||||
didnt_split <= 0;
|
||||
warp_lock <= 0;
|
||||
use_wspawn_pc <= 0;
|
||||
use_wspawn <= 0;
|
||||
warp_pcs[0] <= `STARTUP_ADDR;
|
||||
warp_active[0] <= 1; // Activating first warp
|
||||
warp_ready[0] <= 1; // set first warp as ready
|
||||
thread_masks[0] <= 1; // Activating first thread in first warp
|
||||
warp_stalled <= 0;
|
||||
didnt_split <= 0;
|
||||
fetch_lock <= 0;
|
||||
|
||||
for (integer i = 1; i < `NUM_WARPS; i++) begin
|
||||
warp_pcs[i] <= 0;
|
||||
warp_active[i] <= 0; // Activating first warp
|
||||
visible_active[i] <= 0; // Activating first warp
|
||||
thread_masks[i] <= 1; // Activating first thread in first warp
|
||||
warp_pcs[i] <= 0;
|
||||
warp_active[i] <= 0;
|
||||
warp_ready[i] <= 0;
|
||||
thread_masks[i] <= 0;
|
||||
end
|
||||
end else begin
|
||||
if (warp_ctl_if.wspawn.valid) begin
|
||||
if (warp_ctl_if.valid && warp_ctl_if.wspawn.valid) begin
|
||||
warp_active <= warp_ctl_if.wspawn.wmask;
|
||||
use_wspawn <= warp_ctl_if.wspawn.wmask & (~`NUM_WARPS'(1));
|
||||
use_wspawn_pc <= warp_ctl_if.wspawn.pc;
|
||||
end
|
||||
|
||||
if (warp_ctl_if.barrier.valid) begin
|
||||
if (warp_ctl_if.valid && warp_ctl_if.barrier.valid) begin
|
||||
warp_stalled[warp_ctl_if.wid] <= 0;
|
||||
if (reached_barrier_limit) begin
|
||||
barrier_stall_mask[warp_ctl_if.barrier.id] <= 0;
|
||||
end else begin
|
||||
barrier_stall_mask[warp_ctl_if.barrier.id][warp_ctl_if.wid] <= 1;
|
||||
end
|
||||
end else if (warp_ctl_if.tmc.valid) begin
|
||||
end else if (warp_ctl_if.valid && warp_ctl_if.tmc.valid) begin
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.tmc.thread_mask;
|
||||
warp_stalled[warp_ctl_if.wid] <= 0;
|
||||
if (0 == warp_ctl_if.tmc.thread_mask) begin
|
||||
warp_active[warp_ctl_if.wid] <= 0;
|
||||
visible_active[warp_ctl_if.wid] <= 0;
|
||||
warp_active[warp_ctl_if.wid] <= 0;
|
||||
end
|
||||
end else if (join_if.is_join && !didnt_split) begin
|
||||
if (!join_fall) begin
|
||||
|
@ -100,7 +109,7 @@ module VX_warp_sched #(
|
|||
end
|
||||
thread_masks[join_if.wid] <= join_tm;
|
||||
didnt_split <= 0;
|
||||
end else if (warp_ctl_if.split.valid) begin
|
||||
end else if (warp_ctl_if.valid && warp_ctl_if.split.valid) begin
|
||||
warp_stalled[warp_ctl_if.wid] <= 0;
|
||||
if (warp_ctl_if.split.diverged) begin
|
||||
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.split.then_mask;
|
||||
|
@ -110,26 +119,19 @@ module VX_warp_sched #(
|
|||
end
|
||||
end
|
||||
|
||||
if (use_wspawn[warp_to_schedule] && !global_stall) begin
|
||||
if (use_wspawn[warp_to_schedule] && scheduled_warp) begin
|
||||
use_wspawn[warp_to_schedule] <= 0;
|
||||
thread_masks[warp_to_schedule] <= 1;
|
||||
end
|
||||
|
||||
// Stalling the scheduling of warps
|
||||
if (wstall_if.wstall) begin
|
||||
warp_stalled[wstall_if.wid] <= 1;
|
||||
visible_active[wstall_if.wid] <= 0;
|
||||
warp_stalled[wstall_if.wid] <= 1;
|
||||
end
|
||||
|
||||
// Refilling active warps
|
||||
if (update_visible_active) begin
|
||||
visible_active <= warp_active & ~warp_stalled & ~total_barrier_stall & ~warp_lock;
|
||||
end
|
||||
|
||||
// Don't change state if stall
|
||||
if (!global_stall && real_schedule && (thread_mask != 0)) begin
|
||||
visible_active[warp_to_schedule] <= 0;
|
||||
warp_pcs[warp_to_schedule] <= warp_pc + 4;
|
||||
// update 'warp_ready' when a warp is scheduled (update round-robin warp schedule)
|
||||
if (scheduled_warp) begin
|
||||
warp_pcs[warp_to_schedule] <= warp_pc + 4;
|
||||
end
|
||||
|
||||
// Branch
|
||||
|
@ -140,38 +142,42 @@ module VX_warp_sched #(
|
|||
warp_stalled[branch_ctl_if.wid] <= 0;
|
||||
end
|
||||
|
||||
// Lock/Release
|
||||
if (scheduled_warp && !stall_out) begin
|
||||
warp_lock[warp_to_schedule] <= 1;
|
||||
// Lock warp until instruction decode to resolve branches
|
||||
if (scheduled_warp) begin
|
||||
fetch_lock[warp_to_schedule] <= 1;
|
||||
end
|
||||
if (ifetch_rsp_if.valid && ifetch_rsp_if.ready) begin
|
||||
warp_lock[ifetch_rsp_if.wid] <= 0;
|
||||
fetch_lock[ifetch_rsp_if.wid] <= 0;
|
||||
end
|
||||
|
||||
// reset 'warp_ready' when it goes to zero (reset round-robin warp schedule)
|
||||
warp_ready <= (| warp_ready_n) ? warp_ready_n : (warp_active & ~total_warp_stalled);
|
||||
end
|
||||
end
|
||||
|
||||
wire [`NUM_WARPS-1:0] b_mask = barrier_stall_mask[warp_ctl_if.barrier.id][`NUM_WARPS-1:0];
|
||||
wire [`NW_BITS:0] b_count;
|
||||
// calculate active barrier status
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
wire [`NW_BITS:0] active_barrier_count;
|
||||
`IGNORE_WARNINGS_END
|
||||
VX_countones #(
|
||||
.N(`NUM_WARPS)
|
||||
) barrier_count (
|
||||
.valids(b_mask),
|
||||
.count (b_count)
|
||||
);
|
||||
.valids(barrier_stall_mask[warp_ctl_if.barrier.id]),
|
||||
.count (active_barrier_count)
|
||||
);
|
||||
|
||||
wire [`NW_BITS:0] count_visible_active;
|
||||
wire reached_barrier_limit = (active_barrier_count[`NW_BITS-1:0] == warp_ctl_if.barrier.size_m1);
|
||||
|
||||
VX_countones #(
|
||||
.N(`NUM_WARPS)
|
||||
) num_visible (
|
||||
.valids(visible_active),
|
||||
.count (count_visible_active)
|
||||
);
|
||||
|
||||
assign reached_barrier_limit = (b_count == warp_ctl_if.barrier.num_warps);
|
||||
reg [`NUM_WARPS-1:0] total_barrier_stall;
|
||||
always @(*) begin
|
||||
total_barrier_stall = barrier_stall_mask[0];
|
||||
for (integer i = 1; i < `NUM_BARRIERS; ++i) begin
|
||||
total_barrier_stall |= barrier_stall_mask[i];
|
||||
end
|
||||
end
|
||||
|
||||
assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3];
|
||||
// split/join stack management
|
||||
|
||||
wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0];
|
||||
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[warp_ctl_if.wid]};
|
||||
|
@ -180,7 +186,8 @@ module VX_warp_sched #(
|
|||
assign {join_fall, join_pc, join_tm} = ipdom[join_if.wid];
|
||||
|
||||
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||
wire push = warp_ctl_if.split.valid
|
||||
wire push = warp_ctl_if.valid
|
||||
&& warp_ctl_if.split.valid
|
||||
&& warp_ctl_if.split.diverged
|
||||
&& (i == warp_ctl_if.wid);
|
||||
|
||||
|
@ -203,46 +210,40 @@ module VX_warp_sched #(
|
|||
);
|
||||
end
|
||||
|
||||
// calculate next warp schedule
|
||||
|
||||
wire schedule;
|
||||
|
||||
wire branch_hazard = schedule
|
||||
&& branch_ctl_if.valid
|
||||
&& branch_ctl_if.taken
|
||||
&& (branch_ctl_if.wid == warp_to_schedule);
|
||||
|
||||
assign real_schedule = schedule
|
||||
&& !warp_stalled[warp_to_schedule]
|
||||
&& !total_barrier_stall[warp_to_schedule]
|
||||
&& !warp_lock[0];
|
||||
|
||||
wire wstall_this_cycle = wstall_if.wstall && (wstall_if.wid == warp_to_schedule); // Maybe bug
|
||||
|
||||
assign update_visible_active = (0 == count_visible_active) && !(stall_out || wstall_this_cycle || branch_hazard || join_if.is_join);
|
||||
|
||||
assign global_stall = stall_out || wstall_this_cycle || branch_hazard || !real_schedule || join_if.is_join;
|
||||
|
||||
assign scheduled_warp = !(wstall_this_cycle || branch_hazard || !real_schedule || join_if.is_join) && !reset;
|
||||
|
||||
assign warp_pc = use_wspawn[warp_to_schedule] ? use_wspawn_pc : warp_pcs[warp_to_schedule];
|
||||
|
||||
assign thread_mask = global_stall ? 0 : (use_wspawn[warp_to_schedule] ? `NUM_THREADS'(1) : thread_masks[warp_to_schedule]);
|
||||
assign total_warp_stalled = warp_stalled | total_barrier_stall | fetch_lock;
|
||||
|
||||
wire [`NUM_WARPS-1:0] use_active = (count_visible_active != 0) ? visible_active :
|
||||
(warp_active & ~warp_stalled & ~total_barrier_stall & ~warp_lock);
|
||||
wire [`NUM_WARPS-1:0] use_ready = warp_ready & ~total_warp_stalled;
|
||||
|
||||
// Choosing a warp to schedule
|
||||
VX_fixed_arbiter #(
|
||||
.N(`NUM_WARPS)
|
||||
) choose_schedule (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (use_active),
|
||||
.requests (use_ready),
|
||||
.grant_index (warp_to_schedule),
|
||||
.grant_valid (schedule),
|
||||
`UNUSED_PIN (grant_onehot)
|
||||
);
|
||||
);
|
||||
|
||||
assign stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid;
|
||||
wire stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid;
|
||||
|
||||
wire branch_hazard = branch_ctl_if.valid
|
||||
&& branch_ctl_if.taken
|
||||
&& (branch_ctl_if.wid == warp_to_schedule);
|
||||
|
||||
wire wstall_this_cycle = wstall_if.wstall && (wstall_if.wid == warp_to_schedule);
|
||||
|
||||
wire stall = stall_out || wstall_this_cycle || branch_hazard || join_if.is_join;
|
||||
|
||||
assign scheduled_warp = schedule && ~stall;
|
||||
|
||||
wire [`NUM_THREADS-1:0] thread_mask = use_wspawn[warp_to_schedule] ? `NUM_THREADS'(1) : thread_masks[warp_to_schedule];
|
||||
|
||||
assign warp_pc = use_wspawn[warp_to_schedule] ? use_wspawn_pc : warp_pcs[warp_to_schedule];
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `NUM_THREADS + 32 + `NW_BITS)
|
||||
|
@ -251,7 +252,7 @@ module VX_warp_sched #(
|
|||
.reset (reset),
|
||||
.stall (stall_out),
|
||||
.flush (0),
|
||||
.in ({(| thread_mask), thread_mask, warp_pc, warp_to_schedule}),
|
||||
.in ({scheduled_warp, thread_mask, warp_pc, warp_to_schedule}),
|
||||
.out ({ifetch_req_if.valid, ifetch_req_if.thread_mask, ifetch_req_if.curr_PC, ifetch_req_if.wid})
|
||||
);
|
||||
|
||||
|
|
|
@ -8,165 +8,82 @@ module VX_writeback #(
|
|||
|
||||
// inputs
|
||||
VX_exu_to_cmt_if alu_commit_if,
|
||||
VX_exu_to_cmt_if bru_commit_if,
|
||||
VX_exu_to_cmt_if lsu_commit_if,
|
||||
VX_exu_to_cmt_if csr_commit_if,
|
||||
VX_exu_to_cmt_if mul_commit_if,
|
||||
VX_fpu_to_cmt_if fpu_commit_if,
|
||||
VX_exu_to_cmt_if gpu_commit_if,
|
||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||
|
||||
// outputs
|
||||
VX_wb_if writeback_if
|
||||
VX_writeback_if writeback_if
|
||||
);
|
||||
reg [`ISSUEQ_SIZE-1:0] wb_valid_table, wb_valid_table_n;
|
||||
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0][31:0] wb_data_table, wb_data_table_n;
|
||||
reg [`ISSUEQ_SIZE-1:0][`NW_BITS-1:0] wb_wid_table, wb_wid_table_n;
|
||||
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0] wb_thread_mask_table, wb_thread_mask_table_n;
|
||||
reg [`ISSUEQ_SIZE-1:0][31:0] wb_curr_PC_table, wb_curr_PC_table_n;
|
||||
reg [`ISSUEQ_SIZE-1:0][`NR_BITS-1:0] wb_rd_table, wb_rd_table_n;
|
||||
wire alu_valid = alu_commit_if.valid && alu_commit_if.wb;
|
||||
wire lsu_valid = lsu_commit_if.valid && lsu_commit_if.wb;
|
||||
wire csr_valid = csr_commit_if.valid && csr_commit_if.wb;
|
||||
wire mul_valid = mul_commit_if.valid && mul_commit_if.wb;
|
||||
wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;
|
||||
|
||||
reg wb_valid, wb_valid_n;
|
||||
reg [`NUM_THREADS-1:0][31:0] wb_data, wb_data_n;
|
||||
reg [`NW_BITS-1:0] wb_wid, wb_wid_n;
|
||||
reg [`NUM_THREADS-1:0] wb_thread_mask, wb_thread_mask_n;
|
||||
reg [31:0] wb_curr_PC, wb_curr_PC_n;
|
||||
reg [`NR_BITS-1:0] wb_rd, wb_rd_n;
|
||||
VX_writeback_if writeback_tmp_if();
|
||||
|
||||
reg [`ISTAG_BITS-1:0] wb_index;
|
||||
reg [`ISTAG_BITS-1:0] wb_index_n;
|
||||
assign writeback_tmp_if.valid = alu_valid ? alu_commit_if.valid :
|
||||
lsu_valid ? lsu_commit_if.valid :
|
||||
csr_valid ? csr_commit_if.valid :
|
||||
mul_valid ? mul_commit_if.valid :
|
||||
fpu_valid ? fpu_commit_if.valid :
|
||||
0;
|
||||
|
||||
always @(*) begin
|
||||
wb_valid_table_n = wb_valid_table;
|
||||
wb_wid_table_n = wb_wid_table;
|
||||
wb_thread_mask_table_n = wb_thread_mask_table;
|
||||
wb_curr_PC_table_n = wb_curr_PC_table;
|
||||
wb_rd_table_n = wb_rd_table;
|
||||
wb_data_table_n = wb_data_table;
|
||||
assign writeback_tmp_if.wid = alu_valid ? alu_commit_if.wid :
|
||||
lsu_valid ? lsu_commit_if.wid :
|
||||
csr_valid ? csr_commit_if.wid :
|
||||
mul_valid ? mul_commit_if.wid :
|
||||
fpu_valid ? fpu_commit_if.wid :
|
||||
0;
|
||||
|
||||
assign writeback_tmp_if.thread_mask = alu_valid ? alu_commit_if.thread_mask :
|
||||
lsu_valid ? lsu_commit_if.thread_mask :
|
||||
csr_valid ? csr_commit_if.thread_mask :
|
||||
mul_valid ? mul_commit_if.thread_mask :
|
||||
fpu_valid ? fpu_commit_if.thread_mask :
|
||||
0;
|
||||
|
||||
if (wb_valid) begin
|
||||
wb_valid_table_n[wb_index] = 0;
|
||||
end
|
||||
assign writeback_tmp_if.rd = alu_valid ? alu_commit_if.rd :
|
||||
lsu_valid ? lsu_commit_if.rd :
|
||||
csr_valid ? csr_commit_if.rd :
|
||||
mul_valid ? mul_commit_if.rd :
|
||||
fpu_valid ? fpu_commit_if.rd :
|
||||
0;
|
||||
|
||||
if (alu_commit_if.valid) begin
|
||||
wb_valid_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.wb;
|
||||
wb_thread_mask_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.thread_mask;
|
||||
wb_data_table_n [alu_commit_if.issue_tag] = alu_commit_if.data;
|
||||
wb_wid_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.wid;
|
||||
wb_curr_PC_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.curr_PC;
|
||||
wb_rd_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.rd;
|
||||
end
|
||||
assign writeback_tmp_if.data = alu_valid ? alu_commit_if.data :
|
||||
lsu_valid ? lsu_commit_if.data :
|
||||
csr_valid ? csr_commit_if.data :
|
||||
mul_valid ? mul_commit_if.data :
|
||||
fpu_valid ? fpu_commit_if.data :
|
||||
0;
|
||||
|
||||
if (bru_commit_if.valid) begin
|
||||
wb_valid_table_n [bru_commit_if.issue_tag] = cmt_to_issue_if.bru_data.wb;
|
||||
wb_thread_mask_table_n [bru_commit_if.issue_tag] = cmt_to_issue_if.bru_data.thread_mask;
|
||||
wb_data_table_n [bru_commit_if.issue_tag] = bru_commit_if.data;
|
||||
wb_wid_table_n [bru_commit_if.issue_tag] = cmt_to_issue_if.bru_data.wid;
|
||||
wb_curr_PC_table_n [bru_commit_if.issue_tag] = cmt_to_issue_if.bru_data.curr_PC;
|
||||
wb_rd_table_n [bru_commit_if.issue_tag] = cmt_to_issue_if.bru_data.rd;
|
||||
end
|
||||
wire stall = ~writeback_if.ready && writeback_if.valid;
|
||||
|
||||
if (lsu_commit_if.valid) begin
|
||||
wb_valid_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.wb;
|
||||
wb_thread_mask_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.thread_mask;
|
||||
wb_data_table_n [lsu_commit_if.issue_tag] = lsu_commit_if.data;
|
||||
wb_wid_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.wid;
|
||||
wb_curr_PC_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.curr_PC;
|
||||
wb_rd_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.rd;
|
||||
end
|
||||
VX_generic_register #(
|
||||
.N(1 + `NW_BITS + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32))
|
||||
) wb_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (0),
|
||||
.in ({writeback_tmp_if.valid, writeback_tmp_if.wid, writeback_tmp_if.thread_mask, writeback_tmp_if.rd, writeback_tmp_if.data}),
|
||||
.out ({writeback_if.valid, writeback_if.wid, writeback_if.thread_mask, writeback_if.rd, writeback_if.data})
|
||||
);
|
||||
|
||||
if (csr_commit_if.valid) begin
|
||||
wb_valid_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.wb;
|
||||
wb_thread_mask_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.thread_mask;
|
||||
wb_data_table_n [csr_commit_if.issue_tag] = csr_commit_if.data;
|
||||
wb_wid_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.wid;
|
||||
wb_curr_PC_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.curr_PC;
|
||||
wb_rd_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.rd;
|
||||
end
|
||||
|
||||
if (mul_commit_if.valid) begin
|
||||
wb_valid_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.wb;
|
||||
wb_thread_mask_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.thread_mask;
|
||||
wb_data_table_n [mul_commit_if.issue_tag] = mul_commit_if.data;
|
||||
wb_wid_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.wid;
|
||||
wb_curr_PC_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.curr_PC;
|
||||
wb_rd_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.rd;
|
||||
end
|
||||
|
||||
if (fpu_commit_if.valid) begin
|
||||
wb_valid_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.wb;
|
||||
wb_thread_mask_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.thread_mask;
|
||||
wb_data_table_n [fpu_commit_if.issue_tag] = fpu_commit_if.data;
|
||||
wb_wid_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.wid;
|
||||
wb_curr_PC_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.curr_PC;
|
||||
wb_rd_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.rd;
|
||||
end
|
||||
|
||||
if (gpu_commit_if.valid) begin
|
||||
wb_valid_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.wb;
|
||||
wb_thread_mask_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.thread_mask;
|
||||
wb_data_table_n [gpu_commit_if.issue_tag] = gpu_commit_if.data;
|
||||
wb_wid_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.wid;
|
||||
wb_curr_PC_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.curr_PC;
|
||||
wb_rd_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.rd;
|
||||
end
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
wb_index_n = 0;
|
||||
wb_valid_n = 0;
|
||||
wb_thread_mask_n = {`NUM_THREADS{1'bx}};
|
||||
wb_wid_n = {`NW_BITS{1'bx}};
|
||||
wb_curr_PC_n = {32{1'bx}};
|
||||
wb_data_n = {(`NUM_THREADS * 32){1'bx}};
|
||||
for (integer i = `ISSUEQ_SIZE-1; i >= 0; i--) begin
|
||||
if (wb_valid_table_n[i]) begin
|
||||
wb_index_n = `ISTAG_BITS'(i);
|
||||
wb_valid_n = 1;
|
||||
wb_thread_mask_n= wb_thread_mask_table_n[i];
|
||||
wb_wid_n = wb_wid_table_n[i];
|
||||
wb_curr_PC_n = wb_curr_PC_table_n[i];
|
||||
wb_rd_n = wb_rd_table_n[i];
|
||||
wb_data_n = wb_data_table_n[i];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wb_valid_table <= 0;
|
||||
wb_index <= 0;
|
||||
wb_valid <= 0;
|
||||
end else begin
|
||||
wb_valid_table <= wb_valid_table_n;
|
||||
wb_thread_mask_table <= wb_thread_mask_table_n;
|
||||
wb_wid_table <= wb_wid_table_n;
|
||||
wb_curr_PC_table <= wb_curr_PC_table_n;
|
||||
wb_rd_table <= wb_rd_table_n;
|
||||
wb_data_table <= wb_data_table_n;
|
||||
|
||||
wb_index <= wb_index_n;
|
||||
wb_valid <= wb_valid_n;
|
||||
wb_thread_mask <= wb_thread_mask_n;
|
||||
wb_wid <= wb_wid_n;
|
||||
wb_curr_PC <= wb_curr_PC_n;
|
||||
wb_rd <= wb_rd_n;
|
||||
wb_data <= wb_data_n;
|
||||
end
|
||||
end
|
||||
|
||||
// writeback request
|
||||
assign writeback_if.valid = wb_valid;
|
||||
assign writeback_if.thread_mask = wb_thread_mask;
|
||||
assign writeback_if.wid = wb_wid;
|
||||
assign writeback_if.curr_PC = wb_curr_PC;
|
||||
assign writeback_if.rd = wb_rd;
|
||||
assign writeback_if.data = wb_data;
|
||||
assign alu_commit_if.ready = !stall;
|
||||
assign lsu_commit_if.ready = !stall && !alu_valid;
|
||||
assign csr_commit_if.ready = !stall && !alu_valid && !lsu_valid;
|
||||
assign mul_commit_if.ready = !stall && !alu_valid && !lsu_valid && !csr_valid;
|
||||
assign fpu_commit_if.ready = !stall && !alu_valid && !lsu_valid && !csr_valid && !mul_valid;
|
||||
assign gpu_commit_if.ready = 1'b1;
|
||||
|
||||
// special workaround to get RISC-V tests Pass/Fail status
|
||||
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;
|
||||
always @(posedge clk) begin
|
||||
if (writeback_if.valid) begin
|
||||
if (writeback_if.valid && writeback_if.ready) begin
|
||||
last_wb_value[writeback_if.rd] <= writeback_if.data[0];
|
||||
end
|
||||
end
|
||||
|
|
24
hw/rtl/cache/VX_bank.v
vendored
24
hw/rtl/cache/VX_bank.v
vendored
|
@ -106,7 +106,6 @@ module VX_bank #(
|
|||
`ifdef DBG_CORE_REQ_INFO
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[31:0] debug_pc_st0;
|
||||
wire debug_wb_st0;
|
||||
wire[`NR_BITS-1:0] debug_rd_st0;
|
||||
wire[`NW_BITS-1:0] debug_wid_st0;
|
||||
wire debug_rw_st0;
|
||||
|
@ -115,7 +114,6 @@ module VX_bank #(
|
|||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0;
|
||||
|
||||
wire[31:0] debug_pc_st1e;
|
||||
wire debug_wb_st1e;
|
||||
wire[`NR_BITS-1:0] debug_rd_st1e;
|
||||
wire[`NW_BITS-1:0] debug_wid_st1e;
|
||||
wire debug_rw_st1e;
|
||||
|
@ -124,7 +122,6 @@ module VX_bank #(
|
|||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e;
|
||||
|
||||
wire[31:0] debug_pc_st2;
|
||||
wire debug_wb_st2;
|
||||
wire[`NR_BITS-1:0] debug_rd_st2;
|
||||
wire[`NW_BITS-1:0] debug_wid_st2;
|
||||
wire debug_rw_st2;
|
||||
|
@ -359,7 +356,7 @@ module VX_bank #(
|
|||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_pc_st0, debug_wb_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
|
||||
assign {debug_pc_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
|
||||
end
|
||||
`endif
|
||||
|
||||
|
@ -446,7 +443,6 @@ module VX_bank #(
|
|||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
.debug_pc_st1e(debug_pc_st1e),
|
||||
.debug_wb_st1e(debug_wb_st1e),
|
||||
.debug_rd_st1e(debug_rd_st1e),
|
||||
.debug_wid_st1e(debug_wid_st1e),
|
||||
.debug_tagid_st1e(debug_tagid_st1e),
|
||||
|
@ -488,7 +484,7 @@ module VX_bank #(
|
|||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_wid_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
|
||||
assign {debug_pc_st1e, debug_rd_st1e, debug_wid_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
|
||||
end
|
||||
`endif
|
||||
|
||||
|
@ -529,7 +525,7 @@ module VX_bank #(
|
|||
|
||||
`ifdef DBG_CORE_REQ_INFO
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_pc_st2, debug_wb_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
|
||||
assign {debug_pc_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
|
||||
end
|
||||
`endif
|
||||
|
||||
|
@ -740,25 +736,25 @@ module VX_bank #(
|
|||
`ifdef DBG_PRINT_CACHE_BANK
|
||||
always @(posedge clk) begin
|
||||
if ((|core_req_valid) && core_req_ready) begin
|
||||
$display("%t: bank%0d:%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr[0], BANK_ID), core_req_tag);
|
||||
$display("%t: cache%0d:%0d core req: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(core_req_addr[0], BANK_ID), core_req_tag);
|
||||
end
|
||||
if (core_rsp_valid && core_rsp_ready) begin
|
||||
$display("%t: bank%0d:%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
|
||||
$display("%t: cache%0d:%0d core rsp: tag=%0h, data=%0h", $time, CACHE_ID, BANK_ID, core_rsp_tag, core_rsp_data);
|
||||
end
|
||||
if (dram_fill_req_valid && dram_fill_req_ready) begin
|
||||
$display("%t: bank%0d:%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID));
|
||||
$display("%t: cache%0d:%0d dram_fill req: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_req_addr, BANK_ID));
|
||||
end
|
||||
if (dram_wb_req_valid && dram_wb_req_ready) begin
|
||||
$display("%t: bank%0d:%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data);
|
||||
$display("%t: cache%0d:%0d dram_wb req: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_wb_req_addr, BANK_ID), dram_wb_req_data);
|
||||
end
|
||||
if (dram_fill_rsp_valid && dram_fill_rsp_ready) begin
|
||||
$display("%t: bank%0d:%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data);
|
||||
$display("%t: cache%0d:%0d dram_fill rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_fill_rsp_addr, BANK_ID), dram_fill_rsp_data);
|
||||
end
|
||||
if (snp_req_valid && snp_req_ready) begin
|
||||
$display("%t: bank%0d:%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag);
|
||||
$display("%t: cache%0d:%0d snp req: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(snp_req_addr, BANK_ID), snp_req_invalidate, snp_req_tag);
|
||||
end
|
||||
if (snp_rsp_valid && snp_rsp_ready) begin
|
||||
$display("%t: bank%0d:%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
|
||||
$display("%t: cache%0d:%0d snp rsp: tag=%0h", $time, CACHE_ID, BANK_ID, snp_rsp_tag);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
|
3
hw/rtl/cache/VX_cache.v
vendored
3
hw/rtl/cache/VX_cache.v
vendored
|
@ -130,14 +130,13 @@ module VX_cache #(
|
|||
`ifdef DBG_CORE_REQ_INFO
|
||||
/* verilator lint_off UNUSED */
|
||||
wire[31:0] debug_core_req_use_pc;
|
||||
wire debug_core_req_wb;
|
||||
wire[`NR_BITS-1:0] debug_core_req_rd;
|
||||
wire[`NW_BITS-1:0] debug_core_req_wid;
|
||||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_core_req_idx;
|
||||
/* verilator lint_on UNUSED */
|
||||
|
||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||
assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rd, debug_core_req_wid, debug_core_req_idx} = core_req_tag[0];
|
||||
assign {debug_core_req_use_pc, debug_core_req_rd, debug_core_req_wid, debug_core_req_idx} = core_req_tag[0];
|
||||
end
|
||||
`endif
|
||||
|
||||
|
|
2
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
2
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
|
@ -157,7 +157,7 @@ module VX_cache_miss_resrv #(
|
|||
`ifdef DBG_PRINT_CACHE_MSRQ
|
||||
always @(posedge clk) begin
|
||||
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
|
||||
$write("%t: bank%0d:%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
|
||||
$write("%t: cache%0d:%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
|
||||
for (integer j = 0; j < MRVQ_SIZE; j++) begin
|
||||
if (valid_table[j]) begin
|
||||
$write(" ");
|
||||
|
|
9
hw/rtl/cache/VX_snp_forwarder.v
vendored
9
hw/rtl/cache/VX_snp_forwarder.v
vendored
|
@ -65,12 +65,13 @@ module VX_snp_forwarder #(
|
|||
) snp_fwd_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.write_data ({snp_req_addr, snp_req_invalidate, snp_req_tag}),
|
||||
.write_addr (sfq_write_addr),
|
||||
.acquire_slot (sfq_acquire),
|
||||
.release_slot (sfq_release),
|
||||
.write_addr (sfq_write_addr),
|
||||
.acquire_slot (sfq_acquire),
|
||||
.read_addr (sfq_read_addr),
|
||||
.write_data ({snp_req_addr, snp_req_invalidate, snp_req_tag}),
|
||||
.read_data ({snp_rsp_addr, snp_rsp_invalidate, snp_rsp_tag}),
|
||||
.release_addr (sfq_read_addr),
|
||||
.release_slot (sfq_release),
|
||||
.full (sfq_full)
|
||||
);
|
||||
|
||||
|
|
9
hw/rtl/cache/VX_tag_data_access.v
vendored
9
hw/rtl/cache/VX_tag_data_access.v
vendored
|
@ -28,7 +28,6 @@ module VX_tag_data_access #(
|
|||
`ifdef DBG_CORE_REQ_INFO
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
input wire[31:0] debug_pc_st1e,
|
||||
input wire debug_wb_st1e,
|
||||
input wire[`NR_BITS-1:0] debug_rd_st1e,
|
||||
input wire[`NW_BITS-1:0] debug_wid_st1e,
|
||||
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e,
|
||||
|
@ -217,15 +216,15 @@ module VX_tag_data_access #(
|
|||
if (valid_req_st1e) begin
|
||||
if ((| use_write_enable)) begin
|
||||
if (writefill_st1e) begin
|
||||
$display("%t: bank%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, use_write_data);
|
||||
$display("%t: cache%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, use_write_data);
|
||||
end else begin
|
||||
$display("%t: bank%0d:%0d store-write: wid=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, wordsel_st1e, writeword_st1e);
|
||||
$display("%t: cache%0d:%0d store-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, wordsel_st1e, writeword_st1e);
|
||||
end
|
||||
end else
|
||||
if (miss_st1e) begin
|
||||
$display("%t: bank%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e);
|
||||
$display("%t: cache%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_rd_st1e, dirty_st1e);
|
||||
end else begin
|
||||
$display("%t: bank%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, readaddr_st10, qual_read_tag_st1, wordsel_st1e, qual_read_data_st1);
|
||||
$display("%t: cache%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_rd_st1e, dirty_st1e, readaddr_st10, qual_read_tag_st1, wordsel_st1e, qual_read_data_st1);
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,14 +1,16 @@
|
|||
`include "VX_define.vh"
|
||||
`include "dspba_library_ver.sv"
|
||||
|
||||
module VX_fp_fpga (
|
||||
module VX_fp_fpga #(
|
||||
parameter TAGW = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
input wire valid_in,
|
||||
output wire ready_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op,
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
|
@ -21,7 +23,7 @@ module VX_fp_fpga (
|
|||
output wire has_fflags,
|
||||
output fflags_t [`NUM_THREADS-1:0] fflags,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
output wire [TAGW-1:0] tag_out,
|
||||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
|
@ -31,7 +33,7 @@ module VX_fp_fpga (
|
|||
|
||||
wire [NUM_FPC-1:0] per_core_ready_in;
|
||||
wire [NUM_FPC-1:0][`NUM_THREADS-1:0][31:0] per_core_result;
|
||||
wire [NUM_FPC-1:0][`ISTAG_BITS-1:0] per_core_tag_out;
|
||||
wire [NUM_FPC-1:0][TAGW-1:0] per_core_tag_out;
|
||||
wire [NUM_FPC-1:0] per_core_ready_out;
|
||||
wire [NUM_FPC-1:0] per_core_valid_out;
|
||||
|
||||
|
@ -62,7 +64,10 @@ module VX_fp_fpga (
|
|||
endcase
|
||||
end
|
||||
|
||||
VX_fp_noncomp fp_noncomp (
|
||||
VX_fp_noncomp #(
|
||||
.TAGW (TAGW),
|
||||
.LANES(`NUM_THREADS)
|
||||
) fp_noncomp (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 0)),
|
||||
|
@ -80,7 +85,10 @@ module VX_fp_fpga (
|
|||
.valid_out (per_core_valid_out[0])
|
||||
);
|
||||
|
||||
VX_fp_add fp_add (
|
||||
VX_fp_add #(
|
||||
.TAGW (TAGW),
|
||||
.LANES(`NUM_THREADS)
|
||||
) fp_add (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 1)),
|
||||
|
@ -94,7 +102,10 @@ module VX_fp_fpga (
|
|||
.valid_out (per_core_valid_out[1])
|
||||
);
|
||||
|
||||
VX_fp_sub fp_sub (
|
||||
VX_fp_sub #(
|
||||
.TAGW (TAGW),
|
||||
.LANES(`NUM_THREADS)
|
||||
) fp_sub (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 2)),
|
||||
|
@ -108,7 +119,10 @@ module VX_fp_fpga (
|
|||
.valid_out (per_core_valid_out[2])
|
||||
);
|
||||
|
||||
VX_fp_mul fp_mul (
|
||||
VX_fp_mul #(
|
||||
.TAGW (TAGW),
|
||||
.LANES(`NUM_THREADS)
|
||||
) fp_mul (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 3)),
|
||||
|
@ -122,7 +136,10 @@ module VX_fp_fpga (
|
|||
.valid_out (per_core_valid_out[3])
|
||||
);
|
||||
|
||||
VX_fp_madd fp_madd (
|
||||
VX_fp_madd #(
|
||||
.TAGW (TAGW),
|
||||
.LANES(`NUM_THREADS)
|
||||
) fp_madd (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 4)),
|
||||
|
@ -138,7 +155,10 @@ module VX_fp_fpga (
|
|||
.valid_out (per_core_valid_out[4])
|
||||
);
|
||||
|
||||
VX_fp_msub fp_msub (
|
||||
VX_fp_msub #(
|
||||
.TAGW (TAGW),
|
||||
.LANES(`NUM_THREADS)
|
||||
) fp_msub (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 5)),
|
||||
|
@ -154,7 +174,10 @@ module VX_fp_fpga (
|
|||
.valid_out (per_core_valid_out[5])
|
||||
);
|
||||
|
||||
VX_fp_div fp_div (
|
||||
VX_fp_div #(
|
||||
.TAGW (TAGW),
|
||||
.LANES(`NUM_THREADS)
|
||||
) fp_div (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 6)),
|
||||
|
@ -168,7 +191,10 @@ module VX_fp_fpga (
|
|||
.valid_out (per_core_valid_out[6])
|
||||
);
|
||||
|
||||
VX_fp_sqrt fp_sqrt (
|
||||
VX_fp_sqrt #(
|
||||
.TAGW (TAGW),
|
||||
.LANES(`NUM_THREADS)
|
||||
) fp_sqrt (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 7)),
|
||||
|
@ -181,7 +207,10 @@ module VX_fp_fpga (
|
|||
.valid_out (per_core_valid_out[7])
|
||||
);
|
||||
|
||||
VX_fp_ftoi fp_ftoi (
|
||||
VX_fp_ftoi #(
|
||||
.TAGW (TAGW),
|
||||
.LANES(`NUM_THREADS)
|
||||
) fp_ftoi (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 8)),
|
||||
|
@ -194,7 +223,10 @@ module VX_fp_fpga (
|
|||
.valid_out (per_core_valid_out[8])
|
||||
);
|
||||
|
||||
VX_fp_ftou fp_ftou (
|
||||
VX_fp_ftou #(
|
||||
.TAGW (TAGW),
|
||||
.LANES(`NUM_THREADS)
|
||||
) fp_ftou (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 9)),
|
||||
|
@ -207,7 +239,10 @@ module VX_fp_fpga (
|
|||
.valid_out (per_core_valid_out[9])
|
||||
);
|
||||
|
||||
VX_fp_itof fp_itof (
|
||||
VX_fp_itof #(
|
||||
.TAGW (TAGW),
|
||||
.LANES(`NUM_THREADS)
|
||||
) fp_itof (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 10)),
|
||||
|
@ -220,7 +255,10 @@ module VX_fp_fpga (
|
|||
.valid_out (per_core_valid_out[10])
|
||||
);
|
||||
|
||||
VX_fp_utof fp_utof (
|
||||
VX_fp_utof #(
|
||||
.TAGW (TAGW),
|
||||
.LANES(`NUM_THREADS)
|
||||
) fp_utof (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in && (core_select == 11)),
|
||||
|
@ -248,21 +286,10 @@ module VX_fp_fpga (
|
|||
assign per_core_ready_out[i] = ready_out && (i == fp_index);
|
||||
end
|
||||
|
||||
wire tmp_valid = fp_valid;
|
||||
wire [`ISTAG_BITS-1:0] tmp_tag = per_core_tag_out[fp_index];
|
||||
wire [`NUM_THREADS-1:0][31:0] tmp_result = per_core_result[fp_index];
|
||||
wire tmp_has_fflags = fpnew_has_fflags && (fp_index == 0);
|
||||
fflags_t [`NUM_THREADS-1:0] tmp_flags = fpnew_fflags;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + (`NUM_THREADS * 32) + 1 + `FFG_BITS)
|
||||
) nc_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.in ({tmp_valid, tmp_tag, tmp_result, tmp_has_fflags, tmp_fflags}),
|
||||
.out ({valid_out, tag_out, result, has_fflags, fflags})
|
||||
);
|
||||
assign valid_out = fp_valid;
|
||||
assign tag_out = per_core_tag_out[fp_index];
|
||||
assign result = per_core_result[fp_index];
|
||||
assign has_fflags = fpnew_has_fflags && (fp_index == 0);
|
||||
assign fflags = fpnew_fflags;
|
||||
|
||||
endmodule
|
|
@ -1,25 +1,28 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fp_noncomp (
|
||||
module VX_fp_noncomp #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op,
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
input wire [LANES-1:0][31:0] datab,
|
||||
output wire [LANES-1:0][31:0] result,
|
||||
|
||||
output wire has_fflags,
|
||||
output fflags_t [`NUM_THREADS-1:0] fflags,
|
||||
output fflags_t [LANES-1:0] fflags,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
output wire [TAGW-1:0] tag_out,
|
||||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
|
@ -35,21 +38,21 @@ module VX_fp_noncomp (
|
|||
SIG_NAN = 32'h00000100,
|
||||
QUT_NAN = 32'h00000200;
|
||||
|
||||
wire [`NUM_THREADS-1:0] a_sign, b_sign;
|
||||
wire [`NUM_THREADS-1:0][7:0] a_exponent, b_exponent;
|
||||
wire [`NUM_THREADS-1:0][22:0] a_mantissa, b_mantissa;
|
||||
fp_type_t [`NUM_THREADS-1:0] a_type, b_type;
|
||||
wire [LANES-1:0] a_sign, b_sign;
|
||||
wire [LANES-1:0][7:0] a_exponent, b_exponent;
|
||||
wire [LANES-1:0][22:0] a_mantissa, b_mantissa;
|
||||
fp_type_t [LANES-1:0] a_type, b_type;
|
||||
|
||||
wire [`NUM_THREADS-1:0] a_smaller, ab_equal;
|
||||
wire [LANES-1:0] a_smaller, ab_equal;
|
||||
|
||||
reg [`NUM_THREADS-1:0][31:0] fclass_mask; // generate a 10-bit mask for integer reg
|
||||
reg [`NUM_THREADS-1:0][31:0] fminmax_res; // result of fmin/fmax
|
||||
reg [`NUM_THREADS-1:0][31:0] fsgnj_res; // result of sign injection
|
||||
reg [`NUM_THREADS-1:0][31:0] fcmp_res; // result of comparison
|
||||
reg [`NUM_THREADS-1:0][ 4:0] fcmp_excp; // exception of comparison
|
||||
reg [LANES-1:0][31:0] fclass_mask; // generate a 10-bit mask for integer reg
|
||||
reg [LANES-1:0][31:0] fminmax_res; // result of fmin/fmax
|
||||
reg [LANES-1:0][31:0] fsgnj_res; // result of sign injection
|
||||
reg [LANES-1:0][31:0] fcmp_res; // result of comparison
|
||||
reg [LANES-1:0][ 4:0] fcmp_excp; // exception of comparison
|
||||
|
||||
// Setup
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
assign a_sign[i] = dataa[i][31];
|
||||
assign a_exponent[i] = dataa[i][30:23];
|
||||
assign a_mantissa[i] = dataa[i][22:0];
|
||||
|
@ -75,7 +78,7 @@ module VX_fp_noncomp (
|
|||
end
|
||||
|
||||
// FCLASS
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
if (a_type[i].is_normal) begin
|
||||
fclass_mask[i] = a_sign[i] ? NEG_NORM : POS_NORM;
|
||||
|
@ -99,7 +102,7 @@ module VX_fp_noncomp (
|
|||
end
|
||||
|
||||
// Min/Max
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
if (a_type[i].is_nan && b_type[i].is_nan)
|
||||
fminmax_res[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
|
||||
|
@ -118,7 +121,7 @@ module VX_fp_noncomp (
|
|||
end
|
||||
|
||||
// Sign Injection
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
case (op)
|
||||
`FPU_SGNJ: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||
|
@ -130,7 +133,7 @@ module VX_fp_noncomp (
|
|||
end
|
||||
|
||||
// Comparison
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
case (frm)
|
||||
`FRM_RNE: begin
|
||||
|
@ -176,8 +179,8 @@ module VX_fp_noncomp (
|
|||
|
||||
reg tmp_valid;
|
||||
reg tmp_has_fflags;
|
||||
fflags_t [`NUM_THREADS-1:0] tmp_fflags;
|
||||
reg [`NUM_THREADS-1:0][31:0] tmp_result;
|
||||
fflags_t [LANES-1:0] tmp_fflags;
|
||||
reg [LANES-1:0][31:0] tmp_result;
|
||||
|
||||
always @(*) begin
|
||||
case (op)
|
||||
|
@ -191,7 +194,7 @@ module VX_fp_noncomp (
|
|||
endcase
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
always @(*) begin
|
||||
tmp_valid = 1'b1;
|
||||
case (op)
|
||||
|
@ -228,7 +231,7 @@ module VX_fp_noncomp (
|
|||
assign ready_in = ~stall;
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + `ISTAG_BITS + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS))
|
||||
.N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS))
|
||||
) nc_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
`include "defs_div_sqrt_mvp.sv"
|
||||
|
||||
module VX_fpnew #(
|
||||
parameter TAGW = 1,
|
||||
parameter FMULADD = 1,
|
||||
parameter FDIVSQRT = 1,
|
||||
parameter FNONCOMP = 1,
|
||||
|
@ -14,7 +15,7 @@ module VX_fpnew #(
|
|||
input wire valid_in,
|
||||
output wire ready_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`FPU_BITS-1:0] op,
|
||||
input wire [`FRM_BITS-1:0] frm,
|
||||
|
@ -27,7 +28,7 @@ module VX_fpnew #(
|
|||
output wire has_fflags,
|
||||
output fflags_t [`NUM_THREADS-1:0] fflags,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
output wire [TAGW-1:0] tag_out,
|
||||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
|
@ -66,7 +67,7 @@ module VX_fpnew #(
|
|||
wire fpu_ready_in, fpu_valid_in;
|
||||
wire fpu_ready_out, fpu_valid_out;
|
||||
|
||||
reg [`ISTAG_BITS-1:0] fpu_tag_in, fpu_tag_out;
|
||||
reg [TAGW-1:0] fpu_tag_in, fpu_tag_out;
|
||||
|
||||
reg [2:0][`NUM_THREADS-1:0][31:0] fpu_operands;
|
||||
|
||||
|
@ -77,9 +78,6 @@ module VX_fpnew #(
|
|||
wire [`NUM_THREADS-1:0][31:0] fpu_result;
|
||||
fpnew_pkg::status_t [0:`NUM_THREADS-1] fpu_status;
|
||||
|
||||
wire is_class_op, is_class_op_out;
|
||||
assign is_class_op = (op == `FPU_CLASS);
|
||||
|
||||
reg [FOP_BITS-1:0] fpu_op;
|
||||
reg [`FRM_BITS-1:0] fpu_rnd;
|
||||
reg fpu_op_mod;
|
||||
|
@ -136,7 +134,7 @@ module VX_fpnew #(
|
|||
fpnew_top #(
|
||||
.Features (FPU_FEATURES),
|
||||
.Implementation (FPU_IMPLEMENTATION),
|
||||
.TagType (logic[`ISTAG_BITS+1+1-1:0])
|
||||
.TagType (logic[TAGW+1+1-1:0])
|
||||
) fpnew_core (
|
||||
.clk_i (clk),
|
||||
.rst_ni (1'b1),
|
||||
|
@ -148,13 +146,13 @@ module VX_fpnew #(
|
|||
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
|
||||
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)),
|
||||
.vectorial_op_i (1'b0),
|
||||
.tag_i ({fpu_tag_in, fpu_has_fflags, is_class_op}),
|
||||
.tag_i ({fpu_tag_in, fpu_has_fflags}),
|
||||
.in_valid_i (fpu_valid_in),
|
||||
.in_ready_o (fpu_ready_in),
|
||||
.flush_i (reset),
|
||||
.result_o (fpu_result[0]),
|
||||
.status_o (fpu_status[0]),
|
||||
.tag_o ({fpu_tag_out, fpu_has_fflags_out, is_class_op_out}),
|
||||
.tag_o ({fpu_tag_out, fpu_has_fflags_out}),
|
||||
.out_valid_o (fpu_valid_out),
|
||||
.out_ready_i (fpu_ready_out),
|
||||
`UNUSED_PIN (busy_o)
|
||||
|
|
|
@ -1,19 +1,22 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fp_add (
|
||||
module VX_fp_add #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
input wire [LANES-1:0][31:0] datab,
|
||||
output wire [LANES-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
output wire [TAGW-1:0] tag_out,
|
||||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
|
@ -22,7 +25,7 @@ module VX_fp_add (
|
|||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
twentynm_fp_mac mac_fp_wys (
|
||||
// inputs
|
||||
.accumulate(),
|
||||
|
@ -65,7 +68,7 @@ module VX_fp_add (
|
|||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(`ISTAG_BITS + 1),
|
||||
.DATAW(TAGW + 1),
|
||||
.DEPTH(1)
|
||||
) shift_reg (
|
||||
.clk(clk),
|
||||
|
|
|
@ -1,19 +1,22 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fp_div (
|
||||
module VX_fp_div #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
input wire [LANES-1:0][31:0] datab,
|
||||
output wire [LANES-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
output wire [TAGW-1:0] tag_out,
|
||||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
|
@ -22,7 +25,7 @@ module VX_fp_div (
|
|||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
acl_fp_div fdiv (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
|
@ -34,7 +37,7 @@ module VX_fp_div (
|
|||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(`ISTAG_BITS + 1),
|
||||
.DATAW(TAGW + 1),
|
||||
.DEPTH(`LATENCY_FDIV)
|
||||
) shift_reg (
|
||||
.clk(clk),
|
||||
|
|
|
@ -1,18 +1,21 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fp_ftoi (
|
||||
module VX_fp_ftoi #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
output wire [LANES-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
output wire [TAGW-1:0] tag_out,
|
||||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
|
@ -21,7 +24,7 @@ module VX_fp_ftoi (
|
|||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
acl_fp_ftoi ftoi (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
|
@ -32,7 +35,7 @@ module VX_fp_ftoi (
|
|||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(`ISTAG_BITS + 1),
|
||||
.DATAW(TAGW + 1),
|
||||
.DEPTH(`LATENCY_FTOI)
|
||||
) shift_reg (
|
||||
.clk(clk),
|
||||
|
|
|
@ -1,18 +1,21 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fp_ftou (
|
||||
module VX_fp_ftou #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
output wire [LANES-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
output wire [TAGW-1:0] tag_out,
|
||||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
|
@ -21,7 +24,7 @@ module VX_fp_ftou (
|
|||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
acl_fp_ftou ftou (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
|
@ -32,7 +35,7 @@ module VX_fp_ftou (
|
|||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(`ISTAG_BITS + 1),
|
||||
.DATAW(TAGW + 1),
|
||||
.DEPTH(`LATENCY_FTOU)
|
||||
) shift_reg (
|
||||
.clk(clk),
|
||||
|
|
|
@ -1,18 +1,21 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fp_itof (
|
||||
module VX_fp_itof #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
output wire [LANES-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
output wire [TAGW-1:0] tag_out,
|
||||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
|
@ -21,7 +24,7 @@ module VX_fp_itof (
|
|||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
acl_fp_itof itof (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
|
@ -32,7 +35,7 @@ module VX_fp_itof (
|
|||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(`ISTAG_BITS + 1),
|
||||
.DATAW(TAGW + 1),
|
||||
.DEPTH(`LATENCY_ITOF)
|
||||
) shift_reg (
|
||||
.clk(clk),
|
||||
|
|
|
@ -1,22 +1,25 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fp_madd (
|
||||
module VX_fp_madd #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datac,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
input wire [LANES-1:0][31:0] datab,
|
||||
input wire [LANES-1:0][31:0] datac,
|
||||
output wire [LANES-1:0][31:0] result,
|
||||
|
||||
input wire negate,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
output wire [TAGW-1:0] tag_out,
|
||||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
|
@ -24,11 +27,11 @@ module VX_fp_madd (
|
|||
wire enable0, enable1;
|
||||
assign ready_in = enable0 && enable1;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] result_st0, result_st1;
|
||||
wire [`ISTAG_BITS-1:0] out_tag_st0, out_tag_st1;
|
||||
wire [LANES-1:0][31:0] result_st0, result_st1;
|
||||
wire [TAGW-1:0] out_tag_st0, out_tag_st1;
|
||||
wire in_valid_st0, out_valid_st0, out_valid_st1;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
twentynm_fp_mac mac_fp_wys0 (
|
||||
// inputs
|
||||
.accumulate(),
|
||||
|
@ -111,7 +114,7 @@ module VX_fp_madd (
|
|||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(`ISTAG_BITS + 1 + 1),
|
||||
.DATAW(TAGW + 1 + 1),
|
||||
.DEPTH(1)
|
||||
) shift_reg0 (
|
||||
.clk(clk),
|
||||
|
@ -122,7 +125,7 @@ module VX_fp_madd (
|
|||
);
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(`ISTAG_BITS + 1),
|
||||
.DATAW(TAGW + 1),
|
||||
.DEPTH(1)
|
||||
) shift_reg1 (
|
||||
.clk(clk),
|
||||
|
|
|
@ -1,22 +1,25 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fp_msub (
|
||||
module VX_fp_msub #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datac,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
input wire [LANES-1:0][31:0] datab,
|
||||
input wire [LANES-1:0][31:0] datac,
|
||||
output wire [LANES-1:0][31:0] result,
|
||||
|
||||
input wire negate,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
output wire [TAGW-1:0] tag_out,
|
||||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
|
@ -24,11 +27,11 @@ module VX_fp_msub (
|
|||
wire enable0, enable1;
|
||||
assign ready_in = enable0 && enable1;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] result_st0, result_st1;
|
||||
wire [`ISTAG_BITS-1:0] out_tag_st0, out_tag_st1;
|
||||
wire [LANES-1:0][31:0] result_st0, result_st1;
|
||||
wire [TAGW-1:0] out_tag_st0, out_tag_st1;
|
||||
wire in_valid_st0, out_valid_st0, out_valid_st1;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
twentynm_fp_mac mac_fp_wys0 (
|
||||
// inputs
|
||||
.accumulate(),
|
||||
|
@ -111,7 +114,7 @@ module VX_fp_msub (
|
|||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(`ISTAG_BITS + 1 + 1),
|
||||
.DATAW(TAGW + 1 + 1),
|
||||
.DEPTH(1)
|
||||
) shift_reg0 (
|
||||
.clk(clk),
|
||||
|
@ -122,7 +125,7 @@ module VX_fp_msub (
|
|||
);
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(`ISTAG_BITS + 1),
|
||||
.DATAW(TAGW + 1),
|
||||
.DEPTH(1)
|
||||
) shift_reg1 (
|
||||
.clk(clk),
|
||||
|
|
|
@ -1,19 +1,22 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fp_mul (
|
||||
module VX_fp_mul #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
input wire [LANES-1:0][31:0] datab,
|
||||
output wire [LANES-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
output wire [TAGW-1:0] tag_out,
|
||||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
|
@ -22,7 +25,7 @@ module VX_fp_mul (
|
|||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
twentynm_fp_mac mac_fp_wys (
|
||||
// inputs
|
||||
.accumulate(),
|
||||
|
@ -65,7 +68,7 @@ module VX_fp_mul (
|
|||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(`ISTAG_BITS + 1),
|
||||
.DATAW(TAGW + 1),
|
||||
.DEPTH(1)
|
||||
) shift_reg (
|
||||
.clk(clk),
|
||||
|
|
|
@ -1,18 +1,21 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fp_sqrt (
|
||||
module VX_fp_sqrt #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
output wire [LANES-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
output wire [TAGW-1:0] tag_out,
|
||||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
|
@ -21,7 +24,7 @@ module VX_fp_sqrt (
|
|||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
acl_fp_sqrt fsqrt (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
|
@ -32,7 +35,7 @@ module VX_fp_sqrt (
|
|||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(`ISTAG_BITS + 1),
|
||||
.DATAW(TAGW + 1),
|
||||
.DEPTH(`LATENCY_FSQRT)
|
||||
) shift_reg (
|
||||
.clk(clk),
|
||||
|
|
|
@ -1,19 +1,22 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fp_sub (
|
||||
module VX_fp_sub #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
input wire [`NUM_THREADS-1:0][31:0] datab,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
input wire [LANES-1:0][31:0] datab,
|
||||
output wire [LANES-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
output wire [TAGW-1:0] tag_out,
|
||||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
|
@ -22,7 +25,7 @@ module VX_fp_sub (
|
|||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
twentynm_fp_mac mac_fp_wys (
|
||||
// inputs
|
||||
.accumulate(),
|
||||
|
@ -65,7 +68,7 @@ module VX_fp_sub (
|
|||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(`ISTAG_BITS + 1),
|
||||
.DATAW(TAGW + 1),
|
||||
.DEPTH(1)
|
||||
) shift_reg (
|
||||
.clk(clk),
|
||||
|
|
|
@ -1,18 +1,21 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
module VX_fp_utof (
|
||||
module VX_fp_utof #(
|
||||
parameter TAGW = 1,
|
||||
parameter LANES = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
|
||||
output wire ready_in,
|
||||
input wire valid_in,
|
||||
|
||||
input wire [`ISTAG_BITS-1:0] tag_in,
|
||||
input wire [TAGW-1:0] tag_in,
|
||||
|
||||
input wire [`NUM_THREADS-1:0][31:0] dataa,
|
||||
output wire [`NUM_THREADS-1:0][31:0] result,
|
||||
input wire [LANES-1:0][31:0] dataa,
|
||||
output wire [LANES-1:0][31:0] result,
|
||||
|
||||
output wire [`ISTAG_BITS-1:0] tag_out,
|
||||
output wire [TAGW-1:0] tag_out,
|
||||
|
||||
input wire ready_out,
|
||||
output wire valid_out
|
||||
|
@ -21,7 +24,7 @@ module VX_fp_utof (
|
|||
wire enable = ~stall;
|
||||
assign ready_in = enable;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
for (genvar i = 0; i < LANES; i++) begin
|
||||
acl_fp_utof utof (
|
||||
.clk (clk),
|
||||
.areset (1'b0),
|
||||
|
@ -32,7 +35,7 @@ module VX_fp_utof (
|
|||
end
|
||||
|
||||
VX_shift_register #(
|
||||
.DATAW(`ISTAG_BITS + 1),
|
||||
.DATAW(TAGW + 1),
|
||||
.DEPTH(`LATENCY_UTOF)
|
||||
) shift_reg (
|
||||
.clk(clk),
|
||||
|
|
|
@ -5,23 +5,20 @@
|
|||
|
||||
interface VX_alu_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
`DEBUG_BEGIN
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
`DEBUG_END
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire [`ALU_BITS-1:0] op;
|
||||
|
||||
wire [`ALU_BR_BITS-1:0] op;
|
||||
wire rs1_is_PC;
|
||||
wire rs2_is_imm;
|
||||
|
||||
wire [31:0] imm;
|
||||
|
||||
wire [`NT_BITS-1:0] tid;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
wire ready;
|
||||
|
||||
|
|
|
@ -1,29 +0,0 @@
|
|||
`ifndef VX_BRANCH_REQ_IF
|
||||
`define VX_BRANCH_REQ_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle for a request sent to the branch unit (BRU).
// Only plain wires are declared: there are no modports, so signal
// directions are decided by whichever modules connect to this interface.
// Field widths come from macros in VX_define.vh.
interface VX_bru_req_if ();
|
||||
|
||||
// presumably the producer side of a valid/ready handshake (paired with `ready` below) - confirm at the call sites
wire valid;
|
||||
// `ISTAG_BITS-wide tag; presumably identifies the issued instruction - confirm
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
// `NW_BITS-wide; presumably the warp id - confirm
wire [`NW_BITS-1:0] wid;
|
||||
// NOTE(review): DEBUG_BEGIN/DEBUG_END are defined elsewhere; they look like markers around debug-only signals - confirm
`DEBUG_BEGIN
|
||||
// one bit per thread
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
`DEBUG_END
|
||||
// 32-bit value; by its name, the PC of the instruction being requested
wire [31:0] curr_PC;
|
||||
|
||||
// `BRU_BITS-wide operation selector
wire [`BRU_BITS-1:0] op;
|
||||
|
||||
// presumably selects the PC as the first operand - confirm in the BRU implementation
wire rs1_is_PC;
|
||||
|
||||
// single 32-bit operands (not replicated per thread, unlike the per-thread data buses in other interfaces)
wire [31:0] rs1_data;
|
||||
wire [31:0] rs2_data;
|
||||
|
||||
// 32-bit offset; presumably the branch displacement - confirm
wire [31:0] offset;
|
||||
|
||||
// presumably the consumer side of the handshake (back-pressure) - confirm
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -1,36 +0,0 @@
|
|||
`ifndef VX_CMT_TO_ISSUE_IF
|
||||
`define VX_CMT_TO_ISSUE_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle carrying one {valid, tag, data} triple per execution unit
// (alu, bru, lsu, csr, mul, fpu, gpu). Judging by the interface name it runs
// from the commit stage back to the issue stage - confirm at the call sites.
// Only wires/struct members are declared: there are no modports and no ready signals.
interface VX_cmt_to_issue_if ();
|
||||
|
||||
// one valid bit per execution unit
wire alu_valid;
|
||||
wire bru_valid;
|
||||
wire lsu_valid;
|
||||
wire csr_valid;
|
||||
wire mul_valid;
|
||||
wire fpu_valid;
|
||||
wire gpu_valid;
|
||||
|
||||
// one `ISTAG_BITS-wide tag per execution unit
wire [`ISTAG_BITS-1:0] alu_tag;
|
||||
wire [`ISTAG_BITS-1:0] bru_tag;
|
||||
wire [`ISTAG_BITS-1:0] lsu_tag;
|
||||
wire [`ISTAG_BITS-1:0] csr_tag;
|
||||
wire [`ISTAG_BITS-1:0] mul_tag;
|
||||
wire [`ISTAG_BITS-1:0] fpu_tag;
|
||||
wire [`ISTAG_BITS-1:0] gpu_tag;
|
||||
|
||||
// NOTE(review): IGNORE_WARNINGS_BEGIN/END are defined elsewhere; presumably they silence tool warnings for the struct-typed members below - confirm
`IGNORE_WARNINGS_BEGIN
|
||||
// one issue_data_t record per execution unit (type declared outside this file)
issue_data_t alu_data;
|
||||
issue_data_t bru_data;
|
||||
issue_data_t lsu_data;
|
||||
issue_data_t csr_data;
|
||||
issue_data_t mul_data;
|
||||
issue_data_t fpu_data;
|
||||
issue_data_t gpu_data;
|
||||
`IGNORE_WARNINGS_END
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -6,18 +6,13 @@
|
|||
interface VX_csr_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
`DEBUG_BEGIN
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
`DEBUG_END
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire [31:0] curr_PC;
|
||||
wire [`CSR_BITS-1:0] op;
|
||||
|
||||
wire [`CSR_ADDR_BITS-1:0] csr_addr;
|
||||
wire [31:0] csr_mask;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
wire is_io;
|
||||
|
@ -26,4 +21,4 @@ interface VX_csr_req_if ();
|
|||
|
||||
endinterface
|
||||
|
||||
`endif
|
||||
`endif
|
|
@ -1,15 +0,0 @@
|
|||
`ifndef VX_CSR_RSP_IF
|
||||
`define VX_CSR_RSP_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle for a CSR unit response: a tag plus one 32-bit word per thread.
// Only plain wires are declared: there are no modports, so directions are
// decided by the connecting modules.
interface VX_csr_rsp_if ();
|
||||
|
||||
// presumably the producer side of a valid/ready handshake - confirm at the call sites
wire valid;
|
||||
// `ISTAG_BITS-wide tag; presumably matches the tag of the originating request - confirm
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
// `NUM_THREADS entries of 32 bits each
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
// presumably the consumer side of the handshake (back-pressure) - confirm
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -1,5 +1,5 @@
|
|||
`ifndef VX_CSR_TO_FPU_IF
|
||||
`define VX_CSR_TO_FPU_IF
|
||||
`ifndef VX_CSR_TO_ISSUE_IF
|
||||
`define VX_CSR_TO_ISSUE_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
|||
`IGNORE_WARNINGS_BEGIN
|
||||
`endif
|
||||
|
||||
interface VX_csr_to_fpu_if ();
|
||||
interface VX_csr_to_issue_if ();
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`FRM_BITS-1:0] frm;
|
|
@ -6,29 +6,26 @@
|
|||
interface VX_decode_if ();
|
||||
|
||||
wire valid;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire [`EX_BITS-1:0] ex_type;
|
||||
wire [`OP_BITS-1:0] ex_op;
|
||||
wire [`FRM_BITS-1:0] frm;
|
||||
wire wb;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [`NR_BITS-1:0] rs2;
|
||||
wire [`NR_BITS-1:0] rs3;
|
||||
wire [31:0] imm;
|
||||
|
||||
wire rs1_is_PC;
|
||||
wire rs2_is_imm;
|
||||
|
||||
wire [`NUM_REGS-1:0] reg_use_mask;
|
||||
|
||||
// FP states
|
||||
wire [`NR_BITS-1:0] rs3;
|
||||
wire rs2_is_imm;
|
||||
wire use_rs3;
|
||||
wire [`FRM_BITS-1:0] frm;
|
||||
|
||||
wire wb;
|
||||
wire [`NUM_REGS-1:0] used_regs;
|
||||
|
||||
wire ready;
|
||||
|
||||
|
|
|
@ -5,9 +5,14 @@
|
|||
|
||||
interface VX_exu_to_cmt_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [31:0] curr_PC;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -10,20 +10,18 @@
|
|||
interface VX_fpu_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
`DEBUG_BEGIN
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [31:0] curr_PC;
|
||||
`DEBUG_END
|
||||
|
||||
wire [`FPU_BITS-1:0] op;
|
||||
wire [`FRM_BITS-1:0] frm;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
|
|
@ -5,11 +5,16 @@
|
|||
|
||||
interface VX_fpu_to_cmt_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire has_fflags;
|
||||
fflags_t [`NUM_THREADS-1:0] fflags;
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [31:0] curr_PC;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
wire has_fflags;
|
||||
fflags_t [`NUM_THREADS-1:0] fflags;
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -19,7 +19,8 @@ interface VX_gpr_read_if ();
|
|||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
|
||||
wire ready;
|
||||
wire ready_in;
|
||||
wire ready_out;
|
||||
|
||||
endinterface
|
||||
|
||||
|
|
|
@ -6,15 +6,15 @@
|
|||
interface VX_gpu_req_if();
|
||||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire [`GPU_BITS-1:0] op;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [31:0] rs2_data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
wire ready;
|
||||
|
||||
|
|
|
@ -1,39 +0,0 @@
|
|||
`ifndef VX_ISSUE_IF
|
||||
`define VX_ISSUE_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
// Signal bundle describing one issued instruction: identification
// (tag, warp, thread mask, PC), the operation to perform, register operands
// and an immediate.
// Only plain wires are declared: there are no modports, so directions are
// decided by the connecting modules. Field widths come from macros in VX_define.vh.
interface VX_issue_if ();
|
||||
|
||||
// presumably the producer side of a valid/ready handshake (paired with `ready` below) - confirm at the call sites
wire valid;
|
||||
// `ISTAG_BITS-wide tag for this instruction
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
// `NW_BITS-wide; presumably the warp id - confirm
wire [`NW_BITS-1:0] wid;
|
||||
// one bit per thread
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
// presumably ex_type picks the execution unit and ex_op the operation within it - confirm in the decoder
wire [`EX_BITS-1:0] ex_type;
|
||||
wire [`OP_BITS-1:0] ex_op;
|
||||
|
||||
// `FRM_BITS-wide; presumably the floating-point rounding mode - confirm
wire [`FRM_BITS-1:0] frm;
|
||||
|
||||
// presumably "writes back a result to rd" - confirm
wire wb;
|
||||
|
||||
// `NR_BITS-wide register index
wire [`NR_BITS-1:0] rd;
|
||||
|
||||
// per-thread 32-bit operand values (`NUM_THREADS entries each)
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
|
||||
// `NR_BITS-wide register index of the first source operand
wire [`NR_BITS-1:0] rs1;
|
||||
wire [31:0] imm;
|
||||
|
||||
// presumably operand-select flags: use the PC instead of rs1 / the immediate instead of rs2 - confirm
wire rs1_is_PC;
|
||||
wire rs2_is_imm;
|
||||
|
||||
// `NT_BITS-wide; presumably a thread index - confirm how it is chosen
wire [`NT_BITS-1:0] tid;
|
||||
|
||||
// presumably the consumer side of the handshake (back-pressure) - confirm
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -6,9 +6,9 @@
|
|||
interface VX_lsu_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [31:0] curr_PC;
|
||||
|
||||
wire rw;
|
||||
|
|
|
@ -10,16 +10,15 @@
|
|||
interface VX_mul_req_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||
`DEBUG_BEGIN
|
||||
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
wire [31:0] curr_PC;
|
||||
`DEBUG_END
|
||||
wire [`MUL_BITS-1:0] op;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
|
||||
wire ready;
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
interface VX_warp_ctl_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`NW_BITS-1:0] wid;
|
||||
|
||||
gpu_tmc_t tmc;
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
`ifndef VX_WB_IF
|
||||
`define VX_WB_IF
|
||||
`ifndef VX_WRITEBACK_IF
|
||||
`define VX_WRITEBACK_IF
|
||||
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_wb_if ();
|
||||
interface VX_writeback_if ();
|
||||
|
||||
wire valid;
|
||||
wire [`NUM_THREADS-1:0] thread_mask;
|
||||
|
@ -16,6 +16,8 @@ interface VX_wb_if ();
|
|||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
|
||||
wire ready;
|
||||
|
||||
endinterface
|
||||
|
||||
`endif
|
|
@ -1,19 +1,21 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
module VX_cam_buffer #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter RPORTS = 1,
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter RPORTS = 1,
|
||||
parameter CPORTS = 1,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire [DATAW-1:0] write_data,
|
||||
output wire [ADDRW-1:0] write_addr,
|
||||
input wire [DATAW-1:0] write_data,
|
||||
input wire acquire_slot,
|
||||
input wire [RPORTS-1:0][ADDRW-1:0] read_addr,
|
||||
output reg [RPORTS-1:0][DATAW-1:0] read_data,
|
||||
input wire [RPORTS-1:0] release_slot,
|
||||
input wire [CPORTS-1:0][ADDRW-1:0] release_addr,
|
||||
input wire [CPORTS-1:0] release_slot,
|
||||
output wire full
|
||||
);
|
||||
reg [DATAW-1:0] entries [SIZE-1:0];
|
||||
|
@ -34,12 +36,11 @@ module VX_cam_buffer #(
|
|||
|
||||
always @(*) begin
|
||||
free_slots_n = free_slots;
|
||||
for (integer i = 0; i < RPORTS; i++) begin
|
||||
for (integer i = 0; i < CPORTS; i++) begin
|
||||
if (release_slot[i]) begin
|
||||
free_slots_n[read_addr[i]] = 1;
|
||||
end
|
||||
read_data[i] = entries[read_addr[i]];
|
||||
end
|
||||
free_slots_n[release_addr[i]] = 1;
|
||||
end
|
||||
end
|
||||
if (acquire_slot) begin
|
||||
free_slots_n[write_addr_r] = 0;
|
||||
end
|
||||
|
@ -55,15 +56,19 @@ module VX_cam_buffer #(
|
|||
assert(1 == free_slots[write_addr]) else $display("%t: inused slot at port %d", $time, write_addr);
|
||||
entries[write_addr] <= write_data;
|
||||
end
|
||||
for (integer i = 0; i < RPORTS; i++) begin
|
||||
for (integer i = 0; i < CPORTS; i++) begin
|
||||
if (release_slot[i]) begin
|
||||
assert(0 == free_slots[read_addr[i]]) else $display("%t: freed slot at port %d", $time, read_addr[i]);
|
||||
assert(0 == free_slots[release_addr[i]]) else $display("%t: freed slot at port %d", $time, release_addr[i]);
|
||||
end
|
||||
end
|
||||
free_slots <= free_slots_n;
|
||||
write_addr_r <= free_index;
|
||||
full_r <= ~free_valid;
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < RPORTS; i++) begin
|
||||
assign read_data[i] = entries[read_addr[i]];
|
||||
end
|
||||
|
||||
assign write_addr = write_addr_r;
|
||||
|
|
|
@ -14,53 +14,25 @@ module VX_elastic_buffer #(
|
|||
input wire ready_out,
|
||||
output wire valid_out
|
||||
);
|
||||
if (0 == SIZE) begin
|
||||
wire empty, full;
|
||||
|
||||
reg [DATAW-1:0] skid_buffer;
|
||||
reg skid_valid;
|
||||
VX_generic_queue #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.BUFFERED (BUFFERED)
|
||||
) queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (valid_in),
|
||||
.pop (ready_out),
|
||||
.data_in(data_in),
|
||||
.data_out(data_out),
|
||||
.empty (empty),
|
||||
.full (full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
skid_valid <= 0;
|
||||
end else begin
|
||||
if (valid_in && ~ready_out) begin
|
||||
assert(~skid_valid);
|
||||
skid_buffer <= data_in;
|
||||
skid_valid <= 1;
|
||||
end
|
||||
if (ready_out) begin
|
||||
skid_valid <= 0;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign ready_in = ready_out || ~skid_valid;
|
||||
assign data_out = skid_valid ? skid_buffer : data_in;
|
||||
assign valid_out = valid_in || skid_valid;
|
||||
|
||||
end else begin
|
||||
|
||||
wire empty, full;
|
||||
|
||||
VX_generic_queue #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.BUFFERED (BUFFERED)
|
||||
) queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (valid_in),
|
||||
.pop (ready_out),
|
||||
.data_in(data_in),
|
||||
.data_out(data_out),
|
||||
.empty (empty),
|
||||
.full (full),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
assign ready_in = ~full;
|
||||
assign valid_out = ~empty;
|
||||
|
||||
end
|
||||
assign ready_in = ~full;
|
||||
assign valid_out = ~empty;
|
||||
|
||||
endmodule
|
|
@ -70,7 +70,6 @@ module VX_generic_queue #(
|
|||
if (writing) begin
|
||||
data[wr_ptr_a] <= data_in;
|
||||
wr_ptr_r <= wr_ptr_r + 1;
|
||||
|
||||
if (!reading) begin
|
||||
size_r <= size_r + 1;
|
||||
end
|
||||
|
|
|
@ -36,14 +36,14 @@ module VX_rr_arbiter #(
|
|||
end
|
||||
end
|
||||
grant_onehot_r = N'(0);
|
||||
grant_onehot_r[grant_index] = 1;
|
||||
grant_onehot_r[grant_table[state]] = 1;
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
state <= 0;
|
||||
end else begin
|
||||
state <= grant_index;
|
||||
state <= grant_table[state];
|
||||
end
|
||||
end
|
||||
|
||||
|
|
65
hw/rtl/libs/VX_skid_buffer.v
Normal file
65
hw/rtl/libs/VX_skid_buffer.v
Normal file
|
@ -0,0 +1,65 @@
|
|||
`include "VX_platform.vh"
|
||||
|
||||
// Valid/ready pipeline stage with a registered output and one spare entry.
//
// * data_out / valid_out are registers; they are reloaded whenever the output
//   register is empty (!valid_out) or is being drained (ready_out).
// * `buffer` is the spare entry: it captures an input word that is accepted
//   while the output register is occupied and stalled
//   (valid_out && !ready_out). `use_buffer` marks that the spare entry holds
//   the next word to present.
// * ready_in is driven only by the `use_buffer` register, so it does not
//   depend combinationally on ready_out.
//
// Parameters:
//   DATAW     - width in bits of data_in / data_out.
// Ports:
//   clk       - clock; all state updates on the rising edge.
//   reset     - synchronous reset; clears use_buffer and valid_out
//               (buffer and data_out are not reset).
//   valid_in  - input word present on data_in.
//   ready_in  - this stage accepts data_in in the current cycle.
//   data_in   - input word.
//   data_out  - registered output word.
//   ready_out - downstream accepts data_out in the current cycle.
//   valid_out - data_out holds a word.
module VX_skid_buffer #(
    parameter DATAW = 1
) (
    input  wire             clk,
    input  wire             reset,
    input  wire             valid_in,
    // Declared as a net: it is driven by the continuous assignment below,
    // which plain Verilog does not allow for a `reg` port.
    output wire             ready_in,
    input  wire [DATAW-1:0] data_in,
    output reg  [DATAW-1:0] data_out,
    input  wire             ready_out,
    output reg              valid_out
);
    reg [DATAW-1:0] buffer;     // spare entry
    reg             use_buffer; // spare entry holds the next word to output

    // An input word is transferred into this stage in the current cycle.
    wire push = valid_in && ready_in;

    always @(posedge clk) begin
        if (reset) begin
            use_buffer <= 0;
            valid_out  <= 0;
        end else begin
            // Output register is full and stalled: the accepted word has to
            // be parked in the spare entry.
            if (push && (valid_out && !ready_out)) begin
                assert(!use_buffer);
                use_buffer <= 1;
            end
            // Downstream drains a word: whatever was parked moves to the
            // output register below, so the spare entry becomes free.
            if (ready_out) begin
                use_buffer <= 0;
            end
            // Capture every accepted word; it only matters when use_buffer
            // gets set in the same cycle.
            if (push) begin
                buffer <= data_in;
            end
            // Reload the output register when it is empty or being drained,
            // giving priority to the parked word over the live input.
            if (!valid_out || ready_out) begin
                valid_out <= valid_in || use_buffer;
                data_out  <= use_buffer ? buffer : data_in;
            end
        end
    end

    // Back-pressure upstream only while the spare entry is occupied.
    assign ready_in = !use_buffer;

    // Disabled alternative kept from the original: the same interface built
    // on a 2-entry VX_generic_queue.
    /*wire empty, full;

    VX_generic_queue #(
        .DATAW    (DATAW),
        .SIZE     (2),
        .BUFFERED (0)
    ) queue (
        .clk     (clk),
        .reset   (reset),
        .push    (valid_in),
        .pop     (ready_out),
        .data_in (data_in),
        .data_out(data_out),
        .empty   (empty),
        .full    (full),
        `UNUSED_PIN (size)
    );

    assign ready_in  = ~full;
    assign valid_out = ~empty;*/

endmodule
|
|
@ -3,7 +3,7 @@
|
|||
#include <fstream>
|
||||
#include <iomanip>
|
||||
|
||||
#define ALL_TESTS
|
||||
//#define ALL_TESTS
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc == 1) {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
set_time_format -unit ns -decimal_places 3
|
||||
|
||||
create_clock -name {clk} -period "300 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
|
||||
create_clock -name {clk} -period "240 MHz" -waveform { 0.0 1.0 } [get_ports {clk}]
|
||||
|
||||
derive_pll_clocks -create_base_clocks
|
||||
derive_clock_uncertainty
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue