Updates for 64bit extension integrated to the latest [incomplete]

This commit is contained in:
Varun Saxena 2023-02-16 12:24:33 -05:00 committed by Blaise Tine
parent ac6340fd8f
commit b25d2e7a7d
25 changed files with 297 additions and 149 deletions

View file

@ -19,10 +19,19 @@
///////////////////////////////////////////////////////////////////////////////
// comment out the top or bottom two lines to switch between 64 and 32 bit mode
`ifndef XLEN
`define XLEN 32
`define XLEN 64
`define MODE_64_BIT 1
// `define XLEN 32
// `define MODE_32_BIT 1
`endif
// Disable MULDIV, FPU, and TEX units since irrelevant to RV64I instructions
`define EXT_M_DISABLE 1
`define EXT_F_DISABLE 1
`define EXT_TEX_DISABLE 1
`ifndef NUM_CLUSTERS
`define NUM_CLUSTERS 1
`endif

View file

@ -65,6 +65,10 @@
`define INST_FENCE 7'b0001111 // Fence instructions
`define INST_SYS 7'b1110011 // system instructions
// RV64I instruction specific opcodes (for any W instruction)
`define INST_I_W 7'b0011011 // W type immediate instructions
`define INST_R_W 7'b0111011 // W type register instructions
`define INST_FL 7'b0000111 // float load instruction
`define INST_FS 7'b0100111 // float store instruction
`define INST_FMADD 7'b1000011
@ -91,25 +95,31 @@
///////////////////////////////////////////////////////////////////////////////
`define INST_OP_BITS 4
`define INST_OP_BITS 5
`define INST_MOD_BITS 3
///////////////////////////////////////////////////////////////////////////////
`define INST_ALU_ADD 4'b0000
`define INST_ALU_LUI 4'b0010
`define INST_ALU_AUIPC 4'b0011
`define INST_ALU_SLTU 4'b0100
`define INST_ALU_SLT 4'b0101
`define INST_ALU_SRL 4'b1000
`define INST_ALU_SRA 4'b1001
`define INST_ALU_SUB 4'b1011
`define INST_ALU_AND 4'b1100
`define INST_ALU_OR 4'b1101
`define INST_ALU_XOR 4'b1110
`define INST_ALU_SLL 4'b1111
`define INST_ALU_OTHER 4'b0111
`define INST_ALU_BITS 4
`define INST_ALU_ADD 5'b00000
`define INST_ALU_LUI 5'b00010
`define INST_ALU_AUIPC 5'b00011
`define INST_ALU_SLTU 5'b00100
`define INST_ALU_SLT 5'b00101
`define INST_ALU_SRL 5'b01000
`define INST_ALU_SRA 5'b01001
`define INST_ALU_SUB 5'b01011
`define INST_ALU_AND 5'b01100
`define INST_ALU_OR 5'b01101
`define INST_ALU_XOR 5'b01110
`define INST_ALU_SLL 5'b01111
`define INST_ALU_OTHER 5'b00111
// RV64I instruction versions
`define INST_ALU_ADD_W 5'b10000
`define INST_ALU_SUB_W 5'b11011
`define INST_ALU_SLL_W 5'b11111
`define INST_ALU_SRL_W 5'b11000
`define INST_ALU_SRA_W 5'b11001
`define INST_ALU_BITS 5
`define INST_ALU_OP(x) x[`INST_ALU_BITS-1:0]
`define INST_ALU_OP_CLASS(x) x[3:2]
`define INST_ALU_SIGNED(x) x[0]
@ -151,15 +161,20 @@
`define INST_FMT_W 3'b010
`define INST_FMT_BU 3'b100
`define INST_FMT_HU 3'b101
`define INST_FMT_WU 3'b110
`define INST_FMT_D 3'b011
`define INST_LSU_LB 4'b0000
`define INST_LSU_LH 4'b0001
`define INST_LSU_LW 4'b0010
`define INST_LSU_LD 4'b0011 // new for RV64I LD
`define INST_LSU_LBU 4'b0100
`define INST_LSU_LHU 4'b0101
`define INST_LSU_LWU 4'b0110 // new for RV64I LWU
`define INST_LSU_SB 4'b1000
`define INST_LSU_SH 4'b1001
`define INST_LSU_SW 4'b1010
`define INST_LSU_SD 4'b1011 // new for RV64I SD
`define INST_LSU_BITS 4
`define INST_LSU_FMT(x) x[2:0]
`define INST_LSU_WSIZE(x) x[1:0]
@ -287,13 +302,13 @@
`endif
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
`define VX_MEM_ADDR_WIDTH (32 - `CLOG2(`L3_LINE_SIZE))
`define VX_MEM_ADDR_WIDTH (`XLEN - `CLOG2(`L3_LINE_SIZE))
`define VX_MEM_DATA_WIDTH (`L3_LINE_SIZE * 8)
`define VX_MEM_TAG_WIDTH L3_MEM_TAG_WIDTH
`define VX_DCR_ADDR_WIDTH `DCR_ADDR_BITS
`define VX_DCR_DATA_WIDTH 32
`define VX_DCR_DATA_WIDTH `XLEN
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
`define TO_FULL_ADDR(x) {x, (`XLEN-$bits(x))'(0)}
///////////////////////////////////////////////////////////////////////////////

View file

@ -4,7 +4,7 @@ module VX_mem_arb #(
parameter NUM_REQS = 1,
parameter DATA_WIDTH = 1,
parameter DATA_SIZE = (DATA_WIDTH / 8),
parameter ADDR_WIDTH = (32 - `CLOG2(DATA_SIZE)),
parameter ADDR_WIDTH = (`XLEN - `CLOG2(DATA_SIZE)),
parameter TAG_WIDTH = 1,
parameter TAG_SEL_IDX = 0,
parameter BUFFERED_REQ = 0,

View file

@ -48,7 +48,7 @@ module VX_mem_unit # (
VX_mem_req_if.master mem_req_if,
VX_mem_rsp_if.slave mem_rsp_if
);
`ifdef PERF_ENABLE
VX_perf_cache_if perf_icache_if();
VX_perf_cache_if perf_dcache_if();
@ -187,7 +187,7 @@ module VX_mem_unit # (
VX_smem_switch #(
.NUM_REQS (2),
.NUM_LANES (DCACHE_NUM_REQS),
.DATA_SIZE (4),
.DATA_SIZE (DCACHE_WORD_SIZE),
.TAG_WIDTH (DCACHE_ARB_TAG_WIDTH),
.TAG_SEL_IDX (0),
.ARBITER ("P"),

View file

@ -17,7 +17,7 @@ module VX_cache #(
// Number of associative ways
parameter NUM_WAYS = 1,
// Size of a word in bytes
parameter WORD_SIZE = 4,
parameter WORD_SIZE = `XLEN/8,
// Core Request Queue Size
parameter CREQ_SIZE = 0,

View file

@ -19,8 +19,8 @@
`define LINES_PER_BANK (`BANK_SIZE / (LINE_SIZE * NUM_WAYS))
`define WORDS_PER_LINE (LINE_SIZE / WORD_SIZE)
`define WORD_ADDR_WIDTH (32-`CLOG2(WORD_SIZE))
`define MEM_ADDR_WIDTH (32-`CLOG2(LINE_SIZE))
`define WORD_ADDR_WIDTH (`XLEN-`CLOG2(WORD_SIZE))
`define MEM_ADDR_WIDTH (`XLEN-`CLOG2(LINE_SIZE))
`define LINE_ADDR_WIDTH (`MEM_ADDR_WIDTH-`CLOG2(NUM_BANKS))
// Word select

View file

@ -14,7 +14,7 @@ module VX_shared_mem #(
// Address width
parameter ADDR_WIDTH = 22,
// Size of a word in bytes
parameter WORD_SIZE = 4,
parameter WORD_SIZE = `XLEN/8,
// Request debug identifier
parameter UUID_WIDTH = 0,

View file

@ -1,4 +1,5 @@
`include "VX_define.vh"
`include "VX_config.vh"
module VX_alu_unit #(
parameter CORE_ID = 0
@ -16,16 +17,18 @@ module VX_alu_unit #(
`UNUSED_PARAM (CORE_ID)
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
localparam RSP_ARB_DATAW = UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + `NUM_THREADS * 32;
localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED;
reg [`NUM_THREADS-1:0][31:0] alu_result;
wire [`NUM_THREADS-1:0][31:0] add_result;
wire [`NUM_THREADS-1:0][32:0] sub_result;
wire [`NUM_THREADS-1:0][31:0] shr_result;
reg [`NUM_THREADS-1:0][31:0] msc_result;
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
// Response payload: per-thread data is XLEN bits wide (full_alu_data / mul_data), PC stays 32 bits
localparam RSP_ARB_DATAW = UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + `NUM_THREADS * `XLEN;
localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED;
// Index of the top shift-amount bit: shifts use bits [SHIFT_IMM_BITS:0] of the second operand
localparam SHIFT_IMM_BITS = `CLOG2(`XLEN) - 1;
// Per-thread intermediate results; sized to XLEN because they are assigned XLEN-wide values below
reg [`NUM_THREADS-1:0][`XLEN-1:0] alu_result;
reg [`NUM_THREADS-1:0][`XLEN-1:0] add_result;
reg [`NUM_THREADS-1:0][`XLEN:0] sub_result; // XLEN+1 bits (33 or 65) to keep the borrow bit used by SLT/SLTU and branches
reg [`NUM_THREADS-1:0][`XLEN-1:0] shr_result;
reg [`NUM_THREADS-1:0][`XLEN-1:0] msc_result;
wire ready_in;
@ -37,35 +40,72 @@ module VX_alu_unit #(
wire [1:0] alu_op_class = `INST_ALU_OP_CLASS(alu_op);
// SUB and SUBW share op class 2'b10 with the shifts; both must select the subtractor output
wire is_sub = (alu_op == `INST_ALU_SUB) || (alu_op == `INST_ALU_SUB_W);
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_in1 = alu_req_if.rs1_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_in2 = alu_req_if.rs2_data;
wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.use_PC ? {`NUM_THREADS{alu_req_if.PC}} : alu_in1;
wire [`NUM_THREADS-1:0][31:0] alu_in2_imm = alu_req_if.use_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
wire [`NUM_THREADS-1:0][31:0] alu_in2_less = (alu_req_if.use_imm && ~is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
wire [`NUM_THREADS-1:0][31:0] trunc_alu_in1, trunc_alu_result;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
// PC operations should only be for 32 bits
assign trunc_alu_in1[i] = alu_in1[i][31:0];
assign trunc_alu_result[i] = alu_result[i][31:0];
end
// PC operations should only be for 32 bits
wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.use_PC ? {`NUM_THREADS{alu_req_if.PC}} : trunc_alu_in1;
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_in2_imm = alu_req_if.use_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_in2_less = (alu_req_if.use_imm && ~is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [`XLEN-1:0] temp_add_result = {{`XLEN-32{1'b0}}, alu_in1_PC[i]} + alu_in2_imm[i];
always @(*) begin
case(alu_op)
`INST_ALU_ADD: add_result[i] = temp_add_result;
`INST_ALU_LUI, `INST_ALU_AUIPC, `INST_ALU_ADD_W: add_result[i] = `XLEN'($signed(temp_add_result[31:0])); //{{`XLEN-32{add_result[31]}}, temp_add_result[31:0]};
default: add_result[i] = temp_add_result;
endcase
end
end
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [32:0] sub_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
wire [32:0] sub_in2 = {alu_signed & alu_in2_less[i][31], alu_in2_less[i]};
assign sub_result[i] = sub_in1 - sub_in2;
wire [`XLEN:0] sub_in1 = {alu_signed & alu_in1[i][`XLEN-1], alu_in1[i]};
wire [`XLEN:0] sub_in2 = {alu_signed & alu_in2_less[i][`XLEN-1], alu_in2_less[i]};
wire [`XLEN:0] temp_sub_result = sub_in1 - sub_in2;
always @(*) begin
case(alu_op)
`INST_ALU_SUB: sub_result[i] = temp_sub_result;
`INST_ALU_SUB_W: sub_result[i] = {temp_sub_result[`XLEN], `XLEN'($signed(temp_sub_result[31:0]))};
default: sub_result[i] = temp_sub_result;
endcase
end
end
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [32:0] shr_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
assign shr_result[i] = 32'($signed(shr_in1) >>> $signed(alu_in2_imm[i][4:0]));
end
wire [`XLEN:0] shr_in1 = {alu_signed & alu_in1[i][`XLEN-1], alu_in1[i]};
wire [`XLEN-1:0] temp_shr_result = `XLEN'($signed(shr_in1) >>> alu_in2_imm[i][SHIFT_IMM_BITS:0]);
// W-form right shifts operate on the low 32 bits of rs1 only: the fill bit is
// rs1[31] for arithmetic shifts and 0 for logical shifts, never rs1[63:32].
wire [31:0] temp_shr_result_w = 32'($signed({alu_signed & alu_in1[i][31], alu_in1[i][31:0]}) >>> alu_in2_imm[i][4:0]);
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
always @(*) begin
case(alu_op)
`INST_ALU_SRA, `INST_ALU_SRL: shr_result[i] = temp_shr_result;
// RV64I: SRAW/SRAIW and SRLW/SRLIW both sign-extend their 32-bit result to XLEN
`INST_ALU_SRA_W,
`INST_ALU_SRL_W: shr_result[i] = `XLEN'($signed(temp_shr_result_w[31:0]));
default: shr_result[i] = temp_shr_result;
endcase
end
end
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [31:0] temp_shift_result = alu_in1[i][31:0] << alu_in2_imm[i][4:0]; // only used for SLLW
always @(*) begin
case (alu_op)
`INST_ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
`INST_ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
`INST_ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
`INST_ALU_SLL: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
`INST_ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
// `INST_ALU_SLL: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
`INST_ALU_SLL: msc_result[i] = alu_in1[i] << alu_in2_imm[i][SHIFT_IMM_BITS:0]; // shift amount is the low CLOG2(XLEN) bits of the second operand (6 bits when XLEN is 64)
`INST_ALU_SLL_W: msc_result[i] = `XLEN'($signed(temp_shift_result[31:0])); // 32-bit left shift by a 5-bit amount (temp_shift_result), sign-extended to XLEN
default: msc_result[i] = 'x;
endcase
end
@ -74,11 +114,12 @@ module VX_alu_unit #(
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
always @(*) begin
case (alu_op_class)
2'b00: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
2'b01: alu_result[i] = {31'b0, sub_result[i][32]}; // SLTU, SLT
2'b10: alu_result[i] = is_sub ? sub_result[i][31:0] // SUB
: shr_result[i]; // SRL, SRA
2'b11: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL
2'b00: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC, ADDIW, ADDW
2'b01: alu_result[i] = {{`XLEN-1{1'b0}}, sub_result[i][`XLEN]}; // SLTU, SLT
2'b10: alu_result[i] = is_sub ? sub_result[i][`XLEN-1:0] // SUB, SUBW
: shr_result[i]; // SRL, SRA, SRLW, SRAW, SRLIW, SRAIW, SRLI, SRAI
// 2'b11,
default: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLIW, SLLW, SLLI
endcase
end
end
@ -86,10 +127,10 @@ module VX_alu_unit #(
// branch
wire is_jal = is_br_op && (br_op == `INST_BR_JAL || br_op == `INST_BR_JALR);
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : alu_result;
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : trunc_alu_result;
wire [31:0] br_dest = add_result[alu_req_if.tid];
wire [32:0] cmp_result = sub_result[alu_req_if.tid];
wire [31:0] br_dest = add_result[alu_req_if.tid][31:0];
wire [32:0] cmp_result = sub_result[alu_req_if.tid][32:0];
wire is_less = cmp_result[32];
wire is_equal = ~(| cmp_result[31:0]);
@ -108,6 +149,11 @@ module VX_alu_unit #(
wire alu_wb;
wire [`NUM_THREADS-1:0][31:0] alu_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] full_alu_data;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign full_alu_data[i] = {{`XLEN-31{alu_data[i][31]}},alu_data[i][30:0]};
end
wire [`INST_BR_BITS-1:0] br_op_r;
wire [31:0] br_dest_r;
wire is_less_r;
@ -135,7 +181,7 @@ module VX_alu_unit #(
assign branch_ctl_if.valid = alu_valid_out && alu_ready_out && is_br_op_r;
assign branch_ctl_if.taken = ((br_less ? is_less_r : is_equal_r) ^ br_neg) | br_static;
assign branch_ctl_if.wid = alu_wid;
assign branch_ctl_if.dest = br_dest_r;
assign branch_ctl_if.dest = br_dest_r[31:0];
`ifdef EXT_M_ENABLE
@ -149,7 +195,7 @@ module VX_alu_unit #(
wire [31:0] mul_PC;
wire [`NR_BITS-1:0] mul_rd;
wire mul_wb;
wire [`NUM_THREADS-1:0][31:0] mul_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] mul_data;
wire [`INST_MUL_BITS-1:0] mul_op = `INST_MUL_BITS'(alu_req_if.op_type);
@ -220,7 +266,7 @@ module VX_alu_unit #(
`endif
}),
.data_in ({
{alu_uuid, alu_wid, alu_tmask, alu_PC, alu_rd, alu_wb, alu_data}
{alu_uuid, alu_wid, alu_tmask, alu_PC, alu_rd, alu_wb, full_alu_data}
`ifdef EXT_M_ENABLE
, {mul_uuid, mul_wid, mul_tmask, mul_PC, mul_rd, mul_wb, mul_data}
`endif

View file

@ -58,12 +58,12 @@ module VX_csr_unit #(
localparam NW_WIDTH = `UP(`NW_BITS);
reg [`NUM_THREADS-1:0][31:0] csr_read_data;
reg [31:0] csr_write_data;
wire [31:0] csr_read_data_ro, csr_read_data_rw;
wire [31:0] csr_req_data;
reg csr_rd_enable;
wire csr_wr_enable;
reg [`NUM_THREADS-1:0][31:0] csr_read_data;
reg [31:0] csr_write_data;
wire [31:0] csr_read_data_ro, csr_read_data_rw;
wire [31:0] csr_req_data;
reg csr_rd_enable;
wire csr_wr_enable;
`UNUSED_VAR (gpu_pending)
wire csr_access_pending = (0
@ -177,14 +177,14 @@ module VX_csr_unit #(
.read_wid (csr_req_if.wid),
.read_tmask (csr_req_if.tmask),
.read_addr (csr_req_if.addr),
.read_data_ro (csr_read_data_ro),
.read_data_rw (csr_read_data_rw),
.read_data_ro (csr_read_data_ro[31:0]),
.read_data_rw (csr_read_data_rw[31:0]),
.write_enable (csr_req_valid && csr_wr_enable),
.write_uuid (csr_req_if.uuid),
.write_wid (csr_req_if.wid),
.write_addr (csr_req_if.addr),
.write_data (csr_write_data)
.write_data (csr_write_data[31:0])
);
// CSR read
@ -194,14 +194,14 @@ module VX_csr_unit #(
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign wtid[i] = 32'(i);
assign ltid[i] = (32'(csr_req_if.wid) << `NT_BITS) + i;
assign gtid[i] = 32'((CORE_ID << (`NW_BITS + `NT_BITS)) + (32'(csr_req_if.wid) << `NT_BITS) + i);
assign gtid[i] = 32'((32'(CORE_ID) << (`NW_BITS + `NT_BITS)) + (32'(csr_req_if.wid) << `NT_BITS) + i);
end
always @(*) begin
csr_rd_enable = 0;
`ifdef EXT_RASTER_ENABLE
if (raster_addr_enable) begin
csr_read_data = raster_csr_if.read_data;
csr_read_data = `XLEN'(raster_csr_if.read_data);
end else
`endif
case (csr_req_if.addr)
@ -241,6 +241,10 @@ module VX_csr_unit #(
end
// send response
wire [`NUM_THREADS-1:0][31:0] csr_commit_data;
for(genvar i = 0; i < `NUM_THREADS; ++i) begin
assign csr_commit_if.data[i] = `XLEN'(csr_commit_data[i]);
end
VX_skid_buffer #(
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + `NUM_THREADS * 32)
@ -250,7 +254,7 @@ module VX_csr_unit #(
.valid_in (csr_req_valid),
.ready_in (csr_req_ready),
.data_in ({csr_req_if.uuid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.rd, csr_req_if.wb, csr_read_data}),
.data_out ({csr_commit_if.uuid, csr_commit_if.wid, csr_commit_if.tmask, csr_commit_if.PC, csr_commit_if.rd, csr_commit_if.wb, csr_commit_if.data}),
.data_out ({csr_commit_if.uuid, csr_commit_if.wid, csr_commit_if.tmask, csr_commit_if.PC, csr_commit_if.rd, csr_commit_if.wb, csr_commit_data}),
.valid_out (csr_commit_if.valid),
.ready_out (csr_commit_if.ready)
);

View file

@ -1,4 +1,5 @@
`include "VX_define.vh"
`include "VX_config.vh"
`ifndef NDEBUG
`include "VX_trace_info.vh"
`endif
@ -36,7 +37,7 @@ module VX_decode #(
reg [`INST_OP_BITS-1:0] op_type;
reg [`INST_MOD_BITS-1:0] op_mod;
reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r;
reg [31:0] imm;
reg [`XLEN-1:0] imm;
reg use_rd, use_PC, use_imm;
reg is_join, is_wstall;
@ -59,7 +60,7 @@ module VX_decode #(
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
`UNUSED_VAR (rs3)
always @(*) begin
ex_type = '0;
@ -91,7 +92,7 @@ module VX_decode #(
endcase
use_rd = 1;
use_imm = 1;
imm = {{20{alu_imm[11]}}, alu_imm};
imm = {{(`XLEN-12){alu_imm[11]}}, alu_imm};
`USED_IREG (rd);
`USED_IREG (rs1);
end
@ -131,12 +132,41 @@ module VX_decode #(
`USED_IREG (rs1);
`USED_IREG (rs2);
end
`INST_I_W: begin
// ADDIW, SLLIW, SRLIW, SRAIW
ex_type = `EX_ALU;
case (func3)
3'h0: op_type = `INST_OP_BITS'(`INST_ALU_ADD_W);
3'h1: op_type = `INST_OP_BITS'(`INST_ALU_SLL_W);
3'h5: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SRA_W) : `INST_OP_BITS'(`INST_ALU_SRL_W);
default:;
endcase
use_rd = 1;
use_imm = 1;
imm = {{(`XLEN-12){alu_imm[11]}}, alu_imm};
`USED_IREG (rd);
`USED_IREG (rs1);
end
`INST_R_W: begin
// ADDW, SUBW, SLLW, SRLW, SRAW
ex_type = `EX_ALU;
case (func3)
3'h0: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SUB_W) : `INST_OP_BITS'(`INST_ALU_ADD_W);
3'h1: op_type = `INST_OP_BITS'(`INST_ALU_SLL_W);
3'h5: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SRA_W) : `INST_OP_BITS'(`INST_ALU_SRL_W);
default:;
endcase
use_rd = 1;
`USED_IREG (rd);
`USED_IREG (rs1);
`USED_IREG (rs2);
end
`INST_LUI: begin
ex_type = `EX_ALU;
op_type = `INST_OP_BITS'(`INST_ALU_LUI);
use_rd = 1;
use_imm = 1;
imm = {upper_imm, 12'(0)};
imm = {{`XLEN-31{upper_imm[19]}}, upper_imm[18:0], 12'(0)};
`USED_IREG (rd);
end
`INST_AUIPC: begin
@ -145,7 +175,7 @@ module VX_decode #(
use_rd = 1;
use_imm = 1;
use_PC = 1;
imm = {upper_imm, 12'(0)};
imm = {{`XLEN-31{upper_imm[19]}}, upper_imm[18:0], 12'(0)};
`USED_IREG (rd);
end
`INST_JAL: begin
@ -156,7 +186,7 @@ module VX_decode #(
use_imm = 1;
use_PC = 1;
is_wstall = 1;
imm = {{11{jal_imm[20]}}, jal_imm};
imm = {{(`XLEN-21){jal_imm[20]}}, jal_imm};
`USED_IREG (rd);
end
`INST_JALR: begin
@ -166,7 +196,7 @@ module VX_decode #(
use_rd = 1;
use_imm = 1;
is_wstall = 1;
imm = {{20{u_12[11]}}, u_12};
imm = {{(`XLEN-12){u_12[11]}}, u_12};
`USED_IREG (rd);
`USED_IREG (rs1);
end
@ -185,7 +215,7 @@ module VX_decode #(
use_imm = 1;
use_PC = 1;
is_wstall = 1;
imm = {{19{b_imm[12]}}, b_imm};
imm = {{(`XLEN-13){b_imm[12]}}, b_imm};
`USED_IREG (rs1);
`USED_IREG (rs2);
end
@ -221,7 +251,7 @@ module VX_decode #(
use_imm = 1;
use_PC = 1;
is_wstall = 1;
imm = 32'd4;
imm = `XLEN'd4;
`USED_IREG (rd);
end
end
@ -232,7 +262,7 @@ module VX_decode #(
ex_type = `EX_LSU;
op_type = `INST_OP_BITS'({1'b0, func3});
use_rd = 1;
imm = {{20{u_12[11]}}, u_12};
imm = {{(`XLEN-12){u_12[11]}}, u_12};
`ifdef EXT_F_ENABLE
if (opcode[2]) begin
`USED_FREG (rd);
@ -247,7 +277,7 @@ module VX_decode #(
`INST_S: begin
ex_type = `EX_LSU;
op_type = `INST_OP_BITS'({1'b1, func3});
imm = {{20{s_imm[11]}}, s_imm};
imm = {{(`XLEN-12){s_imm[11]}}, s_imm};
`USED_IREG (rs1);
`ifdef EXT_F_ENABLE
if (opcode[2]) begin

View file

@ -1,4 +1,5 @@
`include "VX_define.vh"
`include "VX_config.vh"
module VX_dispatch (
input wire clk,
@ -46,7 +47,7 @@ module VX_dispatch (
wire [`INST_ALU_BITS-1:0] alu_op_type = `INST_ALU_BITS'(dispatch_if.op_type);
VX_skid_buffer #(
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `UP(`NT_BITS) + (2 * `NUM_THREADS * 32)),
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + `XLEN + 1 + 1 + `NR_BITS + 1 + `UP(`NT_BITS) + (2 * `NUM_THREADS * `XLEN)),
.OUT_REG (1)
) alu_buffer (
.clk (clk),
@ -65,16 +66,25 @@ module VX_dispatch (
wire [`INST_LSU_BITS-1:0] lsu_op_type = `INST_LSU_BITS'(dispatch_if.op_type);
wire lsu_is_fence = `INST_LSU_IS_FENCE(dispatch_if.op_mod);
// Truncate the immediate and rs1 to 32 bits before they are packed into the LSU request buffer
// Low 32 bits of the decoded immediate; taken from dispatch_if like every other field in this module
wire [31:0] trunc_ibuffer_imm = dispatch_if.imm[31:0];
wire [`NUM_THREADS-1:0][31:0] trunc_rs1;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
// These values are used for PC calculations, so should stay as 32 bits
assign trunc_rs1[i] = gpr_rsp_if.rs1_data[i][31:0];
end
VX_skid_buffer #(
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + `NUM_THREADS*32 + `NUM_THREADS*`XLEN),
.OUT_REG (1)
) lsu_buffer (
.clk (clk),
.reset (reset),
.valid_in (lsu_req_valid),
.ready_in (lsu_req_ready),
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, lsu_op_type, lsu_is_fence, dispatch_if.imm, dispatch_if.rd, dispatch_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
.data_out ({lsu_req_if.uuid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.is_fence, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, lsu_op_type, lsu_is_fence, trunc_ibuffer_imm, dispatch_if.rd, dispatch_if.wb, trunc_rs1, gpr_rsp_if.rs2_data}),
.data_out ({lsu_req_if.uuid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.is_fence, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
.valid_out (lsu_req_if.valid),
.ready_out (lsu_req_if.ready)
);
@ -86,8 +96,12 @@ module VX_dispatch (
wire [`CSR_ADDR_BITS-1:0] csr_addr = dispatch_if.imm[`CSR_ADDR_BITS-1:0];
wire [`NRI_BITS-1:0] csr_imm = dispatch_if.imm[`CSR_ADDR_BITS +: `NRI_BITS];
// Truncate rs1 to 32 bits for CSR requests (CSRs stay 32 bits wide). TODO: confirm this is correct for RV64.
wire [31:0] csr_rs1_data = gpr_rsp_if.rs1_data[tid][31:0]; // CSR stays 32 bits
VX_skid_buffer #(
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + `UP(`NT_BITS) + (`NUM_THREADS * 32)),
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + `UP(`NT_BITS) + (`NUM_THREADS * `XLEN)),
.OUT_REG (1)
) csr_buffer (
.clk (clk),

View file

@ -43,7 +43,7 @@ module VX_gpr_stage #(
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
VX_dp_ram #(
.DATAW (32),
.DATAW (`XLEN),
.SIZE (RAM_SIZE),
.INIT_ENABLE (1),
.INIT_VALUE (0)
@ -58,7 +58,7 @@ module VX_gpr_stage #(
);
VX_dp_ram #(
.DATAW (32),
.DATAW (`XLEN),
.SIZE (RAM_SIZE),
.INIT_ENABLE (1),
.INIT_VALUE (0)
@ -83,7 +83,7 @@ module VX_gpr_stage #(
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
VX_dp_ram #(
.DATAW (32),
.DATAW (`XLEN),
.SIZE (RAM_SIZE),
.INIT_ENABLE (1),
.INIT_VALUE (0)

View file

@ -46,7 +46,7 @@ module VX_gpu_unit #(
localparam UUID_WIDTH = `UP(`UUID_BITS);
localparam NW_WIDTH = `UP(`NW_BITS);
localparam WCTL_DATAW = `GPU_TMC_BITS + `GPU_WSPAWN_BITS + `GPU_SPLIT_BITS + `GPU_BARRIER_BITS;
localparam RSP_DATAW = `MAX(`NUM_THREADS * 32, WCTL_DATAW);
localparam RSP_DATAW = `MAX(`NUM_THREADS * `XLEN, WCTL_DATAW);
localparam RSP_ARB_DATAW = UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + RSP_DATAW + 1 + 1;
localparam RSP_ARB_SIZE = 1 + `EXT_TEX_ENABLED + `EXT_RASTER_ENABLED + `EXT_ROP_ENABLED + `EXT_IMADD_ENABLED;
@ -87,8 +87,8 @@ module VX_gpu_unit #(
wire is_bar = (gpu_req_if.op_type == `INST_GPU_BAR);
wire is_pred = (gpu_req_if.op_type == `INST_GPU_PRED);
wire [31:0] rs1_data = gpu_req_if.rs1_data[gpu_req_if.tid];
wire [31:0] rs2_data = gpu_req_if.rs2_data[gpu_req_if.tid];
wire [`XLEN-1:0] rs1_data = gpu_req_if.rs1_data[gpu_req_if.tid];
wire [`XLEN-1:0] rs2_data = gpu_req_if.rs2_data[gpu_req_if.tid];
wire [`NUM_THREADS-1:0] taken_tmask;
wire [`NUM_THREADS-1:0] not_taken_tmask;
@ -108,7 +108,7 @@ module VX_gpu_unit #(
// wspawn
wire [31:0] wspawn_pc = rs2_data;
wire [31:0] wspawn_pc = rs2_data[31:0];
wire [`NUM_WARPS-1:0] wspawn_wmask;
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
assign wspawn_wmask[i] = (i < rs1_data);
@ -344,7 +344,7 @@ module VX_gpu_unit #(
.ready_out (gpu_commit_if.ready)
);
assign gpu_commit_if.data = rsp_data[(`NUM_THREADS * 32)-1:0];
assign gpu_commit_if.data = rsp_data[(`NUM_THREADS * `XLEN)-1:0];
// warp control reponse

View file

@ -19,7 +19,7 @@ module VX_ibuffer #(
localparam ALM_FULL = SIZE - 1;
localparam ALM_EMPTY = 1;
localparam DATAW = `UP(`UUID_BITS) + `NUM_THREADS + 32 + `EX_BITS + `INST_OP_BITS + `INST_FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1;
localparam DATAW = `UP(`UUID_BITS) + `NUM_THREADS + 32 + `EX_BITS + `INST_OP_BITS + `INST_FRM_BITS + 1 + (`NR_BITS * 4) + `XLEN + 1 + 1;
localparam ADDRW = $clog2(SIZE);
localparam NWARPSW = $clog2(`NUM_WARPS+1);

View file

@ -1,5 +1,6 @@
`include "VX_define.vh"
`include "VX_gpu_types.vh"
`include "VX_config.vh"
`IGNORE_WARNINGS_BEGIN
import VX_gpu_types::*;
@ -28,7 +29,7 @@ module VX_lsu_unit #(
localparam NW_WIDTH = `UP(`NW_BITS);
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
localparam MEM_ADDRW = 32 - MEM_ASHIFT;
localparam MEM_ADDRW = `XLEN - MEM_ASHIFT;
localparam REQ_ASHIFT = `CLOG2(DCACHE_WORD_SIZE);
`ifdef SM_ENABLE
@ -37,8 +38,8 @@ module VX_lsu_unit #(
localparam SMEM_LOCAL_SIZE_W = `SMEM_LOCAL_SIZE >> MEM_ASHIFT;
localparam TOTAL_STACK_SIZE = `STACK_SIZE * `NUM_THREADS * `NUM_WARPS * `NUM_CORES;
localparam STACK_START_W = MEM_ADDRW'(`STACK_BASE_ADDR >> MEM_ASHIFT);
localparam STACK_END_W = MEM_ADDRW'((`STACK_BASE_ADDR - TOTAL_STACK_SIZE) >> MEM_ASHIFT);
localparam STACK_START_W = MEM_ADDRW'(`XLEN'(`STACK_BASE_ADDR) >> MEM_ASHIFT);
localparam STACK_END_W = MEM_ADDRW'((`XLEN'(`STACK_BASE_ADDR) - TOTAL_STACK_SIZE) >> MEM_ASHIFT);
`endif
// uuid, addr_type, wid, PC, tmask, rd, op_type, align, is_dup
@ -54,9 +55,9 @@ module VX_lsu_unit #(
// full address calculation
wire [`NUM_THREADS-1:0][31:0] full_addr;
wire [`NUM_THREADS-1:0][`XLEN-1:0] full_addr;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign full_addr[i] = lsu_req_if.base_addr[i] + lsu_req_if.offset;
assign full_addr[i] = lsu_req_if.base_addr[i][`XLEN-1:0] + lsu_req_if.offset;
end
// detect duplicate addresses
@ -77,7 +78,7 @@ module VX_lsu_unit #(
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [MEM_ADDRW-1:0] full_addr_b = full_addr[i][MEM_ASHIFT +: MEM_ADDRW];
// is non-cacheable address
wire is_addr_nc = (full_addr_b >= MEM_ADDRW'(`IO_BASE_ADDR >> MEM_ASHIFT));
wire is_addr_nc = (full_addr_b >= MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT));
`ifdef SM_ENABLE
// is stack address
wire is_stack_addr = (full_addr_b >= STACK_END_W) && (full_addr_b < STACK_START_W);
@ -106,15 +107,15 @@ module VX_lsu_unit #(
wire mem_req_valid;
wire [`NUM_THREADS-1:0] mem_req_mask;
wire mem_req_rw;
wire [`NUM_THREADS-1:0][29:0] mem_req_addr;
reg [`NUM_THREADS-1:0][3:0] mem_req_byteen;
reg [`NUM_THREADS-1:0][31:0] mem_req_data;
wire [`NUM_THREADS-1:0][`XLEN-3:0] mem_req_addr;
reg [`NUM_THREADS-1:0][DCACHE_WORD_SIZE-1:0] mem_req_byteen;
reg [`NUM_THREADS-1:0][`XLEN-1:0] mem_req_data;
wire [TAG_WIDTH-1:0] mem_req_tag;
wire mem_req_ready;
wire mem_rsp_valid;
wire [`NUM_THREADS-1:0] mem_rsp_mask;
wire [`NUM_THREADS-1:0][31:0] mem_rsp_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] mem_rsp_data;
wire [TAG_WIDTH-1:0] mem_rsp_tag;
wire mem_rsp_eop;
wire mem_rsp_ready;
@ -133,32 +134,51 @@ module VX_lsu_unit #(
wire [`NUM_THREADS-1:0][REQ_ASHIFT-1:0] req_align;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
assign req_align[i] = full_addr[i][1:0];
assign mem_req_addr[i] = full_addr[i][31:2];
assign req_align[i] = full_addr[i][REQ_ASHIFT-1:0];
assign mem_req_addr[i] = full_addr[i][`XLEN-1:REQ_ASHIFT];
end
// data formatting
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [REQ_ASHIFT-1:0] req_align_X1 = {req_align[i][1], 1'b1};
wire [REQ_ASHIFT-1:0] req_align_X1;
`ifdef MODE_32_BIT
assign req_align_X1 = {req_align[i][1], 1'b1};
`endif
`ifdef MODE_64_BIT
// TODO: VARUN TO CHECK
assign req_align_X1 = {req_align[i][1:0]};
`endif
always @(*) begin
mem_req_byteen[i] = {4{lsu_req_if.wb}};
mem_req_byteen[i] = {DCACHE_WORD_SIZE{lsu_req_if.wb}};
case (`INST_LSU_WSIZE(lsu_req_if.op_type))
0: mem_req_byteen[i][req_align[i]] = 1;
1: begin
1: begin // half (16 bit)
mem_req_byteen[i][req_align[i]] = 1;
mem_req_byteen[i][req_align_X1] = 1;
end
default : mem_req_byteen[i] = {4{1'b1}};
2: begin // word (32 bit)
mem_req_byteen[i][req_align[i]] = 1;
mem_req_byteen[i][req_align_X1] = 1;
mem_req_byteen[i][req_align_X1+1] = 1;
mem_req_byteen[i][req_align_X1+2] = 1;
end
default : mem_req_byteen[i] = {DCACHE_WORD_SIZE{1'b1}}; // double (64 bit)
endcase
end
always @(*) begin
mem_req_data[i] = lsu_req_if.store_data[i];
case (req_align[i])
1: mem_req_data[i][31:8] = lsu_req_if.store_data[i][23:0];
2: mem_req_data[i][31:16] = lsu_req_if.store_data[i][15:0];
3: mem_req_data[i][31:24] = lsu_req_if.store_data[i][7:0];
// Shift this thread's store data left by the byte offset within the word.
// The [i] index is required: without it the part-select addresses the
// thread dimension of the packed array instead of the bits of thread i.
1: mem_req_data[i][`XLEN-1:8] = lsu_req_if.store_data[i][`XLEN-9:0];
2: mem_req_data[i][`XLEN-1:16] = lsu_req_if.store_data[i][`XLEN-17:0];
3: mem_req_data[i][`XLEN-1:24] = lsu_req_if.store_data[i][`XLEN-25:0];
`ifdef MODE_64_BIT
// Byte offsets 4..7 only exist when the word is 8 bytes wide
4: mem_req_data[i][`XLEN-1:32] = lsu_req_if.store_data[i][`XLEN-33:0];
5: mem_req_data[i][`XLEN-1:40] = lsu_req_if.store_data[i][`XLEN-41:0];
6: mem_req_data[i][`XLEN-1:48] = lsu_req_if.store_data[i][`XLEN-49:0];
7: mem_req_data[i][`XLEN-1:56] = lsu_req_if.store_data[i][`XLEN-57:0];
`endif
default:;
endcase
end
@ -321,19 +341,28 @@ module VX_lsu_unit #(
// Formatted load result per thread; XLEN wide so 64-bit loads are not truncated before commit
reg [`NUM_THREADS-1:0][`XLEN-1:0] rsp_data;
wire [`NUM_THREADS-1:0] rsp_tmask;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? mem_rsp_data[0] : mem_rsp_data[i];
for (genvar i = 0; i < `NUM_THREADS; i++) begin // TODO: HOW TF DO I DO THE MEMORY response???
`ifdef MODE_64_BIT
// Full 64-bit response word for this thread (thread 0's word when the request was deduplicated)
wire [63:0] rsp_data64 = (i == 0 || rsp_is_dup) ? mem_rsp_data[0] : mem_rsp_data[i]; // ONLY VALID ON XLEN == 64
// Pick the 32-bit half selected by bit 2 of the byte offset, from that same word
wire [31:0] rsp_data32 = rsp_align[i][2] ? rsp_data64[63:32] : rsp_data64[31:0];
`else
wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? mem_rsp_data[0] : mem_rsp_data[i]; // ONLY VALID ON XLEN == 32
`endif
wire [15:0] rsp_data16 = rsp_align[i][1] ? rsp_data32[31:16] : rsp_data32[15:0];
wire [7:0] rsp_data8 = rsp_align[i][0] ? rsp_data16[15:8] : rsp_data16[7:0];
always @(*) begin
case (`INST_LSU_FMT(rsp_op_type))
`INST_FMT_B: rsp_data[i] = 32'(signed'(rsp_data8));
`INST_FMT_H: rsp_data[i] = 32'(signed'(rsp_data16));
`INST_FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data8));
`INST_FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data16));
`INST_FMT_W: rsp_data[i] = rsp_data32;
default: rsp_data[i] = 'x;
`INST_FMT_B: rsp_data[i] = `XLEN'(signed'(rsp_data8));
`INST_FMT_H: rsp_data[i] = `XLEN'(signed'(rsp_data16));
`INST_FMT_BU: rsp_data[i] = `XLEN'(unsigned'(rsp_data8));
`INST_FMT_HU: rsp_data[i] = `XLEN'(unsigned'(rsp_data16));
`INST_FMT_W: rsp_data[i] = `XLEN'(signed'(rsp_data32));
`ifdef MODE_64_BIT // new instructions for unsigned 32 and 64 bit load modes
`INST_FMT_WU: rsp_data[i] = `XLEN'(unsigned'(rsp_data32));
`INST_FMT_D: rsp_data[i] = `XLEN'(signed'(rsp_data64));
`endif
default: rsp_data[i] = 'x;
endcase
end
end
@ -343,7 +372,7 @@ module VX_lsu_unit #(
// send load commit
VX_skid_buffer #(
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1)
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * `XLEN) + 1)
) rsp_sbuf (
.clk (clk),
.reset (reset),

View file

@ -19,12 +19,12 @@ module VX_writeback #(
VX_writeback_if.master writeback_if,
// simulation helper signals
output wire [`NUM_REGS-1:0][31:0] sim_wb_value
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value
);
`UNUSED_PARAM (CORE_ID)
localparam NW_WIDTH = `UP(`NW_BITS);
localparam DATAW = NW_WIDTH + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32) + 1;
localparam DATAW = NW_WIDTH + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * `XLEN) + 1;
localparam NUM_RSPS = 4 + `EXT_F_ENABLED;
`ifdef EXT_F_ENABLE
@ -61,7 +61,7 @@ module VX_writeback #(
wb_alu_ready_in,
wb_ld_ready_in
}),
.data_in ({
.data_in ({
`ifdef EXT_F_ENABLE
{fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.rd, fpu_commit_if.data, fpu_commit_if.eop},
`endif
@ -84,7 +84,7 @@ module VX_writeback #(
assign ld_commit_if.ready = wb_ld_ready_in || ~ld_commit_if.wb;
// simulation helper signal to get RISC-V tests Pass/Fail status
reg [`NUM_REGS-1:0][31:0] sim_wb_value_r;
reg [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value_r;
always @(posedge clk) begin
if (writeback_if.valid && writeback_if.ready) begin
sim_wb_value_r[writeback_if.rd] <= writeback_if.data[0];

View file

@ -12,10 +12,10 @@ interface VX_alu_req_if ();
wire [`INST_MOD_BITS-1:0] op_mod;
wire use_PC;
wire use_imm;
wire [31:0] imm;
wire [`XLEN-1:0] imm;
wire [`UP(`NT_BITS)-1:0] tid;
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data;
wire [`NR_BITS-1:0] rd;
wire wb;
wire ready;

View file

@ -5,7 +5,7 @@ interface VX_branch_ctl_if ();
wire valid;
wire [`UP(`NW_BITS)-1:0] wid;
wire taken;
wire [31:0] dest;
wire [`XLEN-1:0] dest;
modport master (
output valid,

View file

@ -1,13 +1,13 @@
`include "VX_define.vh"
interface VX_commit_if ();
interface VX_commit_if();
wire valid;
wire [`UP(`UUID_BITS)-1:0] uuid;
wire [`UP(`NW_BITS)-1:0] wid;
wire [`NUM_THREADS-1:0] tmask;
wire [31:0] PC;
wire [`NUM_THREADS-1:0][31:0] data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] data;
wire [`NR_BITS-1:0] rd;
wire wb;
wire eop;

View file

@ -13,7 +13,7 @@ interface VX_decode_if ();
wire wb;
wire use_PC;
wire use_imm;
wire [31:0] imm;
wire [`XLEN-1:0] imm;
wire [`NR_BITS-1:0] rd;
wire [`NR_BITS-1:0] rs1;
wire [`NR_BITS-1:0] rs2;

View file

@ -1,10 +1,11 @@
`include "VX_define.vh"
`include "VX_config.vh"
interface VX_gpr_rsp_if ();
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data;
modport master (
output rs1_data,

View file

@ -11,9 +11,9 @@ interface VX_gpu_req_if();
wire [`INST_GPU_BITS-1:0] op_type;
wire [`INST_MOD_BITS-1:0] op_mod;
wire [`UP(`NT_BITS)-1:0] tid;
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NUM_THREADS-1:0][31:0] rs3_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data;
wire [`NR_BITS-1:0] rd;
wire wb;
wire ready;

View file

@ -13,7 +13,7 @@ interface VX_ibuffer_if ();
wire wb;
wire use_PC;
wire use_imm;
wire [31:0] imm;
wire [`XLEN-1:0] imm;
wire [`NR_BITS-1:0] rd;
wire [`NR_BITS-1:0] rs1;
wire [`NR_BITS-1:0] rs2;

View file

@ -9,9 +9,9 @@ interface VX_lsu_req_if ();
wire [31:0] PC;
wire [`INST_LSU_BITS-1:0] op_type;
wire is_fence;
wire [`NUM_THREADS-1:0][31:0] store_data;
wire [`NUM_THREADS-1:0][31:0] base_addr;
wire [31:0] offset;
wire [`NUM_THREADS-1:0][`XLEN-1:0] store_data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] base_addr;
wire [`XLEN-1:0] offset;
wire [`NR_BITS-1:0] rd;
wire wb;
wire ready;

View file

@ -8,7 +8,7 @@ interface VX_writeback_if ();
wire [`UP(`NW_BITS)-1:0] wid;
wire [31:0] PC;
wire [`NR_BITS-1:0] rd;
wire [`NUM_THREADS-1:0][31:0] data;
wire [`NUM_THREADS-1:0][`XLEN-1:0] data;
wire eop;
wire ready;