mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-04-24 13:57:17 -04:00
Updates for 64bit extension integrated to the latest [incomplete]
This commit is contained in:
parent
ac6340fd8f
commit
b25d2e7a7d
25 changed files with 297 additions and 149 deletions
|
@ -19,10 +19,19 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Comment out either the top two or the bottom two lines below to switch between 64-bit and 32-bit mode
|
||||
`ifndef XLEN
|
||||
`define XLEN 32
|
||||
`define XLEN 64
|
||||
`define MODE_64_BIT 1
|
||||
// `define XLEN 32
|
||||
// `define MODE_32_BIT 1
|
||||
`endif
|
||||
|
||||
// Disable the MULDIV, FPU, and TEX units since they are irrelevant to RV64I instructions
|
||||
`define EXT_M_DISABLE 1
|
||||
`define EXT_F_DISABLE 1
|
||||
`define EXT_TEX_DISABLE 1
|
||||
|
||||
`ifndef NUM_CLUSTERS
|
||||
`define NUM_CLUSTERS 1
|
||||
`endif
|
||||
|
|
|
@ -65,6 +65,10 @@
|
|||
`define INST_FENCE 7'b0001111 // Fence instructions
|
||||
`define INST_SYS 7'b1110011 // system instructions
|
||||
|
||||
// RV64I-specific instruction opcodes (used by all W instructions)
|
||||
`define INST_I_W 7'b0011011 // W type immediate instructions
|
||||
`define INST_R_W 7'b0111011 // W type register instructions
|
||||
|
||||
`define INST_FL 7'b0000111 // float load instruction
|
||||
`define INST_FS 7'b0100111 // float store instruction
|
||||
`define INST_FMADD 7'b1000011
|
||||
|
@ -91,25 +95,31 @@
|
|||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_OP_BITS 4
|
||||
`define INST_OP_BITS 5
|
||||
`define INST_MOD_BITS 3
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_ALU_ADD 4'b0000
|
||||
`define INST_ALU_LUI 4'b0010
|
||||
`define INST_ALU_AUIPC 4'b0011
|
||||
`define INST_ALU_SLTU 4'b0100
|
||||
`define INST_ALU_SLT 4'b0101
|
||||
`define INST_ALU_SRL 4'b1000
|
||||
`define INST_ALU_SRA 4'b1001
|
||||
`define INST_ALU_SUB 4'b1011
|
||||
`define INST_ALU_AND 4'b1100
|
||||
`define INST_ALU_OR 4'b1101
|
||||
`define INST_ALU_XOR 4'b1110
|
||||
`define INST_ALU_SLL 4'b1111
|
||||
`define INST_ALU_OTHER 4'b0111
|
||||
`define INST_ALU_BITS 4
|
||||
`define INST_ALU_ADD 5'b00000
|
||||
`define INST_ALU_LUI 5'b00010
|
||||
`define INST_ALU_AUIPC 5'b00011
|
||||
`define INST_ALU_SLTU 5'b00100
|
||||
`define INST_ALU_SLT 5'b00101
|
||||
`define INST_ALU_SRL 5'b01000
|
||||
`define INST_ALU_SRA 5'b01001
|
||||
`define INST_ALU_SUB 5'b01011
|
||||
`define INST_ALU_AND 5'b01100
|
||||
`define INST_ALU_OR 5'b01101
|
||||
`define INST_ALU_XOR 5'b01110
|
||||
`define INST_ALU_SLL 5'b01111
|
||||
`define INST_ALU_OTHER 5'b00111
|
||||
// RV64I instruction versions
|
||||
`define INST_ALU_ADD_W 5'b10000
|
||||
`define INST_ALU_SUB_W 5'b11011
|
||||
`define INST_ALU_SLL_W 5'b11111
|
||||
`define INST_ALU_SRL_W 5'b11000
|
||||
`define INST_ALU_SRA_W 5'b11001
|
||||
`define INST_ALU_BITS 5
|
||||
`define INST_ALU_OP(x) x[`INST_ALU_BITS-1:0]
|
||||
`define INST_ALU_OP_CLASS(x) x[3:2]
|
||||
`define INST_ALU_SIGNED(x) x[0]
|
||||
|
@ -151,15 +161,20 @@
|
|||
`define INST_FMT_W 3'b010
|
||||
`define INST_FMT_BU 3'b100
|
||||
`define INST_FMT_HU 3'b101
|
||||
`define INST_FMT_WU 3'b110
|
||||
`define INST_FMT_D 3'b011
|
||||
|
||||
`define INST_LSU_LB 4'b0000
|
||||
`define INST_LSU_LH 4'b0001
|
||||
`define INST_LSU_LW 4'b0010
|
||||
`define INST_LSU_LD 4'b0011 // new for RV64I LD
|
||||
`define INST_LSU_LBU 4'b0100
|
||||
`define INST_LSU_LHU 4'b0101
|
||||
`define INST_LSU_LWU 4'b0110 // new for RV64I LWU
|
||||
`define INST_LSU_SB 4'b1000
|
||||
`define INST_LSU_SH 4'b1001
|
||||
`define INST_LSU_SW 4'b1010
|
||||
`define INST_LSU_SD 4'b1011 // new for RV64I SD
|
||||
`define INST_LSU_BITS 4
|
||||
`define INST_LSU_FMT(x) x[2:0]
|
||||
`define INST_LSU_WSIZE(x) x[1:0]
|
||||
|
@ -287,13 +302,13 @@
|
|||
`endif
|
||||
|
||||
`define VX_MEM_BYTEEN_WIDTH `L3_LINE_SIZE
|
||||
`define VX_MEM_ADDR_WIDTH (32 - `CLOG2(`L3_LINE_SIZE))
|
||||
`define VX_MEM_ADDR_WIDTH (`XLEN - `CLOG2(`L3_LINE_SIZE))
|
||||
`define VX_MEM_DATA_WIDTH (`L3_LINE_SIZE * 8)
|
||||
`define VX_MEM_TAG_WIDTH L3_MEM_TAG_WIDTH
|
||||
`define VX_DCR_ADDR_WIDTH `DCR_ADDR_BITS
|
||||
`define VX_DCR_DATA_WIDTH 32
|
||||
`define VX_DCR_DATA_WIDTH `XLEN
|
||||
|
||||
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
|
||||
`define TO_FULL_ADDR(x) {x, (`XLEN-$bits(x))'(0)}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ module VX_mem_arb #(
|
|||
parameter NUM_REQS = 1,
|
||||
parameter DATA_WIDTH = 1,
|
||||
parameter DATA_SIZE = (DATA_WIDTH / 8),
|
||||
parameter ADDR_WIDTH = (32 - `CLOG2(DATA_SIZE)),
|
||||
parameter ADDR_WIDTH = (`XLEN - `CLOG2(DATA_SIZE)),
|
||||
parameter TAG_WIDTH = 1,
|
||||
parameter TAG_SEL_IDX = 0,
|
||||
parameter BUFFERED_REQ = 0,
|
||||
|
|
|
@ -48,7 +48,7 @@ module VX_mem_unit # (
|
|||
VX_mem_req_if.master mem_req_if,
|
||||
VX_mem_rsp_if.slave mem_rsp_if
|
||||
);
|
||||
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_cache_if perf_icache_if();
|
||||
VX_perf_cache_if perf_dcache_if();
|
||||
|
@ -187,7 +187,7 @@ module VX_mem_unit # (
|
|||
VX_smem_switch #(
|
||||
.NUM_REQS (2),
|
||||
.NUM_LANES (DCACHE_NUM_REQS),
|
||||
.DATA_SIZE (4),
|
||||
.DATA_SIZE (DCACHE_WORD_SIZE),
|
||||
.TAG_WIDTH (DCACHE_ARB_TAG_WIDTH),
|
||||
.TAG_SEL_IDX (0),
|
||||
.ARBITER ("P"),
|
||||
|
|
2
hw/rtl/cache/VX_cache.sv
vendored
2
hw/rtl/cache/VX_cache.sv
vendored
|
@ -17,7 +17,7 @@ module VX_cache #(
|
|||
// Number of associative ways
|
||||
parameter NUM_WAYS = 1,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = `XLEN/8,
|
||||
|
||||
// Core Request Queue Size
|
||||
parameter CREQ_SIZE = 0,
|
||||
|
|
4
hw/rtl/cache/VX_cache_define.vh
vendored
4
hw/rtl/cache/VX_cache_define.vh
vendored
|
@ -19,8 +19,8 @@
|
|||
`define LINES_PER_BANK (`BANK_SIZE / (LINE_SIZE * NUM_WAYS))
|
||||
`define WORDS_PER_LINE (LINE_SIZE / WORD_SIZE)
|
||||
|
||||
`define WORD_ADDR_WIDTH (32-`CLOG2(WORD_SIZE))
|
||||
`define MEM_ADDR_WIDTH (32-`CLOG2(LINE_SIZE))
|
||||
`define WORD_ADDR_WIDTH (`XLEN-`CLOG2(WORD_SIZE))
|
||||
`define MEM_ADDR_WIDTH (`XLEN-`CLOG2(LINE_SIZE))
|
||||
`define LINE_ADDR_WIDTH (`MEM_ADDR_WIDTH-`CLOG2(NUM_BANKS))
|
||||
|
||||
// Word select
|
||||
|
|
2
hw/rtl/cache/VX_shared_mem.sv
vendored
2
hw/rtl/cache/VX_shared_mem.sv
vendored
|
@ -14,7 +14,7 @@ module VX_shared_mem #(
|
|||
// Address width
|
||||
parameter ADDR_WIDTH = 22,
|
||||
// Size of a word in bytes
|
||||
parameter WORD_SIZE = 4,
|
||||
parameter WORD_SIZE = `XLEN/8,
|
||||
|
||||
// Request debug identifier
|
||||
parameter UUID_WIDTH = 0,
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
`include "VX_define.vh"
|
||||
`include "VX_config.vh"
|
||||
|
||||
module VX_alu_unit #(
|
||||
parameter CORE_ID = 0
|
||||
|
@ -16,16 +17,18 @@ module VX_alu_unit #(
|
|||
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
localparam UUID_WIDTH = `UP(`UUID_BITS);
|
||||
localparam NW_WIDTH = `UP(`NW_BITS);
|
||||
localparam RSP_ARB_DATAW = UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + `NUM_THREADS * 32;
|
||||
localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED;
|
||||
|
||||
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
||||
wire [`NUM_THREADS-1:0][31:0] add_result;
|
||||
wire [`NUM_THREADS-1:0][32:0] sub_result;
|
||||
wire [`NUM_THREADS-1:0][31:0] shr_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] msc_result;
|
||||
localparam UUID_WIDTH = `UP(`UUID_BITS);
|
||||
localparam NW_WIDTH = `UP(`NW_BITS);
|
||||
localparam RSP_ARB_DATAW = UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + `NUM_THREADS * 32;
|
||||
localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED;
|
||||
localparam SHIFT_IMM_BITS = `CLOG2(`XLEN) - 1;
|
||||
|
||||
|
||||
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] add_result;
|
||||
reg [`NUM_THREADS-1:0][32:0] sub_result; // 33 or 65 bits to keep the overflow bit for branch calculations
|
||||
reg [`NUM_THREADS-1:0][31:0] shr_result;
|
||||
reg [`NUM_THREADS-1:0][31:0] msc_result;
|
||||
|
||||
wire ready_in;
|
||||
|
||||
|
@ -37,35 +40,72 @@ module VX_alu_unit #(
|
|||
wire [1:0] alu_op_class = `INST_ALU_OP_CLASS(alu_op);
|
||||
wire is_sub = (alu_op == `INST_ALU_SUB);
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_in1 = alu_req_if.rs1_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_in2 = alu_req_if.rs2_data;
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.use_PC ? {`NUM_THREADS{alu_req_if.PC}} : alu_in1;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2_imm = alu_req_if.use_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in2_less = (alu_req_if.use_imm && ~is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||
wire [`NUM_THREADS-1:0][31:0] trunc_alu_in1, trunc_alu_result;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
|
||||
// PC operations should only be for 32 bits
|
||||
assign trunc_alu_in1[i] = alu_in1[i][31:0];
|
||||
assign trunc_alu_result[i] = alu_result[i][31:0];
|
||||
end
|
||||
|
||||
// PC operations should only be for 32 bits
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.use_PC ? {`NUM_THREADS{alu_req_if.PC}} : trunc_alu_in1;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_in2_imm = alu_req_if.use_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] alu_in2_less = (alu_req_if.use_imm && ~is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire [`XLEN-1:0] temp_add_result = {{`XLEN-32{1'b0}}, alu_in1_PC[i]} + alu_in2_imm[i];
|
||||
always @(*) begin
|
||||
case(alu_op)
|
||||
`INST_ALU_ADD: add_result[i] = temp_add_result;
|
||||
`INST_ALU_LUI, `INST_ALU_AUIPC, `INST_ALU_ADD_W: add_result[i] = `XLEN'($signed(temp_add_result[31:0])); //{{`XLEN-32{add_result[31]}}, temp_add_result[31:0]};
|
||||
default: add_result[i] = temp_add_result;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire [32:0] sub_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
|
||||
wire [32:0] sub_in2 = {alu_signed & alu_in2_less[i][31], alu_in2_less[i]};
|
||||
assign sub_result[i] = sub_in1 - sub_in2;
|
||||
wire [`XLEN:0] sub_in1 = {alu_signed & alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
wire [`XLEN:0] sub_in2 = {alu_signed & alu_in2_less[i][`XLEN-1], alu_in2_less[i]};
|
||||
|
||||
wire [`XLEN:0] temp_sub_result = sub_in1 - sub_in2;
|
||||
always @(*) begin
|
||||
case(alu_op)
|
||||
`INST_ALU_SUB: sub_result[i] = temp_sub_result;
|
||||
`INST_ALU_SUB_W: sub_result[i] = {temp_sub_result[`XLEN], `XLEN'($signed(temp_sub_result[31:0]))};
|
||||
default: sub_result[i] = temp_sub_result;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire [32:0] shr_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
|
||||
assign shr_result[i] = 32'($signed(shr_in1) >>> $signed(alu_in2_imm[i][4:0]));
|
||||
end
|
||||
wire [`XLEN:0] shr_in1 = {alu_signed & alu_in1[i][`XLEN-1], alu_in1[i]};
|
||||
wire [`XLEN-1:0] temp_shr_result = `XLEN'($signed(shr_in1) >>> alu_in2_imm[i][SHIFT_IMM_BITS:0]);
|
||||
wire [31:0] temp_shr_result_w = 32'($signed(shr_in1) >>> alu_in2_imm[i][4:0]);
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
always @(*) begin
|
||||
case(alu_op)
|
||||
`INST_ALU_SRA, `INST_ALU_SRL: shr_result[i] = temp_shr_result;
|
||||
`INST_ALU_SRA_W: shr_result[i] = `XLEN'($unsigned(temp_shr_result_w[31:0])); // is this needed or is it already 0 extended?
|
||||
`INST_ALU_SRL_W: shr_result[i] = `XLEN'($signed(temp_shr_result_w[31:0]));
|
||||
default: shr_result[i] = temp_shr_result;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire [31:0] temp_shift_result = alu_in1[i][31:0] << alu_in2_imm[i][4:0]; // only used for SLLW
|
||||
always @(*) begin
|
||||
case (alu_op)
|
||||
`INST_ALU_AND: msc_result[i] = alu_in1[i] & alu_in2_imm[i];
|
||||
`INST_ALU_OR: msc_result[i] = alu_in1[i] | alu_in2_imm[i];
|
||||
`INST_ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
|
||||
`INST_ALU_SLL: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
|
||||
`INST_ALU_XOR: msc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
|
||||
// `INST_ALU_SLL: msc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
|
||||
`INST_ALU_SLL: msc_result[i] = alu_in1[i] << alu_in2_imm[i][SHIFT_IMM_BITS:0]; // TODO: CHANGED: adjust this to shift using 6 bits for 64 bit
|
||||
`INST_ALU_SLL_W: msc_result[i] = `XLEN'($signed(temp_shift_result[31:0])); // TODO: CHANGED: adjust this to shift using 6 bits for 32 signed bit
|
||||
default: msc_result[i] = 'x;
|
||||
endcase
|
||||
end
|
||||
|
@ -74,11 +114,12 @@ module VX_alu_unit #(
|
|||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
always @(*) begin
|
||||
case (alu_op_class)
|
||||
2'b00: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC
|
||||
2'b01: alu_result[i] = {31'b0, sub_result[i][32]}; // SLTU, SLT
|
||||
2'b10: alu_result[i] = is_sub ? sub_result[i][31:0] // SUB
|
||||
: shr_result[i]; // SRL, SRA
|
||||
2'b11: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL
|
||||
2'b00: alu_result[i] = add_result[i]; // ADD, LUI, AUIPC, ADDIW, ADDW
|
||||
2'b01: alu_result[i] = {{`XLEN-1{1'b0}}, sub_result[i][`XLEN]}; // SLTU, SLT
|
||||
2'b10: alu_result[i] = is_sub ? sub_result[i][`XLEN-1:0] // SUB, SUBW
|
||||
: shr_result[i]; // SRL, SRA, SRLW, SRAW, SRLIW, SRAIW, SRLI, SRAI
|
||||
// 2'b11,
|
||||
default: alu_result[i] = msc_result[i]; // AND, OR, XOR, SLL, SLLIW, SLLW, SLLI
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
@ -86,10 +127,10 @@ module VX_alu_unit #(
|
|||
// branch
|
||||
|
||||
wire is_jal = is_br_op && (br_op == `INST_BR_JAL || br_op == `INST_BR_JALR);
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : alu_result;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : trunc_alu_result;
|
||||
|
||||
wire [31:0] br_dest = add_result[alu_req_if.tid];
|
||||
wire [32:0] cmp_result = sub_result[alu_req_if.tid];
|
||||
wire [31:0] br_dest = add_result[alu_req_if.tid][31:0];
|
||||
wire [32:0] cmp_result = sub_result[alu_req_if.tid][32:0];
|
||||
|
||||
wire is_less = cmp_result[32];
|
||||
wire is_equal = ~(| cmp_result[31:0]);
|
||||
|
@ -108,6 +149,11 @@ module VX_alu_unit #(
|
|||
wire alu_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] alu_data;
|
||||
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] full_alu_data;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign full_alu_data[i] = {{`XLEN-31{alu_data[i][31]}},alu_data[i][30:0]};
|
||||
end
|
||||
|
||||
wire [`INST_BR_BITS-1:0] br_op_r;
|
||||
wire [31:0] br_dest_r;
|
||||
wire is_less_r;
|
||||
|
@ -135,7 +181,7 @@ module VX_alu_unit #(
|
|||
assign branch_ctl_if.valid = alu_valid_out && alu_ready_out && is_br_op_r;
|
||||
assign branch_ctl_if.taken = ((br_less ? is_less_r : is_equal_r) ^ br_neg) | br_static;
|
||||
assign branch_ctl_if.wid = alu_wid;
|
||||
assign branch_ctl_if.dest = br_dest_r;
|
||||
assign branch_ctl_if.dest = br_dest_r[31:0];
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
|
||||
|
@ -149,7 +195,7 @@ module VX_alu_unit #(
|
|||
wire [31:0] mul_PC;
|
||||
wire [`NR_BITS-1:0] mul_rd;
|
||||
wire mul_wb;
|
||||
wire [`NUM_THREADS-1:0][31:0] mul_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] mul_data;
|
||||
|
||||
wire [`INST_MUL_BITS-1:0] mul_op = `INST_MUL_BITS'(alu_req_if.op_type);
|
||||
|
||||
|
@ -220,7 +266,7 @@ module VX_alu_unit #(
|
|||
`endif
|
||||
}),
|
||||
.data_in ({
|
||||
{alu_uuid, alu_wid, alu_tmask, alu_PC, alu_rd, alu_wb, alu_data}
|
||||
{alu_uuid, alu_wid, alu_tmask, alu_PC, alu_rd, alu_wb, full_alu_data}
|
||||
`ifdef EXT_M_ENABLE
|
||||
, {mul_uuid, mul_wid, mul_tmask, mul_PC, mul_rd, mul_wb, mul_data}
|
||||
`endif
|
||||
|
|
|
@ -58,12 +58,12 @@ module VX_csr_unit #(
|
|||
localparam NW_WIDTH = `UP(`NW_BITS);
|
||||
|
||||
|
||||
reg [`NUM_THREADS-1:0][31:0] csr_read_data;
|
||||
reg [31:0] csr_write_data;
|
||||
wire [31:0] csr_read_data_ro, csr_read_data_rw;
|
||||
wire [31:0] csr_req_data;
|
||||
reg csr_rd_enable;
|
||||
wire csr_wr_enable;
|
||||
reg [`NUM_THREADS-1:0][31:0] csr_read_data;
|
||||
reg [31:0] csr_write_data;
|
||||
wire [31:0] csr_read_data_ro, csr_read_data_rw;
|
||||
wire [31:0] csr_req_data;
|
||||
reg csr_rd_enable;
|
||||
wire csr_wr_enable;
|
||||
|
||||
`UNUSED_VAR (gpu_pending)
|
||||
wire csr_access_pending = (0
|
||||
|
@ -177,14 +177,14 @@ module VX_csr_unit #(
|
|||
.read_wid (csr_req_if.wid),
|
||||
.read_tmask (csr_req_if.tmask),
|
||||
.read_addr (csr_req_if.addr),
|
||||
.read_data_ro (csr_read_data_ro),
|
||||
.read_data_rw (csr_read_data_rw),
|
||||
.read_data_ro (csr_read_data_ro[31:0]),
|
||||
.read_data_rw (csr_read_data_rw[31:0]),
|
||||
|
||||
.write_enable (csr_req_valid && csr_wr_enable),
|
||||
.write_uuid (csr_req_if.uuid),
|
||||
.write_wid (csr_req_if.wid),
|
||||
.write_addr (csr_req_if.addr),
|
||||
.write_data (csr_write_data)
|
||||
.write_data (csr_write_data[31:0])
|
||||
);
|
||||
|
||||
// CSR read
|
||||
|
@ -194,14 +194,14 @@ module VX_csr_unit #(
|
|||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign wtid[i] = 32'(i);
|
||||
assign ltid[i] = (32'(csr_req_if.wid) << `NT_BITS) + i;
|
||||
assign gtid[i] = 32'((CORE_ID << (`NW_BITS + `NT_BITS)) + (32'(csr_req_if.wid) << `NT_BITS) + i);
|
||||
assign gtid[i] = 32'((32'(CORE_ID) << (`NW_BITS + `NT_BITS)) + (32'(csr_req_if.wid) << `NT_BITS) + i);
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
csr_rd_enable = 0;
|
||||
`ifdef EXT_RASTER_ENABLE
|
||||
if (raster_addr_enable) begin
|
||||
csr_read_data = raster_csr_if.read_data;
|
||||
csr_read_data = `XLEN'(raster_csr_if.read_data);
|
||||
end else
|
||||
`endif
|
||||
case (csr_req_if.addr)
|
||||
|
@ -241,6 +241,10 @@ module VX_csr_unit #(
|
|||
end
|
||||
|
||||
// send response
|
||||
wire [`NUM_THREADS-1:0][31:0] csr_commit_data;
|
||||
for(genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign csr_commit_if.data[i] = `XLEN'(csr_commit_data[i]);
|
||||
end
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + `NUM_THREADS * 32)
|
||||
|
@ -250,7 +254,7 @@ module VX_csr_unit #(
|
|||
.valid_in (csr_req_valid),
|
||||
.ready_in (csr_req_ready),
|
||||
.data_in ({csr_req_if.uuid, csr_req_if.wid, csr_req_if.tmask, csr_req_if.PC, csr_req_if.rd, csr_req_if.wb, csr_read_data}),
|
||||
.data_out ({csr_commit_if.uuid, csr_commit_if.wid, csr_commit_if.tmask, csr_commit_if.PC, csr_commit_if.rd, csr_commit_if.wb, csr_commit_if.data}),
|
||||
.data_out ({csr_commit_if.uuid, csr_commit_if.wid, csr_commit_if.tmask, csr_commit_if.PC, csr_commit_if.rd, csr_commit_if.wb, csr_commit_data}),
|
||||
.valid_out (csr_commit_if.valid),
|
||||
.ready_out (csr_commit_if.ready)
|
||||
);
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
`include "VX_define.vh"
|
||||
`include "VX_config.vh"
|
||||
`ifndef NDEBUG
|
||||
`include "VX_trace_info.vh"
|
||||
`endif
|
||||
|
@ -36,7 +37,7 @@ module VX_decode #(
|
|||
reg [`INST_OP_BITS-1:0] op_type;
|
||||
reg [`INST_MOD_BITS-1:0] op_mod;
|
||||
reg [`NR_BITS-1:0] rd_r, rs1_r, rs2_r, rs3_r;
|
||||
reg [31:0] imm;
|
||||
reg [`XLEN-1:0] imm;
|
||||
reg use_rd, use_PC, use_imm;
|
||||
reg is_join, is_wstall;
|
||||
|
||||
|
@ -59,7 +60,7 @@ module VX_decode #(
|
|||
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
|
||||
|
||||
`UNUSED_VAR (rs3)
|
||||
|
||||
|
||||
always @(*) begin
|
||||
|
||||
ex_type = '0;
|
||||
|
@ -91,7 +92,7 @@ module VX_decode #(
|
|||
endcase
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
imm = {{20{alu_imm[11]}}, alu_imm};
|
||||
imm = {{(`XLEN-12){alu_imm[11]}}, alu_imm};
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
|
@ -131,12 +132,41 @@ module VX_decode #(
|
|||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
`INST_I_W: begin
|
||||
// ADDIW, SLLIW, SRLIW, SRAIW
|
||||
ex_type = `EX_ALU;
|
||||
case (func3)
|
||||
3'h0: op_type = `INST_OP_BITS'(`INST_ALU_ADD_W);
|
||||
3'h1: op_type = `INST_OP_BITS'(`INST_ALU_SLL_W);
|
||||
3'h5: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SRA_W) : `INST_OP_BITS'(`INST_ALU_SRL_W);
|
||||
default:;
|
||||
endcase
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
imm = {{(`XLEN-12){alu_imm[11]}}, alu_imm};
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
`INST_R_W: begin
|
||||
// ADDW, SUBW, SLLW, SRLW, SRAW
|
||||
ex_type = `EX_ALU;
|
||||
case (func3)
|
||||
3'h0: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SUB_W) : `INST_OP_BITS'(`INST_ALU_ADD_W);
|
||||
3'h1: op_type = `INST_OP_BITS'(`INST_ALU_SLL_W);
|
||||
3'h5: op_type = (func7[5]) ? `INST_OP_BITS'(`INST_ALU_SRA_W) : `INST_OP_BITS'(`INST_ALU_SRL_W);
|
||||
default:;
|
||||
endcase
|
||||
use_rd = 1;
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
`INST_LUI: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `INST_OP_BITS'(`INST_ALU_LUI);
|
||||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
imm = {upper_imm, 12'(0)};
|
||||
imm = {{`XLEN-31{upper_imm[19]}}, upper_imm[18:0], 12'(0)};
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
`INST_AUIPC: begin
|
||||
|
@ -145,7 +175,7 @@ module VX_decode #(
|
|||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
imm = {upper_imm, 12'(0)};
|
||||
imm = {{`XLEN-31{upper_imm[19]}}, upper_imm[18:0], 12'(0)};
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
`INST_JAL: begin
|
||||
|
@ -156,7 +186,7 @@ module VX_decode #(
|
|||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
is_wstall = 1;
|
||||
imm = {{11{jal_imm[20]}}, jal_imm};
|
||||
imm = {{(`XLEN-21){jal_imm[20]}}, jal_imm};
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
`INST_JALR: begin
|
||||
|
@ -166,7 +196,7 @@ module VX_decode #(
|
|||
use_rd = 1;
|
||||
use_imm = 1;
|
||||
is_wstall = 1;
|
||||
imm = {{20{u_12[11]}}, u_12};
|
||||
imm = {{(`XLEN-12){u_12[11]}}, u_12};
|
||||
`USED_IREG (rd);
|
||||
`USED_IREG (rs1);
|
||||
end
|
||||
|
@ -185,7 +215,7 @@ module VX_decode #(
|
|||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
is_wstall = 1;
|
||||
imm = {{19{b_imm[12]}}, b_imm};
|
||||
imm = {{(`XLEN-13){b_imm[12]}}, b_imm};
|
||||
`USED_IREG (rs1);
|
||||
`USED_IREG (rs2);
|
||||
end
|
||||
|
@ -221,7 +251,7 @@ module VX_decode #(
|
|||
use_imm = 1;
|
||||
use_PC = 1;
|
||||
is_wstall = 1;
|
||||
imm = 32'd4;
|
||||
imm = `XLEN'd4;
|
||||
`USED_IREG (rd);
|
||||
end
|
||||
end
|
||||
|
@ -232,7 +262,7 @@ module VX_decode #(
|
|||
ex_type = `EX_LSU;
|
||||
op_type = `INST_OP_BITS'({1'b0, func3});
|
||||
use_rd = 1;
|
||||
imm = {{20{u_12[11]}}, u_12};
|
||||
imm = {{(`XLEN-12){u_12[11]}}, u_12};
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (opcode[2]) begin
|
||||
`USED_FREG (rd);
|
||||
|
@ -247,7 +277,7 @@ module VX_decode #(
|
|||
`INST_S: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `INST_OP_BITS'({1'b1, func3});
|
||||
imm = {{20{s_imm[11]}}, s_imm};
|
||||
imm = {{(`XLEN-12){s_imm[11]}}, s_imm};
|
||||
`USED_IREG (rs1);
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (opcode[2]) begin
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
`include "VX_define.vh"
|
||||
`include "VX_config.vh"
|
||||
|
||||
module VX_dispatch (
|
||||
input wire clk,
|
||||
|
@ -46,7 +47,7 @@ module VX_dispatch (
|
|||
wire [`INST_ALU_BITS-1:0] alu_op_type = `INST_ALU_BITS'(dispatch_if.op_type);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `UP(`NT_BITS) + (2 * `NUM_THREADS * 32)),
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + 32 + `INST_ALU_BITS + `INST_MOD_BITS + `XLEN + 1 + 1 + `NR_BITS + 1 + `UP(`NT_BITS) + (2 * `NUM_THREADS * `XLEN)),
|
||||
.OUT_REG (1)
|
||||
) alu_buffer (
|
||||
.clk (clk),
|
||||
|
@ -65,16 +66,25 @@ module VX_dispatch (
|
|||
wire [`INST_LSU_BITS-1:0] lsu_op_type = `INST_LSU_BITS'(dispatch_if.op_type);
|
||||
wire lsu_is_fence = `INST_LSU_IS_FENCE(dispatch_if.op_mod);
|
||||
|
||||
// USED TO TRUNCATE IMMEDIATE and RS1 TO 32 BITS
|
||||
wire [31:0] trunc_ibuffer_imm = ibuffer_if.imm[31:0];
|
||||
wire [`NUM_THREADS-1:0][31:0] trunc_rs1;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
// These values are used for PC calculations, so should stay as 32 bits
|
||||
assign trunc_rs1[i] = gpr_rsp_if.rs1_data[i][31:0];
|
||||
end
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `INST_LSU_BITS + 1 + 32 + `NR_BITS + 1 + `NUM_THREADS*32 + `NUM_THREADS*`XLEN),
|
||||
.OUT_REG (1)
|
||||
) lsu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (lsu_req_valid),
|
||||
.ready_in (lsu_req_ready),
|
||||
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, lsu_op_type, lsu_is_fence, dispatch_if.imm, dispatch_if.rd, dispatch_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({lsu_req_if.uuid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.is_fence, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
|
||||
.data_in ({dispatch_if.uuid, dispatch_if.wid, dispatch_if.tmask, dispatch_if.PC, lsu_op_type, lsu_is_fence, trunc_ibuffer_imm, dispatch_if.rd, dispatch_if.wb, trunc_rs1, gpr_rsp_if.rs2_data}),
|
||||
.data_out ({lsu_req_if.uuid, lsu_req_if.wid, lsu_req_if.tmask, lsu_req_if.PC, lsu_req_if.op_type, lsu_req_if.is_fence, lsu_req_if.offset, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.base_addr, lsu_req_if.store_data}),
|
||||
.valid_out (lsu_req_if.valid),
|
||||
.ready_out (lsu_req_if.ready)
|
||||
);
|
||||
|
@ -86,8 +96,12 @@ module VX_dispatch (
|
|||
wire [`CSR_ADDR_BITS-1:0] csr_addr = dispatch_if.imm[`CSR_ADDR_BITS-1:0];
|
||||
wire [`NRI_BITS-1:0] csr_imm = dispatch_if.imm[`CSR_ADDR_BITS +: `NRI_BITS];
|
||||
|
||||
// Truncates the rs1 data used for CSR operations to 32 bits. TODO: verify that this is correct.
|
||||
|
||||
wire [31:0] csr_rs1_data = gpr_rsp_if.rs1_data[tid][31:0]; // CSR stays 32 bits
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + `UP(`NT_BITS) + (`NUM_THREADS * 32)),
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `INST_CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NRI_BITS + `UP(`NT_BITS) + (`NUM_THREADS * `XLEN)),
|
||||
.OUT_REG (1)
|
||||
) csr_buffer (
|
||||
.clk (clk),
|
||||
|
|
|
@ -43,7 +43,7 @@ module VX_gpr_stage #(
|
|||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
VX_dp_ram #(
|
||||
.DATAW (32),
|
||||
.DATAW (`XLEN),
|
||||
.SIZE (RAM_SIZE),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0)
|
||||
|
@ -58,7 +58,7 @@ module VX_gpr_stage #(
|
|||
);
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (32),
|
||||
.DATAW (`XLEN),
|
||||
.SIZE (RAM_SIZE),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0)
|
||||
|
@ -83,7 +83,7 @@ module VX_gpr_stage #(
|
|||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
VX_dp_ram #(
|
||||
.DATAW (32),
|
||||
.DATAW (`XLEN),
|
||||
.SIZE (RAM_SIZE),
|
||||
.INIT_ENABLE (1),
|
||||
.INIT_VALUE (0)
|
||||
|
|
|
@ -46,7 +46,7 @@ module VX_gpu_unit #(
|
|||
localparam UUID_WIDTH = `UP(`UUID_BITS);
|
||||
localparam NW_WIDTH = `UP(`NW_BITS);
|
||||
localparam WCTL_DATAW = `GPU_TMC_BITS + `GPU_WSPAWN_BITS + `GPU_SPLIT_BITS + `GPU_BARRIER_BITS;
|
||||
localparam RSP_DATAW = `MAX(`NUM_THREADS * 32, WCTL_DATAW);
|
||||
localparam RSP_DATAW = `MAX(`NUM_THREADS * `XLEN, WCTL_DATAW);
|
||||
localparam RSP_ARB_DATAW = UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + RSP_DATAW + 1 + 1;
|
||||
localparam RSP_ARB_SIZE = 1 + `EXT_TEX_ENABLED + `EXT_RASTER_ENABLED + `EXT_ROP_ENABLED + `EXT_IMADD_ENABLED;
|
||||
|
||||
|
@ -87,8 +87,8 @@ module VX_gpu_unit #(
|
|||
wire is_bar = (gpu_req_if.op_type == `INST_GPU_BAR);
|
||||
wire is_pred = (gpu_req_if.op_type == `INST_GPU_PRED);
|
||||
|
||||
wire [31:0] rs1_data = gpu_req_if.rs1_data[gpu_req_if.tid];
|
||||
wire [31:0] rs2_data = gpu_req_if.rs2_data[gpu_req_if.tid];
|
||||
wire [`XLEN-1:0] rs1_data = gpu_req_if.rs1_data[gpu_req_if.tid];
|
||||
wire [`XLEN-1:0] rs2_data = gpu_req_if.rs2_data[gpu_req_if.tid];
|
||||
|
||||
wire [`NUM_THREADS-1:0] taken_tmask;
|
||||
wire [`NUM_THREADS-1:0] not_taken_tmask;
|
||||
|
@ -108,7 +108,7 @@ module VX_gpu_unit #(
|
|||
|
||||
// wspawn
|
||||
|
||||
wire [31:0] wspawn_pc = rs2_data;
|
||||
wire [31:0] wspawn_pc = rs2_data[31:0];
|
||||
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
||||
for (genvar i = 0; i < `NUM_WARPS; ++i) begin
|
||||
assign wspawn_wmask[i] = (i < rs1_data);
|
||||
|
@ -344,7 +344,7 @@ module VX_gpu_unit #(
|
|||
.ready_out (gpu_commit_if.ready)
|
||||
);
|
||||
|
||||
assign gpu_commit_if.data = rsp_data[(`NUM_THREADS * 32)-1:0];
|
||||
assign gpu_commit_if.data = rsp_data[(`NUM_THREADS * `XLEN)-1:0];
|
||||
|
||||
// warp control response
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@ module VX_ibuffer #(
|
|||
localparam ALM_FULL = SIZE - 1;
|
||||
localparam ALM_EMPTY = 1;
|
||||
|
||||
localparam DATAW = `UP(`UUID_BITS) + `NUM_THREADS + 32 + `EX_BITS + `INST_OP_BITS + `INST_FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1;
|
||||
localparam DATAW = `UP(`UUID_BITS) + `NUM_THREADS + 32 + `EX_BITS + `INST_OP_BITS + `INST_FRM_BITS + 1 + (`NR_BITS * 4) + `XLEN + 1 + 1;
|
||||
localparam ADDRW = $clog2(SIZE);
|
||||
localparam NWARPSW = $clog2(`NUM_WARPS+1);
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
`include "VX_define.vh"
|
||||
`include "VX_gpu_types.vh"
|
||||
`include "VX_config.vh"
|
||||
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
import VX_gpu_types::*;
|
||||
|
@ -28,7 +29,7 @@ module VX_lsu_unit #(
|
|||
localparam NW_WIDTH = `UP(`NW_BITS);
|
||||
|
||||
localparam MEM_ASHIFT = `CLOG2(`MEM_BLOCK_SIZE);
|
||||
localparam MEM_ADDRW = 32 - MEM_ASHIFT;
|
||||
localparam MEM_ADDRW = `XLEN - MEM_ASHIFT;
|
||||
localparam REQ_ASHIFT = `CLOG2(DCACHE_WORD_SIZE);
|
||||
|
||||
`ifdef SM_ENABLE
|
||||
|
@ -37,8 +38,8 @@ module VX_lsu_unit #(
|
|||
localparam SMEM_LOCAL_SIZE_W = `SMEM_LOCAL_SIZE >> MEM_ASHIFT;
|
||||
|
||||
localparam TOTAL_STACK_SIZE = `STACK_SIZE * `NUM_THREADS * `NUM_WARPS * `NUM_CORES;
|
||||
localparam STACK_START_W = MEM_ADDRW'(`STACK_BASE_ADDR >> MEM_ASHIFT);
|
||||
localparam STACK_END_W = MEM_ADDRW'((`STACK_BASE_ADDR - TOTAL_STACK_SIZE) >> MEM_ASHIFT);
|
||||
localparam STACK_START_W = MEM_ADDRW'(`XLEN'(`STACK_BASE_ADDR) >> MEM_ASHIFT);
|
||||
localparam STACK_END_W = MEM_ADDRW'((`XLEN'(`STACK_BASE_ADDR) - TOTAL_STACK_SIZE) >> MEM_ASHIFT);
|
||||
`endif
|
||||
|
||||
// uuid, addr_type, wid, PC, tmask, rd, op_type, align, is_dup
|
||||
|
@ -54,9 +55,9 @@ module VX_lsu_unit #(
|
|||
|
||||
// full address calculation
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] full_addr;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] full_addr;
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign full_addr[i] = lsu_req_if.base_addr[i] + lsu_req_if.offset;
|
||||
assign full_addr[i] = lsu_req_if.base_addr[i][`XLEN-1:0] + lsu_req_if.offset;
|
||||
end
|
||||
|
||||
// detect duplicate addresses
|
||||
|
@ -77,7 +78,7 @@ module VX_lsu_unit #(
|
|||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire [MEM_ADDRW-1:0] full_addr_b = full_addr[i][MEM_ASHIFT +: MEM_ADDRW];
|
||||
// is non-cacheable address
|
||||
wire is_addr_nc = (full_addr_b >= MEM_ADDRW'(`IO_BASE_ADDR >> MEM_ASHIFT));
|
||||
wire is_addr_nc = (full_addr_b >= MEM_ADDRW'(`XLEN'(`IO_BASE_ADDR) >> MEM_ASHIFT));
|
||||
`ifdef SM_ENABLE
|
||||
// is stack address
|
||||
wire is_stack_addr = (full_addr_b >= STACK_END_W) && (full_addr_b < STACK_START_W);
|
||||
|
@ -106,15 +107,15 @@ module VX_lsu_unit #(
|
|||
wire mem_req_valid;
|
||||
wire [`NUM_THREADS-1:0] mem_req_mask;
|
||||
wire mem_req_rw;
|
||||
wire [`NUM_THREADS-1:0][29:0] mem_req_addr;
|
||||
reg [`NUM_THREADS-1:0][3:0] mem_req_byteen;
|
||||
reg [`NUM_THREADS-1:0][31:0] mem_req_data;
|
||||
// NOTE(review): declared as XLEN-2 bits, but the driver in this module slices
// full_addr[i][`XLEN-1:REQ_ASHIFT]; the two widths only agree when REQ_ASHIFT == 2.
// Confirm the intended width (and the downstream port width) for 64-bit word sizes.
wire [`NUM_THREADS-1:0][`XLEN-3:0] mem_req_addr;
|
||||
reg [`NUM_THREADS-1:0][DCACHE_WORD_SIZE-1:0] mem_req_byteen;
|
||||
reg [`NUM_THREADS-1:0][`XLEN-1:0] mem_req_data;
|
||||
wire [TAG_WIDTH-1:0] mem_req_tag;
|
||||
wire mem_req_ready;
|
||||
|
||||
wire mem_rsp_valid;
|
||||
wire [`NUM_THREADS-1:0] mem_rsp_mask;
|
||||
wire [`NUM_THREADS-1:0][31:0] mem_rsp_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] mem_rsp_data;
|
||||
wire [TAG_WIDTH-1:0] mem_rsp_tag;
|
||||
wire mem_rsp_eop;
|
||||
wire mem_rsp_ready;
|
||||
|
@ -133,32 +134,51 @@ module VX_lsu_unit #(
|
|||
wire [`NUM_THREADS-1:0][REQ_ASHIFT-1:0] req_align;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
assign req_align[i] = full_addr[i][1:0];
|
||||
assign mem_req_addr[i] = full_addr[i][31:2];
|
||||
assign req_align[i] = full_addr[i][REQ_ASHIFT-1:0];
|
||||
assign mem_req_addr[i] = full_addr[i][`XLEN-1:REQ_ASHIFT];
|
||||
end
|
||||
|
||||
// data formatting
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire [REQ_ASHIFT-1:0] req_align_X1 = {req_align[i][1], 1'b1};
|
||||
wire [REQ_ASHIFT-1:0] req_align_X1;
|
||||
// Index of the odd byte of the halfword that contains req_align[i]:
// keep every alignment bit above bit 0 and force bit 0 to 1.
// Written in terms of REQ_ASHIFT so the same expression is correct for both
// 4-byte and 8-byte words; for REQ_ASHIFT == 2 it reduces to {req_align[i][1], 1'b1}.
// It also no longer depends on MODE_32_BIT / MODE_64_BIT being defined, so the
// wire cannot be left undriven when XLEN is supplied from the command line.
|
||||
assign req_align_X1 = {req_align[i][REQ_ASHIFT-1:1], 1'b1};
|
||||
// Byte-enable mask for thread i.
// Every lane first takes the value of lsu_req_if.wb, then the access size
// (`INST_LSU_WSIZE of the op type) forces individual lanes to 1.
// NOTE(review): presumably wb is set for loads so that all lanes are enabled
// on reads - confirm against the request interface.
always @(*) begin
|
||||
mem_req_byteen[i] = {4{lsu_req_if.wb}};
|
||||
mem_req_byteen[i] = {DCACHE_WORD_SIZE{lsu_req_if.wb}};
|
||||
case (`INST_LSU_WSIZE(lsu_req_if.op_type))
|
||||
0: mem_req_byteen[i][req_align[i]] = 1;
|
||||
1: begin
|
||||
1: begin // half (16 bit)
|
||||
mem_req_byteen[i][req_align[i]] = 1;
|
||||
mem_req_byteen[i][req_align_X1] = 1;
|
||||
end
|
||||
default : mem_req_byteen[i] = {4{1'b1}};
|
||||
// Word access: enables lane req_align[i] plus lanes req_align_X1 .. req_align_X1+2.
// NOTE(review): this covers four consecutive lanes only if req_align_X1 is
// req_align[i] + 1, i.e. the address is word aligned - verify for 64-bit mode.
2: begin // word (32 bit)
|
||||
mem_req_byteen[i][req_align[i]] = 1;
|
||||
mem_req_byteen[i][req_align_X1] = 1;
|
||||
mem_req_byteen[i][req_align_X1+1] = 1;
|
||||
mem_req_byteen[i][req_align_X1+2] = 1;
|
||||
end
|
||||
default : mem_req_byteen[i] = {DCACHE_WORD_SIZE{1'b1}}; // double (64 bit)
|
||||
endcase
|
||||
end
|
||||
|
||||
// Store-data alignment for thread i.
// The store value is passed through unchanged by default; for a non-zero byte
// offset k (req_align[i]) its low-order bytes are moved up by k bytes so they
// land in the byte lanes selected by the address. Every assignment indexes
// thread i first - without the [i] the part-select would address the thread
// dimension of the packed array instead of the bits of this thread's word.
always @(*) begin
|
||||
mem_req_data[i] = lsu_req_if.store_data[i];
|
||||
case (req_align[i])
|
||||
1: mem_req_data[i][31:8] = lsu_req_if.store_data[i][23:0];
|
||||
2: mem_req_data[i][31:16] = lsu_req_if.store_data[i][15:0];
|
||||
3: mem_req_data[i][31:24] = lsu_req_if.store_data[i][7:0];
|
||||
1: mem_req_data[i][`XLEN-1:8] = lsu_req_if.store_data[i][`XLEN-9:0];
|
||||
2: mem_req_data[i][`XLEN-1:16] = lsu_req_if.store_data[i][`XLEN-17:0];
|
||||
3: mem_req_data[i][`XLEN-1:24] = lsu_req_if.store_data[i][`XLEN-25:0];
|
||||
`ifdef MODE_64_BIT
|
||||
4: mem_req_data[i][`XLEN-1:32] = lsu_req_if.store_data[i][`XLEN-33:0];
|
||||
5: mem_req_data[i][`XLEN-1:40] = lsu_req_if.store_data[i][`XLEN-41:0];
|
||||
6: mem_req_data[i][`XLEN-1:48] = lsu_req_if.store_data[i][`XLEN-49:0];
|
||||
7: mem_req_data[i][`XLEN-1:56] = lsu_req_if.store_data[i][`XLEN-57:0];
|
||||
`endif
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
@ -321,19 +341,28 @@ module VX_lsu_unit #(
|
|||
// Formatted load result per thread. Must be XLEN bits wide: the response
// formatting logic assigns `XLEN'(...) values to each entry, and the load
// commit buffer is sized with NUM_THREADS * XLEN data bits.
reg [`NUM_THREADS-1:0][`XLEN-1:0] rsp_data;
|
||||
wire [`NUM_THREADS-1:0] rsp_tmask;
|
||||
|
||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
||||
wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? mem_rsp_data[0] : mem_rsp_data[i];
|
||||
// Load response formatting: for each thread, pick the response word (thread 0's
// word when the request was a duplicate), narrow it step by step using the
// alignment bits, then sign- or zero-extend to XLEN according to the load format.
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||
`ifdef MODE_64_BIT
|
||||
wire [63:0] rsp_data64 = (i == 0 || rsp_is_dup) ? mem_rsp_data[0] : mem_rsp_data[i]; // full 64-bit response word (XLEN == 64 only)
|
||||
// Select the 32-bit half from this thread's own (or the de-duplicated) word;
// alignment bit 2 chooses between the upper and lower half.
wire [31:0] rsp_data32 = rsp_align[i][2] ? rsp_data64[63:32] : rsp_data64[31:0];
|
||||
`else
|
||||
wire [31:0] rsp_data32 = (i == 0 || rsp_is_dup) ? mem_rsp_data[0] : mem_rsp_data[i]; // ONLY VALID ON XLEN == 32
|
||||
`endif
|
||||
wire [15:0] rsp_data16 = rsp_align[i][1] ? rsp_data32[31:16] : rsp_data32[15:0];
|
||||
wire [7:0] rsp_data8 = rsp_align[i][0] ? rsp_data16[15:8] : rsp_data16[7:0];
|
||||

||||
always @(*) begin
|
||||
case (`INST_LSU_FMT(rsp_op_type))
|
||||
`INST_FMT_B: rsp_data[i] = 32'(signed'(rsp_data8));
|
||||
`INST_FMT_H: rsp_data[i] = 32'(signed'(rsp_data16));
|
||||
`INST_FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data8));
|
||||
`INST_FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data16));
|
||||
`INST_FMT_W: rsp_data[i] = rsp_data32;
|
||||
default: rsp_data[i] = 'x;
|
||||
`INST_FMT_B: rsp_data[i] = `XLEN'(signed'(rsp_data8));
|
||||
`INST_FMT_H: rsp_data[i] = `XLEN'(signed'(rsp_data16));
|
||||
`INST_FMT_BU: rsp_data[i] = `XLEN'(unsigned'(rsp_data8));
|
||||
`INST_FMT_HU: rsp_data[i] = `XLEN'(unsigned'(rsp_data16));
|
||||
`INST_FMT_W: rsp_data[i] = `XLEN'(signed'(rsp_data32));
|
||||
`ifdef MODE_64_BIT // new instructions for unsigned 32 and 64 bit load modes
|
||||
`INST_FMT_WU: rsp_data[i] = `XLEN'(unsigned'(rsp_data32));
|
||||
`INST_FMT_D: rsp_data[i] = `XLEN'(signed'(rsp_data64));
|
||||
`endif
|
||||
default: rsp_data[i] = 'x;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
@ -343,7 +372,7 @@ module VX_lsu_unit #(
|
|||
// send load commit
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1)
|
||||
.DATAW (UUID_WIDTH + NW_WIDTH + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * `XLEN) + 1)
|
||||
) rsp_sbuf (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
|
|
@ -19,12 +19,12 @@ module VX_writeback #(
|
|||
VX_writeback_if.master writeback_if,
|
||||
|
||||
// simulation helper signals
|
||||
output wire [`NUM_REGS-1:0][31:0] sim_wb_value
|
||||
output wire [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value
|
||||
);
|
||||
`UNUSED_PARAM (CORE_ID)
|
||||
|
||||
localparam NW_WIDTH = `UP(`NW_BITS);
|
||||
localparam DATAW = NW_WIDTH + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32) + 1;
|
||||
localparam DATAW = NW_WIDTH + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * `XLEN) + 1;
|
||||
localparam NUM_RSPS = 4 + `EXT_F_ENABLED;
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
|
@ -61,7 +61,7 @@ module VX_writeback #(
|
|||
wb_alu_ready_in,
|
||||
wb_ld_ready_in
|
||||
}),
|
||||
.data_in ({
|
||||
.data_in ({
|
||||
`ifdef EXT_F_ENABLE
|
||||
{fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.rd, fpu_commit_if.data, fpu_commit_if.eop},
|
||||
`endif
|
||||
|
@ -84,7 +84,7 @@ module VX_writeback #(
|
|||
assign ld_commit_if.ready = wb_ld_ready_in || ~ld_commit_if.wb;
|
||||
|
||||
// simulation helper signal to get RISC-V tests Pass/Fail status
|
||||
reg [`NUM_REGS-1:0][31:0] sim_wb_value_r;
|
||||
reg [`NUM_REGS-1:0][`XLEN-1:0] sim_wb_value_r;
|
||||
always @(posedge clk) begin
|
||||
if (writeback_if.valid && writeback_if.ready) begin
|
||||
sim_wb_value_r[writeback_if.rd] <= writeback_if.data[0];
|
||||
|
|
|
@ -12,10 +12,10 @@ interface VX_alu_req_if ();
|
|||
wire [`INST_MOD_BITS-1:0] op_mod;
|
||||
wire use_PC;
|
||||
wire use_imm;
|
||||
wire [31:0] imm;
|
||||
wire [`XLEN-1:0] imm;
|
||||
wire [`UP(`NT_BITS)-1:0] tid;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
wire ready;
|
||||
|
|
|
@ -5,7 +5,7 @@ interface VX_branch_ctl_if ();
|
|||
wire valid;
|
||||
wire [`UP(`NW_BITS)-1:0] wid;
|
||||
wire taken;
|
||||
wire [31:0] dest;
|
||||
wire [`XLEN-1:0] dest;
|
||||
|
||||
modport master (
|
||||
output valid,
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
`include "VX_define.vh"
|
||||
|
||||
interface VX_commit_if ();
|
||||
interface VX_commit_if();
|
||||
|
||||
wire valid;
|
||||
wire [`UP(`UUID_BITS)-1:0] uuid;
|
||||
wire [`UP(`NW_BITS)-1:0] wid;
|
||||
wire [`NUM_THREADS-1:0] tmask;
|
||||
wire [31:0] PC;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
wire eop;
|
||||
|
|
|
@ -13,7 +13,7 @@ interface VX_decode_if ();
|
|||
wire wb;
|
||||
wire use_PC;
|
||||
wire use_imm;
|
||||
wire [31:0] imm;
|
||||
wire [`XLEN-1:0] imm;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [`NR_BITS-1:0] rs2;
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
`include "VX_define.vh"
|
||||
`include "VX_config.vh"
|
||||
|
||||
interface VX_gpr_rsp_if ();
|
||||
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data;
|
||||
|
||||
modport master (
|
||||
output rs1_data,
|
||||
|
|
|
@ -11,9 +11,9 @@ interface VX_gpu_req_if();
|
|||
wire [`INST_GPU_BITS-1:0] op_type;
|
||||
wire [`INST_MOD_BITS-1:0] op_mod;
|
||||
wire [`UP(`NT_BITS)-1:0] tid;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs1_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs2_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] rs3_data;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
wire ready;
|
||||
|
|
|
@ -13,7 +13,7 @@ interface VX_ibuffer_if ();
|
|||
wire wb;
|
||||
wire use_PC;
|
||||
wire use_imm;
|
||||
wire [31:0] imm;
|
||||
wire [`XLEN-1:0] imm;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NR_BITS-1:0] rs1;
|
||||
wire [`NR_BITS-1:0] rs2;
|
||||
|
|
|
@ -9,9 +9,9 @@ interface VX_lsu_req_if ();
|
|||
wire [31:0] PC;
|
||||
wire [`INST_LSU_BITS-1:0] op_type;
|
||||
wire is_fence;
|
||||
wire [`NUM_THREADS-1:0][31:0] store_data;
|
||||
wire [`NUM_THREADS-1:0][31:0] base_addr;
|
||||
wire [31:0] offset;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] store_data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] base_addr;
|
||||
wire [`XLEN-1:0] offset;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire wb;
|
||||
wire ready;
|
||||
|
|
|
@ -8,7 +8,7 @@ interface VX_writeback_if ();
|
|||
wire [`UP(`NW_BITS)-1:0] wid;
|
||||
wire [31:0] PC;
|
||||
wire [`NR_BITS-1:0] rd;
|
||||
wire [`NUM_THREADS-1:0][31:0] data;
|
||||
wire [`NUM_THREADS-1:0][`XLEN-1:0] data;
|
||||
wire eop;
|
||||
wire ready;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue