lkg build with pipeline + FPU fixes

This commit is contained in:
Blaise Tine 2020-07-31 09:29:44 -04:00
parent 0d82a8aa4f
commit c9755a0c48
33 changed files with 408 additions and 294 deletions

View file

@ -3,4 +3,10 @@ all:
$(MAKE) -C hw
$(MAKE) -C driver
$(MAKE) -C runtime
$(MAKE) -C simX
$(MAKE) -C simX
clean:
$(MAKE) -C hw clean
$(MAKE) -C driver clean
$(MAKE) -C runtime clean
$(MAKE) -C simX clean

View file

@ -37,7 +37,7 @@ Install development tools
Install gnu-riscv-tools
$ export RISC_GNU_TOOLS_PATH=/opt/riscv-gnu-toolchain
$ export RISCV_TOOLCHAIN_PATH=/opt/riscv-gnu-toolchain
$ sudo apt-get -y install \
binutils build-essential libtool texinfo \
@ -51,7 +51,7 @@ Install gnu-riscv-tools
$ git submodule update --init --recursive
$ mkdir build
$ cd build
$ ../configure --prefix=$RISC_GNU_TOOLS_PATH --with-arch=rv32im --with-abi=ilp32
$ ../configure --prefix=$RISCV_TOOLCHAIN_PATH --with-arch=rv32im --with-abi=ilp32
$ make -j`nproc`
$ make -j`nproc` build-qemu

View file

@ -173,7 +173,7 @@ extern int vx_dev_open(vx_device_h* hdevice) {
{
// Load device CAPS
int ret = 0;
ret |= vx_csr_get(device, 0, CSR_IMPL_ID, &device->implementation_id);
ret |= vx_csr_get(device, 0, CSR_MIMPID, &device->implementation_id);
ret |= vx_csr_get(device, 0, CSR_NC, &device->num_cores);
ret |= vx_csr_get(device, 0, CSR_NW, &device->num_warps);
ret |= vx_csr_get(device, 0, CSR_NT, &device->num_threads);
@ -217,14 +217,14 @@ extern int vx_dev_close(vx_device_h hdevice) {
unsigned value;
int ret = 0;
ret |= vx_csr_get(hdevice, 0, CSR_INSTR_H, &value);
ret |= vx_csr_get(hdevice, 0, CSR_INSTRET_H, &value);
instrs = value;
ret |= vx_csr_get(hdevice, 0, CSR_INSTR_L, &value);
ret |= vx_csr_get(hdevice, 0, CSR_INSTRET, &value);
instrs = (instrs << 32) | value;
ret |= vx_csr_get(hdevice, 0, CSR_CYCLE_H, &value);
cycles = value;
ret |= vx_csr_get(hdevice, 0, CSR_CYCLE_L, &value);
ret |= vx_csr_get(hdevice, 0, CSR_CYCLE, &value);
cycles = (cycles << 32) | value;
float IPC = (float)(double(instrs) / double(cycles));

BIN
driver/tests/demo/kernel.bin Normal file → Executable file

Binary file not shown.

View file

@ -97,7 +97,7 @@ Disassembly of section .text:
80000134: 0005006b 0x5006b
80000138: 00002197 auipc gp,0x2
8000013c: d3818193 addi gp,gp,-712 # 80001e70 <__global_pointer$>
80000140: f14025f3 csrr a1,mhartid
80000140: 022025f3 csrr a1,0x22
80000144: 00a59593 slli a1,a1,0xa
80000148: 02002673 csrr a2,0x20
8000014c: 00261613 slli a2,a2,0x2
@ -145,7 +145,7 @@ Disassembly of section .text:
800001ac: 00008067 ret
800001b0 <vx_thread_gid>:
800001b0: f1402573 csrr a0,mhartid
800001b0: 02202573 csrr a0,0x22
800001b4: 00008067 ret
800001b8 <vx_core_id>:

Binary file not shown.

View file

@ -1,4 +1,7 @@
.PHONY: build_config
build_config:
./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./VX_config.h
./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./VX_config.h
clean:
rm ./rtl/VX_user_config.vh ./VX_config.h

View file

@ -48,7 +48,7 @@ module VX_alu_unit #(
end
end
wire [`NT_BITS-1:0] br_result_index, br_result_index_o;
wire [`NT_BITS-1:0] br_result_index;
VX_priority_encoder #(
.N(`NUM_THREADS)
@ -58,8 +58,14 @@ module VX_alu_unit #(
`UNUSED_PIN (valid_out)
);
wire [`BR_BITS-1:0] br_op = `IS_BR_OP(alu_req_if.alu_op) ? `BR_OP(alu_req_if.alu_op) : 0;
wire [`BR_BITS-1:0] br_op_o;
wire [32:0] br_result = sub_result[br_result_index];
wire br_sign = br_result[32];
wire br_nzero = (| br_result[31:0]);
wire br_sign_s1;
wire br_nzero_s1;
wire [`BR_BITS-1:0] br_op = `IS_BR_OP(alu_req_if.alu_op) ? `BR_OP(alu_req_if.alu_op) : `BR_NO;
wire [`BR_BITS-1:0] br_op_s1;
wire [31:0] br_addr = (br_op == `BR_JALR) ? alu_req_if.rs1_data[br_result_index] : alu_req_if.curr_PC;
wire [31:0] br_dest = $signed(br_addr) + $signed(alu_req_if.offset);
@ -70,34 +76,30 @@ module VX_alu_unit #(
wire stall = ~alu_commit_if.ready && alu_commit_if.valid;
VX_generic_register #(
.N(1 + `NW_BITS + `ISTAG_BITS + (`NUM_THREADS * 32) + `BR_BITS + 32 + `NT_BITS)
.N(1 + `NW_BITS + `ISTAG_BITS + (`NUM_THREADS * 32) + `BR_BITS + 32 + 1 + 1)
) alu_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (0),
.in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.issue_tag, alu_jal_result, br_op, br_dest, br_result_index}),
.out ({alu_commit_if.valid, branch_ctl_if.warp_num, alu_commit_if.issue_tag, alu_commit_if.data, br_op_o, branch_ctl_if.dest, br_result_index_o})
);
wire [31:0] br_result = alu_commit_if.data[br_result_index_o];
wire br_sign = br_result[31];
wire br_nzero = (| br_result[31:0]);
.in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.issue_tag, alu_jal_result, br_op, br_dest, br_sign, br_nzero}),
.out ({alu_commit_if.valid, branch_ctl_if.warp_num, alu_commit_if.issue_tag, alu_commit_if.data, br_op_s1, branch_ctl_if.dest, br_sign_s1, br_nzero_s1})
);
reg br_taken;
always @(*) begin
case (br_op_o)
`BR_NE: br_taken = br_nzero;
`BR_EQ: br_taken = ~br_nzero;
case (br_op_s1)
`BR_NE: br_taken = br_nzero_s1;
`BR_EQ: br_taken = ~br_nzero_s1;
`BR_LT,
`BR_LTU: br_taken = br_sign;
`BR_LTU: br_taken = br_sign_s1;
`BR_GE,
`BR_GEU: br_taken = ~br_sign;
`BR_GEU: br_taken = ~br_sign_s1;
default: br_taken = 1'b1;
endcase
end
assign branch_ctl_if.valid = alu_req_if.valid && (br_op_o != 0);
assign branch_ctl_if.valid = alu_commit_if.valid && (br_op_s1 != `BR_NO);
assign branch_ctl_if.taken = br_taken;
assign alu_req_if.ready = ~stall;

View file

@ -39,15 +39,27 @@ module VX_commit #(
);
assign cmt_to_csr_if.valid = (| commited_mask);
assign cmt_to_csr_if.warp_num = cmt_to_issue_if.fpu_data.warp_num;
assign cmt_to_csr_if.num_commits = num_commits;
assign cmt_to_csr_if.upd_fflags = (fpu_commit_if.valid && fpu_commit_if.ready) && fpu_commit_if.upd_fflags;
integer i;
assign cmt_to_csr_if.upd_fflags = (fpu_commit_if.valid && fpu_commit_if.ready) && fpu_commit_if.upd_fflags;
assign cmt_to_csr_if.fpu_warp_num = cmt_to_issue_if.fpu_data.warp_num;
assign cmt_to_csr_if.fflags_NV = fpu_commit_if.fflags_NV;
assign cmt_to_csr_if.fflags_DZ = fpu_commit_if.fflags_DZ;
assign cmt_to_csr_if.fflags_OF = fpu_commit_if.fflags_OF;
assign cmt_to_csr_if.fflags_UF = fpu_commit_if.fflags_UF;
assign cmt_to_csr_if.fflags_NX = fpu_commit_if.fflags_NX;
reg [`FFG_BITS-1:0] fflags;
always @(*) begin
fflags = 0;
for (i = 0; i < `NUM_THREADS; i++) begin
if (cmt_to_issue_if.fpu_data.thread_mask[i]) begin
fflags[0] |= fpu_commit_if.fflags[i][0];
fflags[1] |= fpu_commit_if.fflags[i][1];
fflags[2] |= fpu_commit_if.fflags[i][2];
fflags[3] |= fpu_commit_if.fflags[i][3];
fflags[4] |= fpu_commit_if.fflags[i][4];
end
end
end
assign cmt_to_csr_if.fflags = fflags;
// Notify issue stage

View file

@ -27,10 +27,6 @@
`define GLOBAL_BLOCK_SIZE 16
`endif
`ifndef NUM_CSRS
`define NUM_CSRS 64
`endif
`ifndef STARTUP_ADDR
`define STARTUP_ADDR 32'h80000000
`endif
@ -59,38 +55,11 @@
`define EXT_F_ENABLE
// Configuration Values =======================================================
// Device identification
`define VENDOR_ID 0
`define ARCHITECTURE_ID 0
`define IMPLEMENTATION_ID 0
// CSR Addresses ==============================================================
`define CSR_FFLAGS 12'h001
`define CSR_FRM 12'h002
`define CSR_FCSR 12'h003
`define CSR_VEND_ID 12'hF11
`define CSR_ARCH_ID 12'hF12
`define CSR_IMPL_ID 12'hF13
`define CSR_GTID 12'hF14
`define CSR_LTID 12'h020
`define CSR_LWID 12'h021
`define CSR_GWID 12'h023
`define CSR_GCID 12'h024
`define CSR_NT 12'h025
`define CSR_NW 12'h026
`define CSR_NC 12'h027
`define CSR_CYCLE_L 12'hC00
`define CSR_CYCLE_H 12'hC80
`define CSR_INSTR_L 12'hC02
`define CSR_INSTR_H 12'hC82
`define CSR_MISA 12'h301
// Size of MUL Request Queue Size
`ifndef MULRQ_SIZE
`define MULRQ_SIZE 8
@ -106,6 +75,45 @@
`define ISSUEQ_SIZE (8 + `NUM_WARPS)
`endif
// CSR Addresses //////////////////////////////////////////////////////////////
`define CSR_FFLAGS 12'h001
`define CSR_FRM 12'h002
`define CSR_FCSR 12'h003
`define CSR_LTID 12'h020
`define CSR_LWID 12'h021
`define CSR_GTID 12'h022
`define CSR_GWID 12'h023
`define CSR_GCID 12'h024
`define CSR_NT 12'h025
`define CSR_NW 12'h026
`define CSR_NC 12'h027
`define CSR_SATP 12'h180
`define CSR_PMPCFG0 12'h3A0
`define CSR_PMPADDR0 12'h3B0
`define CSR_MSTATUS 12'h300
`define CSR_MISA 12'h301
`define CSR_MEDELEG 12'h302
`define CSR_MIDELEG 12'h303
`define CSR_MIE 12'h304
`define CSR_MTVEC 12'h305
`define CSR_MEPC 12'h341
`define CSR_CYCLE 12'hC00
`define CSR_CYCLE_H 12'hC80
`define CSR_INSTRET 12'hC02
`define CSR_INSTRET_H 12'hC82
`define CSR_MVENDORID 12'hF11
`define CSR_MARCHID 12'hF12
`define CSR_MIMPID 12'hF13
`define CSR_MHARTID 12'hF14
// Dcache Configurable Knobs ==================================================
// Size of cache in bytes

View file

@ -11,107 +11,129 @@ module VX_csr_data #(
input wire[`NW_BITS-1:0] warp_num,
input wire[`CSR_ADDR_SIZE-1:0] read_addr,
input wire read_enable,
input wire[`CSR_ADDR_BITS-1:0] read_addr,
output reg[31:0] read_data,
input wire write_enable,
`IGNORE_WARNINGS_BEGIN
// We use a smaller storage for CSRs than the standard 4KB in RISC-V
input wire[`CSR_ADDR_SIZE-1:0] write_addr,
`IGNORE_WARNINGS_END
input wire[`CSR_ADDR_BITS-1:0] write_addr,
input wire[`CSR_WIDTH-1:0] write_data
);
reg [`CSR_WIDTH-1:0] csr_table[`NUM_CSRS-1:0];
reg [`FFG_BITS+`FRM_BITS-1:0] fflags_table [`NUM_WARPS-1:0];
reg [`FRM_BITS-1:0] frm_table [`NUM_WARPS-1:0];
reg [`FFG_BITS+`FRM_BITS-1:0] fcsr_table [`NUM_WARPS-1:0]; // fflags + frm
// cast address to physical CSR range
wire [$clog2(`NUM_CSRS)-1:0] rd_addr, wr_addr;
assign rd_addr = $size(rd_addr)'(read_addr);
assign wr_addr = $size(wr_addr)'(write_addr);
wire [`FFG_BITS-1:0] fflags_update;
assign fflags_update[4] = cmt_to_csr_if.fflags_NV;
assign fflags_update[3] = cmt_to_csr_if.fflags_DZ;
assign fflags_update[2] = cmt_to_csr_if.fflags_OF;
assign fflags_update[1] = cmt_to_csr_if.fflags_UF;
assign fflags_update[0] = cmt_to_csr_if.fflags_NX;
integer i;
reg [`CSR_WIDTH-1:0] csr_satp;
reg [`CSR_WIDTH-1:0] csr_mstatus;
reg [`CSR_WIDTH-1:0] csr_medeleg;
reg [`CSR_WIDTH-1:0] csr_mideleg;
reg [`CSR_WIDTH-1:0] csr_mie;
reg [`CSR_WIDTH-1:0] csr_mtvec;
reg [`CSR_WIDTH-1:0] csr_mepc;
reg [`CSR_WIDTH-1:0] csr_pmpcfg [0:0];
reg [`CSR_WIDTH-1:0] csr_pmpaddr [0:0];
reg [63:0] csr_cycle;
reg [63:0] csr_instret;
reg [`FFG_BITS-1:0] csr_fflags [`NUM_WARPS-1:0];
reg [`FRM_BITS-1:0] csr_frm [`NUM_WARPS-1:0];
reg [`FRM_BITS+`FFG_BITS-1:0] csr_fcsr [`NUM_WARPS-1:0]; // fflags + frm
always @(posedge clk) begin
if (reset) begin
for (i = 0; i < `NUM_WARPS; i++) begin
fflags_table[i] <= 0;
frm_table[i] <= 0;
fcsr_table[i] <= 0;
end
end else begin
if (write_enable) begin
case (write_addr)
`CSR_FFLAGS: begin
fcsr_table[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
fflags_table[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
end
`CSR_FRM: begin
fcsr_table[warp_num][`FFG_BITS+`FRM_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
frm_table[warp_num] <= write_data[`FRM_BITS-1:0];
end
`CSR_FCSR: begin
fcsr_table[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
frm_table[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:`FFG_BITS];
fflags_table[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
end
default: begin
csr_table[wr_addr] <= write_data;
if (cmt_to_csr_if.upd_fflags) begin
csr_fflags[cmt_to_csr_if.warp_num] <= cmt_to_csr_if.fflags;
csr_fcsr[cmt_to_csr_if.warp_num][`FFG_BITS-1:0] <= cmt_to_csr_if.fflags;
end
if (write_enable) begin
case (write_addr)
`CSR_FFLAGS: begin
csr_fcsr[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
csr_fflags[warp_num] <= write_data[`FFG_BITS-1:0];
end
`CSR_FRM: begin
csr_fcsr[warp_num][`FFG_BITS+`FRM_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
csr_frm[warp_num] <= write_data[`FRM_BITS-1:0];
end
`CSR_FCSR: begin
csr_fcsr[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
csr_frm[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:`FFG_BITS];
csr_fflags[warp_num] <= write_data[`FFG_BITS-1:0];
end
`CSR_SATP: csr_satp <= write_data;
`CSR_MSTATUS: csr_mstatus <= write_data;
`CSR_MEDELEG: csr_medeleg <= write_data;
`CSR_MIDELEG: csr_mideleg <= write_data;
`CSR_MIE: csr_mie <= write_data;
`CSR_MTVEC: csr_mtvec <= write_data;
`CSR_MEPC: csr_mepc <= write_data;
`CSR_PMPCFG0: csr_pmpcfg[0] <= write_data;
`CSR_PMPADDR0: csr_pmpaddr[0] <= write_data;
default: begin
assert(~write_enable) else $error("%t: invalid CSR write address: %0h", $time, write_addr);
end
endcase
end else if (cmt_to_csr_if.upd_fflags) begin
fflags_table[cmt_to_csr_if.fpu_warp_num][`FFG_BITS-1:0] <= fflags_update;
fcsr_table[cmt_to_csr_if.fpu_warp_num][`FFG_BITS-1:0] <= fflags_update;
end
endcase
end
end
reg [63:0] total_cycles, total_instrs;
always @(posedge clk) begin
if (reset) begin
total_cycles <= 0;
total_instrs <= 0;
csr_cycle <= 0;
csr_instret <= 0;
end else begin
total_cycles <= total_cycles + 1;
csr_cycle <= csr_cycle + 1;
if (cmt_to_csr_if.valid) begin
total_instrs <= total_instrs + 64'(cmt_to_csr_if.num_commits);
csr_instret <= csr_instret + 64'(cmt_to_csr_if.num_commits);
end
end
end
always @(*) begin
case (read_addr)
`CSR_FFLAGS : read_data = 32'(fflags_table[warp_num]);
`CSR_FRM : read_data = 32'(frm_table[warp_num]);
`CSR_FCSR : read_data = 32'(fcsr_table[warp_num]);
`CSR_FFLAGS : read_data = 32'(csr_fflags[warp_num]);
`CSR_FRM : read_data = 32'(csr_frm[warp_num]);
`CSR_FCSR : read_data = 32'(csr_fcsr[warp_num]);
`CSR_LWID : read_data = 32'(warp_num);
`CSR_LTID ,
`CSR_GTID ,
`CSR_MHARTID ,
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num);
`CSR_GCID : read_data = CORE_ID;
`CSR_NT : read_data = `NUM_THREADS;
`CSR_NW : read_data = `NUM_WARPS;
`CSR_NC : read_data = `NUM_CORES * `NUM_CLUSTERS;
`CSR_CYCLE_L : read_data = total_cycles[31:0];
`CSR_CYCLE_H : read_data = total_cycles[63:32];
`CSR_INSTR_L : read_data = total_instrs[31:0];
`CSR_INSTR_H : read_data = total_instrs[63:32];
`CSR_VEND_ID : read_data = `VENDOR_ID;
`CSR_ARCH_ID : read_data = `ARCHITECTURE_ID;
`CSR_IMPL_ID : read_data = `IMPLEMENTATION_ID;
`CSR_SATP : read_data = 32'(csr_satp);
`CSR_MSTATUS : read_data = 32'(csr_mstatus);
`CSR_MISA : read_data = `ISA_CODE;
default : read_data = 32'(csr_table[rd_addr]);
`CSR_MEDELEG : read_data = 32'(csr_medeleg);
`CSR_MIDELEG : read_data = 32'(csr_mideleg);
`CSR_MIE : read_data = 32'(csr_mie);
`CSR_MTVEC : read_data = 32'(csr_mtvec);
`CSR_MEPC : read_data = 32'(csr_mepc);
`CSR_PMPCFG0 : read_data = 32'(csr_pmpcfg[0]);
`CSR_PMPADDR0: read_data = 32'(csr_pmpaddr[0]);
`CSR_CYCLE : read_data = csr_cycle[31:0];
`CSR_CYCLE_H : read_data = csr_cycle[63:32];
`CSR_INSTRET : read_data = csr_instret[31:0];
`CSR_INSTRET_H:read_data = csr_instret[63:32];
`CSR_MVENDORID:read_data = `VENDOR_ID;
`CSR_MARCHID : read_data = `ARCHITECTURE_ID;
`CSR_MIMPID : read_data = `IMPLEMENTATION_ID;
default: begin
assert(~read_enable) else $error("%t: invalid CSR read address: %0h", $time, read_addr);
end
endcase
end
assign csr_to_fpu_if.frm = frm_table[csr_to_fpu_if.warp_num];
assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.warp_num];
endmodule
endmodule

View file

@ -37,12 +37,11 @@ module VX_csr_unit #(
.select_io_rsp (select_io_rsp)
);
wire [`CSR_ADDR_SIZE-1:0] csr_addr_s2;
wire [31:0] csr_read_data_s2;
wire [31:0] csr_updated_data_s2;
wire [31:0] csr_read_data_unqual;
wire is_csr_s2 = csr_pipe_commit_if.valid;
wire csr_we_s1;
wire [`CSR_ADDR_BITS-1:0] csr_addr_s1;
wire [31:0] csr_read_data, csr_read_data_s1;
wire [31:0] csr_updated_data_s1;
wire [`NW_BITS-1:0] warp_num_s1;
VX_csr_data #(
.CORE_ID(CORE_ID)
@ -51,51 +50,64 @@ module VX_csr_unit #(
.reset (reset),
.cmt_to_csr_if (cmt_to_csr_if),
.csr_to_fpu_if (csr_to_fpu_if),
.read_enable (csr_pipe_req_if.valid),
.read_addr (csr_pipe_req_if.csr_addr),
.read_data (csr_read_data_unqual),
.write_enable (is_csr_s2),
.write_data (csr_updated_data_s2[`CSR_WIDTH-1:0]),
.write_addr (csr_addr_s2),
.read_data (csr_read_data),
.write_enable (csr_we_s1),
.write_data (csr_updated_data_s1[`CSR_WIDTH-1:0]),
.write_addr (csr_addr_s1),
.warp_num (csr_pipe_req_if.warp_num)
);
);
wire [`NW_BITS-1:0] warp_num_s2;
wire csr_hazard = (csr_addr_s1 == csr_pipe_req_if.csr_addr)
&& (warp_num_s1 == csr_pipe_req_if.warp_num)
&& csr_pipe_commit_if.valid;
wire csr_hazard = (csr_addr_s2 == csr_pipe_req_if.csr_addr)
&& (warp_num_s2 == csr_pipe_req_if.warp_num)
&& is_csr_s2;
wire [31:0] csr_read_data = csr_hazard ? csr_updated_data_s2 : csr_read_data_unqual;
wire [31:0] csr_read_data_qual = csr_hazard ? csr_updated_data_s1 : csr_read_data;
reg [31:0] csr_updated_data;
reg csr_we_s0_unqual;
always @(*) begin
csr_we_s0_unqual = 0;
case (csr_pipe_req_if.csr_op)
`CSR_RW: csr_updated_data = csr_pipe_req_if.csr_mask;
`CSR_RS: csr_updated_data = csr_read_data | csr_pipe_req_if.csr_mask;
`CSR_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - csr_pipe_req_if.csr_mask);
`CSR_RW: begin
csr_updated_data = csr_pipe_req_if.csr_mask;
csr_we_s0_unqual = 1;
end
`CSR_RS: begin
csr_updated_data = csr_read_data_qual | csr_pipe_req_if.csr_mask;
csr_we_s0_unqual = (csr_pipe_req_if.csr_mask != 0);
end
`CSR_RC: begin
csr_updated_data = csr_read_data_qual & (32'hFFFFFFFF - csr_pipe_req_if.csr_mask);
csr_we_s0_unqual = (csr_pipe_req_if.csr_mask != 0);
end
default: csr_updated_data = 32'hdeadbeef;
endcase
end
end
wire csr_we_s0 = csr_we_s0_unqual && csr_pipe_req_if.valid;
wire stall = ~csr_pipe_commit_if.ready && csr_pipe_commit_if.valid;
VX_generic_register #(
.N(1 + `ISTAG_BITS + `NW_BITS + `CSR_ADDR_SIZE + 1 + 32 + 32)
.N(1 + `ISTAG_BITS + `NW_BITS + 1 + `CSR_ADDR_BITS + 1 + 32 + 32)
) csr_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (0),
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.issue_tag, csr_pipe_req_if.warp_num, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data, csr_updated_data}),
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.issue_tag, warp_num_s2, csr_addr_s2, select_io_rsp, csr_read_data_s2, csr_updated_data_s2})
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.issue_tag, csr_pipe_req_if.warp_num, csr_we_s0, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.issue_tag, warp_num_s1, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1})
);
genvar i;
for (i = 0; i < `NUM_THREADS; i++) begin
assign csr_pipe_commit_if.data[i] = (csr_addr_s2 == `CSR_LTID) ? i :
(csr_addr_s2 == `CSR_GTID) ? (csr_read_data_s2 * `NUM_THREADS + i) :
csr_read_data_s2;
assign csr_pipe_commit_if.data[i] = (csr_addr_s1 == `CSR_LTID) ? i :
(csr_addr_s1 == `CSR_GTID) ? (csr_read_data_s1 * `NUM_THREADS + i) :
csr_read_data_s1;
end
assign csr_pipe_req_if.ready = ~stall;

View file

@ -337,10 +337,10 @@ module VX_decode #(
assign decode_tmp_if.use_rs3 = use_rs3;
assign decode_tmp_if.reg_use_mask = ((`NUM_REGS)'(use_rd) << rd)
| ((`NUM_REGS)'(use_rs1) << rs1_qual)
| ((`NUM_REGS)'(use_rs2) << rs2)
| ((`NUM_REGS)'(use_rs3) << rs3);
assign decode_tmp_if.reg_use_mask = ((`NUM_REGS)'(use_rd) << decode_tmp_if.rd)
| ((`NUM_REGS)'(use_rs1) << decode_tmp_if.rs1)
| ((`NUM_REGS)'(use_rs2) << decode_tmp_if.rs2)
| ((`NUM_REGS)'(use_rs3) << decode_tmp_if.rs3);
assign decode_tmp_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} :
(is_jal || is_jalr || is_jals) ? jalx_offset :

View file

@ -30,7 +30,7 @@
`define NR_BITS `LOG2UP(`NUM_REGS)
`define CSR_ADDR_SIZE 12
`define CSR_ADDR_BITS 12
`define CSR_WIDTH 12
@ -38,8 +38,8 @@
///////////////////////////////////////////////////////////////////////////////
`define LATENCY_IDIV 24
`define LATENCY_IMUL 2
`define LATENCY_IDIV 24
`define LATENCY_IMUL 2
`define LATENCY_FMULADD 2
`define LATENCY_FDIVSQRT 2
@ -91,6 +91,7 @@
`define BR_MRET 4'hA
`define BR_SRET 4'hB
`define BR_DRET 4'hC
`define BR_NO 4'hF
`define BR_BITS 4
`define OP_BITS 5

View file

@ -20,12 +20,12 @@ module VX_fpu_unit #(
localparam FMTF_BITS = $clog2(fpnew_pkg::NUM_FP_FORMATS);
localparam FMTI_BITS = $clog2(fpnew_pkg::NUM_INT_FORMATS);
localparam int FPU_DPATHW = `NUM_THREADS * 32;
localparam FPU_DPATHW = 32'd32;
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{
Width: FPU_DPATHW,
EnableVectors: 1,
EnableNanBox: 1,
EnableVectors: 1'b0,
EnableNanBox: 1'b1,
FpFmtMask: 5'b10000,
IntFmtMask: 4'b0010
};
@ -54,7 +54,7 @@ module VX_fpu_unit #(
wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;
wire [`NUM_THREADS-1:0][31:0] fpu_result;
fpnew_pkg::status_t fpu_status;
fpnew_pkg::status_t fpu_status [0:`NUM_THREADS-1];
assign csr_to_fpu_if.warp_num = fpu_req_if.warp_num;
wire [`FRM_BITS-1:0] real_frm = (fpu_req_if.frm == `FRM_DYN) ? csr_to_fpu_if.frm : fpu_req_if.frm;
@ -65,7 +65,7 @@ module VX_fpu_unit #(
reg [FOP_BITS-1:0] fpu_op;
reg [`FRM_BITS-1:0] fpu_rnd;
reg fpu_op_mod;
reg fflags_en, fflags_en_o;
reg fflags_en, fflags_en_o;
always @(*) begin
fpu_op = fpnew_pkg::SGNJ;
@ -87,88 +87,94 @@ module VX_fpu_unit #(
fpu_operands[2] = fpu_req_if.rs2_data;
fpu_op_mod = 1;
end
`FPU_MUL: fpu_op = fpnew_pkg::MUL;
`FPU_DIV: fpu_op = fpnew_pkg::DIV;
`FPU_SQRT: fpu_op = fpnew_pkg::SQRT;
`FPU_MADD: fpu_op = fpnew_pkg::FMADD;
`FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
`FPU_NMSUB: fpu_op = fpnew_pkg::FNMSUB;
`FPU_MUL: begin fpu_op = fpnew_pkg::MUL; end
`FPU_DIV: begin fpu_op = fpnew_pkg::DIV; end
`FPU_SQRT: begin fpu_op = fpnew_pkg::SQRT; end
`FPU_MADD: begin fpu_op = fpnew_pkg::FMADD; end
`FPU_MSUB: begin fpu_op = fpnew_pkg::FMADD; fpu_op_mod = 1; end
`FPU_NMSUB: begin fpu_op = fpnew_pkg::FNMSUB; end
`FPU_NMADD: begin fpu_op = fpnew_pkg::FNMSUB; fpu_op_mod = 1; end
`FPU_SGNJ: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RNE; fflags_en = 0; end
`FPU_SGNJN: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RTZ; fflags_en = 0; end
`FPU_SGNJX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RDN; fflags_en = 0; end
`FPU_MIN: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RNE; end
`FPU_MAX: begin fpu_op = fpnew_pkg::MINMAX; fpu_rnd = `FRM_RTZ; end
`FPU_CVTWS: fpu_op = fpnew_pkg::F2I;
`FPU_CVTWS: begin fpu_op = fpnew_pkg::F2I; end
`FPU_CVTWUS:begin fpu_op = fpnew_pkg::F2I; fpu_op_mod = 1; end
`FPU_CVTSW: fpu_op = fpnew_pkg::I2F;
`FPU_CVTSW: begin fpu_op = fpnew_pkg::I2F; end
`FPU_CVTSWU:begin fpu_op = fpnew_pkg::I2F; fpu_op_mod = 1; end
`FPU_MVXW: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fflags_en = 0; end
`FPU_MVWX: begin fpu_op = fpnew_pkg::SGNJ; fpu_rnd = `FRM_RUP; fflags_en = 0; end
`FPU_CLASS: begin fpu_op = fpnew_pkg::CLASSIFY; fflags_en = 0; end
`FPU_CMP: fpu_op = fpnew_pkg::CMP;
`FPU_CMP: begin fpu_op = fpnew_pkg::CMP; end
default:;
endcase
end
end
genvar i;
`DISABLE_TRACING
fpnew_top #(
.Features (FPU_FEATURES),
.Implementation (FPU_IMPLEMENTATION),
.TagType (logic[`LOG2UP(`FPURQ_SIZE)-1+2:0])
) fpnew_core (
.clk_i (clk),
.rst_ni (1'b1),
.operands_i (fpu_operands),
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)),
.op_i (fpnew_pkg::operation_e'(fpu_op)),
.op_mod_i (fpu_op_mod),
.src_fmt_i (fpnew_pkg::fp_format_e'(fpu_src_fmt)),
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)),
.vectorial_op_i (1'b1),
.tag_i ({fpu_in_tag, fflags_en, is_class_op_i}),
.in_valid_i (fpu_in_valid),
.in_ready_o (fpu_in_ready),
.flush_i (reset),
.result_o (fpu_result),
.status_o (fpu_status),
.tag_o ({fpu_out_tag, fflags_en_o, is_class_op_o}),
.out_valid_o (fpu_out_valid),
.out_ready_i (fpu_out_ready),
`UNUSED_PIN (busy_o)
);
`ENABLE_TRACING
reg [`NUM_THREADS-1:0][31:0] fpu_result_qual;
always @(8) begin
// unpack classify mask result
if (is_class_op_o) begin
integer i;
for (i = 0; i < `NUM_THREADS; i++) begin
automatic integer l = i / 4;
automatic integer w = i % 4;
automatic integer class_mask = fpu_result[l][w * 8 +: 8];
fpu_result_qual[i][0] = class_mask[7] & class_mask[0];
fpu_result_qual[i][1] = class_mask[7] & class_mask[1];
fpu_result_qual[i][2] = class_mask[7] & class_mask[2];
fpu_result_qual[i][3] = class_mask[7] & class_mask[3];
fpu_result_qual[i][4] = class_mask[6] & class_mask[3];
fpu_result_qual[i][5] = class_mask[6] & class_mask[2];
fpu_result_qual[i][6] = class_mask[6] & class_mask[1];
fpu_result_qual[i][7] = class_mask[6] & class_mask[0];
fpu_result_qual[i][8] = class_mask[4];
fpu_result_qual[i][9] = class_mask[5];
end
for (i = 0; i < `NUM_THREADS; i++) begin
if (0 == i) begin
fpnew_top #(
.Features (FPU_FEATURES),
.Implementation (FPU_IMPLEMENTATION),
.TagType (logic[`LOG2UP(`FPURQ_SIZE)+1+1-1:0])
) fpnew_core (
.clk_i (clk),
.rst_ni (1'b1),
.operands_i ({fpu_operands[2][0], fpu_operands[1][0], fpu_operands[0][0]}),
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)),
.op_i (fpnew_pkg::operation_e'(fpu_op)),
.op_mod_i (fpu_op_mod),
.src_fmt_i (fpnew_pkg::fp_format_e'(fpu_src_fmt)),
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)),
.vectorial_op_i (1'b0),
.tag_i ({fpu_in_tag, fflags_en, is_class_op_i}),
.in_valid_i (fpu_in_valid),
.in_ready_o (fpu_in_ready),
.flush_i (reset),
.result_o (fpu_result),
.status_o (fpu_status[0]),
.tag_o ({fpu_out_tag, fflags_en_o, is_class_op_o}),
.out_valid_o (fpu_out_valid),
.out_ready_i (fpu_out_ready),
`UNUSED_PIN (busy_o)
);
end else begin
fpu_result_qual = fpu_result;
fpnew_top #(
.Features (FPU_FEATURES),
.Implementation (FPU_IMPLEMENTATION),
.TagType (logic)
) fpnew_core (
.clk_i (clk),
.rst_ni (1'b1),
.operands_i ({fpu_operands[2][i], fpu_operands[1][i], fpu_operands[0][i]}),
.rnd_mode_i (fpnew_pkg::roundmode_e'(fpu_rnd)),
.op_i (fpnew_pkg::operation_e'(fpu_op)),
.op_mod_i (fpu_op_mod),
.src_fmt_i (fpnew_pkg::fp_format_e'(fpu_src_fmt)),
.dst_fmt_i (fpnew_pkg::fp_format_e'(fpu_dst_fmt)),
.int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)),
.vectorial_op_i (1'b0),
.tag_i (1'b0),
.in_valid_i (fpu_in_valid),
`UNUSED_PIN (in_ready_o),
.flush_i (reset),
.result_o (fpu_result[i]),
.status_o (fpu_status[i]),
`UNUSED_PIN (tag_o),
`UNUSED_PIN (out_valid_o),
.out_ready_i (fpu_out_ready),
`UNUSED_PIN (busy_o)
);
end
end
`ENABLE_TRACING
assign fpu_in_valid = fpu_req_if.valid;
assign fpu_in_tag = fpu_req_if.issue_tag;
@ -177,15 +183,18 @@ module VX_fpu_unit #(
assign fpu_commit_if.valid = fpu_out_valid;
assign fpu_commit_if.issue_tag = fpu_out_tag;
assign fpu_commit_if.data = fpu_result_qual;
assign fpu_commit_if.data = fpu_result;
assign fpu_commit_if.upd_fflags = fflags_en_o;
assign fpu_commit_if.fflags_NV = fpu_status.NV;
assign fpu_commit_if.fflags_DZ = fpu_status.DZ;
assign fpu_commit_if.fflags_OF = fpu_status.OF;
assign fpu_commit_if.fflags_UF = fpu_status.UF;
assign fpu_commit_if.fflags_NX = fpu_status.NX;
assign fpu_commit_if.upd_fflags = fflags_en_o;
for (i = 0; i < `NUM_THREADS; i++) begin
assign fpu_commit_if.fflags[i][0] = fpu_status[i].NX;
assign fpu_commit_if.fflags[i][1] = fpu_status[i].UF;
assign fpu_commit_if.fflags[i][2] = fpu_status[i].OF;
assign fpu_commit_if.fflags[i][3] = fpu_status[i].DZ;
assign fpu_commit_if.fflags[i][4] = fpu_status[i].NV;
end
assign fpu_out_ready = fpu_commit_if.ready;
endmodule

View file

@ -46,7 +46,7 @@ module VX_icache_stage #(
assign ifetch_req_if.ready = icache_req_if.ready;
`ifdef DBG_CORE_REQ_INFO
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 1'b0, 5'b0, ifetch_req_if.warp_num, req_tag};
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 1'b0, `NR_BITS'(0), ifetch_req_if.warp_num, req_tag};
`else
assign icache_req_if.tag = req_tag;
`endif

View file

@ -97,7 +97,7 @@ module VX_issue #(
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%d, rd=%0d, rs1_data=%0h, rs2_data=%0h, offset=%0h, next_PC=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC);
end
if (lsu_req_if.valid && lsu_req_if.ready) begin
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset);
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0b, rd=%0d, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
end
if (csr_req_if.valid && csr_req_if.ready) begin
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%d, rd=%0d, addr=%0h, mask=%0h", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC, issue_tmp_tag, decode_tmp_if.thread_mask, decode_tmp_if.wb, decode_tmp_if.rd, csr_req_if.csr_addr, csr_req_if.csr_mask);

View file

@ -46,7 +46,7 @@ module VX_issue_demux (
assign csr_req_if.warp_num = decode_if.warp_num;
assign csr_req_if.curr_PC = decode_if.curr_PC;
assign csr_req_if.csr_op = `CSR_OP(decode_if.ex_op);
assign csr_req_if.csr_addr = decode_if.imm[`CSR_ADDR_SIZE-1:0];
assign csr_req_if.csr_addr = decode_if.imm[`CSR_ADDR_BITS-1:0];
assign csr_req_if.csr_mask = decode_if.rs2_is_imm ? 32'(decode_if.rs1) : gpr_read_if.rs1_data[0];
assign csr_req_if.is_io = 1'b0;

View file

@ -49,7 +49,7 @@ module VX_scheduler #(
for (i = 0; i < `NUM_REGS; i++) begin
inuse_registers[w][i] <= 0;
end
inuse_reg_mask[w] <= 0;
inuse_reg_mask[w] <= `NUM_REGS'(0);
end
end else begin
if (acquire_rd) begin

View file

@ -24,9 +24,10 @@ module VX_writeback #(
reg [`NUM_THREADS-1:0] wb_thread_mask [`ISSUEQ_SIZE-1:0];
reg [31:0] wb_curr_PC [`ISSUEQ_SIZE-1:0];
reg [`NR_BITS-1:0] wb_rd [`ISSUEQ_SIZE-1:0];
reg [`ISSUEQ_SIZE-1:0] wb_pending, wb_pending_n;
reg [`ISTAG_BITS-1:0] wb_index;
reg [`ISSUEQ_SIZE-1:0] wb_pending;
reg [`ISSUEQ_SIZE-1:0] wb_pending_n;
reg [`ISTAG_BITS-1:0] wb_index;
wire [`ISTAG_BITS-1:0] wb_index_n;
reg wb_valid;
@ -67,6 +68,8 @@ module VX_writeback #(
always @(posedge clk) begin
if (reset) begin
wb_pending <= 0;
wb_index <= 0;
wb_valid <= 0;
end else begin
if (alu_commit_if.valid) begin
wb_data [alu_commit_if.issue_tag] <= alu_commit_if.data;

View file

@ -105,7 +105,7 @@ module VX_bank #(
`ifdef DBG_CORE_REQ_INFO
/* verilator lint_off UNUSED */
wire[31:0] debug_use_pc_st0;
wire[31:0] debug_pc_st0;
wire debug_wb_st0;
wire[`NR_BITS-1:0] debug_rd_st0;
wire[`NW_BITS-1:0] debug_warp_num_st0;
@ -114,7 +114,7 @@ module VX_bank #(
wire[`REQS_BITS-1:0] debug_tid_st0;
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st0;
wire[31:0] debug_use_pc_st1e;
wire[31:0] debug_pc_st1e;
wire debug_wb_st1e;
wire[`NR_BITS-1:0] debug_rd_st1e;
wire[`NW_BITS-1:0] debug_warp_num_st1e;
@ -123,7 +123,7 @@ module VX_bank #(
wire[`REQS_BITS-1:0] debug_tid_st1e;
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e;
wire[31:0] debug_use_pc_st2;
wire[31:0] debug_pc_st2;
wire debug_wb_st2;
wire[`NR_BITS-1:0] debug_rd_st2;
wire[`NW_BITS-1:0] debug_warp_num_st2;
@ -360,7 +360,7 @@ module VX_bank #(
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_use_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
assign {debug_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
end
`endif
@ -432,6 +432,9 @@ module VX_bank #(
&& (addr_st2 == addr_st1e);
VX_tag_data_access #(
.BANK_ID (BANK_ID),
.CACHE_ID (CACHE_ID),
.CORE_TAG_ID_BITS(CORE_TAG_ID_BITS),
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
@ -442,6 +445,15 @@ module VX_bank #(
) tag_data_access (
.clk (clk),
.reset (reset),
`ifdef DBG_CORE_REQ_INFO
.debug_pc_st1e(debug_pc_st1e),
.debug_wb_st1e(debug_wb_st1e),
.debug_rd_st1e(debug_rd_st1e),
.debug_warp_num_st1e(debug_warp_num_st1e),
.debug_tagid_st1e(debug_tagid_st1e),
`endif
.stall (stall_bank_pipe),
.stall_bank_pipe(stall_bank_pipe),
@ -478,7 +490,7 @@ module VX_bank #(
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_use_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
assign {debug_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
end
`endif
@ -519,7 +531,7 @@ module VX_bank #(
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_use_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
assign {debug_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
end
`endif

View file

@ -1,6 +1,9 @@
`include "VX_cache_config.vh"
module VX_tag_data_access #(
parameter CACHE_ID = 0,
parameter BANK_ID = 0,
parameter CORE_TAG_ID_BITS = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 0,
// Size of line inside a bank in bytes
@ -22,6 +25,14 @@ module VX_tag_data_access #(
input wire clk,
input wire reset,
`ifdef DBG_CORE_REQ_INFO
input wire[31:0] debug_pc_st1e,
input wire debug_wb_st1e,
input wire[`NR_BITS-1:0] debug_rd_st1e,
input wire[`NW_BITS-1:0] debug_warp_num_st1e,
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e,
`endif
input wire stall,
input wire is_snp_st1e,
input wire snp_invalidate_st1e,
@ -85,10 +96,10 @@ module VX_tag_data_access #(
wire[`LINE_SELECT_BITS-1:0] writeladdr_st1e = writeaddr_st1e[`LINE_SELECT_BITS-1:0];
VX_tag_data_store #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE)
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE)
) tag_data_store (
.clk (clk),
.reset (reset),
@ -125,7 +136,7 @@ module VX_tag_data_access #(
genvar i;
for (i = 1; i < STAGE_1_CYCLES-1; i++) begin
VX_generic_register #(
.N( 1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH)
.N(1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH)
) s0_1_cc (
.clk (clk),
.reset (reset),
@ -200,4 +211,23 @@ module VX_tag_data_access #(
assign fill_saw_dirty_st1e = real_writefill && dirty_st1e;
assign invalidate_line = snoop_hit_no_pending;
// Simulation trace for this bank's tag/data access, compiled in only when
// DBG_PRINT_CACHE_BANK is defined. On every rising clock edge where
// valid_req_st1e is set, exactly one $display line is printed, prefixed with
// $time and the CACHE_ID / BANK_ID parameters.
//
// NOTE(review): the debug_*_st1e signals printed below are declared in this
// module's port list only under `ifdef DBG_CORE_REQ_INFO, so this block
// appears to require DBG_CORE_REQ_INFO to be defined as well - confirm.
//
// NOTE(review): in the format strings, "tag=" prints debug_tagid_st1e while
// "tag_id=" prints writetag_st1e / qual_read_tag_st1; the labels look swapped
// relative to the signal names - confirm which naming is intended.
`ifdef DBG_PRINT_CACHE_BANK
always @(posedge clk) begin
if (valid_req_st1e) begin
// At least one write-enable bit is set: the request writes the store.
if ((| use_write_enable)) begin
if (writefill_st1e) begin
// Fill: prints the full write data (use_write_data), no word select.
$display("%t: bank%0d:%0d store-fill: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, use_write_data);
end else begin
// Non-fill write: prints the word select (wordsel_st1e) and the written word.
$display("%t: bank%0d:%0d store-write: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, wordsel_st1e, writeword_st1e);
end
// No write enable set: the "else" here and the "if" on the next line form an
// else-if chain that distinguishes a miss from a read.
end else
if (miss_st1e) begin
// Miss: only the request metadata and dirty bit are printed.
$display("%t: bank%0d:%0d store-miss: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e);
end else begin
// Read: prints the read address, the qualified read tag and the read data.
$display("%t: bank%0d:%0d store-read: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, readaddr_st10, qual_read_tag_st1, wordsel_st1e, qual_read_data_st1);
end
end
end
`endif
endmodule

View file

@ -6,15 +6,13 @@
interface VX_cmt_to_csr_if ();
wire valid;
wire [`NW_BITS-1:0] warp_num;
wire [`NE_BITS:0] num_commits;
wire upd_fflags;
wire [`NW_BITS-1:0] fpu_warp_num;
wire fflags_NV;
wire fflags_DZ;
wire fflags_OF;
wire fflags_UF;
wire fflags_NX;
wire [`FFG_BITS-1:0] fflags;
endinterface

View file

@ -5,11 +5,11 @@
interface VX_csr_io_req_if ();
wire valid;
wire [`CSR_ADDR_SIZE-1:0] addr;
wire rw;
wire [31:0] data;
wire ready;
wire valid;
wire [`CSR_ADDR_BITS-1:0] addr;
wire rw;
wire [31:0] data;
wire ready;
endinterface

View file

@ -12,7 +12,7 @@ interface VX_csr_req_if ();
wire [`CSR_BITS-1:0] csr_op;
wire [`CSR_ADDR_SIZE-1:0] csr_addr;
wire [`CSR_ADDR_BITS-1:0] csr_addr;
wire [31:0] csr_mask;
wire [`NR_BITS-1:0] rd;

View file

@ -9,11 +9,7 @@ interface VX_fpu_to_cmt_if ();
wire [`ISTAG_BITS-1:0] issue_tag;
wire [`NUM_THREADS-1:0][31:0] data;
wire upd_fflags;
wire fflags_NV;
wire fflags_DZ;
wire fflags_OF;
wire fflags_UF;
wire fflags_NX;
wire [`NUM_THREADS-1:0][`FFG_BITS-1:0] fflags;
wire ready;
endinterface

View file

@ -15,7 +15,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_FLAGS += $(DBG_PRINT_FLAGS)
#DBG_FLAGS += -DDBG_CORE_REQ_INFO
DBG_FLAGS += -DDBG_CORE_REQ_INFO
FPU_INCLUDE = -I../rtl/fp_cores/fpnew/src/common_cells/include -I../rtl/fp_cores/fpnew/src/common_cells/src -I../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl -I../rtl/fp_cores/fpnew/src
INCLUDE = -I../rtl/ -I../rtl/libs -I../rtl/interfaces -I../rtl/cache -I../rtl/fp_cores -I../rtl/simulate $(FPU_INCLUDE)

View file

@ -210,7 +210,7 @@ void Simulator::wait(uint32_t cycles) {
}
}
bool Simulator::is_busy() {
bool Simulator::is_busy() const {
return vortex_->busy || snp_req_active_;
}
@ -255,11 +255,11 @@ void Simulator::run() {
this->wait(5);
}
int Simulator::get_last_wb_value(int reg) {
#if (NUM_CLUSTERS == 1)
return (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
int Simulator::get_last_wb_value(int reg) const {
#if (NUM_CLUSTERS != 1)
return (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
#else
return (int)vortex_->Vortex->genblk2__DOT__genblk1__BRA__0__KET____DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
return (int)vortex_->Vortex->genblk1__DOT__cluster->genblk1__BRA__0__KET____DOT__core->pipeline->commit->writeback->last_wb_value[reg];
#endif
}

View file

@ -34,7 +34,7 @@ public:
void load_bin(const char* program_file);
void load_ihex(const char* program_file);
bool is_busy();
bool is_busy() const;
void reset();
void step();
@ -44,7 +44,7 @@ public:
void attach_ram(RAM* ram);
void run();
int get_last_wb_value(int reg);
int get_last_wb_value(int reg) const;
void print_stats(std::ostream& out);
private:

View file

@ -83,11 +83,11 @@ vx_num_cores:
.type vx_num_cycles, @function
.global vx_num_cycles
vx_num_cycles:
csrr a0, CSR_CYCLE_L
csrr a0, CSR_CYCLE
ret
.type vx_num_instrs, @function
.global vx_num_instrs
vx_num_instrs:
csrr a0, CSR_INSTR_L
csrr a0, CSR_INSTRET
ret

View file

@ -454,7 +454,7 @@ Disassembly of section .text:
80000698: 0005006b 0x5006b
8000069c: 00001197 auipc gp,0x1
800006a0: 16c18193 addi gp,gp,364 # 80001808 <__global_pointer$>
800006a4: f14025f3 csrr a1,mhartid
800006a4: 022025f3 csrr a1,0x22
800006a8: 00a59593 slli a1,a1,0xa
800006ac: 02002673 csrr a2,0x20
800006b0: 00261613 slli a2,a2,0x2
@ -502,7 +502,7 @@ Disassembly of section .text:
80000710: 00008067 ret
80000714 <vx_thread_gid>:
80000714: f1402573 csrr a0,mhartid
80000714: 02202573 csrr a0,0x22
80000718: 00008067 ret
8000071c <vx_core_id>:

Binary file not shown.

View file

@ -105,14 +105,14 @@
:10066800130504D7032481008320C100832441009B
:10067800130101016F00000D13050000EF00800554
:100688006FF09FFA130500006B00050073255002F8
:100698006B000500971100009381C116F32540F106
:100698006B000500971100009381C116F325200215
:1006A8009395A500732600021316260037F1FF6FF5
:1006B8003301B1403301C100F326100263860600FE
:1006C800130500006B000500678000006B10B50083
:1006D800678000006B000500678000006B40B50074
:1006E800678000006B200500678000006B30000009
:1006F80067800000732510026780000073253002B0
:10070800678000007325000267800000732540F1B0
:1007080067800000732500026780000073252002BF
:10071800678000007325400267800000732550023F
:1007280067800000732560026780000073257002EF
:1007380067800000732500C067800000732520C013