mirror of
https://github.com/vortexgpgpu/vortex.git
synced 2025-06-28 09:37:38 -04:00
bug fixes
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (cupbop, 32) (push) Blocked by required conditions
CI / tests (cupbop, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (tensor, 32) (push) Blocked by required conditions
CI / tests (tensor, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
Some checks are pending
CI / setup (push) Waiting to run
CI / build (32) (push) Blocked by required conditions
CI / build (64) (push) Blocked by required conditions
CI / tests (cache, 32) (push) Blocked by required conditions
CI / tests (cache, 64) (push) Blocked by required conditions
CI / tests (config1, 32) (push) Blocked by required conditions
CI / tests (config1, 64) (push) Blocked by required conditions
CI / tests (config2, 32) (push) Blocked by required conditions
CI / tests (config2, 64) (push) Blocked by required conditions
CI / tests (cupbop, 32) (push) Blocked by required conditions
CI / tests (cupbop, 64) (push) Blocked by required conditions
CI / tests (debug, 32) (push) Blocked by required conditions
CI / tests (debug, 64) (push) Blocked by required conditions
CI / tests (opencl, 32) (push) Blocked by required conditions
CI / tests (opencl, 64) (push) Blocked by required conditions
CI / tests (regression, 32) (push) Blocked by required conditions
CI / tests (regression, 64) (push) Blocked by required conditions
CI / tests (scope, 32) (push) Blocked by required conditions
CI / tests (scope, 64) (push) Blocked by required conditions
CI / tests (stress, 32) (push) Blocked by required conditions
CI / tests (stress, 64) (push) Blocked by required conditions
CI / tests (synthesis, 32) (push) Blocked by required conditions
CI / tests (synthesis, 64) (push) Blocked by required conditions
CI / tests (tensor, 32) (push) Blocked by required conditions
CI / tests (tensor, 64) (push) Blocked by required conditions
CI / tests (vector, 32) (push) Blocked by required conditions
CI / tests (vector, 64) (push) Blocked by required conditions
CI / tests (vm, 32) (push) Blocked by required conditions
CI / tests (vm, 64) (push) Blocked by required conditions
CI / complete (push) Blocked by required conditions
This commit is contained in:
parent
c04c8b1534
commit
b21f2c22d6
6 changed files with 24 additions and 20 deletions
|
@ -81,7 +81,7 @@ inline bool is_nan_boxed(uint64_t value) {
|
||||||
|
|
||||||
inline bool is_nan_boxed16(uint64_t value) {
|
inline bool is_nan_boxed16(uint64_t value) {
|
||||||
#ifdef XLEN_64
|
#ifdef XLEN_64
|
||||||
return (uint32_t(value >> 16) == 0xffffffffffff);
|
return (uint64_t(value >> 16) == 0xffffffffffff);
|
||||||
#else
|
#else
|
||||||
return true;
|
return true;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -70,7 +70,7 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
|
||||||
.ready_out(uop_sequencer_if.ready)
|
.ready_out(uop_sequencer_if.ready)
|
||||||
);
|
);
|
||||||
`ifndef L1_ENABLE
|
`ifndef L1_ENABLE
|
||||||
assign decode_if.ibuf_pop[w] = uop_sequencer_if[w].valid && uop_sequencer_if[w].ready;
|
assign decode_if.ibuf_pop[w] = uop_sequencer_if.valid && uop_sequencer_if.ready;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
VX_uop_sequencer uop_sequencer (
|
VX_uop_sequencer uop_sequencer (
|
||||||
|
|
|
@ -24,10 +24,10 @@ module VX_tcu_fedp_fp #(
|
||||||
input wire[2:0] fmt_s,
|
input wire[2:0] fmt_s,
|
||||||
input wire[2:0] fmt_d,
|
input wire[2:0] fmt_d,
|
||||||
|
|
||||||
input wire [N-1:0][DATAW-1:0] a_row,
|
input wire [N-1:0][`XLEN-1:0] a_row,
|
||||||
input wire [N-1:0][DATAW-1:0] b_col,
|
input wire [N-1:0][`XLEN-1:0] b_col,
|
||||||
input wire [DATAW-1:0] c_val,
|
input wire [`XLEN-1:0] c_val,
|
||||||
output wire [DATAW-1:0] d_val
|
output wire [`XLEN-1:0] d_val
|
||||||
);
|
);
|
||||||
wire [DATAW-1:0] a_row_p [0:N-1];
|
wire [DATAW-1:0] a_row_p [0:N-1];
|
||||||
wire [DATAW-1:0] b_col_p [0:N-1];
|
wire [DATAW-1:0] b_col_p [0:N-1];
|
||||||
|
@ -35,6 +35,8 @@ module VX_tcu_fedp_fp #(
|
||||||
wire [2:0] fmt_s_p [0:N-1];
|
wire [2:0] fmt_s_p [0:N-1];
|
||||||
wire [2:0] fmt_d_p [0:N-1];
|
wire [2:0] fmt_d_p [0:N-1];
|
||||||
|
|
||||||
|
`UNUSED_VAR ({a_row, b_col, c_val});
|
||||||
|
|
||||||
for (genvar i = 0; i < N; i++) begin: g_pipe
|
for (genvar i = 0; i < N; i++) begin: g_pipe
|
||||||
VX_pipe_register #(
|
VX_pipe_register #(
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
|
@ -43,7 +45,7 @@ module VX_tcu_fedp_fp #(
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.enable (enable),
|
.enable (enable),
|
||||||
.data_in (a_row[i]),
|
.data_in (a_row[i][DATAW-1:0]),
|
||||||
.data_out (a_row_p[i])
|
.data_out (a_row_p[i])
|
||||||
);
|
);
|
||||||
VX_pipe_register #(
|
VX_pipe_register #(
|
||||||
|
@ -53,7 +55,7 @@ module VX_tcu_fedp_fp #(
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.enable (enable),
|
.enable (enable),
|
||||||
.data_in (b_col[i]),
|
.data_in (b_col[i][DATAW-1:0]),
|
||||||
.data_out (b_col_p[i])
|
.data_out (b_col_p[i])
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -71,7 +73,7 @@ module VX_tcu_fedp_fp #(
|
||||||
|
|
||||||
wire [DATAW-1:0] fma_out [0:N];
|
wire [DATAW-1:0] fma_out [0:N];
|
||||||
|
|
||||||
assign fma_out[0] = c_val;
|
assign fma_out[0] = c_val[DATAW-1:0];
|
||||||
|
|
||||||
for (genvar i = 0; i < N; i++) begin : g_fmas
|
for (genvar i = 0; i < N; i++) begin : g_fmas
|
||||||
`ifdef TCU_DPI
|
`ifdef TCU_DPI
|
||||||
|
@ -106,6 +108,6 @@ module VX_tcu_fedp_fp #(
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
assign d_val = fma_out[N];
|
assign d_val = `XLEN'(fma_out[N]);
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|
|
@ -24,10 +24,10 @@ module VX_tcu_fedp_int #(
|
||||||
input wire[2:0] fmt_s,
|
input wire[2:0] fmt_s,
|
||||||
input wire[2:0] fmt_d,
|
input wire[2:0] fmt_d,
|
||||||
|
|
||||||
input wire [N-1:0][DATAW-1:0] a_row,
|
input wire [N-1:0][`XLEN-1:0] a_row,
|
||||||
input wire [N-1:0][DATAW-1:0] b_col,
|
input wire [N-1:0][`XLEN-1:0] b_col,
|
||||||
input wire [DATAW-1:0] c_val,
|
input wire [`XLEN-1:0] c_val,
|
||||||
output wire [DATAW-1:0] d_val
|
output wire [`XLEN-1:0] d_val
|
||||||
);
|
);
|
||||||
wire [DATAW-1:0] a_row_p [0:N-1];
|
wire [DATAW-1:0] a_row_p [0:N-1];
|
||||||
wire [DATAW-1:0] b_col_p [0:N-1];
|
wire [DATAW-1:0] b_col_p [0:N-1];
|
||||||
|
@ -35,6 +35,8 @@ module VX_tcu_fedp_int #(
|
||||||
wire [2:0] fmt_s_p [0:N-1];
|
wire [2:0] fmt_s_p [0:N-1];
|
||||||
wire [2:0] fmt_d_p [0:N-1];
|
wire [2:0] fmt_d_p [0:N-1];
|
||||||
|
|
||||||
|
`UNUSED_VAR ({a_row, b_col, c_val});
|
||||||
|
|
||||||
for (genvar i = 0; i < N; i++) begin: g_pipe
|
for (genvar i = 0; i < N; i++) begin: g_pipe
|
||||||
VX_pipe_register #(
|
VX_pipe_register #(
|
||||||
.DATAW (DATAW),
|
.DATAW (DATAW),
|
||||||
|
@ -43,7 +45,7 @@ module VX_tcu_fedp_int #(
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.enable (enable),
|
.enable (enable),
|
||||||
.data_in (a_row[i]),
|
.data_in (a_row[i][DATAW-1:0]),
|
||||||
.data_out (a_row_p[i])
|
.data_out (a_row_p[i])
|
||||||
);
|
);
|
||||||
VX_pipe_register #(
|
VX_pipe_register #(
|
||||||
|
@ -53,7 +55,7 @@ module VX_tcu_fedp_int #(
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.enable (enable),
|
.enable (enable),
|
||||||
.data_in (b_col[i]),
|
.data_in (b_col[i][DATAW-1:0]),
|
||||||
.data_out (b_col_p[i])
|
.data_out (b_col_p[i])
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -71,7 +73,7 @@ module VX_tcu_fedp_int #(
|
||||||
|
|
||||||
wire [DATAW-1:0] fma_out [0:N];
|
wire [DATAW-1:0] fma_out [0:N];
|
||||||
|
|
||||||
assign fma_out[0] = c_val;
|
assign fma_out[0] = c_val[DATAW-1:0];
|
||||||
|
|
||||||
for (genvar i = 0; i < N; i++) begin : g_fmas
|
for (genvar i = 0; i < N; i++) begin : g_fmas
|
||||||
VX_tcu_fma_int #(
|
VX_tcu_fma_int #(
|
||||||
|
@ -89,6 +91,6 @@ module VX_tcu_fedp_int #(
|
||||||
);
|
);
|
||||||
end
|
end
|
||||||
|
|
||||||
assign d_val = fma_out[N];
|
assign d_val = `XLEN'(fma_out[N]);
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|
|
@ -110,7 +110,7 @@ module VX_tcu_fp import VX_gpu_pkg::*, VX_tcu_pkg::*; #(
|
||||||
wire [`XLEN-1:0] c_val = execute_if.data.rs3_data[i * TCU_TC_N + j];
|
wire [`XLEN-1:0] c_val = execute_if.data.rs3_data[i * TCU_TC_N + j];
|
||||||
|
|
||||||
VX_tcu_fedp_fp #(
|
VX_tcu_fedp_fp #(
|
||||||
.DATAW (`XLEN),
|
.DATAW (32),
|
||||||
.N (TCU_TC_K)
|
.N (TCU_TC_K)
|
||||||
) fedp (
|
) fedp (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
|
|
|
@ -110,7 +110,7 @@ module VX_tcu_int import VX_gpu_pkg::*, VX_tcu_pkg::*; #(
|
||||||
wire [`XLEN-1:0] c_val = execute_if.data.rs3_data[i * TCU_TC_N + j];
|
wire [`XLEN-1:0] c_val = execute_if.data.rs3_data[i * TCU_TC_N + j];
|
||||||
|
|
||||||
VX_tcu_fedp_int #(
|
VX_tcu_fedp_int #(
|
||||||
.DATAW (`XLEN),
|
.DATAW (32),
|
||||||
.N (TCU_TC_K)
|
.N (TCU_TC_K)
|
||||||
) fedp (
|
) fedp (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue