multicore fix

This commit is contained in:
Blaise Tine 2020-05-10 08:30:04 -04:00
parent 359601cfd3
commit cc84e0691c
40 changed files with 27930 additions and 28148 deletions

View file

@ -7,7 +7,7 @@ CFLAGS += -I../../include -I../../../hw/simulate -I../../../runtime
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
DEBUG = 1
#DEBUG = 1
CFLAGS += -fPIC
@ -21,6 +21,10 @@ RTL_INCLUDE = -I../../hw/rtl -I../../hw/rtl/libs -I../../hw/rtl/interfaces -I../
VL_FLAGS += --language 1800-2009 --assert -Wall -Wpedantic $(MULTICORE)
# Use 64 bytes DRAM blocks
CFLAGS += -DGLOBAL_BLOCK_SIZE=64
VL_FLAGS += -DGLOBAL_BLOCK_SIZE=64
# Enable Verilator multithreaded simulation
#THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
#VL_FLAGS += --threads $(THREADS)

View file

@ -60,9 +60,7 @@ private:
class vx_device {
public:
vx_device()
: is_done_(false)
, simulator_(&ram_) {
simulator_.reset();
: is_done_(false) {
thread_ = new std::thread(__thread_proc__, this);
mem_allocation_ = vx_dev_caps(VX_CAPS_ALLOC_BASE_ADDR);
}
@ -95,7 +93,7 @@ public:
/*printf("VXDRV: upload %d bytes to 0x%x\n", size, dest_addr);
for (int i = 0; i < size; i += 4) {
printf("mem-write: 0x%x <- 0x%x\n", dest_addr + i, *(uint32_t*)((uint8_t*)src + src_offset + i));
printf("mem-write: 0x%x <- 0x%x\n", uint32_t(dest_addr + i), *(uint32_t*)((uint8_t*)src + src_offset + i));
}*/
ram_.write(dest_addr, asize, (uint8_t*)src + src_offset);
@ -111,7 +109,7 @@ public:
/*printf("VXDRV: download %d bytes from 0x%x\n", size, src_addr);
for (int i = 0; i < size; i += 4) {
printf("mem-read: 0x%x -> 0x%x\n", src_addr + i, *(uint32_t*)((uint8_t*)dest + dest_offset + i));
printf("mem-read: 0x%x -> 0x%x\n", uint32_t(src_addr + i), *(uint32_t*)((uint8_t*)dest + dest_offset + i));
}*/
return 0;
@ -120,7 +118,9 @@ public:
int flush_caches(size_t dev_maddr, size_t size) {
mutex_.lock();
simulator_.attach_ram(&ram_);
simulator_.flush_caches(dev_maddr, size);
simulator_.attach_ram(nullptr);
mutex_.unlock();
return 0;
@ -130,7 +130,8 @@ public:
mutex_.lock();
simulator_.reset();
mutex_.unlock();
simulator_.attach_ram(&ram_);
mutex_.unlock();
return 0;
}
@ -142,8 +143,14 @@ public:
bool is_busy = simulator_.is_busy();
mutex_.unlock();
if (!is_busy || 0 == timeout_sec--)
if (!is_busy || 0 == timeout_sec--) {
if (!is_busy) {
mutex_.lock();
simulator_.attach_ram(nullptr);
mutex_.unlock();
}
break;
}
std::this_thread::sleep_for(std::chrono::seconds(1));
}

View file

@ -1,6 +1,7 @@
#include <iostream>
#include <unistd.h>
#include <vortex.h>
#include "common.h"
int test = -1;
@ -61,7 +62,7 @@ int run_memcopy_test(vx_buffer_h sbuf,
int errors = 0;
// write sbuf data
for (int i = 0; i < 8 * num_blocks; ++i) {
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, value);
}
@ -75,12 +76,12 @@ int run_memcopy_test(vx_buffer_h sbuf,
// verify result
std::cout << "verify result" << std::endl;
for (int i = 0; i < 8 * num_blocks; ++i) {
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i];
auto ref = shuffle(i, value);
if (curr != ref) {
std::cout << "error @ " << std::hex << (address + 64 * i)
<< ": actual " << curr << ", expected " << ref << std::endl;
std::cout << "error at 0x" << std::hex << (address + 8 * i)
<< ": actual 0x" << curr << ", expected 0x" << ref << std::endl;
++errors;
}
}
@ -101,13 +102,13 @@ int run_kernel_test(vx_device_h device,
int errors = 0;
uint64_t seed = 0x0badf00d40ff40ff;
int num_blocks = 4;
unsigned src_dev_addr = 0x10000000;
unsigned dest_dev_addr = 0x20000000;
int src_dev_addr = DEV_MEM_SRC_ADDR;
int dest_dev_addr = DEV_MEM_DST_ADDR;
int num_blocks = NUM_BLOCKS;
// write sbuf data
for (int i = 0; i < 8 * num_blocks; ++i) {
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, seed);
}
@ -137,12 +138,12 @@ int run_kernel_test(vx_device_h device,
// verify result
std::cout << "verify result" << std::endl;
for (int i = 0; i < 8 * num_blocks; ++i) {
for (int i = 0; i < (64 * num_blocks) / 8; ++i) {
auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i];
auto ref = shuffle(i, seed);
if (curr != ref) {
std::cout << "error @ " << std::hex << (dest_dev_addr + 64 * i)
<< ": actual " << curr << ", expected " << ref << std::endl;
std::cout << "error at 0x" << std::hex << (dest_dev_addr + 8 * i)
<< ": actual 0x" << curr << ", expected 0x" << ref << std::endl;
++errors;
}
}
@ -174,10 +175,10 @@ int main(int argc, char *argv[]) {
RT_CHECK(vx_alloc_shared_mem(device, 4096, &dbuf));
// run tests
/*9if (0 == test || -1 == test) {
/*if (0 == test || -1 == test) {
std::cout << "run memcopy test" << std::endl;
RT_CHECK(run_memcopy_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1));
RT_CHECK(run_memcopy_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8));
RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d00ff00ff, 1));
RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_DST_ADDR, 0x0badf00d40ff40ff, 8));
}*/
if (1 == test || -1 == test) {

View file

@ -0,0 +1,8 @@
#ifndef _COMMON_H_
#define _COMMON_H_
#define DEV_MEM_SRC_ADDR 0x10000000
#define DEV_MEM_DST_ADDR 0x20000000
#define NUM_BLOCKS 16
#endif

Binary file not shown.

View file

@ -1,9 +1,20 @@
#include <stdint.h>
#include "config.h"
#include "intrinsics/vx_intrinsics.h"
#include "common.h"
void main() {
int64_t* x = (int64_t*)0x10000000;
int64_t* y = (int64_t*)0x20000000;
for (int i = 0; i < 8 * 4; ++i) {
y[i] = x[i];
int64_t* x = (int64_t*)DEV_MEM_SRC_ADDR;
int64_t* y = (int64_t*)DEV_MEM_DST_ADDR;
int num_words = (NUM_BLOCKS * 64) / 8;
int core_id = vx_core_id();
int num_cores = vx_num_cores();
int num_words_per_core = num_words / num_cores;
int offset = core_id * num_words_per_core;
for (int i = 0; i < num_words_per_core; ++i) {
y[offset + i] = x[offset + i];
}
}

View file

@ -49,7 +49,7 @@ run-ase: $(PROJECT)
ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
run-rtlsim: $(PROJECT)
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16
LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 1
run-simx: $(PROJECT)
LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16

Binary file not shown.

Binary file not shown.

View file

@ -6,6 +6,10 @@ VF += --language 1800-2009 --assert -Wall -Wpedantic
VF += -exe $(SRCS) $(INCLUDE)
# Use 64 bytes DRAM blocks
CF += -DGLOBAL_BLOCK_SIZE=64
VF += -DGLOBAL_BLOCK_SIZE=64
MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2
#MULTICORE += -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0

View file

@ -73,7 +73,9 @@ module VX_back_end #(
assign ebreak = exec_unit_req_if.is_etype && (| exec_unit_req_if.valid);
VX_lsu_unit lsu_unit (
VX_lsu_unit #(
.CORE_ID(CORE_ID)
) lsu_unit (
.clk (clk),
.reset (reset),
.lsu_req_if (lsu_req_if),

View file

@ -57,8 +57,12 @@
`define CSR_LTID 12'h020
`define CSR_LWID 12'h021
`define CSR_GWID 12'h022
`define CSR_GTID 12'h023
`define CSR_GTID 12'h022
`define CSR_GWID 12'h023
`define CSR_GCID 12'h024
`define CSR_NT 12'h025
`define CSR_NW 12'h026
`define CSR_NC 12'h027
`define CSR_CYCLL 12'hC00
`define CSR_CYCLH 12'hC80

View file

@ -46,6 +46,10 @@ module VX_csr_data #(
`CSR_LWID : read_data = 32'(warp_num);
`CSR_GTID ,
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num);
`CSR_GCID : read_data = CORE_ID;
`CSR_NT : read_data = `NUM_THREADS;
`CSR_NW : read_data = `NUM_WARPS;
`CSR_NC : read_data = `NUM_CORES * `NUM_CLUSTERS;
`CSR_CYCLL : read_data = num_cycles[31:0];
`CSR_CYCLH : read_data = num_cycles[63:32];
`CSR_INSTL : read_data = num_instrs[31:0];

View file

@ -1,6 +1,8 @@
`include "VX_define.vh"
module VX_front_end (
module VX_front_end #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
@ -58,7 +60,9 @@ module VX_front_end (
.fd_inst_meta_de (fe_inst_meta_fi2)
);
VX_icache_stage icache_stage (
VX_icache_stage #(
.CORE_ID(CORE_ID)
) icache_stage (
.clk (clk),
.reset (reset),
.total_freeze (total_freeze),

View file

@ -1,6 +1,8 @@
`include "VX_define.vh"
module VX_icache_stage (
module VX_icache_stage #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
input wire total_freeze,
@ -26,7 +28,7 @@ module VX_icache_stage (
// Icache Request
assign icache_req_if.core_req_valid = valid_inst;
assign icache_req_if.core_req_addr = fe_inst_meta_fi.inst_pc;
assign icache_req_if.core_req_data = 'z;
assign icache_req_if.core_req_data = 0;
assign icache_req_if.core_req_read = `BYTE_EN_LW;
assign icache_req_if.core_req_write = `BYTE_EN_NO;
assign icache_req_if.core_req_tag = {fe_inst_meta_fi.inst_pc, 2'b1, 5'b0, fe_inst_meta_fi.warp_num};
@ -50,8 +52,6 @@ module VX_icache_stage (
// Core can't accept response
assign icache_rsp_if.core_rsp_ready = ~total_freeze;
integer i;
always @(posedge clk) begin
if (reset) begin
//--
@ -64,10 +64,10 @@ module VX_icache_stage (
/*always_comb begin
if (1'($time & 1) && icache_req_if.core_req_ready && icache_req_if.core_req_valid) begin
$display("*** %t: I$ req: pc=%0h, warp=%d", $time, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num);
$display("*** %t: I%01d$ req: pc=%0h, warp=%0d", $time, CORE_ID, fe_inst_meta_fi.inst_pc, fe_inst_meta_fi.warp_num);
end
if (1'($time & 1) && icache_rsp_if.core_rsp_ready && icache_rsp_if.core_rsp_valid) begin
$display("*** %t: I$ rsp: pc=%0h, warp=%d, instr=%0h", $time, fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num, fe_inst_meta_id.instruction);
$display("*** %t: I%01d$ rsp: pc=%0h, warp=%0d, instr=%0h", $time, CORE_ID, fe_inst_meta_id.inst_pc, fe_inst_meta_id.warp_num, fe_inst_meta_id.instruction);
end
end*/

View file

@ -1,6 +1,8 @@
`include "VX_define.vh"
module VX_lsu_unit (
module VX_lsu_unit #(
parameter CORE_ID = 0
) (
input wire clk,
input wire reset,
@ -62,10 +64,10 @@ module VX_lsu_unit (
/*always_comb begin
if (1'($time & 1) && dcache_req_if.core_req_ready && (| dcache_req_if.core_req_valid)) begin
$display("*** %t: D$ req: valid=%b, addr=%0h, r=%d, w=%d, pc=%0h, rd=%d, warp=%d, data=%0h", $time, use_valid, use_address, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, use_store_data);
$display("*** %t: D%01d$ req: valid=%b, addr=%0h, r=%0d, w=%0d, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, use_valid, use_address, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, use_store_data);
end
if (1'($time & 1) && dcache_rsp_if.core_rsp_ready && (| dcache_rsp_if.core_rsp_valid)) begin
$display("*** %t: D$ rsp: valid=%b, pc=%0h, rd=%d, warp=%d, data=%0h", $time, mem_wb_if.valid, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
$display("*** %t: D%01d$ rsp: valid=%b, pc=%0h, rd=%0d, warp=%0d, data=%0h", $time, CORE_ID, mem_wb_if.valid, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
end
end*/

View file

@ -178,7 +178,9 @@ module Vortex #(
assign dcache_snp_req_if.snp_req_addr = snp_req_addr;
assign snp_req_ready = dcache_snp_req_if.snp_req_ready;
VX_front_end front_end (
VX_front_end #(
.CORE_ID(CORE_ID)
) front_end (
.clk (clk),
.reset (reset),
.warp_ctl_if (warp_ctl_if),

View file

@ -281,4 +281,13 @@ module Vortex_Socket (
);
end
/*always_comb begin
if (1'($time & 1) && (dram_req_read || dram_req_write) && dram_req_ready) begin
$display("*** %t: DRAM req: w=%b addr=%0h, tag=%0h, data=%0h", $time, dram_req_write, dram_req_addr, dram_req_tag, dram_req_data);
end
if (1'($time & 1) && dram_rsp_valid && dram_rsp_ready) begin
$display("*** %t: DRAM rsp: tag=%0h, data=%0h", $time, dram_rsp_tag, dram_rsp_data);
end
end*/
endmodule

View file

@ -2,54 +2,54 @@
`include "VX_define.vh"
module VX_bank #(
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
parameter CACHE_SIZE = 0,
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 16,
parameter BANK_LINE_SIZE = 0,
// Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8,
parameter NUM_BANKS = 0,
// Size of a word in bytes
parameter WORD_SIZE = 4,
parameter WORD_SIZE = 0,
// Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2,
parameter NUM_REQUESTS = 0,
// Number of cycles to complete i 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
parameter STAGE_1_CYCLES = 0,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size
parameter REQQ_SIZE = 8,
parameter REQQ_SIZE = 0,
// Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8,
parameter MRVQ_SIZE = 0,
// Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2,
parameter DFPQ_SIZE = 0,
// Snoop Req Queue
parameter SNRQ_SIZE = 8,
parameter SNRQ_SIZE = 0,
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size
parameter CWBQ_SIZE = 8,
parameter CWBQ_SIZE = 0,
// Dram Writeback Queue Size
parameter DWBQ_SIZE = 4,
parameter DWBQ_SIZE = 0,
// Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8,
parameter DFQQ_SIZE = 0,
// Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16,
parameter LLVQ_SIZE = 0,
// Fill Forward SNP Queue
parameter FFSQ_SIZE = 8,
parameter FFSQ_SIZE = 0,
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
parameter FILL_INVALIDAOR_SIZE = 0,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
parameter WRITE_ENABLE = 0,
// Enable dram update
parameter DRAM_ENABLE = 1,
parameter DRAM_ENABLE = 0,
// Enable snoop forwarding
parameter SNOOP_FORWARDING = 0,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
parameter CORE_TAG_WIDTH = 0,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0
@ -172,21 +172,9 @@ module VX_bank #(
assign reqq_push = core_req_ready && (| core_req_valids);
VX_cache_req_queue #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES),
.REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE),
.SNRQ_SIZE (SNRQ_SIZE),
.CWBQ_SIZE (CWBQ_SIZE),
.DWBQ_SIZE (DWBQ_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) req_queue (
@ -363,17 +351,7 @@ module VX_bank #(
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES),
.REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE),
.SNRQ_SIZE (SNRQ_SIZE),
.CWBQ_SIZE (CWBQ_SIZE),
.DWBQ_SIZE (DWBQ_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.DRAM_ENABLE (DRAM_ENABLE),
.WRITE_ENABLE (WRITE_ENABLE)
) tag_data_access (
@ -458,21 +436,11 @@ module VX_bank #(
assign {miss_add_tag, miss_add_mem_read, miss_add_mem_write, miss_add_tid} = inst_meta_st2;
VX_cache_miss_resrv #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES),
.REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE),
.SNRQ_SIZE (SNRQ_SIZE),
.CWBQ_SIZE (CWBQ_SIZE),
.DWBQ_SIZE (DWBQ_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH)
) cache_miss_resrv (
.clk (clk),
@ -568,20 +536,8 @@ module VX_bank #(
wire [`LINE_ADDR_WIDTH-1:0] fill_invalidator_addr = addr_st2;
VX_fill_invalidator #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES),
.REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE),
.SNRQ_SIZE (SNRQ_SIZE),
.CWBQ_SIZE (CWBQ_SIZE),
.DWBQ_SIZE (DWBQ_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE)
) fill_invalidator (
.clk (clk),

View file

@ -140,21 +140,10 @@ module VX_cache #(
assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready);
VX_cache_core_req_bank_sel #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES),
.REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE),
.SNRQ_SIZE (SNRQ_SIZE),
.CWBQ_SIZE (CWBQ_SIZE),
.DWBQ_SIZE (DWBQ_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE)
.NUM_REQUESTS (NUM_REQUESTS)
) cache_core_req_bank_sell (
.core_req_valid (core_req_valid),
.core_req_addr (core_req_addr),
@ -320,21 +309,9 @@ module VX_cache #(
endgenerate
VX_cache_core_rsp_merge #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES),
.REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE),
.SNRQ_SIZE (SNRQ_SIZE),
.CWBQ_SIZE (CWBQ_SIZE),
.DWBQ_SIZE (DWBQ_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) cache_core_rsp_merge (
@ -351,21 +328,10 @@ module VX_cache #(
);
VX_cache_dram_req_arb #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES),
.REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE),
.SNRQ_SIZE (SNRQ_SIZE),
.CWBQ_SIZE (CWBQ_SIZE),
.DWBQ_SIZE (DWBQ_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.PRFQ_SIZE (PRFQ_SIZE),
.PRFQ_STRIDE (PRFQ_STRIDE)
) cache_dram_req_arb (

View file

@ -2,43 +2,14 @@
`include "VX_cache_config.vh"
module VX_cache_core_req_bank_sel #(
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 16,
// Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8,
parameter BANK_LINE_SIZE = 0,
// Size of a word in bytes
parameter WORD_SIZE = 4,
parameter WORD_SIZE = 0,
// Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 0,
// Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
parameter FUNC_ID = 0,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size
parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2,
// Snoop Req Queue
parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size
parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size
parameter DWBQ_SIZE = 4,
// Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16
parameter NUM_REQUESTS = 0
) (
input wire [NUM_REQUESTS-1:0] core_req_valid,
input wire [NUM_REQUESTS-1:0][31:0] core_req_addr,

View file

@ -1,49 +1,16 @@
`include "VX_cache_config.vh"
module VX_cache_core_rsp_merge #(
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 16,
// Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8,
parameter NUM_BANKS = 0,
// Size of a word in bytes
parameter WORD_SIZE = 4,
parameter WORD_SIZE = 0,
// Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size
parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2,
// Snoop Req Queue
parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size
parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size
parameter DWBQ_SIZE = 4,
// Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
parameter NUM_REQUESTS = 0,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
parameter CORE_TAG_WIDTH = 0,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0,
// dram request tag size
parameter DRAM_TAG_WIDTH = 1
parameter CORE_TAG_ID_BITS = 0
) (
// Per Bank WB
input wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_rsp_tid,

View file

@ -1,41 +1,12 @@
`include "VX_cache_config.vh"
module VX_cache_dfq_queue #(
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 16,
parameter BANK_LINE_SIZE = 0,
// Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size
parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2,
// Snoop Req Queue
parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size
parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size
parameter DWBQ_SIZE = 4,
parameter NUM_BANKS = 0,
// Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16
parameter DFQQ_SIZE = 0
) (
input wire clk,
input wire reset,

View file

@ -1,46 +1,17 @@
`include "VX_cache_config.vh"
module VX_cache_dram_req_arb #(
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 16,
parameter BANK_LINE_SIZE = 0,
// Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8,
parameter NUM_BANKS = 0,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size
parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2,
// Snoop Req Queue
parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size
parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size
parameter DWBQ_SIZE = 4,
parameter WORD_SIZE = 0,
// Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
// Prefetcher
parameter PRFQ_SIZE = 64,
parameter PRFQ_STRIDE = 2
parameter DFQQ_SIZE = 0,
// Prefetcher
parameter PRFQ_SIZE = 0,
parameter PRFQ_STRIDE = 0
) (
input wire clk,
input wire reset,
@ -101,7 +72,9 @@ module VX_cache_dram_req_arb #(
wire dfqq_push = (| per_bank_dram_fill_req_valid);
VX_cache_dfq_queue #(
.BANK_LINE_SIZE(BANK_LINE_SIZE),
.NUM_BANKS(NUM_BANKS),
.DFQQ_SIZE(DFQQ_SIZE)
) cache_dfq_queue (
.clk (clk),
.reset (reset),

View file

@ -1,44 +1,18 @@
`include "VX_cache_config.vh"
module VX_cache_miss_resrv #(
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 16,
parameter BANK_LINE_SIZE = 0,
// Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8,
parameter NUM_BANKS = 0,
// Size of a word in bytes
parameter WORD_SIZE = 4,
parameter WORD_SIZE = 0,
// Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size
parameter REQQ_SIZE = 8,
parameter NUM_REQUESTS = 0,
// Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2,
// Snoop Req Queue
parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size
parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size
parameter DWBQ_SIZE = 4,
// Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
parameter MRVQ_SIZE = 0,
// caceh requests tag size
parameter CORE_TAG_WIDTH = 1
parameter CORE_TAG_WIDTH = 0
) (
input wire clk,
input wire reset,

View file

@ -1,45 +1,14 @@
`include "VX_cache_config.vh"
module VX_cache_req_queue #(
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 16,
// Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8,
// Size of a word in bytes
parameter WORD_SIZE = 4,
parameter WORD_SIZE = 0,
// Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 0,
// Core Request Queue Size
parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2,
// Snoop Req Queue
parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size
parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size
parameter DWBQ_SIZE = 4,
// Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
parameter REQQ_SIZE = 0,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
parameter CORE_TAG_WIDTH = 0,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0
) (

View file

@ -1,41 +1,12 @@
`include "VX_cache_config.vh"
module VX_fill_invalidator #(
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 16,
parameter BANK_LINE_SIZE = 0,
// Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size
parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2,
// Snoop Req Queue
parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size
parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size
parameter DWBQ_SIZE = 4,
// Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16,
parameter NUM_BANKS = 0,
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16
parameter FILL_INVALIDAOR_SIZE = 0
) (
input wire clk,
input wire reset,

View file

@ -1,12 +1,12 @@
`include "VX_cache_config.vh"
module VX_prefetcher #(
parameter PRFQ_SIZE = 64,
parameter PRFQ_STRIDE = 2,
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 16,
parameter BANK_LINE_SIZE = 0,
// Size of a word in bytes
parameter WORD_SIZE = 4
parameter WORD_SIZE = 0,
parameter PRFQ_SIZE = 0,
parameter PRFQ_STRIDE = 0
) (
input wire clk,
input wire reset,

View file

@ -1,7 +1,7 @@
`include "VX_cache_config.vh"
module VX_snp_fwd_arb #(
parameter NUM_BANKS = 1,
parameter NUM_BANKS = 1,
parameter BANK_LINE_SIZE = 1
) (
input wire [NUM_BANKS-1:0] per_bank_snp_fwd_valid,

View file

@ -2,46 +2,22 @@
module VX_tag_data_access #(
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
parameter CACHE_SIZE = 0,
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 16,
parameter BANK_LINE_SIZE = 0,
// Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8,
parameter NUM_BANKS = 0,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2,
parameter WORD_SIZE = 0,
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size
parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2,
// Snoop Req Queue
parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size
parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size
parameter DWBQ_SIZE = 4,
// Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16,
parameter STAGE_1_CYCLES = 0,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
parameter WRITE_ENABLE = 0,
// Enable dram update
parameter DRAM_ENABLE = 1
parameter DRAM_ENABLE = 0
) (
input wire clk,
input wire reset,
@ -100,18 +76,7 @@ module VX_tag_data_access #(
.CACHE_SIZE (CACHE_SIZE),
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQUESTS (NUM_REQUESTS),
.STAGE_1_CYCLES (STAGE_1_CYCLES),
.REQQ_SIZE (REQQ_SIZE),
.MRVQ_SIZE (MRVQ_SIZE),
.DFPQ_SIZE (DFPQ_SIZE),
.SNRQ_SIZE (SNRQ_SIZE),
.CWBQ_SIZE (CWBQ_SIZE),
.DWBQ_SIZE (DWBQ_SIZE),
.DFQQ_SIZE (DFQQ_SIZE),
.LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE)
.WORD_SIZE (WORD_SIZE)
) tag_data_structure (
.clk (clk),
.reset (reset),

View file

@ -2,42 +2,13 @@
module VX_tag_data_structure #(
// Size of cache in bytes
parameter CACHE_SIZE = 1024,
parameter CACHE_SIZE = 0,
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 16,
parameter BANK_LINE_SIZE = 0,
// Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8,
parameter NUM_BANKS = 0,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2,
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
parameter FUNC_ID = 0,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size
parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2,
// Snoop Req Queue
parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size
parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size
parameter DWBQ_SIZE = 4,
// Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16
parameter WORD_SIZE = 0
) (
input wire clk,
input wire reset,

View file

@ -8,8 +8,8 @@ double sc_time_stamp() {
return time_stamp;
}
Simulator::Simulator(RAM *ram) {
ram_ = ram;
Simulator::Simulator() {
ram_ = nullptr;
vortex_ = new VVortex_Socket();
#ifdef VCD_OUTPUT
@ -27,12 +27,20 @@ Simulator::~Simulator() {
delete vortex_;
}
void Simulator::attach_ram(RAM* ram) {
ram_ = ram;
dram_rsp_vec_.clear();
}
void Simulator::print_stats(std::ostream& out) {
out << std::left;
out << std::setw(24) << "# of total cycles:" << std::dec << time_stamp/2 << std::endl;
}
void Simulator::dbus_driver() {
if (ram_ == nullptr)
return;
// handle DRAM response cycle
int dequeue_index = -1;
for (int i = 0; i < dram_rsp_vec_.size(); i++) {
@ -149,9 +157,6 @@ bool Simulator::is_busy() {
}
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
// send snoop requests to the caches
printf("[sim] total cycles: %ld\n", time_stamp/2);
// align address to LLC block boundaries
auto aligned_addr_start = mem_addr / GLOBAL_BLOCK_SIZE;
auto aligned_addr_end = (mem_addr + size + GLOBAL_BLOCK_SIZE - 1) / GLOBAL_BLOCK_SIZE;

View file

@ -29,7 +29,7 @@ typedef struct {
class Simulator {
public:
Simulator(RAM *ram);
Simulator();
virtual ~Simulator();
bool is_busy();
@ -37,6 +37,9 @@ public:
void step();
void wait(uint32_t cycles);
void flush_caches(uint32_t mem_addr, uint32_t size);
void attach_ram(RAM* ram);
bool run();
void print_stats(std::ostream& out);

View file

@ -71,7 +71,8 @@ int main(int argc, char **argv)
RAM ram;
loadHexImpl(s.c_str(), &ram);
Simulator simulator(&ram);
Simulator simulator;
simulator.attach_ram(&ram);
bool curr = simulator.run();
if (curr) std::cerr << GREEN << "Test Passed: " << s << std::endl;
@ -106,7 +107,8 @@ int main(int argc, char **argv)
RAM ram;
loadHexImpl(testing, &ram);
Simulator simulator(&ram);
Simulator simulator;
simulator.attach_ram(&ram);
bool curr = simulator.run();
if (curr) std::cerr << GREEN << "Test Passed: " << testing << std::endl;

View file

@ -35,26 +35,50 @@ vx_join:
.type vx_warp_id, @function
.global vx_warp_id
vx_warp_id:
csrr a0, CSR_LWID # read warp index
csrr a0, CSR_LWID
ret
.type vx_warp_gid, @function
.global vx_warp_gid
vx_warp_gid:
csrr a0, CSR_GWID # read warp index
csrr a0, CSR_GWID
ret
.type vx_thread_id, @function
.global vx_thread_id
vx_thread_id:
csrr a0, CSR_LTID # read thread index
csrr a0, CSR_LTID
ret
.type vx_thread_gid, @function
.global vx_thread_gid
vx_thread_gid:
csrr a0, CSR_GTID # read thread index
csrr a0, CSR_GTID
ret
.type vx_core_id, @function
.global vx_core_id
vx_core_id:
csrr a0, CSR_GCID
ret
.type vx_num_threads, @function
.global vx_num_threads
vx_num_threads:
csrr a0, CSR_NT
ret
.type vx_num_warps, @function
.global vx_num_warps
vx_num_warps:
csrr a0, CSR_NW
ret
.type vx_num_cores, @function
.global vx_num_cores
vx_num_cores:
csrr a0, CSR_NC
ret
.type vx_num_cycles, @function
.global vx_num_cycles

View file

@ -8,37 +8,49 @@ extern "C" {
#endif
// Spawn warps
void vx_wspawn(unsigned numWarps, unsigned PC_spawn);
void vx_wspawn(int numWarps, int PC_spawn);
// Set thread mask
void vx_tmc(unsigned numThreads);
void vx_tmc(int numThreads);
// Warp Barrier
void vx_barrier(unsigned barriedID, unsigned numWarps);
void vx_barrier(int barriedID, int numWarps);
// Split on a predicate
void vx_split(unsigned predicate);
void vx_split(int predicate);
// Join
void vx_join(void);
void vx_join();
// Return the warp thread index
unsigned vx_thread_id(void);
// Return the warp's unique thread id
int vx_thread_id();
// Return the core warp index
unsigned vx_warp_id(void);
// Return the core's unique warp id
int vx_warp_id();
// Return processsor unique thread id
unsigned vx_thread_gid(void);
// Return processsor unique core id
int vx_core_id();
// Return processsor unique warp id
unsigned vx_warp_gid(void);
// Return processsor global thread id
int vx_thread_gid();
// Return number cycles
unsigned vx_num_cycles(void);
// Return processsor global warp id
int vx_warp_gid();
// Return number instructions
unsigned vx_num_instrs(void);
// Return the number of threads in a warp
int vx_num_threads();
// Return the number of warps in a core
int vx_num_warps();
// Return the number of cores in the processsor
int vx_num_cores();
// Return the number of cycles
int vx_num_cycles();
// Return the number of instructions
int vx_num_instrs();
#define __if(b) vx_split(b); \
if (b)

View file

@ -5,7 +5,7 @@
.type _start, @function
_start:
la a1, vx_set_sp
li a0, NUM_WARPS # activate all warps
csrr a0, CSR_NW # get num warps
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
jal vx_set_sp
li a0, 1
@ -32,12 +32,11 @@ _exit:
li a0, 0
.word 0x0005006b # disable all threads
.section .text
.type vx_set_sp, @function
.global vx_set_sp
vx_set_sp:
li a0, NUM_THREADS
csrr a0, CSR_NT # get num threads
.word 0x0005006b # activate all threads
.option push
@ -46,10 +45,10 @@ vx_set_sp:
addi gp, gp, %pcrel_lo(1b)
.option pop
csrr a1, CSR_GTID # get gtid
slli a1, a1, 10 # multiply tid by 1024
csrr a2, CSR_LTID # get tid
slli a2, a2, 2 # multiply tid by 4
csrr a1, CSR_GTID # get global thread id
slli a1, a1, 10 # multiply by 1024
csrr a2, CSR_LTID # get local thread id
slli a2, a2, 2 # multiply by 4
lui sp, STACK_BASE_ADDR # load base sp
sub sp, sp, a1 # sub thread block
add sp, sp, a2 # reduce addr collision for perf

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff