Merge remote-tracking branch 'refs/remotes/origin/master'

This commit is contained in:
Euna Kim 2019-11-23 22:25:45 -05:00
commit 62ab6b3e98
137 changed files with 2472848 additions and 2586571 deletions

View file

@ -131,6 +131,7 @@ void _clCmdParams(int argc, char* argv[]){
// devices have no relationship with context
void _clInit()
{
printf("_clInit()\n");
int DEVICE_ID_INUSED = device_id_inused;
cl_int resultCL;
@ -225,15 +226,18 @@ void _clInit()
throw(string("InitCL()::Creating Command Queue. (clCreateCommandQueue)"));
//-----------------------------------------------
//--cambine-5: Load CL file, build CL program object, create CL kernel object
std::string source_str = FileToString(kernel_file);
/*std::string source_str = FileToString(kernel_file);
const char * source = source_str.c_str();
size_t sourceSize[] = { source_str.length() };
size_t sourceSize[] = { source_str.length() };*/
oclHandles.program = clCreateProgramWithSource(oclHandles.context,
oclHandles.program =
clCreateProgramWithBuiltInKernels(oclHandles.context, 1, &oclHandles.devices[DEVICE_ID_INUSED], "BFS_1, BFS_2", &resultCL);
/*oclHandles.program = clCreateProgramWithSource(oclHandles.context,
1,
&source,
sourceSize,
&resultCL);
&resultCL);*/
if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL))
throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)"));

View file

@ -1,33 +1,35 @@
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_INC_PATH = $(wildcard ../include)
POCL_LIB_PATH = $(wildcard ../lib)
VX_RT_PATH = $(wildcard ../../../runtime)
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
RISCV_TOOL_PATH=$(wildcard ~/dev/riscv-gnu-toolchain/drops)
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
POCL_CC_PATH=$(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_RT_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt)
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
VX_RT_PATH=$(wildcard ../../../runtime)
VX_SIMX_PATH=$(wildcard ../../../simX/obj_dir)
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
CC=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
NEWLIB_PATH=$(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
CXXFLAGS = -g -O0 -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -march=rv32im -mabi=ilp32
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
CXXFLAGS += -ffreestanding # program may not begin at main()
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -I$(POCL_INC_PATH)
LIBS = -lOpenCL
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT=bfs
@ -37,7 +39,10 @@ lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) -I$(POCL_RT_PATH)/include -L$(POCL_RT_PATH)/lib/static -L. $(VX_SRCS) main.cc timer.cc -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).elf
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
@ -45,8 +50,17 @@ $(PROJECT).hex: $(PROJECT).elf
$(PROJECT).dump: $(PROJECT).elf
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
run:
$(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
run: $(PROJECT).hex
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
qemu: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -strace -d in_asm -D debug.log $(PROJECT).qemu
gdb-s: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.elf *.dump *.hex *.a *.pocl
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu

28677
benchmarks/opencl/bfs/graph4096.txt Executable file

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -1,12 +1,14 @@
//--by Jianbin Fang
#define __CL_ENABLE_EXCEPTIONS
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <cstring>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#ifdef PROFILING
#ifdef PROFILING
#include "timer.h"
#endif
@ -15,285 +17,281 @@
#define MAX_THREADS_PER_BLOCK 256
//Structure to hold a node information
struct Node
{
int starting;
int no_of_edges;
// Structure to hold a node information
struct Node {
int starting;
int no_of_edges;
};
//----------------------------------------------------------
//--bfs on cpu
//--programmer: jianbin
//--date: 26/01/2011
//--note: width is changed to the new_width
//----------------------------------------------------------
void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \
char *h_graph_visited, int *h_cost_ref){
char stop;
int k = 0;
do{
//if no thread changes this value then the loop stops
stop=false;
for(int tid = 0; tid < no_of_nodes; tid++ )
{
if (h_graph_mask[tid] == true){
h_graph_mask[tid]=false;
for(int i=h_graph_nodes[tid].starting; i<(h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting); i++){
int id = h_graph_edges[i]; //--cambine: node id is connected with node tid
if(!h_graph_visited[id]){ //--cambine: if node id has not been visited, enter the body below
h_cost_ref[id]=h_cost_ref[tid]+1;
h_updating_graph_mask[id]=true;
}
}
}
}
void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
int *h_graph_edges, char *h_graph_mask,
char *h_updating_graph_mask, char *h_graph_visited,
int *h_cost_ref) {
char stop;
int k = 0;
do {
// if no thread changes this value then the loop stops
stop = false;
for (int tid = 0; tid < no_of_nodes; tid++) {
if (h_graph_mask[tid] == true) {
h_graph_mask[tid] = false;
for (int i = h_graph_nodes[tid].starting;
i < (h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting);
i++) {
int id =
h_graph_edges[i]; //--cambine: node id is connected with node tid
if (!h_graph_visited[id]) { //--cambine: if node id has not been
//visited, enter the body below
h_cost_ref[id] = h_cost_ref[tid] + 1;
h_updating_graph_mask[id] = true;
}
}
}
}
for(int tid=0; tid< no_of_nodes ; tid++ )
{
if (h_updating_graph_mask[tid] == true){
h_graph_mask[tid]=true;
h_graph_visited[tid]=true;
stop=true;
h_updating_graph_mask[tid]=false;
}
}
k++;
}
while(stop);
for (int tid = 0; tid < no_of_nodes; tid++) {
if (h_updating_graph_mask[tid] == true) {
h_graph_mask[tid] = true;
h_graph_visited[tid] = true;
stop = true;
h_updating_graph_mask[tid] = false;
}
}
k++;
} while (stop);
}
//----------------------------------------------------------
//--breadth first search on GPUs
//----------------------------------------------------------
void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \
char *h_graph_visited, int *h_cost)
throw(std::string){
void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size,
int *h_graph_edges, char *h_graph_mask,
char *h_updating_graph_mask, char *h_graph_visited,
int *h_cost) throw(std::string) {
//int number_elements = height*width;
char h_over;
cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, \
d_graph_visited, d_cost, d_over;
try{
//--1 transfer data from host to device
_clInit();
d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes);
d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges);
d_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_graph_mask);
d_updating_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_updating_graph_mask);
d_graph_visited = _clMallocRW(no_of_nodes*sizeof(char), h_graph_visited);
// int number_elements = height*width;
char h_over;
cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask,
d_graph_visited, d_cost, d_over;
try {
//--1 transfer data from host to device
_clInit();
d_graph_nodes = _clMalloc(no_of_nodes * sizeof(Node), h_graph_nodes);
d_graph_edges = _clMalloc(edge_list_size * sizeof(int), h_graph_edges);
d_graph_mask = _clMallocRW(no_of_nodes * sizeof(char), h_graph_mask);
d_updating_graph_mask =
_clMallocRW(no_of_nodes * sizeof(char), h_updating_graph_mask);
d_graph_visited = _clMallocRW(no_of_nodes * sizeof(char), h_graph_visited);
d_cost = _clMallocRW(no_of_nodes * sizeof(int), h_cost);
d_over = _clMallocRW(sizeof(char), &h_over);
d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost);
d_over = _clMallocRW(sizeof(char), &h_over);
_clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes);
_clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges);
_clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(char), h_graph_mask);
_clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(char), h_updating_graph_mask);
_clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(char), h_graph_visited);
_clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost);
//--2 invoke kernel
#ifdef PROFILING
timer kernel_timer;
double kernel_time = 0.0;
kernel_timer.reset();
kernel_timer.start();
_clMemcpyH2D(d_graph_nodes, no_of_nodes * sizeof(Node), h_graph_nodes);
_clMemcpyH2D(d_graph_edges, edge_list_size * sizeof(int), h_graph_edges);
_clMemcpyH2D(d_graph_mask, no_of_nodes * sizeof(char), h_graph_mask);
_clMemcpyH2D(d_updating_graph_mask, no_of_nodes * sizeof(char),
h_updating_graph_mask);
_clMemcpyH2D(d_graph_visited, no_of_nodes * sizeof(char), h_graph_visited);
_clMemcpyH2D(d_cost, no_of_nodes * sizeof(int), h_cost);
//--2 invoke kernel
#ifdef PROFILING
timer kernel_timer;
double kernel_time = 0.0;
kernel_timer.reset();
kernel_timer.start();
#endif
do{
h_over = false;
_clMemcpyH2D(d_over, sizeof(char), &h_over);
//--kernel 0
int kernel_id = 0;
int kernel_idx = 0;
_clSetArgs(kernel_id, kernel_idx++, d_graph_nodes);
_clSetArgs(kernel_id, kernel_idx++, d_graph_edges);
_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
_clSetArgs(kernel_id, kernel_idx++, d_cost);
_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
//int work_items = no_of_nodes;
_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
//--kernel 1
kernel_id = 1;
kernel_idx = 0;
_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
_clSetArgs(kernel_id, kernel_idx++, d_over);
_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
//work_items = no_of_nodes;
_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
_clMemcpyD2H(d_over,sizeof(char), &h_over);
}while(h_over);
_clFinish();
#ifdef PROFILING
kernel_timer.stop();
kernel_time = kernel_timer.getTimeInSeconds();
do {
h_over = false;
_clMemcpyH2D(d_over, sizeof(char), &h_over);
//--kernel 0
int kernel_id = 0;
int kernel_idx = 0;
_clSetArgs(kernel_id, kernel_idx++, d_graph_nodes);
_clSetArgs(kernel_id, kernel_idx++, d_graph_edges);
_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
_clSetArgs(kernel_id, kernel_idx++, d_cost);
_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
// int work_items = no_of_nodes;
_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
//--kernel 1
kernel_id = 1;
kernel_idx = 0;
_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
_clSetArgs(kernel_id, kernel_idx++, d_over);
_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
// work_items = no_of_nodes;
_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
_clMemcpyD2H(d_over, sizeof(char), &h_over);
} while (h_over);
_clFinish();
#ifdef PROFILING
kernel_timer.stop();
kernel_time = kernel_timer.getTimeInSeconds();
#endif
//--3 transfer data from device to host
_clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost);
//--statistics
#ifdef PROFILING
std::cout<<"kernel time(s):"<<kernel_time<<std::endl;
//--3 transfer data from device to host
_clMemcpyD2H(d_cost, no_of_nodes * sizeof(int), h_cost);
//--statistics
#ifdef PROFILING
std::cout << "kernel time(s):" << kernel_time << std::endl;
#endif
//--4 release cl resources.
_clFree(d_graph_nodes);
_clFree(d_graph_edges);
_clFree(d_graph_mask);
_clFree(d_updating_graph_mask);
_clFree(d_graph_visited);
_clFree(d_cost);
_clFree(d_over);
_clRelease();
}
catch(std::string msg){
_clFree(d_graph_nodes);
_clFree(d_graph_edges);
_clFree(d_graph_mask);
_clFree(d_updating_graph_mask);
_clFree(d_graph_visited);
_clFree(d_cost);
_clFree(d_over);
_clRelease();
std::string e_str = "in run_transpose_gpu -> ";
e_str += msg;
throw(e_str);
}
return ;
//--4 release cl resources.
_clFree(d_graph_nodes);
_clFree(d_graph_edges);
_clFree(d_graph_mask);
_clFree(d_updating_graph_mask);
_clFree(d_graph_visited);
_clFree(d_cost);
_clFree(d_over);
_clRelease();
} catch (std::string msg) {
_clFree(d_graph_nodes);
_clFree(d_graph_edges);
_clFree(d_graph_mask);
_clFree(d_updating_graph_mask);
_clFree(d_graph_visited);
_clFree(d_cost);
_clFree(d_over);
_clRelease();
std::string e_str = "in run_transpose_gpu -> ";
e_str += msg;
throw(e_str);
}
return;
}
// Writes the one-line command-line synopsis to stderr.
// argc is accepted for signature compatibility but is not read;
// argv[0] supplies the program name shown in the message.
void Usage(int argc, char **argv) {
  const char *program_name = argv[0];
  fprintf(stderr, "Usage: %s <input_file>\n", program_name);
}
//----------------------------------------------------------
//--cambine: main function
//--author: created by Jianbin Fang
//--date: 25/01/2011
//----------------------------------------------------------
int main(int argc, char * argv[])
{
int no_of_nodes;
int edge_list_size;
FILE *fp;
Node* h_graph_nodes;
char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited;
try{
char *input_f;
if(argc!=2){
Usage(argc, argv);
exit(0);
}
input_f = argv[1];
printf("Reading File\n");
//Read in Graph from a file
fp = fopen(input_f,"r");
if(!fp){
printf("Error Reading graph file\n");
return 0;
}
int main(int argc, char *argv[]) {
printf("enter demo main\n");
int source = 0;
int no_of_nodes;
int edge_list_size;
FILE *fp;
Node *h_graph_nodes;
char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited;
fscanf(fp,"%d",&no_of_nodes);
try {
char *input_f = "graph4096.txt";
printf("Reading File\n");
// Read in Graph from a file
fp = fopen(input_f, "r");
if (!fp) {
printf("Error Reading graph file\n");
return 0;
}
int num_of_blocks = 1;
int num_of_threads_per_block = no_of_nodes;
printf("Reading File completed!\n");
//Make execution Parameters according to the number of nodes
//Distribute threads across multiple Blocks if necessary
if(no_of_nodes>MAX_THREADS_PER_BLOCK){
num_of_blocks = (int)ceil(no_of_nodes/(double)MAX_THREADS_PER_BLOCK);
num_of_threads_per_block = MAX_THREADS_PER_BLOCK;
}
work_group_size = num_of_threads_per_block;
// allocate host memory
h_graph_nodes = (Node*) malloc(sizeof(Node)*no_of_nodes);
h_graph_mask = (char*) malloc(sizeof(char)*no_of_nodes);
h_updating_graph_mask = (char*) malloc(sizeof(char)*no_of_nodes);
h_graph_visited = (char*) malloc(sizeof(char)*no_of_nodes);
int start, edgeno;
// initialize the memory
for(int i = 0; i < no_of_nodes; i++){
fscanf(fp,"%d %d",&start,&edgeno);
h_graph_nodes[i].starting = start;
h_graph_nodes[i].no_of_edges = edgeno;
h_graph_mask[i]=false;
h_updating_graph_mask[i]=false;
h_graph_visited[i]=false;
}
//read the source node from the file
fscanf(fp,"%d",&source);
source=0;
//set the source node as true in the mask
h_graph_mask[source]=true;
h_graph_visited[source]=true;
fscanf(fp,"%d",&edge_list_size);
int id,cost;
int* h_graph_edges = (int*) malloc(sizeof(int)*edge_list_size);
for(int i=0; i < edge_list_size ; i++){
fscanf(fp,"%d",&id);
fscanf(fp,"%d",&cost);
h_graph_edges[i] = id;
}
int source = 0;
if(fp)
fclose(fp);
// allocate mem for the result on host side
int *h_cost = (int*) malloc(sizeof(int)*no_of_nodes);
int *h_cost_ref = (int*)malloc(sizeof(int)*no_of_nodes);
for(int i=0;i<no_of_nodes;i++){
h_cost[i]=-1;
h_cost_ref[i] = -1;
}
h_cost[source]=0;
h_cost_ref[source]=0;
//---------------------------------------------------------
//--gpu entry
run_bfs_gpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost);
//---------------------------------------------------------
//--cpu entry
// initialize the memory again
for(int i = 0; i < no_of_nodes; i++){
h_graph_mask[i]=false;
h_updating_graph_mask[i]=false;
h_graph_visited[i]=false;
}
//set the source node as true in the mask
source=0;
h_graph_mask[source]=true;
h_graph_visited[source]=true;
run_bfs_cpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost_ref);
//---------------------------------------------------------
//--result verification
compare_results<int>(h_cost_ref, h_cost, no_of_nodes);
//release host memory
free(h_graph_nodes);
free(h_graph_mask);
free(h_updating_graph_mask);
free(h_graph_visited);
fscanf(fp, "%d", &no_of_nodes);
}
catch(std::string msg){
std::cout<<"--cambine: exception in main ->"<<msg<<std::endl;
//release host memory
free(h_graph_nodes);
free(h_graph_mask);
free(h_updating_graph_mask);
free(h_graph_visited);
}
return 0;
int num_of_blocks = 1;
int num_of_threads_per_block = no_of_nodes;
// Make execution Parameters according to the number of nodes
// Distribute threads across multiple Blocks if necessary
if (no_of_nodes > MAX_THREADS_PER_BLOCK) {
num_of_blocks = (int)ceil(no_of_nodes / (double)MAX_THREADS_PER_BLOCK);
num_of_threads_per_block = MAX_THREADS_PER_BLOCK;
}
work_group_size = num_of_threads_per_block;
// allocate host memory
h_graph_nodes = (Node *)malloc(sizeof(Node) * no_of_nodes);
h_graph_mask = (char *)malloc(sizeof(char) * no_of_nodes);
h_updating_graph_mask = (char *)malloc(sizeof(char) * no_of_nodes);
h_graph_visited = (char *)malloc(sizeof(char) * no_of_nodes);
int start, edgeno;
// initialize the memory
for (int i = 0; i < no_of_nodes; i++) {
fscanf(fp, "%d %d", &start, &edgeno);
h_graph_nodes[i].starting = start;
h_graph_nodes[i].no_of_edges = edgeno;
h_graph_mask[i] = false;
h_updating_graph_mask[i] = false;
h_graph_visited[i] = false;
}
// read the source node from the file
fscanf(fp, "%d", &source);
source = 0;
// set the source node as true in the mask
h_graph_mask[source] = true;
h_graph_visited[source] = true;
fscanf(fp, "%d", &edge_list_size);
int id, cost;
int *h_graph_edges = (int *)malloc(sizeof(int) * edge_list_size);
for (int i = 0; i < edge_list_size; i++) {
fscanf(fp, "%d", &id);
fscanf(fp, "%d", &cost);
h_graph_edges[i] = id;
}
if (fp)
fclose(fp);
// allocate mem for the result on host side
int *h_cost = (int *)malloc(sizeof(int) * no_of_nodes);
int *h_cost_ref = (int *)malloc(sizeof(int) * no_of_nodes);
for (int i = 0; i < no_of_nodes; i++) {
h_cost[i] = -1;
h_cost_ref[i] = -1;
}
h_cost[source] = 0;
h_cost_ref[source] = 0;
//---------------------------------------------------------
//--gpu entry
run_bfs_gpu(no_of_nodes, h_graph_nodes, edge_list_size, h_graph_edges,
h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost);
//---------------------------------------------------------
//--cpu entry
// initialize the memory again
for (int i = 0; i < no_of_nodes; i++) {
h_graph_mask[i] = false;
h_updating_graph_mask[i] = false;
h_graph_visited[i] = false;
}
// set the source node as true in the mask
source = 0;
h_graph_mask[source] = true;
h_graph_visited[source] = true;
run_bfs_cpu(no_of_nodes, h_graph_nodes, edge_list_size, h_graph_edges,
h_graph_mask, h_updating_graph_mask, h_graph_visited,
h_cost_ref);
//---------------------------------------------------------
//--result verification
compare_results<int>(h_cost_ref, h_cost, no_of_nodes);
// release host memory
free(h_graph_nodes);
free(h_graph_mask);
free(h_updating_graph_mask);
free(h_graph_visited);
} catch (std::string msg) {
std::cout << "--cambine: exception in main ->" << msg << std::endl;
// release host memory
free(h_graph_nodes);
free(h_graph_mask);
free(h_updating_graph_mask);
free(h_graph_visited);
}
return 0;
}

View file

@ -3,126 +3,99 @@
#include <iostream>
class timer {
public:
timer(const char *name = 0);
timer(const char *name, std::ostream &write_on_exit);
public:
timer(const char *name = 0);
timer(const char *name, std::ostream &write_on_exit);
~timer();
~timer();
void start(), stop();
void reset();
std::ostream &print(std::ostream &);
void start(), stop();
void reset();
std::ostream &print(std::ostream &);
double getTimeInSeconds();
double getTimeInSeconds();
private:
void print_time(std::ostream &, const char *which, double time) const;
private:
void print_time(std::ostream &, const char *which, double time) const;
union {
long long total_time;
struct {
union {
long long total_time;
struct {
#if defined __PPC__
int high, low;
int high, low;
#else
int low, high;
int low, high;
#endif
};
};
};
};
unsigned long long count;
const char *const name;
std::ostream *const write_on_exit;
unsigned long long count;
const char *const name;
std::ostream *const write_on_exit;
static double CPU_speed_in_MHz, get_CPU_speed_in_MHz();
static double CPU_speed_in_MHz, get_CPU_speed_in_MHz();
};
std::ostream &operator<<(std::ostream &, class timer &);
std::ostream &operator << (std::ostream &, class timer &);
inline void timer::reset()
{
total_time = 0;
count = 0;
inline void timer::reset() {
total_time = 0;
count = 0;
}
inline timer::timer(const char *name)
:
name(name),
write_on_exit(0)
{
reset();
inline timer::timer(const char *name) : name(name), write_on_exit(0) {
reset();
}
inline timer::timer(const char *name, std::ostream &write_on_exit)
:
name(name),
write_on_exit(&write_on_exit)
{
reset();
: name(name), write_on_exit(&write_on_exit) {
reset();
}
inline timer::~timer()
{
if (write_on_exit != 0)
print(*write_on_exit);
inline timer::~timer() {
if (write_on_exit != 0)
print(*write_on_exit);
}
inline void timer::start()
{
inline void timer::start() {
#if (defined __PATHSCALE__) && (defined __i386 || defined __x86_64)
unsigned eax, edx;
unsigned eax, edx;
asm volatile ("rdtsc" : "=a" (eax), "=d" (edx));
asm volatile("rdtsc" : "=a"(eax), "=d"(edx));
total_time -= ((unsigned long long) edx << 32) + eax;
#elif (defined __GNUC__ || defined __INTEL_COMPILER) && (defined __i386 || defined __x86_64)
asm volatile
(
"rdtsc\n\t"
"subl %%eax, %0\n\t"
"sbbl %%edx, %1"
:
"+m" (low), "+m" (high)
:
:
"eax", "edx"
);
total_time -= ((unsigned long long)edx << 32) + eax;
#elif (defined __GNUC__ || defined __INTEL_COMPILER) && \
(defined __i386 || defined __x86_64)
asm volatile("rdtsc\n\t"
"subl %%eax, %0\n\t"
"sbbl %%edx, %1"
: "+m"(low), "+m"(high)
:
: "eax", "edx");
#else
#error Compiler/Architecture not recognized
#endif
}
inline void timer::stop()
{
inline void timer::stop() {
#if (defined __PATHSCALE__) && (defined __i386 || defined __x86_64)
unsigned eax, edx;
unsigned eax, edx;
asm volatile ("rdtsc" : "=a" (eax), "=d" (edx));
asm volatile("rdtsc" : "=a"(eax), "=d"(edx));
total_time += ((unsigned long long) edx << 32) + eax;
#elif (defined __GNUC__ || defined __INTEL_COMPILER) && (defined __i386 || defined __x86_64)
asm volatile
(
"rdtsc\n\t"
"addl %%eax, %0\n\t"
"adcl %%edx, %1"
:
"+m" (low), "+m" (high)
:
:
"eax", "edx"
);
total_time += ((unsigned long long)edx << 32) + eax;
#elif (defined __GNUC__ || defined __INTEL_COMPILER) && \
(defined __i386 || defined __x86_64)
asm volatile("rdtsc\n\t"
"addl %%eax, %0\n\t"
"adcl %%edx, %1"
: "+m"(low), "+m"(high)
:
: "eax", "edx");
#endif
++ count;
++count;
}
#endif

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,131 @@
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_CL_D3D10_H
#define __OPENCL_CL_D3D10_H
#include <d3d10.h>
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************
* cl_khr_d3d10_sharing */
#define cl_khr_d3d10_sharing 1
typedef cl_uint cl_d3d10_device_source_khr;
typedef cl_uint cl_d3d10_device_set_khr;
/******************************************************************************/
/* Error Codes */
#define CL_INVALID_D3D10_DEVICE_KHR -1002
#define CL_INVALID_D3D10_RESOURCE_KHR -1003
#define CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR -1004
#define CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR -1005
/* cl_d3d10_device_source_khr */
#define CL_D3D10_DEVICE_KHR 0x4010
#define CL_D3D10_DXGI_ADAPTER_KHR 0x4011
/* cl_d3d10_device_set_khr */
#define CL_PREFERRED_DEVICES_FOR_D3D10_KHR 0x4012
#define CL_ALL_DEVICES_FOR_D3D10_KHR 0x4013
/* cl_context_info */
#define CL_CONTEXT_D3D10_DEVICE_KHR 0x4014
#define CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR 0x402C
/* cl_mem_info */
#define CL_MEM_D3D10_RESOURCE_KHR 0x4015
/* cl_image_info */
#define CL_IMAGE_D3D10_SUBRESOURCE_KHR 0x4016
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR 0x4017
#define CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR 0x4018
/******************************************************************************/
/*
 * Function-pointer type named after the clGetDeviceIDsFromD3D10KHR entry
 * point of the cl_khr_d3d10_sharing extension declared in this header.
 *
 * platform          - platform handle passed to the call.
 * d3d_device_source - cl_d3d10_device_source_khr selector; presumably one of
 *                     CL_D3D10_DEVICE_KHR / CL_D3D10_DXGI_ADAPTER_KHR
 *                     (TODO confirm against the extension spec).
 * d3d_object        - untyped pointer to the Direct3D object; its concrete
 *                     type presumably depends on d3d_device_source.
 * d3d_device_set    - cl_d3d10_device_set_khr selector; presumably
 *                     CL_PREFERRED_DEVICES_FOR_D3D10_KHR or
 *                     CL_ALL_DEVICES_FOR_D3D10_KHR.
 * num_entries       - presumably the capacity of the devices array.
 * devices           - non-const cl_device_id pointer; presumably an output
 *                     array filled by the callee.
 * num_devices       - non-const cl_uint pointer; presumably receives the
 *                     number of matching devices.
 *
 * Returns a cl_int; presumably an OpenCL status code (the D3D10-specific
 * error values are defined earlier in this header).
 */
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D10KHR_fn)(
cl_platform_id platform,
cl_d3d10_device_source_khr d3d_device_source,
void * d3d_object,
cl_d3d10_device_set_khr d3d_device_set,
cl_uint num_entries,
cl_device_id * devices,
cl_uint * num_devices) CL_API_SUFFIX__VERSION_1_0;
/*
 * Function-pointer type named after the clCreateFromD3D10BufferKHR entry
 * point. Takes an OpenCL context, memory flags and an ID3D10Buffer pointer,
 * and returns a cl_mem handle.
 *
 * errcode_ret - non-const cl_int pointer; presumably receives the status of
 *               the call (TODO confirm against the extension spec).
 */
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10BufferKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Buffer * resource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
/*
 * Function-pointer type named after the clCreateFromD3D10Texture2DKHR entry
 * point. Takes an OpenCL context, memory flags, an ID3D10Texture2D pointer
 * and a subresource index, and returns a cl_mem handle.
 *
 * subresource - UINT identifying which subresource of the texture is used;
 *               exact indexing rules are not visible here (see the spec).
 * errcode_ret - non-const cl_int pointer; presumably receives the status of
 *               the call.
 */
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture2DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Texture2D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
/*
 * Function-pointer type named after the clCreateFromD3D10Texture3DKHR entry
 * point. Same parameter shape as the Texture2D variant, but the resource is
 * an ID3D10Texture3D pointer; returns a cl_mem handle.
 *
 * subresource - UINT identifying which subresource of the texture is used.
 * errcode_ret - non-const cl_int pointer; presumably receives the status of
 *               the call.
 */
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D10Texture3DKHR_fn)(
cl_context context,
cl_mem_flags flags,
ID3D10Texture3D * resource,
UINT subresource,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_0;
/*
 * Function-pointer type named after the clEnqueueAcquireD3D10ObjectsKHR
 * entry point. Operates on a command queue and an array of cl_mem objects.
 *
 * num_objects / mem_objects            - count and read-only array of memory
 *                                        objects the call applies to.
 * num_events_in_wait_list /
 * event_wait_list                      - count and read-only array of events;
 *                                        presumably events to wait on before
 *                                        the command runs.
 * event                                - non-const cl_event pointer;
 *                                        presumably receives an event for
 *                                        this command.
 *
 * Returns a cl_int; presumably an OpenCL status code such as
 * CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR on failure (TODO confirm).
 */
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D10ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
/*
 * Function-pointer type named after the clEnqueueReleaseD3D10ObjectsKHR
 * entry point. Parameter list is identical to the Acquire variant: a command
 * queue, an array of cl_mem objects, an event wait list and an output event.
 *
 * Returns a cl_int; presumably an OpenCL status code such as
 * CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR on failure (TODO confirm).
 */
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D10ObjectsKHR_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem * mem_objects,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event) CL_API_SUFFIX__VERSION_1_0;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_D3D10_H */

View file

@ -0,0 +1,131 @@
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_CL_D3D11_H
#define __OPENCL_CL_D3D11_H
#include <d3d11.h>
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************
* cl_khr_d3d11_sharing */
#define cl_khr_d3d11_sharing 1
typedef cl_uint cl_d3d11_device_source_khr;
typedef cl_uint cl_d3d11_device_set_khr;
/******************************************************************************/
/* Error codes introduced by cl_khr_d3d11_sharing (negative, like all CL errors
 * in this header). */
#define CL_INVALID_D3D11_DEVICE_KHR                     -1006
#define CL_INVALID_D3D11_RESOURCE_KHR                   -1007
#define CL_D3D11_RESOURCE_ALREADY_ACQUIRED_KHR          -1008
#define CL_D3D11_RESOURCE_NOT_ACQUIRED_KHR              -1009

/* cl_d3d11_device_source: values for cl_d3d11_device_source_khr */
#define CL_D3D11_DEVICE_KHR                             0x4019
#define CL_D3D11_DXGI_ADAPTER_KHR                       0x401A

/* cl_d3d11_device_set: values for cl_d3d11_device_set_khr */
#define CL_PREFERRED_DEVICES_FOR_D3D11_KHR              0x401B
#define CL_ALL_DEVICES_FOR_D3D11_KHR                    0x401C

/* cl_context_info */
#define CL_CONTEXT_D3D11_DEVICE_KHR                     0x401D
#define CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR    0x402D

/* cl_mem_info */
#define CL_MEM_D3D11_RESOURCE_KHR                       0x401E

/* cl_image_info */
#define CL_IMAGE_D3D11_SUBRESOURCE_KHR                  0x401F

/* cl_command_type */
#define CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR            0x4020
#define CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR            0x4021
/******************************************************************************/
/* Function-pointer type named after clGetDeviceIDsFromD3D11KHR.
 * Returns a cl_int. This header only provides the pointer type; no matching
 * prototype is declared here. */
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromD3D11KHR_fn)(
    cl_platform_id             platform,
    cl_d3d11_device_source_khr d3d_device_source,
    void *                     d3d_object,
    cl_d3d11_device_set_khr    d3d_device_set,
    cl_uint                    num_entries,
    cl_device_id *             devices,
    cl_uint *                  num_devices) CL_API_SUFFIX__VERSION_1_2;
/* Function-pointer types for creating a cl_mem from a D3D11 resource.
 * All three return cl_mem and take an optional-looking errcode_ret out
 * pointer; the two texture variants additionally take a UINT subresource. */

/* From an ID3D11Buffer. */
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11BufferKHR_fn)(
    cl_context     context,
    cl_mem_flags   flags,
    ID3D11Buffer * resource,
    cl_int *       errcode_ret) CL_API_SUFFIX__VERSION_1_2;

/* From an ID3D11Texture2D. */
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture2DKHR_fn)(
    cl_context        context,
    cl_mem_flags      flags,
    ID3D11Texture2D * resource,
    UINT              subresource,
    cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_2;

/* From an ID3D11Texture3D. */
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromD3D11Texture3DKHR_fn)(
    cl_context        context,
    cl_mem_flags      flags,
    ID3D11Texture3D * resource,
    UINT              subresource,
    cl_int *          errcode_ret) CL_API_SUFFIX__VERSION_1_2;
/* Function-pointer types for the acquire / release enqueue calls.
 * Both share one signature: a command queue, an array of num_objects cl_mem
 * handles, an event wait list, and an output event. Both return cl_int. */
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireD3D11ObjectsKHR_fn)(
    cl_command_queue command_queue,
    cl_uint          num_objects,
    const cl_mem *   mem_objects,
    cl_uint          num_events_in_wait_list,
    const cl_event * event_wait_list,
    cl_event *       event) CL_API_SUFFIX__VERSION_1_2;

typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseD3D11ObjectsKHR_fn)(
    cl_command_queue command_queue,
    cl_uint          num_objects,
    const cl_mem *   mem_objects,
    cl_uint          num_events_in_wait_list,
    const cl_event * event_wait_list,
    cl_event *       event) CL_API_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_D3D11_H */

View file

@ -0,0 +1,132 @@
/**********************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_H
#define __OPENCL_CL_DX9_MEDIA_SHARING_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************
 * cl_khr_dx9_media_sharing
 *
 * Feature-test macro and the two cl_uint-based selector types used by the
 * entry points of this extension.
 ******************************************************************************/
#define cl_khr_dx9_media_sharing 1

/* Adapter kind selector; values are listed under "cl_media_adapter_type_khr". */
typedef cl_uint             cl_dx9_media_adapter_type_khr;

/* Device-set selector; values are listed under "cl_media_adapter_set_khr". */
typedef cl_uint             cl_dx9_media_adapter_set_khr;
#if defined(_WIN32)
#include <d3d9.h>
/* Pairs a Direct3D 9 surface with a HANDLE. Only declared on _WIN32 builds,
 * where <d3d9.h> supplies IDirect3DSurface9 and HANDLE. */
typedef struct _cl_dx9_surface_info_khr
{
    IDirect3DSurface9 * resource;       /* the D3D9 surface */
    HANDLE              shared_handle;  /* handle associated with the surface */
} cl_dx9_surface_info_khr;
#endif
/******************************************************************************/
/* Error codes introduced by cl_khr_dx9_media_sharing */
#define CL_INVALID_DX9_MEDIA_ADAPTER_KHR                -1010
#define CL_INVALID_DX9_MEDIA_SURFACE_KHR                -1011
#define CL_DX9_MEDIA_SURFACE_ALREADY_ACQUIRED_KHR       -1012
#define CL_DX9_MEDIA_SURFACE_NOT_ACQUIRED_KHR           -1013

/* cl_media_adapter_type_khr: values for cl_dx9_media_adapter_type_khr */
#define CL_ADAPTER_D3D9_KHR                              0x2020
#define CL_ADAPTER_D3D9EX_KHR                            0x2021
#define CL_ADAPTER_DXVA_KHR                              0x2022

/* cl_media_adapter_set_khr: values for cl_dx9_media_adapter_set_khr */
#define CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR   0x2023
#define CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR         0x2024

/* cl_context_info */
#define CL_CONTEXT_ADAPTER_D3D9_KHR                      0x2025
#define CL_CONTEXT_ADAPTER_D3D9EX_KHR                    0x2026
#define CL_CONTEXT_ADAPTER_DXVA_KHR                      0x2027

/* cl_mem_info */
#define CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR                0x2028
#define CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR                0x2029

/* cl_image_info */
#define CL_IMAGE_DX9_MEDIA_PLANE_KHR                     0x202A

/* cl_command_type */
#define CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR        0x202B
#define CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR        0x202C
/******************************************************************************/
/* Function-pointer type named after clGetDeviceIDsFromDX9MediaAdapterKHR.
 * Returns a cl_int. media_adapters is passed as an untyped void pointer. */
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetDeviceIDsFromDX9MediaAdapterKHR_fn)(
    cl_platform_id                  platform,
    cl_uint                         num_media_adapters,
    cl_dx9_media_adapter_type_khr * media_adapter_type,
    void *                          media_adapters,
    cl_dx9_media_adapter_set_khr    media_adapter_set,
    cl_uint                         num_entries,
    cl_device_id *                  devices,
    cl_uint *                       num_devices) CL_API_SUFFIX__VERSION_1_2;
/* Function-pointer type named after clCreateFromDX9MediaSurfaceKHR.
 * Returns a cl_mem. surface_info is an untyped void pointer.
 * NOTE(review): presumably it points at a cl_dx9_surface_info_khr; confirm
 * against the extension specification. */
typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
    cl_context                    context,
    cl_mem_flags                  flags,
    cl_dx9_media_adapter_type_khr adapter_type,
    void *                        surface_info,
    cl_uint                       plane,
    cl_int *                      errcode_ret) CL_API_SUFFIX__VERSION_1_2;
/* Function-pointer types for the acquire / release enqueue calls of
 * cl_khr_dx9_media_sharing. The two signatures are identical and both
 * return cl_int. */
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
    cl_command_queue command_queue,
    cl_uint          num_objects,
    const cl_mem *   mem_objects,
    cl_uint          num_events_in_wait_list,
    const cl_event * event_wait_list,
    cl_event *       event) CL_API_SUFFIX__VERSION_1_2;

typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9MediaSurfacesKHR_fn)(
    cl_command_queue command_queue,
    cl_uint          num_objects,
    const cl_mem *   mem_objects,
    cl_uint          num_events_in_wait_list,
    const cl_event * event_wait_list,
    cl_event *       event) CL_API_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_H */

View file

@ -0,0 +1,182 @@
/**********************************************************************************
* Copyright (c) 2008-2019 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/*****************************************************************************\
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
File Name: cl_dx9_media_sharing_intel.h
Abstract:
Notes:
\*****************************************************************************/
#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
#define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#include <d3d9.h>
#include <dxvahd.h>
#include <wtypes.h>
#include <d3d9types.h>
#ifdef __cplusplus
extern "C" {
#endif
/****************************************
 * cl_intel_dx9_media_sharing extension
 *
 * Feature-test macro and the two cl_uint-based selector types used by the
 * INTEL entry points declared in this header.
 ****************************************/
#define cl_intel_dx9_media_sharing 1

/* Device-source selector; values are listed under "cl_dx9_device_source_intel". */
typedef cl_uint cl_dx9_device_source_intel;

/* Device-set selector; values are listed under "cl_dx9_device_set_intel". */
typedef cl_uint cl_dx9_device_set_intel;
/* Error codes introduced by cl_intel_dx9_media_sharing */
#define CL_INVALID_DX9_DEVICE_INTEL                   -1010
#define CL_INVALID_DX9_RESOURCE_INTEL                 -1011
#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL        -1012
#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL            -1013

/* cl_dx9_device_source_intel */
#define CL_D3D9_DEVICE_INTEL                          0x4022
#define CL_D3D9EX_DEVICE_INTEL                        0x4070
#define CL_DXVA_DEVICE_INTEL                          0x4071

/* cl_dx9_device_set_intel */
#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL            0x4024
#define CL_ALL_DEVICES_FOR_DX9_INTEL                  0x4025

/* cl_context_info */
#define CL_CONTEXT_D3D9_DEVICE_INTEL                  0x4026
#define CL_CONTEXT_D3D9EX_DEVICE_INTEL                0x4072
#define CL_CONTEXT_DXVA_DEVICE_INTEL                  0x4073

/* cl_mem_info */
#define CL_MEM_DX9_RESOURCE_INTEL                     0x4027
#define CL_MEM_DX9_SHARED_HANDLE_INTEL                0x4074

/* cl_image_info */
#define CL_IMAGE_DX9_PLANE_INTEL                      0x4075

/* cl_command_type */
#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL          0x402A
#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL          0x402B
/******************************************************************************/
/* clGetDeviceIDsFromDX9INTEL: prototype plus the matching function-pointer
 * type. Both have the same parameter list and return cl_int. dx9_object is
 * passed as an untyped void pointer. */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceIDsFromDX9INTEL(
    cl_platform_id             platform,
    cl_dx9_device_source_intel dx9_device_source,
    void*                      dx9_object,
    cl_dx9_device_set_intel    dx9_device_set,
    cl_uint                    num_entries,
    cl_device_id*              devices,
    cl_uint*                   num_devices) CL_EXT_SUFFIX__VERSION_1_1;

typedef CL_API_ENTRY cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)(
    cl_platform_id             platform,
    cl_dx9_device_source_intel dx9_device_source,
    void*                      dx9_object,
    cl_dx9_device_set_intel    dx9_device_set,
    cl_uint                    num_entries,
    cl_device_id*              devices,
    cl_uint*                   num_devices) CL_EXT_SUFFIX__VERSION_1_1;
/* clCreateFromDX9MediaSurfaceINTEL: prototype plus the matching
 * function-pointer type. Takes a D3D9 surface, a HANDLE and a plane index,
 * and returns a cl_mem. errcode_ret is an output pointer. */
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromDX9MediaSurfaceINTEL(
    cl_context         context,
    cl_mem_flags       flags,
    IDirect3DSurface9* resource,
    HANDLE             sharedHandle,
    UINT               plane,
    cl_int*            errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;

typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)(
    cl_context         context,
    cl_mem_flags       flags,
    IDirect3DSurface9* resource,
    HANDLE             sharedHandle,
    UINT               plane,
    cl_int*            errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
/* clEnqueueAcquireDX9ObjectsINTEL: prototype plus the matching
 * function-pointer type. mem_objects is a read-only array of num_objects
 * handles. Returns cl_int. */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireDX9ObjectsINTEL(
    cl_command_queue command_queue,
    cl_uint          num_objects,
    const cl_mem*    mem_objects,
    cl_uint          num_events_in_wait_list,
    const cl_event*  event_wait_list,
    cl_event*        event) CL_EXT_SUFFIX__VERSION_1_1;

typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)(
    cl_command_queue command_queue,
    cl_uint          num_objects,
    const cl_mem*    mem_objects,
    cl_uint          num_events_in_wait_list,
    const cl_event*  event_wait_list,
    cl_event*        event) CL_EXT_SUFFIX__VERSION_1_1;
/* clEnqueueReleaseDX9ObjectsINTEL: prototype plus the matching
 * function-pointer type. Returns cl_int.
 * NOTE(review): mem_objects is a non-const cl_mem* here, whereas the acquire
 * counterpart takes const cl_mem*. Left as-is because changing it would alter
 * the function-pointer type; confirm against the Intel extension spec. */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseDX9ObjectsINTEL(
    cl_command_queue command_queue,
    cl_uint          num_objects,
    cl_mem*          mem_objects,
    cl_uint          num_events_in_wait_list,
    const cl_event*  event_wait_list,
    cl_event*        event) CL_EXT_SUFFIX__VERSION_1_1;

typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)(
    cl_command_queue command_queue,
    cl_uint          num_objects,
    cl_mem*          mem_objects,
    cl_uint          num_events_in_wait_list,
    const cl_event*  event_wait_list,
    cl_event*        event) CL_EXT_SUFFIX__VERSION_1_1;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */

View file

@ -0,0 +1,132 @@
/*******************************************************************************
* Copyright (c) 2008-2019 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
#ifndef __OPENCL_CL_EGL_H
#define __OPENCL_CL_EGL_H
#include <CL/cl.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Command types reported for events produced by the EGL interop calls */
#define CL_COMMAND_EGL_FENCE_SYNC_OBJECT_KHR  0x202F
#define CL_COMMAND_ACQUIRE_EGL_OBJECTS_KHR    0x202D
#define CL_COMMAND_RELEASE_EGL_OBJECTS_KHR    0x202E

/* Error codes for the EGL interop calls (e.g. clCreateFromEGLImageKHR) */
#define CL_INVALID_EGL_OBJECT_KHR             -1093
#define CL_EGL_RESOURCE_NOT_ACQUIRED_KHR      -1092
/* Opaque handle types. Each is a plain void pointer, so this header does not
 * need the EGL headers themselves. */
typedef void* CLeglImageKHR;    /* stands in for an EGLImage   */
typedef void* CLeglDisplayKHR;  /* stands in for an EGLDisplay */
typedef void* CLeglSyncKHR;     /* stands in for an EGLSync    */

/* Element type of the property list handed to clCreateFromEGLImageKHR */
typedef intptr_t cl_egl_image_properties_khr;
/* cl_khr_egl_image: feature-test macro, then the clCreateFromEGLImageKHR
 * prototype and its function-pointer type. The call takes an EGL display and
 * image handle plus a property list, and returns a cl_mem. Only the prototype
 * carries the CL_API_SUFFIX__VERSION_1_0 suffix. */
#define cl_khr_egl_image 1

extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromEGLImageKHR(
    cl_context                          context,
    CLeglDisplayKHR                     egldisplay,
    CLeglImageKHR                       eglimage,
    cl_mem_flags                        flags,
    const cl_egl_image_properties_khr * properties,
    cl_int *                            errcode_ret) CL_API_SUFFIX__VERSION_1_0;

typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
    cl_context                          context,
    CLeglDisplayKHR                     egldisplay,
    CLeglImageKHR                       eglimage,
    cl_mem_flags                        flags,
    const cl_egl_image_properties_khr * properties,
    cl_int *                            errcode_ret);
/* Acquire / release enqueue calls for EGL-backed memory objects.
 * Each prototype is followed by its function-pointer type. All four share
 * one parameter list and return cl_int. */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireEGLObjectsKHR(
    cl_command_queue command_queue,
    cl_uint          num_objects,
    const cl_mem *   mem_objects,
    cl_uint          num_events_in_wait_list,
    const cl_event * event_wait_list,
    cl_event *       event) CL_API_SUFFIX__VERSION_1_0;

typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
    cl_command_queue command_queue,
    cl_uint          num_objects,
    const cl_mem *   mem_objects,
    cl_uint          num_events_in_wait_list,
    const cl_event * event_wait_list,
    cl_event *       event);

extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseEGLObjectsKHR(
    cl_command_queue command_queue,
    cl_uint          num_objects,
    const cl_mem *   mem_objects,
    cl_uint          num_events_in_wait_list,
    const cl_event * event_wait_list,
    cl_event *       event) CL_API_SUFFIX__VERSION_1_0;

typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
    cl_command_queue command_queue,
    cl_uint          num_objects,
    const cl_mem *   mem_objects,
    cl_uint          num_events_in_wait_list,
    const cl_event * event_wait_list,
    cl_event *       event);
/* cl_khr_egl_event: feature-test macro, then the clCreateEventFromEGLSyncKHR
 * prototype and its function-pointer type. The call takes an EGL sync handle
 * and display handle, and returns a cl_event. */
#define cl_khr_egl_event 1

extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromEGLSyncKHR(
    cl_context      context,
    CLeglSyncKHR    sync,
    CLeglDisplayKHR display,
    cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_0;

typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
    cl_context      context,
    CLeglSyncKHR    sync,
    CLeglDisplayKHR display,
    cl_int *        errcode_ret);
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_EGL_H */

View file

@ -0,0 +1,762 @@
/*******************************************************************************
* Copyright (c) 2008-2019 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/* cl_ext.h contains OpenCL extensions which don't have external */
/* (OpenGL, D3D) dependencies. */
#ifndef __CL_EXT_H
#define __CL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#include <CL/cl.h>
/* cl_khr_fp64: no feature-test macro, because the extension adds no functions.
 * CL_DEVICE_DOUBLE_FP_CONFIG is only defined here when targeting OpenCL 1.1
 * or older; for newer targets it is expected to come from CL.h. */
#if CL_TARGET_OPENCL_VERSION <= 110
#define CL_DEVICE_DOUBLE_FP_CONFIG                       0x1032
#endif

/* cl_khr_fp16: no feature-test macro, because the extension adds no functions. */
#define CL_DEVICE_HALF_FP_CONFIG                         0x1033
/* Memory object destruction
*
* Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
*
* Registers a user callback function that will be called when the memory object is deleted and its resources
* freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
* stack associated with memobj. The registered user callback functions are called in the reverse order in
* which they were registered. The user callback functions are called and then the memory object is deleted
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
* the storage bits for the memory object, can be reused or freed.
*
* The application may not call CL api's with the cl_mem object passed to the pfn_notify.
*
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*/
#define cl_APPLE_SetMemObjectDestructor 1
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem memobj,
void (* pfn_notify)(cl_mem memobj, void * user_data),
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
/* Context Logging Functions
*
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*
* clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger
*/
#define cl_APPLE_ContextLoggingFunctions 1
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * errstr,
const void * private_info,
size_t cb,
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * errstr,
const void * private_info,
size_t cb,
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * errstr,
const void * private_info,
size_t cb,
void * user_data) CL_EXT_SUFFIX__VERSION_1_0;
/************************
 * cl_khr_icd extension
 ************************/
#define cl_khr_icd 1

/* cl_platform_info selector added by the extension */
#define CL_PLATFORM_ICD_SUFFIX_KHR                  0x0920

/* Additional error code */
#define CL_PLATFORM_NOT_FOUND_KHR                   -1001
/* clIcdGetPlatformIDsKHR: prototype and matching function-pointer type.
 * Takes an entry count, a platform-id array and a count out-pointer, and
 * returns cl_int. Neither declaration carries a version suffix. */
extern CL_API_ENTRY cl_int CL_API_CALL
clIcdGetPlatformIDsKHR(
    cl_uint          num_entries,
    cl_platform_id * platforms,
    cl_uint *        num_platforms);

typedef CL_API_ENTRY cl_int
(CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
    cl_uint          num_entries,
    cl_platform_id * platforms,
    cl_uint *        num_platforms);
/*******************************
 * cl_khr_il_program extension
 *******************************/
#define cl_khr_il_program 1

/* New clGetDeviceInfo property: retrieves the supported intermediate
 * languages. */
#define CL_DEVICE_IL_VERSION_KHR                    0x105B

/* New clGetProgramInfo property: retrieves the IL of a program. */
#define CL_PROGRAM_IL_KHR                           0x1169
/* clCreateProgramWithILKHR: prototype and matching function-pointer type.
 *
 *   context      context to create the program in
 *   il           pointer to the intermediate-language blob
 *   length       size of the blob, in bytes
 *   errcode_ret  output pointer for the error code
 *
 * Returns a cl_program. Both declarations carry CL_EXT_SUFFIX__VERSION_1_2
 * so that the prototype and its _fn typedef are annotated identically.
 */
extern CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithILKHR(
    cl_context   context,
    const void * il,
    size_t       length,
    cl_int *     errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;

typedef CL_API_ENTRY cl_program
(CL_API_CALL *clCreateProgramWithILKHR_fn)(
    cl_context   context,
    const void * il,
    size_t       length,
    cl_int *     errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
/* Extension: cl_khr_image2d_from_buffer
 *
 * Lets a 2D image be created from a cl_mem buffer without a copy. Inside an
 * OpenCL program such an image has type image2d_t. The sampler and
 * sampler-less read_image built-ins, and the write_image built-ins, work on
 * both ordinary 2D images and 2D images created from a buffer.
 *
 * When creating the image the client must give the width, height and image
 * format (channel order and channel data type), and may give the row pitch.
 *
 * The pitch must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR pixels.
 * The buffer's base address must be aligned to
 * CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR pixels.
 */
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR              0x104A
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR       0x104B
/**************************************
 * cl_khr_initialize_memory extension
 **************************************/
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR            0x2030

/**************************************
 * cl_khr_terminate_context extension
 **************************************/
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR          0x2031
#define CL_CONTEXT_TERMINATE_KHR                    0x2032

/* Feature-test macro for cl_khr_terminate_context */
#define cl_khr_terminate_context 1
/* clTerminateContextKHR: prototype and matching function-pointer type.
 * Takes the context and returns cl_int. */
extern CL_API_ENTRY cl_int CL_API_CALL
clTerminateContextKHR(
    cl_context context) CL_EXT_SUFFIX__VERSION_1_2;

typedef CL_API_ENTRY cl_int
(CL_API_CALL *clTerminateContextKHR_fn)(
    cl_context context) CL_EXT_SUFFIX__VERSION_1_2;
/*
 * Extension: cl_khr_spir
 *
 * Adds support for creating an OpenCL program object from a Standard
 * Portable Intermediate Representation (SPIR) instance.
 */
#define CL_DEVICE_SPIR_VERSIONS                     0x40E0
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE         0x40E1
/*****************************************
 * cl_khr_create_command_queue extension
 *****************************************/
#define cl_khr_create_command_queue 1

/* Element type of the property list passed to the call below */
typedef cl_bitfield cl_queue_properties_khr;

/* Prototype and matching function-pointer type. Takes a context, a device
 * and a read-only property list, and returns a cl_command_queue. */
extern CL_API_ENTRY cl_command_queue CL_API_CALL
clCreateCommandQueueWithPropertiesKHR(
    cl_context                     context,
    cl_device_id                   device,
    const cl_queue_properties_khr* properties,
    cl_int*                        errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;

typedef CL_API_ENTRY cl_command_queue
(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(
    cl_context                     context,
    cl_device_id                   device,
    const cl_queue_properties_khr* properties,
    cl_int*                        errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
/******************************************
 * cl_nv_device_attribute_query extension
 *
 * No feature-test macro, because the extension adds no functions.
 ******************************************/
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV       0x4000
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV       0x4001
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV            0x4002
#define CL_DEVICE_WARP_SIZE_NV                      0x4003
#define CL_DEVICE_GPU_OVERLAP_NV                    0x4004
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV            0x4005
#define CL_DEVICE_INTEGRATED_MEMORY_NV              0x4006

/*********************************
 * cl_amd_device_attribute_query
 *********************************/
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD        0x4036

/*********************************
 * cl_arm_printf extension
 *********************************/
#define CL_PRINTF_CALLBACK_ARM                      0x40B0
#define CL_PRINTF_BUFFERSIZE_ARM                    0x40B1
/***********************************
 * cl_ext_device_fission extension
 ***********************************/
#define cl_ext_device_fission 1

/* clReleaseDeviceEXT: prototype and function-pointer type.
 * Takes a device and returns cl_int. */
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseDeviceEXT(
    cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;

typedef CL_API_ENTRY cl_int
(CL_API_CALL *clReleaseDeviceEXT_fn)(
    cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;

/* clRetainDeviceEXT: prototype and function-pointer type.
 * Takes a device and returns cl_int. */
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainDeviceEXT(
    cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;

typedef CL_API_ENTRY cl_int
(CL_API_CALL *clRetainDeviceEXT_fn)(
    cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
/* Element type of the partition property list (a cl_ulong) */
typedef cl_ulong cl_device_partition_property_ext;

/* clCreateSubDevicesEXT: prototype and function-pointer type.
 * Takes the device to partition, a read-only property list, an entry count,
 * an output device array and a count out-pointer. Returns cl_int. */
extern CL_API_ENTRY cl_int CL_API_CALL
clCreateSubDevicesEXT(
    cl_device_id                             in_device,
    const cl_device_partition_property_ext * properties,
    cl_uint                                  num_entries,
    cl_device_id *                           out_devices,
    cl_uint *                                num_devices) CL_EXT_SUFFIX__VERSION_1_1;

typedef CL_API_ENTRY cl_int
(CL_API_CALL * clCreateSubDevicesEXT_fn)(
    cl_device_id                             in_device,
    const cl_device_partition_property_ext * properties,
    cl_uint                                  num_entries,
    cl_device_id *                           out_devices,
    cl_uint *                                num_devices) CL_EXT_SUFFIX__VERSION_1_1;
/* cl_device_partition_property_ext: partition schemes */
#define CL_DEVICE_PARTITION_EQUALLY_EXT             0x4050
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT           0x4051
#define CL_DEVICE_PARTITION_BY_NAMES_EXT            0x4052
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT  0x4053

/* clGetDeviceInfo selectors */
#define CL_DEVICE_PARENT_DEVICE_EXT                 0x4054
#define CL_DEVICE_PARTITION_TYPES_EXT               0x4055
#define CL_DEVICE_AFFINITY_DOMAINS_EXT              0x4056
#define CL_DEVICE_REFERENCE_COUNT_EXT               0x4057
#define CL_DEVICE_PARTITION_STYLE_EXT               0x4058

/* Error codes */
#define CL_DEVICE_PARTITION_FAILED_EXT              -1057
#define CL_INVALID_PARTITION_COUNT_EXT              -1058
#define CL_INVALID_PARTITION_NAME_EXT               -1059

/* CL_AFFINITY_DOMAINs */
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT             0x1
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT             0x2
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT             0x3
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT             0x4
#define CL_AFFINITY_DOMAIN_NUMA_EXT                 0x10
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT     0x100

/* cl_device_partition_property_ext list terminators. The first two are 0.
 * The names terminator is (cast 0) - 1, i.e. the all-ones value of the
 * unsigned property type. */
#define CL_PROPERTIES_LIST_END_EXT                  ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT         ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_NAMES_LIST_END_EXT          ((cl_device_partition_property_ext) 0 - 1)
/***********************************
 * cl_ext_migrate_memobject extension definitions
 ***********************************/
#define cl_ext_migrate_memobject 1

/* Flag type taken by clEnqueueMigrateMemObjectEXT */
typedef cl_bitfield cl_mem_migration_flags_ext;

/* Flag value for cl_mem_migration_flags_ext */
#define CL_MIGRATE_MEM_OBJECT_HOST_EXT              0x1

/* Command type */
#define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT           0x4040
/* clEnqueueMigrateMemObjectEXT: prototype and function-pointer type.
 * Takes a queue, an array of num_mem_objects handles, migration flags, an
 * event wait list and an output event. Returns cl_int. Neither declaration
 * carries a version suffix. */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueMigrateMemObjectEXT(
    cl_command_queue           command_queue,
    cl_uint                    num_mem_objects,
    const cl_mem *             mem_objects,
    cl_mem_migration_flags_ext flags,
    cl_uint                    num_events_in_wait_list,
    const cl_event *           event_wait_list,
    cl_event *                 event);

typedef CL_API_ENTRY cl_int
(CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(
    cl_command_queue           command_queue,
    cl_uint                    num_mem_objects,
    const cl_mem *             mem_objects,
    cl_mem_migration_flags_ext flags,
    cl_uint                    num_events_in_wait_list,
    const cl_event *           event_wait_list,
    cl_event *                 event);
/*********************************
 * cl_qcom_ext_host_ptr extension
 *********************************/
#define cl_qcom_ext_host_ptr 1

/* Memory flag bit (bit 29) */
#define CL_MEM_EXT_HOST_PTR_QCOM                  (1 << 29)

/* Device / image query selectors */
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM   0x40A0
#define CL_DEVICE_PAGE_SIZE_QCOM                  0x40A1
#define CL_IMAGE_ROW_ALIGNMENT_QCOM               0x40A2
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM             0x40A3

/* Host cache policies */
#define CL_MEM_HOST_UNCACHED_QCOM                 0x40A4
#define CL_MEM_HOST_WRITEBACK_QCOM                0x40A5
#define CL_MEM_HOST_WRITETHROUGH_QCOM             0x40A6
#define CL_MEM_HOST_WRITE_COMBINING_QCOM          0x40A7
/* Selector type for the param_name argument below */
typedef cl_uint cl_image_pitch_info_qcom;

/* clGetDeviceImageInfoQCOM: takes a device, image dimensions and format, a
 * selector, and a caller-provided result buffer of param_value_size bytes.
 * param_value_size_ret is an out-pointer. Returns cl_int. */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceImageInfoQCOM(
    cl_device_id             device,
    size_t                   image_width,
    size_t                   image_height,
    const cl_image_format   *image_format,
    cl_image_pitch_info_qcom param_name,
    size_t                   param_value_size,
    void                    *param_value,
    size_t                  *param_value_size_ret);
/* Common header describing an external host-memory allocation. The ION and
 * Android-native-buffer descriptors in this header embed it as their first
 * member. */
typedef struct _cl_mem_ext_host_ptr
{
    /* Type of external memory allocation.
     * Legal values will be defined in layered extensions. */
    cl_uint allocation_type;

    /* Host cache policy for this external memory allocation. */
    cl_uint host_cache_policy;
} cl_mem_ext_host_ptr;
/*******************************************
 * cl_qcom_ext_host_ptr_iocoherent extension
 ********************************************/
/* Cache policy specifying io-coherence */
#define CL_MEM_HOST_IOCOHERENT_QCOM               0x40A9

/*********************************
 * cl_qcom_ion_host_ptr extension
 *********************************/
#define CL_MEM_ION_HOST_PTR_QCOM                  0x40A8
/* Descriptor for an ION-backed host allocation */
typedef struct _cl_mem_ion_host_ptr
{
    /* Type of external memory allocation.
     * Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
    cl_mem_ext_host_ptr ext_host_ptr;

    /* ION file descriptor */
    int                 ion_filedesc;

    /* Host pointer to the ION allocated memory */
    void*               ion_hostptr;
} cl_mem_ion_host_ptr;
/*********************************
 * cl_qcom_android_native_buffer_host_ptr extension
 *********************************/
#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM 0x40C6

/* Descriptor for an Android-native-buffer-backed host allocation */
typedef struct _cl_mem_android_native_buffer_host_ptr
{
    /* Type of external memory allocation.
     * Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native
     * buffers. */
    cl_mem_ext_host_ptr ext_host_ptr;

    /* Virtual pointer to the android native buffer */
    void*               anb_ptr;
} cl_mem_android_native_buffer_host_ptr;
/*
 * cl_img_yuv_image extension
 */

/* Image format tokens for use with clCreateImage. */
#define CL_NV21_IMG                                 0x40D0
#define CL_YV12_IMG                                 0x40D1

/*
 * cl_img_cached_allocations extension
 */

/* Flag bits for use with clCreateBuffer. */
#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG          (1 << 26)
#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG            (1 << 27)
/*
 * cl_img_use_gralloc_ptr extension
 */
#define cl_img_use_gralloc_ptr                      1

/* Flag bit for use with clCreateBuffer. */
#define CL_MEM_USE_GRALLOC_PTR_IMG                  (1 << 28)

/* Command tokens, to be used by clGetEventInfo. */
#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG      0x40D2
#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG      0x40D3

/* Error code from clEnqueueReleaseGrallocObjectsIMG. */
#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG        0x40D4
/*
 * Declaration of clEnqueueAcquireGrallocObjectsIMG.
 * Takes a command queue, an array of num_objects memory objects, an event
 * wait list and an event pointer; returns a cl_int.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireGrallocObjectsIMG(
    cl_command_queue    command_queue,
    cl_uint             num_objects,
    const cl_mem *      mem_objects,
    cl_uint             num_events_in_wait_list,
    const cl_event *    event_wait_list,
    cl_event *          event) CL_EXT_SUFFIX__VERSION_1_2;

/*
 * Declaration of clEnqueueReleaseGrallocObjectsIMG; same parameter list as
 * clEnqueueAcquireGrallocObjectsIMG.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGrallocObjectsIMG(
    cl_command_queue    command_queue,
    cl_uint             num_objects,
    const cl_mem *      mem_objects,
    cl_uint             num_events_in_wait_list,
    const cl_event *    event_wait_list,
    cl_event *          event) CL_EXT_SUFFIX__VERSION_1_2;
/*
 * cl_khr_subgroups extension
 */
#define cl_khr_subgroups                            1

#ifndef CL_VERSION_2_1
/*
 * From OpenCL 2.1 onwards cl_kernel_sub_group_info is declared in CL.h, so
 * the typedef is only provided here for older versions. In hindsight the
 * extension's type should have carried a khr suffix; it stays un-suffixed
 * to maintain backwards compatibility.
 */
typedef cl_uint cl_kernel_sub_group_info;
#endif /* !CL_VERSION_2_1 */

/* cl_kernel_sub_group_info */
#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR    0x2033
#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR       0x2034
/*
 * Declaration of clGetKernelSubGroupInfoKHR (marked with the 2.0-deprecated
 * suffix macro). Takes a kernel, a device, a cl_kernel_sub_group_info
 * selector, an input value with its size, and a
 * param_value / param_value_size / param_value_size_ret triple.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetKernelSubGroupInfoKHR(
    cl_kernel                   in_kernel,
    cl_device_id                in_device,
    cl_kernel_sub_group_info    param_name,
    size_t                      input_value_size,
    const void *                input_value,
    size_t                      param_value_size,
    void *                      param_value,
    size_t *                    param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;

/* Function-pointer type with the same signature as clGetKernelSubGroupInfoKHR. */
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(
    cl_kernel                   in_kernel,
    cl_device_id                in_device,
    cl_kernel_sub_group_info    param_name,
    size_t                      input_value_size,
    const void *                input_value,
    size_t                      param_value_size,
    void *                      param_value,
    size_t *                    param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
/*
 * cl_khr_mipmap_image extension
 */

/* cl_sampler_properties */
#define CL_SAMPLER_MIP_FILTER_MODE_KHR              0x1155
#define CL_SAMPLER_LOD_MIN_KHR                      0x1156
#define CL_SAMPLER_LOD_MAX_KHR                      0x1157
/*
 * cl_khr_priority_hints extension
 */

/* Kept for backwards compatibility only: the extension adds no new
 * functions, so this define should not be required. */
#define cl_khr_priority_hints                       1

typedef cl_uint cl_queue_priority_khr;

/* cl_command_queue_properties */
#define CL_QUEUE_PRIORITY_KHR                       0x1096

/* cl_queue_priority_khr */
#define CL_QUEUE_PRIORITY_HIGH_KHR                  (1<<0)
#define CL_QUEUE_PRIORITY_MED_KHR                   (1<<1)
#define CL_QUEUE_PRIORITY_LOW_KHR                   (1<<2)
/*
 * cl_khr_throttle_hints extension
 */

/* Kept for backwards compatibility only: the extension adds no new
 * functions, so this define should not be required. */
#define cl_khr_throttle_hints                       1

typedef cl_uint cl_queue_throttle_khr;

/* cl_command_queue_properties */
#define CL_QUEUE_THROTTLE_KHR                       0x1097

/* cl_queue_throttle_khr */
#define CL_QUEUE_THROTTLE_HIGH_KHR                  (1<<0)
#define CL_QUEUE_THROTTLE_MED_KHR                   (1<<1)
#define CL_QUEUE_THROTTLE_LOW_KHR                   (1<<2)
/*
 * cl_khr_subgroup_named_barrier
 */

/* Kept for backwards compatibility only: the extension adds no new
 * functions, so this define should not be required. */
#define cl_khr_subgroup_named_barrier               1

/* cl_device_info */
#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR       0x2035
/*
 * cl_arm_import_memory extension
 */
#define cl_arm_import_memory                        1

/* Element type of the property list passed to clImportMemoryARM. */
typedef intptr_t cl_import_properties_arm;

/* Default and valid property name for cl_arm_import_memory. */
#define CL_IMPORT_TYPE_ARM                          0x40B2

/* Values for the CL_IMPORT_TYPE_ARM property: */

/*   host process memory type (the default value) */
#define CL_IMPORT_TYPE_HOST_ARM                     0x40B3

/*   DMA BUF memory type */
#define CL_IMPORT_TYPE_DMA_BUF_ARM                  0x40B4

/*   protected DMA BUF memory type */
#define CL_IMPORT_TYPE_PROTECTED_ARM                0x40B5
/*
 * clImportMemoryARM is the function this extension adds; it imports memory
 * directly into OpenCL.
 *
 *  - Imported memory is mapped straight into the device's page tables, which
 *    gives zero copy access. There is never a fallback to copy operations
 *    and aliased buffers.
 *  - Which types of memory can be imported is specified by additional
 *    extension strings.
 *  - The resulting cl_mem allocations are compatible with all other users of
 *    cl_mem in the standard API.
 *  - Pages are mapped with the same properties as the normal buffer creation
 *    function clCreateBuffer uses.
 */
extern CL_API_ENTRY cl_mem CL_API_CALL
clImportMemoryARM(
    cl_context                          context,
    cl_mem_flags                        flags,
    const cl_import_properties_arm *    properties,
    void *                              memory,
    size_t                              size,
    cl_int *                            errcode_ret) CL_EXT_SUFFIX__VERSION_1_0;
/*
 * cl_arm_shared_virtual_memory extension
 */
#define cl_arm_shared_virtual_memory                1

/* Used by clGetDeviceInfo */
#define CL_DEVICE_SVM_CAPABILITIES_ARM              0x40B6

/* Used by clGetMemObjectInfo */
#define CL_MEM_USES_SVM_POINTER_ARM                 0x40B7

/* Used by clSetKernelExecInfoARM */
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM                0x40B8
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM   0x40B9

/* Command tokens, to be used by clGetEventInfo */
#define CL_COMMAND_SVM_FREE_ARM                     0x40BA
#define CL_COMMAND_SVM_MEMCPY_ARM                   0x40BB
#define CL_COMMAND_SVM_MEMFILL_ARM                  0x40BC
#define CL_COMMAND_SVM_MAP_ARM                      0x40BD
#define CL_COMMAND_SVM_UNMAP_ARM                    0x40BE

/* Flag bits returned by clGetDeviceInfo when param_name is
 * CL_DEVICE_SVM_CAPABILITIES_ARM */
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM       (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM         (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM         (1 << 2)
#define CL_DEVICE_SVM_ATOMICS_ARM                   (1 << 3)

/* Flag bits used by clSVMAllocARM */
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM            (1 << 10)
#define CL_MEM_SVM_ATOMICS_ARM                      (1 << 11)
/* Scalar types used by the ARM SVM entry points declared in this header:
 * two bitfield types and one cl_uint selector. */
typedef cl_bitfield     cl_svm_mem_flags_arm;
typedef cl_uint         cl_kernel_exec_info_arm;
typedef cl_bitfield     cl_device_svm_capabilities_arm;
/*
 * Declaration of clSVMAllocARM: takes a context, cl_svm_mem_flags_arm flags,
 * a size and an alignment, and returns a void pointer.
 */
extern CL_API_ENTRY void * CL_API_CALL
clSVMAllocARM(
    cl_context              context,
    cl_svm_mem_flags_arm    flags,
    size_t                  size,
    cl_uint                 alignment) CL_EXT_SUFFIX__VERSION_1_2;

/*
 * Declaration of clSVMFreeARM: takes a context and an SVM pointer and
 * returns nothing.
 */
extern CL_API_ENTRY void CL_API_CALL
clSVMFreeARM(
    cl_context              context,
    void *                  svm_pointer) CL_EXT_SUFFIX__VERSION_1_2;
/*
 * Declaration of clEnqueueSVMFreeARM.
 *
 * Besides the command queue and the array of num_svm_pointers SVM pointers,
 * the call accepts a CL_CALLBACK function pointer (pfn_free_func) together
 * with a user_data pointer, then the usual event wait list and event
 * pointer. The callback's own parameters are a queue, a pointer count, a
 * pointer array and a user_data pointer.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMFreeARM(
    cl_command_queue    command_queue,
    cl_uint             num_svm_pointers,
    void *              svm_pointers[],
    void (CL_CALLBACK * pfn_free_func)(
        cl_command_queue    queue,
        cl_uint             num_svm_pointers,
        void *              svm_pointers[],
        void *              user_data),
    void *              user_data,
    cl_uint             num_events_in_wait_list,
    const cl_event *    event_wait_list,
    cl_event *          event) CL_EXT_SUFFIX__VERSION_1_2;
/*
 * Declaration of clEnqueueSVMMemcpyARM: command queue, a blocking_copy flag,
 * destination and (const) source pointers, a size, then the event wait list
 * and event pointer. Returns a cl_int.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMemcpyARM(
    cl_command_queue    command_queue,
    cl_bool             blocking_copy,
    void *              dst_ptr,
    const void *        src_ptr,
    size_t              size,
    cl_uint             num_events_in_wait_list,
    const cl_event *    event_wait_list,
    cl_event *          event) CL_EXT_SUFFIX__VERSION_1_2;

/*
 * Declaration of clEnqueueSVMMemFillARM: command queue, an SVM pointer, a
 * (const) pattern with its size, a size, then the event wait list and event
 * pointer. Returns a cl_int.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMemFillARM(
    cl_command_queue    command_queue,
    void *              svm_ptr,
    const void *        pattern,
    size_t              pattern_size,
    size_t              size,
    cl_uint             num_events_in_wait_list,
    const cl_event *    event_wait_list,
    cl_event *          event) CL_EXT_SUFFIX__VERSION_1_2;
/*
 * Declaration of clEnqueueSVMMapARM: command queue, a blocking_map flag,
 * cl_map_flags, an SVM pointer with a size, then the event wait list and
 * event pointer. Returns a cl_int.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMMapARM(
    cl_command_queue    command_queue,
    cl_bool             blocking_map,
    cl_map_flags        flags,
    void *              svm_ptr,
    size_t              size,
    cl_uint             num_events_in_wait_list,
    const cl_event *    event_wait_list,
    cl_event *          event) CL_EXT_SUFFIX__VERSION_1_2;

/*
 * Declaration of clEnqueueSVMUnmapARM: command queue and an SVM pointer,
 * then the event wait list and event pointer. Returns a cl_int.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueSVMUnmapARM(
    cl_command_queue    command_queue,
    void *              svm_ptr,
    cl_uint             num_events_in_wait_list,
    const cl_event *    event_wait_list,
    cl_event *          event) CL_EXT_SUFFIX__VERSION_1_2;
/*
 * Declaration of clSetKernelArgSVMPointerARM: a kernel, an argument index
 * and a (const) argument value pointer. Returns a cl_int.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clSetKernelArgSVMPointerARM(
    cl_kernel       kernel,
    cl_uint         arg_index,
    const void *    arg_value) CL_EXT_SUFFIX__VERSION_1_2;

/*
 * Declaration of clSetKernelExecInfoARM: a kernel, a
 * cl_kernel_exec_info_arm selector and a (const) value with its size.
 * Returns a cl_int.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clSetKernelExecInfoARM(
    cl_kernel                   kernel,
    cl_kernel_exec_info_arm     param_name,
    size_t                      param_value_size,
    const void *                param_value) CL_EXT_SUFFIX__VERSION_1_2;
/*
 * cl_arm_get_core_id extension
 *
 * Everything in this section is only defined when CL_VERSION_1_2 is defined.
 */
#if defined(CL_VERSION_1_2)

#define cl_arm_get_core_id                          1

/* Device info property for bitfield of cores present */
#define CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM        0x40BF

#endif /* CL_VERSION_1_2 */
/*
 * cl_arm_job_slot_selection
 */
#define cl_arm_job_slot_selection                   1

/* cl_device_info */
#define CL_DEVICE_JOB_SLOTS_ARM                     0x41E0

/* cl_command_queue_properties */
#define CL_QUEUE_JOB_SLOT_ARM                       0x41E1
#ifdef __cplusplus
}
#endif
#endif /* __CL_EXT_H */

View file

@ -0,0 +1,423 @@
/*******************************************************************************
* Copyright (c) 2008-2019 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/*****************************************************************************\
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
File Name: cl_ext_intel.h
Abstract:
Notes:
\*****************************************************************************/
#ifndef __CL_EXT_INTEL_H
#define __CL_EXT_INTEL_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * cl_intel_thread_local_exec extension
 */
#define cl_intel_thread_local_exec                  1

/* Bit 31 of a cl_bitfield value. */
#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL     (((cl_bitfield)1) << 31)
/*
 * cl_intel_device_partition_by_names extension
 */
#define cl_intel_device_partition_by_names          1

#define CL_DEVICE_PARTITION_BY_NAMES_INTEL          0x4052

/* NOTE(review): by its name, the terminator of a partition-by-names list --
 * confirm against the extension spec. */
#define CL_PARTITION_BY_NAMES_LIST_END_INTEL        -1
/*
 * This section covers three extensions together:
 *   cl_intel_accelerator
 *   cl_intel_motion_estimation
 *   cl_intel_advanced_motion_estimation
 */
#define cl_intel_accelerator                        1
#define cl_intel_motion_estimation                  1
#define cl_intel_advanced_motion_estimation         1

/* Opaque accelerator handle: pointer to the incomplete
 * struct _cl_accelerator_intel. */
typedef struct _cl_accelerator_intel*   cl_accelerator_intel;

/* cl_uint selector types for the accelerator type and accelerator info. */
typedef cl_uint                         cl_accelerator_type_intel;
typedef cl_uint                         cl_accelerator_info_intel;
/*
 * Motion estimation descriptor: four cl_uint fields.
 * NOTE(review): the CL_ME_MB_TYPE_*, CL_ME_SUBPIXEL_MODE_*,
 * CL_ME_SAD_ADJUST_MODE_* and CL_ME_SEARCH_PATH_* tokens defined in this
 * header presumably supply the field values -- confirm against the spec.
 */
typedef struct _cl_motion_estimation_desc_intel {
    cl_uint     mb_block_type;
    cl_uint     subpixel_mode;
    cl_uint     sad_adjust_mode;
    cl_uint     search_path_type;
} cl_motion_estimation_desc_intel;
/* error codes */
#define CL_INVALID_ACCELERATOR_INTEL                -1094
#define CL_INVALID_ACCELERATOR_TYPE_INTEL           -1095
#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL     -1096
#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL     -1097

/* cl_accelerator_type_intel */
#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL 0x0

/* cl_accelerator_info_intel */
#define CL_ACCELERATOR_DESCRIPTOR_INTEL             0x4090
#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL        0x4091
#define CL_ACCELERATOR_CONTEXT_INTEL                0x4092
#define CL_ACCELERATOR_TYPE_INTEL                   0x4093
/* cl_motion_detect_desc_intel flags */

/* macro-block type */
#define CL_ME_MB_TYPE_16x16_INTEL                   0x0
#define CL_ME_MB_TYPE_8x8_INTEL                     0x1
#define CL_ME_MB_TYPE_4x4_INTEL                     0x2

/* sub-pixel mode */
#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL           0x0
#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL              0x1
#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL              0x2

/* SAD adjust mode */
#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL            0x0
#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL            0x1

/* search path radius */
#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL          0x0
#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL          0x1
#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL        0x5

/* skip block type and intra predict enables */
#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL           0x0
#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL    0x1
#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL      0x2
#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL             0x4

/* input mode */
#define CL_ME_FORWARD_INPUT_MODE_INTEL              0x1
#define CL_ME_BACKWARD_INPUT_MODE_INTEL             0x2
#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL          0x3

/* bidirectional weight */
#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL            16
#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL              21
#define CL_ME_BIDIR_WEIGHT_HALF_INTEL               32
#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL          43
#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL      48
/* cost penalty */
#define CL_ME_COST_PENALTY_NONE_INTEL               0x0
#define CL_ME_COST_PENALTY_LOW_INTEL                0x1
#define CL_ME_COST_PENALTY_NORMAL_INTEL             0x2
#define CL_ME_COST_PENALTY_HIGH_INTEL               0x3

/* cost precision */
#define CL_ME_COST_PRECISION_QPEL_INTEL             0x0
#define CL_ME_COST_PRECISION_HPEL_INTEL             0x1
#define CL_ME_COST_PRECISION_PEL_INTEL              0x2
#define CL_ME_COST_PRECISION_DPEL_INTEL             0x3

/* luma predictor mode
 * NOTE(review): DIAGONAL_DOWN_RIGHT and PLANE both carry the value 0x4 --
 * confirm against the extension spec before relying on them being distinct. */
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL            0x0
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL          0x1
#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL                  0x2
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL  0x3
#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL               0x4
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL      0x5
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL     0x6
#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL       0x7
#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL       0x8

/* chroma predictor mode */
#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL                0x0
#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL        0x1
#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL          0x2
#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL             0x3
/* cl_device_info */
#define CL_DEVICE_ME_VERSION_INTEL                  0x407E

/* NOTE(review): presumably the values reported for
 * CL_DEVICE_ME_VERSION_INTEL -- confirm. */
#define CL_ME_VERSION_LEGACY_INTEL                  0x0
#define CL_ME_VERSION_ADVANCED_VER_1_INTEL          0x1
#define CL_ME_VERSION_ADVANCED_VER_2_INTEL          0x2
/*
 * Declaration of clCreateAcceleratorINTEL: takes a context, an accelerator
 * type, a (const) descriptor with its size and an errcode_ret pointer, and
 * returns a cl_accelerator_intel handle.
 */
extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL
clCreateAcceleratorINTEL(
    cl_context                  context,
    cl_accelerator_type_intel   accelerator_type,
    size_t                      descriptor_size,
    const void *                descriptor,
    cl_int *                    errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;

/* Function-pointer type with the same signature as clCreateAcceleratorINTEL. */
typedef CL_API_ENTRY cl_accelerator_intel
(CL_API_CALL *clCreateAcceleratorINTEL_fn)(
    cl_context                  context,
    cl_accelerator_type_intel   accelerator_type,
    size_t                      descriptor_size,
    const void *                descriptor,
    cl_int *                    errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
/*
 * Declaration of clGetAcceleratorInfoINTEL: takes an accelerator handle, a
 * cl_accelerator_info_intel selector and a
 * param_value_size / param_value / param_value_size_ret triple.
 * Returns a cl_int.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetAcceleratorInfoINTEL(
    cl_accelerator_intel        accelerator,
    cl_accelerator_info_intel   param_name,
    size_t                      param_value_size,
    void *                      param_value,
    size_t *                    param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2;

/* Function-pointer type with the same signature as clGetAcceleratorInfoINTEL. */
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clGetAcceleratorInfoINTEL_fn)(
    cl_accelerator_intel        accelerator,
    cl_accelerator_info_intel   param_name,
    size_t                      param_value_size,
    void *                      param_value,
    size_t *                    param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2;
/*
 * Declarations of clRetainAcceleratorINTEL and clReleaseAcceleratorINTEL,
 * each followed by its function-pointer type. All four take a single
 * accelerator handle and return a cl_int.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainAcceleratorINTEL(
    cl_accelerator_intel        accelerator) CL_EXT_SUFFIX__VERSION_1_2;

typedef CL_API_ENTRY cl_int
(CL_API_CALL *clRetainAcceleratorINTEL_fn)(
    cl_accelerator_intel        accelerator) CL_EXT_SUFFIX__VERSION_1_2;

extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseAcceleratorINTEL(
    cl_accelerator_intel        accelerator) CL_EXT_SUFFIX__VERSION_1_2;

typedef CL_API_ENTRY cl_int
(CL_API_CALL *clReleaseAcceleratorINTEL_fn)(
    cl_accelerator_intel        accelerator) CL_EXT_SUFFIX__VERSION_1_2;
/*
 * cl_intel_simultaneous_sharing extension
 */
#define cl_intel_simultaneous_sharing               1

#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL       0x4104
#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL   0x4105

/*
 * cl_intel_egl_image_yuv extension
 */
#define cl_intel_egl_image_yuv                      1

#define CL_EGL_YUV_PLANE_INTEL                      0x4107
/*
 * cl_intel_packed_yuv extension
 */
#define cl_intel_packed_yuv                         1

#define CL_YUYV_INTEL                               0x4076
#define CL_UYVY_INTEL                               0x4077
#define CL_YVYU_INTEL                               0x4078
#define CL_VYUY_INTEL                               0x4079

/*
 * cl_intel_required_subgroup_size extension
 */
#define cl_intel_required_subgroup_size             1

#define CL_DEVICE_SUB_GROUP_SIZES_INTEL             0x4108
#define CL_KERNEL_SPILL_MEM_SIZE_INTEL              0x4109
#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL      0x410A
/*
 * cl_intel_driver_diagnostics extension
 */
#define cl_intel_driver_diagnostics                 1

typedef cl_uint cl_diagnostics_verbose_level;

#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL           0x4106

/* Diagnostics level bits; ALL is 0xff, the others are single bits. */
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL      ( 0xff )
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL     ( 1 )
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL      ( 1 << 1 )
#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL  ( 1 << 2 )
/*
 * cl_intel_planar_yuv extension
 */
#define CL_NV12_INTEL                               0x410E

/* Single-bit flags (bits 24 and 25). */
#define CL_MEM_NO_ACCESS_INTEL                      ( 1 << 24 )
#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL      ( 1 << 25 )

#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL        0x417E
#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL       0x417F
/*******************************************************
* cl_intel_device_side_avc_motion_estimation extension *
********************************************************/
/* Device query tokens of the extension (named CL_DEVICE_*). */
#define CL_DEVICE_AVC_ME_VERSION_INTEL 0x410B
#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C
#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL 0x410D
/*
 * AVC ME version tokens.
 * These are plain integer constants with no trailing ';', so they can be
 * used inside expressions, comparisons and initializers. The remarks use
 * C-style comments so the header stays valid for C89 compilers.
 * NOTE(review): presumably the values reported for
 * CL_DEVICE_AVC_ME_VERSION_INTEL -- confirm against the extension spec.
 */
#define CL_AVC_ME_VERSION_0_INTEL 0x0 /* No support. */
#define CL_AVC_ME_VERSION_1_INTEL 0x1 /* First supported version. */
/* major shapes */
#define CL_AVC_ME_MAJOR_16x16_INTEL                 0x0
#define CL_AVC_ME_MAJOR_16x8_INTEL                  0x1
#define CL_AVC_ME_MAJOR_8x16_INTEL                  0x2
#define CL_AVC_ME_MAJOR_8x8_INTEL                   0x3

/* minor shapes */
#define CL_AVC_ME_MINOR_8x8_INTEL                   0x0
#define CL_AVC_ME_MINOR_8x4_INTEL                   0x1
#define CL_AVC_ME_MINOR_4x8_INTEL                   0x2
#define CL_AVC_ME_MINOR_4x4_INTEL                   0x3

/* major directions */
#define CL_AVC_ME_MAJOR_FORWARD_INTEL               0x0
#define CL_AVC_ME_MAJOR_BACKWARD_INTEL              0x1
#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL         0x2

/* partition masks: each named mask is 0x7F with exactly one bit cleared */
#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL          0x0
#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL        0x7E
#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL         0x7D
#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL         0x7B
#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL          0x77
#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL          0x6F
#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL          0x5F
#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL          0x3F

/* search windows
 * NOTE(review): 4x4_RADIUS shares the value 0x2 with TINY -- confirm against
 * the extension spec. */
#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL    0x0
#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL         0x1
#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL          0x2
#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL    0x3
#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL       0x4
#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL     0x6
#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL     0x7
#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL        0x8
#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL  0x9
#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL    0x2
#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL    0xa
/* SAD adjust mode */
#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL        0x0
#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL        0x2

/* sub-pixel mode */
#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL       0x0
#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL          0x1
#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL          0x3

/* cost precision */
#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL         0x0
#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL         0x1
#define CL_AVC_ME_COST_PRECISION_PEL_INTEL          0x2
#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL         0x3

/* bidirectional weight */
#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL        0x10
#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL          0x15
#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL           0x20
#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL      0x2B
#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL  0x30

/* border reached */
#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL         0x0
#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL        0x2
#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL          0x4
#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL       0x8
/* skip block partition */
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL          0x0
#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL            0x4000

/*
 * Skip block enable masks, built by shifting a small pattern into the top
 * byte of a 32-bit value.
 * NOTE(review): ( 0xAA << 24 ), ( 0xFF << 24 ) and ( 0x2 << 30 ) shift a
 * signed int constant past INT_MAX on targets with a 32-bit int; confirm
 * whether these should be unsigned before using them in constant
 * expressions.
 */
#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL     ( 0x1 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL    ( 0x2 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL        ( 0x3 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL       ( 0x55 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL      ( 0xAA << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL          ( 0xFF << 24 )

/* per-8x8-block enables: block k occupies bits 24 + 2k and 25 + 2k */
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL     ( 0x1 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL    ( 0x2 << 24 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL     ( 0x1 << 26 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL    ( 0x2 << 26 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL     ( 0x1 << 28 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL    ( 0x2 << 28 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL     ( 0x1 << 30 )
#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL    ( 0x2 << 30 )

/* block based skip */
#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL                0x00
#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL                0x80
/* intra shapes */
#define CL_AVC_ME_INTRA_16x16_INTEL                 0x0
#define CL_AVC_ME_INTRA_8x8_INTEL                   0x1
#define CL_AVC_ME_INTRA_4x4_INTEL                   0x2

/* intra luma partition masks: 0x7 with exactly one bit cleared */
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL   0x5
#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL   0x3

/* intra neighbor mask enables */
#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL         0x60
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL        0x10
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL  0x8
#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL   0x4

/* luma predictor mode
 * NOTE(review): DIAGONAL_DOWN_RIGHT and PLANE both carry the value 0x4 --
 * confirm against the extension spec before relying on them being distinct. */
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL            0x0
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL          0x1
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL                  0x2
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL  0x3
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL               0x4
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL      0x5
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL     0x6
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL       0x7
#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL       0x8

/* chroma predictor mode */
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL                0x0
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL        0x1
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL          0x2
#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL             0x3

/* frame direction */
#define CL_AVC_ME_FRAME_FORWARD_INTEL               0x1
#define CL_AVC_ME_FRAME_BACKWARD_INTEL              0x2
#define CL_AVC_ME_FRAME_DUAL_INTEL                  0x3

/* slice type */
#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL             0x0
#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL            0x1
#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL            0x2

/* interlaced scan field */
#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL       0x0
#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL    0x1
#ifdef __cplusplus
}
#endif
#endif /* __CL_EXT_INTEL_H */

View file

@ -0,0 +1,171 @@
/**********************************************************************************
* Copyright (c) 2008-2019 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
#ifndef __OPENCL_CL_GL_H
#define __OPENCL_CL_GL_H
#include <CL/cl.h>
#ifdef __cplusplus
extern "C" {
#endif
/* cl_uint selector types used by the GL interop declarations in this header. */
typedef cl_uint     cl_gl_object_type;
typedef cl_uint     cl_gl_texture_info;
typedef cl_uint     cl_gl_platform_info;

/* Opaque handle type: pointer to the incomplete struct __GLsync. */
typedef struct __GLsync *cl_GLsync;
/* cl_gl_object_type
 * The tokens in this block (object types and texture info together) lie in
 * the 0x2000 - 0x2012 range. */
#define CL_GL_OBJECT_BUFFER                     0x2000
#define CL_GL_OBJECT_TEXTURE2D                  0x2001
#define CL_GL_OBJECT_TEXTURE3D                  0x2002
#define CL_GL_OBJECT_RENDERBUFFER               0x2003
#if defined(CL_VERSION_1_2)
#define CL_GL_OBJECT_TEXTURE2D_ARRAY            0x200E
#define CL_GL_OBJECT_TEXTURE1D                  0x200F
#define CL_GL_OBJECT_TEXTURE1D_ARRAY            0x2010
#define CL_GL_OBJECT_TEXTURE_BUFFER             0x2011
#endif /* CL_VERSION_1_2 */

/* cl_gl_texture_info */
#define CL_GL_TEXTURE_TARGET                    0x2004
#define CL_GL_MIPMAP_LEVEL                      0x2005
#if defined(CL_VERSION_1_2)
#define CL_GL_NUM_SAMPLES                       0x2012
#endif /* CL_VERSION_1_2 */
/*
 * Declaration of clCreateFromGLBuffer: takes a context, memory flags, a
 * cl_GLuint buffer object name and an errcode_ret pointer; returns a cl_mem.
 */
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLBuffer(
    cl_context      context,
    cl_mem_flags    flags,
    cl_GLuint       bufobj,
    cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_0;
#if defined(CL_VERSION_1_2)

/*
 * Declaration of clCreateFromGLTexture (only when CL_VERSION_1_2 is
 * defined): takes a context, memory flags, a GL target enum, a mip level,
 * a cl_GLuint texture name and an errcode_ret pointer; returns a cl_mem.
 */
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLTexture(
    cl_context      context,
    cl_mem_flags    flags,
    cl_GLenum       target,
    cl_GLint        miplevel,
    cl_GLuint       texture,
    cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_2;

#endif /* CL_VERSION_1_2 */
/*
 * Declaration of clCreateFromGLRenderbuffer: takes a context, memory flags,
 * a cl_GLuint renderbuffer name and an errcode_ret pointer; returns a cl_mem.
 */
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLRenderbuffer(
    cl_context      context,
    cl_mem_flags    flags,
    cl_GLuint       renderbuffer,
    cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_0;
/*
 * Declaration of clGetGLObjectInfo: takes a memory object plus pointers to
 * a cl_gl_object_type and a cl_GLuint object name; returns a cl_int.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLObjectInfo(
    cl_mem                  memobj,
    cl_gl_object_type *     gl_object_type,
    cl_GLuint *             gl_object_name) CL_API_SUFFIX__VERSION_1_0;

/*
 * Declaration of clGetGLTextureInfo: takes a memory object, a
 * cl_gl_texture_info selector and a
 * param_value_size / param_value / param_value_size_ret triple;
 * returns a cl_int.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLTextureInfo(
    cl_mem                  memobj,
    cl_gl_texture_info      param_name,
    size_t                  param_value_size,
    void *                  param_value,
    size_t *                param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
/*
 * Declaration of clEnqueueAcquireGLObjects: takes a command queue, an array
 * of num_objects memory objects, an event wait list and an event pointer;
 * returns a cl_int.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireGLObjects(
    cl_command_queue    command_queue,
    cl_uint             num_objects,
    const cl_mem *      mem_objects,
    cl_uint             num_events_in_wait_list,
    const cl_event *    event_wait_list,
    cl_event *          event) CL_API_SUFFIX__VERSION_1_0;

/*
 * Declaration of clEnqueueReleaseGLObjects; same parameter list as
 * clEnqueueAcquireGLObjects.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGLObjects(
    cl_command_queue    command_queue,
    cl_uint             num_objects,
    const cl_mem *      mem_objects,
    cl_uint             num_events_in_wait_list,
    const cl_event *    event_wait_list,
    cl_event *          event) CL_API_SUFFIX__VERSION_1_0;
/*
 * Deprecated OpenCL 1.1 APIs
 *
 * clCreateFromGLTexture2D and clCreateFromGLTexture3D share one parameter
 * list (context, memory flags, GL target, mip level, texture name,
 * errcode_ret) and both carry the 1.1-deprecated prefix/suffix macros.
 */
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture2D(
    cl_context      context,
    cl_mem_flags    flags,
    cl_GLenum       target,
    cl_GLint        miplevel,
    cl_GLuint       texture,
    cl_int *        errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;

extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture3D(
    cl_context      context,
    cl_mem_flags    flags,
    cl_GLenum       target,
    cl_GLint        miplevel,
    cl_GLuint       texture,
    cl_int *        errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
/*
 * cl_khr_gl_sharing extension
 */
#define cl_khr_gl_sharing                       1

typedef cl_uint cl_gl_context_info;

/* Additional Error Codes */
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR  -1000

/* cl_gl_context_info */
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR    0x2006
#define CL_DEVICES_FOR_GL_CONTEXT_KHR           0x2007

/* Additional cl_context_properties */
#define CL_GL_CONTEXT_KHR                       0x2008
#define CL_EGL_DISPLAY_KHR                      0x2009
#define CL_GLX_DISPLAY_KHR                      0x200A
#define CL_WGL_HDC_KHR                          0x200B
#define CL_CGL_SHAREGROUP_KHR                   0x200C
/*
 * Declaration of clGetGLContextInfoKHR: takes a (const) context property
 * list, a cl_gl_context_info selector and a
 * param_value_size / param_value / param_value_size_ret triple;
 * returns a cl_int.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLContextInfoKHR(
    const cl_context_properties *   properties,
    cl_gl_context_info              param_name,
    size_t                          param_value_size,
    void *                          param_value,
    size_t *                        param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;

/* Function-pointer type with the same parameter list as
 * clGetGLContextInfoKHR (declared without a version suffix macro). */
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clGetGLContextInfoKHR_fn)(
    const cl_context_properties *   properties,
    cl_gl_context_info              param_name,
    size_t                          param_value_size,
    void *                          param_value,
    size_t *                        param_value_size_ret);
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_H */

View file

@ -0,0 +1,52 @@
/**********************************************************************************
* Copyright (c) 2008-2019 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/*
 * cl_gl_ext.h - declarations for the cl_khr_gl_event extension.
 * Builds on <CL/cl_gl.h>, which is included below.
 */
#ifndef __OPENCL_CL_GL_EXT_H
#define __OPENCL_CL_GL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#include <CL/cl_gl.h>
/*
* cl_khr_gl_event extension
*/
/* Constant introduced by this extension (presumably a cl_command_type value - confirm against the extension spec). */
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
/*
 * Creates a cl_event from a GL sync object within the given context.
 *
 * context      OpenCL context the event is created in.
 * sync         GL sync object to wrap. The parameter was previously named
 *              "cl_GLsync", i.e. identical to its own type, which hid the
 *              typedef for the remainder of the prototype; parameter names
 *              in a declaration are not part of the interface, so callers
 *              are unaffected by the rename.
 * errcode_ret  Optional output for the OpenCL error code.
 *
 * Returns the new cl_event.
 */
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromGLsyncKHR(cl_context context,
cl_GLsync sync,
cl_int * errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_EXT_H */

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,172 @@
/**********************************************************************************
* Copyright (c) 2008-2019 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/*****************************************************************************\
Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
File Name: cl_va_api_media_sharing_intel.h
Abstract:
Notes:
\*****************************************************************************/
#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
#include <CL/cl.h>
#include <CL/cl_platform.h>
#include <va/va.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************
* cl_intel_va_api_media_sharing extension *
*******************************************/
/* Presence macro: lets client code test with #ifdef whether this header declared the extension. */
#define cl_intel_va_api_media_sharing 1
/* error codes */
/* All four are negative values, in the contiguous range -1098 .. -1101. */
#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL -1098
#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL -1099
#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL -1100
#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL -1101
/* cl_va_api_device_source_intel */
#define CL_VA_API_DISPLAY_INTEL 0x4094
/* cl_va_api_device_set_intel */
#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL 0x4095
#define CL_ALL_DEVICES_FOR_VA_API_INTEL 0x4096
/* cl_context_info */
#define CL_CONTEXT_VA_API_DISPLAY_INTEL 0x4097
/* cl_mem_info */
#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL 0x4098
/* cl_image_info */
#define CL_IMAGE_VA_API_PLANE_INTEL 0x4099
/* cl_command_type */
#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL 0x409A
#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL 0x409B
/*
 * Enumerant types used for the media_adapter_type and media_adapter_set
 * parameters of clGetDeviceIDsFromVA_APIMediaAdapterINTEL; both are plain
 * cl_uint aliases.
 */
typedef cl_uint cl_va_api_device_source_intel;
typedef cl_uint cl_va_api_device_set_intel;
/*
 * Extension entry point taking a platform, a media adapter (described by
 * media_adapter_type / media_adapter / media_adapter_set) and a
 * num_entries / devices / num_devices triple; returns a cl_int status.
 * NOTE(review): the output triple looks like the clGetDeviceIDs convention
 * (capacity, output array, count written) - confirm against the
 * cl_intel_va_api_media_sharing specification.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
cl_platform_id platform,
cl_va_api_device_source_intel media_adapter_type,
void* media_adapter,
cl_va_api_device_set_intel media_adapter_set,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
/* Function-pointer type with the same parameter list and return type as the declaration above. */
typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
cl_platform_id platform,
cl_va_api_device_source_intel media_adapter_type,
void* media_adapter,
cl_va_api_device_set_intel media_adapter_set,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices) CL_EXT_SUFFIX__VERSION_1_2;
/*
 * Extension entry point that returns a cl_mem for one plane of a VA surface.
 * Takes the owning context, cl_mem_flags, a pointer to the VASurfaceID, the
 * plane index, and an optional errcode_ret output for the OpenCL error code.
 */
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromVA_APIMediaSurfaceINTEL(
cl_context context,
cl_mem_flags flags,
VASurfaceID* surface,
cl_uint plane,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
/* Function-pointer type with the same parameter list and return type as the declaration above. */
typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)(
cl_context context,
cl_mem_flags flags,
VASurfaceID* surface,
cl_uint plane,
cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
/*
 * Extension entry point that enqueues an "acquire" of num_objects memory
 * objects (mem_objects) on command_queue. The last three parameters are an
 * event wait list (count + array) and an output event; returns a cl_int
 * status.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireVA_APIMediaSurfacesINTEL(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
/* Function-pointer type with the same parameter list and return type as the declaration above. */
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
/*
 * Extension entry point that enqueues a "release" of num_objects memory
 * objects (mem_objects) on command_queue; its parameter list is identical to
 * that of clEnqueueAcquireVA_APIMediaSurfacesINTEL. Returns a cl_int status.
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseVA_APIMediaSurfacesINTEL(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
/* Function-pointer type with the same parameter list and return type as the declaration above. */
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
cl_command_queue command_queue,
cl_uint num_objects,
const cl_mem* mem_objects,
cl_uint num_events_in_wait_list,
const cl_event* event_wait_list,
cl_event* event) CL_EXT_SUFFIX__VERSION_1_2;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */

View file

@ -0,0 +1,86 @@
/*******************************************************************************
* Copyright (c) 2018 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
#ifndef __CL_VERSION_H
#define __CL_VERSION_H

/*
 * Step 1: settle on the OpenCL version to target.
 * If the user did not choose one, announce it and fall back to 220 (2.2).
 */
#ifndef CL_TARGET_OPENCL_VERSION
#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)")
#define CL_TARGET_OPENCL_VERSION 220
#endif

/* A value outside the six recognised version numbers is replaced by 220. */
#if !(CL_TARGET_OPENCL_VERSION == 100 || \
      CL_TARGET_OPENCL_VERSION == 110 || \
      CL_TARGET_OPENCL_VERSION == 120 || \
      CL_TARGET_OPENCL_VERSION == 200 || \
      CL_TARGET_OPENCL_VERSION == 210 || \
      CL_TARGET_OPENCL_VERSION == 220)
#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220). Defaulting to 220 (OpenCL 2.2)")
#undef CL_TARGET_OPENCL_VERSION
#define CL_TARGET_OPENCL_VERSION 220
#endif

/*
 * Step 2: CL_VERSION_x_y feature macros.
 * Each one is defined (to 1) when the target is at least that version and
 * the user has not already defined it. Listed from oldest to newest.
 */
#if !defined(CL_VERSION_1_0) && CL_TARGET_OPENCL_VERSION >= 100
#define CL_VERSION_1_0 1
#endif
#if !defined(CL_VERSION_1_1) && CL_TARGET_OPENCL_VERSION >= 110
#define CL_VERSION_1_1 1
#endif
#if !defined(CL_VERSION_1_2) && CL_TARGET_OPENCL_VERSION >= 120
#define CL_VERSION_1_2 1
#endif
#if !defined(CL_VERSION_2_0) && CL_TARGET_OPENCL_VERSION >= 200
#define CL_VERSION_2_0 1
#endif
#if !defined(CL_VERSION_2_1) && CL_TARGET_OPENCL_VERSION >= 210
#define CL_VERSION_2_1 1
#endif
#if !defined(CL_VERSION_2_2) && CL_TARGET_OPENCL_VERSION >= 220
#define CL_VERSION_2_2 1
#endif

/*
 * Step 3: when targeting an older version, opt in to the APIs that later
 * versions deprecated, unless the user already made that choice.
 */
#ifndef CL_USE_DEPRECATED_OPENCL_1_0_APIS
#if CL_TARGET_OPENCL_VERSION <= 100
#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
#endif
#endif
#ifndef CL_USE_DEPRECATED_OPENCL_1_1_APIS
#if CL_TARGET_OPENCL_VERSION <= 110
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#endif
#endif
#ifndef CL_USE_DEPRECATED_OPENCL_1_2_APIS
#if CL_TARGET_OPENCL_VERSION <= 120
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#endif
#endif
#ifndef CL_USE_DEPRECATED_OPENCL_2_0_APIS
#if CL_TARGET_OPENCL_VERSION <= 200
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#endif
#endif
#ifndef CL_USE_DEPRECATED_OPENCL_2_1_APIS
#if CL_TARGET_OPENCL_VERSION <= 210
#define CL_USE_DEPRECATED_OPENCL_2_1_APIS
#endif
#endif

#endif /* __CL_VERSION_H */

View file

@ -0,0 +1,47 @@
/*******************************************************************************
* Copyright (c) 2008-2015 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
* KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
* SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
* https://www.khronos.org/registry/
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
/*
 * Umbrella header: a single include that pulls in cl.h, cl_gl.h,
 * cl_gl_ext.h and cl_ext.h. It declares nothing of its own.
 */
#ifndef __OPENCL_H
#define __OPENCL_H
#ifdef __cplusplus
extern "C" {
#endif
#include <CL/cl.h>
#include <CL/cl_gl.h>
#include <CL/cl_gl_ext.h>
#include <CL/cl_ext.h>
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_H */

View file

@ -1,44 +1,60 @@
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_INC_PATH = $(wildcard ../include)
POCL_LIB_PATH = $(wildcard ../lib)
VX_RT_PATH = $(wildcard ../../../runtime)
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
RISCV_TOOL_PATH=$(wildcard ~/dev/riscv-gnu-toolchain/drops)
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
POCL_CC_PATH=$(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_RT_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt)
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
VX_RT_PATH=$(wildcard ../../../runtime)
VX_SIMX_PATH=$(wildcard ../../../simX/obj_dir)
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
CC=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
NEWLIB_PATH=$(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
CXXFLAGS = -g -O0 -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -march=rv32im -mabi=ilp32
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
CXXFLAGS += -ffreestanding # program may not begin at main()
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -I$(POCL_INC_PATH)
LIBS = -lOpenCL
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT=kmeans
PROJECT=saxpy
all: $(PROJECT).dump $(PROJECT).hex
lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) -I$(POCL_RT_PATH)/include -L$(POCL_RT_PATH)/lib/static -L. $(VX_SRCS) main.cc rmse.c read_input.c cluster.c kmeans_clustering.c -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).elf
kmeans_clustering.o: kmeans_clustering.c
$(CC) $(CXXFLAGS) -c kmeans_clustering.c
cluster.o: cluster.c
$(CC) $(CXXFLAGS) -c cluster.c
read_input.o: read_input.c
$(CC) $(CXXFLAGS) -c read_input.c
rmse.o: rmse.c
$(CC) $(CXXFLAGS) -c rmse.c
$(PROJECT).elf: main.cc lib$(PROJECT).a read_input.o rmse.o cluster.o kmeans_clustering.o
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc read_input.o rmse.o cluster.o kmeans_clustering.o $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: main.cc lib$(PROJECT).a read_input.o rmse.o cluster.o kmeans_clustering.o
$(CXX) $(CXXFLAGS) main.cc read_input.o rmse.o cluster.o kmeans_clustering.o $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
@ -46,8 +62,17 @@ $(PROJECT).hex: $(PROJECT).elf
$(PROJECT).dump: $(PROJECT).elf
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
run:
$(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
run: $(PROJECT).hex
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
# Run the QEMU build of the project under qemu-riscv32 with POCL_DEBUG=all;
# "-D debug.log" names the file QEMU logs to.
qemu: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -strace -d in_asm -D debug.log $(PROJECT).qemu
# Debug "server" side: same run, started with "-g 1234"
# (presumably QEMU's gdb stub listening on port 1234 - confirm in the QEMU docs).
gdb-s: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
# Debug "client" side: start $(GDB) on the same binary.
gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.elf *.dump *.hex *.a *.pocl
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu

Binary file not shown.

View file

@ -1,33 +1,35 @@
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_INC_PATH = $(wildcard ../include)
POCL_LIB_PATH = $(wildcard ../lib)
VX_RT_PATH = $(wildcard ../../../runtime)
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
RISCV_TOOL_PATH=$(wildcard ~/dev/riscv-gnu-toolchain/drops)
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
POCL_CC_PATH=$(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_RT_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt)
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
VX_RT_PATH=$(wildcard ../../../runtime)
VX_SIMX_PATH=$(wildcard ../../../simX/obj_dir)
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
CC=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
NEWLIB_PATH=$(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
CXXFLAGS = -g -O0 -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -march=rv32im -mabi=ilp32
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
CXXFLAGS += -ffreestanding # program may not begin at main()
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -I$(POCL_INC_PATH)
LIBS = -lOpenCL
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT=saxpy
@ -37,7 +39,10 @@ lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) -I$(POCL_RT_PATH)/include -L$(POCL_RT_PATH)/lib/static -L. $(VX_SRCS) main.cc -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).elf
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
@ -45,8 +50,17 @@ $(PROJECT).hex: $(PROJECT).elf
$(PROJECT).dump: $(PROJECT).elf
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
run:
$(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
run: $(PROJECT).hex
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
# Run the QEMU build of the project under qemu-riscv32 with POCL_DEBUG=all;
# "-D debug.log" names the file QEMU logs to.
qemu: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
# Debug "server" side: same run, started with "-g 1234"
# (presumably QEMU's gdb stub listening on port 1234 - confirm in the QEMU docs).
gdb-s: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
# Debug "client" side: start $(GDB) on the same binary.
gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.elf *.dump *.hex *.a *.pocl
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu

Binary file not shown.

View file

@ -17,481 +17,175 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* gcc -o cldemo -std=gnu99 -Wall -I/usr/include/nvidia-current cldemo.c -lOpenCL
* gcc -o cldemo -std=gnu99 -Wall -I/usr/include/nvidia-current cldemo.c
* -lOpenCL
*
*/
#include <CL/cl.h>
#include <iostream>
#include <fstream>
#include <iostream>
#include <sstream>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define NUM_DATA 65536
//#define NUM_DATA 65536
#define NUM_DATA 4096
/*
 * CL_CHECK(expr): evaluate an OpenCL call that returns cl_int exactly once.
 * Any result other than CL_SUCCESS is reported on stderr together with the
 * text of the failing expression, and the process is aborted.
 */
#define CL_CHECK(_expr) \
  do { \
    cl_int _err = _expr; \
    if (_err != CL_SUCCESS) { \
      fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
      abort(); \
    } \
  } while (0)
/*
 * CL_CHECK(expr): evaluate an OpenCL call that returns cl_int exactly once.
 * Any result other than CL_SUCCESS is reported on stderr together with the
 * text of the failing expression, and the process is aborted.
 */
#define CL_CHECK(_expr) \
  do { \
    cl_int _err = _expr; \
    if (_err != CL_SUCCESS) { \
      fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
      abort(); \
    } \
  } while (0)
/*
 * CL_CHECK_ERR(expr): for OpenCL calls that return an object and report
 * their status through an error-code out-parameter.
 *
 * The macro declares a local `cl_int _err` (pre-set to CL_INVALID_VALUE) and
 * the caller is expected to pass `&_err` inside `expr`, e.g.
 *     ctx = CL_CHECK_ERR(clCreateContext(..., &_err));
 * If `_err` is not CL_SUCCESS after the call, the failing expression is
 * printed to stderr and the process aborts; otherwise the macro yields the
 * value returned by `expr`.
 *
 * Relies on GNU statement expressions. `__typeof__` is used rather than the
 * bare `typeof` keyword because the latter is not recognised when compiling
 * in a strict -std=c++NN / -std=cNN mode.
 */
#define CL_CHECK_ERR(_expr) \
  ({ \
    cl_int _err = CL_INVALID_VALUE; \
    __typeof__(_expr) _ret = _expr; \
    if (_err != CL_SUCCESS) { \
      fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
      abort(); \
    } \
    _ret; \
  })
/*
 * CL_CHECK_ERR(expr): for OpenCL calls that return an object and report
 * their status through an error-code out-parameter.
 *
 * The macro declares a local `cl_int _err` (pre-set to CL_INVALID_VALUE) and
 * the caller is expected to pass `&_err` inside `expr`, e.g.
 *     ctx = CL_CHECK_ERR(clCreateContext(..., &_err));
 * If `_err` is not CL_SUCCESS after the call, the failing expression is
 * printed to stderr and the process aborts; otherwise the macro yields the
 * value returned by `expr`.
 *
 * Relies on GNU statement expressions. `__typeof__` is used rather than the
 * bare `typeof` keyword because the latter is not recognised when compiling
 * in a strict -std=c++NN / -std=cNN mode.
 */
#define CL_CHECK_ERR(_expr) \
  ({ \
    cl_int _err = CL_INVALID_VALUE; \
    __typeof__(_expr) _ret = _expr; \
    if (_err != CL_SUCCESS) { \
      fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
      abort(); \
    } \
    _ret; \
  })
/*
 * Error callback handed to clCreateContext: prints the message supplied by
 * the OpenCL runtime to stderr.
 *
 * errinfo       Error text; a NULL pointer is printed as "(null)" instead of
 *               being passed to %s, which would be undefined behaviour.
 * private_info  Unused.
 * cb            Unused.
 * user_data     Unused.
 */
void pfn_notify(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
  (void)private_info;
  (void)cb;
  (void)user_data;
  fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo ? errinfo : "(null)");
}
///
// Create an OpenCL program from the kernel source file and build it.
//
// context   Context the program is created in.
// device    Device whose build log is queried when the build fails.
// fileName  Path of the OpenCL C source file to read.
//
// Returns the built program, or NULL (after printing a diagnostic to
// std::cerr) when the file cannot be opened, the program cannot be created,
// or the build fails.
//
cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)
{
  std::ifstream kernelFile(fileName, std::ios::in);
  if (!kernelFile.is_open())
  {
    std::cerr << "Failed to open file for reading: " << fileName << std::endl;
    return NULL;
  }

  // Slurp the whole file; srcStdStr must outlive the create call because
  // srcStr points into it.
  std::ostringstream oss;
  oss << kernelFile.rdbuf();
  const std::string srcStdStr = oss.str();
  const char *srcStr = srcStdStr.c_str();

  // Capture the error code as well as the handle instead of discarding it.
  cl_int errNum = CL_SUCCESS;
  cl_program program = clCreateProgramWithSource(context, 1, &srcStr, NULL, &errNum);
  if (program == NULL || errNum != CL_SUCCESS)
  {
    std::cerr << "Failed to create CL program from source." << std::endl;
    if (program != NULL)
      clReleaseProgram(program);
    return NULL;
  }

  errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  if (errNum != CL_SUCCESS)
  {
    // Determine the reason for the error. The buffer is zero-initialised and
    // the query result is checked so that a failed query never leads to
    // printing uninitialised stack memory.
    char buildLog[16384] = "";
    const cl_int logErr = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                                                sizeof(buildLog), buildLog, NULL);
    buildLog[sizeof(buildLog) - 1] = '\0';
    std::cerr << "Error in kernel: " << std::endl;
    if (logErr == CL_SUCCESS)
      std::cerr << buildLog;
    else
      std::cerr << "(build log unavailable)";
    clReleaseProgram(program);
    return NULL;
  }
  return program;
}
//
///
// Write `size` bytes starting at `data` to `fileName`, replacing any
// existing file. Returns true only when the file was opened, every byte was
// written, and the file was closed without error.
//
static bool WriteProgramBinaryFile(const char* fileName, const unsigned char* data, size_t size)
{
  FILE *fp = fopen(fileName, "wb");
  if (fp == NULL)
    return false;
  printf("Opened file\n");
  const size_t written = fwrite(data, 1, size, fp);
  printf("wrote file\n");
  const bool closed = (fclose(fp) == 0);
  printf("close file\n");
  return written == size && closed;
}

///
// Retrieve the program binary and store it on disk if it belongs to the
// device passed in.
//
// Exactly one device is queried (the buffers passed to clGetProgramInfo are
// sized for a single entry), so a program built for more devices makes the
// queries fail and the function return false.
//
// program   Built program whose binary is saved.
// device    Device whose binary is wanted.
// fileName  Destination file; overwritten if it exists.
//
// Returns false when any clGetProgramInfo query fails or the file cannot be
// completely written. Returns true otherwise - including, as before, the case
// where the program's device is not `device` and nothing is written.
//
bool SaveProgramBinary(cl_program program, cl_device_id device, const char* fileName)
{
  printf("try getting program info\n");

  // 1 - Get the device ID attached to the program
  cl_device_id programDevice = NULL;
  cl_int errNum = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
                                   sizeof(programDevice), &programDevice, NULL);
  printf("Got program_devices\n");
  if (errNum != CL_SUCCESS)
  {
    std::cerr << "Error querying for devices." << std::endl;
    return false;
  }

  // 2 - Determine the size of the program binary
  size_t binarySize = 0;
  errNum = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
                            sizeof(binarySize), &binarySize, NULL);
  printf("Got program_binary_sizes\n");
  if (errNum != CL_SUCCESS)
  {
    std::cerr << "Error querying for program binary sizes." << std::endl;
    return false;
  }

  // 3 - Get the program binary. From here on there is a single exit path so
  // the buffer is released exactly once.
  unsigned char *binary = new unsigned char[binarySize];
  unsigned char *binaries[1] = { binary };
  errNum = clGetProgramInfo(program, CL_PROGRAM_BINARIES,
                            sizeof(binaries), binaries, NULL);
  printf("Got program_binarys\n");

  bool saved = true;
  if (errNum != CL_SUCCESS)
  {
    std::cerr << "Error querying for program binaries" << std::endl;
    saved = false;
  }
  else if (programDevice == device)
  {
    // 4 - Store the binary just for the device requested. A short write or a
    // failed close is now reported instead of being silently ignored.
    saved = WriteProgramBinaryFile(fileName, binary, binarySize);
  }

  delete [] binary;
  return saved;
}
///
// Attempt to create the program object from a cached binary. Note that
// on first run this will fail because the binary has not yet been created.
//
// Reads the whole of fileName into memory, hands it to
// clCreateProgramWithBinary for the single given device, then builds the
// program. Returns the built program, or NULL when the file cannot be
// opened, the binary is rejected, or the build fails.
//
cl_program CreateProgramFromBinary(cl_context context, cl_device_id device, const char* fileName)
{
FILE *fp = fopen(fileName, "rb");
if (fp == NULL)
{
return NULL;
}
// Determine the size of the binary
// NOTE(review): the results of fseek/ftell are not checked; ftell's return
// value is stored straight into a size_t.
size_t binarySize;
fseek(fp, 0, SEEK_END);
binarySize = ftell(fp);
rewind(fp);
unsigned char *programBinary = new unsigned char[binarySize];
// NOTE(review): the number of bytes actually read is not checked.
fread(programBinary, 1, binarySize, fp);
fclose(fp);
cl_int errNum = 0;
cl_program program;
cl_int binaryStatus;
program = clCreateProgramWithBinary(context,
1,
&device,
&binarySize,
(const unsigned char**)&programBinary,
&binaryStatus,
&errNum);
// The host-side copy is released right after the create call.
delete [] programBinary;
if (errNum != CL_SUCCESS)
{
std::cerr << "Error loading program binary." << std::endl;
return NULL;
}
// NOTE(review): this path returns without calling clReleaseProgram on the
// program object created above.
if (binaryStatus != CL_SUCCESS)
{
std::cerr << "Invalid binary for device" << std::endl;
return NULL;
}
errNum = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
if (errNum != CL_SUCCESS)
{
printf("build errNum:%d\n", errNum);
// Determine the reason for the error
// NOTE(review): buildLog is uninitialised and the result of
// clGetProgramBuildInfo is not checked before the buffer is printed.
char buildLog[16384];
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
sizeof(buildLog), buildLog, NULL);
std::cerr << "Error in program: " << std::endl;
std::cerr << buildLog << std::endl;
clReleaseProgram(program);
return NULL;
}
return program;
/*
 * Error callback handed to clCreateContext: prints the message supplied by
 * the OpenCL runtime to stderr.
 *
 * errinfo       Error text; a NULL pointer is printed as "(null)" instead of
 *               being passed to %s, which would be undefined behaviour.
 * private_info  Unused.
 * cb            Unused.
 * user_data     Unused.
 */
void pfn_notify(const char *errinfo, const void *private_info, size_t cb,
                void *user_data) {
  (void)private_info;
  (void)cb;
  (void)user_data;
  fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo ? errinfo : "(null)");
}
///
// Cleanup any created OpenCL resources
//
void Cleanup(cl_context context, cl_command_queue commandQueue,
cl_program program, cl_kernel kernel, cl_mem memObjects[3])
{
for (int i = 0; i < 3; i++)
{
if (memObjects[i] != 0)
clReleaseMemObject(memObjects[i]);
}
if (commandQueue != 0)
clReleaseCommandQueue(commandQueue);
cl_program program, cl_kernel kernel, cl_mem memObjects[3]) {
for (int i = 0; i < 3; i++) {
if (memObjects[i] != 0)
clReleaseMemObject(memObjects[i]);
}
if (commandQueue != 0)
clReleaseCommandQueue(commandQueue);
if (kernel != 0)
clReleaseKernel(kernel);
if (kernel != 0)
clReleaseKernel(kernel);
if (program != 0)
clReleaseProgram(program);
if (context != 0)
clReleaseContext(context);
if (program != 0)
clReleaseProgram(program);
if (context != 0)
clReleaseContext(context);
}
int main(int argc, char **argv)
{
int main(int argc, char **argv) {
printf("enter demo main\n");
fflush(stdout);
putenv("POCL_VERBOSE=1");
putenv("POCL_DEVICES=basic");
putenv("POCL_LEAVE_TEMP_DIRS=1");
putenv("POCL_LEAVE_KERNEL_COMPILER_TEMP_FILES=1");
putenv("POCL_TEMP_DIR=pocl");
putenv("POCL_CACHE_DIR=pocl");
putenv("POCL_WORK_GROUP_METHOD=spmd");
if(argc >= 2){
printf("argv[1]:%s:\n",argv[1]);
if(!strcmp(argv[1], "h"))
putenv("POCL_WORK_GROUP_METHOD=spmd");
if(!strcmp(argv[1], "c"))
putenv("POCL_CROSS_COMPILE=1");
}
if(argc >= 3){
printf("argv[2]:%s:\n",argv[2]);
if(!strcmp(argv[2], "h"))
putenv("POCL_WORK_GROUP_METHOD=spmd");
if(!strcmp(argv[2], "c"))
putenv("POCL_CROSS_COMPILE=1");
}
cl_platform_id platform_id;
cl_device_id device_id;
size_t binary_size;
int i;
//putenv("LD_LIBRARY_PATH=/scratch/colins/build/linux/fs/lib");
//putenv("LTDL_LIBRARY_PATH=/scratch/colins/build/linux/fs/lib");
//lt_dlsetsearchpath("/scratch/colins/build/linux/fs/lib");
//printf("SEARCH_PATH:%s\n",lt_dlgetsearchpath());
cl_platform_id platforms[100];
cl_uint platforms_n = 0;
CL_CHECK(clGetPlatformIDs(100, platforms, &platforms_n));
// Getting platform and device information
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
printf("=== %d OpenCL platform(s) found: ===\n", platforms_n);
for (int i=0; i<platforms_n; i++)
{
char buffer[10240];
printf(" -- %d --\n", i);
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL));
printf(" PROFILE = %s\n", buffer);
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL));
printf(" VERSION = %s\n", buffer);
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL));
printf(" NAME = %s\n", buffer);
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL));
printf(" VENDOR = %s\n", buffer);
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL));
printf(" EXTENSIONS = %s\n", buffer);
}
cl_context context;
context = CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, &pfn_notify, NULL, &_err));
if (platforms_n == 0)
return 1;
cl_device_id devices[100];
cl_uint devices_n = 0;
// CL_CHECK(clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 100, devices, &devices_n));
CL_CHECK(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 100, devices, &devices_n));
printf("=== %d OpenCL device(s) found on platform:\n", platforms_n);
for (int i=0; i<devices_n; i++)
{
char buffer[10240];
cl_uint buf_uint;
cl_ulong buf_ulong;
size_t wi_size[3];
printf(" -- %d --\n", i);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL));
printf(" DEVICE_NAME = %s\n", buffer);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL));
printf(" DEVICE_VENDOR = %s\n", buffer);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(buffer), buffer, NULL));
printf(" DEVICE_VERSION = %s\n", buffer);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(buffer), buffer, NULL));
printf(" DRIVER_VERSION = %s\n", buffer);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf_uint), &buf_uint, NULL));
printf(" DEVICE_MAX_COMPUTE_UNITS = %u\n", (unsigned int)buf_uint);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf_uint), &buf_uint, NULL));
printf(" DEVICE_MAX_CLOCK_FREQUENCY = %u\n", (unsigned int)buf_uint);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, NULL));
printf(" DEVICE_GLOBAL_MEM_SIZE = %llu\n", (unsigned long long)buf_ulong);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(wi_size), &wi_size, NULL));
printf(" DEVICE_MAX_WG_SIZE X=%ld,Y=%ld,Z=%ld\n", wi_size[0], wi_size[1], wi_size[2]);
}
if (devices_n == 0)
return 1;
cl_context context;
context = CL_CHECK_ERR(clCreateContext(NULL, 1, devices+1, &pfn_notify, NULL, &_err));
cl_command_queue queue;
queue = CL_CHECK_ERR(clCreateCommandQueue(context, devices[1], CL_QUEUE_PROFILING_ENABLE, &_err));
cl_kernel kernel = 0;
cl_mem memObjects[2] = {0,0};
cl_command_queue queue;
queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &_err));
cl_kernel kernel = 0;
cl_mem memObjects[2] = {0, 0};
// Create OpenCL program - first attempt to load cached binary.
// If that is not available, then create the program from source
// and store the binary for future use.
std::cout << "Attempting to create program from binary..." << std::endl;
cl_program program = CreateProgramFromBinary(context, devices[1], "kernel.cl.bin");
if (program == NULL)
{
std::cout << "Binary not loaded, create from source..." << std::endl;
program = CreateProgram(context, devices[1], "kernel.cl");
if (program == NULL)
{
Cleanup(context, queue, program, kernel, memObjects);
return 1;
}
cl_program program =
clCreateProgramWithBuiltInKernels(context, 1, &device_id, "saxpy", NULL);
if (program == NULL) {
std::cerr << "Failed to write program binary" << std::endl;
Cleanup(context, queue, program, kernel, memObjects);
return 1;
} else {
std::cout << "Read program from binary." << std::endl;
}
std::cout << "Save program binary for future run..." << std::endl;
if (SaveProgramBinary(program, devices[1], "kernel.cl.bin") == false)
{
std::cerr << "Failed to write program binary" << std::endl;
Cleanup(context, queue, program, kernel, memObjects);
return 1;
}
}
else
{
std::cout << "Read program from binary." << std::endl;
}
// Build program
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
printf("attempting to create input buffer\n");
fflush(stdout);
cl_mem input_buffer;
input_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*NUM_DATA, NULL, &_err));
cl_mem input_buffer;
input_buffer = CL_CHECK_ERR(clCreateBuffer(
context, CL_MEM_READ_ONLY, sizeof(float) * NUM_DATA, NULL, &_err));
printf("attempting to create output buffer\n");
fflush(stdout);
cl_mem output_buffer;
output_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float)*NUM_DATA, NULL, &_err));
cl_mem output_buffer;
output_buffer = CL_CHECK_ERR(clCreateBuffer(
context, CL_MEM_WRITE_ONLY, sizeof(float) * NUM_DATA, NULL, &_err));
memObjects[0] = input_buffer;
memObjects[1] = output_buffer;
float factor = ((float)rand()/(float)(RAND_MAX)) * 100.0;
float factor = ((float)rand() / (float)(RAND_MAX)) * 100.0;
printf("attempting to create kernel\n");
fflush(stdout);
kernel = CL_CHECK_ERR(clCreateKernel(program, "saxpy", &_err));
printf("setting up kernel args cl_mem:%lx \n",input_buffer);
kernel = CL_CHECK_ERR(clCreateKernel(program, "saxpy", &_err));
printf("setting up kernel args cl_mem:%lx \n", input_buffer);
fflush(stdout);
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_buffer), &input_buffer));
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer));
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(factor), &factor));
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_buffer), &input_buffer));
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer));
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(factor), &factor));
printf("attempting to enqueue write buffer\n");
fflush(stdout);
for (int i=0; i<NUM_DATA; i++) {
float in = ((float)rand()/(float)(RAND_MAX)) * 100.0;
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, i*sizeof(float), 4, &in, 0, NULL, NULL));
}
for (int i = 0; i < NUM_DATA; i++) {
float in = ((float)rand() / (float)(RAND_MAX)) * 100.0;
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE,
i * sizeof(float), 4, &in, 0, NULL, NULL));
}
cl_event kernel_completion;
size_t global_work_size[1] = { NUM_DATA };
cl_event kernel_completion;
size_t global_work_size[1] = {NUM_DATA};
printf("attempting to enqueue kernel\n");
fflush(stdout);
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, &kernel_completion));
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
NULL, 0, NULL, &kernel_completion));
printf("Enqueue'd kerenel\n");
fflush(stdout);
cl_ulong time_start, time_end;
CL_CHECK(clWaitForEvents(1, &kernel_completion));
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL));
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL));
CL_CHECK(clGetEventProfilingInfo(kernel_completion,
CL_PROFILING_COMMAND_START,
sizeof(time_start), &time_start, NULL));
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END,
sizeof(time_end), &time_end, NULL));
double elapsed = time_end - time_start;
printf("time(ns):%lg\n",elapsed);
CL_CHECK(clReleaseEvent(kernel_completion));
printf("time(ns):%lg\n", elapsed);
CL_CHECK(clReleaseEvent(kernel_completion));
printf("Result:");
for (int i=0; i<NUM_DATA; i++) {
float data;
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, i*sizeof(float), 4, &data, 0, NULL, NULL));
//printf(" %f", data);
}
printf("\n");
printf("Result:");
for (int i = 0; i < NUM_DATA; i++) {
float data;
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE,
i * sizeof(float), 4, &data, 0, NULL, NULL));
// printf(" %f", data);
}
printf("\n");
CL_CHECK(clReleaseMemObject(memObjects[0]));
CL_CHECK(clReleaseMemObject(memObjects[1]));
CL_CHECK(clReleaseMemObject(memObjects[0]));
CL_CHECK(clReleaseMemObject(memObjects[1]));
CL_CHECK(clReleaseKernel(kernel));
CL_CHECK(clReleaseProgram(program));
CL_CHECK(clReleaseContext(context));
CL_CHECK(clReleaseKernel(kernel));
CL_CHECK(clReleaseProgram(program));
CL_CHECK(clReleaseContext(context));
return 0;
return 0;
}

File diff suppressed because it is too large Load diff

Binary file not shown.

File diff suppressed because it is too large Load diff

View file

@ -1,33 +1,35 @@
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_INC_PATH = $(wildcard ../include)
POCL_LIB_PATH = $(wildcard ../lib)
VX_RT_PATH = $(wildcard ../../../runtime)
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
RISCV_TOOL_PATH=$(wildcard ~/dev/riscv-gnu-toolchain/drops)
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
POCL_CC_PATH=$(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_RT_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt)
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
VX_RT_PATH=$(wildcard ../../../runtime)
VX_SIMX_PATH=$(wildcard ../../../simX/obj_dir)
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
CC=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
NEWLIB_PATH=$(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
CXXFLAGS = -g -O0 -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -march=rv32im -mabi=ilp32
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
CXXFLAGS += -ffreestanding # program may not begin at main()
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -I$(POCL_INC_PATH)
LIBS = -lOpenCL
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT=sfilter
@ -37,7 +39,10 @@ lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) -I$(POCL_RT_PATH)/include -L$(POCL_RT_PATH)/lib/static -L. $(VX_SRCS) main.cc -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).elf
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
@ -45,8 +50,17 @@ $(PROJECT).hex: $(PROJECT).elf
$(PROJECT).dump: $(PROJECT).elf
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
run:
$(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
run: $(PROJECT).hex
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
qemu: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
gdb-s: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.elf *.dump *.hex *.a *.pocl
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu

View file

@ -17,95 +17,95 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* gcc -o cldemo -std=gnu99 -Wall -I/usr/include/nvidia-current cldemo.c -lOpenCL
* gcc -o cldemo -std=gnu99 -Wall -I/usr/include/nvidia-current cldemo.c
* -lOpenCL
*
*/
#include <CL/cl.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <errno.h>
#include <fstream>
#include <iostream>
#include <math.h>
#include <sstream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <unistd.h>
#define NUM_DATA 66
#define CL_CHECK(_expr) \
do { \
cl_int _err = _expr; \
if (_err == CL_SUCCESS) \
break; \
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
abort(); \
} while (0)
#define CL_CHECK(_expr) \
do { \
cl_int _err = _expr; \
if (_err == CL_SUCCESS) \
break; \
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
abort(); \
} while (0)
#define CL_CHECK_ERR(_expr) \
({ \
cl_int _err = CL_INVALID_VALUE; \
typeof(_expr) _ret = _expr; \
if (_err != CL_SUCCESS) { \
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
abort(); \
} \
_ret; \
})
#define CL_CHECK_ERR(_expr) \
({ \
cl_int _err = CL_INVALID_VALUE; \
typeof(_expr) _ret = _expr; \
if (_err != CL_SUCCESS) { \
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
abort(); \
} \
_ret; \
})
void pfn_notify(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
void pfn_notify(const char *errinfo, const void *private_info, size_t cb,
void *user_data) {
fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
}
// include pocl float to half conversions
typedef union
{
typedef union {
int32_t i;
float f;
} FloatConvUnion;
cl_half
poclu_float_to_cl_half(float value)
{
cl_half poclu_float_to_cl_half(float value) {
FloatConvUnion u;
u.f = value;
cl_half half = (u.i >> 16) & 0x8000; // sign
cl_half fraction = (u.i >> 12) & 0x007ff; // fraction with extra bit for rounding
cl_half exponent = (u.i >> 23) & 0xff; // exponent
if(exponent < 0x0067) // Return signed zero if zero or value is too small for denormal half
cl_half fraction =
(u.i >> 12) & 0x007ff; // fraction with extra bit for rounding
cl_half exponent = (u.i >> 23) & 0xff; // exponent
if (exponent < 0x0067) // Return signed zero if zero or value is too small for
// denormal half
return half;
if(exponent > 0x008e){// value was NaN or Inf
half |= 0x7c00u; // Make into inf
half |= exponent == 255 && (u.i & 0x007fffffu); // If value was NaN make this into NaN
if (exponent > 0x008e) { // value was NaN or Inf
half |= 0x7c00u; // Make into inf
half |= exponent == 255 &&
(u.i & 0x007fffffu); // If value was NaN make this into NaN
return half;
}
if(exponent < 0x0071){// Denormal
if (exponent < 0x0071) { // Denormal
fraction |= 0x0800u;
// rounding
half |= (fraction >> (0x0072 - exponent)) + ((fraction >> (0x0071 - exponent)) & 1);
half |= (fraction >> (0x0072 - exponent)) +
((fraction >> (0x0071 - exponent)) & 1);
return half;
}
half |= ((exponent - 0x0070) << 10) | (fraction >> 1);
half += fraction & 1;// rounding
half += fraction & 1; // rounding
return half;
}
#ifndef INFINITY
#define INFINITY 1.0/0.0
#define INFINITY 1.0 / 0.0
#endif
#ifndef NAN
#define NAN 0.0/0.0
#define NAN 0.0 / 0.0
#endif
float
poclu_cl_half_to_float(cl_half value)
{
float poclu_cl_half_to_float(cl_half value) {
if (value == 0xFC00) {
return -INFINITY;
}
@ -131,384 +131,78 @@ poclu_cl_half_to_float(cl_half value)
return v;
}
///
//  Create an OpenCL program from the kernel source file and build it.
//
//  context:  context the program is created in.
//  device:   device whose build log is reported when the build fails.
//  fileName: path of the OpenCL C source file to read.
//
//  Returns the built program, or NULL if the file cannot be opened, the
//  program object cannot be created, or the build fails. A diagnostic is
//  written to std::cerr in every failure case.
//
cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)
{
    std::ifstream kernelFile(fileName, std::ios::in);
    if (!kernelFile.is_open())
    {
        std::cerr << "Failed to open file for reading: " << fileName << std::endl;
        return NULL;
    }

    // Slurp the whole file; the std::string must stay alive until
    // clCreateProgramWithSource has been called on its c_str().
    std::ostringstream oss;
    oss << kernelFile.rdbuf();
    const std::string srcStdStr = oss.str();
    const char *srcStr = srcStdStr.c_str();

    // lengths == NULL tells OpenCL the single source string is
    // null-terminated.
    cl_int errNum = CL_SUCCESS;
    cl_program program = clCreateProgramWithSource(context, 1, &srcStr, NULL, &errNum);
    if (program == NULL || errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to create CL program from source." << std::endl;
        if (program != NULL)
            clReleaseProgram(program);
        return NULL;
    }

    // num_devices == 0 / device_list == NULL builds for every device
    // associated with the program.
    errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        // Determine the reason for the error. The buffer is pre-terminated so
        // that a failing clGetProgramBuildInfo cannot make us print garbage.
        char buildLog[16384];
        buildLog[0] = '\0';
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                              sizeof(buildLog), buildLog, NULL);

        std::cerr << "Error in kernel: " << std::endl;
        std::cerr << buildLog;
        clReleaseProgram(program);
        return NULL;
    }

    return program;
}
//
///
//  Retrieve the program binary and store it on disk for the device passed in.
//
//  program:  program object to read the binary from.
//  device:   device whose binary should be saved.
//  fileName: path of the file to (over)write with the binary.
//
//  Only one device per program is handled: every clGetProgramInfo query
//  below supplies room for exactly one entry, as the original code did.
//
//  Returns true only if the binary for `device` was completely written to
//  fileName. Returns false if any query fails, no binary is available, the
//  program's device is not `device`, or the file cannot be opened, written
//  or closed.
//
bool SaveProgramBinary(cl_program program, cl_device_id device, const char* fileName)
{
    printf("try getting program info\n");

    // 1 - Get the device ID attached to the program.
    cl_device_id programDevice = NULL;
    cl_int errNum = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
                                     sizeof(programDevice), &programDevice, NULL);
    printf("Got program_devices\n");
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error querying for devices." << std::endl;
        return false;
    }

    // 2 - Determine the size of the program binary.
    size_t binarySize = 0;
    errNum = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
                              sizeof(binarySize), &binarySize, NULL);
    printf("Got program_binary_sizes\n");
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error querying for program binary sizes." << std::endl;
        return false;
    }
    if (binarySize == 0)
    {
        // A zero size means no binary exists for the device; writing an
        // empty cache file would only make the next run fail to load it.
        std::cerr << "No program binary available for device." << std::endl;
        return false;
    }

    // 3 - Get the program binary. std::string is used purely as an owned
    //     byte buffer so every return path frees it automatically.
    std::string binary(binarySize, '\0');
    unsigned char *binaryPtr = reinterpret_cast<unsigned char*>(&binary[0]);
    errNum = clGetProgramInfo(program, CL_PROGRAM_BINARIES,
                              sizeof(binaryPtr), &binaryPtr, NULL);
    printf("Got program_binarys\n");
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error querying for program binaries" << std::endl;
        return false;
    }

    // 4 - Store the binary just for the device requested.
    if (programDevice != device)
    {
        std::cerr << "Program has no binary for the requested device." << std::endl;
        return false;
    }

    FILE *fp = fopen(fileName, "wb");
    if (fp == NULL)
        return false;
    printf("Opened file\n");

    const size_t written = fwrite(binaryPtr, 1, binarySize, fp);
    printf("wrote file\n");
    // fclose flushes buffered data, so its result is part of "did the write
    // succeed".
    const bool closedOk = (fclose(fp) == 0);
    printf("close file\n");

    if (written != binarySize || !closedOk)
    {
        std::cerr << "Error writing program binary to file." << std::endl;
        return false;
    }

    return true;
}
///
//  Attempt to create the program object from a cached binary. Note that
//  on first run this will fail because the binary has not yet been created.
//
//  context:  context the program is created in.
//  device:   the single device the binary is loaded and built for.
//  fileName: path of the cached binary file.
//
//  Returns the built program, or NULL if the file is missing, empty or
//  unreadable, the binary is rejected, or the build fails. A missing file
//  is reported silently (it is the expected first-run case); all other
//  failures write a diagnostic to std::cerr.
//
cl_program CreateProgramFromBinary(cl_context context, cl_device_id device, const char* fileName)
{
    FILE *fp = fopen(fileName, "rb");
    if (fp == NULL)
    {
        return NULL;
    }

    // Determine the size of the binary. ftell returns -1 on failure, which
    // must not be converted to size_t and used as an allocation size.
    long fileSize = -1;
    if (fseek(fp, 0, SEEK_END) == 0)
        fileSize = ftell(fp);
    if (fileSize <= 0)
    {
        fclose(fp);
        std::cerr << "Error loading program binary." << std::endl;
        return NULL;
    }
    rewind(fp);

    // std::string is used purely as an owned byte buffer.
    size_t binarySize = static_cast<size_t>(fileSize);
    std::string binary(binarySize, '\0');
    const size_t bytesRead = fread(&binary[0], 1, binarySize, fp);
    fclose(fp);
    if (bytesRead != binarySize)
    {
        std::cerr << "Error loading program binary." << std::endl;
        return NULL;
    }

    const unsigned char *binaryPtr =
        reinterpret_cast<const unsigned char*>(binary.data());
    cl_int errNum = CL_SUCCESS;
    cl_int binaryStatus = CL_SUCCESS;
    cl_program program = clCreateProgramWithBinary(context,
                                                   1,
                                                   &device,
                                                   &binarySize,
                                                   &binaryPtr,
                                                   &binaryStatus,
                                                   &errNum);

    if (errNum != CL_SUCCESS || program == NULL)
    {
        std::cerr << "Error loading program binary." << std::endl;
        if (program != NULL)
            clReleaseProgram(program);
        return NULL;
    }

    if (binaryStatus != CL_SUCCESS)
    {
        std::cerr << "Invalid binary for device" << std::endl;
        // The program object exists at this point; release it so the
        // rejected binary does not leak a handle.
        clReleaseProgram(program);
        return NULL;
    }

    errNum = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        printf("build errNum:%d\n", static_cast<int>(errNum));
        // Determine the reason for the error. The buffer is pre-terminated so
        // that a failing clGetProgramBuildInfo cannot make us print garbage.
        char buildLog[16384];
        buildLog[0] = '\0';
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                              sizeof(buildLog), buildLog, NULL);

        std::cerr << "Error in program: " << std::endl;
        std::cerr << buildLog << std::endl;
        clReleaseProgram(program);
        return NULL;
    }

    return program;
}
///
// Cleanup any created OpenCL resources
//
void Cleanup(cl_context context, cl_command_queue commandQueue,
cl_program program, cl_kernel kernel, cl_mem memObjects[3])
{
for (int i = 0; i < 3; i++)
{
if (memObjects[i] != 0)
clReleaseMemObject(memObjects[i]);
}
if (commandQueue != 0)
clReleaseCommandQueue(commandQueue);
cl_program program, cl_kernel kernel, cl_mem memObjects[3]) {
for (int i = 0; i < 3; i++) {
if (memObjects[i] != 0)
clReleaseMemObject(memObjects[i]);
}
if (commandQueue != 0)
clReleaseCommandQueue(commandQueue);
if (kernel != 0)
clReleaseKernel(kernel);
if (kernel != 0)
clReleaseKernel(kernel);
if (program != 0)
clReleaseProgram(program);
if (context != 0)
clReleaseContext(context);
if (program != 0)
clReleaseProgram(program);
if (context != 0)
clReleaseContext(context);
}
int main(int argc, char **argv)
{
int main(int argc, char **argv) {
printf("enter demo main\n");
fflush(stdout);
putenv("POCL_VERBOSE=1");
putenv("POCL_DEVICES=basic");
putenv("POCL_LEAVE_TEMP_DIRS=1");
putenv("POCL_LEAVE_KERNEL_COMPILER_TEMP_FILES=1");
putenv("POCL_TEMP_DIR=pocl");
putenv("POCL_CACHE_DIR=pocl");
putenv("POCL_WORK_GROUP_METHOD=spmd");
if(argc >= 2){
printf("argv[1]:%s:\n",argv[1]);
if(!strcmp(argv[1], "h"))
putenv("POCL_WORK_GROUP_METHOD=spmd");
if(!strcmp(argv[1], "c"))
putenv("POCL_CROSS_COMPILE=1");
}
if(argc >= 3){
printf("argv[2]:%s:\n",argv[2]);
if(!strcmp(argv[2], "h"))
putenv("POCL_WORK_GROUP_METHOD=spmd");
if(!strcmp(argv[2], "c"))
putenv("POCL_CROSS_COMPILE=1");
}
//putenv("LD_LIBRARY_PATH=/scratch/colins/build/linux/fs/lib");
//putenv("LTDL_LIBRARY_PATH=/scratch/colins/build/linux/fs/lib");
//lt_dlsetsearchpath("/scratch/colins/build/linux/fs/lib");
//printf("SEARCH_PATH:%s\n",lt_dlgetsearchpath());
cl_platform_id platforms[100];
cl_uint platforms_n = 0;
CL_CHECK(clGetPlatformIDs(100, platforms, &platforms_n));
cl_platform_id platform_id;
cl_device_id device_id;
size_t binary_size;
int i;
printf("=== %d OpenCL platform(s) found: ===\n", platforms_n);
for (int i=0; i<platforms_n; i++)
{
char buffer[10240];
printf(" -- %d --\n", i);
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL));
printf(" PROFILE = %s\n", buffer);
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL));
printf(" VERSION = %s\n", buffer);
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL));
printf(" NAME = %s\n", buffer);
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL));
printf(" VENDOR = %s\n", buffer);
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL));
printf(" EXTENSIONS = %s\n", buffer);
}
// Getting platform and device information
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
if (platforms_n == 0)
return 1;
cl_context context;
context = CL_CHECK_ERR(clCreateContext(NULL, 1, &device_id, &pfn_notify, NULL, &_err));
cl_device_id devices[100];
cl_uint devices_n = 0;
// CL_CHECK(clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 100, devices, &devices_n));
CL_CHECK(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 100, devices, &devices_n));
printf("=== %d OpenCL device(s) found on platform:\n", platforms_n);
for (int i=0; i<devices_n; i++)
{
char buffer[10240];
cl_uint buf_uint;
cl_ulong buf_ulong;
printf(" -- %d --\n", i);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL));
printf(" DEVICE_NAME = %s\n", buffer);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL));
printf(" DEVICE_VENDOR = %s\n", buffer);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(buffer), buffer, NULL));
printf(" DEVICE_VERSION = %s\n", buffer);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(buffer), buffer, NULL));
printf(" DRIVER_VERSION = %s\n", buffer);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf_uint), &buf_uint, NULL));
printf(" DEVICE_MAX_COMPUTE_UNITS = %u\n", (unsigned int)buf_uint);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf_uint), &buf_uint, NULL));
printf(" DEVICE_MAX_CLOCK_FREQUENCY = %u\n", (unsigned int)buf_uint);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, NULL));
printf(" DEVICE_GLOBAL_MEM_SIZE = %llu\n", (unsigned long long)buf_ulong);
}
if (devices_n == 0)
return 1;
cl_context context;
context = CL_CHECK_ERR(clCreateContext(NULL, 1, devices+1, &pfn_notify, NULL, &_err));
cl_command_queue queue;
queue = CL_CHECK_ERR(clCreateCommandQueue(context, devices[1], CL_QUEUE_PROFILING_ENABLE, &_err));
cl_kernel kernel = 0;
cl_mem memObjects[2] = {0,0};
cl_command_queue queue;
queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &_err));
cl_kernel kernel = 0;
cl_mem memObjects[2] = {0, 0};
// Create OpenCL program - first attempt to load cached binary.
// If that is not available, then create the program from source
// and store the binary for future use.
std::cout << "Attempting to create program from binary..." << std::endl;
cl_program program = CreateProgramFromBinary(context, devices[1], "kernel.cl.bin");
if (program == NULL)
{
std::cout << "Binary not loaded, create from source..." << std::endl;
program = CreateProgram(context, devices[1], "kernel.cl");
if (program == NULL)
{
Cleanup(context, queue, program, kernel, memObjects);
return 1;
}
cl_program program = clCreateProgramWithBuiltInKernels(context, 1, &device_id, "sfilter", NULL);
if (program == NULL) {
std::cerr << "Failed to write program binary" << std::endl;
Cleanup(context, queue, program, kernel, memObjects);
return 1;
} else {
std::cout << "Read program from binary." << std::endl;
}
std::cout << "Save program binary for future run..." << std::endl;
if (SaveProgramBinary(program, devices[1], "kernel.cl.bin") == false)
{
std::cerr << "Failed to write program binary" << std::endl;
Cleanup(context, queue, program, kernel, memObjects);
return 1;
}
}
else
{
std::cout << "Read program from binary." << std::endl;
}
// Build program
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
printf("attempting to create input buffer\n");
fflush(stdout);
cl_mem input_buffer;
input_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*NUM_DATA*NUM_DATA, NULL, &_err));
cl_mem input_buffer;
input_buffer = CL_CHECK_ERR(
clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
printf("attempting to create output buffer\n");
fflush(stdout);
cl_mem output_buffer;
output_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float)*NUM_DATA*NUM_DATA, NULL, &_err));
cl_mem output_buffer;
output_buffer = CL_CHECK_ERR(
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
memObjects[0] = input_buffer;
memObjects[1] = output_buffer;
@ -527,61 +221,67 @@ int main(int argc, char **argv)
printf("attempting to create kernel\n");
fflush(stdout);
kernel = CL_CHECK_ERR(clCreateKernel(program, "sfilter", &_err));
printf("setting up kernel args cl_mem:%lx \n",input_buffer);
kernel = CL_CHECK_ERR(clCreateKernel(program, "sfilter", &_err));
printf("setting up kernel args cl_mem:%lx \n", input_buffer);
fflush(stdout);
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_buffer), &input_buffer));
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer));
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(ldc), (&ldc)));
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(m0), (&m0)));
CL_CHECK(clSetKernelArg(kernel, 4, sizeof(m1), (&m1)));
CL_CHECK(clSetKernelArg(kernel, 5, sizeof(m2), (&m2)));
CL_CHECK(clSetKernelArg(kernel, 6, sizeof(m3), (&m3)));
CL_CHECK(clSetKernelArg(kernel, 7, sizeof(m4), (&m4)));
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(m5), (&m5)));
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(m6), (&m6)));
CL_CHECK(clSetKernelArg(kernel, 10, sizeof(m7), (&m7)));
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(m8), (&m8)));
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_buffer), &input_buffer));
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer));
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(ldc), (&ldc)));
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(m0), (&m0)));
CL_CHECK(clSetKernelArg(kernel, 4, sizeof(m1), (&m1)));
CL_CHECK(clSetKernelArg(kernel, 5, sizeof(m2), (&m2)));
CL_CHECK(clSetKernelArg(kernel, 6, sizeof(m3), (&m3)));
CL_CHECK(clSetKernelArg(kernel, 7, sizeof(m4), (&m4)));
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(m5), (&m5)));
CL_CHECK(clSetKernelArg(kernel, 9, sizeof(m6), (&m6)));
CL_CHECK(clSetKernelArg(kernel, 10, sizeof(m7), (&m7)));
CL_CHECK(clSetKernelArg(kernel, 11, sizeof(m8), (&m8)));
printf("attempting to enqueue write buffer\n");
fflush(stdout);
for (int i=0; i<NUM_DATA*NUM_DATA; i++) {
float in = ((float)rand()/(float)(RAND_MAX)) * 100.0;
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, i*sizeof(float), 4, &in, 0, NULL, NULL));
}
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
float in = ((float)rand() / (float)(RAND_MAX)) * 100.0;
CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE,
i * sizeof(float), 4, &in, 0, NULL, NULL));
}
cl_event kernel_completion;
size_t global_offset[2] = { 1, 1};
size_t global_work_size[2] = { NUM_DATA - 2, NUM_DATA - 2};//avoid the edges
const size_t local_work_size[2] = { 64, 1 };
cl_event kernel_completion;
size_t global_offset[2] = {1, 1};
size_t global_work_size[2] = {NUM_DATA - 2, NUM_DATA - 2}; // avoid the edges
const size_t local_work_size[2] = {64, 1};
printf("attempting to enqueue kernel\n");
fflush(stdout);
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 2, global_offset, global_work_size, local_work_size, 0, NULL, &kernel_completion));
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 2, global_offset,
global_work_size, local_work_size, 0, NULL,
&kernel_completion));
printf("Enqueue'd kerenel\n");
fflush(stdout);
cl_ulong time_start, time_end;
CL_CHECK(clWaitForEvents(1, &kernel_completion));
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL));
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL));
CL_CHECK(clGetEventProfilingInfo(kernel_completion,
CL_PROFILING_COMMAND_START,
sizeof(time_start), &time_start, NULL));
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END,
sizeof(time_end), &time_end, NULL));
double elapsed = time_end - time_start;
printf("time(ns):%lg\n",elapsed);
CL_CHECK(clReleaseEvent(kernel_completion));
printf("time(ns):%lg\n", elapsed);
CL_CHECK(clReleaseEvent(kernel_completion));
printf("Result:");
for (int i=0; i<NUM_DATA*NUM_DATA; i++) {
float data;
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, i*sizeof(float), 4, &data, 0, NULL, NULL));
//printf(" %f", data);
}
printf("\n");
printf("Result:");
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
float data;
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE,
i * sizeof(float), 4, &data, 0, NULL, NULL));
// printf(" %f", data);
}
printf("\n");
CL_CHECK(clReleaseMemObject(memObjects[0]));
CL_CHECK(clReleaseMemObject(memObjects[1]));
CL_CHECK(clReleaseMemObject(memObjects[0]));
CL_CHECK(clReleaseMemObject(memObjects[1]));
CL_CHECK(clReleaseKernel(kernel));
CL_CHECK(clReleaseProgram(program));
CL_CHECK(clReleaseContext(context));
CL_CHECK(clReleaseKernel(kernel));
CL_CHECK(clReleaseProgram(program));
CL_CHECK(clReleaseContext(context));
return 0;
return 0;
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,36 +1,35 @@
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_INC_PATH = $(wildcard ../include)
POCL_LIB_PATH = $(wildcard ../lib)
VX_RT_PATH = $(wildcard ../../../runtime)
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
RISCV_TOOL_PATH=$(wildcard ~/dev/riscv-gnu-toolchain/drops)
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
POCL_CC_PATH=$(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_RT_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt)
POCL_RT0_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt0)
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
VX_RT_PATH=$(wildcard ../../../runtime)
VX_SIMX_PATH=$(wildcard ../../../simX/obj_dir)
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
CC=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
NEWLIB_PATH=$(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
#CXXFLAGS = -g -O0 -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld -march=rv32im -mabi=ilp32
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
CXXFLAGS += -ffreestanding # program may not begin at main()
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -I$(POCL_INC_PATH)
LIBS = -lOpenCL
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT=sgemm
@ -40,10 +39,10 @@ lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) -I$(POCL_RT_PATH)/include -L$(POCL_RT_PATH)/lib/static -L. $(VX_SRCS) main.cc -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).elf
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) -I$(POCL_RT0_PATH)/include -L$(POCL_RT0_PATH)/lib/static -L. main.cc -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).qemu
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
@ -52,10 +51,16 @@ $(PROJECT).dump: $(PROJECT).elf
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
run: $(PROJECT).hex
$(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
qemu: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
gdb-s: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.elf *.dump *.hex *.a *.pocl
rm -rf *.elf *.dump *.hex

File diff suppressed because it is too large Load diff

Binary file not shown.

View file

@ -17,15 +17,16 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* gcc -o cldemo -std=gnu99 -Wall -I/usr/include/nvidia-current cldemo.c -lOpenCL
* gcc -o cldemo -std=gnu99 -Wall -I/usr/include/nvidia-current cldemo.c
* -lOpenCL
*
*/
#include <CL/cl.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <errno.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -33,469 +34,180 @@
#define NUM_DATA 64
// Evaluate an OpenCL call whose return value is a cl_int status code.
// If the status is not CL_SUCCESS, print the stringified expression and the
// numeric code to stderr and abort(); otherwise fall through silently.
// Wrapped in do { ... } while (0) so it behaves as a single statement.
#define CL_CHECK(_expr) \
do { \
cl_int _err = _expr; \
if (_err == CL_SUCCESS) \
break; \
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
abort(); \
} while (0)
#define CL_CHECK(_expr) \
do { \
cl_int _err = _expr; \
if (_err == CL_SUCCESS) \
break; \
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
abort(); \
} while (0)
// Evaluate an OpenCL call that reports its status through an out-parameter
// and yield the call's own return value.
// The macro declares a local `_err` (preset to CL_INVALID_VALUE), so _expr is
// expected to pass `&_err` as its error argument; if `_err` is not CL_SUCCESS
// after the call, the stringified expression and the code are printed to
// stderr and the program aborts.
// Relies on the GNU statement-expression `({ ... })` and `typeof` extensions.
#define CL_CHECK_ERR(_expr) \
({ \
cl_int _err = CL_INVALID_VALUE; \
typeof(_expr) _ret = _expr; \
if (_err != CL_SUCCESS) { \
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
abort(); \
} \
_ret; \
})
#define CL_CHECK_ERR(_expr) \
({ \
cl_int _err = CL_INVALID_VALUE; \
typeof(_expr) _ret = _expr; \
if (_err != CL_SUCCESS) { \
fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
abort(); \
} \
_ret; \
})
// Error callback passed to clCreateContext: writes the message received in
// `errinfo` to stderr.
void pfn_notify(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
    // Only the message text is used; the other callback arguments are ignored.
    (void)private_info;
    (void)cb;
    (void)user_data;

    fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
}
///
//  Create an OpenCL program from the kernel source file and build it.
//
//  context  - context the program object is created in
//  device   - device whose build log is reported if the build fails
//  fileName - path of the kernel source file to read
//
//  Returns the built program, or NULL when the file cannot be opened, the
//  program object cannot be created, or the build fails. Diagnostics are
//  written to std::cerr.
//
cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)
{
    std::ifstream kernelFile(fileName, std::ios::in);
    if (!kernelFile.is_open())
    {
        std::cerr << "Failed to open file for reading: " << fileName << std::endl;
        return NULL;
    }

    // Read the whole file into one string; srcStr points into srcStdStr, so
    // the string must stay alive until clCreateProgramWithSource returns.
    std::ostringstream oss;
    oss << kernelFile.rdbuf();
    const std::string srcStdStr = oss.str();
    const char *srcStr = srcStdStr.c_str();

    cl_program program = clCreateProgramWithSource(context, 1, &srcStr, NULL, NULL);
    if (program == NULL)
    {
        std::cerr << "Failed to create CL program from source." << std::endl;
        return NULL;
    }

    const cl_int errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        // Determine the reason for the error. The log length is queried first
        // so the buffer always fits; a fixed-size buffer is left untouched
        // (and would be printed uninitialised) when the log is larger than it.
        size_t logSize = 0;
        std::string buildLog;
        if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                                  0, NULL, &logSize) == CL_SUCCESS && logSize > 0)
        {
            buildLog.resize(logSize);
            if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                                      logSize, &buildLog[0], NULL) != CL_SUCCESS)
            {
                buildLog.clear();
            }
        }

        std::cerr << "Error in kernel: " << std::endl;
        // c_str() stops at the log's own terminator instead of emitting it.
        std::cerr << buildLog.c_str();
        clReleaseProgram(program);
        return NULL;
    }

    return program;
}
//
///
//  Retrieve the program binary for the device attached to the program and
//  store it in fileName when that device is the one passed in.
//
//  Exactly one device is assumed to be attached to the program: the
//  CL_PROGRAM_NUM_DEVICES query is not issued and every query below asks for
//  a single entry.
//
//  program  - program whose binary is saved
//  device   - device whose binary should be written
//  fileName - destination path, opened in binary write mode
//
//  Returns false if an OpenCL query fails, the file cannot be opened, or the
//  binary cannot be written completely. Returns true otherwise, including the
//  case where the program's device is not `device` and nothing is written.
//
bool SaveProgramBinary(cl_program program, cl_device_id device, const char* fileName)
{
    cl_int errNum;
    printf("try getting program info\n");

    // 1 - Get the device ID attached to the program
    cl_device_id programDevice = NULL;
    errNum = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
                              sizeof(programDevice), &programDevice, NULL);
    printf("Got program_devices\n");
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error querying for devices." << std::endl;
        return false;
    }

    // 2 - Determine the size of the program binary
    size_t binarySize = 0;
    errNum = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
                              sizeof(binarySize), &binarySize, NULL);
    printf("Got program_binary_sizes\n");
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error querying for program binary sizes." << std::endl;
        return false;
    }

    // 3 - Get the program binary. CL_PROGRAM_BINARIES expects an array of
    //     buffer pointers; &binary is that array with a single entry.
    unsigned char *binary = new unsigned char[binarySize];
    errNum = clGetProgramInfo(program, CL_PROGRAM_BINARIES,
                              sizeof(binary), &binary, NULL);
    printf("Got program_binarys\n");

    bool ok = true;
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error querying for program binaries" << std::endl;
        ok = false;
    }
    else if (programDevice == device)
    {
        // 4 - Store the binary for the requested device for future reading.
        FILE *fp = fopen(fileName, "wb");
        if (fp == NULL)
        {
            ok = false;
        }
        else
        {
            printf("Opened file\n");
            const size_t written = fwrite(binary, 1, binarySize, fp);
            printf("wrote file\n");
            const bool closed = (fclose(fp) == 0);
            printf("close file\n");

            // A short write or failed close leaves a truncated cache file;
            // report it instead of claiming success.
            if (written != binarySize || !closed)
            {
                std::cerr << "Error writing program binary to file: " << fileName << std::endl;
                ok = false;
            }
        }
    }

    // Cleanup: single owner, single release point.
    delete [] binary;
    return ok;
}
///
// Attempt to create the program object from a cached binary. Note that
// on first run this will fail because the binary has not yet been created.
//
cl_program CreateProgramFromBinary(cl_context context, cl_device_id device, const char* fileName)
{
FILE *fp = fopen(fileName, "rb");
if (fp == NULL)
{
return NULL;
}
// Determine the size of the binary
size_t binarySize;
fseek(fp, 0, SEEK_END);
binarySize = ftell(fp);
rewind(fp);
unsigned char *programBinary = new unsigned char[binarySize];
fread(programBinary, 1, binarySize, fp);
fclose(fp);
cl_int errNum = 0;
cl_program program;
cl_int binaryStatus;
program = clCreateProgramWithBinary(context,
1,
&device,
&binarySize,
(const unsigned char**)&programBinary,
&binaryStatus,
&errNum);
delete [] programBinary;
if (errNum != CL_SUCCESS)
{
std::cerr << "Error loading program binary." << std::endl;
return NULL;
}
if (binaryStatus != CL_SUCCESS)
{
std::cerr << "Invalid binary for device" << std::endl;
return NULL;
}
errNum = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
if (errNum != CL_SUCCESS)
{
printf("build errNum:%d\n", errNum);
// Determine the reason for the error
char buildLog[16384];
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
sizeof(buildLog), buildLog, NULL);
std::cerr << "Error in program: " << std::endl;
std::cerr << buildLog << std::endl;
clReleaseProgram(program);
return NULL;
}
return program;
void pfn_notify(const char *errinfo, const void *private_info, size_t cb,
void *user_data) {
fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
}
///
// Cleanup any created OpenCL resources
//
void Cleanup(cl_context context, cl_command_queue commandQueue,
cl_program program, cl_kernel kernel, cl_mem memObjects[3])
{
for (int i = 0; i < 3; i++)
{
if (memObjects[i] != 0)
clReleaseMemObject(memObjects[i]);
}
if (commandQueue != 0)
clReleaseCommandQueue(commandQueue);
cl_program program, cl_kernel kernel, cl_mem memObjects[3]) {
for (int i = 0; i < 3; i++) {
if (memObjects[i] != 0)
clReleaseMemObject(memObjects[i]);
}
if (commandQueue != 0)
clReleaseCommandQueue(commandQueue);
if (kernel != 0)
clReleaseKernel(kernel);
if (kernel != 0)
clReleaseKernel(kernel);
if (program != 0)
clReleaseProgram(program);
if (context != 0)
clReleaseContext(context);
if (program != 0)
clReleaseProgram(program);
if (context != 0)
clReleaseContext(context);
}
int main(int argc, char **argv)
{
int main(int argc, char **argv) {
printf("enter demo main\n");
fflush(stdout);
putenv("POCL_VERBOSE=1");
putenv("POCL_DEVICES=basic");
putenv("POCL_LEAVE_TEMP_DIRS=1");
putenv("POCL_LEAVE_KERNEL_COMPILER_TEMP_FILES=1");
putenv("POCL_TEMP_DIR=pocl");
putenv("POCL_CACHE_DIR=pocl");
putenv("POCL_WORK_GROUP_METHOD=spmd");
if(argc >= 2){
printf("argv[1]:%s:\n",argv[1]);
if(!strcmp(argv[1], "h"))
putenv("POCL_WORK_GROUP_METHOD=spmd");
if(!strcmp(argv[1], "c"))
putenv("POCL_CROSS_COMPILE=1");
}
if(argc >= 3){
printf("argv[2]:%s:\n",argv[2]);
if(!strcmp(argv[2], "h"))
putenv("POCL_WORK_GROUP_METHOD=spmd");
if(!strcmp(argv[2], "c"))
putenv("POCL_CROSS_COMPILE=1");
}
cl_platform_id platform_id;
cl_device_id device_id;
size_t binary_size;
int i;
//putenv("LD_LIBRARY_PATH=/scratch/colins/build/linux/fs/lib");
//putenv("LTDL_LIBRARY_PATH=/scratch/colins/build/linux/fs/lib");
//lt_dlsetsearchpath("/scratch/colins/build/linux/fs/lib");
//printf("SEARCH_PATH:%s\n",lt_dlgetsearchpath());
cl_platform_id platforms[100];
cl_uint platforms_n = 0;
CL_CHECK(clGetPlatformIDs(100, platforms, &platforms_n));
// Getting platform and device information
CL_CHECK(clGetPlatformIDs(1, &platform_id, NULL));
CL_CHECK(clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, NULL));
printf("=== %d OpenCL platform(s) found: ===\n", platforms_n);
for (int i=0; i<platforms_n; i++)
{
char buffer[10240];
printf(" -- %d --\n", i);
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, 10240, buffer, NULL));
printf(" PROFILE = %s\n", buffer);
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 10240, buffer, NULL));
printf(" VERSION = %s\n", buffer);
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 10240, buffer, NULL));
printf(" NAME = %s\n", buffer);
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 10240, buffer, NULL));
printf(" VENDOR = %s\n", buffer);
CL_CHECK(clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL));
printf(" EXTENSIONS = %s\n", buffer);
}
cl_context context;
context = CL_CHECK_ERR(
clCreateContext(NULL, 1, &device_id, &pfn_notify, NULL, &_err));
if (platforms_n == 0)
return 1;
cl_device_id devices[100];
cl_uint devices_n = 0;
// CL_CHECK(clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 100, devices, &devices_n));
CL_CHECK(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 100, devices, &devices_n));
printf("=== %d OpenCL device(s) found on platform:\n", platforms_n);
for (int i=0; i<devices_n; i++)
{
char buffer[10240];
cl_uint buf_uint;
cl_ulong buf_ulong;
printf(" -- %d --\n", i);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL));
printf(" DEVICE_NAME = %s\n", buffer);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL));
printf(" DEVICE_VENDOR = %s\n", buffer);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(buffer), buffer, NULL));
printf(" DEVICE_VERSION = %s\n", buffer);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(buffer), buffer, NULL));
printf(" DRIVER_VERSION = %s\n", buffer);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf_uint), &buf_uint, NULL));
printf(" DEVICE_MAX_COMPUTE_UNITS = %u\n", (unsigned int)buf_uint);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf_uint), &buf_uint, NULL));
printf(" DEVICE_MAX_CLOCK_FREQUENCY = %u\n", (unsigned int)buf_uint);
CL_CHECK(clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, NULL));
printf(" DEVICE_GLOBAL_MEM_SIZE = %llu\n", (unsigned long long)buf_ulong);
}
if (devices_n == 0)
return 1;
cl_context context;
context = CL_CHECK_ERR(clCreateContext(NULL, 1, devices+1, &pfn_notify, NULL, &_err));
cl_command_queue queue;
queue = CL_CHECK_ERR(clCreateCommandQueue(context, devices[1], CL_QUEUE_PROFILING_ENABLE, &_err));
cl_kernel kernel = 0;
cl_mem memObjects[3] = {0,0,0};
cl_command_queue queue;
queue = CL_CHECK_ERR(clCreateCommandQueue(context, device_id,
CL_QUEUE_PROFILING_ENABLE, &_err));
cl_kernel kernel = 0;
cl_mem memObjects[3] = {0, 0, 0};
// Create OpenCL program - first attempt to load cached binary.
// If that is not available, then create the program from source
// and store the binary for future use.
std::cout << "Attempting to create program from binary..." << std::endl;
//cl_program program = CreateProgramFromBinary(context, devices[1], "kernel.cl.bin");
cl_program program = clCreateProgramWithBuiltInKernels(context, 1, &devices[1], "sgemm", NULL);
if (program == NULL)
{
std::cout << "Binary not loaded, create from source..." << std::endl;
program = CreateProgram(context, devices[1], "kernel.cl");
if (program == NULL)
{
Cleanup(context, queue, program, kernel, memObjects);
return 1;
}
// cl_program program = CreateProgramFromBinary(context, device_id,
// "kernel.cl.bin");
cl_program program =
clCreateProgramWithBuiltInKernels(context, 1, &device_id, "sgemm", NULL);
if (program == NULL) {
std::cerr << "Failed to write program binary" << std::endl;
Cleanup(context, queue, program, kernel, memObjects);
return 1;
} else {
std::cout << "Read program from binary." << std::endl;
}
std::cout << "Save program binary for future run..." << std::endl;
if (SaveProgramBinary(program, devices[1], "kernel.cl.bin") == false)
{
std::cerr << "Failed to write program binary" << std::endl;
Cleanup(context, queue, program, kernel, memObjects);
return 1;
}
}
else
{
std::cout << "Read program from binary." << std::endl;
}
// Build program
CL_CHECK(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));
printf("attempting to create input buffer\n");
fflush(stdout);
cl_mem input_bufferA;
input_bufferA = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*NUM_DATA*NUM_DATA, NULL, &_err));
cl_mem input_bufferB;
input_bufferB = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*NUM_DATA*NUM_DATA, NULL, &_err));
cl_mem input_bufferA;
input_bufferA = CL_CHECK_ERR(
clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
cl_mem input_bufferB;
input_bufferB = CL_CHECK_ERR(
clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
printf("attempting to create output buffer\n");
fflush(stdout);
cl_mem output_buffer;
output_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float)*NUM_DATA*NUM_DATA, NULL, &_err));
cl_mem output_buffer;
output_buffer = CL_CHECK_ERR(
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(float) * NUM_DATA * NUM_DATA, NULL, &_err));
memObjects[0] = input_bufferA;
memObjects[1] = input_bufferB;
memObjects[2] = output_buffer;
size_t width = NUM_DATA;
size_t width = NUM_DATA;
printf("attempting to create kernel\n");
fflush(stdout);
kernel = CL_CHECK_ERR(clCreateKernel(program, "sgemm_single", &_err));
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_bufferA), &input_bufferA));
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(input_bufferB), &input_bufferB));
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer));
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(width), &width));
kernel = CL_CHECK_ERR(clCreateKernel(program, "sgemm", &_err));
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_bufferA), &input_bufferA));
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(input_bufferB), &input_bufferB));
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer));
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(width), &width));
printf("attempting to enqueue write buffer\n");
fflush(stdout);
for (int i=0; i<NUM_DATA*NUM_DATA; i++) {
float in = ((float)rand()/(float)(RAND_MAX)) * 100.0;
CL_CHECK(clEnqueueWriteBuffer(queue, input_bufferA, CL_TRUE, i*sizeof(float), 4, &in, 0, NULL, NULL));
in = ((float)rand()/(float)(RAND_MAX)) * 100.0;
CL_CHECK(clEnqueueWriteBuffer(queue, input_bufferB, CL_TRUE, i*sizeof(float), 4, &in, 0, NULL, NULL));
}
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
cl_event kernel_completion;
const size_t local_work_size[3] = { 64, 1, 1};
// a_offset
size_t global_work_size[3] = { NUM_DATA, NUM_DATA, NUM_DATA };
float in = ((float)rand() / (float)(RAND_MAX)) * 100.0;
CL_CHECK(clEnqueueWriteBuffer(queue, input_bufferA, CL_TRUE,
i * sizeof(float), 4, &in, 0, NULL, NULL));
in = ((float)rand() / (float)(RAND_MAX)) * 100.0;
CL_CHECK(clEnqueueWriteBuffer(queue, input_bufferB, CL_TRUE,
i * sizeof(float), 4, &in, 0, NULL, NULL));
}
printf("Done enqueueing\n");
cl_event kernel_completion;
const size_t local_work_size[3] = {1, 1, 1};
// a_offset
size_t global_work_size[3] = {NUM_DATA, NUM_DATA, NUM_DATA};
printf("attempting to enqueue kernel\n");
fflush(stdout);
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &kernel_completion));
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size,
local_work_size, 0, NULL,
&kernel_completion));
printf("Enqueue'd kerenel\n");
fflush(stdout);
cl_ulong time_start, time_end;
CL_CHECK(clWaitForEvents(1, &kernel_completion));
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL));
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL));
CL_CHECK(clGetEventProfilingInfo(kernel_completion,
CL_PROFILING_COMMAND_START,
sizeof(time_start), &time_start, NULL));
CL_CHECK(clGetEventProfilingInfo(kernel_completion, CL_PROFILING_COMMAND_END,
sizeof(time_end), &time_end, NULL));
double elapsed = time_end - time_start;
printf("time(ns):%lg\n",elapsed);
CL_CHECK(clReleaseEvent(kernel_completion));
printf("time(ns):%lg\n", elapsed);
CL_CHECK(clReleaseEvent(kernel_completion));
printf("Result:");
for (int i=0; i<NUM_DATA*NUM_DATA; i++) {
float data;
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, i*sizeof(float), 4, &data, 0, NULL, NULL));
//printf(" %f", data);
}
printf("\n");
printf("Result:");
for (int i = 0; i < NUM_DATA * NUM_DATA; i++) {
float data;
CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE,
i * sizeof(float), 4, &data, 0, NULL, NULL));
// printf(" %f", data);
}
printf("\n");
CL_CHECK(clReleaseMemObject(memObjects[0]));
CL_CHECK(clReleaseMemObject(memObjects[1]));
CL_CHECK(clReleaseMemObject(memObjects[2]));
CL_CHECK(clReleaseMemObject(memObjects[0]));
CL_CHECK(clReleaseMemObject(memObjects[1]));
CL_CHECK(clReleaseMemObject(memObjects[2]));
CL_CHECK(clReleaseKernel(kernel));
CL_CHECK(clReleaseProgram(program));
CL_CHECK(clReleaseContext(context));
CL_CHECK(clReleaseKernel(kernel));
CL_CHECK(clReleaseProgram(program));
CL_CHECK(clReleaseContext(context));
return 0;
return 0;
}

File diff suppressed because it is too large Load diff

Binary file not shown.

File diff suppressed because it is too large Load diff

View file

@ -1,36 +1,35 @@
RISCV_TOOL_PATH = $(wildcard ~/dev/riscv-gnu-toolchain/drops)
POCL_CC_PATH = $(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_INC_PATH = $(wildcard ../include)
POCL_LIB_PATH = $(wildcard ../lib)
VX_RT_PATH = $(wildcard ../../../runtime)
VX_SIMX_PATH = $(wildcard ../../../simX/obj_dir)
RISCV_TOOL_PATH=$(wildcard ~/dev/riscv-gnu-toolchain/drops)
CC = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
GDB = $(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gdb
POCL_CC_PATH=$(wildcard ~/dev/pocl/drops_riscv_cc)
POCL_RT_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt)
POCL_RT0_PATH=$(wildcard ~/dev/pocl/drops_riscv_rt0)
VX_RT_PATH=$(wildcard ../../../runtime)
VX_SIMX_PATH=$(wildcard ../../../simX/obj_dir)
CC=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-gcc
CXX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-g++
DMP=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objdump
HEX=$(RISCV_TOOL_PATH)/bin/riscv32-unknown-elf-objcopy
NEWLIB_PATH=$(RISCV_TOOL_PATH)/riscv32-unknown-elf/lib
VX_NEWLIB = $(VX_RT_PATH)/newlib/newlib.c
VX_STR = $(VX_RT_PATH)/startup/vx_start.s
VX_INT = $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_IO = $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_FIO = $(VX_RT_PATH)/fileio/fileio.s
VX_API = $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS = $(VX_STR) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
VX_SRCS = $(VX_RT_PATH)/newlib/newlib.c
VX_SRCS += $(VX_RT_PATH)/startup/vx_start.s
VX_SRCS += $(VX_RT_PATH)/intrinsics/vx_intrinsics.s
VX_SRCS += $(VX_RT_PATH)/io/vx_io.s $(VX_RT_PATH)/io/vx_io.c
VX_SRCS += $(VX_RT_PATH)/fileio/fileio.s
VX_SRCS += $(VX_RT_PATH)/tests/tests.c
VX_SRCS += $(VX_RT_PATH)/vx_api/vx_api.c
VX_SRCS += $(VX_STR) $(VX_FIO) $(VX_NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST)
VX_CFLAGS = -nostartfiles -Wl,-Bstatic,-T,$(VX_RT_PATH)/mains/vortex_link.ld
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
CXXFLAGS = -g -O0 -march=rv32im -mabi=ilp32
CXXFLAGS += -ffreestanding # program may not begin at main()
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -Wl,--gc-sections # enable garbage collection of unused input sections
CXXFLAGS += -fno-rtti -fno-non-call-exceptions # disable RTTI and exceptions
CXXFLAGS += -I$(POCL_INC_PATH)
LIBS = -lOpenCL
VX_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/libOpenCL.a
QEMU_LIBS = -Wl,--whole-archive lib$(PROJECT).a -Wl,--no-whole-archive $(POCL_LIB_PATH)/qemu/libOpenCL.a
PROJECT=vecadd
@ -40,10 +39,10 @@ lib$(PROJECT).a: kernel.cl
POCL_DEBUG=all POCL_DEBUG_LLVM_PASSES=1 LD_LIBRARY_PATH=$(RISCV_TOOL_PATH)/lib:$(POCL_CC_PATH)/lib $(POCL_CC_PATH)/bin/poclcc -o lib$(PROJECT).a kernel.cl
$(PROJECT).elf: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) -I$(POCL_RT_PATH)/include -L$(POCL_RT_PATH)/lib/static -L. $(VX_SRCS) main.cc -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).elf
$(CXX) $(CXXFLAGS) $(VX_CFLAGS) $(VX_SRCS) main.cc $(VX_LIBS) -o $(PROJECT).elf
$(PROJECT).qemu: main.cc lib$(PROJECT).a
$(CXX) $(CXXFLAGS) -I$(POCL_RT0_PATH)/include -L$(POCL_RT0_PATH)/lib/static -L. main.cc -Wl,--whole-archive -l$(PROJECT) -Wl,--no-whole-archive $(LIBS) -o $(PROJECT).qemu
$(CXX) $(CXXFLAGS) main.cc $(QEMU_LIBS) -o $(PROJECT).qemu
$(PROJECT).hex: $(PROJECT).elf
$(HEX) -O ihex $(PROJECT).elf $(PROJECT).hex
@ -52,10 +51,16 @@ $(PROJECT).dump: $(PROJECT).elf
$(DMP) -D $(PROJECT).elf > $(PROJECT).dump
run: $(PROJECT).hex
$(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
POCL_DEBUG=all $(VX_SIMX_PATH)/Vcache_simX -E -a rv32i --core $(PROJECT).hex -s -b 1> emulator.debug
qemu: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -d in_asm -D debug.log $(PROJECT).qemu
gdb-s: $(PROJECT).qemu
POCL_DEBUG=all $(RISCV_TOOL_PATH)/bin/qemu-riscv32 -g 1234 -d in_asm -D debug.log $(PROJECT).qemu
gdb-c: $(PROJECT).qemu
$(GDB) $(PROJECT).qemu
clean:
rm -rf *.elf *.dump *.hex *.a *.pocl
rm -rf *.o *.elf *.dump *.hex *.a *.pocl *.qemu

Binary file not shown.

View file

@ -31,46 +31,6 @@
_ret; \
})
// One entry of the kernel registry filled by _pocl_register_kernel() and
// searched by _pocl_query_kernel(). All pointers are stored as given; the
// registry does not copy what they point to.
typedef struct {
const char* name;             // lookup key, compared with strcmp()
const void* pfn;              // opaque pointer; presumably the kernel entry point - TODO confirm
uint32_t num_args;            // stored/returned as-is; presumably the kernel argument count
uint32_t num_locals;          // stored/returned as-is; presumably the local-buffer count
const uint8_t* arg_types;     // stored/returned as-is; element meaning not visible here
const uint32_t* local_sizes;  // stored/returned as-is; element meaning not visible here
} kernel_info_t;
// Number of entries of g_kernels currently in use.
static int g_num_kernels = 0;
// Fixed-capacity registry storage; MAX_KERNELS is defined outside this block.
static kernel_info_t g_kernels [MAX_KERNELS];
// Append a kernel description to the registry.
//
// The pointers are stored as given (nothing is copied), so they must remain
// valid for as long as the registry is queried.
//
// Returns 0 on success, -1 if `name` is NULL or the registry is full.
int _pocl_register_kernel(const char* name, const void* pfn, uint32_t num_args, uint32_t num_locals, const uint8_t* arg_types, const uint32_t* local_sizes) {
  // A NULL name can never be matched and would be handed to strcmp() on lookup.
  if (name == NULL)
    return -1;

  // `>=` rather than `==` so the table can never be indexed past its end.
  if (g_num_kernels >= MAX_KERNELS)
    return -1;

  kernel_info_t* kernel = &g_kernels[g_num_kernels];
  kernel->name = name;
  kernel->pfn = pfn;
  kernel->num_args = num_args;
  kernel->num_locals = num_locals;
  kernel->arg_types = arg_types;
  kernel->local_sizes = local_sizes;

  // Publish the entry only after it has been filled in completely.
  ++g_num_kernels;
  return 0;
}
int _pocl_query_kernel(const char* name, const void** p_pfn, uint32_t* p_num_args, uint32_t* p_num_locals, const uint8_t** p_arg_types, const uint32_t** p_local_sizes) {
for (int i = 0; i < g_num_kernels; ++i) {
kernel_info_t* kernel = g_kernels + i;
if (strcmp(kernel->name, name) != 0)
continue;
if (p_pfn) *p_pfn = kernel->pfn;
if (p_num_args) *p_num_args = kernel->num_args;
if (p_num_locals) *p_num_locals = kernel->num_locals;
if (p_arg_types) *p_arg_types = kernel->arg_types;
if (p_local_sizes) *p_local_sizes = kernel->local_sizes;
return 0;
}
return -1;
}
int exitcode = 0;
cl_context context = NULL;
cl_command_queue commandQueue = NULL;
@ -98,7 +58,9 @@ void cleanup() {
if (C) free(C);
}
int main (int argc, char **argv) {
int main (int argc, char **argv) {
printf("enter demo main\n");
cl_platform_id platform_id;
cl_device_id device_id;
size_t binary_size;

View file

@ -31,6 +31,10 @@ unsigned vx_threadID(void);
// Get hardware warp ID
unsigned vx_warpID(void);
// Get Number cycles/Inst
unsigned vx_getCycles(void);
unsigned vx_getInst(void);
void vx_resetStack(void);

View file

@ -49,6 +49,19 @@ vx_threadID:
csrr a0, 0x20 # read thread IDs
ret
# unsigned vx_getCycles(void)
# Returns the value of CSR 0x26 in a0.
.type vx_getCycles, @function
.global vx_getCycles
vx_getCycles:
csrr a0, 0x26 # read CSR 0x26 into a0; presumably the cycle counter - TODO confirm CSR number
ret
# unsigned vx_getInst(void)
# Returns the value of CSR 0x25 in a0.
.type vx_getInst, @function
.global vx_getInst
vx_getInst:
csrr a0, 0x25 # read CSR 0x25 into a0; presumably the instruction counter - TODO confirm CSR number
ret
.type vx_resetStack, @function
.global vx_resetStack

View file

@ -19,4 +19,5 @@ vx_vec_test:
vsw.v v2, (a3) # Store result
add a3, a3, t0 # Bump pointer
bnez a0, loop # Loop back
vmacc.vv v1, v2, v2
ret # Finished

View file

@ -0,0 +1,22 @@
.type vx_vec_test, @function
.global vx_vec_test
vx_vec_test:
# vector-vector add routine of 32-bit integers
# void vvaddint32(size_t n, const int*x, const int*y, int*z)
# { for (size_t i=0; i<n; i++) { z[i]=x[i]+y[i]; } }
#
# a0 = n, a1 = x, a2 = y, a3 = z
# Non-vector instructions are indented
#
# The loop label is placed on the vsetvli so that every pass recomputes the
# element count in t0 from the remaining n. t0 is scaled to a byte count by
# the slli below, so reusing it on the next pass without a fresh vsetvli would
# subtract bytes from the element counter and never shorten the final strip.
loop:
vsetvli t0, a0, e32 # Set vector length based on 32-bit vectors
vlw.v v0, (a1) # Get first vector
sub a0, a0, t0 # Decrement number done
slli t0, t0, 2 # Multiply number done by 4 bytes
add a1, a1, t0 # Bump pointer
vlw.v v1, (a2) # Get second vector
add a2, a2, t0 # Bump pointer
vadd.vv v2, v0, v1 # Sum vectors
vsw.v v2, (a3) # Store result
add a3, a3, t0 # Bump pointer
bnez a0, loop # Loop back
ret # Finished

View file

@ -5,7 +5,7 @@ int main()
{
vx_tmc(1);
int n = 5;
int n = 32;
int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};

File diff suppressed because it is too large Load diff

View file

@ -1,11 +1,11 @@
:0200000480007A
:100000009705000093854506130540006B10B50069
:10001000EF004005130510006B0005001385C108B3
:100020001386810C3306A64093050000EF10003CB8
:10003000171500001305C5AEEF00102AEF001034AD
:08004000EF0090156F00902AFB
:100020001386810C3306A64093050000EF10403C78
:1000300017150000130505AFEF00502AEF005034EC
:08004000EF00D0156F00D02A7B
:10004800B70700009387070063880700371500800B
:100058001305C5B16F00D027678000001305400065
:10005800130505B26F0010286780000013054000E3
:100068006B000500972100009381C179F3261002E7
:100078009396A601732600029315A6001316260070
:1000880037F1FF6F3301B1403301D1403301C10073
@ -13,448 +13,448 @@
:1000A80067800000D772850007E0051233055540C8
:1000B80093922200B38555008760061233065600D6
:1000C8005781000227E10602B3865600E31E05FCAD
:1000D80067800000130101FD232681021304010338
:1000E800232EA4FC232CB4FC232AC4FC8327C4FDA0
:1000F80083A707002326F4FE930744FD2322F4FE7A
:10010800832744FE03C707008327C4FE2380E70034
:100118008327C4FE93871700032744FE034717006D
:100128002380E7008327C4FE93872700032744FE24
:10013800034727002380E7008327C4FE93873700FF
:10014800032744FE034737002380E7008327C4FEC4
:10015800938747002326F4FE232404FE6F00400300
:10016800832784FE032784FDB307F70003C707002E
:100178008327C4FE2380E7008327C4FE93871700E4
:100188002326F4FE832784FE938717002324F4FE96
:10019800832744FD032784FEE344F7FC8327C4FE3A
:1001A8002320F4FE832704FE93F73700032704FE79
:1001B800B307F7002320F4FE832704FE2326F4FE6A
:1001C8008327C4FD0327C4FE23A0E7001300000013
:1001D8000324C1021301010367800000130101FC1D
:1001E800232E8102130401042326A4FC2324B4FC37
:1001F8008327C4FC83A707002326F4FE9307C4FDC6
:100208002322F4FE8327C4FE03C70700832744FE86
:100218002380E700832744FE938717000327C4FE43
:10022800034717002380E700832744FE93872700AE
:100238000327C4FE034727002380E700832744FEE3
:10024800938737000327C4FE034737002380E7005E
:100258008327C4FE938747002326F4FE232404FE45
:100268006F004003832784FE032784FCB307F7004D
:100278000327C4FE034707002380E7008327C4FE43
:10028800938717002326F4FE832784FE938717009D
:100298002324F4FE8327C4FD032784FEE344F7FCEC
:1002A8008327C4FE2320F4FE832704FE93F7370038
:1002B800032704FEB307F7002320F4FE832704FE78
:1002C8002326F4FE8327C4FC0327C4FE23A0E700EB
:1002D800130000000324C103130101046780000018
:1002E800130101FF232681001304010113000000FC
:1002F8000324C1001301010167800000130101FEFE
:10030800232E8100130401022326A4FE2324B4FE15
:10031800832784FE3727000023A2E7009307000005
:10032800138507000324C10113010102678000003F
:10033800130101FE232E1100232C81001304010256
:100348002326A4FEB72700801385C7B2EF00C04F4D
:1003580093071000138507008320C101032481013E
:100368001301010267800000130101FD2326110219
:100378002324810213040103232EA4FC232CB4FCA0
:10038800232AC4FCB70700712326F4FEB7070072BE
:100398002324F4FE930730002322F4FE130744FEBF
:1003A8009307C4FE13064000930507001385070052
:1003B800EFF05FD21307C4FD9307C4FE1306400095
:1003C8009305070013850700EFF0DFD0130784FDBE
:1003D8009307C4FE13064000930507001385070022
:1003E800EFF05FCF130744FD9307C4FE13064000E8
:1003F8009305070013850700EFF0DFCD83A74107BA
:10040800E7800700130704FE930784FE930507009F
:1004180013850700EFF09FDC832704FE1385070090
:100428008320C102032481021301010367800000B5
:10043800130101FD23261102232481021304010361
:10044800232EA4FC232CB4FC232AC4FCB707007178
:100458002324F4FEB70700722326F4FE9307400016
:100468002322F4FE130744FE930784FE130640007C
:100478009305070013850700EFF0DFC51307C4FDD8
:10048800930784FE130640009305070013850700B1
:10049800EFF05FC4130784FD930784FE1306400042
:1004A8009305070013850700EFF0DFC2130744FD2B
:1004B800930784FE13064000930507001385070081
:1004C800EFF05FC183A74107E7800700832744FD5A
:1004D800138507008320C10203248102130101034D
:1004E80067800000130101FD2326110223248102E5
:1004F80013040103232EA4FC232CB4FC232AC4FCDC
:10050800B70700712326F4FE930750002324F4FE56
:10051800130784FE9307C4FE1306400093050700E3
:1005280013850700EFF01FBB1307C4FD9307C4FE34
:10053800130640009305070013850700EFF09FB9E5
:100548009307C4FE032644FD832584FD1385070015
:10055800EFF05FB883A74107E7800700832744FDD2
:10056800138507008320C1020324810213010103BC
:1005780067800000130101FD232681021304010393
:10058800232EA4FC8327C4FD63D807008327C4FD5A
:10059800B307F040232EF4FC0327C4FDB73700004F
:1005A8009387078063D8E700B737000093870780F1
:1005B800232EF4FC83A781072326F4FE03A78107D3
:1005C8008327C4FD3307F70023ACE1068327C4FE65
:1005D800138507000324C10213010103678000008B
:1005E800130101FE232E1100232C810013040102A4
:1005F8002326A4FE13050000EF00001E13000000D0
:100608008320C101032481011301010267800000D6
:10061800130101FD2326110223248102130401037F
:10062800232EA4FC232CB4FC232AC4FCB707007196
:100638002326F4FEB70700722324F4FE9307700004
:100648002322F4FE130744FE9307C4FE130640005A
:100658009305070013850700EFF0DFA71307C4FD14
:100668009307C4FE1306400093050700138507008F
:10067800EFF05FA6130784FD9307C4FE130640003E
:100688009305070013850700EFF0DFA4130744FD67
:100698009307C4FE1306400093050700138507005F
:1006A800EFF05FA383A74107E7800700130704FE65
:1006B800930784FE9305070013850700EFF01FB228
:1006C800832704FE138507008320C10203248102C7
:1006D8001301010367800000130101FF23261100A5
:1006E8002324810013040101B7270080138507B470
:1006F800EF008015130000008320C100032481004F
:100708001301010167800000130101FF2326110076
:100718002324810013040101EF00800E93070500D4
:10072800138507008320C100032481001301010100
:1007380067800000130101FF232611002324810094
:1007480013040101B7270080138547B6EF00C00FD7
:10075800130000008320C10003248100130101015C
:1007680067800000130101FF232611002324810064
:1007780013040101B7270080138587B8EF00C00C68
:1007880083A781099386170023ACD1081385070036
:100798008320C10003248100130101016780000048
:1007A800130101FF232611002324810013040101F2
:1007B800B7270080138547BBEF000009130000002E
:1007C8008320C10003248100130101016780000018
:1007D8006B10B500678000006B00050067800000A3
:1007E8006B40B500678000006B2005006780000043
:1007F8006B300000678000007325100267800000DE
:100808007325000267800000130540006B00050097
:10081800F32610029396F600732600029315A6009D
:100828001316260037F1FF6F3301B1403301D14071
:100838003301C100F3261002638606001305000089
:100848006B00050067800000130141FF23201100A1
:100858002322B1008345050063880500EF00C0012D
:10086800130515006FF01FFF832001008325410049
:100878001301C10067800000B702010023A0B20085
:1008880067800000130101FD232611022324810241
:1008980013040103232EA4FC0327C4FD9307F000CF
:1008A80063E4E702B72700800327C4FD1317270076
:1008B80093878700B307F70083A70700138507000E
:1008C800EFF09FF86F004007930700022326F4FE1D
:1008D800A30504FE8327C4FE9387C7FF0327C4FD2F
:1008E800B357F70093F7F7002322F4FE832744FE5B
:1008F8006386070093071000A305F4FE8347B4FE40
:1009080063820702B7270080032744FE13172700D6
:1009180093878700B307F70083A7070013850700AD
:10092800EFF09FF28327C4FE9387C7FF2326F4FEC8
:100938008327C4FEE340F0FA8320C1020324810226
:100948001301010367800000130101FE232E11002B
:10095800232C8100130401022326A4FE2324B4FEC1
:100968000325C4FEEFF05FEE032584FEEFF09FF150
:10097800B7270080138587C1EFF01FED1300000033
:100988008320C10103248101130101026780000053
:10099800130101FD232611022324810213040103FC
:1009A80013051000EFF05FE3930750002322F4FED5
:1009B800832744FE9397270013850700EF004025FF
:1009C800930705002320F4FE832744FE939727000E
:1009D80013850700EF00C02393070500232EF4FCBE
:1009E800832744FE9397270013850700EF004022D2
:1009F80093070500232CF4FC232604FE6F008005D2
:100A08008327C4FE93972700032704FEB307F70044
:100A18001307100023A0E7008327C4FE939727003D
:100A28000327C4FDB307F7001307100023A0E7004E
:100A38008327C4FE93972700032784FDB307F70095
:100A48001307100023A0E7008327C4FE938717002D
:100A58002326F4FE0327C4FE832744FEE342F7FA65
:100A6800832684FD0326C4FD832504FE032544FE56
:100A7800EFF04FE3232404FE6F00C002832784FEB7
:100A880093972700032784FDB307F70083A7070080
:100A980013850700EFF01FDF832784FE9387170075
:100AA8002324F4FE032784FE832744FEE348F7FC4F
:100AB80013050000EFF05FD2130000001385070054
:100AC8008320C1020324810213010103678000000F
:100AD80093050500930600001306000013050000A7
:100AE8006F005024130101FF9305000023248100A7
:100AF8002326110013040500EF00502C03A501075D
:100B08008327C50363840700E780070013050400F3
:100B1800EFF01FAD130101FF232481002322910070
:100B280037240080B72400809387440013044400CE
:100B38003304F440232611001354244063020402B2
:100B4800931424009384C4FFB384F40083A704009F
:100B58001304F4FF9384C4FFE7800700E31804FE3E
:100B68008320C10003248100832441001301010173
:100B780067800000130101FF232481002320210145
:100B880037240080372900809307040013090900DF
:100B98003309F940232611002322910013592940D3
:100BA800630009021304040093040000832704006F
:100BB8009384140013044400E7800700E31899FEA7
:100BC800372400803729008093070400130949005F
:100BD8003309F9401359294063000902130404003A
:100BE8009304000083270400938414001304440032
:100BF800E7800700E31899FE8320C10003248100E1
:100C080083244100032901001301010167800000CA
:100C18009305050003A501086F0000019305050071
:100C280003A501086F00103D130101FD2322910265
:100C3800232E31012326110223248102232021039C
:100C4800232C4101232A5101232861012326710104
:100C580023248101232291019384B5009307600125
:100C68009309050063E497069307000163ECB72036
:100C7800EF0010059304000193078001130620007C
:100C8800138981C6B307F90003A44700138787FFB8
:100C98006302E424832744008326C40003268400D7
:100CA80093F7C7FFB307F40003A747002326D6002E
:100CB80023A4C600136717001385090023A2E700C1
:100CC800EF005000130584006F00801C93F484FF2C
:100CD80063CA041A63E8B41AEF00807E9307701F92
:100CE80063F8974693D79400638E071C1307400058
:100CF800636CF73E93D764001386970313858703C5
:100D080093163600138981C6B306D90003A446009A
:100D1800938686FF638C8602832744009305F00040
:100D280093F7C7FF3387974063C0E50263580734DA
:100D38000324C400638C86008327440093F7C7FF0D
:100D480033879740E3D4E5FE130605000324090121
:100D580013088900630C0419832544001307F00065
:100D680093F5C5FFB3879540634AF740232A0901E5
:100D78002328090163D4073E9307F01F63E6B730C1
:100D880093D53500938715009397370003254900BD
:100D9800B307F90083A6070093D525401307100071
:100DA8003317B7003367A700938587FF2326B4005E
:100DB8002324D4002322E90023A0870023A6860049
:100DC8009357264093061000B396F600636AD7122D
:100DD800B3F7E60063940702939616001376C6FFEE
:100DE800B3F7E60013064600639A070093961600C9
:100DF800B3F7E60013064600E38A07FE1305F00082
:100E080093183600B30819019385080013030600E8
:100E180003A4C500639A85006F00402F6352073012
:100E28000324C4006384852E8327440093F7C7FFF7
:100E380033879740E354E5FE8326C40003268400E5
:100E480093E514002322B4002326D60023A4C60069
:100E5800B3049400232A9900232899009366170065
:100E680023A6040123A4040123A2D400B307F40099
:100E78001385090023A0E700EF00C0641305840070
:100E88006F0000019307C00023A0F90013050000BC
:100E98008320C10203248102832441020329010221
:100EA8008329C101032A8101832A4101032B0101FE
:100EB800832BC100032C8100832C41001301010303
:100EC8006780000093060020130600041305F00352
:100ED8006FF05FE303A4C70013062600E39C87DADC
:100EE8000324090113088900E31804E703274900CC
:100EF8009357264093061000B396F600E37AD7EC92
:100F080003248900832A440013FCCAFF63689C00F9
:100F1800B3079C401307F000634EF71283AA4109F8
:100F280003A741089307F0FF330A8401B38A5401E9
:100F38006308F732B71700009387F700B38AFA00FF
:100F4800B7F7FFFFB3FAFA0093850A001385090083
:100F5800EF0080579307F0FF130B0500630AF5268F
:100F680063664527938BC10983A70B00B387FA00F3
:100F780023A0FB00138707006306AA3883A641084D
:100F88009307F0FF638EF638330A4B413307EA00C4
:100F980023A0EB00937C7B0063860C2E330B9B41D4
:100FA800B7170000130B8B00138AF7FFB30A5B0116
:100FB8009387870033F74A01B3879741B387E740A0
:100FC80033FA470193050A0013850900EF00C04F63
:100FD8009307F0FF6300F53A33056541B30A45010D
:100FE80083A70B002324690193EA1A00B307FA00C8
:100FF80023A0FB0023225B01630824331306F000BF
:10100800637886338326440013074CFF137787FFE2
:1010180093F61600B3E6E6002322D40093055000A9
:10102800B306E40023A2B60023A4B600636EE63438
:10103800832A4B0013040B0083A6010963F4F6000E
:1010480023A8F10883A6C10863F8F61823A6F108B7
:101058006F00801813E714002322E400B3049400FF
:101068002324990093E717001385090023A2F400AD
:10107800EF004045130584006FF09FE18326C4000C
:10108800032684006FF01FC293D795001307400012
:101098006374F712130740016360F7229386C7054C
:1010A8001387B70593963600B306D90083A70600C1
:1010B800938686FF638AF61A03A747001377C7FF4C
:1010C80063F6E50083A78700E398F6FE83A6C700CA
:1010D800032749002326D4002324F40023A48600F0
:1010E80023A687006FF0DFCD130740016378F71060
:1010F800130740056360F71E93D7C4001386F706ED
:101108001385E706931636006FF0DFBF130313004D
:101118009377330093858500E39C07CE6F00C00F5B
:10112800B307F40003A747008326C40003268400FE
:101138001367170023A2E7002326D60013850900AA
:1011480023A4C600EF000038130584006FF05FD4B5
:1011580013D63400938784006FF09FB2B305B400B0
:1011680083A745001385090093E7170023A2F5001C
:10117800EF004035130584006FF09FD113E714008A
:101188002322E400B3049400232A99002328990019
:1011980013E7170023A6040123A4040123A2E400F3
:1011A800B305B4001385090023A0F500EF008031D2
:1011B800130584006FF0DFCD93D7650093869703FE
:1011C80013878703939636006FF01FEE630224138C
:1011D80003248900832A440093FACAFFB3879A40FC
:1011E80063E69A001307F000E346F7E61385090063
:1011F800EF00402D130500006FF09FC91386C70547
:101208001385B705931636006FF0DFAF83A7880004
:101218001306F6FF6394171D93773600938888FFAB
:10122800E39607FE0327490093C7F6FFB3F7E700E5
:101238002322F90093961600E3E4D7CCE38206CC88
:1012480033F7F600631A07009396160033F7F60093
:1012580013034300E30A07FE130603006FF05FBAA7
:10126800938A0A016FF05FCE032549009355274002
:10127800130710003317B7003367A7002322E900CC
:101288006FF05FE5B71700001387F7FF330A5B01BC
:10129800337AEA00B387474133FAE70093050A0037
:1012A80013850900EF0040229307F0FFE316F5D2FB
:1012B800130A00006FF0DFD2130740056360F708D8
:1012C80093D7C5009386F7061387E70693963600EB
:1012D8006FF09FDD130740156360F70893D7F4009C
:1012E8001386870713857707931636006FF09FA13B
:1012F800938BC10903A70B003387EA0023A0EB00F7
:101308006FF0DFC793164A01E39A06C603248900E3
:10131800B30A5C0193EA1A00232254016FF0DFD16B
:1013280023A261096FF01FC713040B006FF0DFD011
:10133800930710002322FB006FF05FEB13074015A3
:101348006362F70693D7F5009386870713877707B5
:10135800939636006FF05FD5130740556362F70622
:1013680093D724011386D7071385C707931636002A
:101378006FF05F99938C8CFFB38A9A01B38A6A41A4
:10138800130A00006FF0DFC5930584001385090078
:10139800EF0040460324890083A70B00832A4400FA
:1013A8006FF09FC9130740556364F70293D725016F
:1013B8009386D7071387C707939636006FF0DFCE5B
:1013C8009306803F1306F0071305E0076FF09F931D
:1013D8009306803F1307E0076FF01FCD832749006E
:1013E8006FF05FE51303F00013070500637EC30287
:1013F8009377F7006390070A63920508937606FFD0
:101408001376F600B386E6002320B7002322B70040
:101418002324B7002326B70013070701E366D7FE86
:101428006314060067800000B306C3409396260045
:1014380097020000B38656006780C6002307B700EE
:10144800A306B7002306B700A305B7002305B70016
:10145800A304B7002304B700A303B7002303B7000E
:10146800A302B7002302B700A301B7002301B70006
:10147800A300B7002300B7006780000093F5F50FBD
:1014880093968500B3E5D50093960501B3E5D5009D
:101498006FF0DFF69396270097020000B386560098
:1014A80093820000E78006FA93800200938707FF83
:1014B8003307F7403306F600E378C3F66FF0DFF33F
:1014C8006780000067800000130101FF232481006A
:1014D800232291009304050013850500232611009B
:1014E80023A2010CEFF00F899307F0FF630CF500BE
:1014F8008320C100032481008324410013010101DA
:101508006780000083A7410CE38407FE8320C100A5
:101518000324810023A0F400832441001301010166
:101528006780000003A7010783278714638C0704DB
:1015380003A747001308F001634EE80613182700B5
:1015480063060502338307012324C30883A8871889
:10155800130610003316E600B3E8C80023A41719D1
:101568002324D310930620006304D5021307170021
:1015780023A2E700B387070123A4B70013050000DF
:10158800678000009307C7142324F7146FF05FFAED
:1015980083A6C7181307170023A2E70033E6C6007F
:1015A80023A6C718B387070123A4B70013050000B3
:1015B800678000001305F0FF67800000130101FD3C
:1015C8002324810103AC0107232E3101232C41017F
:1015D800232A510123286101232611022324810291
:1015E800232291022320210323267101930A050057
:1015F800138B0500130A10009309F0FF03298C14BC
:1016080063080902832449001384F4FF6342040237
:1016180093942400B304990063040B0483A7441033
:10162800638067051304F4FF9384C4FFE31634FF53
:101638008320C10203248102832441020329010279
:101648008329C101032A8101832A4101032B010156
:10165800832BC100032C8100130101036780000064
:101668008327490083A644009387F7FF638A87048A
:1016780023A20400E38806FA8327891833178A000F
:10168800832B4900B377F700639E0700E7800600C5
:1016980083274900E39477F783278C14E38427F999
:1016A8006FF0DFF58327C91883A544083377F7005F
:1016B800631C070013850A00E78006006FF05FFDD2
:1016C800232289006FF01FFB13850500E7800600C1
:1016D8006FF01FFC130101FE23282101232C810038
:1016E800232A91002326310113840500232E11009B
:1016F80093090500138981C6EFF09FDC83268900D2
:10170800371700009307F7FE83A446003384874009
:1017180093F4C4FF330494001354C4001304F4FF77
:101728001314C400634EE4009305000013850900F8
:10173800EFF09FD983278900B38797006306F502E6
:1017480013850900EFF01FD88320C101032481010C
:1017580083244101032901018329C10013050000E5
:101768001301010267800000B3058040138509005A
:10177800EFF09FD59307F0FF6304F50483A7C10931
:1017880083268900B384844093E41400338487401B
:101798001385090023A2960023AE8108EFF09FD29B
:1017A8008320C1010324810183244101032901010C
:1017B8008329C1001305100013010102678000008E
:1017C8009305000013850900EFF01FD00327890057
:1017D8009306F000B307E540E3D4F6F683A6410884
:1017E80093E717002322F7003305D54023AEA1085D
:1017F8006FF01FF563860512130101FF2324810092
:101808002322910013840500930405002326110068
:10181800EFF01FCB0325C4FF130784FF9377E5FF81
:101828003306F700938581C68326460003A8850002
:1018380093F6C6FF630EC8182322D6001375150049
:101848003308D6006310050A032384FF03284800E1
:101858003307674083288700138501C7B38767006C
:10186800137818006380A8140323C70023A6680010
:10187800232413016306081C93E617002322D700CC
:101888002320F6009306F01F63E8F60A93D7370083
:10189800938617009396360003A84500B386D500B3
:1018A80003A5060013D6274093071000B397C70077
:1018B800B3E70701138686FF2326C7002324A70062
:1018C80023A2F50023A0E6002326E50003248100D7
:1018D8008320C10013850400832441001301010102
:1018E8006FF05FBE0325480013751500631C0502E1
:1018F800B387D700138501C78326860093E81700AE
:101908003308F700638AA6140326C60023A6C60078
:101918002324D600232217012320F8006FF09FF616
:101928006780000093E61700232ED4FE2320F600DC
:101938009306F01FE3FCF6F493D6970013064000D5
:10194800636ED60C93D667001385960313868603B9
:10195800131535003385A50083260500130585FF7B
:10196800630AD51003A646001376C6FF63F6C700C0
:1019780083A68600E318D5FE03A5C6002326A70084
:101988002324D700032481002324E5008320C100F9
:10199800138504008324410023A6E60013010101F6
:1019A8006FF05FB263180812832586000326C6000D
:1019B800B387F60093E6170023A6C5002324B600D4
:1019C8002322D7003307F7002320F7006FF01FF01A
:1019D80013751500B387D70063100502032584FF2C
:1019E8003307A7408326C70003268700B387A700CD
:1019F8002326D60023A4C60013E6170083A6810871
:101A08002322C70023A4E500E3E2D7EC83A541091C
:101A180013850400EFF01FCC6FF05FEB1306400155
:101A28006374D602130640056364D60693D6C700CE
:101A38001385F6061386E606131535006FF09FF139
:101A4800B387D7006FF05FEB1385C6051386B6051D
:101A5800131535006FF01FF023AAE50023A8E50051
:101A68002326A7002324A700232217012320F800F8
:101A78006FF0DFE503A845001356264093071000D2
:101A88003396C7003366060123A2C5006FF01FEF27
:101A980013064015636CD60093D6F70013858607A6
:101AA80013867607131535006FF0DFEA13064055E5
:101AB800636CD60093D627011385D6071386C6070D
:101AC800131535006FF01FE91305803F1306E00773
:101AD8006FF05FE893E617002322D7002320F60073
:041AE8006FF0DFDEDE
:101AEC003000000031000000320000003300000024
:101AFC003400000035000000360000003700000004
:101B0C003800000039000000610000006200000095
:101B1C006300000064000000650000006600000027
:101B2C0048656C6C6F2066726F6D205F69736174B1
:101B3C0074790A004552524F523A205F6B696C6CB3
:101B4C00206E6F742079657420696D706C656D659D
:101B5C006E7465640A0000004552524F523A205F81
:101B6C00756E6C696E6B206E6F742079657420696C
:101B7C006D706C656D656E7465640A004552524FEC
:101B8C00523A205F67657474696D656F666461793C
:101B9C00206E6F742079657420696D706C656D654D
:101BAC006E7465640A0000004552524F523A205F31
:101BBC006C696E6B206E6F742079657420696D7022
:101BCC006C656D656E7465640A0000003000000081
:101BDC00310000003200000033000000340000002F
:101BEC00350000003600000037000000380000000F
:101BFC00390000006100000062000000630000007A
:0E1C0C006400000065000000660000000A0091
:1000D800D72021B667800000130101FD2326810285
:1000E80013040103232EA4FC232CB4FC232AC4FCF0
:1000F8008327C4FD83A707002326F4FE930744FD46
:100108002322F4FE832744FE03C707008327C4FE87
:100118002380E7008327C4FE93871700032744FE44
:10012800034717002380E7008327C4FE938727002F
:10013800032744FE034727002380E7008327C4FEE4
:1001480093873700032744FE034737002380E700DF
:100158008327C4FE938747002326F4FE232404FE46
:100168006F004003832784FE032784FDB307F7004D
:1001780003C707008327C4FE2380E7008327C4FE44
:10018800938717002326F4FE832784FE938717009E
:100198002324F4FE832744FD032784FEE344F7FC6D
:1001A8008327C4FE2320F4FE832704FE93F7370039
:1001B800032704FEB307F7002320F4FE832704FE79
:1001C8002326F4FE8327C4FD0327C4FE23A0E700EB
:1001D800130000000324C10213010103678000001B
:1001E800130101FC232E8102130401042326A4FC1D
:1001F8002324B4FC8327C4FC83A707002326F4FE2A
:100208009307C4FD2322F4FE8327C4FE03C7070017
:10021800832744FE2380E700832744FE9387170043
:100228000327C4FE034717002380E700832744FE03
:10023800938727000327C4FE034727002380E7008E
:10024800832744FE938737000327C4FE03473700FC
:100258002380E7008327C4FE938747002326F4FE04
:10026800232404FE6F004003832784FE032784FCB5
:10027800B307F7000327C4FE034707002380E700FE
:100288008327C4FE938717002326F4FE832784FE62
:10029800938717002324F4FE8327C4FD032784FED5
:1002A800E344F7FC8327C4FE2320F4FE832704FEDF
:1002B80093F73700032704FEB307F7002320F4FE63
:1002C800832704FE2326F4FE8327C4FC0327C4FEE9
:1002D80023A0E700130000000324C1031301010455
:1002E80067800000130101FF232681001304010128
:1002F800130000000324C1001301010167800000FE
:10030800130101FE232E8100130401022326A4FEFB
:100318002324B4FE832784FE3727000023A2E700A6
:1003280093070000138507000324C101130101028C
:1003380067800000130101FE232E1100232C810089
:10034800130401022326A4FEB7270080138507B3F0
:10035800EF00C04F93071000138507008320C101E9
:10036800032481011301010267800000130101FDCC
:10037800232611022324810213040103232EA4FC43
:10038800232CB4FC232AC4FCB70700712326F4FEEF
:10039800B70700722324F4FE930730002322F4FEEB
:1003A800130744FE9307C4FE130640009305070095
:1003B80013850700EFF05FD21307C4FD9307C4FE4F
:1003C800130640009305070013850700EFF0DFD000
:1003D800130784FD9307C4FE130640009305070026
:1003E80013850700EFF05FCF130744FD9307C4FEA2
:1003F800130640009305070013850700EFF0DFCDD3
:1004080083A74107E7800700130704FE930784FECC
:100418009305070013850700EFF09FDC832704FE90
:10042800138507008320C1020324810213010103FD
:1004380067800000130101FD232611022324810295
:1004480013040103232EA4FC232CB4FC232AC4FC8C
:10045800B70700712324F4FEB70700722326F4FEC1
:10046800930740002322F4FE130744FE930784FEFB
:10047800130640009305070013850700EFF0DFC55A
:100488001307C4FD930784FE130640009305070075
:1004980013850700EFF05FC4130784FD930784FEFC
:1004A800130640009305070013850700EFF0DFC22D
:1004B800130744FD930784FE1306400093050700C5
:1004C80013850700EFF05FC183A74107E7800700A6
:1004D800832744FD138507008320C102032481027A
:1004E8001301010367800000130101FD2326110297
:1004F8002324810213040103232EA4FC232CB4FC1F
:10050800232AC4FCB70700712326F4FE9307500082
:100518002324F4FE130784FE9307C4FE1306400049
:100528009305070013850700EFF01FBB1307C4FDF1
:100538009307C4FE130640009305070013850700C0
:10054800EFF09FB99307C4FE032644FD832584FD7D
:1005580013850700EFF05FB883A74107E78007001E
:10056800832744FD138507008320C10203248102E9
:100578001301010367800000130101FD2326810296
:1005880013040103232EA4FC8327C4FD63D80700AA
:100598008327C4FDB307F040232EF4FC0327C4FDD2
:1005A800B73700009387078063D8E700B7370000A4
:1005B80093870780232EF4FC83A781072326F4FE64
:1005C80003A781078327C4FD3307F70023ACE1069F
:1005D8008327C4FE138507000324C1021301010306
:1005E80067800000130101FE232E1100232C8100D7
:1005F800130401022326A4FE13050000EF00001EC9
:10060800130000008320C1010324810113010102AA
:1006180067800000130101FD2326110223248102B3
:1006280013040103232EA4FC232CB4FC232AC4FCAA
:10063800B70700712326F4FEB70700722324F4FEDF
:10064800930770002322F4FE130744FE9307C4FEA9
:10065800130640009305070013850700EFF0DFA796
:100668001307C4FD9307C4FE130640009305070053
:1006780013850700EFF05FA6130784FD9307C4FEF8
:10068800130640009305070013850700EFF0DFA469
:10069800130744FD9307C4FE1306400093050700A3
:1006A80013850700EFF05FA383A74107E7800700E2
:1006B800130704FE930784FE9305070013850700BC
:1006C800EFF01FB2832704FE138507008320C102C1
:1006D800032481021301010367800000130101FF55
:1006E800232611002324810013040101B727008069
:1006F800138547B4EF008015130000008320C10064
:10070800032481001301010167800000130101FF28
:10071800232611002324810013040101EF00800E19
:1007280093070500138507008320C1000324810077
:100738001301010167800000130101FF2326110046
:100748002324810013040101B7270080138587B68D
:10075800EF00C00F130000008320C10003248100B4
:100768001301010167800000130101FF2326110016
:100778002324810013040101B72700801385C7B81B
:10078800EF00C00C83A781099386170023ACD1081A
:10079800138507008320C100032481001301010190
:1007A80067800000130101FF232611002324810024
:1007B80013040101B7270080138587BBEF000009E8
:1007C800130000008320C1000324810013010101EC
:1007D800678000006B10B500678000006B000500A3
:1007E800678000006B40B500678000006B20050043
:1007F800678000006B3000006780000073251002DE
:100808006780000073250002678000001305400020
:100818006B000500F32610029396F600732600027B
:100828009315A6001316260037F1FF6F3301B14068
:100838003301D1403301C100F3261002638606005C
:10084800130500006B00050067800000130141FFDD
:10085800232011002322B100834505006388050089
:10086800EF00C001130515006FF01FFF8320010082
:10087800832541001301C10067800000B702010011
:1008880023A0B20067800000130101FD2326110296
:100898002324810213040103232EA4FC0327C4FD8F
:1008A8009307F00063E4E702B72700800327C4FD3D
:1008B8001317270093878700B307F70083A707005C
:1008C80013850700EFF09FF86F00400793070002B9
:1008D8002326F4FEA30504FE8327C4FE9387C7FFDF
:1008E8000327C4FDB357F70093F7F7002322F4FE5C
:1008F800832744FE6386070093071000A305F4FED0
:100908008347B4FE63820702B7270080032744FEAB
:100918001317270093878700B307F70083A70700FB
:1009280013850700EFF09FF28327C4FE9387C7FF64
:100938002326F4FE8327C4FEE340F0FA8320C10295
:10094800032481021301010367800000130101FEE3
:10095800232E1100232C8100130401022326A4FE58
:100968002324B4FE0325C4FEEFF05FEE032584FEC6
:10097800EFF09FF1B72700801385C7C1EFF01FED97
:10098800130000008320C101032481011301010227
:1009980067800000130101FD232611022324810230
:1009A8001304010313051000EFF05FE3930700023F
:1009B8002322F4FE832744FE93972700138507001C
:1009C800EF004025930705002320F4FE832744FE0B
:1009D8009397270013850700EF00C02393070500AE
:1009E800232EF4FC832744FE9397270013850700E2
:1009F800EF00402293070500232CF4FC232604FE75
:100A08006F0080058327C4FE93972700032704FE01
:100A1800B307F7001307100023A0E7008327C4FEDD
:100A2800939727000327C4FDB307F70013071000A7
:100A380023A0E7008327C4FE93972700032784FD9C
:100A4800B307F7001307100023A0E7008327C4FEAD
:100A5800938717002326F4FE0327C4FE832744FE4A
:100A6800E342F7FA832684FD0326C4FD832504FEAA
:100A7800032544FEEFF00FE3232404FE6F00C002B9
:100A8800832784FE93972700032784FDB307F70085
:100A980083A7070013850700EFF01FDF832784FE75
:100AA800938717002324F4FE032784FE832744FE3C
:100AB800E348F7FC13050000EFF05FD213000000D5
:100AC800138507008320C102032481021301010357
:100AD80067800000930505009306000013060000D8
:100AE800130500006F005024130101FF9305000057
:100AF800232481002326110013040500EF00502C45
:100B080003A501078327C50363840700E78007005F
:100B180013050400EFF01FAD130101FF232481002A
:100B28002322910037240080B72400809387440053
:100B3800130444003304F4402326110013542440C2
:100B480063020402931424009384C4FFB384F40062
:100B580083A704001304F4FF9384C4FFE78007000D
:100B6800E31804FE8320C10003248100832441008C
:100B78001301010167800000130101FF2324810094
:100B8800232021013724008037290080930704009F
:100B9800130909003309F940232611002322910083
:100BA8001359294063000902130404009304000048
:100BB800832704009384140013044400E78007008B
:100BC800E31899FE37240080372900809307040032
:100BD800130949003309F9401359294063000902F0
:100BE8001304040093040000832704009384140072
:100BF80013044400E7800700E31899FE8320C1002E
:100C08000324810083244100032901001301010109
:100C1800678000009305050003A501086F00000127
:100C28009305050003A501086F00103D130101FDA0
:100C380023229102232E310123261102232481022B
:100C480023202103232C4101232A51012328610158
:100C58002326710123248101232291019384B50065
:100C6800930760019309050063E497069307000161
:100C780063ECB720EF00100593040001930780018F
:100C880013062000138981C6B307F90003A447009F
:100C9800138787FF6302E424832744008326C40064
:100CA8000326840093F7C7FFB307F40003A74700A0
:100CB8002326D60023A4C60013671700138509004E
:100CC80023A2E700EF005000130584006F00801C8A
:100CD80093F484FF63CA041A63E8B41AEF00807EB1
:100CE8009307701F63F8974693D79400638E071C89
:100CF80013074000636CF73E93D76400138697038D
:100D08001385870393163600138981C6B306D90065
:100D180003A44600938686FF638C860283274400DB
:100D28009305F00093F7C7FF3387974063C0E50248
:100D3800635807340324C400638C86008327440067
:100D480093F7C7FF33879740E3D4E5FE1306050002
:100D58000324090113088900630C0419832544003E
:100D68001307F00093F5C5FFB3879540634AF74032
:100D7800232A09012328090163D4073E9307F01F9A
:100D880063E6B73093D535009387150093973700FE
:100D980003254900B307F90083A6070093D525402A
:100DA800130710003317B7003367A700938587FF31
:100DB8002326B4002324D4002322E90023A087009B
:100DC80023A686009357264093061000B396F60094
:100DD800636AD712B3F7E600639407029396160086
:100DE8001376C6FFB3F7E60013064600639A0700BA
:100DF80093961600B3F7E60013064600E38A07FE4B
:100E08001305F00093183600B308190193850800FC
:100E18001303060003A4C500639A85006F00402FE2
:100E2800635207300324C4006384852E832744005B
:100E380093F7C7FF33879740E354E5FE8326C40042
:100E48000326840093E514002322B4002326D60049
:100E580023A4C600B3049400232A990023289900E8
:100E68009366170023A6040123A4040123A2D40037
:100E7800B307F4001385090023A0E700EF00C0645E
:100E8800130584006F0000019307C00023A0F90038
:100E9800130500008320C102032481028324410238
:100EA800032901028329C101032A8101832A4101FF
:100EB800032B0101832BC100032C8100832C4100EB
:100EC8001301010367800000930600201306000445
:100ED8001305F0036FF05FE303A4C70013062600B1
:100EE800E39C87DA0324090113088900E31804E75F
:100EF800032749009357264093061000B396F6003F
:100F0800E37AD7EC03248900832A440013FCCAFF40
:100F180063689C00B3079C401307F000634EF71208
:100F280083AA410903A741089307F0FF330A840104
:100F3800B38A54016308F732B71700009387F700A4
:100F4800B38AFA00B7F7FFFFB3FAFA0093850A00ED
:100F580013850900EF0080579307F0FF130B050076
:100F6800630AF52663664527938BC10983A70B009F
:100F7800B387FA0023A0FB00138707006306AA388B
:100F880083A641089307F0FF638EF638330A4B4176
:100F98003307EA0023A0EB00937C7B0063860C2ECA
:100FA800330B9B41B7170000130B8B00138AF7FF15
:100FB800B30A5B019387870033F74A01B3879741E8
:100FC800B387E74033FA470193050A001385090000
:100FD800EF00C04F9307F0FF6300F53A3305654112
:100FE800B30A450183A70B002324690193EA1A0079
:100FF800B307FA0023A0FB0023225B016308243314
:101008001306F000637886338326440013074CFFE9
:10101800137787FF93F61600B3E6E6002322D40081
:1010280093055000B306E40023A2B60023A4B6003B
:10103800636EE634832A4B0013040B0083A6010970
:1010480063F4F60023A8F10883A6C10863F8F6182C
:1010580023A6F1086F00801813E714002322E40088
:10106800B30494002324990093E71700138509001B
:1010780023A2F400EF004045130584006FF09FE1C0
:101088008326C400032684006FF01FC293D79500FF
:10109800130740006374F712130740016360F722D7
:1010A8009386C7051387B70593963600B306D9000C
:1010B80083A70600938686FF638AF61A03A747006C
:1010C8001377C7FF63F6E50083A78700E398F6FE6A
:1010D80083A6C700032749002326D4002324F4004D
:1010E80023A4860023A687006FF0DFCD13074001F5
:1010F8006378F710130740056360F71E93D7C400A1
:101108001386F7061385E706931636006FF0DFBFE0
:10111800130313009377330093858500E39C07CE70
:101128006F00C00FB307F40003A747008326C4006D
:10113800032684001367170023A2E7002326D6009E
:101148001385090023A4C600EF00003813058400A6
:101158006FF05FD413D63400938784006FF09FB28A
:10116800B305B40083A745001385090093E717006A
:1011780023A2F500EF004035130584006FF09FD1DE
:1011880013E714002322E400B3049400232A9900EF
:101198002328990013E7170023A6040123A40401B8
:1011A80023A2E400B305B4001385090023A0F500C9
:1011B800EF008031130584006FF0DFCD93D7650011
:1011C8009386970313878703939636006FF01FEE75
:1011D8006302241303248900832A440093FACAFF74
:1011E800B3879A4063E69A001307F000E346F7E6F0
:1011F80013850900EF00402D130500006FF09FC90B
:101208001386C7051385B705931636006FF0DFAF51
:1012180083A788001306F6FF6394171D937736009B
:10122800938888FFE39607FE0327490093C7F6FFD4
:10123800B3F7E7002322F90093961600E3E4D7CC2E
:10124800E38206CC33F7F600631A0700939616007C
:1012580033F7F60013034300E30A07FE13060300FF
:101268006FF05FBA938A0A016FF05FCE03254900D9
:1012780093552740130710003317B7003367A700AB
:101288002322E9006FF05FE5B71700001387F7FF27
:10129800330A5B01337AEA00B387474133FAE70040
:1012A80093050A0013850900EF0040229307F0FF19
:1012B800E316F5D2130A00006FF0DFD213074005DA
:1012C8006360F70893D7C5009386F7061387E70688
:1012D800939636006FF09FDD130740156360F7089B
:1012E80093D7F4001386870713857707931636007C
:1012F8006FF09FA1938BC10903A70B003387EA0006
:1013080023A0EB006FF0DFC793164A01E39A06C6E5
:1013180003248900B30A5C0193EA1A0023225401CA
:101328006FF0DFD123A261096FF01FC713040B0010
:101338006FF0DFD0930710002322FB006FF05FEB04
:10134800130740156362F70693D7F500938687075E
:1013580013877707939636006FF05FD513074055CC
:101368006362F70693D724011386D7071385C70747
:10137800931636006FF05F99938C8CFFB38A9A01AD
:10138800B38A6A41130A00006FF0DFC59305840031
:1013980013850900EF0040460324890083A70B004A
:1013A800832A44006FF09FC9130740556364F7020E
:1013B80093D725019386D7071387C70793963600D7
:1013C8006FF0DFCE9306803F1306F0071305E007A2
:1013D8006FF09F939306803F1307E0076FF01FCDD0
:1013E800832749006FF05FE51303F000130705003A
:1013F800637EC3029377F7006390070A6392050838
:10140800937606FF1376F600B386E6002320B7002E
:101418002322B7002324B7002326B70013070701A8
:10142800E366D7FE6314060067800000B306C34076
:101438009396260097020000B38656006780C60080
:101448002307B700A306B7002306B700A305B70014
:101458002305B700A304B7002304B700A303B7000C
:101468002303B700A302B7002302B700A301B70004
:101478002301B700A300B7002300B700678000006E
:1014880093F5F50F93968500B3E5D500939605017E
:10149800B3E5D5006FF0DFF69396270097020000BA
:1014A800B386560093820000E78006FA9380020014
:1014B800938707FF3307F7403306F600E378C3F650
:1014C8006FF0DFF36780000067800000130101FF01
:1014D800232481002322910093040500138505002D
:1014E8002326110023A2010CEFF00F899307F0FFC8
:1014F800630CF5008320C10003248100832441008C
:10150800130101016780000083A7410CE38407FEF3
:101518008320C1000324810023A0F4008324410018
:10152800130101016780000003A7010783278714BF
:10153800638C070403A747001308F001634EE8060D
:101548001318270063060502338307012324C30801
:1015580083A88718130610003316E600B3E8C800FE
:1015680023A417192324D310930620006304D5025B
:101578001307170023A2E700B387070123A4B700C6
:1015880013050000678000009307C7142324F7148D
:101598006FF05FFA83A6C7181307170023A2E700A6
:1015A80033E6C60023A6C718B387070123A4B700EC
:1015B80013050000678000001305F0FF6780000036
:1015C800130101FD2324810103AC0107232E3101FE
:1015D800232C4101232A51012328610123261102CA
:1015E800232481022322910223202103232671012F
:1015F800930A0500138B0500130A10009309F0FFE6
:1016080003298C1463080902832449001384F4FF16
:101618006342040293942400B304990063040B0406
:1016280083A74410638067051304F4FF9384C4FF01
:10163800E31634FF8320C10203248102832441027C
:10164800032901028329C101032A8101832A410157
:10165800032B0101832BC100032C8100130101031B
:10166800678000008327490083A644009387F7FF1B
:10167800638A870423A20400E38806FA832789186B
:1016880033178A00832B4900B377F700639E07005E
:10169800E780060083274900E39477F783278C14B3
:1016A800E38427F96FF0DFF58327C91883A5440879
:1016B8003377F700631C070013850A00E7800600EC
:1016C8006FF05FFD232289006FF01FFB1385050073
:1016D800E78006006FF01FFC130101FE232821019B
:1016E800232C8100232A910023263101138405002D
:1016F800232E110093090500138981C6EFF09FDCA2
:1017080083268900371700009307F7FE83A4460055
:101718003384874093F4C4FF330494001354C40003
:101728001304F4FF1314C400634EE400930500008F
:1017380013850900EFF09FD983278900B3879700A5
:101748006306F50213850900EFF01FD88320C10155
:101758000324810183244101032901018329C10054
:10176800130500001301010267800000B3058040E3
:1017780013850900EFF09FD59307F0FF6304F50484
:1017880083A7C10983268900B384844093E41400A5
:10179800338487401385090023A2960023AE81086D
:1017A800EFF09FD28320C1010324810183244101EA
:1017B800032901018329C100130510001301010247
:1017C800678000009305000013850900EFF01FD023
:1017D800032789009306F000B307E540E3D4F6F643
:1017E80083A6410893E717002322F7003305D54065
:1017F80023AEA1086FF01FF563860512130101FFE0
:1018080023248100232291001384050093040500FA
:1018180023261100EFF01FCB0325C4FF130784FF15
:101828009377E5FF3306F700938581C68326460044
:1018380003A8850093F6C6FF630EC8182322D600B6
:10184800137515003308D6006310050A032384FFB7
:10185800032848003307674083288700138501C79A
:10186800B3876700137818006380A8140323C700A0
:1018780023A66800232413016306081C93E61700B7
:101888002322D7002320F6009306F01F63E8F60A08
:1018980093D73700938617009396360003A8450020
:1018A800B386D50003A5060013D62740930710007A
:1018B800B397C700B3E70701138686FF2326C7003F
:1018C8002324A70023A2F50023A0E6002326E50091
:1018D800032481008320C100138504008324410070
:1018E800130101016FF05FBE032548001375150051
:1018F800631C0502B387D700138501C783268600BA
:1019080093E817003308F700638AA6140326C60075
:1019180023A6C6002324D600232217012320F8007B
:101928006FF09FF66780000093E61700232ED4FE21
:101938002320F6009306F01FE3FCF6F493D69700F5
:1019480013064000636ED60C93D667001385960382
:1019580013868603131535003385A50083260500F5
:10196800130585FF630AD51003A646001376C6FF44
:1019780063F6C70083A68600E318D5FE03A5C60054
:101988002326A7002324D700032481002324E5006D
:101998008320C100138504008324410023A6E600A8
:1019A800130101016FF05FB26318081283258600E6
:1019B8000326C600B387F60093E6170023A6C500E2
:1019C8002324B6002322D7003307F7002320F7008B
:1019D8006FF01FF013751500B387D7006310050269
:1019E800032584FF3307A7408326C7000326870003
:1019F800B387A7002326D60023A4C60013E6170042
:101A080083A681082322C70023A4E500E3E2D7ECDC
:101A180083A5410913850400EFF01FCC6FF05FEB3D
:101A2800130640016374D602130640056364D606A4
:101A380093D6C7001385F6061386E60613153500F8
:101A48006FF09FF1B387D7006FF05FEB1385C60582
:101A58001386B605131535006FF01FF023AAE500AD
:101A680023A8E5002326A7002324A7002322170183
:101A78002320F8006FF0DFE503A845001356264041
:101A8800930710003396C7003366060123A2C500EA
:101A98006FF01FEF13064015636CD60093D6F7005E
:101AA8001385860713867607131535006FF0DFEA6E
:101AB80013064055636CD60093D627011385D607C5
:101AC8001386C607131535006FF01FE91305803F0D
:101AD8001306E0076FF05FE893E617002322D700AC
:081AE8002320F6006FF0DFDEA1
:101AF0003000000031000000320000003300000020
:101B000034000000350000003600000037000000FF
:101B10003800000039000000610000006200000091
:101B20006300000064000000650000006600000023
:101B300048656C6C6F2066726F6D205F69736174AD
:101B400074790A004552524F523A205F6B696C6CAF
:101B5000206E6F742079657420696D706C656D6599
:101B60006E7465640A0000004552524F523A205F7D
:101B7000756E6C696E6B206E6F7420796574206968
:101B80006D706C656D656E7465640A004552524FE8
:101B9000523A205F67657474696D656F6664617938
:101BA000206E6F742079657420696D706C656D6549
:101BB0006E7465640A0000004552524F523A205F2D
:101BC0006C696E6B206E6F742079657420696D701E
:101BD0006C656D656E7465640A000000300000007D
:101BE000310000003200000033000000340000002B
:101BF000350000003600000037000000380000000B
:101C00003900000061000000620000006300000075
:0E1C10006400000065000000660000000A008D
:042000004800008014
:10200800D81B0080DC1B0080E01B0080E41B0080E4
:10201800E81B0080EC1B0080F01B0080F41B008094
:10202800F81B0080FC1B0080001C0080041C008042
:10203800081C00800C1C0080101C0080141C0080F0
:10200800DC1B0080E01B0080E41B0080E81B0080D4
:10201800EC1B0080F01B0080F41B0080F81B008084
:10202800FC1B0080001C0080041C0080081C008031
:102038000C1C0080101C0080141C0080181C0080E0
:1020480000000000342300809C23008004240080CA
:102058000000000000000000000000000000000078
:102068000000000000000000000000000000000068

View file

@ -148,7 +148,7 @@ int _fstat(int file, struct stat * st)
int _isatty (int file)
{
vx_print_str("Hello from _isatty\n");
// vx_print_str("Hello from _isatty\n");
return 1;
}
@ -237,8 +237,8 @@ static int head_end = (int) 0x20000000;
void * _sbrk (int nbytes)
{
//vx_print_str("Hello from _sbrk\n");
//vx_printf("nbytes: ", nbytes);
// vx_print_str("Hello from _sbrk\n");
// vx_printf("nbytes: ", nbytes);
//if (nbytes < 0) //vx_print_str("nbytes less than zero\n");
// printf("nBytes: %d\n", nbytes);
@ -248,19 +248,21 @@ void * _sbrk (int nbytes)
nbytes = nbytes * -1;
}
if (nbytes > 10240)
{
nbytes = 10240;
}
// vx_printf("New nbytes: ", nbytes);
// if (nbytes > 10240)
// {
// nbytes = 10240;
// }
// if (((unsigned) head_end) > ((unsigned) (heap_ptr + nbytes)))
if (true)
{
int base = heap_start;
heap_start += nbytes;
////vx_print_str("_sbrk returning: ");
//vx_print_hex((unsigned) base);
////vx_print_str("\n");
// vx_print_str("_sbrk returning: ");
// vx_print_hex((unsigned) base);
// vx_print_str("\n");
return (void *) base;
}
else
@ -303,7 +305,7 @@ int _open(const char *name, int flags, int mode)
void _kill()
{
vx_print_str("ERROR: _kill not yet implemented\n");
vx_tmc(0);
}
unsigned _getpid()
@ -320,7 +322,7 @@ static int curr_time = 0;
int _gettimeofday()
{
vx_print_str("ERROR: _gettimeofday not yet implemented\n");
// vx_print_str("ERROR: _gettimeofday not yet implemented\n");
return curr_time++;
}

View file

@ -20,7 +20,7 @@ _start:
# Initialize SP
# la sp, __stack_top
la a1, vx_set_sp
li a0, 4
li a0, 32
.word 0x00b5106b # wspawn a0(numWarps), a1(PC SPAWN)
jal vx_set_sp
li a0, 1
@ -46,7 +46,7 @@ _start:
.type vx_set_sp, @function
.global vx_set_sp
vx_set_sp:
li a0, 4
li a0, 32
.word 0x0005006b # tmc 4
.option push

View file

@ -7,6 +7,8 @@
extern "C" {
#endif
#define TOTAL_WARPS 2
#define TOTAL_THREADS 16
func_t global_function_pointer;
// void (func_t)(void *)
@ -46,16 +48,39 @@ uint8_t * pocl_args;
uint8_t * pocl_ctx;
vx_pocl_workgroup_func pocl_pfn;
unsigned global_z;
unsigned global_y;
unsigned global_x;
void pocl_spawn_real()
{
vx_tmc(pocl_threads);
int x = vx_threadID();
int y = vx_warpID();
int base_x = vx_threadID();
int base_y = vx_warpID();
(pocl_pfn)( pocl_args, pocl_ctx, x, y, 0);
int local_x;
int local_y;
if (y != 0)
for (int iter_z = 0; iter_z < global_z; iter_z++)
{
for (int iter_x = 0; iter_x < global_x; iter_x++)
{
for (int iter_y = 0; iter_y < global_y; iter_y++)
{
local_x = (iter_x * TOTAL_THREADS) + base_x;
local_y = (iter_y * TOTAL_WARPS ) + base_y;
(pocl_pfn)( pocl_args, pocl_ctx, local_x, local_y, iter_z);
}
}
}
// (pocl_pfn)( pocl_args, pocl_ctx, x, y, 0);
if (base_y != 0)
{
vx_tmc(0);
}
@ -66,24 +91,67 @@ void pocl_spawn_real()
void pocl_spawn(struct context_t * ctx, const void * pfn, void * arguments)
{
if (ctx->num_groups[2] > 1)
// printf("ctx->num_groups[0]: %d\n", ctx->num_groups[0]);
// printf("ctx->num_groups[1]: %d\n", ctx->num_groups[1]);
// printf("ctx->num_groups[2]: %d\n", ctx->num_groups[2]);
// printf("\n\n");
// printf("ctx->local_size[0]: %d\n", ctx->local_size[0]);
// printf("ctx->local_size[1]: %d\n", ctx->local_size[1]);
// printf("ctx->local_size[2]: %d\n", ctx->local_size[2]);
if (ctx->num_groups[0] > TOTAL_THREADS)
{
printf("ERROR: pocl_spawn doesn't support Z dimension yet!\n");
return;
pocl_threads = TOTAL_THREADS;
global_x = ctx->num_groups[0] / TOTAL_THREADS;
printf("pocl_threads: %d\n", pocl_threads);
// printf("global_x: %d\n", global_x);
}
else
{
pocl_threads = ctx->num_groups[0];
global_x = 1;
// printf("pocl_threads: %d\n", pocl_threads);
// printf("global_x: %d\n", global_x);
}
pocl_threads = ctx->num_groups[0];
global_z = ctx->num_groups[2];
pocl_pfn = (vx_pocl_workgroup_func) pfn;
pocl_ctx = (uint8_t *) ctx;
pocl_args = (uint8_t *) arguments;
if (ctx->num_groups[1] > 1)
{
vx_wspawn(ctx->num_groups[1], (unsigned) &pocl_spawn_real);
if (ctx->num_groups[1] > TOTAL_WARPS)
{
global_y = ctx->num_groups[1] / TOTAL_WARPS;
vx_wspawn(TOTAL_WARPS, (unsigned) &pocl_spawn_real);
// printf("global_y: %d\n", global_y);
// printf("Warps: %d\n", TOTAL_WARPS);
}
else
{
global_y = 1;
vx_wspawn(ctx->num_groups[1], (unsigned) &pocl_spawn_real);
// printf("global_y: %d\n", global_y);
// printf("Warps: %d\n", ctx->num_groups[1]);
}
}
unsigned starting_cycles = vx_getCycles();
unsigned starting_inst = vx_getInst();
pocl_spawn_real();
unsigned end_cycles = vx_getCycles();
unsigned end_inst = vx_getInst();
printf("pocl_spawn: Total Cycles: %d\n", (end_cycles - starting_cycles));
printf("pocl_spawn: Total Inst : %d\n", (end_inst - starting_inst ));
// int z;
// int y;
// int x;

View file

@ -35,3 +35,8 @@ HEX: ELF
ELF:
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
run:
../../simX/obj_dir/Vcache_simX -E -a rv32i --core vx_vector_main.hex -s -b 1> emulator.debug

View file

@ -0,0 +1,30 @@
# vx_vec_test: test routine taking one pointer argument in a0.
# Stores the constant 7 into the 32-bit word addressed by a0 and returns.
# Overwrites a1.
.type vx_vec_test, @function
.global vx_vec_test
vx_vec_test:
# a1 = 7, the value written back to the caller's buffer
li a1, 7
# *(a0 + 0) = a1
sw a1, 0(a0)
ret
# slli a0, a0, 2
# add a0, a0, a3
# vmv.v.x vv0, a2
# # vsplat4 vv0, a2
# stripmine_loop:
# vlb4 vv1, (a1)
# vcmpez4 vp0, vv1
# !vp0 vlw4 vv1, (a3)
# !vp0 vlw4 vv2, (a4)
# !vp0 vfma4 vv1, vv0, vv1, vv2
# !vp0 vsw4 vv1, (a4)
# addi a1, a1, 4
# addi a3, a3, 16
# addi a4, a4, 16
# bleu a3, a0, stripmine_loop
# handle edge cases
# when (n % 4) != 0 ...

View file

@ -0,0 +1,32 @@
#include "../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec.h"
/*
 * Smoke test for the single-pointer form of vx_vec_test.
 *
 * Allocates one 4-byte cell, seeds it with 5, prints it, hands it to
 * vx_vec_test and prints it again so the change made by the routine is
 * visible in the output.
 *
 * NOTE(review): vx_tmc(1) / vx_tmc(0) presumably set the thread mask at
 * start and end of the run -- confirm against vx_intrinsics.h.
 */
int main()
{
    vx_tmc(1);

    int *cell = malloc(4);
    cell[0] = 5;

    /* Value before the routine runs. */
    printf("Value of a: %d\n", cell[0]);

    vx_vec_test(cell);

    /* Value after the routine runs. */
    printf("Value of a: %d\n", cell[0]);

    vx_tmc(0);
}

View file

@ -0,0 +1,91 @@
#include <stdio.h>
#include <stdlib.h>
#include "../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec.h"
/*
 * Test driver for vx_vec_test in its four-argument form
 * (n, first input, second input, output).
 *
 * The enabled section builds n-element int arrays filled with 1, prints
 * `a`, runs vx_vec_test(n, d, b, c) and prints `a` again. The other
 * sections are earlier experiments kept disabled with `#if 0`.
 *
 * NOTE(review): vx_tmc(1) / vx_tmc(0) presumably set the thread mask at
 * start and end of the run -- confirm against vx_intrinsics.h.
 */
int main()
{
    vx_tmc(1);

    /*
     * Routine under test: vector-vector add routine of 32-bit integers
     *   void vvaddint32(size_t n, const int*x, const int*y, int*z)
     *   { for (size_t i=0; i<n; i++) { z[i]=x[i]+y[i]; } }
     *
     * a0 = n, a1 = x, a2 = y, a3 = z
     */

#if 1
    int n = 5;
    int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
    int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
    int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};

    for(int i = 0; i < n; ++i)
    {
        a[i] = b[i] = c[i] = 1;
    }

    for(int i = 0; i < n; ++i) printf("%d, ", a[i]);
    printf("\n");
    // for(int i = 0; i < n; ++i) printf("%d, ", b[i]);
    // printf("\n");
    // for(int i = 0; i < n; ++i) printf("%d, ", c[i]);

    /*
     * `d` is passed as an n-element input array, so it must point at n
     * initialised ints. Writing through a pointer that was never assigned
     * is undefined behaviour, so give it real storage and fill every
     * element (d[0] keeps the value 1 that is printed below).
     */
    int *d = (int*)malloc(sizeof(int) * n);
    for(int i = 0; i < n; ++i)
    {
        d[i] = 1;
    }

    vx_vec_test(n, d, b, c);

    printf("(after: n = %d, %d)\n", n, *d);
    for(int i = 0; i < n; ++i) printf("%d, ", a[i]);
    // printf("\n");
    // for(int i = 0; i < n; ++i) printf("%d, ", b[i]);
    // printf("\n");
    // for(int i = 0; i < n; ++i) printf("%d, ", c[i]);
#endif

#if 0
    /* Disabled: single-pointer variant of the routine. */
    int * a = malloc(sizeof(int) * 10);
    for(int i = 0; i < 10; ++i) a[i] = 5;
    for(int i = 0; i < 10; ++i)
        printf("%d, ", a[i]);

    vx_vec_test(a);
    //vx_vec_test(2, a, a, a);

    printf("after--------\n");
    for(int i = 0; i < 10; ++i)
        printf("%d, ", a[i]);
#endif

#if 0
    /* Disabled: compact before/after report of the first elements. */
    int n = 5;
    int *a = (int*)malloc(sizeof(int) * 5); //{1, 1, 1, 1, 1};
    int *b = (int*)malloc(sizeof(int) * 5); //{1, 1, 1, 1, 1};
    int *c = (int*)malloc(sizeof(int) * 5); //{1, 1, 1, 1, 1};
    for(int i = 0; i < n; ++i)
    {
        a[i] = 1;
        b[i] = 1;
        c[i] = 0;
    }
    printf("Value of a: %d, b: %d, c: %d, n: %d\n", a[0], b[0], c[0], n);
    vx_vec_test(n, a, b, c);
    printf("Value of a: %d, b: %d, c: %d, n: %d\n", a[0], b[0], c[0], n);
#endif

    // for (int i = 0; i < 4; i++)
    // {
    //     if (c[i] != (a[i] + b[i]))
    //     {
    //         printf("Fail\n");
    //         break;
    //     }
    // }

    vx_tmc(0);
}

View file

@ -7,7 +7,7 @@
extern "C" {
#endif
void vx_vec_test(int *);
void vx_vec_test(int n, int* a, int* b, int* c); //vvaddint32
#ifdef __cplusplus

View file

@ -1,30 +1,23 @@
.type vx_vec_test, @function
.global vx_vec_test
vx_vec_test:
li a1, 7
sw a1, 0(a0)
ret
# slli a0, a0, 2
# add a0, a0, a3
# vmv.v.x vv0, a2
# # vsplat4 vv0, a2
# stripmine_loop:
# vlb4 vv1, (a1)
# vcmpez4 vp0, vv1
# !vp0 vlw4 vv1, (a3)
# !vp0 vlw4 vv2, (a4)
# !vp0 vfma4 vv1, vv0, vv1, vv2
# !vp0 vsw4 vv1, (a4)
# addi a1, a1, 4
# addi a3, a3, 16
# addi a4, a4, 16
# bleu a3, a0, stripmine_loop
# handle edge cases
# when (n % 4) != 0 ...
# vector-vector add routine of 32-bit integers
# void vvaddint32(size_t n, const int*x, const int*y, int*z)
# { for (size_t i=0; i<n; i++) { z[i]=x[i]+y[i]; } }
#
# a0 = n, a1 = x, a2 = y, a3 = z
# Non-vector instructions are indented
vsetvli t0, a0, e32 # Set vector length based on 32-bit vectors
vlw.v v0, (a1) # Get first vector
sub a0, a0, t0 # Decrement number done
slli t0, t0, 2 # Multiply number done by 4 bytes
add a1, a1, t0 # Bump pointer
vlw.v v1, (a2) # Get second vector
add a2, a2, t0 # Bump pointer
vadd.vv v2, v0, v1 # Sum vectors
vsw.v v2, (a3) # Store result
add a3, a3, t0 # Bump pointer
bnez a0, vx_vec_test # Loop back
ret # Finished

View file

@ -0,0 +1,27 @@
#include "../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec.h"
/*
 * Eight-element run of vx_vec_test(n, a, b, c).
 *
 * Prints two greeting lines around the allocations, fills all three arrays
 * with 1, calls the routine and prints every element of the output array
 * on one line.
 *
 * NOTE(review): vx_tmc(1) / vx_tmc(0) presumably set the thread mask at
 * start and end of the run -- confirm against vx_intrinsics.h.
 */
int main()
{
    vx_tmc(1);

    printf("----------------hello!!! \n");

    int n = 8;
    int *lhs = (int*)malloc(sizeof(int) * n);
    int *rhs = (int*)malloc(sizeof(int) * n);
    int *out = (int*)malloc(sizeof(int) * n);

    printf("hello!!! \n");

    /* Seed every element of all three arrays with 1. */
    int idx = 0;
    while (idx < n)
    {
        out[idx] = 1;
        rhs[idx] = 1;
        lhs[idx] = 1;
        ++idx;
    }

    vx_vec_test(n, lhs, rhs, out);

    /* Dump the output array, space separated. */
    for (idx = 0; idx < n; ++idx)
    {
        printf("%d ", out[idx]);
    }

    vx_tmc(0);
}

View file

@ -1,32 +1,29 @@
#include "../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec.h"
int main()
{
vx_tmc(1);
// int * a = malloc(4);
// int * b = malloc(4);
// int * c = malloc(4);
vx_tmc(1);
printf("Hello\n");
int n = 64;
int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
for(int i = 0; i < n; ++i)
{
a[i] = b[i] = c[i] = 1;
}
vx_vec_test(n, a, b, c);
for (int i = 0; i < n; ++i)
{
printf("a[%d]=%d, b[%d]=%d, c[%d]=%d\n", i, a[i], i, b[i], i, c[i]);
}
int * a = malloc(4);
*a = 5;
printf("Value of a: %d\n", *a);
vx_vec_test(a);
printf("Value of a: %d\n", *a);
// for (int i = 0; i < 4; i++)
// {
// if (c[i] != (a[i] + b[i]))
// {
// printf("Fail\n");
// break;
// }
// }
vx_tmc(0);
vx_tmc(0);
}

File diff suppressed because it is too large Load diff

166
rvvector/benchmark_temp/1 Normal file
View file

@ -0,0 +1,166 @@
#include <stdio.h>
#include <stdlib.h>
#include "../../runtime/intrinsics/vx_intrinsics.h"
#include "vx_vec_benchmark.h"
/*
 * Benchmark / self-check driver for the vx_vec_* vector kernels.
 *
 * Allocates three n-element int arrays and runs whichever kernel sections
 * are enabled with `#if 1` (currently only saxpy). Each section re-seeds
 * its inputs, prints them, calls the kernel and compares the result with a
 * scalar reference expression.
 *
 * Returns 0 when every enabled check passes, 1 at the first mismatch.
 *
 * NOTE(review): vx_tmc(1) / vx_tmc(0) presumably set the thread mask at
 * start and end of the run -- confirm against vx_intrinsics.h. The failure
 * paths return without calling vx_tmc(0).
 */
int main()
{
    vx_tmc(1);

    int n = 65536;
    int scalar = 10;

    int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
    int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
    int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};

    for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 2; c[i] = 5; }

#if 0
    //---------------------------------------------------------------
    /* vvaddint32
     * # vector-vector add routine of 32-bit integers
     * # void vvaddint32(size_t n, const int*x, const int*y, int*z)
     * # { for (size_t i=0; i<n; i++) { z[i]=x[i]+y[i]; } } */
    printf("vvaddint...\na[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d ", a[i]);
    printf("\nb[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d ", b[i]);
    printf("\nc[%d] = a[%d] + b[%d]: ", n, n, n);
    for(int i = 0; i < n; ++i) printf("%d ", c[i]);

    vx_vec_vvaddint32(n, a, b, c);

    for(int i = 0; i < n; ++i)
    {
        if(c[i] != (a[i]+b[i]))
        {
            printf("\n<vddint32> failed at <index: %d>! \n", i);
            return 1;
        }
    }
    printf("\nPASSED.......................... <vddint32> \n");
#endif

#if 0
    //---------------------------------------------------------------
    /* # vector-scalar add
       # for (i=0; i<N; i++) { C[i] = A[i] + B; } // 32-bit ints */
    for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 1;}
    printf("vsadd...scalar:%d\na[%d]: ", scalar, n);
    for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
    printf("\nb: %d", scalar);

    vx_vec_vsadd(n, a, scalar);

    /* NOTE(review): the reference below multiplies by scalar although the
       kernel is described as an add -- confirm the expected value before
       enabling this section. */
    for(int i = 0; i < n; ++i)
    {
        if(a[i] != (b[i] * scalar))
        {
            printf("\n<vsadd> failed at <index: %d>! \n", i);
            return 1;
        }
    }
    printf("\nPASSED.......................... <vsadd> \n");
#endif

#if 0
    //---------------------------------------------------------------
    /* # memory copy
       # void *memcpy(void* dest, const void* src, size_t n) */
    for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 2;}
    printf("memcpy\na[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
    printf("\nb[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d \n", b[i]);

    vx_vec_memcpy(a, b, n);

    for(int i = 0; i < n; ++i)
    {
        if(a[i] != b[i])
        {
            printf("\n<memcpy> failed at <index: %d>! \n", i);
            return 1;
        }
    }
    printf("\nPASSED.......................... <memcpy> \n");
#endif

#if 1
    //---------------------------------------------------------------
    /* # void saxpy(size_t n, const float a, const float *x, float *y)
       # ==> convert to int!!
       # void saxpy(size_t n, const int a, const int *x, int *y)
       # {
       # size_t i;
       # for (i=0; i<n; i++) y[i] = a * x[i] + y[i];
       # } */
    /* a is x, b is y (updated in place), c keeps the original y values
       so the reference result can be recomputed after the call. */
    for (int i = 0; i < n; ++i) { a[i] = 4; b[i] = 2; c[i] = 2;}

    printf("saxpy\na[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
    printf("\nb[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d \n", b[i]);

    vx_vec_saxpy(n, scalar, a, b);

    printf("saxpy\na[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
    printf("\nb[%d]: ", n);
    for(int i = 0; i < n; ++i) printf("%d \n", b[i]);

    for(int i = 0; i < n; ++i)
    {
        if(b[i] != ((a[i] * scalar) + c[i]))
        {
            printf("\n<saxpy> failed at <index: %d>! \n", i);
            return 1;
        }
    }
    printf("\nPASSED.......................... <saxpy> \n");
#endif

#if 0
    //---------------------------------------------------------------
    /* # void sgemm_nn(size_t n, size_t m, size_t k, const float*a, // m * k matrix
       # size_t lda, const float*b, // k * n matrix
       # size_t ldb, float*c, // m * n matrix
       # size_t ldc)
       # c += a*b (alpha=1, no transpose on input matrices)
       # matrices stored in C row-major order */
    {
        /* Own scope: this section uses its own n (matrix dimension), which
           would otherwise redeclare the n defined at the top of main. */
        int m = 8;
        int k = 8;
        int n = 8;
        /* NOTE(review): leading dimensions of 4 look too small for 8-wide
           row-major matrices -- confirm what the kernel expects. */
        int lda = 4;
        int ldb = 4;
        int ldc = 4;

        /* One int per matrix element; sizeof(m * k) would only be
           sizeof(int) bytes in total. */
        int* a1 = (int*)malloc(sizeof(int) * (m * k));
        int* b1 = (int*)malloc(sizeof(int) * (k * n));
        int* c1 = (int*)malloc(sizeof(int) * (m * n));

        for(int i = 0; i < (m * k); ++i) a1[i] = 1;
        for(int i = 0; i < (k * n); ++i) b1[i] = 1;
        for(int i = 0; i < (m * n); ++i) c1[i] = 1;

        printf("sgemm_nn\na[%d]: ", n);
        for(int i = 0; i < n; ++i) printf("%d \n", a1[i]);
        printf("\nb[%d]: ", n);
        for(int i = 0; i < n; ++i) printf("%d \n", b1[i]);

        vx_vec_sgemm_nn(n, m, k, a1, lda, b1, ldb, c1, ldc);

        //for(int i = 0; i < n; ++i)
        //{
        //    if(b[i] != ((a[i] * scalar) + c[i]))
        //    {
        //        printf("\n<sgemm_nn> failed at <index: %d>! \n", i);
        //        return;
        //    }
        //}
        printf("\nNOT TESTED.......................... <sgemm_nn> \n");
    }
    //---------------------------------------------------------------
#endif

    vx_tmc(0);
    return 0;
}

View file

@ -34,8 +34,7 @@ HEX: ELF
$(CPY) -O ihex $(VX_MAIN).elf $(VX_MAIN).hex
ELF:
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC1) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC2) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
$(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC2) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC3) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC4) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf
# $(COMP) $(CC_FLAGS) $(VX_STR) $(VX_VEC5) $(VX_FIO) $(NEWLIB) $(VX_INT) $(VX_IO) $(VX_API) $(VX_TEST) $(VX_MAIN).c $(LIBS) -Iinclude -o $(VX_MAIN).elf~

View file

@ -6,14 +6,17 @@
int main()
{
vx_tmc(1);
int n = 5;
int scalar = 10;
int *a = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
int *b = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
int *c = (int*)malloc(sizeof(int) * n); //{1, 1, 1, 1, 1};
for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 2; c[i] = 5; }
#if 1
#if 0
//---------------------------------------------------------------
/* vvaddint32
* # vector-vector add routine of 32-bit integers
@ -43,7 +46,6 @@ int main()
/* # vector-scalar add
# for (i=0; i<N; i++) { C[i] = A[i] + B; } // 32-bit ints */
for (int i = 0; i < n; ++i) { a[i] = 1; b[i] = 1;}
int scalar = 10;
printf("vsadd...scalar:%d\na[%d]: ", scalar, n);
for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
printf("\nb: %d", scalar);
@ -78,10 +80,18 @@ int main()
if(a[i] != b[i])
{
printf("\n<memcpy> failed at <index: %d>! \n", i);
<<<<<<< HEAD
return;
}
}
printf("\nPASSED.......................... <memcpy> \n");
=======
return 1;
}
}
printf("\nPASSED.......................... <memcpy> \n");
#endif
#if 1
//---------------------------------------------------------------
/* # void saxpy(size_t n, const float a, const float *x, float *y)
# ==> convert to int!!
@ -99,6 +109,11 @@ int main()
vx_vec_saxpy(n, scalar, a, b);
printf("saxpy\na[%d]: ", n);
for(int i = 0; i < n; ++i) printf("%d \n", a[i]);
printf("\nb[%d]: ", n);
for(int i = 0; i < n; ++i) printf("%d \n", b[i]);
for(int i = 0; i < n; ++i)
{
if(b[i] != ((a[i] * scalar) + c[i]))
@ -109,6 +124,12 @@ int main()
}
printf("\nPASSED.......................... <saxpy> \n");
return 1;
}
}
printf("\nPASSED.......................... <saxpy> \n");
#endif
#if 0
//---------------------------------------------------------------
/* # void sgemm_nn(size_t n, size_t m, size_t k, const float*a, // m * k matrix
# size_t lda, const float*b, // k * n matrix

Binary file not shown.

View file

@ -5,10 +5,10 @@
extern "C" {
#endif
void vx_vec_vvaddint32(int n, int* a, int* b, int *c);
//void vx_vec_vvaddint32(int n, int* a, int* b, int *c);
//void vx_vec_vsadd(int n, int* a, int scalar);
//void vx_vec_memcpy(int* a, int* b, int n);
//void vx_vec_saxpy(int n, int scalar, int* a, int* b);
void vx_vec_saxpy(int n, int scalar, int* a, int* b);
//void vx_vec_sgemm_nn(int n, int m, int k, int* a1, int lda, int* b1, int ldb, int* c1, int ldc);
#ifdef __cplusplus

View file

@ -25,4 +25,32 @@ saxpy:
vsw.v v8, (a2)
add a2, a2, a4
bnez a0, saxpy
ret
ret
#vx_vec_saxpy:
# vsetvli a4, a0, e32, m8
#saxpy:
# vlw.v v0, (a1)
# sub a0, a0, a4
# slli a4, a4, 2
# add a1, a1, a4
# vlw.v v8, (a2)
# vfmacc.vf v8, fa0, v0
# vsw.v v8, (a2)
# add a2, a2, a4
# bnez a0, saxpy
# ret
# void vx_vec_saxpy(int n, int scalar, int* x, int* y)
#   y[i] = scalar * x[i] + y[i]   for i in [0, n), 32-bit integers
#
# a0 = n (elements left), a1 = scalar, a2 = x, a3 = y
# a4 = elements handled in the current pass, then the same count in bytes
#
# The loop re-enters at vx_vec_saxpy so that vsetvli runs on every pass:
# a4 is turned into a byte count by the slli below, so it has to be
# recomputed from the remaining element count before the next subtraction.
# With m8 each operand is a group of eight registers, so the groups used
# are v0 (v0-v7) and v8 (v8-v15).
vx_vec_saxpy:
vsetvli a4, a0, e32, m8
# load the next chunk of x
vlw.v v0, (a2)
# elements left after this pass
sub a0, a0, a4
# element count -> byte count
slli a4, a4, 2
add a2, a2, a4
# load the matching chunk of y
vlw.v v8, (a3)
# v8 = a1 * v0 + v8
vmacc.vx v8, a1, v0
vsw.v v8, (a3)
add a3, a3, a4
bnez a0, vx_vec_saxpy
ret

View file

@ -1,7 +1,7 @@
################################################################################
# HARPtools by Chad D. Kersey, Summer 2011 #
################################################################################
CXXFLAGS ?= -std=c++11 -fPIC -O3 # -g -DUSE_DEBUG=3 -DPRINT_ACTIVE_THREADS
CXXFLAGS ?= -std=c++11 -fPIC -O3 -g # -g -DUSE_DEBUG=3 -DPRINT_ACTIVE_THREADS
LIB_OBJS=simX.cpp args.cpp mem.cpp core.cpp instruction.cpp enc.cpp util.cpp

View file

@ -46,7 +46,8 @@
trace_inst.vd = -1; \
trace_inst.is_lw = false; \
trace_inst.is_sw = false; \
trace_inst.mem_addresses = new unsigned[a.getNThds()]; \
if (trace_inst.mem_addresses != NULL) free(trace_inst.mem_addresses); \
trace_inst.mem_addresses = (unsigned *) malloc(32 * sizeof(unsigned)); \
for (int tid = 0; tid < a.getNThds(); tid++) trace_inst.mem_addresses[tid] = 0xdeadbeef; \
trace_inst.mem_stall_cycles = 0; \
trace_inst.fetch_stall_cycles = 0; \
@ -79,19 +80,19 @@ using namespace std;
void printTrace(trace_inst_t * trace, const char * stage_name)
{
cout << "********************************** " << stage_name << " *********************************\n";
cout << "valid: " << trace->valid_inst << '\n';
cout << "PC: " << hex << trace->pc << dec << '\n';
cout << "wid: " << trace->wid << '\n';
cout << "rd: " << trace->rd << "\trs1: " << trace->rs1 << "\trs2: " << trace->rs2 << '\n';
cout << "is_lw: " << trace->is_lw << '\n';
cout << "is_sw: " << trace->is_sw << '\n';
cout << "fetch_stall_cycles: " << trace->fetch_stall_cycles << '\n';
cout << "mem_stall_cycles: " << trace->mem_stall_cycles << '\n';
D(3, "********************************** " << stage_name << " *********************************");
D(3, "valid: " << trace->valid_inst);
D(3, "PC: " << hex << trace->pc << dec);
D(3, "wid: " << trace->wid);
D(3, "rd: " << trace->rd << "\trs1: " << trace->rs1 << "\trs2: " << trace->rs2);
D(3, "is_lw: " << trace->is_lw);
D(3, "is_sw: " << trace->is_sw);
D(3, "fetch_stall_cycles: " << trace->fetch_stall_cycles);
D(3, "mem_stall_cycles: " << trace->mem_stall_cycles);
cout << "stall_warp: " << trace->stall_warp << '\n';
cout << "wspawn: " << trace->wspawn << '\n';
cout << "stalled: " << trace->stalled << '\n';
D(3, "stall_warp: " << trace->stall_warp);
D(3, "wspawn: " << trace->wspawn);
D(3, "stalled: " << trace->stalled);
}
#ifdef EMU_INSTRUMENTATION
@ -105,7 +106,7 @@ void Harp::reg_doWrite(Word cpuId, Word regNum) {
#endif
Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id):
a(a), iDec(d), mem(mem), steps(4)
a(a), iDec(d), mem(mem), steps(4), num_cycles(0), num_instructions(0)
{
release_warp = false;
foundSchedule = true;
@ -133,9 +134,9 @@ Core::Core(const ArchDef &a, Decoder &d, MemoryUnit &mem, Word id):
cache_simulator = new Vcache_simX;
m_trace = new VerilatedVcdC;
cache_simulator->trace(m_trace, 99);
m_trace->open("simXtrace.vcd");
// m_trace = new VerilatedVcdC;
// cache_simulator->trace(m_trace, 99);
// m_trace->open("simXtrace.vcd");
cache_simulator->reset = 1;
cache_simulator->clk = 0;
@ -161,38 +162,50 @@ bool Core::interrupt(Word r0) {
void Core::step()
{
cout << "\n\n\n------------------------------------------------------\n";
D(3, "\n\n\n------------------------------------------------------");
D(3, "Started core::step" << flush);
steps++;
cout << "CYCLE: " << steps << '\n';
this->num_cycles++;
D(3, "CYCLE: " << this->num_cycles);
cout << "Stalled Warps:\n";
D(3, "Stalled Warps:");
for (int widd = 0; widd < a.getNWarps(); widd++)
{
cout << stallWarp[widd] << " ";
D(3, stallWarp[widd] << " ");
}
cout << '\n';
// cout << "Rename table\n";
// for (int regii = 0; regii < 32; regii++)
// {
// cout << regii << ": " << renameTable[0][regii] << '\n';
// }
cout << '\n';
// cout << '\n' << flush;
// cout << "About to call writeback" << endl;
this->writeback();
// cout << "About to call load_store" << endl;
this->load_store();
// cout << "About to call execute_unit" << endl;
this->execute_unit();
// cout << "About to call scheduler" << endl;
this->scheduler();
// cout << "About to call decode" << endl;
this->decode();
// D(3, "About to call fetch" << flush);
this->fetch();
// D(3, "Finished fetch" << flush);
if (release_warp)
{
release_warp = false;
stallWarp[release_warp_num] = false;
}
D(3, "released warp" << flush);
D(3, "Finished core::step" << flush);
}
void Core::getCacheDelays(trace_inst_t * trace_inst)
@ -238,7 +251,7 @@ void Core::getCacheDelays(trace_inst_t * trace_inst)
cache_simulator->clk = 1;
cache_simulator->eval();
m_trace->dump(2*curr_cycle);
// m_trace->dump(2*curr_cycle);
cache_simulator->in_icache_pc_addr = trace_inst->pc;
cache_simulator->in_icache_valid_pc_addr = 1;
@ -254,7 +267,7 @@ void Core::getCacheDelays(trace_inst_t * trace_inst)
// DCache end
cache_simulator->clk = 0;
cache_simulator->eval();
m_trace->dump(2*curr_cycle+1);
// m_trace->dump(2*curr_cycle+1);
curr_cycle++;
@ -296,7 +309,7 @@ void Core::getCacheDelays(trace_inst_t * trace_inst)
cache_simulator->clk = 1;
cache_simulator->eval();
m_trace->dump(2*curr_cycle);
// m_trace->dump(2*curr_cycle);
//////// Feed input
if (cache_simulator->out_icache_stall)
@ -331,7 +344,7 @@ void Core::getCacheDelays(trace_inst_t * trace_inst)
cache_simulator->clk = 0;
cache_simulator->eval();
m_trace->dump(2*curr_cycle+1);
// m_trace->dump(2*curr_cycle+1);
curr_cycle++;
@ -378,9 +391,9 @@ void Core::warpScheduler()
void Core::fetch()
{
#ifdef PRINT_ACTIVE_THREADS
cout << endl << "Threads:";
#endif
// #ifdef PRINT_ACTIVE_THREADS
D(3, "Threads:");
// #endif
// D(-1, "Found schedule: " << foundSchedule);
@ -395,16 +408,22 @@ void Core::fetch()
if (foundSchedule)
{
D(3, "Core step stepping warp " << schedule_w << '[' << w[schedule_w].activeThreads << ']');
this->num_instructions = this->num_instructions + w[schedule_w].activeThreads;
// this->num_instructions++;
w[schedule_w].step(&inst_in_fetch);
D(3, "Now " << w[schedule_w].activeThreads << " active threads in " << schedule_w);
D(3, "Now " << w[schedule_w].activeThreads << " active threads in " << schedule_w << flush);
this->getCacheDelays(&inst_in_fetch);
// this->getCacheDelays(&inst_in_fetch);
D(3, "Got cache delays" << flush);
if (inst_in_fetch.stall_warp)
{
stallWarp[inst_in_fetch.wid] = true;
}
D(3, "staled warps\n" << flush);
}
D(3, "About to schedule warp\n" << flush);
warpScheduler();
D(3, "Scheduled warp" << flush);
}
}
else
@ -413,21 +432,33 @@ void Core::fetch()
if (inst_in_fetch.fetch_stall_cycles > 0) inst_in_fetch.fetch_stall_cycles--;
}
D(3, "Printing trace" << flush);
printTrace(&inst_in_fetch, "Fetch");
D(3, "printed trace" << flush);
// #ifdef PRINT_ACTIVE_THREADS
D(3, "About to print active threads" << flush << "\n");
for (unsigned j = 0; j < w[schedule_w].tmask.size(); ++j) {
if (w[schedule_w].activeThreads > j && w[schedule_w].tmask[j]) cout << " 1";
else cout << " 0";
if (j != w[schedule_w].tmask.size()-1 || schedule_w != w.size()-1) cout << ',';
if (w[schedule_w].activeThreads > j && w[schedule_w].tmask[j])
{
D(3, " 1");
}
else
{
D(3, " 0");
}
if (j != w[schedule_w].tmask.size()-1 || schedule_w != w.size()-1)
{
D(3, ',');
}
}
D(3, "\nPrinted active threads" << flush);
// #endif
#ifdef PRINT_ACTIVE_THREADS
cout << endl;
#endif
// #ifdef PRINT_ACTIVE_THREADS
// #endif
}
void Core::decode()
@ -522,7 +553,7 @@ void Core::load_store()
void Core::execute_unit()
{
// cout << "$$$$$$$$$$$$$$$$$$$ EXE START\n";
D(3, "$$$$$$$$$$$$$$$$$$$ EXE START\n" << flush);
bool do_nothing = false;
// EXEC is always not busy
if (inst_in_scheduler.is_lw || inst_in_scheduler.is_sw)
@ -546,6 +577,7 @@ void Core::execute_unit()
// cout << "Rename RS2: " << inst_in_scheduler.rs1 << " is " << renameTable[inst_in_scheduler.wid][inst_in_scheduler.rs2] << " wid: " << inst_in_scheduler.wid << '\n';
}
// cout << "About to check vs*\n" << flush;
if(inst_in_scheduler.vs1 > 0)
{
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable[inst_in_scheduler.vs1];
@ -554,6 +586,7 @@ void Core::execute_unit()
{
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable[inst_in_scheduler.vs2];
}
// cout << "Finished sources\n" << flush;
if (scheduler_srcs_ready)
{
@ -561,15 +594,19 @@ void Core::execute_unit()
// cout << "rename setting rd: " << inst_in_scheduler.rd << " to not useabel wid: " << inst_in_scheduler.wid << '\n';
renameTable[inst_in_scheduler.wid][inst_in_scheduler.rd] = false;
}
// cout << "About to check vector wb: " << inst_in_scheduler.vd << "\n" << flush;
if(inst_in_scheduler.vd != -1) {
vecRenameTable[inst_in_scheduler.vd] = false;
}
// cout << "Finished wb checking" << "\n" << flush;
CPY_TRACE(inst_in_exe, inst_in_scheduler);
INIT_TRACE(inst_in_scheduler);
// cout << "Finished trace copying and clearning" << "\n" << flush;
}
else
{
cout << "&&&&&&&&&&&&&&&&&&&&&&&& EXECUTE SRCS NOT READY\n";
D(3, "&&&&&&&&&&&&&&&&&&&&&&&& EXECUTE SRCS NOT READY");
inst_in_scheduler.stalled = true;
// INIT_TRACE(inst_in_exe);
do_nothing = true;
@ -583,6 +620,7 @@ void Core::execute_unit()
//printTrace(&inst_in_exe, "execute_unit");
// INIT_TRACE(inst_in_exe);
D(3, "EXECUTE END" << flush);
}
void Core::writeback()
@ -623,7 +661,7 @@ void Core::writeback()
{
if (serviced_exe)
{
cout << "$$$$$$$$$$$$$$$$$$$$ Stalling LSU because EXE is being used\n";
D(3, "$$$$$$$$$$$$$$$$$$$$ Stalling LSU because EXE is being used");
inst_in_lsu.stalled = true;
}
else
@ -652,7 +690,7 @@ bool Core::running() const {
for (unsigned i = 0; i < w.size(); ++i)
if (w[i].running())
{
cout << "Warp ID " << i << " is running\n";
D(3, "Warp ID " << i << " is running");
return true;
}
return false;
@ -665,7 +703,7 @@ void Core::printStats() const {
cerr << "Total steps: " << steps << endl;
for (unsigned i = 0; i < w.size(); ++i) {
cout << "=== Warp " << i << " ===" << endl;
// cout << "=== Warp " << i << " ===" << endl;
w[i].printStats();
}
}
@ -715,7 +753,7 @@ void Warp::step(trace_inst_t * trace_inst) {
// ++steps;
D(3, "in step pc=0x" << hex << pc);
cout << "help: in PC: " << hex << pc << dec << '\n';
D(3, "help: in PC: " << hex << pc << dec);
// std::cout << "pc: " << hex << pc << "\n";
@ -727,8 +765,9 @@ void Warp::step(trace_inst_t * trace_inst) {
bool fetchMore;
fetchMore = false;
unsigned fetchSize(wordSize - (pc+fetchPos)%wordSize);
fetchBuffer.resize(fetchPos + fetchSize);
// unsigned fetchSize(wordSize - (pc+fetchPos)%wordSize);
unsigned fetchSize = 4;
fetchBuffer.resize(fetchSize);
Word fetched = core->mem.fetch(pc + fetchPos, supervisorMode);
writeWord(fetchBuffer, fetchPos, fetchSize, fetched);
decPos = 0;

View file

@ -104,7 +104,8 @@ Instruction *WordDecoder::decode(const std::vector<Byte> &v, Size &idx, trace_in
bool predicated = false;
if (predicated) { inst.setPred((code>>(inst_s-p-1))&pMask); }
printf("CUrrent CODE: %x\n", code);
// printf("CUrrent CODE: %x\n", code);
D(3, "Curr Code: " << hex << code << dec);
Opcode op = (Opcode)((code>>shift_opcode)&opcode_mask);
// std::cout << "opcode: " << op << "\n";
@ -234,17 +235,17 @@ Instruction *WordDecoder::decode(const std::vector<Byte> &v, Size &idx, trace_in
break;
case InstType::V_TYPE:
cout << "Entered here: instr type = vector" << op << endl;
D(3, "Entered here: instr type = vector" << op);
switch(op) {
case Opcode::VSET_ARITH: //TODO: arithmetic ops
inst.setDestReg((code>>shift_rd) & reg_mask);
inst.setSrcReg((code>>shift_rs1) & reg_mask);
func3 = (code>>shift_func3) & func3_mask;
inst.setFunc3 (func3);
cout << "Entered here: instr type = vector" << endl;
D(3, "Entered here: instr type = vector");
if(func3 == 7) {
cout << "Entered here: imm instr";
D(3, "Entered here: imm instr");
inst.setVsetImm(!(code>>shift_vset));
@ -318,9 +319,9 @@ Instruction *WordDecoder::decode(const std::vector<Byte> &v, Size &idx, trace_in
Ref *srcRef = refMap[idx-n/8];
/* Create a new ref tied to this instruction. */
Ref *r = new SimpleRef(srcRef->name, *(Addr*)inst.setSrcImm(),
inst.hasRelImm());
inst.setImmRef(*r);
// Ref *r = new SimpleRef(srcRef->name, *(Addr*)inst.setSrcImm(),
// inst.hasRelImm());
// inst.setImmRef(*r);
}
D(2, "Decoded 0x" << hex << code << " into: " << inst << '\n');

View file

@ -23,8 +23,8 @@ namespace Harp {
encChar = 'w';
nRegs = 32;
nPRegs = 0;
nThds = 8;
nWarps = 8;
nThds = 32;
nWarps = 32;
extent = EXT_WARPS;

View file

@ -144,6 +144,8 @@ namespace Harp {
Word interruptEntry;
unsigned long steps;
unsigned long num_cycles;
unsigned long num_instructions;
std::vector<Warp> w;
std::map<Word, std::set<Warp *> > b; // Barriers
int schedule_w;

View file

@ -5,7 +5,7 @@
#define __DEBUG_H
// #define USE_DEBUG 9
#define USE_DEBUG 3
// #define USE_DEBUG 3
#ifdef USE_DEBUG
#include <iostream>
@ -21,10 +21,11 @@
#define D_RAW(x) do { \
std::cout << x; \
} while (0)
#else
#define D(lvl, x) do {} while(0)
#define D_RAW(x) do {} while(0)
#endif
#endif

File diff suppressed because it is too large Load diff

BIN
simX/obj_dir/Vcache_simX Executable file

Binary file not shown.

View file

@ -0,0 +1,208 @@
// Verilated -*- C++ -*-
// DESCRIPTION: Verilator output: Design implementation internals
// See Vcache_simX.h for the primary calling header
#include "Vcache_simX.h" // For This
#include "Vcache_simX__Syms.h"
//--------------------
// STATIC VARIABLES
//--------------------
// Constructor of the top-level model (signature supplied by VL_CTOR_IMP).
// Creates the symbol table, instantiates the single child cell and gives every
// port and internal variable its reset value, in declaration order.
VL_CTOR_IMP(Vcache_simX) {
    // The symbol table is owned by this object (released in the destructor).
    __VlSymsp = new Vcache_simX__Syms(this, name());
    Vcache_simX__Syms* __restrict vlSymsp = __VlSymsp;
    Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
    VL_CELL (__PVT__v, Vcache_simX_cache_simX);

    // Scalar ports.
    clk = VL_RAND_RESET_I(1);
    reset = VL_RAND_RESET_I(1);
    in_icache_pc_addr = VL_RAND_RESET_I(32);
    in_icache_valid_pc_addr = VL_RAND_RESET_I(1);
    out_icache_stall = VL_RAND_RESET_I(1);
    in_dcache_mem_read = VL_RAND_RESET_I(3);
    in_dcache_mem_write = VL_RAND_RESET_I(3);

    // Four-entry dcache request arrays: all valid bits first, then all addresses.
    for (int lane = 0; lane < 4; ++lane) {
        in_dcache_in_valid[lane] = VL_RAND_RESET_I(1);
    }
    for (int lane = 0; lane < 4; ++lane) {
        in_dcache_in_address[lane] = VL_RAND_RESET_I(32);
    }
    out_dcache_stall = VL_RAND_RESET_I(1);

    // Bookkeeping used by _eval() / _change_request() and the trace code.
    __Vclklast__TOP__clk = VL_RAND_RESET_I(1);
    __Vclklast__TOP__reset = VL_RAND_RESET_I(1);
    __Vchglast__TOP__v__dmem_controller__shared_memory__DOT__block_addr = VL_RAND_RESET_I(28);
    __Vm_traceActivity = VL_RAND_RESET_I(32);
}
// Stores the symbol-table pointer handed in by the caller.
// `first` is accepted but not used.
void Vcache_simX::__Vconfigure(Vcache_simX__Syms* vlSymsp, bool first) {
    (void)first;  // silence the unused-parameter warning
    __VlSymsp = vlSymsp;
}
// Releases the symbol table allocated by the constructor and clears the pointer.
Vcache_simX::~Vcache_simX() {
    delete __VlSymsp;
    __VlSymsp = NULL;
}
//--------------------
void Vcache_simX::eval() {
Vcache_simX__Syms* __restrict vlSymsp = this->__VlSymsp; // Setup global symbol table
Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
// Initialize
if (VL_UNLIKELY(!vlSymsp->__Vm_didInit)) _eval_initial_loop(vlSymsp);
// Evaluate till stable
VL_DEBUG_IF(VL_PRINTF("\n----TOP Evaluate Vcache_simX::eval\n"); );
int __VclockLoop = 0;
QData __Vchange=1;
while (VL_LIKELY(__Vchange)) {
VL_DEBUG_IF(VL_PRINTF(" Clock loop\n"););
vlSymsp->__Vm_activity = true;
_eval(vlSymsp);
__Vchange = _change_request(vlSymsp);
if (++__VclockLoop > 100) vl_fatal(__FILE__,__LINE__,__FILE__,"Verilated model didn't converge");
}
}
// One-time start-up sequence: marks the model initialised, runs _eval_initial(),
// then alternates _eval_settle() and _eval() until _change_request() returns 0.
// vl_fatal() is called once the pass count exceeds 100.
void Vcache_simX::_eval_initial_loop(Vcache_simX__Syms* __restrict vlSymsp) {
    vlSymsp->__Vm_didInit = true;
    _eval_initial(vlSymsp);
    vlSymsp->__Vm_activity = true;

    // At least one settle/eval pass is always executed.
    QData pending;
    int passes = 0;
    do {
        _eval_settle(vlSymsp);
        _eval(vlSymsp);
        pending = _change_request(vlSymsp);
        ++passes;
        if (passes > 100) {
            vl_fatal(__FILE__,__LINE__,__FILE__,"Verilated model didn't DC converge");
        }
    } while (VL_LIKELY(pending));
}
//--------------------
// Internal Methods
// Copies the four dcache request valid bits and the four request addresses
// from the top-level ports into the TOP__v cell (entries 3 down to 0).
VL_INLINE_OPT void Vcache_simX::_combo__TOP__1(Vcache_simX__Syms* __restrict vlSymsp) {
    VL_DEBUG_IF(VL_PRINTF(" Vcache_simX::_combo__TOP__1\n"); );
    Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
    // Body
    for (int lane = 3; lane >= 0; --lane) {
        vlSymsp->TOP__v.in_dcache_in_valid[lane] = vlTOPp->in_dcache_in_valid[lane];
    }
    for (int lane = 3; lane >= 0; --lane) {
        vlSymsp->TOP__v.in_dcache_in_address[lane] = vlTOPp->in_dcache_in_address[lane];
    }
}
// Drives out_icache_stall: the icache new_stored_valid value OR-ed with
// "icache state is non-zero".
VL_INLINE_OPT void Vcache_simX::_combo__TOP__3(Vcache_simX__Syms* __restrict vlSymsp) {
    VL_DEBUG_IF(VL_PRINTF(" Vcache_simX::_combo__TOP__3\n"); );
    Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
    // Body
    const IData storedValid =
        (IData)(vlSymsp->TOP__v__dmem_controller.__PVT__icache__DOT__new_stored_valid);
    const bool stateBusy =
        (0U != (IData)(vlSymsp->TOP__v__dmem_controller.__PVT__icache__DOT__state));
    vlTOPp->out_icache_stall = (storedValid | stateBusy);
}
// Drives out_dcache_stall: set when any of the following is non-zero —
// the shared-memory priority encoder's more_than_one_valid, the dcache
// new_stored_valid, or the dcache state.
VL_INLINE_OPT void Vcache_simX::_combo__TOP__5(Vcache_simX__Syms* __restrict vlSymsp) {
    VL_DEBUG_IF(VL_PRINTF(" Vcache_simX::_combo__TOP__5\n"); );
    Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
    // Body
    const bool encoderBusy =
        (0U != (IData)(vlSymsp->TOP__v__dmem_controller.__PVT__shared_memory__DOT__vx_priority_encoder_sm__DOT__more_than_one_valid));
    const bool storedValid =
        (0U != (IData)(vlSymsp->TOP__v__dmem_controller.__PVT__dcache__DOT__new_stored_valid));
    const bool stateBusy =
        (0U != (IData)(vlSymsp->TOP__v__dmem_controller.__PVT__dcache__DOT__state));
    vlTOPp->out_dcache_stall = (encoderBusy | (storedValid | stateBusy));
}
// Runs one evaluation pass over the whole model.
// The calls below form a fixed schedule: combinational routines run on every
// pass, while the two groups of sequential routines run only when clk or reset
// is 1 now and its saved __Vclklast__ copy is 0 (a rising edge). The saved
// copies are refreshed at the very end, so both edge tests in this pass see
// the same previous values. Do not reorder the calls.
void Vcache_simX::_eval(Vcache_simX__Syms* __restrict vlSymsp) {
VL_DEBUG_IF(VL_PRINTF(" Vcache_simX::_eval\n"); );
Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
// Body
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__1(vlSymsp);
// Activity bits are OR-ed into __Vm_traceActivity as stages run
// (presumably consumed by the trace code, which is not in this file).
vlTOPp->__Vm_traceActivity = (2U | vlTOPp->__Vm_traceActivity);
vlTOPp->_combo__TOP__1(vlSymsp);
// First edge-triggered group: rising edge of clk or of reset.
if ((((IData)(vlTOPp->clk) & (~ (IData)(vlTOPp->__Vclklast__TOP__clk)))
| ((IData)(vlTOPp->reset) & (~ (IData)(vlTOPp->__Vclklast__TOP__reset))))) {
vlSymsp->TOP__v__dmem_controller._sequent__TOP__v__dmem_controller__3(vlSymsp);
vlTOPp->__Vm_traceActivity = (4U | vlTOPp->__Vm_traceActivity);
vlSymsp->TOP__v._sequent__TOP__v__2(vlSymsp);
vlSymsp->TOP__v__dmem_controller._sequent__TOP__v__dmem_controller__4(vlSymsp);
}
vlSymsp->TOP__v._combo__TOP__v__3(vlSymsp);
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__5(vlSymsp);
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__7(vlSymsp);
// Second edge-triggered group: same edge condition as above.
if ((((IData)(vlTOPp->clk) & (~ (IData)(vlTOPp->__Vclklast__TOP__clk)))
| ((IData)(vlTOPp->reset) & (~ (IData)(vlTOPp->__Vclklast__TOP__reset))))) {
vlSymsp->TOP__v__dmem_controller._sequent__TOP__v__dmem_controller__8(vlSymsp);
vlTOPp->__Vm_traceActivity = (8U | vlTOPp->__Vm_traceActivity);
}
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__10(vlSymsp);
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__12(vlSymsp);
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__14(vlSymsp);
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__16(vlSymsp);
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__18(vlSymsp);
// Updates the out_icache_stall port.
vlTOPp->_combo__TOP__3(vlSymsp);
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__20(vlSymsp);
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__22(vlSymsp);
// Updates the out_dcache_stall port.
vlTOPp->_combo__TOP__5(vlSymsp);
// Final
// Remember the current clk/reset values for the next pass's edge detection.
vlTOPp->__Vclklast__TOP__clk = vlTOPp->clk;
vlTOPp->__Vclklast__TOP__reset = vlTOPp->reset;
}
// Hook called once from _eval_initial_loop(). For this design it only emits the
// debug trace line; there is no other work in the body.
void Vcache_simX::_eval_initial(Vcache_simX__Syms* __restrict vlSymsp) {
VL_DEBUG_IF(VL_PRINTF(" Vcache_simX::_eval_initial\n"); );
Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
}
// End-of-simulation hook (the header says the application must call it on
// completion). For this design it only emits the debug trace line.
void Vcache_simX::final() {
VL_DEBUG_IF(VL_PRINTF(" Vcache_simX::final\n"); );
// Variables
Vcache_simX__Syms* __restrict vlSymsp = this->__VlSymsp;
Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
}
// Settle pass used only by _eval_initial_loop(): runs the combinational and
// _settle_ routines in a fixed order, with no clock-edge test, and ends by
// refreshing both stall outputs. Do not reorder the calls.
void Vcache_simX::_eval_settle(Vcache_simX__Syms* __restrict vlSymsp) {
VL_DEBUG_IF(VL_PRINTF(" Vcache_simX::_eval_settle\n"); );
Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
// Body
vlSymsp->TOP__v__dmem_controller._combo__TOP__v__dmem_controller__1(vlSymsp);
// Sets bit 0 of the trace-activity word.
vlTOPp->__Vm_traceActivity = (1U | vlTOPp->__Vm_traceActivity);
vlTOPp->_combo__TOP__1(vlSymsp);
vlSymsp->TOP__v._settle__TOP__v__1(vlSymsp);
vlSymsp->TOP__v._settle__TOP__v__4(vlSymsp);
vlSymsp->TOP__v__dmem_controller._settle__TOP__v__dmem_controller__6(vlSymsp);
vlSymsp->TOP__v__dmem_controller._settle__TOP__v__dmem_controller__9(vlSymsp);
vlSymsp->TOP__v__dmem_controller._settle__TOP__v__dmem_controller__11(vlSymsp);
vlSymsp->TOP__v__dmem_controller._settle__TOP__v__dmem_controller__13(vlSymsp);
vlSymsp->TOP__v__dmem_controller._settle__TOP__v__dmem_controller__15(vlSymsp);
vlSymsp->TOP__v__dmem_controller._settle__TOP__v__dmem_controller__17(vlSymsp);
vlSymsp->TOP__v__dmem_controller._settle__TOP__v__dmem_controller__19(vlSymsp);
// Updates the out_icache_stall port.
vlTOPp->_combo__TOP__3(vlSymsp);
vlSymsp->TOP__v__dmem_controller._settle__TOP__v__dmem_controller__21(vlSymsp);
// Updates the out_dcache_stall port.
vlTOPp->_combo__TOP__5(vlSymsp);
}
// Change detection for the evaluation loops.
// Returns the XOR of the shared-memory block_addr signal with the copy saved on
// the previous call (non-zero means "changed, evaluate again"), then saves the
// current value for the next call.
VL_INLINE_OPT QData Vcache_simX::_change_request(Vcache_simX__Syms* __restrict vlSymsp) {
    VL_DEBUG_IF(VL_PRINTF(" Vcache_simX::_change_request\n"); );
    Vcache_simX* __restrict vlTOPp VL_ATTR_UNUSED = vlSymsp->TOPp;
    // Bits that differ between the current and the previously saved value.
    const QData delta = (vlSymsp->TOP__v__dmem_controller.__PVT__shared_memory__DOT__block_addr
                         ^ vlTOPp->__Vchglast__TOP__v__dmem_controller__shared_memory__DOT__block_addr);
    VL_DEBUG_IF( if(delta) VL_PRINTF(" CHANGE: ../rtl/shared_memory/VX_shared_memory.v:49: shared_memory.block_addr\n"); );
    // Save the current value for the next comparison.
    vlTOPp->__Vchglast__TOP__v__dmem_controller__shared_memory__DOT__block_addr
        = vlSymsp->TOP__v__dmem_controller.__PVT__shared_memory__DOT__block_addr;
    return delta;
}

113
simX/obj_dir/Vcache_simX.h Normal file
View file

@ -0,0 +1,113 @@
// Verilated -*- C++ -*-
// DESCRIPTION: Verilator output: Primary design header
//
// This header should be included by all source files instantiating the design.
// The class here is then constructed to instantiate the design.
// See the Verilator manual for examples.
#ifndef _Vcache_simX_H_
#define _Vcache_simX_H_
#include "verilated.h"
#include "Vcache_simX__Inlines.h"
class Vcache_simX__Syms;
class Vcache_simX_cache_simX;
class VerilatedVcd;
//----------
// Top-level model class for the cache_simX design.
// Application code sets the input port members, calls eval(), and then reads
// the output port members; final() is called once when simulation ends.
// Instances cannot be copied (copy operations are declared private).
VL_MODULE(Vcache_simX) {
public:
// CELLS
// Public to allow access to /*verilator_public*/ items;
// otherwise the application code can consider these internals.
Vcache_simX_cache_simX* __PVT__v;
// PORTS
// The application code writes and reads these signals to
// propagate new values into/out from the Verilated model.
// NOTE(review): the numeric macro arguments look like (msb, lsb) bit bounds —
// confirm against verilated.h.
VL_IN8(clk,0,0);
VL_IN8(reset,0,0);
VL_IN8(in_icache_valid_pc_addr,0,0);
VL_OUT8(out_icache_stall,0,0);
VL_IN8(in_dcache_mem_read,2,0);
VL_IN8(in_dcache_mem_write,2,0);
VL_OUT8(out_dcache_stall,0,0);
//char __VpadToAlign7[1];
VL_IN(in_icache_pc_addr,31,0);
// Four-entry dcache request arrays (valid flag and address per entry).
VL_IN8(in_dcache_in_valid[4],0,0);
VL_IN(in_dcache_in_address[4],31,0);
// LOCAL SIGNALS
// Internals; generally not touched by application code
// LOCAL VARIABLES
// Internals; generally not touched by application code
// Previous clk/reset values, compared against the current ones in _eval().
VL_SIG8(__Vclklast__TOP__clk,0,0);
VL_SIG8(__Vclklast__TOP__reset,0,0);
//char __VpadToAlign42[2];
// Previous block_addr value, compared in _change_request().
VL_SIG(__Vchglast__TOP__v__dmem_controller__shared_memory__DOT__block_addr,27,0);
// Bit flags OR-ed in by _eval() / _eval_settle().
VL_SIG(__Vm_traceActivity,31,0);
// INTERNAL VARIABLES
// Internals; generally not touched by application code
Vcache_simX__Syms* __VlSymsp; // Symbol table
// PARAMETERS
// Parameters marked /*verilator public*/ for use by application code
// CONSTRUCTORS
private:
Vcache_simX& operator= (const Vcache_simX&); ///< Copying not allowed
Vcache_simX(const Vcache_simX&); ///< Copying not allowed
public:
/// Construct the model; called by application code
/// The special name may be used to make a wrapper with a
/// single model invisible WRT DPI scope names.
Vcache_simX(const char* name="TOP");
/// Destroy the model; called (often implicitly) by application code
~Vcache_simX();
/// Trace signals in the model; called by application code
void trace (VerilatedVcdC* tfp, int levels, int options=0);
// USER METHODS
// API METHODS
/// Evaluate the model. Application must call when inputs change.
void eval();
/// Simulation complete, run final blocks. Application must call on completion.
void final();
// INTERNAL METHODS
private:
static void _eval_initial_loop(Vcache_simX__Syms* __restrict vlSymsp);
public:
void __Vconfigure(Vcache_simX__Syms* symsp, bool first);
private:
static QData _change_request(Vcache_simX__Syms* __restrict vlSymsp);
public:
// Evaluation routines; all take the symbol table explicitly and are static.
static void _combo__TOP__1(Vcache_simX__Syms* __restrict vlSymsp);
static void _combo__TOP__3(Vcache_simX__Syms* __restrict vlSymsp);
static void _combo__TOP__5(Vcache_simX__Syms* __restrict vlSymsp);
static void _eval(Vcache_simX__Syms* __restrict vlSymsp);
static void _eval_initial(Vcache_simX__Syms* __restrict vlSymsp);
static void _eval_settle(Vcache_simX__Syms* __restrict vlSymsp);
// Waveform-trace routines (their definitions are not in Vcache_simX.cpp).
static void traceChgThis(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
static void traceChgThis__2(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
static void traceChgThis__3(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
static void traceChgThis__4(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
static void traceChgThis__5(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
static void traceChgThis__6(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
static void traceChgThis__7(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
static void traceChgThis__8(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
static void traceChgThis__9(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
static void traceFullThis(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
static void traceFullThis__1(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
static void traceInitThis(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
static void traceInitThis__1(Vcache_simX__Syms* __restrict vlSymsp, VerilatedVcd* vcdp, uint32_t code);
static void traceInit (VerilatedVcd* vcdp, void* userthis, uint32_t code);
static void traceFull (VerilatedVcd* vcdp, void* userthis, uint32_t code);
static void traceChg (VerilatedVcd* vcdp, void* userthis, uint32_t code);
} VL_ATTR_ALIGNED(128);
#endif /*guard*/

View file

@ -0,0 +1,91 @@
# Verilated -*- Makefile -*-
# DESCRIPTION: Verilator output: Makefile for building Verilated archive or executable
#
# Execute this makefile from the object directory:
#    make -f Vcache_simX.mk

default: Vcache_simX

### Constants...
# Perl executable (from $PERL)
PERL = perl
# Path to Verilator kit (from $VERILATOR_ROOT)
VERILATOR_ROOT = /usr/share/verilator
# Path to SystemPerl kit top (from $SYSTEMPERL)
SYSTEMPERL =
# Path to SystemPerl kit includes (from $SYSTEMPERL_INCLUDE)
SYSTEMPERL_INCLUDE =
# SystemC include directory with systemc.h (from $SYSTEMC_INCLUDE)
SYSTEMC_INCLUDE ?=
# SystemC library directory with libsystemc.a (from $SYSTEMC_LIBDIR)
SYSTEMC_LIBDIR ?=

### Switches...
# SystemPerl output mode?  0/1 (from --sp)
VM_SP = 0
# SystemC output mode?  0/1 (from --sc)
VM_SC = 0
# SystemPerl or SystemC output mode?  0/1 (from --sp/--sc)
VM_SP_OR_SC = 0
# Deprecated
VM_PCLI = 1
# Deprecated: SystemC architecture to find link library path (from $SYSTEMC_ARCH)
VM_SC_TARGET_ARCH = linux

### Vars...
# Design prefix (from --prefix)
VM_PREFIX = Vcache_simX
# Module prefix (from --prefix)
VM_MODPREFIX = Vcache_simX

# NOTE: the list variables below deliberately end WITHOUT a trailing backslash,
# so a continuation can never run into the comment line that follows.

# User CFLAGS (from -CFLAGS on Verilator command line)
VM_USER_CFLAGS = \
	-std=c++11 -fPIC -O3

# User LDLIBS (from -LDFLAGS on Verilator command line)
VM_USER_LDLIBS =

# User .cpp files (from .cpp's on Verilator command line)
VM_USER_CLASSES = \
	args \
	core \
	enc \
	instruction \
	mem \
	simX \
	util

# User .cpp directories (from .cpp's on Verilator command line)
VM_USER_DIR = \
	.

### Default rules...
# Include list of all generated classes
include Vcache_simX_classes.mk
# Include global rules
include $(VERILATOR_ROOT)/include/verilated.mk

### Executable rules... (from --exe)
VPATH += $(VM_USER_DIR)

args.o: args.cpp
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
core.o: core.cpp
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
enc.o: enc.cpp
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
instruction.o: instruction.cpp
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
mem.o: mem.cpp
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
simX.o: simX.cpp
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
util.o: util.cpp
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<

### Link rules... (from --exe)
# The linker is invoked directly rather than through "2>&1 | c++filt": with a
# pipeline the recipe's exit status is that of c++filt, so a failed link would
# be reported to make as success. Pipe the make output through c++filt by hand
# if demangled linker errors are wanted.
Vcache_simX: $(VK_USER_OBJS) $(VK_GLOBAL_OBJS) $(VM_PREFIX)__ALL.a
	$(LINK) $(LDFLAGS) $^ $(LOADLIBES) $(LDLIBS) -o $@ $(LIBS) $(SC_LIBS)
# Verilated -*- Makefile -*-

View file

@ -0,0 +1,30 @@
// Verilated -*- C++ -*-
// DESCRIPTION: Verilator output: Design implementation internals
// See Vcache_simX.h for the primary calling header
#include "Vcache_simX_VX_dcache_request_inter.h" // For This
#include "Vcache_simX__Syms.h"
//--------------------
// STATIC VARIABLES
//--------------------
// Constructor (signature supplied by VL_CTOR_IMP): gives the two local signals
// of this class their reset values via the VL_RAND_RESET_* macros.
VL_CTOR_IMP(Vcache_simX_VX_dcache_request_inter) {
// Reset internal values
// Reset structure values
// 128-bit wide signal, reset through the wide-signal variant of the macro.
VL_RAND_RESET_W(128,__PVT__out_cache_driver_in_address);
// 4-bit signal.
__PVT__out_cache_driver_in_valid = VL_RAND_RESET_I(4);
}
// Stores the symbol-table pointer handed in by the caller.
// `first` is accepted but not used.
void Vcache_simX_VX_dcache_request_inter::__Vconfigure(Vcache_simX__Syms* vlSymsp, bool first) {
    (void)first;  // silence the unused-parameter warning
    __VlSymsp = vlSymsp;
}
// Destructor: intentionally empty. Unlike the top-level Vcache_simX destructor,
// it does not delete __VlSymsp.
Vcache_simX_VX_dcache_request_inter::~Vcache_simX_VX_dcache_request_inter() {
}
//--------------------
// Internal Methods

View file

@ -0,0 +1,55 @@
// Verilated -*- C++ -*-
// DESCRIPTION: Verilator output: Design internal header
// See Vcache_simX.h for the primary calling header
#ifndef _Vcache_simX_VX_dcache_request_inter_H_
#define _Vcache_simX_VX_dcache_request_inter_H_
#include "verilated.h"
#include "Vcache_simX__Inlines.h"
class Vcache_simX__Syms;
class VerilatedVcd;
//----------
// Generated class for the VX_dcache_request_inter unit of the cache_simX model.
// It carries two local signals and the symbol-table pointer; it has no ports
// and no evaluation routines of its own. Instances cannot be copied.
VL_MODULE(Vcache_simX_VX_dcache_request_inter) {
public:
// CELLS
// PORTS
// LOCAL SIGNALS
// 4-bit valid signal and 128-bit address signal (stored as 4 words).
VL_SIG8(__PVT__out_cache_driver_in_valid,3,0);
//char __VpadToAlign5[3];
VL_SIGW(__PVT__out_cache_driver_in_address,127,0,4);
// LOCAL VARIABLES
// INTERNAL VARIABLES
private:
Vcache_simX__Syms* __VlSymsp; // Symbol table
public:
// PARAMETERS
// CONSTRUCTORS
private:
Vcache_simX_VX_dcache_request_inter& operator= (const Vcache_simX_VX_dcache_request_inter&); ///< Copying not allowed
Vcache_simX_VX_dcache_request_inter(const Vcache_simX_VX_dcache_request_inter&); ///< Copying not allowed
public:
Vcache_simX_VX_dcache_request_inter(const char* name="TOP");
~Vcache_simX_VX_dcache_request_inter();
void trace (VerilatedVcdC* tfp, int levels, int options=0);
// USER METHODS
// API METHODS
// INTERNAL METHODS
// Stores the symbol-table pointer; `first` is unused by the implementation.
void __Vconfigure(Vcache_simX__Syms* symsp, bool first);
static void traceInit (VerilatedVcd* vcdp, void* userthis, uint32_t code);
static void traceFull (VerilatedVcd* vcdp, void* userthis, uint32_t code);
static void traceChg (VerilatedVcd* vcdp, void* userthis, uint32_t code);
} VL_ATTR_ALIGNED(128);
#endif /*guard*/

View file

@ -0,0 +1,28 @@
// Verilated -*- C++ -*-
// DESCRIPTION: Verilator output: Design implementation internals
// See Vcache_simX.h for the primary calling header
#include "Vcache_simX_VX_dcache_response_inter.h" // For This
#include "Vcache_simX__Syms.h"
//--------------------
// STATIC VARIABLES
//--------------------
// Constructor (signature supplied by VL_CTOR_IMP). The class declares no
// signals, so there is nothing to reset and the body is empty.
VL_CTOR_IMP(Vcache_simX_VX_dcache_response_inter) {
// Reset internal values
// Reset structure values
}
// Stores the symbol-table pointer handed in by the caller.
// `first` is accepted but not used.
void Vcache_simX_VX_dcache_response_inter::__Vconfigure(Vcache_simX__Syms* vlSymsp, bool first) {
    (void)first;  // silence the unused-parameter warning
    __VlSymsp = vlSymsp;
}
// Destructor: intentionally empty. Unlike the top-level Vcache_simX destructor,
// it does not delete __VlSymsp.
Vcache_simX_VX_dcache_response_inter::~Vcache_simX_VX_dcache_response_inter() {
}
//--------------------
// Internal Methods

View file

@ -0,0 +1,53 @@
// Verilated -*- C++ -*-
// DESCRIPTION: Verilator output: Design internal header
// See Vcache_simX.h for the primary calling header
#ifndef _Vcache_simX_VX_dcache_response_inter_H_
#define _Vcache_simX_VX_dcache_response_inter_H_
#include "verilated.h"
#include "Vcache_simX__Inlines.h"
class Vcache_simX__Syms;
class VerilatedVcd;
//----------
// Generated class for the VX_dcache_response_inter unit of the cache_simX model.
// It declares no signals; its only data member is the symbol-table pointer.
// Instances cannot be copied.
VL_MODULE(Vcache_simX_VX_dcache_response_inter) {
public:
// CELLS
// PORTS
// LOCAL SIGNALS
// LOCAL VARIABLES
// INTERNAL VARIABLES
private:
//char __VpadToAlign12[4];
Vcache_simX__Syms* __VlSymsp; // Symbol table
public:
// PARAMETERS
// CONSTRUCTORS
private:
Vcache_simX_VX_dcache_response_inter& operator= (const Vcache_simX_VX_dcache_response_inter&); ///< Copying not allowed
Vcache_simX_VX_dcache_response_inter(const Vcache_simX_VX_dcache_response_inter&); ///< Copying not allowed
public:
Vcache_simX_VX_dcache_response_inter(const char* name="TOP");
~Vcache_simX_VX_dcache_response_inter();
void trace (VerilatedVcdC* tfp, int levels, int options=0);
// USER METHODS
// API METHODS
// INTERNAL METHODS
// Stores the symbol-table pointer; `first` is unused by the implementation.
void __Vconfigure(Vcache_simX__Syms* symsp, bool first);
static void traceInit (VerilatedVcd* vcdp, void* userthis, uint32_t code);
static void traceFull (VerilatedVcd* vcdp, void* userthis, uint32_t code);
static void traceChg (VerilatedVcd* vcdp, void* userthis, uint32_t code);
} VL_ATTR_ALIGNED(128);
#endif /*guard*/

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,29 @@
// Verilated -*- C++ -*-
// DESCRIPTION: Verilator output: Design implementation internals
// See Vcache_simX.h for the primary calling header
#include "Vcache_simX_VX_dram_req_rsp_inter__N1_NB4.h" // For This
#include "Vcache_simX__Syms.h"
//--------------------
// STATIC VARIABLES
//--------------------
// Constructor (signature supplied by VL_CTOR_IMP): gives the single local
// signal of this class its reset value.
VL_CTOR_IMP(Vcache_simX_VX_dram_req_rsp_inter__N1_NB4) {
// Reset internal values
// Reset structure values
// 128-bit wide signal, reset through the wide-signal variant of the macro.
VL_RAND_RESET_W(128,__PVT__i_m_readdata);
}
// Stores the symbol-table pointer handed in by the caller.
// `first` is accepted but not used.
void Vcache_simX_VX_dram_req_rsp_inter__N1_NB4::__Vconfigure(Vcache_simX__Syms* vlSymsp, bool first) {
    (void)first;  // silence the unused-parameter warning
    __VlSymsp = vlSymsp;
}
// Destructor: intentionally empty. Unlike the top-level Vcache_simX destructor,
// it does not delete __VlSymsp.
Vcache_simX_VX_dram_req_rsp_inter__N1_NB4::~Vcache_simX_VX_dram_req_rsp_inter__N1_NB4() {
}
//--------------------
// Internal Methods

View file

@ -0,0 +1,54 @@
// Verilated -*- C++ -*-
// DESCRIPTION: Verilator output: Design internal header
// See Vcache_simX.h for the primary calling header
#ifndef _Vcache_simX_VX_dram_req_rsp_inter__N1_NB4_H_
#define _Vcache_simX_VX_dram_req_rsp_inter__N1_NB4_H_
#include "verilated.h"
#include "Vcache_simX__Inlines.h"
class Vcache_simX__Syms;
class VerilatedVcd;
//----------
// Generated class for the VX_dram_req_rsp_inter unit (N1_NB4 variant) of the
// cache_simX model. It carries one 128-bit local signal and the symbol-table
// pointer. Instances cannot be copied.
VL_MODULE(Vcache_simX_VX_dram_req_rsp_inter__N1_NB4) {
public:
// CELLS
// PORTS
// LOCAL SIGNALS
//char __VpadToAlign4[4];
// 128-bit read-data signal (stored as 4 words).
VL_SIGW(__PVT__i_m_readdata,127,0,4);
// LOCAL VARIABLES
// INTERNAL VARIABLES
private:
Vcache_simX__Syms* __VlSymsp; // Symbol table
public:
// PARAMETERS
// CONSTRUCTORS
private:
Vcache_simX_VX_dram_req_rsp_inter__N1_NB4& operator= (const Vcache_simX_VX_dram_req_rsp_inter__N1_NB4&); ///< Copying not allowed
Vcache_simX_VX_dram_req_rsp_inter__N1_NB4(const Vcache_simX_VX_dram_req_rsp_inter__N1_NB4&); ///< Copying not allowed
public:
Vcache_simX_VX_dram_req_rsp_inter__N1_NB4(const char* name="TOP");
~Vcache_simX_VX_dram_req_rsp_inter__N1_NB4();
void trace (VerilatedVcdC* tfp, int levels, int options=0);
// USER METHODS
// API METHODS
// INTERNAL METHODS
// Stores the symbol-table pointer; `first` is unused by the implementation.
void __Vconfigure(Vcache_simX__Syms* symsp, bool first);
static void traceInit (VerilatedVcd* vcdp, void* userthis, uint32_t code);
static void traceFull (VerilatedVcd* vcdp, void* userthis, uint32_t code);
static void traceChg (VerilatedVcd* vcdp, void* userthis, uint32_t code);
} VL_ATTR_ALIGNED(128);
#endif /*guard*/

View file

@ -0,0 +1,29 @@
// Verilated -*- C++ -*-
// DESCRIPTION: Verilator output: Design implementation internals
// See Vcache_simX.h for the primary calling header
#include "Vcache_simX_VX_dram_req_rsp_inter__N4_NB4.h" // For This
#include "Vcache_simX__Syms.h"
//--------------------
// STATIC VARIABLES
//--------------------
// Constructor (signature supplied by VL_CTOR_IMP): gives the single local
// signal of this class its reset value.
VL_CTOR_IMP(Vcache_simX_VX_dram_req_rsp_inter__N4_NB4) {
// Reset internal values
// Reset structure values
// 512-bit wide signal, reset through the wide-signal variant of the macro.
VL_RAND_RESET_W(512,__PVT__i_m_readdata);
}
// Stores the symbol-table pointer handed in by the caller.
// `first` is accepted but not used.
void Vcache_simX_VX_dram_req_rsp_inter__N4_NB4::__Vconfigure(Vcache_simX__Syms* vlSymsp, bool first) {
    (void)first;  // silence the unused-parameter warning
    __VlSymsp = vlSymsp;
}
// Destructor: intentionally empty. Unlike the top-level Vcache_simX destructor,
// it does not delete __VlSymsp.
Vcache_simX_VX_dram_req_rsp_inter__N4_NB4::~Vcache_simX_VX_dram_req_rsp_inter__N4_NB4() {
}
//--------------------
// Internal Methods

View file

@ -0,0 +1,54 @@
// Verilated -*- C++ -*-
// DESCRIPTION: Verilator output: Design internal header
// See Vcache_simX.h for the primary calling header
#ifndef _Vcache_simX_VX_dram_req_rsp_inter__N4_NB4_H_
#define _Vcache_simX_VX_dram_req_rsp_inter__N4_NB4_H_
#include "verilated.h"
#include "Vcache_simX__Inlines.h"
class Vcache_simX__Syms;
class VerilatedVcd;
//----------
// Generated class for the VX_dram_req_rsp_inter unit (N4_NB4 variant) of the
// cache_simX model. It carries one 512-bit local signal and the symbol-table
// pointer. Instances cannot be copied.
VL_MODULE(Vcache_simX_VX_dram_req_rsp_inter__N4_NB4) {
public:
// CELLS
// PORTS
// LOCAL SIGNALS
//char __VpadToAlign4[4];
// 512-bit read-data signal (stored as 16 words).
VL_SIGW(__PVT__i_m_readdata,511,0,16);
// LOCAL VARIABLES
// INTERNAL VARIABLES
private:
Vcache_simX__Syms* __VlSymsp; // Symbol table
public:
// PARAMETERS
// CONSTRUCTORS
private:
Vcache_simX_VX_dram_req_rsp_inter__N4_NB4& operator= (const Vcache_simX_VX_dram_req_rsp_inter__N4_NB4&); ///< Copying not allowed
Vcache_simX_VX_dram_req_rsp_inter__N4_NB4(const Vcache_simX_VX_dram_req_rsp_inter__N4_NB4&); ///< Copying not allowed
public:
Vcache_simX_VX_dram_req_rsp_inter__N4_NB4(const char* name="TOP");
~Vcache_simX_VX_dram_req_rsp_inter__N4_NB4();
void trace (VerilatedVcdC* tfp, int levels, int options=0);
// USER METHODS
// API METHODS
// INTERNAL METHODS
// Stores the symbol-table pointer; `first` is unused by the implementation.
void __Vconfigure(Vcache_simX__Syms* symsp, bool first);
static void traceInit (VerilatedVcd* vcdp, void* userthis, uint32_t code);
static void traceFull (VerilatedVcd* vcdp, void* userthis, uint32_t code);
static void traceChg (VerilatedVcd* vcdp, void* userthis, uint32_t code);
} VL_ATTR_ALIGNED(128);
#endif /*guard*/

View file

@ -0,0 +1,28 @@
// Verilated -*- C++ -*-
// DESCRIPTION: Verilator output: Design implementation internals
// See Vcache_simX.h for the primary calling header
#include "Vcache_simX_VX_icache_request_inter.h" // For This
#include "Vcache_simX__Syms.h"
//--------------------
// STATIC VARIABLES
//--------------------
// Constructor; its signature is supplied by the VL_CTOR_IMP macro.
// This module has no internal or structure values to reset, so the body is empty.
VL_CTOR_IMP(Vcache_simX_VX_icache_request_inter) {}
// Stores the supplied symbol-table pointer in this instance.
//   vlSymsp - pointer saved into __VlSymsp
//   first   - accepted but not used by this module
void Vcache_simX_VX_icache_request_inter::__Vconfigure(Vcache_simX__Syms* vlSymsp, bool first) {
    static_cast<void>(first);  // silence the unused-parameter warning
    __VlSymsp = vlSymsp;
}
// Destructor: performs no explicit cleanup.
Vcache_simX_VX_icache_request_inter::~Vcache_simX_VX_icache_request_inter() {}
//--------------------
// Internal Methods

View file

@ -0,0 +1,53 @@
// Verilated -*- C++ -*-
// DESCRIPTION: Verilator output: Design internal header
// See Vcache_simX.h for the primary calling header
#ifndef _Vcache_simX_VX_icache_request_inter_H_
#define _Vcache_simX_VX_icache_request_inter_H_
#include "verilated.h"
#include "Vcache_simX__Inlines.h"
class Vcache_simX__Syms;
class VerilatedVcd;
//----------
// Verilator-generated internal class for the module
// Vcache_simX_VX_icache_request_inter. It declares no signals of its own in
// this header, only a private pointer to the symbol table; the copy
// constructor and copy assignment are declared private so instances cannot
// be copied.
// NOTE(review): VL_MODULE comes from verilated.h and presumably opens the
// class definition — confirm against the Verilator version in use.
VL_MODULE(Vcache_simX_VX_icache_request_inter) {
public:
// CELLS
// PORTS
// LOCAL SIGNALS
// LOCAL VARIABLES
// INTERNAL VARIABLES
private:
//char __VpadToAlign12[4];
Vcache_simX__Syms* __VlSymsp; // Symbol table
public:
// PARAMETERS
// CONSTRUCTORS
private:
Vcache_simX_VX_icache_request_inter& operator= (const Vcache_simX_VX_icache_request_inter&); ///< Copying not allowed
Vcache_simX_VX_icache_request_inter(const Vcache_simX_VX_icache_request_inter&); ///< Copying not allowed
public:
// Constructs the module; the instance name defaults to "TOP".
Vcache_simX_VX_icache_request_inter(const char* name="TOP");
~Vcache_simX_VX_icache_request_inter();
// Trace entry point; only declared in this header.
void trace (VerilatedVcdC* tfp, int levels, int options=0);
// USER METHODS
// API METHODS
// INTERNAL METHODS
// Receives the symbol-table pointer for this instance (see the .cpp definition).
void __Vconfigure(Vcache_simX__Syms* symsp, bool first);
// Static trace hooks; only declared in this header.
// NOTE(review): userthis is presumably the module instance passed back as void* — confirm.
static void traceInit (VerilatedVcd* vcdp, void* userthis, uint32_t code);
static void traceFull (VerilatedVcd* vcdp, void* userthis, uint32_t code);
static void traceChg (VerilatedVcd* vcdp, void* userthis, uint32_t code);
// NOTE(review): VL_ATTR_ALIGNED(128) presumably requests 128-byte alignment — confirm macro definition.
} VL_ATTR_ALIGNED(128);
#endif /*guard*/

View file

@ -0,0 +1,28 @@
// Verilated -*- C++ -*-
// DESCRIPTION: Verilator output: Design implementation internals
// See Vcache_simX.h for the primary calling header
#include "Vcache_simX_VX_icache_response_inter.h" // For This
#include "Vcache_simX__Syms.h"
//--------------------
// STATIC VARIABLES
//--------------------
// Constructor; its signature is supplied by the VL_CTOR_IMP macro.
// This module has no internal or structure values to reset, so the body is empty.
VL_CTOR_IMP(Vcache_simX_VX_icache_response_inter) {}
// Stores the supplied symbol-table pointer in this instance.
//   vlSymsp - pointer saved into __VlSymsp
//   first   - accepted but not used by this module
void Vcache_simX_VX_icache_response_inter::__Vconfigure(Vcache_simX__Syms* vlSymsp, bool first) {
    static_cast<void>(first);  // silence the unused-parameter warning
    __VlSymsp = vlSymsp;
}
// Destructor: performs no explicit cleanup.
Vcache_simX_VX_icache_response_inter::~Vcache_simX_VX_icache_response_inter() {}
//--------------------
// Internal Methods

View file

@ -0,0 +1,53 @@
// Verilated -*- C++ -*-
// DESCRIPTION: Verilator output: Design internal header
// See Vcache_simX.h for the primary calling header
#ifndef _Vcache_simX_VX_icache_response_inter_H_
#define _Vcache_simX_VX_icache_response_inter_H_
#include "verilated.h"
#include "Vcache_simX__Inlines.h"
class Vcache_simX__Syms;
class VerilatedVcd;
//----------
// Verilator-generated internal class for the module
// Vcache_simX_VX_icache_response_inter. It declares no signals of its own in
// this header, only a private pointer to the symbol table; the copy
// constructor and copy assignment are declared private so instances cannot
// be copied.
// NOTE(review): VL_MODULE comes from verilated.h and presumably opens the
// class definition — confirm against the Verilator version in use.
VL_MODULE(Vcache_simX_VX_icache_response_inter) {
public:
// CELLS
// PORTS
// LOCAL SIGNALS
// LOCAL VARIABLES
// INTERNAL VARIABLES
private:
//char __VpadToAlign12[4];
Vcache_simX__Syms* __VlSymsp; // Symbol table
public:
// PARAMETERS
// CONSTRUCTORS
private:
Vcache_simX_VX_icache_response_inter& operator= (const Vcache_simX_VX_icache_response_inter&); ///< Copying not allowed
Vcache_simX_VX_icache_response_inter(const Vcache_simX_VX_icache_response_inter&); ///< Copying not allowed
public:
// Constructs the module; the instance name defaults to "TOP".
Vcache_simX_VX_icache_response_inter(const char* name="TOP");
~Vcache_simX_VX_icache_response_inter();
// Trace entry point; only declared in this header.
void trace (VerilatedVcdC* tfp, int levels, int options=0);
// USER METHODS
// API METHODS
// INTERNAL METHODS
// Receives the symbol-table pointer for this instance (see the .cpp definition).
void __Vconfigure(Vcache_simX__Syms* symsp, bool first);
// Static trace hooks; only declared in this header.
// NOTE(review): userthis is presumably the module instance passed back as void* — confirm.
static void traceInit (VerilatedVcd* vcdp, void* userthis, uint32_t code);
static void traceFull (VerilatedVcd* vcdp, void* userthis, uint32_t code);
static void traceChg (VerilatedVcd* vcdp, void* userthis, uint32_t code);
// NOTE(review): VL_ATTR_ALIGNED(128) presumably requests 128-byte alignment — confirm macro definition.
} VL_ATTR_ALIGNED(128);
#endif /*guard*/

Binary file not shown.

View file

@ -0,0 +1,11 @@
// DESCRIPTION: Generated by verilator_includer via makefile
#define VL_INCLUDE_OPT include
#include "Vcache_simX.cpp"
#include "Vcache_simX_cache_simX.cpp"
#include "Vcache_simX_VX_dmem_controller__V0_VB1000.cpp"
#include "Vcache_simX_VX_icache_request_inter.cpp"
#include "Vcache_simX_VX_icache_response_inter.cpp"
#include "Vcache_simX_VX_dram_req_rsp_inter__N4_NB4.cpp"
#include "Vcache_simX_VX_dram_req_rsp_inter__N1_NB4.cpp"
#include "Vcache_simX_VX_dcache_request_inter.cpp"
#include "Vcache_simX_VX_dcache_response_inter.cpp"

Some files were not shown because too many files have changed in this diff Show more