//--by Jianbin Fang #include #include #include #include #include #include #include #ifdef PROFILING #include "timer.h" #endif #include "CLHelper.h" #include "util.h" #define MAX_THREADS_PER_BLOCK 256 // Structure to hold a node information struct Node { int starting; int no_of_edges; }; //---------------------------------------------------------- //--bfs on cpu //--programmer: jianbin //--date: 26/01/2011 //--note: width is changed to the new_width //---------------------------------------------------------- void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, char *h_graph_visited, int *h_cost_ref) { char stop; int k = 0; do { // if no thread changes this value then the loop stops stop = false; for (int tid = 0; tid < no_of_nodes; tid++) { if (h_graph_mask[tid] == true) { h_graph_mask[tid] = false; for (int i = h_graph_nodes[tid].starting; i < (h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting); i++) { int id = h_graph_edges[i]; //--cambine: node id is connected with node tid if (!h_graph_visited[id]) { //--cambine: if node id has not been //visited, enter the body below h_cost_ref[id] = h_cost_ref[tid] + 1; h_updating_graph_mask[id] = true; } } } } for (int tid = 0; tid < no_of_nodes; tid++) { if (h_updating_graph_mask[tid] == true) { h_graph_mask[tid] = true; h_graph_visited[tid] = true; stop = true; h_updating_graph_mask[tid] = false; } } k++; } while (stop); } //---------------------------------------------------------- //--breadth first search on GPUs //---------------------------------------------------------- void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, char *h_graph_visited, int *h_cost) throw(std::string) { // int number_elements = height*width; char h_over; cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, d_graph_visited, d_cost, d_over; try { //--1 transfer data from host to device _clInit(); d_graph_nodes = _clMalloc(no_of_nodes * sizeof(Node), h_graph_nodes); d_graph_edges = _clMalloc(edge_list_size * sizeof(int), h_graph_edges); d_graph_mask = _clMallocRW(no_of_nodes * sizeof(char), h_graph_mask); d_updating_graph_mask = _clMallocRW(no_of_nodes * sizeof(char), h_updating_graph_mask); d_graph_visited = _clMallocRW(no_of_nodes * sizeof(char), h_graph_visited); d_cost = _clMallocRW(no_of_nodes * sizeof(int), h_cost); d_over = _clMallocRW(sizeof(char), &h_over); _clMemcpyH2D(d_graph_nodes, no_of_nodes * sizeof(Node), h_graph_nodes); _clMemcpyH2D(d_graph_edges, edge_list_size * sizeof(int), h_graph_edges); _clMemcpyH2D(d_graph_mask, no_of_nodes * sizeof(char), h_graph_mask); _clMemcpyH2D(d_updating_graph_mask, no_of_nodes * sizeof(char), h_updating_graph_mask); _clMemcpyH2D(d_graph_visited, no_of_nodes * sizeof(char), h_graph_visited); _clMemcpyH2D(d_cost, no_of_nodes * sizeof(int), h_cost); //--2 invoke kernel #ifdef PROFILING timer kernel_timer; double kernel_time = 0.0; kernel_timer.reset(); kernel_timer.start(); #endif do { h_over = false; _clMemcpyH2D(d_over, sizeof(char), &h_over); //--kernel 0 int kernel_id = 0; int kernel_idx = 0; _clSetArgs(kernel_id, kernel_idx++, d_graph_nodes); _clSetArgs(kernel_id, kernel_idx++, d_graph_edges); _clSetArgs(kernel_id, kernel_idx++, d_graph_mask); _clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask); _clSetArgs(kernel_id, kernel_idx++, d_graph_visited); _clSetArgs(kernel_id, kernel_idx++, d_cost); _clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int)); // int work_items = no_of_nodes; _clInvokeKernel(kernel_id, no_of_nodes, work_group_size); //--kernel 1 kernel_id = 1; kernel_idx = 0; _clSetArgs(kernel_id, kernel_idx++, d_graph_mask); _clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask); _clSetArgs(kernel_id, kernel_idx++, d_graph_visited); _clSetArgs(kernel_id, kernel_idx++, d_over); _clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int)); // work_items = no_of_nodes; _clInvokeKernel(kernel_id, no_of_nodes, work_group_size); _clMemcpyD2H(d_over, sizeof(char), &h_over); } while (h_over); #ifdef PROFILING kernel_timer.stop(); kernel_time = kernel_timer.getTimeInSeconds(); #endif //--3 transfer data from device to host _clMemcpyD2H(d_cost, no_of_nodes * sizeof(int), h_cost); //--statistics #ifdef PROFILING std::cout << "kernel time(s):" << kernel_time << std::endl; #endif //--4 release cl resources. _clFree(d_graph_nodes); _clFree(d_graph_edges); _clFree(d_graph_mask); _clFree(d_updating_graph_mask); _clFree(d_graph_visited); _clFree(d_cost); _clFree(d_over); _clRelease(); } catch (std::string msg) { _clFree(d_graph_nodes); _clFree(d_graph_edges); _clFree(d_graph_mask); _clFree(d_updating_graph_mask); _clFree(d_graph_visited); _clFree(d_cost); _clFree(d_over); _clRelease(); std::string e_str = "in run_transpose_gpu -> "; e_str += msg; throw(e_str); } return; } //---------------------------------------------------------- //--cambine: main function //--author: created by Jianbin Fang //--date: 25/01/2011 //---------------------------------------------------------- int main(int argc, char *argv[]) { printf("enter demo main\n"); int no_of_nodes; int edge_list_size; FILE *fp; Node *h_graph_nodes; char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited; try { char *input_f = "graph4096.txt"; printf("Reading File\n"); // Read in Graph from a file fp = fopen(input_f, "r"); if (!fp) { printf("Error Reading graph file\n"); return 0; } printf("Reading File completed!\n"); int source = 0; fscanf(fp, "%d", &no_of_nodes); int num_of_blocks = 1; int num_of_threads_per_block = no_of_nodes; // Make execution Parameters according to the number of nodes // Distribute threads across multiple Blocks if necessary if (no_of_nodes > MAX_THREADS_PER_BLOCK) { num_of_blocks = (int)ceil(no_of_nodes / (double)MAX_THREADS_PER_BLOCK); num_of_threads_per_block = MAX_THREADS_PER_BLOCK; } work_group_size = num_of_threads_per_block; // allocate host memory h_graph_nodes = (Node *)malloc(sizeof(Node) * no_of_nodes); h_graph_mask = (char *)malloc(sizeof(char) * no_of_nodes); h_updating_graph_mask = (char *)malloc(sizeof(char) * no_of_nodes); h_graph_visited = (char *)malloc(sizeof(char) * no_of_nodes); int start, edgeno; // initalize the memory for (int i = 0; i < no_of_nodes; i++) { fscanf(fp, "%d %d", &start, &edgeno); h_graph_nodes[i].starting = start; h_graph_nodes[i].no_of_edges = edgeno; h_graph_mask[i] = false; h_updating_graph_mask[i] = false; h_graph_visited[i] = false; } // read the source node from the file fscanf(fp, "%d", &source); source = 0; // set the source node as true in the mask h_graph_mask[source] = true; h_graph_visited[source] = true; fscanf(fp, "%d", &edge_list_size); int id, cost; int *h_graph_edges = (int *)malloc(sizeof(int) * edge_list_size); for (int i = 0; i < edge_list_size; i++) { fscanf(fp, "%d", &id); fscanf(fp, "%d", &cost); h_graph_edges[i] = id; } if (fp) fclose(fp); // allocate mem for the result on host side int *h_cost = (int *)malloc(sizeof(int) * no_of_nodes); int *h_cost_ref = (int *)malloc(sizeof(int) * no_of_nodes); for (int i = 0; i < no_of_nodes; i++) { h_cost[i] = -1; h_cost_ref[i] = -1; } h_cost[source] = 0; h_cost_ref[source] = 0; //--------------------------------------------------------- //--gpu entry run_bfs_gpu(no_of_nodes, h_graph_nodes, edge_list_size, h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost); //--------------------------------------------------------- //--cpu entry // initalize the memory again for (int i = 0; i < no_of_nodes; i++) { h_graph_mask[i] = false; h_updating_graph_mask[i] = false; h_graph_visited[i] = false; } // set the source node as true in the mask source = 0; h_graph_mask[source] = true; h_graph_visited[source] = true; run_bfs_cpu(no_of_nodes, h_graph_nodes, edge_list_size, h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost_ref); //--------------------------------------------------------- //--result varification compare_results(h_cost_ref, h_cost, no_of_nodes); // release host memory free(h_graph_nodes); free(h_graph_mask); free(h_updating_graph_mask); free(h_graph_visited); } catch (std::string msg) { std::cout << "--cambine: exception in main ->" << msg << std::endl; // release host memory free(h_graph_nodes); free(h_graph_mask); free(h_updating_graph_mask); free(h_graph_visited); } printf("Passed!\n"); return 0; }