CUDA+QT+CMake on Ubuntu:
I'm about to implement a CUDA program using Qt Creator and CMake on Ubuntu. I haven't used them together before, so this note explains how I got them to work together. I read up on combining them and they should work properly; please see the posts by sn0v and Dayal.
Here is a brief explanation of the setting process:
- setting CUDA
- download the CUDA 5 product from here
- follow sn0v's instruction
- install development tools:
- sudo apt-get install freeglut3-dev build-essential libx11-dev libxmu-dev libxi-dev libgl1-mesa-glx libglu1-mesa libglu1-mesa-dev
- disable some graphics components by adding the blacklist:
- sudo gedit /etc/modprobe.d/blacklist.conf
- blacklist amd76x_edac
blacklist vga16fb
blacklist nouveau
blacklist rivafb
blacklist nvidiafb
blacklist rivatv
- To make sure any previously installed NVIDIA graphics components are removed:
- sudo apt-get remove --purge nvidia*
- Now we are ready to install CUDA. First reboot, then press Ctrl+Alt+F1 at the login window; this switches to a command-line console where you can log in.
- Turn off the GUI service and install CUDA
- sudo service lightdm stop
- sudo ./<CUDAInstallFile>.run
- Follow the CUDA installer instruction. At this point you should get CUDA toolkit and CUDA Samples.
- Install Qt Creator, CMake and the compilers by following Dayal's tutorial.
- Here is the list of all necessary packages (install by Synaptic Package Manager)
- gcc
- g++
- gdb
- qtcreator
- valgrind
- cmake
- It is almost done, next step is to create some simple project.
- Open QT creator and add new text file (CMakeLists.txt).
- write this into the txt file
- cmake_minimum_required(VERSION 2.6.2)
# Locate the CUDA toolkit; this sets CUDA_FOUND and CUDA_VERSION and
# provides the cuda_add_executable() command used below.
find_package(CUDA)
if(CUDA_FOUND)
  message(" * CUDA ${CUDA_VERSION} was found")
else(CUDA_FOUND)
  message(" * CUDA is not found")
  # Stop configuring: the project cannot be built without CUDA.
  message(FATAL_ERROR "Not all CUDA libraries are found")
endif(CUDA_FOUND)
# add sources files
# Collect the host (.cpp) and device (.cu) sources that sit next to this
# CMakeLists.txt so that SRCS is defined before it is used.
file(GLOB SRCS ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp ${CMAKE_CURRENT_SOURCE_DIR}/*.cu)
cuda_add_executable(CMakeCUDA ${SRCS}) - Add main.cpp and write the code
- Add file
- reopen the CMakeLists.txt but this time open by the cmake wizard
- You have just finished setting up a simple CUDA project. When you build it and run it in a terminal you will see the output numbers; click here for a more detailed explanation of the program.
// main.cpp
// Simple wrapper class around the CUDA functions.
// Vectors A and B are initialized on the host and transferred to the device.
// The device performs the addition and the data is transferred back to the host.
// by Wasit Limprasert, created on 19-07-2011
// updated 23-08-2011: adding comments
#include <stdlib.h>
#include <stdio.h>

// Implemented in the CUDA source file; declared with C linkage so this
// translation unit can be built by the host C++ compiler.
extern "C" void CUDA_Constructor(int** g_A, int** g_B, int size);
extern "C" void CUDA_SetData(int* g_dist, int* h_src, int size);
extern "C" void CUDA_Add(int* g_A, int* g_B, int size);
extern "C" void CUDA_GetData(int* h_dist, int* g_src, int size);

// Owns a pair of integer vectors (A and B) of `size` elements, each with a
// host copy (h_*) and a device copy (g_*), and forwards the operations on
// them to the CUDA_* functions declared above.
class VectorOperation {
public:
    int *h_A, *g_A;  // h_A: host pointer of vector A; g_A: pointer to global memory on the device
    int *h_B, *g_B;  // same pair for vector B
    int size;        // number of elements in each vector

    VectorOperation(int _size);
    ~VectorOperation(void);

    void init(void);
    // Copy `size` ints from h_src into the device copy of A.
    void SetA(int* h_src) { CUDA_SetData(g_A, h_src, size); }
    // Copy `size` ints from h_src into the device copy of B.
    void SetB(int* h_src) { CUDA_SetData(g_B, h_src, size); }
    // Ask the device to add the two vectors; Result() reads the sum from g_A.
    void Add(void) { CUDA_Add(g_A, g_B, size); }
    void Result(void);

private:
    // The object owns raw buffers that the destructor frees, so copying it
    // would free them twice. Declared but not defined to forbid copies.
    VectorOperation(const VectorOperation&);
    VectorOperation& operator=(const VectorOperation&);
};

// Constructor.
// Stores the vector size and allocates memory on both the host and the device.
VectorOperation::VectorOperation(int _size) {
    size = _size;
    CUDA_Constructor(&g_A, &g_B, size);
    h_A = (int*)malloc(sizeof(int) * size);
    h_B = (int*)malloc(sizeof(int) * size);
    // malloc may legitimately return NULL for a zero-byte request, so only a
    // NULL result for a positive size is treated as an allocation failure.
    if (size > 0 && (h_A == NULL || h_B == NULL)) {
        fprintf(stderr, "VectorOperation: host allocation of %d ints failed\n", size);
        free(h_A);
        free(h_B);
        exit(EXIT_FAILURE);
    }
}

// Destructor.
// Releases the host buffers allocated by the constructor.
// NOTE(review): the device buffers g_A/g_B are not released here because this
// file declares no function for freeing them.
VectorOperation::~VectorOperation(void) {
    free(h_A);
    free(h_B);
}

// Initialization.
// Fills A with ones and B with 0..size-1, prints both, and copies them to the device.
void VectorOperation::init(void) {
    printf("A =");
    for (int i = 0; i < size; i++) {
        h_A[i] = 1;
        printf("%2d ", h_A[i]);
    }
    SetA(h_A);

    printf("\nB =");
    for (int i = 0; i < size; i++) {
        h_B[i] = i;
        printf("%2d ", h_B[i]);
    }
    SetB(h_B);
    printf("\n");
}

// Reads the result, which is stored in g_A, back into h_A and prints it.
void VectorOperation::Result(void) {
    CUDA_GetData(h_A, g_A, size);
    printf("A+B:");
    for (int i = 0; i < size; i++) {
        printf("%2d ", h_A[i]);
    }
    printf("\n");
}

// Entry point: adds two 16-element vectors on the device and prints the sum.
int main() {
    printf("Simple CUDA vector addition.\n");
    VectorOperation P(16);  // direct initialization: the class is non-copyable
    P.init();
    P.Add();
    P.Result();
    getchar();  // keep the console window open until a key is pressed
    return 0;
}
//
// Simple CUDA functions
// by Wasit 208-2011
#include <stdio.h>
#include <cuda_runtime.h>

// Prints a diagnostic to stderr when a CUDA runtime call did not succeed.
// The wrappers below return void, so reporting is the only way a failure
// becomes visible to the user; execution continues afterwards.
static void reportCudaError(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    }
}

// Memory allocation on the device side.
// Allocates room for `size` ints for each of the two vectors and stores the
// device pointers in *g_A and *g_B.
extern "C" void CUDA_Constructor(int** g_A, int** g_B, int size) {
    const size_t bytes = sizeof(int) * (size_t)size;
    reportCudaError(cudaMalloc((void**)g_A, bytes), "CUDA_Constructor (A)");
    reportCudaError(cudaMalloc((void**)g_B, bytes), "CUDA_Constructor (B)");
}

// Copies `size` ints from the host buffer h_src to the device buffer g_dist.
extern "C" void CUDA_SetData(int* g_dist, int* h_src, int size) {
    reportCudaError(
        cudaMemcpy(g_dist, h_src, sizeof(int) * (size_t)size, cudaMemcpyHostToDevice),
        "CUDA_SetData");
}

// CUDA kernel: element-wise g_A[i] += g_B[i] for i in [0, size).
// Expects a 1D grid of 1D blocks; each thread handles at most one element,
// identified by its block and thread IDs (blockIdx and threadIdx).
__global__ void Kernel_Add(int* g_A, const int* g_B, int size) {
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    // The grid is rounded up to whole blocks, so surplus threads must not
    // touch memory past the end of the vectors.
    if (x < size) {
        g_A[x] = g_A[x] + g_B[x];
    }
}

// Addition function.
// Chooses the number of blocks and threads, then launches the kernel; the
// sum is left in g_A.
extern "C" void CUDA_Add(int* g_A, int* g_B, int size) {
    if (size <= 0) {
        return;  // nothing to add; a zero-sized grid is an invalid launch
    }
    const int threadsPerBlock = 256;
    // Round up so that sizes that are not a multiple of the block size are
    // still fully covered.
    const int blocks = (size + threadsPerBlock - 1) / threadsPerBlock;
    // Execution configuration order is <<<grid size, block size>>>.
    Kernel_Add<<<blocks, threadsPerBlock>>>(g_A, g_B, size);
    // A launch does not return a status; launch errors are read back here.
    reportCudaError(cudaGetLastError(), "CUDA_Add (kernel launch)");
}

// Reads data back to the host: copies `size` ints from the device buffer
// g_src to the host buffer h_dist.
extern "C" void CUDA_GetData(int* h_dist, int* g_src, int size) {
    reportCudaError(
        cudaMemcpy(h_dist, g_src, sizeof(int) * (size_t)size, cudaMemcpyDeviceToHost),
        "CUDA_GetData");
}
No comments:
Post a Comment