CUDA+QT+CMake on Ubuntu:
I'm about to implement a CUDA program using Qt Creator and CMake on Ubuntu. I haven't used any of them before, so this note explains how I got through the setup and made them work together.
I read up on how these tools cooperate, and they should work together properly; please see
sn0v and
Dayal.
Here is a brief explanation of the setting process:
- setting CUDA
- download the CUDA 5 product from here
- follow sn0v's instruction
- install development tools:
- sudo apt-get install freeglut3-dev build-essential libx11-dev libxmu-dev libxi-dev libgl1-mesa-glx libglu1-mesa libglu1-mesa-dev
- disable some graphics components by adding the blacklist:
- gedit /etc/modprobe.d/blacklist.conf
- blacklist amd76x_edac
blacklist vga16fb
blacklist nouveau
blacklist rivafb
blacklist nvidiafb
blacklist rivatv
- to make sure any previously installed graphics drivers are removed:
- sudo apt-get remove --purge nvidia*
- Now we are ready to install CUDA. First, reboot and press Ctrl+Alt+F1 at the login window; this switches to command-line mode.
- Turn off the GUI service and install CUDA
- sudo service lightdm stop
- sudo ./<CUDAInstallFile>.run
- Follow the CUDA installer instruction. At this point you should get CUDA toolkit and CUDA Samples.
- Install Qt Creator, CMake and the compilers by following Dayal's tutorial.
- Here is the list of all necessary packages (install by Synaptic Package Manager)
- gcc
- g++
- gdb
- qtcreator
- valgrind
- cmake
- We are almost done; the next step is to create a simple project.
- Open QT creator and add new text file (CMakeLists.txt).
- write this into the txt file
- cmake_minimum_required(VERSION 2.6.2)
# NOTE(review): find_package(CUDA) below relies on the FindCUDA module; confirm
# that module ships with the minimum CMake version requested above.
project(CMakeCUDA)
# Look for the CUDA toolkit; CUDA_FOUND and CUDA_VERSION are read below.
find_package(CUDA)
if (CUDA_FOUND)
message(" * CUDA ${CUDA_VERSION} was found")
else(CUDA_FOUND)
message(" * CUDA is not found")
# FATAL_ERROR stops configuration: the project cannot be built without CUDA.
message(FATAL_ERROR "Not all CUDA libraries are found")
endif(CUDA_FOUND)
# Source files: the host-side C++ driver and the CUDA source file.
set(SRCS
main.cpp
cuda.cu
)
# Build the executable through FindCUDA's cuda_add_executable so that the .cu
# file is handled by the CUDA compiler (presumably nvcc; see FindCUDA docs).
cuda_add_executable(CMakeCUDA ${SRCS})
- Add main.cpp and write the code
- Add cuda.cu file
- reopen the CMakeLists.txt but this time open by the cmake wizard
- You have just finished setting up a simple CUDA project. When you build it and run it in a terminal you will see the output numbers; please click here for more explanation of the program.
------------------------------------main.cpp-------------------------------------
//main.cpp
// Simple wrapper class around the CUDA functions.
// Vectors A and B are initialized and transferred to the device.
// The device performs the addition and the data is transferred back to the host.
// by Wasit Limprasert created on 19-07-2011
// updated 23-08-2011: adding comment
#include <stdlib.h>
#include <stdio.h>
// Entry points defined in cuda.cu. They are declared with C linkage so the
// names match the extern "C" definitions there.
extern "C" {

// Allocates two device buffers of `size` ints; their addresses are written
// to *g_A and *g_B.
void CUDA_Constructor(int** g_A, int** g_B, int size);

// Copies `size` ints from the host buffer h_src to the device buffer g_dist.
void CUDA_SetData(int* g_dist, int* h_src, int size);

// Runs the addition kernel on the device; the sum is stored in g_A.
void CUDA_Add(int* g_A, int* g_B, int size);

// Copies `size` ints from the device buffer g_src to the host buffer h_dist.
void CUDA_GetData(int* h_dist, int* g_src, int size);

}  // extern "C"
// Thin host-side wrapper around the CUDA_* entry points.
// It owns two int vectors, A and B. Each one exists twice: as a host buffer
// (h_ prefix) and as a buffer in device global memory (g_ prefix).
class VectorOperation {
public:
    int* h_A;  // host pointer of vector A
    int* g_A;  // pointer to vector A in global memory on the device
    int* h_B;  // host pointer of vector B
    int* g_B;  // pointer to vector B in global memory on the device
    int size;  // number of elements in each vector

    // Stores the vector length and allocates the host and device buffers.
    VectorOperation(int _size);

    // Empty: the buffers allocated by the constructor are not released here.
    ~VectorOperation() {}

    // Fills A and B on the host, prints them and uploads them to the device.
    void init();

    // Uploads `size` ints from h_src into device vector A.
    void SetA(int* h_src) {
        CUDA_SetData(g_A, h_src, size);
    }

    // Uploads `size` ints from h_src into device vector B.
    void SetB(int* h_src) {
        CUDA_SetData(g_B, h_src, size);
    }

    // Runs the device addition; the sum ends up in device vector A.
    void Add() {
        CUDA_Add(g_A, g_B, size);
    }

    // Downloads device vector A into h_A and prints it.
    void Result();
};
// Constructor.
// Records the vector length, asks the CUDA side to allocate the two device
// buffers and then allocates the matching host buffers.
//
// _size: number of int elements in each vector.
//
// If a host allocation fails the program prints a message to stderr and
// exits, because init() and Result() would otherwise write through a null
// pointer.
VectorOperation::VectorOperation(int _size){
    size = _size;

    // Give the device pointers a defined value in case the device allocation
    // returns without writing to them.
    g_A = NULL;
    g_B = NULL;
    CUDA_Constructor(&g_A, &g_B, size);

    h_A = (int*)malloc(sizeof(int) * size);
    h_B = (int*)malloc(sizeof(int) * size);

    // malloc(0) may legitimately return NULL, so a null result only counts as
    // a failure when elements were actually requested.
    if (size != 0 && (h_A == NULL || h_B == NULL)) {
        fprintf(stderr,
                "VectorOperation: failed to allocate host buffers for %d elements\n",
                size);
        free(h_A);
        free(h_B);
        exit(EXIT_FAILURE);
    }
}
// Initialization.
// Fills vector A with ones and vector B with 0, 1, 2, ... on the host,
// echoes both vectors to stdout and uploads each one to the device.
void VectorOperation::init() {
    const int count = size;

    // Vector A: every element is 1.
    printf("A =");
    int k = 0;
    while (k < count) {
        h_A[k] = 1;
        printf("%2d ", h_A[k]);
        ++k;
    }
    SetA(h_A);

    // Vector B: element k holds the value k.
    printf("\nB =");
    k = 0;
    while (k < count) {
        h_B[k] = k;
        printf("%2d ", h_B[k]);
        ++k;
    }
    SetB(h_B);

    printf("\n");
}
// Reads the result back and prints it.
// The sum is stored in g_A on the device, so g_A is copied into h_A first and
// h_A is then written to stdout on a single line.
void VectorOperation::Result() {
    CUDA_GetData(h_A, g_A, size);

    printf("A+B:");
    int k = 0;
    while (k < size) {
        printf("%2d ", h_A[k]);
        ++k;
    }
    printf("\n");
}
//main
int main(){
printf("Simple CUDA vector addition.\n");
VectorOperation P = VectorOperation(16);
P.init();
P.Add();
P.Result();
getchar();
}
------------------------------------------------cuda.cu-------------------------------
//cuda.cu
//simple CUDA functions
//by Wasit 208-2011
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
//memory allocation on device side
// Allocates two device buffers of `size` ints each.
//
// g_A, g_B: out-parameters that receive the device addresses.
// size:     number of int elements per buffer.
//
// cudaMalloc reports failure only through its return code, so both calls are
// checked. On failure the CUDA error string is printed to stderr and the
// program exits, rather than continuing with unusable device pointers.
extern "C" void CUDA_Constructor(int** g_A,int** g_B,int size){
    const size_t bytes = sizeof(int)*size;

    cudaError_t err = cudaMalloc(g_A, bytes);
    if (err == cudaSuccess) {
        err = cudaMalloc(g_B, bytes);
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA_Constructor: cudaMalloc failed: %s\n",
                cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}
//copying data from host to device
// Copies `size` ints from the host buffer h_src into the device buffer g_dist
// with a blocking cudaMemcpy.
//
// The return code is checked: on failure the CUDA error string is printed to
// stderr and the program exits, so a failed upload cannot go unnoticed.
extern "C" void CUDA_SetData(int* g_dist, int* h_src,int size){
    const cudaError_t err =
        cudaMemcpy(g_dist, h_src, sizeof(int)*size, cudaMemcpyHostToDevice);
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA_SetData: cudaMemcpy (host to device) failed: %s\n",
                cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}
//CUDA kernel: g_A[i] = g_A[i] + g_B[i] for every i in [0, size).
// Block and thread IDs are given by blockIdx and threadIdx, respectively.
//
// Launch layout: 1-D grid of 1-D blocks. Each thread starts at its global
// index and advances by the total number of launched threads, so every
// element below `size` is processed exactly once for any grid/block size and
// no thread touches memory at or beyond `size`.
// Shared memory: none.
// Preconditions: g_A and g_B are device buffers holding at least `size` ints.
__global__ void Kernel_Add(int* g_A,int* g_B,int size){
    if (size <= 0) {
        return;
    }
    const unsigned int n = (unsigned int)size;
    const unsigned int stride = gridDim.x*blockDim.x;
    for (unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; x < n; x += stride) {
        g_A[x] = g_A[x] + g_B[x];
    }
}

//Addition function
// Chooses the number of blocks and threads, then launches Kernel_Add.
//
// g_A, g_B: device buffers of `size` ints; the sum is written to g_A.
// size:     number of elements; values <= 0 make this call a no-op.
//
// Launch errors (bad configuration) are reported via cudaGetLastError; on
// failure the error string is printed to stderr and the program exits. The
// launch itself is asynchronous, so faults inside the kernel only surface at
// the next synchronizing CUDA call (for example a blocking cudaMemcpy).
extern "C" void CUDA_Add(int* g_A,int* g_B,int size){
    if (size <= 0) {
        return;  // nothing to add; also avoids an invalid zero-sized launch
    }

    const int threadnum = 256;    // threads per block (multiple of the warp size)
    const int maxblocknum = 65535; // conservative gridDim.x cap; the kernel loops over any remainder

    // Round up so a trailing partial block is still covered.
    int blocknum = size/threadnum + (size%threadnum != 0 ? 1 : 0);
    if (blocknum > maxblocknum) {
        blocknum = maxblocknum;
    }

    // Execution configuration is <<<blocks per grid, threads per block>>>.
    Kernel_Add<<<blocknum,threadnum>>>(g_A,g_B,size);

    const cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA_Add: kernel launch failed: %s\n",
                cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}
//read data back to host
// Copies `size` ints from the device buffer g_src into the host buffer h_dist
// with a blocking cudaMemcpy.
//
// The return code is checked because this call can also report errors from
// earlier asynchronous work. On failure the CUDA error string is printed to
// stderr and the program exits instead of printing stale host data.
extern "C" void CUDA_GetData(int* h_dist, int* g_src,int size){
    const cudaError_t err =
        cudaMemcpy(h_dist, g_src, sizeof(int)*size, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA_GetData: cudaMemcpy (device to host) failed: %s\n",
                cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}