Bibliothèque générant une version d'OpenCL purement CPU.
Suivre la procédure décrite dans le readme et respecter la compatibilité avec les versions de clang.
export LLVM_VERSION=<major LLVM version>
apt install -y python3-dev libpython3-dev build-essential ocl-icd-libopencl1 \
    cmake git pkg-config libclang-${LLVM_VERSION}-dev clang \
    llvm-${LLVM_VERSION} make ninja-build ocl-icd-libopencl1 ocl-icd-dev \
    ocl-icd-opencl-dev libhwloc-dev zlib1g zlib1g-dev clinfo dialog apt-utils \
    libxml2-dev libclang-cpp${LLVM_VERSION}-dev libclang-cpp${LLVM_VERSION} \
    llvm-${LLVM_VERSION}-dev
Pour forcer la compilation d'OpenCL version CPU, ajouter l'option DEFAULT_ENABLE_ICD=0.
cd <directory-with-pocl-sources>
mkdir build
cd build
cmake .. -DDEFAULT_ENABLE_ICD=0
make
# and optionally
make install
Pris dans le livre OpenCL Programming by Example
Pour passer de la version OpenCL à la version PoCL, il suffit de remplacer l'include CL/opencl.h par pocl_opencl.h et de changer les 2 références à CL_DEVICE_TYPE_GPU par CL_DEVICE_TYPE_CPU.
#include <stdio.h> #include <stdlib.h> // A décommenter //#include "pocl_opencl.h" // A commenter #ifdef __APPLE__ #include <OpenCL/cl.h> #else #include <CL/opencl.h> #endif #define VECTOR_SIZE 409600000 //OpenCL kernel which is run for every work item created. const char *saxpy_kernel = "__kernel \n" "void saxpy_kernel(float alpha, \n" " __global float *A, \n" " __global float *B, \n" " __global float *C) \n" "{ \n" " //Get the index of the work-item \n" " int index = get_global_id(0); \n" " C[index] = alpha* A[index] + B[index]; \n" "} \n"; int main(void) { int i; // Allocate space for vectors A, B and C float alpha = 2.0; float *A = (float*)malloc(sizeof(float)*VECTOR_SIZE); float *B = (float*)malloc(sizeof(float)*VECTOR_SIZE); float *C = (float*)malloc(sizeof(float)*VECTOR_SIZE); for(i = 0; i < VECTOR_SIZE; i++) { A[i] = i; B[i] = VECTOR_SIZE - i; C[i] = 0; } // Get platform and device information cl_platform_id * platforms = NULL; cl_uint num_platforms; //Set up the Platform cl_int clStatus = clGetPlatformIDs(0, NULL, &num_platforms); platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id)*num_platforms); clStatus = clGetPlatformIDs(num_platforms, platforms, NULL); //Get the devices list and choose the device you want to run on cl_device_id *device_list = NULL; cl_uint num_devices; // A passer en CL_DEVICE_TYPE_CPU clStatus = clGetDeviceIDs( platforms[0], CL_DEVICE_TYPE_GPU, 0,NULL, &num_devices); device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*num_devices); // A passer en CL_DEVICE_TYPE_CPU clStatus = clGetDeviceIDs( platforms[0],CL_DEVICE_TYPE_GPU, num_devices, device_list, NULL); // Create one OpenCL context for each device in the platform cl_context context; context = clCreateContext( NULL, num_devices, device_list, NULL, NULL, &clStatus); // Create a command queue cl_command_queue command_queue = clCreateCommandQueue(context, device_list[0], 0, &clStatus); // Create memory buffers on the device for each vector cl_mem A_clmem = 
clCreateBuffer(context, CL_MEM_READ_ONLY,VECTOR_SIZE * sizeof(float), NULL, &clStatus); cl_mem B_clmem = clCreateBuffer(context, CL_MEM_READ_ONLY,VECTOR_SIZE * sizeof(float), NULL, &clStatus); cl_mem C_clmem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,VECTOR_SIZE * sizeof(float), NULL, &clStatus); // Copy the Buffer A and B to the device clStatus = clEnqueueWriteBuffer(command_queue, A_clmem, CL_TRUE, 0, VECTOR_SIZE * sizeof(float), A, 0, NULL, NULL); clStatus = clEnqueueWriteBuffer(command_queue, B_clmem, CL_TRUE, 0, VECTOR_SIZE * sizeof(float), B, 0, NULL, NULL); // Create a program from the kernel source cl_program program = clCreateProgramWithSource(context, 1,(const char **)&saxpy_kernel, NULL, &clStatus); // Build the program clStatus = clBuildProgram(program, 1, device_list, NULL, NULL, NULL); // Create the OpenCL kernel cl_kernel kernel = clCreateKernel(program, "saxpy_kernel", &clStatus); // Set the arguments of the kernel clStatus = clSetKernelArg(kernel, 0, sizeof(float), (void *)&alpha); clStatus = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&A_clmem); clStatus = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&B_clmem); clStatus = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&C_clmem); // Execute the OpenCL kernel on the list size_t global_size = VECTOR_SIZE; // Process the entire lists size_t local_size = 64; // Process one item at a time clStatus = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL); // Read the cl memory C_clmem on device to the host variable C clStatus = clEnqueueReadBuffer(command_queue, C_clmem, CL_TRUE, 0, VECTOR_SIZE * sizeof(float), C, 0, NULL, NULL); // Clean up and wait for all the comands to complete. clStatus = clFlush(command_queue); clStatus = clFinish(command_queue); // Display the result to the screen //for(i = 0; i < VECTOR_SIZE; i++) // printf("%f * %f + %f = %f\n", alpha, A[i], B[i], C[i]); // Finally release all OpenCL allocated objects and host buffers. 
clStatus = clReleaseKernel(kernel); clStatus = clReleaseProgram(program); clStatus = clReleaseMemObject(A_clmem); clStatus = clReleaseMemObject(B_clmem); clStatus = clReleaseMemObject(C_clmem); clStatus = clReleaseCommandQueue(command_queue); clStatus = clReleaseContext(context); free(A); free(B); free(C); free(platforms); free(device_list); return 0; }
g++ -o opencc opencc.cpp -lOpenCL
time ./opencc
real 0m3,532s
user 0m2,259s
sys 0m1,209s
g++ -g -o opencc opencc.cpp -I ~/prog/pocl-3.1/build -I ~/prog/pocl-3.1/poclu/ -L ~/prog/pocl-3.1/build/lib/CL -lpocl -lOpenCL
LD_LIBRARY_PATH="/home/legarrec/prog/pocl-3.1/build/lib/CL" POCL_BUILDING=1 time ./opencc
4.95user 14.23system 0:22.90elapsed 83%CPU (0avgtext+0avgdata 7643848maxresident)k
2624936inputs+2176outputs (47118major+959861minor)pagefaults 0swaps
Il faut mettre POCL_BUILDING=1 si les bibliothèques libOpenCL et libpocl sont dans le dossier de compilation et non dans le dossier d'installation (make install non exécuté).