Bibliothèque fournissant une implémentation d'OpenCL purement CPU.
[[https://github.com/pocl/pocl/tree/master|Code source]]
[[http://portablecl.org|Site web]]
====Compilation====
Suivre la procédure décrite dans le readme et respecter la [[http://portablecl.org/download.html|compatibilité]] avec les versions de clang.
export LLVM_VERSION=<version majeure de LLVM, par exemple 14>
apt install -y python3-dev libpython3-dev build-essential ocl-icd-libopencl1 \
cmake git pkg-config libclang-${LLVM_VERSION}-dev clang \
llvm-${LLVM_VERSION} make ninja-build ocl-icd-libopencl1 ocl-icd-dev \
ocl-icd-opencl-dev libhwloc-dev zlib1g zlib1g-dev clinfo dialog apt-utils \
libxml2-dev libclang-cpp${LLVM_VERSION}-dev libclang-cpp${LLVM_VERSION} \
llvm-${LLVM_VERSION}-dev
Pour forcer la compilation d'OpenCL en version CPU, ajouter l'option ''-DDEFAULT_ENABLE_ICD=0'' à la commande ''cmake''.
cd <dossier des sources de pocl>
mkdir build
cd build
cmake .. -DDEFAULT_ENABLE_ICD=0
make
# and optionally
make install
====Exemple====
Pris dans le livre [[https://www.abebooks.fr/9781849692342/OpenCL-Programming-Example-Banger-Ravishekhar-1849692343/plp|OpenCL Programming by Example]]
Pour passer de la version OpenCL à la version PoCL, il suffit de remplacer l'include ''CL/opencl.h'' par ''pocl_opencl.h'' et de changer les 2 références à ''CL_DEVICE_TYPE_GPU'' par ''CL_DEVICE_TYPE_CPU''.
#include <stdio.h>
#include <stdlib.h>
// A décommenter
//#include "pocl_opencl.h"
// A commenter
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
// Number of elements in each of the vectors A, B and C.
// One vector occupies VECTOR_SIZE * sizeof(float) bytes, i.e. about 1.6 GB
// when float is 4 bytes; main allocates three of them on the host and three
// buffers of the same size through OpenCL.
#define VECTOR_SIZE 409600000
// OpenCL C source of the SAXPY kernel, handed to clCreateProgramWithSource
// in main. Each work-item reads its global id in dimension 0 and computes
// C[index] = alpha * A[index] + B[index]. The kernel performs no bounds
// check on index, so the launch must not create more work-items than the
// buffers have elements.
const char *saxpy_kernel =
"__kernel \n"
"void saxpy_kernel(float alpha, \n"
" __global float *A, \n"
" __global float *B, \n"
" __global float *C) \n"
"{ \n"
" //Get the index of the work-item \n"
" int index = get_global_id(0); \n"
" C[index] = alpha* A[index] + B[index]; \n"
"} \n";
int main(void) {
int i;
// Allocate space for vectors A, B and C
float alpha = 2.0;
float *A = (float*)malloc(sizeof(float)*VECTOR_SIZE);
float *B = (float*)malloc(sizeof(float)*VECTOR_SIZE);
float *C = (float*)malloc(sizeof(float)*VECTOR_SIZE);
for(i = 0; i < VECTOR_SIZE; i++)
{
A[i] = i;
B[i] = VECTOR_SIZE - i;
C[i] = 0;
}
// Get platform and device information
cl_platform_id * platforms = NULL;
cl_uint num_platforms;
//Set up the Platform
cl_int clStatus = clGetPlatformIDs(0, NULL, &num_platforms);
platforms = (cl_platform_id *)
malloc(sizeof(cl_platform_id)*num_platforms);
clStatus = clGetPlatformIDs(num_platforms, platforms, NULL);
//Get the devices list and choose the device you want to run on
cl_device_id *device_list = NULL;
cl_uint num_devices;
// A passer en CL_DEVICE_TYPE_CPU
clStatus = clGetDeviceIDs( platforms[0], CL_DEVICE_TYPE_GPU, 0,NULL, &num_devices);
device_list = (cl_device_id *)
malloc(sizeof(cl_device_id)*num_devices);
// A passer en CL_DEVICE_TYPE_CPU
clStatus = clGetDeviceIDs( platforms[0],CL_DEVICE_TYPE_GPU, num_devices, device_list, NULL);
// Create one OpenCL context for each device in the platform
cl_context context;
context = clCreateContext( NULL, num_devices, device_list, NULL, NULL, &clStatus);
// Create a command queue
cl_command_queue command_queue = clCreateCommandQueue(context, device_list[0], 0, &clStatus);
// Create memory buffers on the device for each vector
cl_mem A_clmem = clCreateBuffer(context, CL_MEM_READ_ONLY,VECTOR_SIZE * sizeof(float), NULL, &clStatus);
cl_mem B_clmem = clCreateBuffer(context, CL_MEM_READ_ONLY,VECTOR_SIZE * sizeof(float), NULL, &clStatus);
cl_mem C_clmem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,VECTOR_SIZE * sizeof(float), NULL, &clStatus);
// Copy the Buffer A and B to the device
clStatus = clEnqueueWriteBuffer(command_queue, A_clmem, CL_TRUE, 0, VECTOR_SIZE * sizeof(float), A, 0, NULL, NULL);
clStatus = clEnqueueWriteBuffer(command_queue, B_clmem, CL_TRUE, 0, VECTOR_SIZE * sizeof(float), B, 0, NULL, NULL);
// Create a program from the kernel source
cl_program program = clCreateProgramWithSource(context, 1,(const char **)&saxpy_kernel, NULL, &clStatus);
// Build the program
clStatus = clBuildProgram(program, 1, device_list, NULL, NULL, NULL);
// Create the OpenCL kernel
cl_kernel kernel = clCreateKernel(program, "saxpy_kernel", &clStatus);
// Set the arguments of the kernel
clStatus = clSetKernelArg(kernel, 0, sizeof(float), (void *)&alpha);
clStatus = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&A_clmem);
clStatus = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&B_clmem);
clStatus = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&C_clmem);
// Execute the OpenCL kernel on the list
size_t global_size = VECTOR_SIZE; // Process the entire lists
size_t local_size = 64; // Process one item at a time
clStatus = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
// Read the cl memory C_clmem on device to the host variable C
clStatus = clEnqueueReadBuffer(command_queue, C_clmem, CL_TRUE, 0, VECTOR_SIZE * sizeof(float), C, 0, NULL, NULL);
// Clean up and wait for all the comands to complete.
clStatus = clFlush(command_queue);
clStatus = clFinish(command_queue);
// Display the result to the screen
//for(i = 0; i < VECTOR_SIZE; i++)
// printf("%f * %f + %f = %f\n", alpha, A[i], B[i], C[i]);
// Finally release all OpenCL allocated objects and host buffers.
clStatus = clReleaseKernel(kernel);
clStatus = clReleaseProgram(program);
clStatus = clReleaseMemObject(A_clmem);
clStatus = clReleaseMemObject(B_clmem);
clStatus = clReleaseMemObject(C_clmem);
clStatus = clReleaseCommandQueue(command_queue);
clStatus = clReleaseContext(context);
free(A);
free(B);
free(C);
free(platforms);
free(device_list);
return 0;
}
* Compilation / exécution pour OpenCL
g++ -o opencc opencc.cpp -lOpenCL
time ./opencc
real 0m3,532s
user 0m2,259s
sys 0m1,209s
* Compilation / exécution pour PoCL
g++ -g -o opencc opencc.cpp -I ~/prog/pocl-3.1/build -I ~/prog/pocl-3.1/poclu/ -L ~/prog/pocl-3.1/build/lib/CL -lpocl -lOpenCL
LD_LIBRARY_PATH="/home/legarrec/prog/pocl-3.1/build/lib/CL" POCL_BUILDING=1 time ./opencc
4.95user 14.23system 0:22.90elapsed 83%CPU (0avgtext+0avgdata 7643848maxresident)k
2624936inputs+2176outputs (47118major+959861minor)pagefaults 0swaps
Il faut mettre ''POCL_BUILDING=1'' si les librairies ''libOpenCL'' et ''libpocl'' sont dans le dossier de compilation et non le dossier d'installation (''make install'' non exécuté).