Bibliothèque fournissant une implémentation d'OpenCL purement CPU.

Code source

Site web

Compilation

Suivre la procédure décrite dans le readme et respecter la compatibilité avec les versions de clang.

export LLVM_VERSION=<major LLVM version>
apt install -y python3-dev libpython3-dev build-essential ocl-icd-libopencl1 \
    cmake git pkg-config libclang-${LLVM_VERSION}-dev clang \
    llvm-${LLVM_VERSION} make ninja-build ocl-icd-libopencl1 ocl-icd-dev \
    ocl-icd-opencl-dev libhwloc-dev zlib1g zlib1g-dev clinfo dialog apt-utils \
    libxml2-dev libclang-cpp${LLVM_VERSION}-dev libclang-cpp${LLVM_VERSION} \
    llvm-${LLVM_VERSION}-dev

Pour forcer la compilation de la version CPU d'OpenCL, ajouter l'option -DDEFAULT_ENABLE_ICD=0 à la commande cmake.

cd <directory-with-pocl-sources>
mkdir build
cd build
cmake .. -DDEFAULT_ENABLE_ICD=0
make
# and optionally
make install

Exemple

Pris dans le livre OpenCL Programming by Example

Pour passer de la version OpenCL à la version PoCL, il suffit de remplacer l'include CL/opencl.h par pocl_opencl.h et de changer les 2 références à CL_DEVICE_TYPE_GPU par CL_DEVICE_TYPE_CPU.

opencc.cpp
#include <stdio.h>
#include <stdlib.h>
 
// A décommenter
//#include "pocl_opencl.h"
 
// A commenter
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/opencl.h>
#endif
 
#define VECTOR_SIZE 409600000
 
// OpenCL C source of the SAXPY kernel, kept as a string so that main() can
// hand it to clCreateProgramWithSource() and build it at run time.
// Each work-item reads its global id along dimension 0 and computes
// C[index] = alpha * A[index] + B[index] for that single index.
// The kernel performs no bounds check on index.
const char *saxpy_kernel =
"__kernel                                   \n"
"void saxpy_kernel(float alpha,             \n"
"                  __global float *A,       \n"
"                  __global float *B,       \n"
"                  __global float *C)       \n"
"{                                          \n"
"    //Get the index of the work-item       \n"
"    int index = get_global_id(0);          \n"
"    C[index] = alpha* A[index] + B[index]; \n"
"}                                          \n";
 
int main(void) {
  int i;
  // Allocate space for vectors A, B and C
  float alpha = 2.0;
  float *A = (float*)malloc(sizeof(float)*VECTOR_SIZE);
  float *B = (float*)malloc(sizeof(float)*VECTOR_SIZE);
  float *C = (float*)malloc(sizeof(float)*VECTOR_SIZE);
  for(i = 0; i < VECTOR_SIZE; i++)
  {
    A[i] = i;
    B[i] = VECTOR_SIZE - i;
    C[i] = 0;
  }
 
  // Get platform and device information
  cl_platform_id * platforms = NULL;
  cl_uint     num_platforms;
  //Set up the Platform
  cl_int clStatus = clGetPlatformIDs(0, NULL, &num_platforms);
  platforms = (cl_platform_id *)
  malloc(sizeof(cl_platform_id)*num_platforms);
  clStatus = clGetPlatformIDs(num_platforms, platforms, NULL);
 
  //Get the devices list and choose the device you want to run on
  cl_device_id     *device_list = NULL;
  cl_uint           num_devices;
 
  // A passer en CL_DEVICE_TYPE_CPU
  clStatus = clGetDeviceIDs( platforms[0], CL_DEVICE_TYPE_GPU, 0,NULL, &num_devices);
  device_list = (cl_device_id *) 
  malloc(sizeof(cl_device_id)*num_devices);
  // A passer en CL_DEVICE_TYPE_CPU
  clStatus = clGetDeviceIDs( platforms[0],CL_DEVICE_TYPE_GPU, num_devices, device_list, NULL);
 
  // Create one OpenCL context for each device in the platform
  cl_context context;
  context = clCreateContext( NULL, num_devices, device_list, NULL, NULL, &clStatus);
 
  // Create a command queue
  cl_command_queue command_queue = clCreateCommandQueue(context, device_list[0], 0, &clStatus);
 
  // Create memory buffers on the device for each vector
  cl_mem A_clmem = clCreateBuffer(context, CL_MEM_READ_ONLY,VECTOR_SIZE * sizeof(float), NULL, &clStatus);
  cl_mem B_clmem = clCreateBuffer(context, CL_MEM_READ_ONLY,VECTOR_SIZE * sizeof(float), NULL, &clStatus);
  cl_mem C_clmem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,VECTOR_SIZE * sizeof(float), NULL, &clStatus);
 
  // Copy the Buffer A and B to the device
  clStatus = clEnqueueWriteBuffer(command_queue, A_clmem, CL_TRUE, 0, VECTOR_SIZE * sizeof(float), A, 0, NULL, NULL);
  clStatus = clEnqueueWriteBuffer(command_queue, B_clmem, CL_TRUE, 0, VECTOR_SIZE * sizeof(float), B, 0, NULL, NULL);
 
  // Create a program from the kernel source
  cl_program program = clCreateProgramWithSource(context, 1,(const char **)&saxpy_kernel, NULL, &clStatus);
 
  // Build the program
  clStatus = clBuildProgram(program, 1, device_list, NULL, NULL, NULL);
 
  // Create the OpenCL kernel
  cl_kernel kernel = clCreateKernel(program, "saxpy_kernel", &clStatus);
 
  // Set the arguments of the kernel
  clStatus = clSetKernelArg(kernel, 0, sizeof(float), (void *)&alpha);
  clStatus = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&A_clmem);
  clStatus = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&B_clmem);
  clStatus = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&C_clmem);
 
  // Execute the OpenCL kernel on the list
  size_t global_size = VECTOR_SIZE; // Process the entire lists
  size_t local_size = 64;           // Process one item at a time
  clStatus = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL);
 
  // Read the cl memory C_clmem on device to the host variable C
  clStatus = clEnqueueReadBuffer(command_queue, C_clmem, CL_TRUE, 0, VECTOR_SIZE * sizeof(float), C, 0, NULL, NULL);
 
  // Clean up and wait for all the comands to complete.
  clStatus = clFlush(command_queue);
  clStatus = clFinish(command_queue);
 
  // Display the result to the screen
  //for(i = 0; i < VECTOR_SIZE; i++)
  //  printf("%f * %f + %f = %f\n", alpha, A[i], B[i], C[i]);
 
  // Finally release all OpenCL allocated objects and host buffers.
  clStatus = clReleaseKernel(kernel);
  clStatus = clReleaseProgram(program);
  clStatus = clReleaseMemObject(A_clmem);
  clStatus = clReleaseMemObject(B_clmem);
  clStatus = clReleaseMemObject(C_clmem);
  clStatus = clReleaseCommandQueue(command_queue);
  clStatus = clReleaseContext(context);
  free(A);
  free(B);
  free(C);
  free(platforms);
  free(device_list);
  return 0;
}
g++ -o opencc opencc.cpp -lOpenCL
time ./opencc
real    0m3,532s
user    0m2,259s
sys     0m1,209s
g++ -g -o opencc opencc.cpp -I ~/prog/pocl-3.1/build -I ~/prog/pocl-3.1/poclu/ -L ~/prog/pocl-3.1/build/lib/CL -lpocl -lOpenCL
LD_LIBRARY_PATH="/home/legarrec/prog/pocl-3.1/build/lib/CL" POCL_BUILDING=1 time ./opencc
4.95user 14.23system 0:22.90elapsed 83%CPU (0avgtext+0avgdata 7643848maxresident)k
2624936inputs+2176outputs (47118major+959861minor)pagefaults 0swaps

Il faut mettre POCL_BUILDING=1 si les bibliothèques libOpenCL et libpocl sont dans le dossier de compilation et non dans le dossier d'installation (make install non exécuté).