// // simpleOCL // // This simple code sample demonstrates how to perform a simple linear // algebra operation using OCL, single precision axpy: // y[i] = alpha*x[i] + y[i] for x,y in R^N and a scalar alpha // // This code is a straight-forward OpenCL port of the simpleCUDA // example available from the same site. // // Compilation instructions: // - Install an OpenCL SDK // - Compile with the provided Makefile // - Launch the executable // // (c) 2010-2011 D. Goeddeke, D. Ribbrock ///////////////////////////////////// // standard imports ///////////////////////////////////// #include #include ///////////////////////////////////// // OpenCL imports ///////////////////////////////////// #include ///////////////////////////////////// // global variables and configuration section ///////////////////////////////////// // problem size (vector length) N static const size_t N = 123456; // OpenCL device type to use // possible selections are // CL_DEVICE_TYPE_DEFAULT // CL_DEVICE_TYPE_CPU // CL_DEVICE_TYPE_GPU // CL_DEVICE_TYPE_ACCELERATOR // CL_DEVICE_TYPE_ALL static cl_device_type targetDeviceType = CL_DEVICE_TYPE_DEFAULT; ///////////////////////////////////// // utility function that maps OCL error constants to a string, // auto-generated from the CL headers because I am lazy. ///////////////////////////////////// void checkErrors(cl_int status, char *label, int line) { switch (status) { case CL_SUCCESS: return; case CL_BUILD_PROGRAM_FAILURE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_BUILD_PROGRAM_FAILURE\n", label, line); break; case CL_COMPILER_NOT_AVAILABLE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_COMPILER_NOT_AVAILABLE\n", label, line); break; case CL_DEVICE_NOT_AVAILABLE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_DEVICE_NOT_AVAILABLE\n", label, line); break; case CL_DEVICE_NOT_FOUND: fprintf(stderr, "OpenCL error (at %s, line %d): CL_DEVICE_NOT_FOUND\n", label, line); break; case CL_IMAGE_FORMAT_MISMATCH: fprintf(stderr, "OpenCL error (at %s, line %d): CL_IMAGE_FORMAT_MISMATCH\n", label, line); break; case CL_IMAGE_FORMAT_NOT_SUPPORTED: fprintf(stderr, "OpenCL error (at %s, line %d): CL_IMAGE_FORMAT_NOT_SUPPORTED\n", label, line); break; case CL_INVALID_ARG_INDEX: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_ARG_INDEX\n", label, line); break; case CL_INVALID_ARG_SIZE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_ARG_SIZE\n", label, line); break; case CL_INVALID_ARG_VALUE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_ARG_VALUE\n", label, line); break; case CL_INVALID_BINARY: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_BINARY\n", label, line); break; case CL_INVALID_BUFFER_SIZE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_BUFFER_SIZE\n", label, line); break; case CL_INVALID_BUILD_OPTIONS: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_BUILD_OPTIONS\n", label, line); break; case CL_INVALID_COMMAND_QUEUE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_COMMAND_QUEUE\n", label, line); break; case CL_INVALID_CONTEXT: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_CONTEXT\n", label, line); break; case CL_INVALID_DEVICE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_DEVICE\n", label, line); break; case CL_INVALID_DEVICE_TYPE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_DEVICE_TYPE\n", label, line); break; case CL_INVALID_EVENT: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_EVENT\n", label, line); break; case CL_INVALID_EVENT_WAIT_LIST: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_EVENT_WAIT_LIST\n", label, line); break; case CL_INVALID_GL_OBJECT: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_GL_OBJECT\n", label, line); break; case CL_INVALID_GLOBAL_OFFSET: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_GLOBAL_OFFSET\n", label, line); break; case CL_INVALID_HOST_PTR: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_HOST_PTR\n", label, line); break; case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_IMAGE_FORMAT_DESCRIPTOR\n", label, line); break; case CL_INVALID_IMAGE_SIZE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_IMAGE_SIZE\n", label, line); break; case CL_INVALID_KERNEL_NAME: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_KERNEL_NAME\n", label, line); break; case CL_INVALID_KERNEL: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_KERNEL\n", label, line); break; case CL_INVALID_KERNEL_ARGS: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_KERNEL_ARGS\n", label, line); break; case CL_INVALID_KERNEL_DEFINITION: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_KERNEL_DEFINITION\n", label, line); break; case CL_INVALID_MEM_OBJECT: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_MEM_OBJECT\n", label, line); break; case CL_INVALID_OPERATION: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_OPERATION\n", label, line); break; case CL_INVALID_PLATFORM: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_PLATFORM\n", label, line); break; case CL_INVALID_PROGRAM: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_PROGRAM\n", label, line); break; case CL_INVALID_PROGRAM_EXECUTABLE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_PROGRAM_EXECUTABLE\n", label, line); break; case CL_INVALID_QUEUE_PROPERTIES: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_QUEUE_PROPERTIES\n", label, line); break; case CL_INVALID_SAMPLER: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_SAMPLER\n", label, line); break; case CL_INVALID_VALUE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_VALUE\n", label, line); break; case CL_INVALID_WORK_DIMENSION: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_WORK_DIMENSION\n", label, line); break; case CL_INVALID_WORK_GROUP_SIZE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_WORK_GROUP_SIZE\n", label, line); break; case CL_INVALID_WORK_ITEM_SIZE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_INVALID_WORK_ITEM_SIZE\n", label, line); break; case CL_MAP_FAILURE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_MAP_FAILURE\n", label, line); break; case CL_MEM_OBJECT_ALLOCATION_FAILURE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_MEM_OBJECT_ALLOCATION_FAILURE\n", label, line); break; case CL_MEM_COPY_OVERLAP: fprintf(stderr, "OpenCL error (at %s, line %d): CL_MEM_COPY_OVERLAP\n", label, line); break; case CL_OUT_OF_HOST_MEMORY: fprintf(stderr, "OpenCL error (at %s, line %d): CL_OUT_OF_HOST_MEMORY\n", label, line); break; case CL_OUT_OF_RESOURCES: fprintf(stderr, "OpenCL error (at %s, line %d): CL_OUT_OF_RESOURCES\n", label, line); break; case CL_PROFILING_INFO_NOT_AVAILABLE: fprintf(stderr, "OpenCL error (at %s, line %d): CL_PROFILING_INFO_NOT_AVAILABLE\n", label, line); break; } exit(status); } ///////////////////////////////////// // kernel function (CPU) ///////////////////////////////////// void saxpy_serial(int n, float alpha, float *x, float *y) { int i; for (i=0; i 0) { cl_device_id* allDevices = (cl_device_id*) malloc (numDevices*sizeof(cl_device_id)); status = clGetDeviceIDs (currentPlatform, targetDeviceType, numDevices, allDevices, NULL); checkErrors (status, "clGetDeviceIDs", __LINE__); // greedily pick first device currentDevice = allDevices[0]; // print out some information on the device we'll be using. // Check OpenCL documentation for all supported parameters (there's a lot of them) char name[100]; status = clGetDeviceInfo (currentDevice, CL_DEVICE_NAME, sizeof(name), name, NULL); checkErrors (status, "clGetDeviceInfo", __LINE__); printf ("Matching device found:\n"); printf ("Name: %s\n", name); // clean up and exit loop over platforms free (allDevices); break; } } // clean up preparation stage free (allPlatforms); // if we haven't found a suitable device in any platform, we must stop if (currentDevice == NULL) { fprintf (stderr, "OpenCL Error: No matching device found! \n"); } // otherwise, currentPlatform holds the platform ID, and currentDevice // holds the device we'll be using so that we can start to do something // proper now. // create one OpenCL context on currentDevice cl_context_properties contextProperties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)currentPlatform, 0 }; cl_context context; context = clCreateContext (contextProperties, 1, ¤tDevice, NULL, NULL, &status); checkErrors (status, "clCreateContext", __LINE__); // create command queue within that context cl_command_queue commandQueue; commandQueue = clCreateCommandQueue (context, currentDevice, 0, &status); checkErrors (status, "clCreateCommandQueue", __LINE__); // create program cl_program program = clCreateProgramWithSource (context, 1, &kernelSource, NULL, &status); checkErrors (status, "clCreateProgramWithSource", __LINE__); // compile program into executable for the chosen device // we choose here a blocking compilation, because it is much easier to handle. status = clBuildProgram (program, 1, ¤tDevice, NULL, NULL, NULL); // in case building failed, query why if (status == CL_BUILD_PROGRAM_FAILURE) { char buildLog[1024]; status = clGetProgramBuildInfo (program, currentDevice, CL_PROGRAM_BUILD_LOG, sizeof(buildLog), buildLog, NULL); checkErrors (status, "clGetProgramBuildInfo", __LINE__); fprintf (stderr, "Program Build Log:\n%s\n",buildLog); status = CL_BUILD_PROGRAM_FAILURE; } // check for remaining possible errors checkErrors (status, "clBuildProgram", __LINE__); // create kernel from compiled program cl_kernel kernel = clCreateKernel (program, "saxpy_parallel", &status); checkErrors (status, "clCreateKernel", __LINE__); ///////////////////////////////////// // (2) allocate memory on host (main CPU memory) and device, // h_ denotes data residing on the host, d_ on device ///////////////////////////////////// float *h_x = (float*) malloc (N*sizeof(float)); float *h_y = (float*) malloc (N*sizeof(float)); cl_mem d_x = clCreateBuffer (context, CL_MEM_READ_WRITE, N*sizeof(float), NULL, &status); checkErrors (status, "clCreateBuffer", __LINE__); cl_mem d_y = clCreateBuffer (context, CL_MEM_READ_WRITE, N*sizeof(float), NULL, &status); checkErrors (status, "clCreateBuffer", __LINE__); ///////////////////////////////////// // (3) initialise data on the CPU ///////////////////////////////////// int i; for (i=0; i 1e-6) errorCount = errorCount + 1; } if (errorCount > 0) printf ("Result comparison failed.\n"); else printf ("Result comparison passed.\n"); ///////////////////////////////////// // (9) clean up, free memory and release all CL stuff ///////////////////////////////////// free(h_x); free(h_y); free(h_z); status = clReleaseKernel (kernel); checkErrors (status, "clReleaseKernel", __LINE__); status = clReleaseProgram (program); checkErrors (status, "clReleaseProgram", __LINE__); status = clUnloadCompiler (); checkErrors (status, "clUnloadCompiler", __LINE__); status = clReleaseCommandQueue (commandQueue); checkErrors (status, "clReleaseCommandQueue", __LINE__); status = clReleaseContext (context); checkErrors (status, "clReleaseContext", __LINE__); status = clReleaseMemObject (d_x); checkErrors (status, "clReleaseMemObject", __LINE__); status = clReleaseMemObject (d_y); checkErrors (status, "clReleaseMemObject", __LINE__); return 0; }