/* ocl_vadd_v1.c (OpenCL, version 1) Compile + Link: > cl.exe /O2 ocl_vadd_v1.c OpenCL.lib Usage: > ocl_vadd_v1 */ #include #include #include #include #define MAX_PLATFORMS (10) #define MAX_DEVICES (10) #define MAX_SOURCE_SIZE (100000) int main(int argc, char **argv) { // OpenCL cl_context context = NULL; cl_command_queue command_queue = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_platform_id platform_id[MAX_PLATFORMS]; cl_device_id device_id[MAX_DEVICES]; // memory object cl_mem d_a = NULL; cl_mem d_b = NULL; cl_mem d_c = NULL; FILE *fp; char *source_str; size_t source_size; size_t global_item_size, local_item_size; size_t ret_size; cl_uint num_platforms; cl_uint num_devices; cl_int ret; char str[BUFSIZ]; cl_uint platform = 0; cl_uint device = 0; int nloop = 1000; int n = 1000; // arguments if (argc >= 5) { n = atoi(argv[1]); nloop = atoi(argv[2]); platform = atoi(argv[3]); device = atoi(argv[4]); } // alloc source_str = (char *)malloc(MAX_SOURCE_SIZE * sizeof(char)); // setup host arrays float *a = (float *)malloc(n * sizeof(float)); float *b = (float *)malloc(n * sizeof(float)); float *c = (float *)malloc(n * sizeof(float)); for (int i = 0; i < n; i++) { a[i] = (float)(1 + i); b[i] = (float)(1 + i); } // platform clGetPlatformIDs(MAX_PLATFORMS, platform_id, &num_platforms); if (platform >= num_platforms) { printf("error : platform = %d (limit = %d)\n", platform, num_platforms - 1); exit(1); } // device clGetDeviceIDs(platform_id[platform], CL_DEVICE_TYPE_ALL, MAX_DEVICES, device_id, &num_devices); if (device >= num_devices) { printf("error : device = %d (limit = %d)\n", device, num_devices - 1); exit(1); } // device name (option) clGetDeviceInfo(device_id[device], CL_DEVICE_NAME, sizeof(str), str, &ret_size); printf("%s\n", str); // context context = clCreateContext(NULL, 1, &device_id[device], NULL, NULL, &ret); // command queue command_queue = clCreateCommandQueue(context, device_id[device], 0, &ret); // source if ((fp = fopen("vadd.cl", "r")) == NULL) { fprintf(stderr, "kernel source open error\n"); exit(1); } source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp); fclose(fp); // program program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret); if (ret != CL_SUCCESS) { fprintf(stderr, "clCreateProgramWithSource() error\n"); exit(1); } // build if (clBuildProgram(program, 1, &device_id[device], NULL, NULL, NULL) != CL_SUCCESS) { fprintf(stderr, "clBuildProgram() error\n"); exit(1); } // kernel kernel = clCreateKernel(program, "vadd", &ret); if (ret != CL_SUCCESS) { fprintf(stderr, "clCreateKernel() error\n"); exit(1); } // memory object d_a = clCreateBuffer(context, CL_MEM_READ_WRITE, n * sizeof(float), NULL, &ret); d_b = clCreateBuffer(context, CL_MEM_READ_WRITE, n * sizeof(float), NULL, &ret); d_c = clCreateBuffer(context, CL_MEM_READ_WRITE, n * sizeof(float), NULL, &ret); // host to device clEnqueueWriteBuffer(command_queue, d_a, CL_TRUE, 0, n * sizeof(float), a, 0, NULL, NULL); clEnqueueWriteBuffer(command_queue, d_b, CL_TRUE, 0, n * sizeof(float), b, 0, NULL, NULL); // args clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&d_a); clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&d_b); clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&d_c); clSetKernelArg(kernel, 3, sizeof(int), (void *)&n); // timer clock_t t0 = clock(); // work item local_item_size = 256; global_item_size = ((n + local_item_size - 1) / local_item_size) * local_item_size; // run for (int loop = 0; loop < nloop; loop++) { clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL); } // device to host clEnqueueReadBuffer(command_queue, d_c, CL_TRUE, 0, n * sizeof(float), c, 0, NULL, NULL); // timer clock_t t1 = clock(); double cpu = (double)(t1 - t0) / CLOCKS_PER_SEC; // output double sum = 0; for (int i = 0; i < n; i++) { sum += c[i]; } double exact = n * (n + 1.0); printf("N=%d L=%d %.6e(%.6e) %.1e %.3f[sec]\n", n, nloop, sum, exact, fabs((sum - exact) / exact), cpu); // release clFlush(command_queue); clFinish(command_queue); clReleaseMemObject(d_a); clReleaseMemObject(d_b); clReleaseMemObject(d_c); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(command_queue); clReleaseContext(context); // free free(source_str); free(a); free(b); free(c); return 0; }