-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclsupport.c
More file actions
244 lines (215 loc) · 9.14 KB
/
clsupport.c
File metadata and controls
244 lines (215 loc) · 9.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
/**
* @file clsupport.c
* @brief OpenCL helper functions for n-puzzle solver program
* @author Mitchell Clay
* @date 4/18/2020
**/
#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include "clsupport.h"
#define MAX_SOURCE_SIZE (0x10000)
// Report the outcome of an OpenCL call on stdout.
// Prints "OK" when ret equals CL_SUCCESS, otherwise "ERROR: <ret>" with the
// numeric status code. Always terminates the line.
void print_ret_status(int ret) {
    if (ret != CL_SUCCESS) {
        printf("ERROR: %d\n", ret);
        return;
    }
    printf("OK\n");
}
// Print out CL device info
// code is from https://gist.github.com/courtneyfaulkner/7919509
// This will probably be replaced later with more program-specific info
int print_cl_devices() {
int i, j;
char* value;
size_t valueSize;
cl_uint platformCount;
cl_platform_id* platforms;
cl_uint deviceCount;
cl_device_id* devices;
cl_uint maxComputeUnits;
// get all platforms
clGetPlatformIDs(0, NULL, &platformCount);
platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
clGetPlatformIDs(platformCount, platforms, NULL);
for (i = 0; i < platformCount; i++) {
// get all devices
clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &deviceCount);
devices = (cl_device_id*) malloc(sizeof(cl_device_id) * deviceCount);
clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, deviceCount, devices, NULL);
// for each device print critical attributes
for (j = 0; j < deviceCount; j++) {
// print device name
clGetDeviceInfo(devices[j], CL_DEVICE_NAME, 0, NULL, &valueSize);
value = (char*) malloc(valueSize);
clGetDeviceInfo(devices[j], CL_DEVICE_NAME, valueSize, value, NULL);
printf("%d. Device: %s\n", j+1, value);
free(value);
// print hardware device version
clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, 0, NULL, &valueSize);
value = (char*) malloc(valueSize);
clGetDeviceInfo(devices[j], CL_DEVICE_VERSION, valueSize, value, NULL);
printf(" %d.%d Hardware version: %s\n", j+1, 1, value);
free(value);
// print software driver version
clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, 0, NULL, &valueSize);
value = (char*) malloc(valueSize);
clGetDeviceInfo(devices[j], CL_DRIVER_VERSION, valueSize, value, NULL);
printf(" %d.%d Software version: %s\n", j+1, 2, value);
free(value);
// print c version supported by compiler for device
clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &valueSize);
value = (char*) malloc(valueSize);
clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, valueSize, value, NULL);
printf(" %d.%d OpenCL C version: %s\n", j+1, 3, value);
free(value);
// print parallel compute units
clGetDeviceInfo(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS,
sizeof(maxComputeUnits), &maxComputeUnits, NULL);
printf(" %d.%d Parallel compute units: %d\n", j+1, 4, maxComputeUnits);
}
free(devices);
}
free(platforms);
return 0;
}
/**
 * Run the "solve" OpenCL kernel on the default device of the first platform.
 *
 * Steps, each reported on stdout via print_ret_status():
 *   1. pick the first platform and its default device;
 *   2. read the kernel source from "solve.cl" (path relative to the current
 *      working directory, at most MAX_SOURCE_SIZE bytes);
 *   3. create a context, a command queue and five device buffers;
 *   4. upload the initial state, final state, puzzle size and blank position;
 *   5. build the program, create the kernel, bind its five arguments;
 *   6. enqueue the kernel with a global and local size of 1 and wait for it.
 *
 * The function stops at the first failing OpenCL step and releases every
 * object it created before returning. A kernel file that cannot be loaded
 * terminates the process with exit(1).
 *
 * @param initial_state  puzzle_size * puzzle_size values uploaded as the
 *                       initial state buffer
 * @param final_state    puzzle_size * puzzle_size values uploaded as the
 *                       final state buffer
 * @param puzzle_size    side length used to size the two state buffers
 * @param blank_position value uploaded to the blank position buffer
 * @param verbose        currently unused
 * @param debug          currently unused
 */
void CLSolve(unsigned* initial_state,
             unsigned* final_state,
             unsigned puzzle_size,
             unsigned blank_position,
             bool verbose,
             bool debug) {
    cl_uint maxComputeUnits = 0;
    size_t maxWorkGroupSize = 0;
    cl_platform_id platform_id = NULL;
    cl_device_id device_id = NULL;
    cl_uint ret_num_devices = 0;
    cl_uint ret_num_platforms = 0;
    size_t global_item_size = 1;
    size_t local_item_size = 1;
    unsigned num_compute_units = 1;
    unsigned num_pes_per_unit = 1;
    // This needs to allow for at least (2 * dimensions * depth) number of nodes
    unsigned node_array_size = 100;
    const size_t state_bytes = sizeof(unsigned) * puzzle_size * puzzle_size;

    // Everything that needs releasing starts out NULL so the cleanup path can
    // tell which objects were actually created.
    char *source_str = NULL;
    size_t source_size = 0;
    cl_context context = NULL;
    cl_command_queue command_queue = NULL;
    cl_mem initial_state_mem_obj = NULL;
    cl_mem final_state_mem_obj = NULL;
    cl_mem puzzle_size_mem_obj = NULL;
    cl_mem blank_position_mem_obj = NULL;
    cl_mem CLNodes_mem_obj = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_int ret;

    (void)verbose;
    (void)debug;

    // Get platform and device information
    printf("Getting platform and device information: ");
    ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    if (ret == CL_SUCCESS) {
        ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1,
                             &device_id, &ret_num_devices);
    }
    print_ret_status(ret);
    if (ret != CL_SUCCESS) {
        return;  // nothing has been allocated yet
    }

    // Load the kernel source; failure to do so is fatal for the program.
    FILE *fp = fopen("solve.cl", "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(1);
    }
    source_str = malloc(MAX_SOURCE_SIZE);
    if (!source_str) {
        fclose(fp);
        fprintf(stderr, "Failed to load kernel.\n");
        exit(1);
    }
    source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose(fp);

    // Get Max Compute Units
    clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS,
                    sizeof(maxComputeUnits), &maxComputeUnits, NULL);
    printf("Parallel compute units: %u\n", maxComputeUnits);

    // Get Max Workgroup Size
    clGetDeviceInfo(device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE,
                    sizeof(maxWorkGroupSize), &maxWorkGroupSize, NULL);
    printf("Max workgroup size: %zu\n", maxWorkGroupSize);

    // Create an OpenCL context
    printf("Creating context: ");
    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
    print_ret_status(ret);
    if (ret != CL_SUCCESS) {
        goto cleanup;
    }

    // Create a command queue
    printf("Creating queue: ");
    command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
    print_ret_status(ret);
    if (ret != CL_SUCCESS) {
        goto cleanup;
    }

    // Create memory buffers
    initial_state_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
                                           state_bytes, NULL, &ret);
    printf("Creating initial state buffer: ");
    print_ret_status(ret);
    if (ret != CL_SUCCESS) {
        goto cleanup;
    }

    final_state_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
                                         state_bytes, NULL, &ret);
    printf("Creating final state buffer: ");
    print_ret_status(ret);
    if (ret != CL_SUCCESS) {
        goto cleanup;
    }

    puzzle_size_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY,
                                         sizeof(unsigned), NULL, &ret);
    printf("Creating puzzle size buffer: ");
    print_ret_status(ret);
    if (ret != CL_SUCCESS) {
        goto cleanup;
    }

    blank_position_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                            sizeof(unsigned), NULL, &ret);
    printf("Creating blank position buffer: ");
    print_ret_status(ret);
    if (ret != CL_SUCCESS) {
        goto cleanup;
    }

    CLNodes_mem_obj = clCreateBuffer(context, CL_MEM_READ_WRITE,
        sizeof(struct CLNode) * num_compute_units * num_pes_per_unit * node_array_size,
        NULL, &ret);
    printf("Creating CLNodes buffer: ");
    print_ret_status(ret);
    if (ret != CL_SUCCESS) {
        goto cleanup;
    }

    // Copy data to buffers (blocking writes, so host memory may be reused
    // as soon as each call returns)
    printf("Copying variables to buffers\n");

    printf("- Initial State Array: ");
    ret = clEnqueueWriteBuffer(command_queue, initial_state_mem_obj, CL_TRUE, 0,
                               state_bytes, initial_state, 0, NULL, NULL);
    print_ret_status(ret);
    if (ret != CL_SUCCESS) {
        goto cleanup;
    }

    printf("- Final State Array: ");
    ret = clEnqueueWriteBuffer(command_queue, final_state_mem_obj, CL_TRUE, 0,
                               state_bytes, final_state, 0, NULL, NULL);
    print_ret_status(ret);
    if (ret != CL_SUCCESS) {
        goto cleanup;
    }

    printf("- Puzzle Size: ");
    ret = clEnqueueWriteBuffer(command_queue, puzzle_size_mem_obj, CL_TRUE, 0,
                               sizeof(unsigned), &puzzle_size, 0, NULL, NULL);
    print_ret_status(ret);
    if (ret != CL_SUCCESS) {
        goto cleanup;
    }

    printf("- Blank Position: ");
    ret = clEnqueueWriteBuffer(command_queue, blank_position_mem_obj, CL_TRUE, 0,
                               sizeof(unsigned), &blank_position, 0, NULL, NULL);
    print_ret_status(ret);
    if (ret != CL_SUCCESS) {
        goto cleanup;
    }

    // Create a program from the kernel source
    printf("Creating program from source: ");
    program = clCreateProgramWithSource(context, 1,
                                        (const char **)&source_str, &source_size, &ret);
    print_ret_status(ret);
    if (ret != CL_SUCCESS) {
        goto cleanup;
    }

    // Build the program
    printf("Building program: ");
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
    print_ret_status(ret);
    if (ret != CL_SUCCESS) {
        size_t log_size = 0;
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
        // One extra zeroed byte keeps the log terminated without writing
        // past the end of the allocation.
        char *buildLog = calloc(log_size + 1, sizeof(char));
        if (buildLog) {
            clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
                                  log_size, buildLog, NULL);
            printf("\nError in kernel BUILD PGM: %s\n ", buildLog);
            free(buildLog);
        }
        goto cleanup;
    }

    // Create the OpenCL kernel
    printf("Creating OpenCL kernel: ");
    kernel = clCreateKernel(program, "solve", &ret);
    print_ret_status(ret);
    if (ret != CL_SUCCESS) {
        goto cleanup;
    }

    // Set the arguments of the kernel, in positional order
    printf("Setting kernel arguments: \n");
    {
        const cl_mem kernel_args[] = {
            initial_state_mem_obj,
            final_state_mem_obj,
            puzzle_size_mem_obj,
            blank_position_mem_obj,
            CLNodes_mem_obj,
        };
        const cl_uint num_args = sizeof kernel_args / sizeof kernel_args[0];

        for (cl_uint arg = 0; arg < num_args; arg++) {
            ret = clSetKernelArg(kernel, arg, sizeof(cl_mem), &kernel_args[arg]);
            printf("%u - ", arg + 1);
            print_ret_status(ret);
            if (ret != CL_SUCCESS) {
                goto cleanup;
            }
        }
    }

    printf("Executing kernel: \n");
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
                                 &global_item_size, &local_item_size, 0, NULL, NULL);
    printf("Execution: ");
    print_ret_status(ret);
    if (ret != CL_SUCCESS) {
        goto cleanup;
    }

    // Wait for the kernel so it has actually run before its resources are
    // released; only a failure is reported.
    ret = clFinish(command_queue);
    if (ret != CL_SUCCESS) {
        fprintf(stderr, "Waiting for kernel completion failed: %d\n", ret);
    }

cleanup:
    // Release in reverse order of creation; skip anything never created.
    if (kernel) {
        clReleaseKernel(kernel);
    }
    if (program) {
        clReleaseProgram(program);
    }
    if (CLNodes_mem_obj) {
        clReleaseMemObject(CLNodes_mem_obj);
    }
    if (blank_position_mem_obj) {
        clReleaseMemObject(blank_position_mem_obj);
    }
    if (puzzle_size_mem_obj) {
        clReleaseMemObject(puzzle_size_mem_obj);
    }
    if (final_state_mem_obj) {
        clReleaseMemObject(final_state_mem_obj);
    }
    if (initial_state_mem_obj) {
        clReleaseMemObject(initial_state_mem_obj);
    }
    if (command_queue) {
        clReleaseCommandQueue(command_queue);
    }
    if (context) {
        clReleaseContext(context);
    }
    free(source_str);
}