-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcuda.cpp
More file actions
68 lines (60 loc) · 2.12 KB
/
cuda.cpp
File metadata and controls
68 lines (60 loc) · 2.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#define R 2
#define LAYERDIM 32
/*
 * Logistic sigmoid: 1 / (1 + e^(-a)).
 *
 * Callable from both host and device code, so the same routine can be used
 * on the CPU to build a reference result for the GPU output.
 *
 * a: pre-activation value.
 * Returns a value in [0, 1]. With IEEE doubles, a very negative `a` makes
 * exp(-a) overflow to +inf and the result becomes 0.0; a very positive `a`
 * makes exp(-a) underflow to 0 and the result becomes 1.0.
 */
__host__ __device__ __forceinline__ double sigmoid (double a)
{
    return 1.0 / (1.0 + exp (-a));
}
/*
 * Computes one element of the next layer per block.
 *
 * Block `blockIdx.x` sums weigths[b] * layer[b] over the R consecutive
 * indices starting at blockIdx.x * R (clamped to lengthLayer), adds `bias`,
 * applies sigmoid() and stores the result in nextLayer[blockIdx.x].
 *
 * Launch layout: 1-D grid with one block per output element. Only thread 0
 * of each block does the work; the kernel never indexes by threadIdx, so any
 * additional threads would only repeat the same computation and output.
 *
 * weigths     device pointer, at least lengthLayer elements
 * bias        scalar added to the weighted sum
 * layer       device pointer, at least lengthLayer elements
 * lengthLayer number of valid elements in weigths / layer
 * idLayer     currently unused
 * nextLayer   device pointer, written at index blockIdx.x
 *
 * The printf calls are debugging output: device-side printf is serialized
 * and slow, and is only flushed at the next host synchronization.
 */
__global__ void sigmoid_kernel (const double weigths[], float bias, const double layer[], int lengthLayer, int idLayer, double* nextLayer)
{
    (void)idLayer;

    /* All threads of a block would compute the same value; let one do it. */
    if (threadIdx.x != 0) {
        return;
    }

    const int start = blockIdx.x * R;
    if (start >= lengthLayer) {
        return;  /* block lies entirely past the end of the input */
    }

    /* Clamp the slice so a partial trailing group never reads out of bounds. */
    int end = start + R;
    if (end > lengthLayer) {
        end = lengthLayer;
    }

    /* Accumulate in double: the inputs are double, a float sum would lose precision. */
    double value = 0.0;
    for (int b = start; b < end; ++b) {
        value += weigths[b] * layer[b];
        printf("%g WEIGHT %g INPUT %g\n", weigths[b], layer[b], (double)bias);
    }
    value += bias;
    printf("%g VALUE\n", value);

    const double activated = sigmoid(value);
    nextLayer[blockIdx.x] = activated;
    printf("%g NEXT LAYER  \n", activated);
}
/* Prints a readable message and exits when a CUDA runtime call fails. */
static void checkCuda(cudaError_t status, const char* what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Builds a random weight vector and input layer of LAYERDIM elements on the
 * host, uploads both to the device, runs sigmoid_kernel with one block per
 * output element, copies the LAYERDIM / R results back and prints them.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails.
 */
int main(void)
{
    const int lengthLayer = LAYERDIM;
    const int idLayer = 0;
    const float bias = 0.1f;

    /* One output per group of R inputs; a trailing partial group is dropped. */
    const int numOutputs = lengthLayer / R;
    const int numBlock = numOutputs;
    /* The kernel indexes only by blockIdx.x, so one thread per block suffices. */
    const int numThread = 1;

    double weights1[LAYERDIM];
    for (int i = 0; i < LAYERDIM; i++) {
        weights1[i] = static_cast<double>(rand()) / RAND_MAX;
        printf("%g\n", weights1[i]);
    }

    double layer[LAYERDIM];
    for (int i = 0; i < LAYERDIM; i++) {
        layer[i] = static_cast<double>(rand()) / RAND_MAX;
        printf("%g\n", layer[i]);
    }

    double* nextLayer = new double[numOutputs];

    const size_t inputBytes = sizeof(double) * lengthLayer;
    const size_t outputBytes = sizeof(double) * numOutputs;

    /* The kernel dereferences its pointers on the GPU, so every buffer it
       touches needs a device-side copy. */
    double *d_weights = 0, *d_layer = 0, *d_next = 0;
    checkCuda(cudaMalloc((void**)&d_weights, inputBytes), "cudaMalloc(weights)");
    checkCuda(cudaMalloc((void**)&d_layer, inputBytes), "cudaMalloc(layer)");
    checkCuda(cudaMalloc((void**)&d_next, outputBytes), "cudaMalloc(nextLayer)");

    /* Push source data to device */
    checkCuda(cudaMemcpy(d_weights, weights1, inputBytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(weights, host to device)");
    checkCuda(cudaMemcpy(d_layer, layer, inputBytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(layer, host to device)");

    sigmoid_kernel<<<numBlock, numThread>>>(d_weights, bias, d_layer, lengthLayer, idLayer, d_next);
    /* A launch reports bad configurations via cudaGetLastError(); faults
       inside the kernel only surface at the next synchronizing call. */
    checkCuda(cudaGetLastError(), "sigmoid_kernel launch");
    checkCuda(cudaDeviceSynchronize(), "sigmoid_kernel execution");

    /* Copy back exactly the number of outputs that were produced. */
    checkCuda(cudaMemcpy(nextLayer, d_next, outputBytes, cudaMemcpyDeviceToHost),
              "cudaMemcpy(nextLayer, device to host)");

    for (int i = 0; i < numOutputs; i++) {
        printf("nextLayer[%d] = %g\n", i, nextLayer[i]);
    }

    checkCuda(cudaFree(d_weights), "cudaFree(weights)");
    checkCuda(cudaFree(d_layer), "cudaFree(layer)");
    checkCuda(cudaFree(d_next), "cudaFree(nextLayer)");
    delete[] nextLayer;
    return 0;
}