-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrotate.cu
More file actions
100 lines (81 loc) · 3.27 KB
/
rotate.cu
File metadata and controls
100 lines (81 loc) · 3.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#include <cuda_runtime.h>
#include "image_descriptor.hpp"
#include <stdio.h>
// Texture reference for 2D float texture
texture<float, 2, cudaReadModeElementType> tex;
template<typename T>
__global__ void transformKernel(T* outputData,
int width,
int height,
T theta)
{
// calculate normalized texture coordinates
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
T u = (T)x - (T)width/2;
T v = (T)y - (T)height/2;
T tu = u*cosf(theta) - v*sinf(theta);
T tv = v*cosf(theta) + u*sinf(theta);
tu /= (T)width;
tv /= (T)height;
// read from texture and write to global memory
T val = tex2D(tex, tu+0.5f, tv+0.5f);
if(x == 0 && y == 0)
printf("u = %f, v = %f, tu = %f, tv = %f, val = %f\n", u, v, tu, tv, val);
if(x == 1 && y == 0)
printf("u = %f, v = %f, tu = %f, tv = %f, val = %f\n", u, v, tu, tv, val);
if(x == 2 && y == 0)
printf("u = %f, v = %f, tu = %f, tv = %f, val = %f\n", u, v, tu, tv, val);
outputData[y*width + x] = val;
}
template<typename T>
void transform_image(T* data, const image_description<T>& img_desc, T angle)
{
size_t size = img_desc.width * img_desc.height * sizeof(T);
T* d_data = NULL;
cudaMalloc((void **) &d_data, size);
/**
* Example, for float texels we could create a channel with
*
* cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
*
* while for short4 texels this would be
*
* cudaCreateChannelDesc(16, 16, 16, 16, cudaChannelFormatKindSigned);
*/
//define channel descriptor for image 32 means 32 bits per pixel = float;
cudaChannelFormatDesc channelDesc =
cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
//prepare array to fetch data from. It is the copy of input image
cudaArray *cuArray;
cudaMallocArray(&cuArray, &channelDesc, img_desc.width, img_desc.height);
cudaMemcpyToArray(cuArray, 0, 0, data, size, cudaMemcpyHostToDevice);
//set texture parameters
if(img_desc.x_addr_mode == CLAMP)
{
tex.addressMode[0] = cudaAddressModeWrap;
tex.addressMode[1] = cudaAddressModeClamp;
}
else
{
tex.addressMode[0] = cudaAddressModeWrap;
tex.addressMode[1] = cudaAddressModeWrap;
}
tex.filterMode = cudaFilterModeLinear;
tex.normalized = true; // access with normalized texture coordinates
// Bind the array to the texture
cudaBindTextureToArray(tex, cuArray, channelDesc);
dim3 dimBlock(8, 8, 1);
dim3 dimGrid(img_desc.width / dimBlock.x, img_desc.height / dimBlock.y, 1);
transformKernel<<<dimGrid, dimBlock, 0>>>(d_data,
(int)img_desc.width,
(int)img_desc.height,
angle);
cudaDeviceSynchronize();
cudaMemcpy(data, d_data, size, cudaMemcpyDeviceToHost);
cudaFree(d_data);
cudaFreeArray(cuArray);
}
//instantiation of a template definition
// other option is to move the definition to header file
template void transform_image<float>(float* data, const image_description<float>& img_desc, float f);