-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmoving_average_custom.hip.cpp
More file actions
135 lines (113 loc) · 4.41 KB
/
moving_average_custom.hip.cpp
File metadata and controls
135 lines (113 loc) · 4.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#include "hip_runtime.h"
#include "memorytraverser.hpp"
#include "hip_errors.hpp"
inline int iDivUp(int a, int b)
{
return ((a % b) != 0) ? (a / b + 1) : (a / b);
}
//Each new kernel have to be template to pass MemoryTraverser
//instead of using tex, we have to pass src as an additional param
template<typename TraverserType>
__global__ void moving_average_tr_kernel(hipLaunchParm lp, float* dst, float*__restrict__ src, const int N, const int R,
TraverserType* mt)
{
const int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
if (tid < N) {
float average = 0.f;
for (int k = -R; k <= R; k++) {
average = average + mt->get1D(src, (float)(tid - k + 0.5f)/(float)N);
}
dst[tid] = average / (2.f * (float)R + 1.f);
}
}
template<typename TraverserType>
void moving_average_tr_impl(float *dst, float *src, const int N, const int R)
{
//prepare data on device
float* d_dst;
float* d_src;
hipSafeCall(hipMalloc((void**)&d_dst, N * sizeof(float)));
hipSafeCall(hipMalloc((void**)&d_src, N * sizeof(float)));
hipSafeCall(hipMemcpy(d_src, src, N * sizeof(float), hipMemcpyHostToDevice));
TraverserType* gmt;
// old plain way is to use host device copy;
{
TraverserType mt;
mt.width = N;
hipSafeCall(hipMalloc((void**)&gmt, sizeof(TraverserType)));
hipSafeCall(hipMemcpy(gmt, &mt, sizeof(TraverserType), hipMemcpyHostToDevice));
hipLaunchKernel(HIP_KERNEL_NAME(moving_average_tr_kernel), dim3(iDivUp(N,256)), dim3(256), 0, 0,
d_dst, d_src, N, R, gmt);
hipCheckError();
hipSafeCall(hipDeviceSynchronize());
hipSafeCall(hipFree(gmt));
}
hipSafeCall(hipMemcpy(dst, d_dst, N * sizeof(float), hipMemcpyDeviceToHost));
hipSafeCall(hipFree(d_dst));
hipSafeCall(hipFree(d_src));
}
void moving_average_tr(float *dst, float *src, const int N, const int R,
cudaTextureFilterMode filterMode,
cudaTextureAddressMode addressMode,
int normalization)
{
//This defines the behaviour
using TraverserClampNormPixel = MemoryTraverser<float, Clamp<NORMALIZED, float>, PixelFilter<NEAREST, float>>;
using TraverserClampUNormPixel = MemoryTraverser<float, Clamp<NON_NORMALIZED, float>, PixelFilter<NEAREST, float>>;
using TraverserClampNormLinear = MemoryTraverser<float, Clamp<NORMALIZED, float>, PixelFilter<LINEAR, float>>;
using TraverserClampUNormLinear = MemoryTraverser<float, Clamp<NON_NORMALIZED, float>, PixelFilter<LINEAR, float>>;
using TraverserWrapNormPixel = MemoryTraverser<float, Wrap<NORMALIZED, float>, PixelFilter<NEAREST, float>>;
using TraverserWrapUNormPixel = MemoryTraverser<float, Wrap<NON_NORMALIZED, float>, PixelFilter<NEAREST, float>>;
using TraverserWrapNormLinear = MemoryTraverser<float, Wrap<NORMALIZED, float>, PixelFilter<LINEAR, float>>;
using TraverserWrapUNormLinear = MemoryTraverser<float, Wrap<NON_NORMALIZED, float>, PixelFilter<LINEAR, float>>;
if(filterMode == hipFilterModePoint)
{
if (addressMode == cudaAddressModeWrap)
{
if(normalization)
{
moving_average_tr_impl<TraverserWrapNormPixel>(dst, src, N, R);
}
else
{
moving_average_tr_impl<TraverserWrapUNormPixel>(dst, src, N, R);
}
}
else //clamp
{
if(normalization)
{
moving_average_tr_impl<TraverserClampNormPixel>(dst, src, N, R);
}
else
{
moving_average_tr_impl<TraverserClampUNormPixel>(dst, src, N, R);
}
}
}
else //Linear interpolation
{
if (addressMode == cudaAddressModeWrap)
{
if(normalization)
{
moving_average_tr_impl<TraverserWrapNormLinear>(dst, src, N, R);
}
else
{
moving_average_tr_impl<TraverserWrapUNormLinear>(dst, src, N, R);
}
}
else //clamp
{
if(normalization)
{
moving_average_tr_impl<TraverserClampNormLinear>(dst, src, N, R);
}
else
{
moving_average_tr_impl<TraverserClampUNormLinear>(dst, src, N, R);
}
}
}
}