-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathrt.h
More file actions
170 lines (144 loc) · 5.89 KB
/
rt.h
File metadata and controls
170 lines (144 loc) · 5.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#ifndef RT_H_
#define RT_H_
#include <assert.h>
#include <immintrin.h>
#include <limits.h>
#include <omp.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include <algorithm>
#include <iostream>
#include <parallel/algorithm>
#include <random>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
#include <regex>
#include <fstream>
#include <malloc.h>
#include <unordered_map>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include "timer.h"
#define THREAD_NUM 20
#define MAX_BINDEX_NUM 256
#ifndef VAREA_N
#define VAREA_N 128 // 128
#endif
#ifndef DATA_N
#define DATA_N 1e8 // 1e9
#endif
#define DEBUG_TIME_COUNT 1
#define DEBUG_INFO 1
// VALUES: 0 (uniform, normal), 1 (zipf)
#ifndef DISTRIBUTION
#define DISTRIBUTION 0
#endif
#ifndef ENCODE
#define ENCODE 1
#endif
#define RANGE_SELECTIVITY_11
#ifndef ONLY_DATA_SIEVING
#define ONLY_DATA_SIEVING 0
#endif
#ifndef ONLY_REFINE
#define ONLY_REFINE 0
#endif
#define PRINT_EXCECUTION_TIME(msg, code) \
do { \
struct timeval t1, t2; \
double elapsed; \
gettimeofday(&t1, NULL); \
do { \
code; \
} while (0); \
gettimeofday(&t2, NULL); \
elapsed = (t2.tv_sec - t1.tv_sec) * 1000.0; \
elapsed += (t2.tv_usec - t1.tv_usec) / 1000.0; \
printf(msg " time: %f ms\n", elapsed); \
} while (0);
#define CODEWIDTH 32
typedef uint32_t CODE;
typedef int POSTYPE; // Data type for positions
typedef unsigned int BITS; // 32 0-1 bit results are stored in a BITS
enum OPERATOR {
EQ = 0, // x == a
LT, // x < a
GT, // x > a
LE, // x <= a
GE, // x >= a
BT, // a < x < b
};
const int RUNS = 1;
const int BITSWIDTH = sizeof(BITS) * 8;
const int BITSSHIFT = 5; // x / BITSWIDTH == x >> BITSSHIFT
const int SIMD_ALIGEN = 32;
const int SIMD_JOB_UNIT = 8; // 8 * BITSWIDTH == __m256i
const int K = VAREA_N; // Number of virtual areas
const int N = (int)DATA_N;
extern Timer timer;
extern map<CODE, CODE> encodeMap[MAX_BINDEX_NUM];
typedef struct {
CODE areaStartValues[K];
BITS *filterVectors[K - 1];
BITS *filterVectorsInGPU[K - 1];
POSTYPE length;
CODE data_min;
CODE data_max;
} BinDex;
template <typename T>
POSTYPE *argsort(const T *v, POSTYPE n) {
POSTYPE *idx = (POSTYPE *)malloc(n * sizeof(POSTYPE));
for (POSTYPE i = 0; i < n; i++) {
idx[i] = i;
}
__gnu_parallel::sort(idx, idx + n, [&v](size_t i1, size_t i2) { return v[i1] < v[i2]; });
return idx;
}
template <typename T>
POSTYPE *argsort(const T *v, POSTYPE n, POSTYPE *idx) {
for (POSTYPE i = 0; i < n; i++) {
idx[i] = i;
}
__gnu_parallel::sort(idx, idx + n, [&v](size_t i1, size_t i2) { return v[i1] < v[i2]; });
return idx;
}
inline int bits_num_needed(int n) {
// calculate the number of bits for storing n 0-1 bit results
return ((n - 1) / BITSWIDTH) + 1;
}
CODE **normalEncode(CODE **initialDataSet, int column_num, CODE encode_min, CODE encode_max, int data_num, POSTYPE **sorted_pos, CODE **sorted_data);
CODE **normalEncode(CODE **initialDataSet, int column_num, CODE encode_min, CODE encode_max, int data_num);
CODE encodeQuery(int column_id, CODE old_query, string &cmd);
bool ifEncodeEqual(const CODE val1, const CODE val2, int bindex_id);
CODE findKeyByValue(const CODE Val, std::map<CODE, int>& map_);
cudaError_t GPUbitAndWithCuda(BITS* dev_bitmap_a, BITS* dev_bitmap_b, unsigned int n);
cudaError_t GPUbitCopyWithCuda(BITS* dev_bitmap_a, BITS* dev_bitmap_b, unsigned int n);
cudaError_t GPUbitCopyNegationWithCuda(BITS* dev_bitmap_a, BITS* dev_bitmap_b, unsigned int n);
cudaError_t GPUbitCopySIMDWithCuda(BITS* result, BITS* dev_bitmap_a, BITS* dev_bitmap_b, unsigned int bitnum);
void initializeOptix(CODE **raw_data, int length, int density_width, int density_height, int column_num, CODE* range,
int cube_width);
void refineWithOptix(BITS *dev_result_bitmap, double *predicate, int column_num,
int ray_length = -1, int ray_segment_num = 10, bool inverse = false, int direction = 2, int ray_mode = 0);
void initializeOptixRTc3(CODE **raw_data, int length, int density_width, int density_height, int column_num,
CODE *range, uint32_t cube_width, int direction);
void refineWithOptixRTc3(BITS *dev_result_bitmap, double *predicate, int column_num,
int ray_length = -1, int ray_segment_num = 10, bool inverse = false, int direction = 2, int ray_mode = 0);
void initializeOptixRTc1(CODE **raw_data, int length, int density_width, int density_height, int column_num, CODE* range, int cube_width_factor,
int ray_interval, int prim_size);
void refineWithOptixRTc1(BITS *dev_result_bitmap, double *predicate, int column_num,
int ray_length = -1, int ray_segment_num = 10, bool inverse = false, int direction = 2, int ray_mode = 0);
void initializeOptixRTScan_2c(CODE **raw_data, int length, int density_width, int density_height, int column_num = 3);
void refineWithOptixRTScan_2c(BITS *dev_result_bitmap, double *predicate, unsigned *range,
int column_num, int ray_segment_num = 1, bool inverse = false);
void initializeOptixRTScan_interval_spacing(CODE **raw_data, int length, int density_width, int density_height, int column_num, CODE* range, double ray_interval_ratio);
void refineWithOptixRTScan_interval_spacing(BITS *dev_result_bitmap, double *predicate, int column_num,
int ray_length, int ray_segment_num, bool inverse, int direction, int ray_mode,
double ray_distance_ratio);
#endif