-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathMemoryLatency.c
More file actions
233 lines (202 loc) · 7.25 KB
/
MemoryLatency.c
File metadata and controls
233 lines (202 loc) · 7.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
#include "pch.h"
#include "BenchmarkDllCommon.h"
// Optional preallocated test array shared by the memory latency tests.
// When non-NULL (SetLargePages sets it to a VirtualAlloc'd MEM_LARGE_PAGES region), RunAsmLatencyTest
// and RunLatencyTest use it as the test array and do not free it.
// When NULL, each test run allocates its own array with malloc() and frees it afterwards.
void* mem = NULL;
// Memory latency functions exported from the DLL
__declspec(dllexport) float __stdcall RunAsmLatencyTest(uint32_t size_kb, uint64_t iterations);
__declspec(dllexport) float __stdcall RunLatencyTest(uint32_t size_kb, uint64_t iterations);
__declspec(dllexport) int __stdcall SetLargePages(uint32_t enable);
// Enables SeLockMemoryPrivilege on the current process token. Returns 0 on success, -1 on failure
int GetPrivilege();
/// <summary>
/// Enables or disables the large page backed test array.
/// Any array left over from a previous call is always released first.
/// </summary>
/// <param name="enable">0 disables large pages (the old array is released and nothing is allocated). Any other value is the size in bytes of the new large page array to allocate</param>
/// <returns>0 on success, -1 if the required privilege could not be obtained, -2 if the allocation failed</returns>
int SetLargePages(uint32_t enable)
{
    // Enabling and disabling both start by dropping whatever was allocated before
    if (mem != NULL)
    {
        VirtualFree(mem, 0, MEM_RELEASE);
        mem = NULL;
    }

    // Disabling needs nothing beyond the release above
    if (enable == 0)
    {
        return 0;
    }

    // Large page allocations need the lock memory privilege enabled first
    if (GetPrivilege() != 0)
    {
        return -1;
    }

    mem = VirtualAlloc(NULL, enable, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
    return (mem == NULL) ? -2 : 0;
}
/// <summary>
/// Fills pattern array with 32-bit indices that form one random pointer chasing cycle.
/// Only every (byte_increment / 4)-th element is written. Each written element holds the
/// index of the next element to visit, and following the indices from element 0 visits
/// every written element exactly once before returning to element 0.
/// </summary>
/// <param name="pattern_arr">array to fill</param>
/// <param name="list_size">number of 32-bit elements</param>
/// <param name="byte_increment">how far apart elements should be spaced, in bytes. Values below 4 leave the array untouched</param>
void FillPatternArr(uint32_t* pattern_arr, uint32_t list_size, uint32_t byte_increment) {
    uint32_t increment = byte_increment / sizeof(uint32_t);
    if (increment == 0) {
        // A stride smaller than one element cannot be represented, and would divide by zero below
        return;
    }

    uint32_t element_count = list_size / increment;
    for (uint32_t i = 0; i < element_count; i++) {
        pattern_arr[i * increment] = i * increment;
    }

    // Sattolo's algorithm: swapping each element with a uniformly chosen earlier one
    // turns the identity mapping into a single cycle covering every element
    uint32_t iter = element_count;
    while (iter > 1) {
        iter -= 1;

        // rand() may provide as few as 15 random bits (RAND_MAX is 0x7fff on MSVC).
        // Combine three calls so the swap partner can be any earlier element of a large array.
        uint32_t r = (uint32_t)rand();
        r = (r << 15) ^ (uint32_t)rand();
        r = (r << 15) ^ (uint32_t)rand();
        uint32_t j = r % iter;  // 0 <= j < iter

        uint32_t tmp = pattern_arr[iter * increment];
        pattern_arr[iter * increment] = pattern_arr[j * increment];
        pattern_arr[j * increment] = tmp;
    }
}
/// <summary>
/// Maps a page number to an element index inside that page. The position within the page
/// advances by one cacheline for every page and wraps around at the end of the page.
/// The wrap is done with a mask, so it only works as described when page_size / 4 is a power of two.
/// </summary>
/// <param name="index">page number</param>
/// <param name="cacheline_size">cacheline size in bytes</param>
/// <param name="page_size">page size in bytes</param>
/// <returns>index into an array of 32-bit elements</returns>
uint32_t GetTlbShiftedOffset(uint32_t index, uint32_t cacheline_size, uint32_t page_size)
{
    const uint32_t elements_per_page = page_size / sizeof(uint32_t);
    const uint32_t elements_per_line = cacheline_size / sizeof(uint32_t);

    // first element of the page selected by index
    const uint32_t page_start = index * elements_per_page;

    // position inside the page, counted in elements rather than bytes
    const uint32_t within_page = (index * elements_per_line) & (elements_per_page - 1);

    return page_start + within_page;
}
/// <summary>
/// Fills pattern array with page_size as the pointer chasing stride, but with the accessed
/// element shifted by one extra cacheline on each successive page (see GetTlbShiftedOffset),
/// presumably so the accesses do not all land on the same offset within a page.
/// One element is written per page. Every other element is set to 0xFFFFFFFF.
/// If page_size is 0 the array is left untouched. If the temporary array cannot be
/// allocated, the whole array is left at 0xFFFFFFFF.
/// </summary>
/// <param name="pattern_arr">array to fill</param>
/// <param name="list_size">number of 32-bit elements in the array</param>
/// <param name="cacheline_size">cacheline size in bytes</param>
/// <param name="page_size">page size in bytes</param>
void FillTlbTestPatternArr(uint32_t* pattern_arr, uint32_t list_size, uint32_t cacheline_size, uint32_t page_size) {
    if (page_size == 0) {
        // would divide by zero when counting pages
        return;
    }

    // memset only uses the low byte of its value, so every unused element becomes 0xFFFFFFFF
    memset(pattern_arr, 0xFF, list_size * sizeof(uint32_t));

    // one pointer chasing element per page
    uint32_t element_count = list_size * sizeof(uint32_t) / page_size;
    if (element_count == 0) {
        return;
    }

    // build the random visiting order over page numbers in a temporary, densely packed array
    uint32_t* temp_arr = (uint32_t*)malloc(sizeof(uint32_t) * element_count);
    if (temp_arr == NULL) {
        return;
    }
    FillPatternArr(temp_arr, element_count, sizeof(uint32_t));

    // scatter it: page i points at the shifted slot of the page that follows it in the order
    for (uint32_t i = 0; i < element_count; i++)
    {
        uint32_t dst_index = GetTlbShiftedOffset(i, cacheline_size, page_size);
        uint32_t dst_value = GetTlbShiftedOffset(temp_arr[i], cacheline_size, page_size);
        pattern_arr[dst_index] = dst_value;
    }

    free(temp_arr);
}
/// <summary>
/// Fills pattern array with 64-bit indices that form one random pointer chasing cycle.
/// Only every (byte_increment / 8)-th element is written. Each written element holds the
/// index of the next element to visit, and following the indices from element 0 visits
/// every written element exactly once before returning to element 0.
/// </summary>
/// <param name="pattern_arr">array to fill</param>
/// <param name="list_size">number of 64-bit elements in array</param>
/// <param name="byte_increment">how far apart elements should be spaced, in bytes. Values below 8 leave the array untouched</param>
void FillPatternArr64(uint64_t* pattern_arr, uint64_t list_size, uint64_t byte_increment) {
    // keep everything 64 bits wide so large sizes are not truncated
    uint64_t increment = byte_increment / sizeof(uint64_t);
    if (increment == 0) {
        // A stride smaller than one element cannot be represented, and would divide by zero below
        return;
    }

    uint64_t element_count = list_size / increment;
    for (uint64_t i = 0; i < element_count; i++) {
        pattern_arr[i * increment] = i * increment;
    }

    // Sattolo's algorithm: swapping each element with a uniformly chosen earlier one
    // turns the identity mapping into a single cycle covering every element
    uint64_t iter = element_count;
    while (iter > 1) {
        iter -= 1;

        // rand() may provide as few as 15 random bits (RAND_MAX is 0x7fff on MSVC).
        // Three calls give at least 45 random bits, so the swap partner can be any
        // earlier element of a large array.
        uint64_t r = (uint64_t)rand();
        r = (r << 15) ^ (uint64_t)rand();
        r = (r << 15) ^ (uint64_t)rand();
        uint64_t j = r % iter;  // 0 <= j < iter

        uint64_t tmp = pattern_arr[iter * increment];
        pattern_arr[iter * increment] = pattern_arr[j * increment];
        pattern_arr[j * increment] = tmp;
    }
}
/// <summary>
/// Runs the pointer chasing latency test using the external latencytest routine.
/// Uses the preallocated array in mem if it is set, otherwise allocates a temporary array.
/// When mem is set, this function does not check that it is large enough for size_kb.
/// </summary>
/// <param name="size_kb">size of the test array in KB</param>
/// <param name="iterations">iteration count passed to latencytest</param>
/// <returns>elapsed time per iteration in nanoseconds, or 0 if the test array could not be allocated</returns>
float RunAsmLatencyTest(uint32_t size_kb, uint64_t iterations) {
    struct timeb start, end;

    // widen before multiplying so that, on 64-bit builds, sizes of 4 GB and above do not wrap
    size_t size_bytes = (size_t)size_kb * 1024;
    size_t element_count = size_bytes / sizeof(uint64_t);

    // remember who owns the array so that only a malloc'd array is freed
    int owns_array = (mem == NULL);
    uint64_t* A;
    if (owns_array) {
        A = (uint64_t*)malloc(size_bytes);
        if (A == NULL) {
            return 0;
        }
    }
    else {
        A = (uint64_t*)mem;
    }

    memset(A, 0, size_bytes);
    FillPatternArr64(A, element_count, 64);

    // external routine; presumably converts the indices into the form latencytest expects - confirm
    preplatencyarr(A, element_count);

    ftime(&start);
    uint64_t sum = latencytest(iterations, A);
    ftime(&end);
    (void)sum;  // result is not needed, only the elapsed time

    // milliseconds elapsed, scaled by 1e6 to give nanoseconds per iteration
    int64_t time_diff_ms = 1000 * (end.time - start.time) + (end.millitm - start.millitm);
    float latency = 1e6 * (float)time_diff_ms / (float)iterations;

    if (owns_array) {
        free(A);
    }
    return latency;
}
/// <summary>
/// Runs the pointer chasing latency test with a plain C loop over 32-bit indices.
/// Uses the preallocated array in mem if it is set, otherwise allocates a temporary array.
/// When mem is set, this function does not check that it is large enough for size_kb.
/// </summary>
/// <param name="size_kb">size of the test array in KB</param>
/// <param name="iterations">number of dependent loads to time</param>
/// <returns>elapsed time per load in nanoseconds, or 0 if the test could not be run</returns>
float RunLatencyTest(uint32_t size_kb, uint64_t iterations) {
    struct timeb start, end;

    // widen before multiplying so that, on 64-bit builds, sizes of 4 GB and above do not wrap
    size_t size_bytes = (size_t)size_kb * 1024;
    size_t wanted_elements = size_bytes / sizeof(uint32_t);
    if (wanted_elements == 0 || wanted_elements > UINT32_MAX) {
        // nothing to chase, or more elements than a 32-bit index can address
        return 0;
    }
    uint32_t list_size = (uint32_t)wanted_elements;

    // remember who owns the array so that only a malloc'd array is freed
    int owns_array = (mem == NULL);
    uint32_t* A;
    if (owns_array) {
        A = (uint32_t*)malloc(sizeof(uint32_t) * list_size);
        if (A == NULL) {
            return 0;
        }
    }
    else {
        A = (uint32_t*)mem;
    }

    // Fill list to create random access pattern
    for (uint32_t i = 0; i < list_size; i++) {
        A[i] = i;
    }
    FillPatternArr(A, list_size, 64);

    // Run test. The counter is as wide as iterations so it cannot overflow before the loop ends
    ftime(&start);
    uint32_t current = A[0];
    for (uint64_t i = 0; i < iterations; i++) {
        current = A[current];
    }
    ftime(&end);

    // milliseconds elapsed, scaled by 1e6 to give nanoseconds per load
    int64_t time_diff_ms = 1000 * (end.time - start.time) + (end.millitm - start.millitm);
    float latency = 1e6 * (float)time_diff_ms / (float)iterations;

    // Make the result depend on the final index so the timed loop cannot be discarded as dead code
    uint32_t tmp = A[current];
    if (owns_array) {
        free(A);
    }
    if (current == tmp) return 0;
    return latency;
}
int GetPrivilege()
{
HANDLE hToken;
TOKEN_PRIVILEGES tp;
BOOL status;
DWORD error;
// open process token
if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
{
return -1;
}
// get the luid
if (!LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid))
{
return -1;
}
// enable privilege
tp.PrivilegeCount = 1;
tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
status = AdjustTokenPrivileges(hToken, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0);
// It is possible for AdjustTokenPrivileges to return TRUE and still not succeed.
// So always check for the last error value.
error = GetLastError();
if (!status || (error != ERROR_SUCCESS))
{
return -1;
}
// close the handle
if (!CloseHandle(hToken))
{
return -1;
}
return 0;
}