-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathReciprocating.c
More file actions
162 lines (131 loc) · 4.69 KB
/
Reciprocating.c
File metadata and controls
162 lines (131 loc) · 4.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
// Dave Dice and Alex Kogan
// Reciprocating Locks
// PPoPP 2025
// https://doi.org/10.1145/3710848.3710862 (ACM)
// The code below closely follows Listing-1 in the above.
// https://arxiv.org/abs/2501.02380 (Long form)
// The Reciprocating lock gains performance by a controlled amount of unfairness, while ensuring an anti-starvation
// guarantee. If thread T1 is waiting and T2 arrives after T1, then T2 can bypass T1 at most once before T1 gains
// ownership. The maximum possible long-term unfairness induced by the lock's admission schedule is 2x, where the
// fastest thread can only be admitted twice as often as the most unlucky thread over long periods.
#include <stdbool.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdatomic.h>
// Per-thread wait element. Its only field, Gate, is an atomic pointer to another
// WaitElement. In this file Acquire resets the caller's Gate to NULL and spins until
// it becomes non-NULL; Release stores the end-of-segment value into the Gate of the
// element it hands the lock to.
// NOTE(review): CALIGN is not defined in the visible source; presumably a cache-line
// alignment attribute supplied by the harness -- confirm.
typedef struct _WaitElement {
_Atomic(struct _WaitElement *) Gate CALIGN;
} WaitElement;
// Sentinel pointer with address value 1; a tag value, not the address of a real element.
// Release installs it into Arrivals when it detaches the list of arrived threads, and
// Acquire/Release also use it as an end-of-segment marker. Acquire masks off the low bit
// of the value it swaps out of Arrivals, so this sentinel reads back as NULL there.
static WaitElement * const LOCKEDEMPTY = (WaitElement *)(uintptr_t) 1;
// Lock body: a single atomic pointer, Arrivals.
// Values stored into it in this file:
//   NULL              - by ctor, and by the successful Casv in Release
//   LOCKEDEMPTY       - by the exchange at the end of Release
//   &E of some thread - by the exchange in Acquire
typedef struct {
_Atomic(WaitElement *) Arrivals;
} ReciprocatingLock;
// E: the calling thread's wait element; __thread gives every thread its own instance.
// Acquire and Release refer to it directly rather than taking it as a parameter.
static __thread WaitElement E CALIGN;
// lock: the one lock instance that Worker passes to Acquire/Release and ctor initializes.
static ReciprocatingLock lock CALIGN;
// Acquire the lock and hand back the context the matching Release needs.
//
//   lock          - the lock to acquire.
//   _EndOfSegment - out-parameter; receives the end-of-segment value to pass to Release.
//   returns       - the successor whose Gate Release must write, or NULL if there is none.
//
// The thread-local element E is pushed onto lock->Arrivals with Fas. If the previous
// value was NULL the lock was free: no waiting, the end-of-segment is &E and there is
// no successor. Otherwise the previous value (with the LOCKEDEMPTY tag bit stripped)
// becomes our successor and we spin on E.Gate until a releasing thread stores a
// non-NULL end-of-segment value into it.
// NOTE(review): Str, Fas, Ld and Pause are macros defined outside this file; from their
// use here they appear to be atomic store, atomic exchange, atomic load and a spin-wait
// hint -- confirm against the harness.
static inline WaitElement * Acquire( ReciprocatingLock * lock, WaitElement ** _EndOfSegment ) {
	// Gate must read as NULL before E becomes visible to other threads.
	Str( E.Gate, NULL );
	WaitElement * const prev = Fas( lock->Arrivals, &E );
	assert( prev != &E );

	// Uncontended: nothing was in Arrivals, so we own the lock immediately.
	if ( prev == NULL ) {
		*_EndOfSegment = &E;
		return NULL;
	}

	// Contended. Clearing the low bit maps LOCKEDEMPTY to NULL; any other value is the
	// element we will pass ownership to when we later release.
	WaitElement * next = (WaitElement *)(((uintptr_t) prev) & ~(uintptr_t) 1);
	assert( next != &E );

	// Waiting phase: spin until a releaser writes the end-of-segment value into our Gate.
	// Consider : the HemLock CTR optimization (spinning with exchange) could be used here.
	// That would remove the need to clear Gate on entry and avoid the
	// MESI/MOESI/MESIF S->M coherence upgrade.
	WaitElement * segmentEnd;
	while ( (segmentEnd = Ld( E.Gate )) == NULL ) {
		Pause();
	}
	assert( segmentEnd != &E );

	// Our successor being the end-of-segment element marks the logical end of the
	// segment: drop the successor and report LOCKEDEMPTY instead.
	if ( next == segmentEnd ) {
		next = NULL;
		segmentEnd = LOCKEDEMPTY;
	}

	*_EndOfSegment = segmentEnd;
	return next;
}
// Release the lock using the context produced by the matching Acquire.
//
//   lock         - the lock being released.
//   EndOfSegment - end-of-segment value returned through Acquire's out-parameter;
//                  must be non-NULL (asserted).
//   succ         - successor returned by Acquire, or NULL if there is none.
//
// Three outcomes:
//   1. succ != NULL: store EndOfSegment into succ->Gate, which ends that thread's
//      spin in Acquire, and return.
//   2. succ == NULL and Arrivals still equals EndOfSegment: swing Arrivals back to
//      NULL with Casv and return (nobody arrived while we held the lock).
//   3. Otherwise: exchange LOCKEDEMPTY into Arrivals to detach the threads that
//      arrived, and store EndOfSegment into the Gate of the element that was there.
// NOTE(review): Ld, Str, Casv and Fas are macros defined outside this file; from their
// use here they appear to be atomic load, store, compare-and-swap and exchange --
// confirm against the harness.
static inline void Release( ReciprocatingLock * lock, WaitElement * EndOfSegment, WaitElement * succ ) {
assert( EndOfSegment != NULL );
// While the lock is held, Arrivals is never NULL.
assert( Ld( lock->Arrivals ) != NULL );
if ( succ != NULL ) {
// Direct handoff: the successor is spinning on its own Gate in Acquire.
assert( Ld( succ->Gate ) == NULL );
Str( succ->Gate, EndOfSegment );
return;
}
// With no successor, Acquire produced either &E (uncontended path) or LOCKEDEMPTY
// (end of segment reached).
assert( EndOfSegment == LOCKEDEMPTY || EndOfSegment == &E );
#if 0
// Disabled variant: attempt the compare-and-swap unconditionally.
WaitElement * v = EndOfSegment;
if ( Casv( lock->Arrivals, v, NULL ) ) {
// uncontended fast-path return
return;
}
#else
// Active variant: read Arrivals first and attempt the compare-and-swap only if it
// still holds the expected value.
if ( Ld( lock->Arrivals ) == EndOfSegment ) {
WaitElement * v = EndOfSegment;
if ( Casv( lock->Arrivals, v, NULL ) ) {
// uncontended fast-path return
return;
}
}
#endif
// New threads arrived: detach them, leaving LOCKEDEMPTY behind, and pass ownership
// to w, the element that was in Arrivals (the most recent arrival).
WaitElement * w = Fas( lock->Arrivals, LOCKEDEMPTY );
assert( w != NULL );
assert( w != LOCKEDEMPTY );
assert( w != &E );
assert( Ld( w->Gate ) == NULL );
Str( w->Gate, EndOfSegment );
}
// Benchmark thread body.
//
//   arg     - carries the thread id (cast through size_t into TYPE).
//   returns - always NULL.
//
// For each of RUNS rounds the thread repeats NCS / Acquire / CS(id) / Release until the
// shared `stop` flag becomes non-zero, counting iterations in `entry`. It then adds its
// checksum to sumOfThreadChecksums, records the iteration count in entries[r][id],
// increments Arrived, waits for `stop` to return to 0, and decrements Arrived.
// NOTE(review): TYPE, RTYPE, RUNS, NCS_DECL, NCS, CS, Fai, Pause, stop, entries, Arrived,
// sumOfThreadChecksums, startpoint, cycleUp and NoStartPoints are not defined in the
// visible source; presumably they come from the harness named in the compile-command
// at the end of the file -- confirm.
static void * Worker( void * arg ) {
TYPE id = (size_t)arg;
uint64_t entry;
#ifdef FAST
// oid keeps the original id, because id is overwritten after every critical section below.
unsigned int cnt = 0, oid = id;
#endif // FAST
NCS_DECL;
// Note that E could reside either on-stack or in thread-local storage; in this file it
// is the file-scope `static __thread` variable E.
for ( int r = 0; r < RUNS; r += 1 ) {
RTYPE randomThreadChecksum = 0;
for ( entry = 0; stop == 0; entry += 1 ) {
NCS;
// EndOfSegment and succ reflect context to be passed from Acquire to corresponding Release
// CLH, MCS, and MCSH similarly are _not context-free.
// With a slightly more clever encoding we can readily collapse the 2 fields into just 1.
// But 2 fields is easier for the purpose of explication.
// Could also pass the context via fields in the lock body or into TLS.
// Might also use C++ RAII std::scoped_lock or friends to carry context.
WaitElement * EndOfSegment;
WaitElement * succ = Acquire( &lock, &EndOfSegment );
randomThreadChecksum += CS( id );
Release( &lock, EndOfSegment, succ );
#ifdef FAST
id = startpoint( cnt ); // different starting point each experiment
cnt = cycleUp( cnt, NoStartPoints );
#endif // FAST
} // for
Fai( sumOfThreadChecksums, randomThreadChecksum );
#ifdef FAST
// Restore the original id so entries[][] is indexed by this thread's own slot.
id = oid;
#endif // FAST
entries[r][id] = entry;
// End-of-round rendezvous: announce arrival, wait until stop is cleared, then withdraw.
Fai( Arrived, 1 );
while ( stop != 0 ) Pause();
Fai( Arrived, -1 );
} // for
return NULL;
} // Worker
// Initialize the global lock by storing NULL into lock.Arrivals. NULL is the value
// Acquire treats as "no prior arrival" (its uncontended path).
// NOTE(review): presumably invoked by the harness before any Worker runs -- confirm.
void __attribute__((noinline)) ctor() {
Str( lock.Arrivals, NULL );
} // ctor
// Counterpart to ctor. Intentionally empty: ctor only stores NULL into lock.Arrivals,
// so there is nothing to undo.
void __attribute__((noinline)) dtor() {
} // dtor
// Local Variables: //
// tab-width: 4 //
// compile-command: "gcc -Wall -Wextra -std=gnu11 -O3 -DNDEBUG -fno-reorder-functions -DPIN -DAlgorithm=Reciprocating Harness.c -lpthread -lm -D`hostname` -DCFMT -DCNT=0" //
// End: //