// SPDX-License-Identifier: LGPL-2.1-only
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

#include <rseq/rseq.h>

#include "tap.h"

/* register + spinlock + percpu_list + unregister */
#define NR_TESTS 4

#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof((arr)[0]))

struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
	int reps;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

/* A simple per-cpu spinlock. Returns the cpu the lock was acquired on. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}

static void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_test_data *data = (struct spinlock_test_data *) arg;
	int i, cpu;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	for (i = 0; i < data->reps; i++) {
		cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
	}
	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Real applications might prefer to simply use a per-cpu
 * increment (see the rseq_addv() sketch after this function); however,
 * this is reasonable for a test, and the lock can be extended to
 * synchronize more complicated operations.
 */
static void test_percpu_spinlock(void)
{
	const int num_threads = 200;
	int i;
	uint64_t sum, expected_sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;

	diag("spinlock");

	memset(&data, 0, sizeof(data));
	data.reps = 5000;

	for (i = 0; i < num_threads; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_spinlock_thread, &data);

	for (i = 0; i < num_threads; i++)
		pthread_join(test_threads[i], NULL);

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	expected_sum = (uint64_t)data.reps * num_threads;

	ok(sum == expected_sum, "spinlock - sum (%" PRIu64 " == %" PRIu64 ")", sum, expected_sum);
}

static void this_cpu_list_push(struct percpu_list *list,
			       struct percpu_list_node *node,
			       int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concern about
 * ABA-type races: reading the head and loading its next pointer both
 * happen within a single restartable sequence, which aborts if the
 * thread is preempted, so the list cannot change underneath us.
 */
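/*
 * Rough summary of the primitive used below:
 * rseq_cmpnev_storeoffp_load(targetptr, expectnot, offset, load, cpu)
 * runs a restartable sequence on @cpu which checks
 * *targetptr != expectnot, saves the old *targetptr into *load, and
 * replaces *targetptr with the pointer found @offset bytes inside the
 * old value (here: head->next). It returns a positive value when the
 * comparison fails, i.e. when the list is empty.
 */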
static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
						  int *_cpu)
{
	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		off_t offset;
		int ret, cpu;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			if (_cpu)
				*_cpu = cpu;
			return head;
		}
		if (ret > 0)
			return NULL;	/* Empty list. */
		/* Retry if rseq aborts. */
	}
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. It should
 * only be used on lists that are not concurrently modified.
 */
static struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

static void *test_percpu_list_thread(void *arg)
{
	int i;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	for (i = 0; i < 100000; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/* Simultaneous modification of a per-cpu linked list from many threads. */
static void test_percpu_list(void)
{
	int i, j;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[200];
	cpu_set_t allowed_cpus;

	diag("percpu_list");

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = (struct percpu_list_node *) malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < 200; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_list_thread, &list);

	for (i = 0; i < 200; i++)
		pthread_join(test_threads[i], NULL);

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	ok(sum == expected_sum, "percpu_list - sum (%" PRIu64 " == %" PRIu64 ")", sum, expected_sum);
}

int main(void)
{
	plan_tests(NR_TESTS);

	if (!rseq_available()) {
		skip(NR_TESTS, "The rseq syscall is unavailable");
		goto end;
	}

	if (rseq_register_current_thread()) {
		fail("rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto end;
	} else {
		pass("Registered current thread with rseq");
	}

	test_percpu_spinlock();
	test_percpu_list();

	if (rseq_unregister_current_thread()) {
		fail("rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto end;
	} else {
		pass("Unregistered current thread with rseq");
	}

end:
	exit(exit_status());
}