Commit | Line | Data |
---|---|---|
90702366 | 1 | // SPDX-License-Identifier: MIT |
f2d7b530 | 2 | // SPDX-FileCopyrightText: 2018-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> |
b848736e MD |
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <rseq/rseq.h>

#include "tap.h"
19 | ||
/* Number of TAP test points this program plans to report. */
#define NR_TESTS 4

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
40293a78 MD |
/*
 * Select the per-CPU index used by the rseq operations below: either the
 * memory-map concurrency id (mm_cid) or the CPU id, depending on the
 * build configuration.
 */
#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
# define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
/* Per-CPU index: the current memory-map concurrency id. */
static
int get_current_cpu_id(void)
{
	return rseq_current_mm_cid();
}
/* The kernel must report mm_cid support for this build to be usable. */
static
bool rseq_validate_cpu_id(void)
{
	return rseq_mm_cid_available();
}
#else
# define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
/* Per-CPU index: the CPU id at the start of the rseq critical section. */
static
int get_current_cpu_id(void)
{
	return rseq_cpu_start();
}
/* A negative raw CPU id means the cpu_id field is not populated. */
static
bool rseq_validate_cpu_id(void)
{
	return rseq_current_cpu_raw() >= 0;
}
#endif
49 | ||
b848736e MD |
/*
 * One lock word per CPU. The 128-byte alignment gives each entry its own
 * cache lines so that CPUs spinning on different locks do not false-share.
 */
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

/* A per-CPU spinlock: an independent lock word for every possible CPU. */
struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

/* Per-CPU counter slot, aligned to avoid false sharing between CPUs. */
struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

/* Shared state for the spinlock test: lock, per-CPU shards, repetitions. */
struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
	int reps;
};

/* Singly-linked list node carrying an integer payload. */
struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

/* Per-CPU list head, aligned to avoid false sharing between CPUs. */
struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

/* A per-CPU linked list: an independent list for every possible CPU. */
struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};
80 | ||
81 | /* A simple percpu spinlock. Returns the cpu lock was acquired on. */ | |
6e284b80 | 82 | static int rseq_this_cpu_lock(struct percpu_lock *lock) |
b848736e MD |
83 | { |
84 | int cpu; | |
85 | ||
86 | for (;;) { | |
87 | int ret; | |
88 | ||
40293a78 MD |
89 | cpu = get_current_cpu_id(); |
90 | ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU, | |
91 | &lock->c[cpu].v, 0, 1, cpu); | |
b848736e MD |
92 | if (rseq_likely(!ret)) |
93 | break; | |
94 | /* Retry if comparison fails or rseq aborts. */ | |
95 | } | |
96 | /* | |
97 | * Acquire semantic when taking lock after control dependency. | |
98 | * Matches rseq_smp_store_release(). | |
99 | */ | |
100 | rseq_smp_acquire__after_ctrl_dep(); | |
101 | return cpu; | |
102 | } | |
103 | ||
6e284b80 | 104 | static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu) |
b848736e MD |
105 | { |
106 | assert(lock->c[cpu].v == 1); | |
107 | /* | |
108 | * Release lock, with release semantic. Matches | |
109 | * rseq_smp_acquire__after_ctrl_dep(). | |
110 | */ | |
111 | rseq_smp_store_release(&lock->c[cpu].v, 0); | |
112 | } | |
113 | ||
6e284b80 | 114 | static void *test_percpu_spinlock_thread(void *arg) |
b848736e | 115 | { |
d268885a | 116 | struct spinlock_test_data *data = (struct spinlock_test_data *) arg; |
b848736e MD |
117 | int i, cpu; |
118 | ||
119 | if (rseq_register_current_thread()) { | |
120 | fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", | |
121 | errno, strerror(errno)); | |
122 | abort(); | |
123 | } | |
124 | for (i = 0; i < data->reps; i++) { | |
125 | cpu = rseq_this_cpu_lock(&data->lock); | |
126 | data->c[cpu].count++; | |
127 | rseq_percpu_unlock(&data->lock, cpu); | |
128 | } | |
129 | if (rseq_unregister_current_thread()) { | |
130 | fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", | |
131 | errno, strerror(errno)); | |
132 | abort(); | |
133 | } | |
134 | ||
135 | return NULL; | |
136 | } | |
137 | ||
138 | /* | |
139 | * A simple test which implements a sharded counter using a per-cpu | |
140 | * lock. Obviously real applications might prefer to simply use a | |
141 | * per-cpu increment; however, this is reasonable for a test and the | |
142 | * lock can be extended to synchronize more complicated operations. | |
143 | */ | |
6e284b80 | 144 | static void test_percpu_spinlock(void) |
b848736e MD |
145 | { |
146 | const int num_threads = 200; | |
147 | int i; | |
a91728e0 | 148 | uint64_t sum, expected_sum; |
b848736e MD |
149 | pthread_t test_threads[num_threads]; |
150 | struct spinlock_test_data data; | |
151 | ||
544cdc88 MJ |
152 | diag("spinlock"); |
153 | ||
b848736e MD |
154 | memset(&data, 0, sizeof(data)); |
155 | data.reps = 5000; | |
156 | ||
157 | for (i = 0; i < num_threads; i++) | |
158 | pthread_create(&test_threads[i], NULL, | |
159 | test_percpu_spinlock_thread, &data); | |
160 | ||
161 | for (i = 0; i < num_threads; i++) | |
162 | pthread_join(test_threads[i], NULL); | |
163 | ||
164 | sum = 0; | |
165 | for (i = 0; i < CPU_SETSIZE; i++) | |
166 | sum += data.c[i].count; | |
167 | ||
a91728e0 MJ |
168 | expected_sum = (uint64_t)data.reps * num_threads; |
169 | ||
170 | ok(sum == expected_sum, "spinlock - sum (%" PRIu64 " == %" PRIu64 ")", sum, expected_sum); | |
b848736e MD |
171 | } |
172 | ||
6e284b80 | 173 | static void this_cpu_list_push(struct percpu_list *list, |
b848736e MD |
174 | struct percpu_list_node *node, |
175 | int *_cpu) | |
176 | { | |
177 | int cpu; | |
178 | ||
179 | for (;;) { | |
180 | intptr_t *targetptr, newval, expect; | |
181 | int ret; | |
182 | ||
40293a78 | 183 | cpu = get_current_cpu_id(); |
b848736e MD |
184 | /* Load list->c[cpu].head with single-copy atomicity. */ |
185 | expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head); | |
186 | newval = (intptr_t)node; | |
187 | targetptr = (intptr_t *)&list->c[cpu].head; | |
188 | node->next = (struct percpu_list_node *)expect; | |
40293a78 MD |
189 | ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU, |
190 | targetptr, expect, newval, cpu); | |
b848736e MD |
191 | if (rseq_likely(!ret)) |
192 | break; | |
193 | /* Retry if comparison fails or rseq aborts. */ | |
194 | } | |
195 | if (_cpu) | |
196 | *_cpu = cpu; | |
197 | } | |
198 | ||
199 | /* | |
200 | * Unlike a traditional lock-less linked list; the availability of a | |
201 | * rseq primitive allows us to implement pop without concerns over | |
202 | * ABA-type races. | |
203 | */ | |
6e284b80 | 204 | static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, |
b848736e MD |
205 | int *_cpu) |
206 | { | |
207 | for (;;) { | |
208 | struct percpu_list_node *head; | |
209 | intptr_t *targetptr, expectnot, *load; | |
d35eae6b MD |
210 | long offset; |
211 | int ret, cpu; | |
b848736e | 212 | |
40293a78 | 213 | cpu = get_current_cpu_id(); |
b848736e MD |
214 | targetptr = (intptr_t *)&list->c[cpu].head; |
215 | expectnot = (intptr_t)NULL; | |
216 | offset = offsetof(struct percpu_list_node, next); | |
217 | load = (intptr_t *)&head; | |
40293a78 MD |
218 | ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU, |
219 | targetptr, expectnot, | |
b848736e MD |
220 | offset, load, cpu); |
221 | if (rseq_likely(!ret)) { | |
222 | if (_cpu) | |
223 | *_cpu = cpu; | |
224 | return head; | |
225 | } | |
226 | if (ret > 0) | |
227 | return NULL; | |
228 | /* Retry if rseq aborts. */ | |
229 | } | |
230 | } | |
231 | ||
232 | /* | |
233 | * __percpu_list_pop is not safe against concurrent accesses. Should | |
234 | * only be used on lists that are not concurrently modified. | |
235 | */ | |
6e284b80 | 236 | static struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu) |
b848736e MD |
237 | { |
238 | struct percpu_list_node *node; | |
239 | ||
240 | node = list->c[cpu].head; | |
241 | if (!node) | |
242 | return NULL; | |
243 | list->c[cpu].head = node->next; | |
244 | return node; | |
245 | } | |
246 | ||
/* Worker: repeatedly pop one node, yield, and push it back. */
static void *test_percpu_list_thread(void *arg)
{
	struct percpu_list *list = (struct percpu_list *)arg;
	int iter;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	for (iter = 0; iter < 100000; iter++) {
		struct percpu_list_node *node = this_cpu_list_pop(list, NULL);

		sched_yield(); /* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}
275 | ||
276 | /* Simultaneous modification to a per-cpu linked list from many threads. */ | |
6e284b80 | 277 | static void test_percpu_list(void) |
b848736e MD |
278 | { |
279 | int i, j; | |
280 | uint64_t sum = 0, expected_sum = 0; | |
281 | struct percpu_list list; | |
282 | pthread_t test_threads[200]; | |
283 | cpu_set_t allowed_cpus; | |
284 | ||
544cdc88 MJ |
285 | diag("percpu_list"); |
286 | ||
b848736e MD |
287 | memset(&list, 0, sizeof(list)); |
288 | ||
289 | /* Generate list entries for every usable cpu. */ | |
290 | sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); | |
291 | for (i = 0; i < CPU_SETSIZE; i++) { | |
292 | if (!CPU_ISSET(i, &allowed_cpus)) | |
293 | continue; | |
294 | for (j = 1; j <= 100; j++) { | |
295 | struct percpu_list_node *node; | |
296 | ||
297 | expected_sum += j; | |
298 | ||
d268885a | 299 | node = (struct percpu_list_node *) malloc(sizeof(*node)); |
b848736e MD |
300 | assert(node); |
301 | node->data = j; | |
302 | node->next = list.c[i].head; | |
303 | list.c[i].head = node; | |
304 | } | |
305 | } | |
306 | ||
307 | for (i = 0; i < 200; i++) | |
308 | pthread_create(&test_threads[i], NULL, | |
309 | test_percpu_list_thread, &list); | |
310 | ||
311 | for (i = 0; i < 200; i++) | |
312 | pthread_join(test_threads[i], NULL); | |
313 | ||
314 | for (i = 0; i < CPU_SETSIZE; i++) { | |
315 | struct percpu_list_node *node; | |
316 | ||
317 | if (!CPU_ISSET(i, &allowed_cpus)) | |
318 | continue; | |
319 | ||
320 | while ((node = __percpu_list_pop(&list, i))) { | |
321 | sum += node->data; | |
322 | free(node); | |
323 | } | |
324 | } | |
325 | ||
326 | /* | |
327 | * All entries should now be accounted for (unless some external | |
328 | * actor is interfering with our allowed affinity while this | |
329 | * test is running). | |
330 | */ | |
a91728e0 | 331 | ok(sum == expected_sum, "percpu_list - sum (%" PRIu64 " == %" PRIu64 ")", sum, expected_sum); |
b848736e MD |
332 | } |
333 | ||
/*
 * TAP entry point: register the main thread with rseq, validate that a
 * cpu id getter is available, then run the spinlock and per-CPU list
 * tests before unregistering.
 */
int main(void)
{
	plan_tests(NR_TESTS);

	/* Without kernel rseq support, skip every planned test. */
	if (!rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
		skip(NR_TESTS, "The rseq syscall is unavailable");
		goto end;
	}

	if (rseq_register_current_thread()) {
		fail("rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto end;
	} else {
		pass("Registered current thread with rseq");
	}
	/*
	 * NOTE(review): one test point has already been reported above, so
	 * skipping NR_TESTS here would overshoot the plan by one — confirm
	 * the intended TAP accounting on this path.
	 */
	if (!rseq_validate_cpu_id()) {
		skip(NR_TESTS, "Error: cpu id getter unavailable");
		goto end;
	}
	test_percpu_spinlock();
	test_percpu_list();

	if (rseq_unregister_current_thread()) {
		fail("rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto end;
	} else {
		pass("Unregistered current thread with rseq");
	}
end:
	exit(exit_status());
}