[librseq.git] / tests / param_test.c
1 // SPDX-License-Identifier: MIT
2 // SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE
5 #endif
6 #include <assert.h>
7 #include <linux/version.h>
8 #include <linux/membarrier.h>
9 #include <pthread.h>
10 #include <sched.h>
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <syscall.h>
16 #include <unistd.h>
17 #include <poll.h>
18 #include <sys/types.h>
19 #include <signal.h>
20 #include <errno.h>
21 #include <stddef.h>
22 #include <stdbool.h>
23 #include <rseq/percpu-alloc.h>
24
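/*
 * PERCPU_POOL_LEN is presumably the per-CPU length, in bytes, handed to
 * rseq_percpu_pool_create() below: each allocator pool reserves 1 MB per
 * CPU index, from which the tests allocate their per-CPU data.
 */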
25 #define PERCPU_POOL_LEN (1024*1024) /* 1MB */
26
27 #if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
28 enum {
29 MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7),
30 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8),
31 };
32
33 enum {
34 MEMBARRIER_CMD_FLAG_CPU = (1 << 0),
35 };
36 #endif
37
38 #define NR_INJECT 9
39 static int loop_cnt[NR_INJECT + 1];
40
41 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
42 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
43 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
44 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
45 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
46 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
47
48 static int opt_modulo, verbose;
49
50 static int opt_yield, opt_signal, opt_sleep,
51 opt_disable_rseq, opt_threads = 200,
52 opt_disable_mod = 0, opt_test = 's';
53
54 static long long opt_reps = 5000;
55
56 static __thread __attribute__((tls_model("initial-exec")))
57 unsigned int signals_delivered;
58
59 static inline pid_t rseq_gettid(void)
60 {
61 return syscall(__NR_gettid);
62 }
63
64 #ifndef BENCHMARK
65
66 static __thread __attribute__((tls_model("initial-exec"), unused))
67 int yield_mod_cnt, nr_abort;
68
69 #define printf_verbose(fmt, ...) \
70 do { \
71 if (verbose) \
72 printf(fmt, ## __VA_ARGS__); \
73 } while (0)
74
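/*
 * Delay injection: RSEQ_INJECT_ASM(n) is pulled into the rseq critical
 * section assembly by <rseq/rseq.h> at injection point n. It loads the
 * corresponding loop_cnt[n] value (via the asm_loop_cnt_n symbols on x86,
 * or the "m" operands declared in RSEQ_INJECT_INPUT elsewhere) and
 * busy-loops that many iterations, widening race windows so that aborts
 * and restarts are actually exercised. The per-architecture variants below
 * differ only in register choice and addressing mode.
 */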
75 #ifdef __i386__
76
77 #define INJECT_ASM_REG "eax"
78
79 #define RSEQ_INJECT_CLOBBER \
80 , INJECT_ASM_REG
81
82 /*
83 * Use ip-relative addressing to get the loop counter.
84 */
85 #define __RSEQ_INJECT_ASM(n, ref_ip, ref_label) \
86 "movl " __rseq_str(ref_ip) ", %%" INJECT_ASM_REG "\n\t" \
87 "leal ( asm_loop_cnt_" #n " - " __rseq_str(ref_label) "b)(%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
88 "movl (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
89 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
90 "jz 333f\n\t" \
91 "222:\n\t" \
92 "dec %%" INJECT_ASM_REG "\n\t" \
93 "jnz 222b\n\t" \
94 "333:\n\t"
95
96 #define RSEQ_INJECT_ASM(n) \
97 __RSEQ_INJECT_ASM(n, %[ref_ip], RSEQ_ASM_REF_LABEL)
98
99 #elif defined(__x86_64__)
100
101 #define INJECT_ASM_REG_P "rax"
102 #define INJECT_ASM_REG "eax"
103
104 #define RSEQ_INJECT_CLOBBER \
105 , INJECT_ASM_REG_P \
106 , INJECT_ASM_REG
107
108 #define RSEQ_INJECT_ASM(n) \
109 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
110 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
111 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
112 "jz 333f\n\t" \
113 "222:\n\t" \
114 "dec %%" INJECT_ASM_REG "\n\t" \
115 "jnz 222b\n\t" \
116 "333:\n\t"
117
118 #elif defined(__s390__)
119
120 #define RSEQ_INJECT_INPUT \
121 , [loop_cnt_1]"m"(loop_cnt[1]) \
122 , [loop_cnt_2]"m"(loop_cnt[2]) \
123 , [loop_cnt_3]"m"(loop_cnt[3]) \
124 , [loop_cnt_4]"m"(loop_cnt[4]) \
125 , [loop_cnt_5]"m"(loop_cnt[5]) \
126 , [loop_cnt_6]"m"(loop_cnt[6])
127
128 #define INJECT_ASM_REG "r12"
129
130 #define RSEQ_INJECT_CLOBBER \
131 , INJECT_ASM_REG
132
133 #define RSEQ_INJECT_ASM(n) \
134 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
135 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
136 "je 333f\n\t" \
137 "222:\n\t" \
138 "ahi %%" INJECT_ASM_REG ", -1\n\t" \
139 "jnz 222b\n\t" \
140 "333:\n\t"
141
142 #elif defined(__ARMEL__)
143
144 #define RSEQ_INJECT_INPUT \
145 , [loop_cnt_1]"m"(loop_cnt[1]) \
146 , [loop_cnt_2]"m"(loop_cnt[2]) \
147 , [loop_cnt_3]"m"(loop_cnt[3]) \
148 , [loop_cnt_4]"m"(loop_cnt[4]) \
149 , [loop_cnt_5]"m"(loop_cnt[5]) \
150 , [loop_cnt_6]"m"(loop_cnt[6])
151
152 #define INJECT_ASM_REG "r4"
153
154 #define RSEQ_INJECT_CLOBBER \
155 , INJECT_ASM_REG
156
157 #define RSEQ_INJECT_ASM(n) \
158 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
159 "cmp " INJECT_ASM_REG ", #0\n\t" \
160 "beq 333f\n\t" \
161 "222:\n\t" \
162 "subs " INJECT_ASM_REG ", #1\n\t" \
163 "bne 222b\n\t" \
164 "333:\n\t"
165
166 #elif defined(__AARCH64EL__)
167
168 #define RSEQ_INJECT_INPUT \
169 , [loop_cnt_1] "Qo" (loop_cnt[1]) \
170 , [loop_cnt_2] "Qo" (loop_cnt[2]) \
171 , [loop_cnt_3] "Qo" (loop_cnt[3]) \
172 , [loop_cnt_4] "Qo" (loop_cnt[4]) \
173 , [loop_cnt_5] "Qo" (loop_cnt[5]) \
174 , [loop_cnt_6] "Qo" (loop_cnt[6])
175
176 #define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
177
178 #define RSEQ_INJECT_ASM(n) \
179 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
180 " cbz " INJECT_ASM_REG ", 333f\n" \
181 "222:\n" \
182 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
183 " cbnz " INJECT_ASM_REG ", 222b\n" \
184 "333:\n"
185
186 #elif defined(__PPC__)
187
188 #define RSEQ_INJECT_INPUT \
189 , [loop_cnt_1]"m"(loop_cnt[1]) \
190 , [loop_cnt_2]"m"(loop_cnt[2]) \
191 , [loop_cnt_3]"m"(loop_cnt[3]) \
192 , [loop_cnt_4]"m"(loop_cnt[4]) \
193 , [loop_cnt_5]"m"(loop_cnt[5]) \
194 , [loop_cnt_6]"m"(loop_cnt[6])
195
196 #define INJECT_ASM_REG "r18"
197
198 #define RSEQ_INJECT_CLOBBER \
199 , INJECT_ASM_REG
200
201 #define RSEQ_INJECT_ASM(n) \
202 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
203 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
204 "beq 333f\n\t" \
205 "222:\n\t" \
206 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
207 "bne 222b\n\t" \
208 "333:\n\t"
209
210 #elif defined(__mips__)
211
212 #define RSEQ_INJECT_INPUT \
213 , [loop_cnt_1]"m"(loop_cnt[1]) \
214 , [loop_cnt_2]"m"(loop_cnt[2]) \
215 , [loop_cnt_3]"m"(loop_cnt[3]) \
216 , [loop_cnt_4]"m"(loop_cnt[4]) \
217 , [loop_cnt_5]"m"(loop_cnt[5]) \
218 , [loop_cnt_6]"m"(loop_cnt[6])
219
220 #define INJECT_ASM_REG "$5"
221
222 #define RSEQ_INJECT_CLOBBER \
223 , INJECT_ASM_REG
224
225 #define RSEQ_INJECT_ASM(n) \
226 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
227 "beqz " INJECT_ASM_REG ", 333f\n\t" \
228 "222:\n\t" \
229 "addiu " INJECT_ASM_REG ", -1\n\t" \
230 "bnez " INJECT_ASM_REG ", 222b\n\t" \
231 "333:\n\t"
232
233 #elif defined(__riscv)
234
235 #define RSEQ_INJECT_INPUT \
236 , [loop_cnt_1]"m"(loop_cnt[1]) \
237 , [loop_cnt_2]"m"(loop_cnt[2]) \
238 , [loop_cnt_3]"m"(loop_cnt[3]) \
239 , [loop_cnt_4]"m"(loop_cnt[4]) \
240 , [loop_cnt_5]"m"(loop_cnt[5]) \
241 , [loop_cnt_6]"m"(loop_cnt[6])
242
243 #define INJECT_ASM_REG "t1"
244
245 #define RSEQ_INJECT_CLOBBER \
246 , INJECT_ASM_REG
247
248 #define RSEQ_INJECT_ASM(n) \
249 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
250 "beqz " INJECT_ASM_REG ", 333f\n\t" \
251 "222:\n\t" \
252 "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
253 "bnez " INJECT_ASM_REG ", 222b\n\t" \
254 "333:\n\t"
255
256 #else
257 #error unsupported target
258 #endif
259
260 #define RSEQ_INJECT_FAILED \
261 nr_abort++;
262
263 #define RSEQ_INJECT_C(n) \
264 { \
265 int loc_i, loc_nr_loops = loop_cnt[n]; \
266 \
267 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
268 rseq_barrier(); \
269 } \
270 if (loc_nr_loops == -1 && opt_modulo) { \
271 if (yield_mod_cnt == opt_modulo - 1) { \
272 if (opt_sleep > 0) \
273 poll(NULL, 0, opt_sleep); \
274 if (opt_yield) \
275 sched_yield(); \
276 if (opt_signal) \
277 raise(SIGUSR1); \
278 yield_mod_cnt = 0; \
279 } else { \
280 yield_mod_cnt++; \
281 } \
282 } \
283 }
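/*
 * RSEQ_INJECT_C(n) is the C-level injection hook: it spins loop_cnt[n]
 * times on rseq_barrier(), and when loop_cnt[n] == -1 it instead sleeps
 * (-s), yields (-y) or raises SIGUSR1 (-k) once every opt_modulo passes,
 * which is presumably what the "-1 to enable -m" note on options -7/-8/-9
 * in the usage text refers to.
 */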
284
285 #else
286
287 #define printf_verbose(fmt, ...)
288
289 #endif /* BENCHMARK */
290
291 #include <rseq/rseq.h>
292
293 static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
294
295 static int sys_membarrier(int cmd, int flags, int cpu_id)
296 {
297 return syscall(__NR_membarrier, cmd, flags, cpu_id);
298 }
299
300 #ifdef rseq_arch_has_load_cbne_load_add_store
301 #define TEST_MEMBARRIER
302 #endif
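/*
 * TEST_MEMBARRIER is only defined when the architecture advertises the
 * rseq operation needed by the membarrier test's worker loop (see
 * rseq_load_add_load_load_add_store__ptr() below); otherwise the stub
 * version of test_membarrier() near the end of this file reports the test
 * as skipped.
 */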
303
304 #ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
305 # define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
306 static
307 int get_current_cpu_id(void)
308 {
309 return rseq_current_mm_cid();
310 }
311 static
312 bool rseq_validate_cpu_id(void)
313 {
314 return rseq_mm_cid_available();
315 }
316 static
317 bool rseq_use_cpu_index(void)
318 {
319 return false; /* Use mm_cid */
320 }
321 # ifdef TEST_MEMBARRIER
322 /*
323 * Membarrier does not currently support targeting a mm_cid, so
324 * issue the barrier on all cpus.
325 */
326 static
327 int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
328 {
329 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
330 0, 0);
331 }
332 # endif /* TEST_MEMBARRIER */
333 #else
334 # define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
335 static
336 int get_current_cpu_id(void)
337 {
338 return rseq_cpu_start();
339 }
340 static
341 bool rseq_validate_cpu_id(void)
342 {
343 return rseq_current_cpu_raw() >= 0;
344 }
345 static
346 bool rseq_use_cpu_index(void)
347 {
348 return true; /* Use cpu_id as index. */
349 }
350 # ifdef TEST_MEMBARRIER
351 static
352 int rseq_membarrier_expedited(int cpu)
353 {
354 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
355 MEMBARRIER_CMD_FLAG_CPU, cpu);
356 }
357 # endif /* TEST_MEMBARRIER */
358 #endif
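/*
 * Summary of the two indexing modes above: with BUILDOPT_RSEQ_PERCPU_MM_CID,
 * per-CPU data is indexed by the memory map concurrency id (mm_cid) rather
 * than the raw CPU number, so rseq_use_cpu_index() returns false and the
 * expedited membarrier must target all CPUs. Otherwise the tests index by
 * cpu_id and can direct the membarrier at a single CPU.
 */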
359
360 struct percpu_lock {
361 intptr_t v;
362 };
363
364 struct spinlock_test_data {
365 struct percpu_lock lock;
366 intptr_t count;
367 };
368
369 struct spinlock_thread_test_data {
370 struct spinlock_test_data __rseq_percpu *data;
371 long long reps;
372 int reg;
373 };
374
375 struct inc_test_data {
376 intptr_t count;
377 };
378
379 struct inc_thread_test_data {
380 struct inc_test_data __rseq_percpu *data;
381 long long reps;
382 int reg;
383 };
384
385 struct percpu_list_node {
386 intptr_t data;
387 struct percpu_list_node *next;
388 };
389
390 struct percpu_list {
391 struct percpu_list_node *head;
392 };
393
394 #define BUFFER_ITEM_PER_CPU 100
395
396 struct percpu_buffer_node {
397 intptr_t data;
398 };
399
400 struct percpu_buffer {
401 intptr_t offset;
402 intptr_t buflen;
403 struct percpu_buffer_node **array;
404 };
405
406 #define MEMCPY_BUFFER_ITEM_PER_CPU 100
407
408 struct percpu_memcpy_buffer_node {
409 intptr_t data1;
410 uint64_t data2;
411 };
412
413 struct percpu_memcpy_buffer {
414 intptr_t offset;
415 intptr_t buflen;
416 struct percpu_memcpy_buffer_node *array;
417 };
418
419 /* A simple percpu spinlock. Grabs lock on current cpu. */
420 static int rseq_this_cpu_lock(struct percpu_lock __rseq_percpu *lock)
421 {
422 int cpu;
423
424 for (;;) {
425 int ret;
426
427 cpu = get_current_cpu_id();
428 if (cpu < 0) {
429 fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
430 getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
431 abort();
432 }
433 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
434 &rseq_percpu_ptr(lock, cpu)->v,
435 0, 1, cpu);
436 if (rseq_likely(!ret))
437 break;
438 /* Retry if comparison fails or rseq aborts. */
439 }
440 /*
441 * Acquire semantic when taking lock after control dependency.
442 * Matches rseq_smp_store_release().
443 */
444 rseq_smp_acquire__after_ctrl_dep();
445 return cpu;
446 }
447
448 static void rseq_percpu_unlock(struct percpu_lock __rseq_percpu *lock, int cpu)
449 {
450 assert(rseq_percpu_ptr(lock, cpu)->v == 1);
451 /*
452 * Release lock, with release semantic. Matches
453 * rseq_smp_acquire__after_ctrl_dep().
454 */
455 rseq_smp_store_release(&rseq_percpu_ptr(lock, cpu)->v, 0);
456 }
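/*
 * Typical lock/unlock pairing, as used by test_percpu_spinlock_thread()
 * below:
 *
 *	cpu = rseq_this_cpu_lock(&data->lock);
 *	rseq_percpu_ptr(data, cpu)->count++;
 *	rseq_percpu_unlock(&data->lock, cpu);
 *
 * The lock is taken on whichever CPU (or mm_cid) the thread happens to run
 * on, and the returned index must be passed back to the unlock.
 */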
457
458 static void *test_percpu_spinlock_thread(void *arg)
459 {
460 struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
461 struct spinlock_test_data __rseq_percpu *data = thread_data->data;
462 long long i, reps;
463
464 if (!opt_disable_rseq && thread_data->reg &&
465 rseq_register_current_thread())
466 abort();
467 reps = thread_data->reps;
468 for (i = 0; i < reps; i++) {
469 int cpu = rseq_this_cpu_lock(&data->lock);
470 rseq_percpu_ptr(data, cpu)->count++;
471 rseq_percpu_unlock(&data->lock, cpu);
472 #ifndef BENCHMARK
473 if (i != 0 && !(i % (reps / 10)))
474 printf_verbose("tid %d: count %lld\n",
475 (int) rseq_gettid(), i);
476 #endif
477 }
478 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
479 (int) rseq_gettid(), nr_abort, signals_delivered);
480 if (!opt_disable_rseq && thread_data->reg &&
481 rseq_unregister_current_thread())
482 abort();
483 return NULL;
484 }
485
486 /*
487  * A simple test that implements a sharded counter using a per-cpu
488  * lock. Obviously, real applications might prefer to simply use a
489  * per-cpu increment; however, this is reasonable for a test, and the
490  * lock can be extended to synchronize more complicated operations.
491 */
492 static void test_percpu_spinlock(void)
493 {
494 const int num_threads = opt_threads;
495 int i, ret;
496 uint64_t sum;
497 pthread_t test_threads[num_threads];
498 struct spinlock_test_data __rseq_percpu *data;
499 struct spinlock_thread_test_data thread_data[num_threads];
500 struct rseq_percpu_pool *mempool;
501
502 mempool = rseq_percpu_pool_create(sizeof(struct spinlock_test_data),
503 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
504 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
505 if (!mempool) {
506 perror("rseq_percpu_pool_create");
507 abort();
508 }
509 data = (struct spinlock_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
510 if (!data) {
511 perror("rseq_percpu_zmalloc");
512 abort();
513 }
514
515 for (i = 0; i < num_threads; i++) {
516 thread_data[i].reps = opt_reps;
517 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
518 thread_data[i].reg = 1;
519 else
520 thread_data[i].reg = 0;
521 thread_data[i].data = data;
522 ret = pthread_create(&test_threads[i], NULL,
523 test_percpu_spinlock_thread,
524 &thread_data[i]);
525 if (ret) {
526 errno = ret;
527 perror("pthread_create");
528 abort();
529 }
530 }
531
532 for (i = 0; i < num_threads; i++) {
533 ret = pthread_join(test_threads[i], NULL);
534 if (ret) {
535 errno = ret;
536 perror("pthread_join");
537 abort();
538 }
539 }
540
541 sum = 0;
542 for (i = 0; i < CPU_SETSIZE; i++)
543 sum += rseq_percpu_ptr(data, i)->count;
544
545 assert(sum == (uint64_t)opt_reps * num_threads);
546 rseq_percpu_free(data);
547 ret = rseq_percpu_pool_destroy(mempool);
548 if (ret) {
549 perror("rseq_percpu_pool_destroy");
550 abort();
551 }
552 }
553
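/*
 * Lock-free per-CPU counter increment: rseq_load_add_store__ptr() adds 1
 * to the current CPU's counter inside an rseq critical section and is
 * simply retried whenever the sequence aborts (e.g. on preemption, signal
 * delivery or migration).
 */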
554 static void *test_percpu_inc_thread(void *arg)
555 {
556 struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
557 struct inc_test_data __rseq_percpu *data = thread_data->data;
558 long long i, reps;
559
560 if (!opt_disable_rseq && thread_data->reg &&
561 rseq_register_current_thread())
562 abort();
563 reps = thread_data->reps;
564 for (i = 0; i < reps; i++) {
565 int ret;
566
567 do {
568 int cpu;
569
570 cpu = get_current_cpu_id();
571 ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
572 &rseq_percpu_ptr(data, cpu)->count, 1, cpu);
573 } while (rseq_unlikely(ret));
574 #ifndef BENCHMARK
575 if (i != 0 && !(i % (reps / 10)))
576 printf_verbose("tid %d: count %lld\n",
577 (int) rseq_gettid(), i);
578 #endif
579 }
580 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
581 (int) rseq_gettid(), nr_abort, signals_delivered);
582 if (!opt_disable_rseq && thread_data->reg &&
583 rseq_unregister_current_thread())
584 abort();
585 return NULL;
586 }
587
588 static void test_percpu_inc(void)
589 {
590 const int num_threads = opt_threads;
591 int i, ret;
592 uint64_t sum;
593 pthread_t test_threads[num_threads];
594 struct inc_test_data __rseq_percpu *data;
595 struct inc_thread_test_data thread_data[num_threads];
596 struct rseq_percpu_pool *mempool;
597
598 mempool = rseq_percpu_pool_create(sizeof(struct inc_test_data),
599 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
600 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
601 if (!mempool) {
602 perror("rseq_percpu_pool_create");
603 abort();
604 }
605 data = (struct inc_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
606 if (!data) {
607 perror("rseq_percpu_zmalloc");
608 abort();
609 }
610
611 for (i = 0; i < num_threads; i++) {
612 thread_data[i].reps = opt_reps;
613 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
614 thread_data[i].reg = 1;
615 else
616 thread_data[i].reg = 0;
617 thread_data[i].data = data;
618 ret = pthread_create(&test_threads[i], NULL,
619 test_percpu_inc_thread,
620 &thread_data[i]);
621 if (ret) {
622 errno = ret;
623 perror("pthread_create");
624 abort();
625 }
626 }
627
628 for (i = 0; i < num_threads; i++) {
629 ret = pthread_join(test_threads[i], NULL);
630 if (ret) {
631 errno = ret;
632 perror("pthread_join");
633 abort();
634 }
635 }
636
637 sum = 0;
638 for (i = 0; i < CPU_SETSIZE; i++)
639 sum += rseq_percpu_ptr(data, i)->count;
640
641 assert(sum == (uint64_t)opt_reps * num_threads);
642 rseq_percpu_free(data);
643 ret = rseq_percpu_pool_destroy(mempool);
644 if (ret) {
645 perror("rseq_percpu_pool_destroy");
646 abort();
647 }
648 }
649
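/*
 * Push a node onto the current CPU's list: read the head, link the new
 * node in front of it, then let rseq_load_cbne_store__ptr() re-check that
 * the head still has the expected value and publish the new head as the
 * commit step of the critical section. Retried on comparison failure or
 * abort.
 */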
650 static void this_cpu_list_push(struct percpu_list __rseq_percpu *list,
651 struct percpu_list_node *node,
652 int *_cpu)
653 {
654 int cpu;
655
656 for (;;) {
657 intptr_t *targetptr, newval, expect;
658 struct percpu_list *cpulist;
659 int ret;
660
661 cpu = get_current_cpu_id();
662 cpulist = rseq_percpu_ptr(list, cpu);
663 /* Load list->c[cpu].head with single-copy atomicity. */
664 expect = (intptr_t)RSEQ_READ_ONCE(cpulist->head);
665 newval = (intptr_t)node;
666 targetptr = (intptr_t *)&cpulist->head;
667 node->next = (struct percpu_list_node *)expect;
668 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
669 targetptr, expect, newval, cpu);
670 if (rseq_likely(!ret))
671 break;
672 /* Retry if comparison fails or rseq aborts. */
673 }
674 if (_cpu)
675 *_cpu = cpu;
676 }
677
678 /*
679  * Unlike a traditional lock-less linked list, the availability of an
680 * rseq primitive allows us to implement pop without concerns over
681 * ABA-type races.
682 */
683 static struct percpu_list_node *this_cpu_list_pop(struct percpu_list __rseq_percpu *list,
684 int *_cpu)
685 {
686 struct percpu_list_node *node = NULL;
687 int cpu;
688
689 for (;;) {
690 struct percpu_list_node *head;
691 intptr_t *targetptr, expectnot, *load;
692 struct percpu_list *cpulist;
693 long offset;
694 int ret;
695
696 cpu = get_current_cpu_id();
697 cpulist = rseq_percpu_ptr(list, cpu);
698 targetptr = (intptr_t *)&cpulist->head;
699 expectnot = (intptr_t)NULL;
700 offset = offsetof(struct percpu_list_node, next);
701 load = (intptr_t *)&head;
702 ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
703 targetptr, expectnot,
704 offset, load, cpu);
705 if (rseq_likely(!ret)) {
706 node = head;
707 break;
708 }
709 if (ret > 0)
710 break;
711 /* Retry if rseq aborts. */
712 }
713 if (_cpu)
714 *_cpu = cpu;
715 return node;
716 }
717
718 /*
719 * __percpu_list_pop is not safe against concurrent accesses. Should
720 * only be used on lists that are not concurrently modified.
721 */
722 static struct percpu_list_node *__percpu_list_pop(struct percpu_list __rseq_percpu *list, int cpu)
723 {
724 struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
725 struct percpu_list_node *node;
726
727 node = cpulist->head;
728 if (!node)
729 return NULL;
730 cpulist->head = node->next;
731 return node;
732 }
733
734 static void *test_percpu_list_thread(void *arg)
735 {
736 long long i, reps;
737 struct percpu_list __rseq_percpu *list = (struct percpu_list __rseq_percpu *)arg;
738
739 if (!opt_disable_rseq && rseq_register_current_thread())
740 abort();
741
742 reps = opt_reps;
743 for (i = 0; i < reps; i++) {
744 struct percpu_list_node *node;
745
746 node = this_cpu_list_pop(list, NULL);
747 if (opt_yield)
748 sched_yield(); /* encourage shuffling */
749 if (node)
750 this_cpu_list_push(list, node, NULL);
751 }
752
753 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
754 (int) rseq_gettid(), nr_abort, signals_delivered);
755 if (!opt_disable_rseq && rseq_unregister_current_thread())
756 abort();
757
758 return NULL;
759 }
760
761 /* Simultaneous modification to a per-cpu linked list from many threads. */
762 static void test_percpu_list(void)
763 {
764 const int num_threads = opt_threads;
765 int i, j, ret;
766 uint64_t sum = 0, expected_sum = 0;
767 struct percpu_list __rseq_percpu *list;
768 pthread_t test_threads[num_threads];
769 cpu_set_t allowed_cpus;
770 struct rseq_percpu_pool *mempool;
771
772 mempool = rseq_percpu_pool_create(sizeof(struct percpu_list),
773 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
774 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
775 if (!mempool) {
776 perror("rseq_percpu_pool_create");
777 abort();
778 }
779 list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool);
780 if (!list) {
781 perror("rseq_percpu_zmalloc");
782 abort();
783 }
784
785 /* Generate list entries for every usable cpu. */
786 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
787 for (i = 0; i < CPU_SETSIZE; i++) {
788 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
789 continue;
790 for (j = 1; j <= 100; j++) {
791 struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
792 struct percpu_list_node *node;
793
794 expected_sum += j;
795
796 node = (struct percpu_list_node *) malloc(sizeof(*node));
797 assert(node);
798 node->data = j;
799 node->next = cpulist->head;
800 cpulist->head = node;
801 }
802 }
803
804 for (i = 0; i < num_threads; i++) {
805 ret = pthread_create(&test_threads[i], NULL,
806 test_percpu_list_thread, list);
807 if (ret) {
808 errno = ret;
809 perror("pthread_create");
810 abort();
811 }
812 }
813
814 for (i = 0; i < num_threads; i++) {
815 ret = pthread_join(test_threads[i], NULL);
816 if (ret) {
817 errno = ret;
818 perror("pthread_join");
819 abort();
820 }
821 }
822
823 for (i = 0; i < CPU_SETSIZE; i++) {
824 struct percpu_list_node *node;
825
826 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
827 continue;
828
829 while ((node = __percpu_list_pop(list, i))) {
830 sum += node->data;
831 free(node);
832 }
833 }
834
835 /*
836 * All entries should now be accounted for (unless some external
837 * actor is interfering with our allowed affinity while this
838 * test is running).
839 */
840 assert(sum == expected_sum);
841 rseq_percpu_free(list);
842 ret = rseq_percpu_pool_destroy(mempool);
843 if (ret) {
844 perror("rseq_percpu_pool_destroy");
845 abort();
846 }
847 }
848
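/*
 * Push onto the current CPU's pointer buffer: store the node into
 * array[offset] speculatively, then commit by storing offset + 1 into
 * ->offset, both within a single rseq critical section
 * (rseq_load_cbne_store_store__ptr). Returns false when the buffer is
 * full. With -M, opt_mo is RSEQ_MO_RELEASE, so the commit store is
 * performed with release semantics.
 */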
849 static bool this_cpu_buffer_push(struct percpu_buffer __rseq_percpu *buffer,
850 struct percpu_buffer_node *node,
851 int *_cpu)
852 {
853 bool result = false;
854 int cpu;
855
856 for (;;) {
857 struct percpu_buffer *cpubuffer;
858 intptr_t *targetptr_spec, newval_spec;
859 intptr_t *targetptr_final, newval_final;
860 intptr_t offset;
861 int ret;
862
863 cpu = get_current_cpu_id();
864 cpubuffer = rseq_percpu_ptr(buffer, cpu);
865 offset = RSEQ_READ_ONCE(cpubuffer->offset);
866 if (offset == cpubuffer->buflen)
867 break;
868 newval_spec = (intptr_t)node;
869 targetptr_spec = (intptr_t *)&cpubuffer->array[offset];
870 newval_final = offset + 1;
871 targetptr_final = &cpubuffer->offset;
872 ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
873 targetptr_final, offset, targetptr_spec,
874 newval_spec, newval_final, cpu);
875 if (rseq_likely(!ret)) {
876 result = true;
877 break;
878 }
879 /* Retry if comparison fails or rseq aborts. */
880 }
881 if (_cpu)
882 *_cpu = cpu;
883 return result;
884 }
885
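/*
 * Pop from the current CPU's pointer buffer: read the candidate node from
 * array[offset - 1], then use rseq_load_cbne_load_cbne_store__ptr() to
 * re-check both ->offset and the array slot before committing offset - 1,
 * so any update that slipped in between forces a retry rather than
 * returning a stale node.
 */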
886 static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
887 int *_cpu)
888 {
889 struct percpu_buffer_node *head;
890 int cpu;
891
892 for (;;) {
893 struct percpu_buffer *cpubuffer;
894 intptr_t *targetptr, newval;
895 intptr_t offset;
896 int ret;
897
898 cpu = get_current_cpu_id();
899 cpubuffer = rseq_percpu_ptr(buffer, cpu);
900 /* Load offset with single-copy atomicity. */
901 offset = RSEQ_READ_ONCE(cpubuffer->offset);
902 if (offset == 0) {
903 head = NULL;
904 break;
905 }
906 head = RSEQ_READ_ONCE(cpubuffer->array[offset - 1]);
907 newval = offset - 1;
908 targetptr = (intptr_t *)&cpubuffer->offset;
909 ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
910 targetptr, offset,
911 (intptr_t *)&cpubuffer->array[offset - 1],
912 (intptr_t)head, newval, cpu);
913 if (rseq_likely(!ret))
914 break;
915 /* Retry if comparison fails or rseq aborts. */
916 }
917 if (_cpu)
918 *_cpu = cpu;
919 return head;
920 }
921
922 /*
923 * __percpu_buffer_pop is not safe against concurrent accesses. Should
924 * only be used on buffers that are not concurrently modified.
925 */
926 static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
927 int cpu)
928 {
929 struct percpu_buffer *cpubuffer;
930 struct percpu_buffer_node *head;
931 intptr_t offset;
932
933 cpubuffer = rseq_percpu_ptr(buffer, cpu);
934 offset = cpubuffer->offset;
935 if (offset == 0)
936 return NULL;
937 head = cpubuffer->array[offset - 1];
938 cpubuffer->offset = offset - 1;
939 return head;
940 }
941
942 static void *test_percpu_buffer_thread(void *arg)
943 {
944 long long i, reps;
945 struct percpu_buffer __rseq_percpu *buffer = (struct percpu_buffer __rseq_percpu *)arg;
946
947 if (!opt_disable_rseq && rseq_register_current_thread())
948 abort();
949
950 reps = opt_reps;
951 for (i = 0; i < reps; i++) {
952 struct percpu_buffer_node *node;
953
954 node = this_cpu_buffer_pop(buffer, NULL);
955 if (opt_yield)
956 sched_yield(); /* encourage shuffling */
957 if (node) {
958 if (!this_cpu_buffer_push(buffer, node, NULL)) {
959 /* Should increase buffer size. */
960 abort();
961 }
962 }
963 }
964
965 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
966 (int) rseq_gettid(), nr_abort, signals_delivered);
967 if (!opt_disable_rseq && rseq_unregister_current_thread())
968 abort();
969
970 return NULL;
971 }
972
973 /* Simultaneous modification to a per-cpu buffer from many threads. */
974 static void test_percpu_buffer(void)
975 {
976 const int num_threads = opt_threads;
977 int i, j, ret;
978 uint64_t sum = 0, expected_sum = 0;
979 struct percpu_buffer __rseq_percpu *buffer;
980 pthread_t test_threads[num_threads];
981 cpu_set_t allowed_cpus;
982 struct rseq_percpu_pool *mempool;
983
984 mempool = rseq_percpu_pool_create(sizeof(struct percpu_buffer),
985 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
986 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
987 if (!mempool) {
988 perror("rseq_percpu_pool_create");
989 abort();
990 }
991 buffer = (struct percpu_buffer __rseq_percpu *)rseq_percpu_zmalloc(mempool);
992 if (!buffer) {
993 perror("rseq_percpu_zmalloc");
994 abort();
995 }
996
997 	/* Generate buffer entries for every usable cpu. */
998 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
999 for (i = 0; i < CPU_SETSIZE; i++) {
1000 struct percpu_buffer *cpubuffer;
1001
1002 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1003 continue;
1004 cpubuffer = rseq_percpu_ptr(buffer, i);
1005 		/* Worst case is every item in the same CPU. */
1006 cpubuffer->array =
1007 (struct percpu_buffer_node **)
1008 malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
1009 BUFFER_ITEM_PER_CPU);
1010 assert(cpubuffer->array);
1011 cpubuffer->buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
1012 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
1013 struct percpu_buffer_node *node;
1014
1015 expected_sum += j;
1016
1017 /*
1018 * We could theoretically put the word-sized
1019 * "data" directly in the buffer. However, we
1020 * want to model objects that would not fit
1021 * within a single word, so allocate an object
1022 * for each node.
1023 */
1024 node = (struct percpu_buffer_node *) malloc(sizeof(*node));
1025 assert(node);
1026 node->data = j;
1027 cpubuffer->array[j - 1] = node;
1028 cpubuffer->offset++;
1029 }
1030 }
1031
1032 for (i = 0; i < num_threads; i++) {
1033 ret = pthread_create(&test_threads[i], NULL,
1034 test_percpu_buffer_thread, buffer);
1035 if (ret) {
1036 errno = ret;
1037 perror("pthread_create");
1038 abort();
1039 }
1040 }
1041
1042 for (i = 0; i < num_threads; i++) {
1043 ret = pthread_join(test_threads[i], NULL);
1044 if (ret) {
1045 errno = ret;
1046 perror("pthread_join");
1047 abort();
1048 }
1049 }
1050
1051 for (i = 0; i < CPU_SETSIZE; i++) {
1052 struct percpu_buffer *cpubuffer;
1053 struct percpu_buffer_node *node;
1054
1055 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1056 continue;
1057
1058 cpubuffer = rseq_percpu_ptr(buffer, i);
1059 while ((node = __percpu_buffer_pop(buffer, i))) {
1060 sum += node->data;
1061 free(node);
1062 }
1063 free(cpubuffer->array);
1064 }
1065
1066 /*
1067 * All entries should now be accounted for (unless some external
1068 * actor is interfering with our allowed affinity while this
1069 * test is running).
1070 */
1071 assert(sum == expected_sum);
1072 rseq_percpu_free(buffer);
1073 ret = rseq_percpu_pool_destroy(mempool);
1074 if (ret) {
1075 perror("rseq_percpu_pool_destroy");
1076 abort();
1077 }
1078 }
1079
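/*
 * Same push/pop scheme as above, but items are copied by value:
 * rseq_load_cbne_memcpy_store__ptr() re-checks ->offset, memcpys the item
 * into (or out of) the array slot, then commits the new offset, all within
 * one critical section. This models objects larger than a single word.
 */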
1080 static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1081 struct percpu_memcpy_buffer_node item,
1082 int *_cpu)
1083 {
1084 bool result = false;
1085 int cpu;
1086
1087 for (;;) {
1088 struct percpu_memcpy_buffer *cpubuffer;
1089 intptr_t *targetptr_final, newval_final, offset;
1090 char *destptr, *srcptr;
1091 size_t copylen;
1092 int ret;
1093
1094 cpu = get_current_cpu_id();
1095 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1096 /* Load offset with single-copy atomicity. */
1097 offset = RSEQ_READ_ONCE(cpubuffer->offset);
1098 if (offset == cpubuffer->buflen)
1099 break;
1100 destptr = (char *)&cpubuffer->array[offset];
1101 srcptr = (char *)&item;
1102 /* copylen must be <= 4kB. */
1103 copylen = sizeof(item);
1104 newval_final = offset + 1;
1105 targetptr_final = &cpubuffer->offset;
1106 ret = rseq_load_cbne_memcpy_store__ptr(
1107 opt_mo, RSEQ_PERCPU,
1108 targetptr_final, offset,
1109 destptr, srcptr, copylen,
1110 newval_final, cpu);
1111 if (rseq_likely(!ret)) {
1112 result = true;
1113 break;
1114 }
1115 /* Retry if comparison fails or rseq aborts. */
1116 }
1117 if (_cpu)
1118 *_cpu = cpu;
1119 return result;
1120 }
1121
1122 static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1123 struct percpu_memcpy_buffer_node *item,
1124 int *_cpu)
1125 {
1126 bool result = false;
1127 int cpu;
1128
1129 for (;;) {
1130 struct percpu_memcpy_buffer *cpubuffer;
1131 intptr_t *targetptr_final, newval_final, offset;
1132 char *destptr, *srcptr;
1133 size_t copylen;
1134 int ret;
1135
1136 cpu = get_current_cpu_id();
1137 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1138 /* Load offset with single-copy atomicity. */
1139 offset = RSEQ_READ_ONCE(cpubuffer->offset);
1140 if (offset == 0)
1141 break;
1142 destptr = (char *)item;
1143 srcptr = (char *)&cpubuffer->array[offset - 1];
1144 /* copylen must be <= 4kB. */
1145 copylen = sizeof(*item);
1146 newval_final = offset - 1;
1147 targetptr_final = &cpubuffer->offset;
1148 ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1149 targetptr_final, offset, destptr, srcptr, copylen,
1150 newval_final, cpu);
1151 if (rseq_likely(!ret)) {
1152 result = true;
1153 break;
1154 }
1155 /* Retry if comparison fails or rseq aborts. */
1156 }
1157 if (_cpu)
1158 *_cpu = cpu;
1159 return result;
1160 }
1161
1162 /*
1163 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1164 * only be used on buffers that are not concurrently modified.
1165 */
1166 static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
1167 struct percpu_memcpy_buffer_node *item,
1168 int cpu)
1169 {
1170 struct percpu_memcpy_buffer *cpubuffer;
1171 intptr_t offset;
1172
1173 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1174 offset = cpubuffer->offset;
1175 if (offset == 0)
1176 return false;
1177 memcpy(item, &cpubuffer->array[offset - 1], sizeof(*item));
1178 cpubuffer->offset = offset - 1;
1179 return true;
1180 }
1181
1182 static void *test_percpu_memcpy_buffer_thread(void *arg)
1183 {
1184 long long i, reps;
1185 struct percpu_memcpy_buffer __rseq_percpu *buffer = (struct percpu_memcpy_buffer __rseq_percpu *)arg;
1186
1187 if (!opt_disable_rseq && rseq_register_current_thread())
1188 abort();
1189
1190 reps = opt_reps;
1191 for (i = 0; i < reps; i++) {
1192 struct percpu_memcpy_buffer_node item;
1193 bool result;
1194
1195 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1196 if (opt_yield)
1197 sched_yield(); /* encourage shuffling */
1198 if (result) {
1199 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1200 /* Should increase buffer size. */
1201 abort();
1202 }
1203 }
1204 }
1205
1206 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
1207 (int) rseq_gettid(), nr_abort, signals_delivered);
1208 if (!opt_disable_rseq && rseq_unregister_current_thread())
1209 abort();
1210
1211 return NULL;
1212 }
1213
1214 /* Simultaneous modification to a per-cpu buffer from many threads. */
1215 static void test_percpu_memcpy_buffer(void)
1216 {
1217 const int num_threads = opt_threads;
1218 int i, j, ret;
1219 uint64_t sum = 0, expected_sum = 0;
1220 	struct percpu_memcpy_buffer __rseq_percpu *buffer;
1221 pthread_t test_threads[num_threads];
1222 cpu_set_t allowed_cpus;
1223 struct rseq_percpu_pool *mempool;
1224
1225 mempool = rseq_percpu_pool_create(sizeof(struct percpu_memcpy_buffer),
1226 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
1227 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
1228 if (!mempool) {
1229 perror("rseq_percpu_pool_create");
1230 abort();
1231 }
1232 buffer = (struct percpu_memcpy_buffer __rseq_percpu *)rseq_percpu_zmalloc(mempool);
1233 if (!buffer) {
1234 perror("rseq_percpu_zmalloc");
1235 abort();
1236 }
1237
1238 	/* Generate memcpy buffer entries for every usable cpu. */
1239 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1240 for (i = 0; i < CPU_SETSIZE; i++) {
1241 struct percpu_memcpy_buffer *cpubuffer;
1242
1243 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1244 continue;
1245 cpubuffer = rseq_percpu_ptr(buffer, i);
1246 		/* Worst case is every item in the same CPU. */
1247 cpubuffer->array =
1248 (struct percpu_memcpy_buffer_node *)
1249 malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
1250 MEMCPY_BUFFER_ITEM_PER_CPU);
1251 assert(cpubuffer->array);
1252 cpubuffer->buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1253 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1254 expected_sum += 2 * j + 1;
1255
1256 /*
1257 * We could theoretically put the word-sized
1258 * "data" directly in the buffer. However, we
1259 * want to model objects that would not fit
1260 * within a single word, so allocate an object
1261 * for each node.
1262 */
1263 cpubuffer->array[j - 1].data1 = j;
1264 cpubuffer->array[j - 1].data2 = j + 1;
1265 cpubuffer->offset++;
1266 }
1267 }
1268
1269 for (i = 0; i < num_threads; i++) {
1270 ret = pthread_create(&test_threads[i], NULL,
1271 test_percpu_memcpy_buffer_thread,
1272 buffer);
1273 if (ret) {
1274 errno = ret;
1275 perror("pthread_create");
1276 abort();
1277 }
1278 }
1279
1280 for (i = 0; i < num_threads; i++) {
1281 ret = pthread_join(test_threads[i], NULL);
1282 if (ret) {
1283 errno = ret;
1284 perror("pthread_join");
1285 abort();
1286 }
1287 }
1288
1289 for (i = 0; i < CPU_SETSIZE; i++) {
1290 struct percpu_memcpy_buffer_node item;
1291 struct percpu_memcpy_buffer *cpubuffer;
1292
1293 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1294 continue;
1295
1296 cpubuffer = rseq_percpu_ptr(buffer, i);
1297 while (__percpu_memcpy_buffer_pop(buffer, &item, i)) {
1298 sum += item.data1;
1299 sum += item.data2;
1300 }
1301 free(cpubuffer->array);
1302 }
1303
1304 /*
1305 * All entries should now be accounted for (unless some external
1306 * actor is interfering with our allowed affinity while this
1307 * test is running).
1308 */
1309 assert(sum == expected_sum);
1310 rseq_percpu_free(buffer);
1311 ret = rseq_percpu_pool_destroy(mempool);
1312 if (ret) {
1313 perror("rseq_percpu_pool_destroy");
1314 abort();
1315 }
1316 }
1317
1318 static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
1319 {
1320 signals_delivered++;
1321 }
1322
1323 static int set_signal_handler(void)
1324 {
1325 int ret = 0;
1326 struct sigaction sa;
1327 sigset_t sigset;
1328
1329 ret = sigemptyset(&sigset);
1330 if (ret < 0) {
1331 perror("sigemptyset");
1332 return ret;
1333 }
1334
1335 sa.sa_handler = test_signal_interrupt_handler;
1336 sa.sa_mask = sigset;
1337 sa.sa_flags = 0;
1338 ret = sigaction(SIGUSR1, &sa, NULL);
1339 if (ret < 0) {
1340 perror("sigaction");
1341 return ret;
1342 }
1343
1344 printf_verbose("Signal handler set for SIGUSR1\n");
1345
1346 return ret;
1347 }
1348
1349 static
1350 bool membarrier_private_expedited_rseq_available(void)
1351 {
1352 int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
1353
1354 if (status < 0) {
1355 perror("membarrier");
1356 return false;
1357 }
1358 if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
1359 return false;
1360 return true;
1361 }
1362
1363 /* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
1364 #ifdef TEST_MEMBARRIER
1365 struct test_membarrier_thread_args {
1366 struct rseq_percpu_pool *mempool;
1367 struct percpu_list __rseq_percpu *percpu_list_ptr;
1368 int stop;
1369 };
1370
1371 /* Worker threads modify data in their "active" percpu lists. */
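/*
 * Each iteration uses rseq_load_add_load_load_add_store__ptr() to, in
 * effect, follow the currently "active" list pointer, locate this CPU's
 * list head through the mempool offset, and increment the head node's data
 * field, all as one restartable sequence. A list swap by the manager,
 * followed by an expedited rseq membarrier, therefore aborts any worker
 * still operating on the old list.
 */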
1372 static
1373 void *test_membarrier_worker_thread(void *arg)
1374 {
1375 struct test_membarrier_thread_args *args =
1376 (struct test_membarrier_thread_args *)arg;
1377 const int iters = opt_reps;
1378 int i;
1379
1380 if (rseq_register_current_thread()) {
1381 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1382 errno, strerror(errno));
1383 abort();
1384 }
1385
1386 /* Wait for initialization. */
1387 while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }
1388
1389 for (i = 0; i < iters; ++i) {
1390 int ret;
1391
1392 do {
1393 int cpu = get_current_cpu_id();
1394 ptrdiff_t mempool_offset = rseq_percpu_pool_ptr_offset(args->mempool, cpu);
1395
1396 ret = rseq_load_add_load_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1397 (intptr_t *) &args->percpu_list_ptr,
1398 mempool_offset + offsetof(struct percpu_list, head),
1399 1, cpu);
1400 } while (rseq_unlikely(ret));
1401 }
1402
1403 if (rseq_unregister_current_thread()) {
1404 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1405 errno, strerror(errno));
1406 abort();
1407 }
1408 return NULL;
1409 }
1410
1411 static
1412 struct percpu_list __rseq_percpu *test_membarrier_alloc_percpu_list(struct rseq_percpu_pool *mempool)
1413 {
1414 struct percpu_list __rseq_percpu *list;
1415 int i;
1416
1417 list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool);
1418 if (!list) {
1419 perror("rseq_percpu_zmalloc");
1420 return NULL;
1421 }
1422 for (i = 0; i < CPU_SETSIZE; i++) {
1423 struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
1424 struct percpu_list_node *node;
1425
1426 node = (struct percpu_list_node *) malloc(sizeof(*node));
1427 assert(node);
1428 node->data = 0;
1429 node->next = NULL;
1430 cpulist->head = node;
1431 }
1432 return list;
1433 }
1434
1435 static
1436 void test_membarrier_free_percpu_list(struct percpu_list __rseq_percpu *list)
1437 {
1438 int i;
1439
1440 for (i = 0; i < CPU_SETSIZE; i++)
1441 free(rseq_percpu_ptr(list, i)->head);
1442 rseq_percpu_free(list);
1443 }
1444
1445 /*
1446 * The manager thread swaps per-cpu lists that worker threads see,
1447 * and validates that there are no unexpected modifications.
1448 */
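/*
 * The swap protocol resembles an RCU-style publish: after pointing
 * percpu_list_ptr at the other list, the manager issues
 * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ at the CPU of interest, which
 * restarts any rseq critical section still running there, so no worker can
 * keep incrementing the now-inactive list. The head data of the inactive
 * list must therefore stay constant between swaps, which is what the loop
 * below asserts.
 */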
1449 static
1450 void *test_membarrier_manager_thread(void *arg)
1451 {
1452 struct test_membarrier_thread_args *args =
1453 (struct test_membarrier_thread_args *)arg;
1454 struct percpu_list __rseq_percpu *list_a, __rseq_percpu *list_b;
1455 intptr_t expect_a = 0, expect_b = 0;
1456 int cpu_a = 0, cpu_b = 0;
1457 struct rseq_percpu_pool *mempool;
1458 int ret;
1459
1460 mempool = rseq_percpu_pool_create(sizeof(struct percpu_list),
1461 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
1462 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
1463 if (!mempool) {
1464 perror("rseq_percpu_pool_create");
1465 abort();
1466 }
1467 args->mempool = mempool;
1468
1469 if (rseq_register_current_thread()) {
1470 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1471 errno, strerror(errno));
1472 abort();
1473 }
1474
1475 /* Init lists. */
1476 list_a = test_membarrier_alloc_percpu_list(mempool);
1477 assert(list_a);
1478 list_b = test_membarrier_alloc_percpu_list(mempool);
1479 assert(list_b);
1480
1481 /* Initialize lists before publishing them. */
1482 rseq_smp_wmb();
1483
1484 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
1485
1486 while (!RSEQ_READ_ONCE(args->stop)) {
1487 /* list_a is "active". */
1488 cpu_a = rand() % CPU_SETSIZE;
1489 /*
1490 * As list_b is "inactive", we should never see changes
1491 * to list_b.
1492 */
1493 if (expect_b != RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data)) {
1494 fprintf(stderr, "Membarrier test failed\n");
1495 abort();
1496 }
1497
1498 /* Make list_b "active". */
1499 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_b);
1500 if (rseq_membarrier_expedited(cpu_a) &&
1501 errno != ENXIO /* missing CPU */) {
1502 perror("sys_membarrier");
1503 abort();
1504 }
1505 /*
1506 		 * CPU A should now only modify list_b, so the values
1507 * in list_a should be stable.
1508 */
1509 expect_a = RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data);
1510
1511 cpu_b = rand() % CPU_SETSIZE;
1512 /*
1513 * As list_a is "inactive", we should never see changes
1514 * to list_a.
1515 */
1516 if (expect_a != RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data)) {
1517 fprintf(stderr, "Membarrier test failed\n");
1518 abort();
1519 }
1520
1521 /* Make list_a "active". */
1522 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
1523 if (rseq_membarrier_expedited(cpu_b) &&
1524 errno != ENXIO /* missing CPU */) {
1525 perror("sys_membarrier");
1526 abort();
1527 }
1528 /* Remember a value from list_b. */
1529 expect_b = RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data);
1530 }
1531
1532 test_membarrier_free_percpu_list(list_a);
1533 test_membarrier_free_percpu_list(list_b);
1534
1535 if (rseq_unregister_current_thread()) {
1536 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1537 errno, strerror(errno));
1538 abort();
1539 }
1540 ret = rseq_percpu_pool_destroy(mempool);
1541 if (ret) {
1542 perror("rseq_percpu_pool_destroy");
1543 abort();
1544 }
1545
1546 return NULL;
1547 }
1548
1549 static
1550 void test_membarrier(void)
1551 {
1552 const int num_threads = opt_threads;
1553 struct test_membarrier_thread_args thread_args;
1554 pthread_t worker_threads[num_threads];
1555 pthread_t manager_thread;
1556 int i, ret;
1557
1558 if (!membarrier_private_expedited_rseq_available()) {
1559 fprintf(stderr, "Membarrier private expedited rseq not available. "
1560 "Skipping membarrier test.\n");
1561 return;
1562 }
1563 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1564 perror("sys_membarrier");
1565 abort();
1566 }
1567
1568 thread_args.percpu_list_ptr = NULL;
1569 thread_args.stop = 0;
1570 ret = pthread_create(&manager_thread, NULL,
1571 test_membarrier_manager_thread, &thread_args);
1572 if (ret) {
1573 errno = ret;
1574 perror("pthread_create");
1575 abort();
1576 }
1577
1578 for (i = 0; i < num_threads; i++) {
1579 ret = pthread_create(&worker_threads[i], NULL,
1580 test_membarrier_worker_thread, &thread_args);
1581 if (ret) {
1582 errno = ret;
1583 perror("pthread_create");
1584 abort();
1585 }
1586 }
1587
1588
1589 for (i = 0; i < num_threads; i++) {
1590 ret = pthread_join(worker_threads[i], NULL);
1591 if (ret) {
1592 errno = ret;
1593 perror("pthread_join");
1594 abort();
1595 }
1596 }
1597
1598 RSEQ_WRITE_ONCE(thread_args.stop, 1);
1599 ret = pthread_join(manager_thread, NULL);
1600 if (ret) {
1601 errno = ret;
1602 perror("pthread_join");
1603 abort();
1604 }
1605 }
1606 #else /* TEST_MEMBARRIER */
1607 static
1608 void test_membarrier(void)
1609 {
1610 if (!membarrier_private_expedited_rseq_available()) {
1611 fprintf(stderr, "Membarrier private expedited rseq not available. "
1612 "Skipping membarrier test.\n");
1613 return;
1614 }
1615 fprintf(stderr, "rseq_load_add_load_load_add_store__ptr is not implemented on this architecture. "
1616 "Skipping membarrier test.\n");
1617 }
1618 #endif
1619
1620 static void show_usage(char **argv)
1621 {
1622 printf("Usage : %s <OPTIONS>\n",
1623 argv[0]);
1624 printf("OPTIONS:\n");
1625 printf(" [-1 loops] Number of loops for delay injection 1\n");
1626 printf(" [-2 loops] Number of loops for delay injection 2\n");
1627 printf(" [-3 loops] Number of loops for delay injection 3\n");
1628 printf(" [-4 loops] Number of loops for delay injection 4\n");
1629 printf(" [-5 loops] Number of loops for delay injection 5\n");
1630 printf(" [-6 loops] Number of loops for delay injection 6\n");
1631 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1632 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1633 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1634 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1635 printf(" [-y] Yield\n");
1636 printf(" [-k] Kill thread with signal\n");
1637 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1638 printf(" [-t N] Number of threads (default 200)\n");
1639 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1640 printf(" [-d] Disable rseq system call (no initialization)\n");
1641 printf(" [-D M] Disable rseq for each M threads\n");
1642 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
1643 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
1644 printf(" [-c] Check if the rseq syscall is available.\n");
1645 printf(" [-v] Verbose output.\n");
1646 printf(" [-h] Show this help.\n");
1647 printf("\n");
1648 }
1649
1650 int main(int argc, char **argv)
1651 {
1652 int i;
1653
1654 for (i = 1; i < argc; i++) {
1655 if (argv[i][0] != '-')
1656 continue;
1657 switch (argv[i][1]) {
1658 case '1':
1659 case '2':
1660 case '3':
1661 case '4':
1662 case '5':
1663 case '6':
1664 case '7':
1665 case '8':
1666 case '9':
1667 if (argc < i + 2) {
1668 show_usage(argv);
1669 goto error;
1670 }
1671 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1672 i++;
1673 break;
1674 case 'm':
1675 if (argc < i + 2) {
1676 show_usage(argv);
1677 goto error;
1678 }
1679 opt_modulo = atol(argv[i + 1]);
1680 if (opt_modulo < 0) {
1681 show_usage(argv);
1682 goto error;
1683 }
1684 i++;
1685 break;
1686 case 's':
1687 if (argc < i + 2) {
1688 show_usage(argv);
1689 goto error;
1690 }
1691 opt_sleep = atol(argv[i + 1]);
1692 if (opt_sleep < 0) {
1693 show_usage(argv);
1694 goto error;
1695 }
1696 i++;
1697 break;
1698 case 'y':
1699 opt_yield = 1;
1700 break;
1701 case 'k':
1702 opt_signal = 1;
1703 break;
1704 case 'd':
1705 opt_disable_rseq = 1;
1706 break;
1707 case 'D':
1708 if (argc < i + 2) {
1709 show_usage(argv);
1710 goto error;
1711 }
1712 opt_disable_mod = atol(argv[i + 1]);
1713 if (opt_disable_mod < 0) {
1714 show_usage(argv);
1715 goto error;
1716 }
1717 i++;
1718 break;
1719 case 't':
1720 if (argc < i + 2) {
1721 show_usage(argv);
1722 goto error;
1723 }
1724 opt_threads = atol(argv[i + 1]);
1725 if (opt_threads < 0) {
1726 show_usage(argv);
1727 goto error;
1728 }
1729 i++;
1730 break;
1731 case 'r':
1732 if (argc < i + 2) {
1733 show_usage(argv);
1734 goto error;
1735 }
1736 opt_reps = atoll(argv[i + 1]);
1737 if (opt_reps < 0) {
1738 show_usage(argv);
1739 goto error;
1740 }
1741 i++;
1742 break;
1743 case 'h':
1744 show_usage(argv);
1745 goto end;
1746 case 'T':
1747 if (argc < i + 2) {
1748 show_usage(argv);
1749 goto error;
1750 }
1751 opt_test = *argv[i + 1];
1752 switch (opt_test) {
1753 case 's':
1754 case 'l':
1755 case 'i':
1756 case 'b':
1757 case 'm':
1758 case 'r':
1759 break;
1760 default:
1761 show_usage(argv);
1762 goto error;
1763 }
1764 i++;
1765 break;
1766 case 'v':
1767 verbose = 1;
1768 break;
1769 case 'M':
1770 opt_mo = RSEQ_MO_RELEASE;
1771 break;
1772 case 'c':
1773 if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
1774 printf_verbose("The rseq syscall is available.\n");
1775 goto end;
1776 } else {
1777 printf_verbose("The rseq syscall is unavailable.\n");
1778 goto no_rseq;
1779 }
1780 default:
1781 show_usage(argv);
1782 goto error;
1783 }
1784 }
1785
1786 loop_cnt_1 = loop_cnt[1];
1787 loop_cnt_2 = loop_cnt[2];
1788 loop_cnt_3 = loop_cnt[3];
1789 loop_cnt_4 = loop_cnt[4];
1790 loop_cnt_5 = loop_cnt[5];
1791 loop_cnt_6 = loop_cnt[6];
1792
1793 if (set_signal_handler())
1794 goto error;
1795
1796 if (!opt_disable_rseq && rseq_register_current_thread())
1797 goto error;
1798 if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
1799 printf_verbose("The rseq cpu id getter is unavailable\n");
1800 goto no_rseq;
1801 }
1802 switch (opt_test) {
1803 case 's':
1804 printf_verbose("spinlock\n");
1805 test_percpu_spinlock();
1806 break;
1807 case 'l':
1808 printf_verbose("linked list\n");
1809 test_percpu_list();
1810 break;
1811 case 'b':
1812 printf_verbose("buffer\n");
1813 test_percpu_buffer();
1814 break;
1815 case 'm':
1816 printf_verbose("memcpy buffer\n");
1817 test_percpu_memcpy_buffer();
1818 break;
1819 case 'i':
1820 printf_verbose("counter increment\n");
1821 test_percpu_inc();
1822 break;
1823 case 'r':
1824 printf_verbose("membarrier\n");
1825 test_membarrier();
1826 break;
1827 }
1828 if (!opt_disable_rseq && rseq_unregister_current_thread())
1829 abort();
1830 end:
1831 return 0;
1832
1833 error:
1834 return -1;
1835
1836 no_rseq:
1837 return 2;
1838 }