// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
#include <linux/version.h>
#include <linux/membarrier.h>
#include <sys/types.h>
#include <rseq/mempool.h>

#if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
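/*
 * Compatibility fallback: the membarrier rseq commands and the
 * CPU-targeting flag below were only added to the kernel UAPI headers
 * in Linux 5.10, so define them here when building against older
 * headers. The values match the kernel ABI.
 */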
        MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ                   = (1 << 7),
        MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ          = (1 << 8),

        MEMBARRIER_CMD_FLAG_CPU         = (1 << 0),
#define NR_INJECT       9
static int loop_cnt[NR_INJECT + 1];

static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
                opt_disable_rseq, opt_threads = 200,
                opt_disable_mod = 0, opt_test = 's';

static long long opt_reps = 5000;
static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

static inline pid_t rseq_gettid(void)
{
        return syscall(__NR_gettid);
}
#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
int yield_mod_cnt, nr_abort;
#define printf_verbose(fmt, ...)                        \
        do {                                            \
                if (verbose)                            \
                        printf(fmt, ## __VA_ARGS__);    \
        } while (0)
#if defined(__i386__)

#define INJECT_ASM_REG  "eax"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

/*
 * Use ip-relative addressing to get the loop counter.
 */
#define __RSEQ_INJECT_ASM(n, ref_ip, ref_label) \
        "movl " __rseq_str(ref_ip) ", %%" INJECT_ASM_REG "\n\t" \
        "leal (asm_loop_cnt_" #n " - " __rseq_str(ref_label) "b)(%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
        "movl (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
        "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
        "jz 333f\n\t" \
        "222:\n\t" \
        "dec %%" INJECT_ASM_REG "\n\t" \
        "jnz 222b\n\t" \
        "333:\n\t"

#define RSEQ_INJECT_ASM(n) \
        __RSEQ_INJECT_ASM(n, %[ref_ip], RSEQ_ASM_REF_LABEL)
#elif defined(__x86_64__)

#define INJECT_ASM_REG_P        "rax"
#define INJECT_ASM_REG          "eax"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG_P \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
        "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
        "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
        "jz 333f\n\t" \
        "222:\n\t" \
        "dec %%" INJECT_ASM_REG "\n\t" \
        "jnz 222b\n\t" \
        "333:\n\t"
#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
        , [loop_cnt_1]"m"(loop_cnt[1]) \
        , [loop_cnt_2]"m"(loop_cnt[2]) \
        , [loop_cnt_3]"m"(loop_cnt[3]) \
        , [loop_cnt_4]"m"(loop_cnt[4]) \
        , [loop_cnt_5]"m"(loop_cnt[5]) \
        , [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG  "r12"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
        "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
        "je 333f\n\t" \
        "222:\n\t" \
        "ahi %%" INJECT_ASM_REG ", -1\n\t" \
        "jnz 222b\n\t" \
        "333:\n\t"
#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
        , [loop_cnt_1]"m"(loop_cnt[1]) \
        , [loop_cnt_2]"m"(loop_cnt[2]) \
        , [loop_cnt_3]"m"(loop_cnt[3]) \
        , [loop_cnt_4]"m"(loop_cnt[4]) \
        , [loop_cnt_5]"m"(loop_cnt[5]) \
        , [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG  "r4"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
        "cmp " INJECT_ASM_REG ", #0\n\t" \
        "beq 333f\n\t" \
        "222:\n\t" \
        "subs " INJECT_ASM_REG ", #1\n\t" \
        "bne 222b\n\t" \
        "333:\n\t"
#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
        , [loop_cnt_1] "Qo" (loop_cnt[1]) \
        , [loop_cnt_2] "Qo" (loop_cnt[2]) \
        , [loop_cnt_3] "Qo" (loop_cnt[3]) \
        , [loop_cnt_4] "Qo" (loop_cnt[4]) \
        , [loop_cnt_5] "Qo" (loop_cnt[5]) \
        , [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG  RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
        "       ldr     " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
        "       cbz     " INJECT_ASM_REG ", 333f\n" \
        "222:\n" \
        "       sub     " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
        "       cbnz    " INJECT_ASM_REG ", 222b\n" \
        "333:\n"
#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
        , [loop_cnt_1]"m"(loop_cnt[1]) \
        , [loop_cnt_2]"m"(loop_cnt[2]) \
        , [loop_cnt_3]"m"(loop_cnt[3]) \
        , [loop_cnt_4]"m"(loop_cnt[4]) \
        , [loop_cnt_5]"m"(loop_cnt[5]) \
        , [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG  "r18"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
        "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
        "beq 333f\n\t" \
        "222:\n\t" \
        "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
        "bne 222b\n\t" \
        "333:\n\t"
#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
        , [loop_cnt_1]"m"(loop_cnt[1]) \
        , [loop_cnt_2]"m"(loop_cnt[2]) \
        , [loop_cnt_3]"m"(loop_cnt[3]) \
        , [loop_cnt_4]"m"(loop_cnt[4]) \
        , [loop_cnt_5]"m"(loop_cnt[5]) \
        , [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG  "$5"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
        "beqz " INJECT_ASM_REG ", 333f\n\t" \
        "222:\n\t" \
        "addiu " INJECT_ASM_REG ", -1\n\t" \
        "bnez " INJECT_ASM_REG ", 222b\n\t" \
        "333:\n\t"
#elif defined(__riscv)

#define RSEQ_INJECT_INPUT \
        , [loop_cnt_1]"m"(loop_cnt[1]) \
        , [loop_cnt_2]"m"(loop_cnt[2]) \
        , [loop_cnt_3]"m"(loop_cnt[3]) \
        , [loop_cnt_4]"m"(loop_cnt[4]) \
        , [loop_cnt_5]"m"(loop_cnt[5]) \
        , [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG  "t1"

#define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
        "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
        "beqz " INJECT_ASM_REG ", 333f\n\t" \
        "222:\n\t" \
        "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
        "bnez " INJECT_ASM_REG ", 222b\n\t" \
        "333:\n\t"

#else
#error unsupported target
#endif
#define RSEQ_INJECT_FAILED \
        nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
        int loc_i, loc_nr_loops = loop_cnt[n]; \
        \
        for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
                rseq_barrier(); \
        } \
        if (loc_nr_loops == -1 && opt_modulo) { \
                if (yield_mod_cnt == opt_modulo - 1) { \
                        if (opt_sleep > 0) \
                                poll(NULL, 0, opt_sleep); \
                        if (opt_yield) \
                                sched_yield(); \
                        if (opt_signal) \
                                raise(SIGUSR1); \
                        yield_mod_cnt = 0; \
                } else \
                        yield_mod_cnt++; \
        } \
}
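/*
 * Note: injection points 7-9 use this C-level helper. Passing -1 as the
 * loop count (options -7/-8/-9) enables the modulo path above, where
 * every opt_modulo-th iteration sleeps (-s), yields (-y) or raises
 * SIGUSR1 (-k) to exercise rseq abort handling.
 */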
#else /* BENCHMARK */

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include <rseq/rseq.h>

static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
static int sys_membarrier(int cmd, int flags, int cpu_id)
{
        return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
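/*
 * Typical call sequence (a sketch of what
 * membarrier_private_expedited_rseq_available() and test_membarrier()
 * below do): query the supported commands, then register before using
 * the expedited rseq command.
 *
 *      int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
 *      if (status >= 0 && (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
 *              sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0);
 */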
#ifdef rseq_arch_has_load_add_load_load_add_store
#define TEST_MEMBARRIER
#endif

#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
# define RSEQ_PERCPU    RSEQ_PERCPU_MM_CID
static int get_current_cpu_id(void)
{
        return rseq_current_mm_cid();
}
static bool rseq_validate_cpu_id(void)
{
        return rseq_mm_cid_available();
}
static bool rseq_use_cpu_index(void)
{
        return false;   /* Use mm_cid */
}
# ifdef TEST_MEMBARRIER
/*
 * Membarrier does not currently support targeting a mm_cid, so
 * issue the barrier on all cpus.
 */
static int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
{
        return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, 0, 0);
}
# endif /* TEST_MEMBARRIER */
#else
# define RSEQ_PERCPU    RSEQ_PERCPU_CPU_ID
static int get_current_cpu_id(void)
{
        return rseq_cpu_start();
}
static bool rseq_validate_cpu_id(void)
{
        return rseq_current_cpu_raw() >= 0;
}
static bool rseq_use_cpu_index(void)
{
        return true;    /* Use cpu_id as index. */
}
# ifdef TEST_MEMBARRIER
static int rseq_membarrier_expedited(int cpu)
{
        return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
                              MEMBARRIER_CMD_FLAG_CPU, cpu);
}
# endif /* TEST_MEMBARRIER */
#endif
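/*
 * Two indexing modes are selected above: with BUILDOPT_RSEQ_PERCPU_MM_CID,
 * data is sharded by concurrency id (mm_cid), which is compact but cannot
 * be targeted by membarrier; otherwise data is sharded by the raw cpu
 * number, one slot per possible CPU, which membarrier can target.
 */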
struct percpu_lock {
        intptr_t v;
};

struct spinlock_test_data {
        struct percpu_lock lock;
        intptr_t count;
};

struct spinlock_thread_test_data {
        struct spinlock_test_data __rseq_percpu *data;
        long long reps;
        int reg;
};

struct inc_test_data {
        intptr_t count;
};

struct inc_thread_test_data {
        struct inc_test_data __rseq_percpu *data;
        long long reps;
        int reg;
};

struct percpu_list_node {
        intptr_t data;
        struct percpu_list_node *next;
};

struct percpu_list {
        struct percpu_list_node *head;
};

#define BUFFER_ITEM_PER_CPU 100

struct percpu_buffer_node {
        intptr_t data;
};

struct percpu_buffer {
        intptr_t offset;
        intptr_t buflen;
        struct percpu_buffer_node **array;
};

#define MEMCPY_BUFFER_ITEM_PER_CPU 100

struct percpu_memcpy_buffer_node {
        intptr_t data1;
        uint64_t data2;
};

struct percpu_memcpy_buffer {
        intptr_t offset;
        intptr_t buflen;
        struct percpu_memcpy_buffer_node *array;
};
/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock __rseq_percpu *lock)
{
        int cpu;

        for (;;) {
                int ret;

                cpu = get_current_cpu_id();
                if (cpu < 0) {
                        fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
                                getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
                        abort();
                }
                ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
                                &rseq_percpu_ptr(lock, cpu)->v,
                                0, 1, cpu);
                if (rseq_likely(!ret))
                        break;
                /* Retry if comparison fails or rseq aborts. */
        }
        /*
         * Acquire semantic when taking lock after control dependency.
         * Matches rseq_smp_store_release().
         */
        rseq_smp_acquire__after_ctrl_dep();
        return cpu;
}
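/*
 * The lock fast path is a single rseq compare-and-store: observe
 * v == 0 and store 1 into the current cpu's lock word. Preemption,
 * migration or signal delivery aborts the sequence, and the loop
 * retries, possibly on another cpu's lock word.
 */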
static void rseq_percpu_unlock(struct percpu_lock __rseq_percpu *lock, int cpu)
{
        assert(rseq_percpu_ptr(lock, cpu)->v == 1);
        /*
         * Release lock, with release semantic. Matches
         * rseq_smp_acquire__after_ctrl_dep().
         */
        rseq_smp_store_release(&rseq_percpu_ptr(lock, cpu)->v, 0);
}
static void *test_percpu_spinlock_thread(void *arg)
{
        struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
        struct spinlock_test_data __rseq_percpu *data = thread_data->data;
        long long i, reps;

        if (!opt_disable_rseq && thread_data->reg &&
            rseq_register_current_thread())
                abort();
        reps = thread_data->reps;
        for (i = 0; i < reps; i++) {
                int cpu = rseq_this_cpu_lock(&data->lock);
                rseq_percpu_ptr(data, cpu)->count++;
                rseq_percpu_unlock(&data->lock, cpu);
                if (i != 0 && !(i % (reps / 10)))
                        printf_verbose("tid %d: count %lld\n",
                                       (int) rseq_gettid(), i);
        }
        printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
                       (int) rseq_gettid(), nr_abort, signals_delivered);
        if (!opt_disable_rseq && thread_data->reg &&
            rseq_unregister_current_thread())
                abort();
        return NULL;
}
/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
static void test_percpu_spinlock(void)
{
        const int num_threads = opt_threads;
        int i, ret, max_nr_cpus;
        uint64_t sum = 0;
        pthread_t test_threads[num_threads];
        struct spinlock_test_data __rseq_percpu *data;
        struct spinlock_thread_test_data thread_data[num_threads];
        struct rseq_mempool *mempool;
        struct rseq_mempool_attr *attr;

        attr = rseq_mempool_attr_create();
        if (!attr) {
                perror("rseq_mempool_attr_create");
                abort();
        }
        ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
        if (ret) {
                perror("rseq_mempool_attr_set_percpu");
                abort();
        }
        mempool = rseq_mempool_create("spinlock_test_data",
                        sizeof(struct spinlock_test_data), attr);
        if (!mempool) {
                perror("rseq_mempool_create");
                abort();
        }
        rseq_mempool_attr_destroy(attr);
        max_nr_cpus = rseq_mempool_get_max_nr_cpus(mempool);
        data = (struct spinlock_test_data __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
        if (!data) {
                perror("rseq_mempool_percpu_zmalloc");
                abort();
        }

        for (i = 0; i < num_threads; i++) {
                thread_data[i].reps = opt_reps;
                if (opt_disable_mod <= 0 || (i % opt_disable_mod))
                        thread_data[i].reg = 1;
                else
                        thread_data[i].reg = 0;
                thread_data[i].data = data;
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_spinlock_thread,
                                     &thread_data[i]);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        for (i = 0; i < max_nr_cpus; i++)
                sum += rseq_percpu_ptr(data, i)->count;

        assert(sum == (uint64_t)opt_reps * num_threads);
        rseq_mempool_percpu_free(data);
        ret = rseq_mempool_destroy(mempool);
        if (ret) {
                perror("rseq_mempool_destroy");
                abort();
        }
}
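/*
 * Allocation pattern shared by all tests below: create a per-cpu
 * mempool with RSEQ_MEMPOOL_STRIDE between per-cpu slots, allocate a
 * zeroed per-cpu object with rseq_mempool_percpu_zmalloc(), and resolve
 * a given cpu's slot with rseq_percpu_ptr(ptr, cpu).
 */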
static void *test_percpu_inc_thread(void *arg)
{
        struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
        struct inc_test_data __rseq_percpu *data = thread_data->data;
        long long i, reps;

        if (!opt_disable_rseq && thread_data->reg &&
            rseq_register_current_thread())
                abort();
        reps = thread_data->reps;
        for (i = 0; i < reps; i++) {
                int ret;

                do {
                        int cpu;

                        cpu = get_current_cpu_id();
                        ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
                                        &rseq_percpu_ptr(data, cpu)->count, 1, cpu);
                } while (rseq_unlikely(ret));

                if (i != 0 && !(i % (reps / 10)))
                        printf_verbose("tid %d: count %lld\n",
                                       (int) rseq_gettid(), i);
        }
        printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
                       (int) rseq_gettid(), nr_abort, signals_delivered);
        if (!opt_disable_rseq && thread_data->reg &&
            rseq_unregister_current_thread())
                abort();
        return NULL;
}
static void test_percpu_inc(void)
{
        const int num_threads = opt_threads;
        int i, ret, max_nr_cpus;
        uint64_t sum = 0;
        pthread_t test_threads[num_threads];
        struct inc_test_data __rseq_percpu *data;
        struct inc_thread_test_data thread_data[num_threads];
        struct rseq_mempool *mempool;
        struct rseq_mempool_attr *attr;

        attr = rseq_mempool_attr_create();
        if (!attr) {
                perror("rseq_mempool_attr_create");
                abort();
        }
        ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
        if (ret) {
                perror("rseq_mempool_attr_set_percpu");
                abort();
        }
        mempool = rseq_mempool_create("inc_test_data",
                        sizeof(struct inc_test_data), attr);
        if (!mempool) {
                perror("rseq_mempool_create");
                abort();
        }
        rseq_mempool_attr_destroy(attr);
        max_nr_cpus = rseq_mempool_get_max_nr_cpus(mempool);
        data = (struct inc_test_data __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
        if (!data) {
                perror("rseq_mempool_percpu_zmalloc");
                abort();
        }

        for (i = 0; i < num_threads; i++) {
                thread_data[i].reps = opt_reps;
                if (opt_disable_mod <= 0 || (i % opt_disable_mod))
                        thread_data[i].reg = 1;
                else
                        thread_data[i].reg = 0;
                thread_data[i].data = data;
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_inc_thread,
                                     &thread_data[i]);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        for (i = 0; i < max_nr_cpus; i++)
                sum += rseq_percpu_ptr(data, i)->count;

        assert(sum == (uint64_t)opt_reps * num_threads);
        rseq_mempool_percpu_free(data);
        ret = rseq_mempool_destroy(mempool);
        if (ret) {
                perror("rseq_mempool_destroy");
                abort();
        }
}
static void this_cpu_list_push(struct percpu_list __rseq_percpu *list,
                               struct percpu_list_node *node,
                               int *_cpu)
{
        int cpu;

        for (;;) {
                intptr_t *targetptr, newval, expect;
                struct percpu_list *cpulist;
                int ret;

                cpu = get_current_cpu_id();
                cpulist = rseq_percpu_ptr(list, cpu);
                /* Load list->c[cpu].head with single-copy atomicity. */
                expect = (intptr_t)RSEQ_READ_ONCE(cpulist->head);
                newval = (intptr_t)node;
                targetptr = (intptr_t *)&cpulist->head;
                node->next = (struct percpu_list_node *)expect;
                ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
                                targetptr, expect, newval, cpu);
                if (rseq_likely(!ret))
                        break;
                /* Retry if comparison fails or rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
}
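/*
 * The push above is effectively a per-cpu compare-and-store (sketch,
 * ignoring the abort/retry plumbing):
 *
 *      if (*targetptr != expect)
 *              fail;
 *      *targetptr = newval;
 *
 * with node->next linked to the observed head before the commit, so
 * the node is published atomically with respect to this cpu.
 */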
/*
 * Unlike a traditional lock-less linked list, the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
static struct percpu_list_node *this_cpu_list_pop(struct percpu_list __rseq_percpu *list,
                                                  int *_cpu)
{
        struct percpu_list_node *node = NULL;
        int cpu;

        for (;;) {
                struct percpu_list_node *head;
                intptr_t *targetptr, expectnot, *load;
                struct percpu_list *cpulist;
                long offset;
                int ret;

                cpu = get_current_cpu_id();
                cpulist = rseq_percpu_ptr(list, cpu);
                targetptr = (intptr_t *)&cpulist->head;
                expectnot = (intptr_t)NULL;
                offset = offsetof(struct percpu_list_node, next);
                load = (intptr_t *)&head;
                ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
                                targetptr, expectnot,
                                offset, load, cpu);
                if (rseq_likely(!ret)) {
                        node = head;
                        break;
                }
                if (ret > 0)
                        break;
                /* Retry if rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
        return node;
}
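/*
 * Roughly, the pop primitive executes the following as one restartable
 * sequence (sketch):
 *
 *      head = (void *)*targetptr;
 *      if ((intptr_t)head == expectnot)
 *              fail (ret > 0);                 // empty list
 *      *load = (intptr_t)head;
 *      *targetptr = *(intptr_t *)((char *)head + offset);  // head->next
 *
 * No other thread can interleave on this cpu between the loads and the
 * final store, which is why no ABA hazard exists.
 */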
/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
static struct percpu_list_node *__percpu_list_pop(struct percpu_list __rseq_percpu *list, int cpu)
{
        struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
        struct percpu_list_node *node;

        node = cpulist->head;
        if (!node)
                return NULL;
        cpulist->head = node->next;
        return node;
}
static void *test_percpu_list_thread(void *arg)
{
        long long i, reps;
        struct percpu_list __rseq_percpu *list = (struct percpu_list __rseq_percpu *)arg;

        if (!opt_disable_rseq && rseq_register_current_thread())
                abort();

        reps = opt_reps;
        for (i = 0; i < reps; i++) {
                struct percpu_list_node *node;

                node = this_cpu_list_pop(list, NULL);
                if (opt_yield)
                        sched_yield();  /* encourage shuffling */
                if (node)
                        this_cpu_list_push(list, node, NULL);
        }

        printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
                       (int) rseq_gettid(), nr_abort, signals_delivered);
        if (!opt_disable_rseq && rseq_unregister_current_thread())
                abort();

        return NULL;
}
/* Simultaneous modification to a per-cpu linked list from many threads. */
static void test_percpu_list(void)
{
        const int num_threads = opt_threads;
        int i, j, ret, max_nr_cpus;
        uint64_t sum = 0, expected_sum = 0;
        struct percpu_list __rseq_percpu *list;
        pthread_t test_threads[num_threads];
        cpu_set_t allowed_cpus;
        struct rseq_mempool *mempool;
        struct rseq_mempool_attr *attr;

        attr = rseq_mempool_attr_create();
        if (!attr) {
                perror("rseq_mempool_attr_create");
                abort();
        }
        ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
        if (ret) {
                perror("rseq_mempool_attr_set_percpu");
                abort();
        }
        mempool = rseq_mempool_create("percpu_list",
                        sizeof(struct percpu_list), attr);
        if (!mempool) {
                perror("rseq_mempool_create");
                abort();
        }
        rseq_mempool_attr_destroy(attr);
        max_nr_cpus = rseq_mempool_get_max_nr_cpus(mempool);
        list = (struct percpu_list __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
        if (!list) {
                perror("rseq_mempool_percpu_zmalloc");
                abort();
        }

        /* Generate list entries for every usable cpu. */
        sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
        for (i = 0; i < max_nr_cpus; i++) {
                if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                        continue;
                for (j = 1; j <= 100; j++) {
                        struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
                        struct percpu_list_node *node;

                        expected_sum += j;

                        node = (struct percpu_list_node *) malloc(sizeof(*node));
                        assert(node);
                        node->data = j;
                        node->next = cpulist->head;
                        cpulist->head = node;
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_list_thread, list);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        for (i = 0; i < max_nr_cpus; i++) {
                struct percpu_list_node *node;

                if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                        continue;

                while ((node = __percpu_list_pop(list, i))) {
                        sum += node->data;
                        free(node);
                }
        }

        /*
         * All entries should now be accounted for (unless some external
         * actor is interfering with our allowed affinity while this
         * test is running).
         */
        assert(sum == expected_sum);
        rseq_mempool_percpu_free(list);
        ret = rseq_mempool_destroy(mempool);
        if (ret) {
                perror("rseq_mempool_destroy");
                abort();
        }
}
static bool this_cpu_buffer_push(struct percpu_buffer __rseq_percpu *buffer,
                                 struct percpu_buffer_node *node,
                                 int *_cpu)
{
        bool result = false;
        int cpu;

        for (;;) {
                struct percpu_buffer *cpubuffer;
                intptr_t *targetptr_spec, newval_spec;
                intptr_t *targetptr_final, newval_final;
                intptr_t offset;
                int ret;

                cpu = get_current_cpu_id();
                cpubuffer = rseq_percpu_ptr(buffer, cpu);
                offset = RSEQ_READ_ONCE(cpubuffer->offset);
                if (offset == cpubuffer->buflen)
                        break;
                newval_spec = (intptr_t)node;
                targetptr_spec = (intptr_t *)&cpubuffer->array[offset];
                newval_final = offset + 1;
                targetptr_final = &cpubuffer->offset;
                ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
                                targetptr_final, offset, targetptr_spec,
                                newval_spec, newval_final, cpu);
                if (rseq_likely(!ret)) {
                        result = true;
                        break;
                }
                /* Retry if comparison fails or rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
        return result;
}
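/*
 * The push uses a two-store sequence: the node pointer is first stored
 * speculatively into array[offset], then the final store publishing
 * offset + 1 commits it. opt_mo (set to RSEQ_MO_RELEASE by -M) selects
 * whether that committing store has release semantics.
 */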
static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
                                                      int *_cpu)
{
        struct percpu_buffer_node *head;
        int cpu;

        for (;;) {
                struct percpu_buffer *cpubuffer;
                intptr_t *targetptr, newval;
                intptr_t offset;
                int ret;

                cpu = get_current_cpu_id();
                cpubuffer = rseq_percpu_ptr(buffer, cpu);
                /* Load offset with single-copy atomicity. */
                offset = RSEQ_READ_ONCE(cpubuffer->offset);
                if (offset == 0) {
                        head = NULL;
                        break;
                }
                head = RSEQ_READ_ONCE(cpubuffer->array[offset - 1]);
                newval = offset - 1;
                targetptr = (intptr_t *)&cpubuffer->offset;
                ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
                                targetptr, offset,
                                (intptr_t *)&cpubuffer->array[offset - 1],
                                (intptr_t)head, newval, cpu);
                if (rseq_likely(!ret))
                        break;
                /* Retry if comparison fails or rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
        return head;
}
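/*
 * Pop re-validates both locations inside the restartable sequence: the
 * offset must still match the value read above, and the array slot must
 * still hold the same head pointer, before the decremented offset is
 * stored. Any intervening update causes a compare failure or abort,
 * and the loop retries.
 */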
/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
                                                      int cpu)
{
        struct percpu_buffer *cpubuffer;
        struct percpu_buffer_node *head;
        intptr_t offset;

        cpubuffer = rseq_percpu_ptr(buffer, cpu);
        offset = cpubuffer->offset;
        if (offset == 0)
                return NULL;
        head = cpubuffer->array[offset - 1];
        cpubuffer->offset = offset - 1;
        return head;
}
static void *test_percpu_buffer_thread(void *arg)
{
        long long i, reps;
        struct percpu_buffer __rseq_percpu *buffer = (struct percpu_buffer __rseq_percpu *)arg;

        if (!opt_disable_rseq && rseq_register_current_thread())
                abort();

        reps = opt_reps;
        for (i = 0; i < reps; i++) {
                struct percpu_buffer_node *node;

                node = this_cpu_buffer_pop(buffer, NULL);
                if (opt_yield)
                        sched_yield();  /* encourage shuffling */
                if (node) {
                        if (!this_cpu_buffer_push(buffer, node, NULL)) {
                                /* Should increase buffer size. */
                                abort();
                        }
                }
        }

        printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
                       (int) rseq_gettid(), nr_abort, signals_delivered);
        if (!opt_disable_rseq && rseq_unregister_current_thread())
                abort();

        return NULL;
}
/* Simultaneous modification to a per-cpu buffer from many threads. */
static void test_percpu_buffer(void)
{
        const int num_threads = opt_threads;
        int i, j, ret, max_nr_cpus;
        uint64_t sum = 0, expected_sum = 0;
        struct percpu_buffer __rseq_percpu *buffer;
        pthread_t test_threads[num_threads];
        cpu_set_t allowed_cpus;
        struct rseq_mempool *mempool;
        struct rseq_mempool_attr *attr;

        attr = rseq_mempool_attr_create();
        if (!attr) {
                perror("rseq_mempool_attr_create");
                abort();
        }
        ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
        if (ret) {
                perror("rseq_mempool_attr_set_percpu");
                abort();
        }
        mempool = rseq_mempool_create("percpu_buffer",
                        sizeof(struct percpu_buffer), attr);
        if (!mempool) {
                perror("rseq_mempool_create");
                abort();
        }
        rseq_mempool_attr_destroy(attr);
        max_nr_cpus = rseq_mempool_get_max_nr_cpus(mempool);
        buffer = (struct percpu_buffer __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
        if (!buffer) {
                perror("rseq_mempool_percpu_zmalloc");
                abort();
        }

        /* Generate list entries for every usable cpu. */
        sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
        for (i = 0; i < max_nr_cpus; i++) {
                struct percpu_buffer *cpubuffer;

                if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                        continue;
                cpubuffer = rseq_percpu_ptr(buffer, i);
                /* Worst-case is every item in the same CPU. */
                cpubuffer->array =
                        (struct percpu_buffer_node **)
                        malloc(sizeof(*cpubuffer->array) * max_nr_cpus *
                               BUFFER_ITEM_PER_CPU);
                assert(cpubuffer->array);
                cpubuffer->buflen = max_nr_cpus * BUFFER_ITEM_PER_CPU;
                for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
                        struct percpu_buffer_node *node;

                        expected_sum += j;

                        /*
                         * We could theoretically put the word-sized
                         * "data" directly in the buffer. However, we
                         * want to model objects that would not fit
                         * within a single word, so allocate an object
                         * for each node.
                         */
                        node = (struct percpu_buffer_node *) malloc(sizeof(*node));
                        assert(node);
                        node->data = j;
                        cpubuffer->array[j - 1] = node;
                        cpubuffer->offset++;
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_buffer_thread, buffer);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        for (i = 0; i < max_nr_cpus; i++) {
                struct percpu_buffer *cpubuffer;
                struct percpu_buffer_node *node;

                if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                        continue;

                cpubuffer = rseq_percpu_ptr(buffer, i);
                while ((node = __percpu_buffer_pop(buffer, i))) {
                        sum += node->data;
                        free(node);
                }
                free(cpubuffer->array);
        }

        /*
         * All entries should now be accounted for (unless some external
         * actor is interfering with our allowed affinity while this
         * test is running).
         */
        assert(sum == expected_sum);
        rseq_mempool_percpu_free(buffer);
        ret = rseq_mempool_destroy(mempool);
        if (ret) {
                perror("rseq_mempool_destroy");
                abort();
        }
}
static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer __rseq_percpu *buffer,
                                        struct percpu_memcpy_buffer_node item,
                                        int *_cpu)
{
        bool result = false;
        int cpu;

        for (;;) {
                struct percpu_memcpy_buffer *cpubuffer;
                intptr_t *targetptr_final, newval_final, offset;
                char *destptr, *srcptr;
                size_t copylen;
                int ret;

                cpu = get_current_cpu_id();
                cpubuffer = rseq_percpu_ptr(buffer, cpu);
                /* Load offset with single-copy atomicity. */
                offset = RSEQ_READ_ONCE(cpubuffer->offset);
                if (offset == cpubuffer->buflen)
                        break;
                destptr = (char *)&cpubuffer->array[offset];
                srcptr = (char *)&item;
                /* copylen must be <= 4kB. */
                copylen = sizeof(item);
                newval_final = offset + 1;
                targetptr_final = &cpubuffer->offset;
                ret = rseq_load_cbne_memcpy_store__ptr(
                        opt_mo, RSEQ_PERCPU,
                        targetptr_final, offset,
                        destptr, srcptr, copylen,
                        newval_final, cpu);
                if (rseq_likely(!ret)) {
                        result = true;
                        break;
                }
                /* Retry if comparison fails or rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
        return result;
}
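/*
 * Unlike the pointer-based buffer, this push copies the whole node by
 * value inside the restartable sequence (hence the <= 4kB copylen
 * limit noted above), then commits by storing the updated offset.
 * opt_mo again selects a release store for the commit when -M is used.
 */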
static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
                                       struct percpu_memcpy_buffer_node *item,
                                       int *_cpu)
{
        bool result = false;
        int cpu;

        for (;;) {
                struct percpu_memcpy_buffer *cpubuffer;
                intptr_t *targetptr_final, newval_final, offset;
                char *destptr, *srcptr;
                size_t copylen;
                int ret;

                cpu = get_current_cpu_id();
                cpubuffer = rseq_percpu_ptr(buffer, cpu);
                /* Load offset with single-copy atomicity. */
                offset = RSEQ_READ_ONCE(cpubuffer->offset);
                if (offset == 0)
                        break;
                destptr = (char *)item;
                srcptr = (char *)&cpubuffer->array[offset - 1];
                /* copylen must be <= 4kB. */
                copylen = sizeof(*item);
                newval_final = offset - 1;
                targetptr_final = &cpubuffer->offset;
                ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
                        targetptr_final, offset, destptr, srcptr, copylen,
                        newval_final, cpu);
                if (rseq_likely(!ret)) {
                        result = true;
                        break;
                }
                /* Retry if comparison fails or rseq aborts. */
        }
        if (_cpu)
                *_cpu = cpu;
        return result;
}
/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
                                       struct percpu_memcpy_buffer_node *item,
                                       int cpu)
{
        struct percpu_memcpy_buffer *cpubuffer;
        intptr_t offset;

        cpubuffer = rseq_percpu_ptr(buffer, cpu);
        offset = cpubuffer->offset;
        if (offset == 0)
                return false;
        memcpy(item, &cpubuffer->array[offset - 1], sizeof(*item));
        cpubuffer->offset = offset - 1;
        return true;
}
static void *test_percpu_memcpy_buffer_thread(void *arg)
{
        long long i, reps;
        struct percpu_memcpy_buffer __rseq_percpu *buffer = (struct percpu_memcpy_buffer __rseq_percpu *)arg;

        if (!opt_disable_rseq && rseq_register_current_thread())
                abort();

        reps = opt_reps;
        for (i = 0; i < reps; i++) {
                struct percpu_memcpy_buffer_node item;
                bool result;

                result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
                if (opt_yield)
                        sched_yield();  /* encourage shuffling */
                if (result) {
                        if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
                                /* Should increase buffer size. */
                                abort();
                        }
                }
        }

        printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
                       (int) rseq_gettid(), nr_abort, signals_delivered);
        if (!opt_disable_rseq && rseq_unregister_current_thread())
                abort();

        return NULL;
}
/* Simultaneous modification to a per-cpu buffer from many threads. */
static void test_percpu_memcpy_buffer(void)
{
        const int num_threads = opt_threads;
        int i, j, ret, max_nr_cpus;
        uint64_t sum = 0, expected_sum = 0;
        struct percpu_memcpy_buffer __rseq_percpu *buffer;
        pthread_t test_threads[num_threads];
        cpu_set_t allowed_cpus;
        struct rseq_mempool *mempool;
        struct rseq_mempool_attr *attr;

        attr = rseq_mempool_attr_create();
        if (!attr) {
                perror("rseq_mempool_attr_create");
                abort();
        }
        ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
        if (ret) {
                perror("rseq_mempool_attr_set_percpu");
                abort();
        }
        mempool = rseq_mempool_create("percpu_memcpy_buffer",
                        sizeof(struct percpu_memcpy_buffer), attr);
        if (!mempool) {
                perror("rseq_mempool_create");
                abort();
        }
        rseq_mempool_attr_destroy(attr);
        max_nr_cpus = rseq_mempool_get_max_nr_cpus(mempool);
        buffer = (struct percpu_memcpy_buffer __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
        if (!buffer) {
                perror("rseq_mempool_percpu_zmalloc");
                abort();
        }

        /* Generate list entries for every usable cpu. */
        sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
        for (i = 0; i < max_nr_cpus; i++) {
                struct percpu_memcpy_buffer *cpubuffer;

                if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                        continue;
                cpubuffer = rseq_percpu_ptr(buffer, i);
                /* Worst-case is every item in the same CPU. */
                cpubuffer->array =
                        (struct percpu_memcpy_buffer_node *)
                        malloc(sizeof(*cpubuffer->array) * max_nr_cpus *
                               MEMCPY_BUFFER_ITEM_PER_CPU);
                assert(cpubuffer->array);
                cpubuffer->buflen = max_nr_cpus * MEMCPY_BUFFER_ITEM_PER_CPU;
                for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
                        expected_sum += 2 * j + 1;

                        /*
                         * We could theoretically put the word-sized
                         * "data" directly in the buffer. However, we
                         * want to model objects that would not fit
                         * within a single word, so allocate an object
                         * for each node.
                         */
                        cpubuffer->array[j - 1].data1 = j;
                        cpubuffer->array[j - 1].data2 = j + 1;
                        cpubuffer->offset++;
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_memcpy_buffer_thread,
                                     buffer);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(test_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        for (i = 0; i < max_nr_cpus; i++) {
                struct percpu_memcpy_buffer_node item;
                struct percpu_memcpy_buffer *cpubuffer;

                if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                        continue;

                cpubuffer = rseq_percpu_ptr(buffer, i);
                while (__percpu_memcpy_buffer_pop(buffer, &item, i)) {
                        sum += item.data1;
                        sum += item.data2;
                }
                free(cpubuffer->array);
        }

        /*
         * All entries should now be accounted for (unless some external
         * actor is interfering with our allowed affinity while this
         * test is running).
         */
        assert(sum == expected_sum);
        rseq_mempool_percpu_free(buffer);
        ret = rseq_mempool_destroy(mempool);
        if (ret) {
                perror("rseq_mempool_destroy");
                abort();
        }
}
static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
{
        signals_delivered++;
}

static int set_signal_handler(void)
{
        int ret = 0;
        struct sigaction sa;
        sigset_t sigset;

        ret = sigemptyset(&sigset);
        if (ret < 0) {
                perror("sigemptyset");
                return ret;
        }

        sa.sa_handler = test_signal_interrupt_handler;
        sa.sa_mask = sigset;
        sa.sa_flags = 0;
        ret = sigaction(SIGUSR1, &sa, NULL);
        if (ret < 0) {
                perror("sigaction");
                return ret;
        }

        printf_verbose("Signal handler set for SIGUSR1\n");

        return ret;
}
static bool membarrier_private_expedited_rseq_available(void)
{
        int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);

        if (status < 0) {
                perror("membarrier");
                return false;
        }
        if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
                return false;
        return true;
}
/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
#ifdef TEST_MEMBARRIER
struct test_membarrier_thread_args {
        struct rseq_mempool *mempool;
        struct percpu_list __rseq_percpu *percpu_list_ptr;
        int stop;
        int max_nr_cpus;
};

/* Worker threads modify data in their "active" percpu lists. */
static void *test_membarrier_worker_thread(void *arg)
{
        struct test_membarrier_thread_args *args =
                (struct test_membarrier_thread_args *)arg;
        const long long iters = opt_reps;
        long long i;

        if (rseq_register_current_thread()) {
                fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
                        errno, strerror(errno));
                abort();
        }

        /* Wait for initialization. */
        while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }

        for (i = 0; i < iters; ++i) {
                int ret;

                do {
                        int cpu = get_current_cpu_id();

                        ret = rseq_load_add_load_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
                                (intptr_t *) &args->percpu_list_ptr,
                                (RSEQ_MEMPOOL_STRIDE * cpu) + offsetof(struct percpu_list, head),
                                1, cpu);
                } while (rseq_unlikely(ret));
        }

        if (rseq_unregister_current_thread()) {
                fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
                        errno, strerror(errno));
                abort();
        }
        return NULL;
}
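/*
 * The load_add_load_load_add_store sequence above dereferences the
 * shared percpu_list_ptr, offsets it to this cpu's list head, and
 * increments head->data, all within one restartable sequence. The
 * manager thread relies on membarrier to ensure no worker is still
 * inside this sequence with a stale list pointer after a swap.
 */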
static struct percpu_list __rseq_percpu *test_membarrier_alloc_percpu_list(struct rseq_mempool *mempool)
{
        struct percpu_list __rseq_percpu *list;
        int i, max_nr_cpus;

        max_nr_cpus = rseq_mempool_get_max_nr_cpus(mempool);
        list = (struct percpu_list __rseq_percpu *)rseq_mempool_percpu_zmalloc(mempool);
        if (!list) {
                perror("rseq_mempool_percpu_zmalloc");
                return NULL;
        }
        for (i = 0; i < max_nr_cpus; i++) {
                struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
                struct percpu_list_node *node;

                node = (struct percpu_list_node *) malloc(sizeof(*node));
                assert(node);
                node->data = 0;
                node->next = NULL;
                cpulist->head = node;
        }
        return list;
}
static void test_membarrier_free_percpu_list(struct test_membarrier_thread_args *args,
                                             struct percpu_list __rseq_percpu *list)
{
        int i;

        for (i = 0; i < args->max_nr_cpus; i++)
                free(rseq_percpu_ptr(list, i)->head);
        rseq_mempool_percpu_free(list);
}
static long long test_membarrier_count_percpu_list(struct test_membarrier_thread_args *args,
                                                   struct percpu_list __rseq_percpu *list)
{
        long long total_count = 0;
        int i;

        for (i = 0; i < args->max_nr_cpus; i++)
                total_count += rseq_percpu_ptr(list, i)->head->data;
        return total_count;
}
/*
 * The manager thread swaps per-cpu lists that worker threads see,
 * and validates that there are no unexpected modifications.
 */
static void *test_membarrier_manager_thread(void *arg)
{
        struct test_membarrier_thread_args *args =
                (struct test_membarrier_thread_args *)arg;
        struct percpu_list __rseq_percpu *list_a, __rseq_percpu *list_b;
        intptr_t expect_a = 0, expect_b = 0;
        int cpu_a = 0, cpu_b = 0;
        struct rseq_mempool *mempool;
        int ret;
        long long total_count = 0;
        struct rseq_mempool_attr *attr;

        attr = rseq_mempool_attr_create();
        if (!attr) {
                perror("rseq_mempool_attr_create");
                abort();
        }
        ret = rseq_mempool_attr_set_percpu(attr, RSEQ_MEMPOOL_STRIDE, 0);
        if (ret) {
                perror("rseq_mempool_attr_set_percpu");
                abort();
        }
        mempool = rseq_mempool_create("percpu_list",
                        sizeof(struct percpu_list), attr);
        if (!mempool) {
                perror("rseq_mempool_create");
                abort();
        }
        rseq_mempool_attr_destroy(attr);
        args->max_nr_cpus = rseq_mempool_get_max_nr_cpus(mempool);
        args->mempool = mempool;

        if (rseq_register_current_thread()) {
                fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
                        errno, strerror(errno));
                abort();
        }

        /* Init lists. */
        list_a = test_membarrier_alloc_percpu_list(mempool);
        assert(list_a);
        list_b = test_membarrier_alloc_percpu_list(mempool);
        assert(list_b);

        /* Initialize lists before publishing them. */
        rseq_smp_wmb();

        RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);

        while (!RSEQ_READ_ONCE(args->stop)) {
                /* list_a is "active". */
                cpu_a = rand() % args->max_nr_cpus;
                /*
                 * As list_b is "inactive", we should never see changes
                 * to list_b.
                 */
                if (expect_b != RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data)) {
                        fprintf(stderr, "Membarrier test failed\n");
                        abort();
                }

                /* Make list_b "active". */
                RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_b);
                if (rseq_membarrier_expedited(cpu_a) &&
                    errno != ENXIO /* missing CPU */) {
                        perror("sys_membarrier");
                        abort();
                }
                /*
                 * Cpu A should now only modify list_b, so the values
                 * in list_a should be stable.
                 */
                expect_a = RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data);

                cpu_b = rand() % args->max_nr_cpus;
                /*
                 * As list_a is "inactive", we should never see changes
                 * to list_a.
                 */
                if (expect_a != RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data)) {
                        fprintf(stderr, "Membarrier test failed\n");
                        abort();
                }

                /* Make list_a "active". */
                RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
                if (rseq_membarrier_expedited(cpu_b) &&
                    errno != ENXIO /* missing CPU */) {
                        perror("sys_membarrier");
                        abort();
                }
                /* Remember a value from list_b. */
                expect_b = RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data);
        }

        total_count += test_membarrier_count_percpu_list(args, list_a);
        total_count += test_membarrier_count_percpu_list(args, list_b);

        /* Validate that we observe the right number of increments. */
        if (total_count != opt_threads * opt_reps) {
                fprintf(stderr, "Error: Observed %lld increments, expected %lld\n",
                        total_count, opt_threads * opt_reps);
                abort();
        }
        test_membarrier_free_percpu_list(args, list_a);
        test_membarrier_free_percpu_list(args, list_b);

        if (rseq_unregister_current_thread()) {
                fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
                        errno, strerror(errno));
                abort();
        }
        ret = rseq_mempool_destroy(mempool);
        if (ret) {
                perror("rseq_mempool_destroy");
                abort();
        }
        return NULL;
}
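/*
 * Swap protocol, in short: the manager publishes the new active list
 * pointer, then issues MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ targeting
 * the cpu that used the old list. The membarrier restarts any rseq
 * critical section in flight on that cpu, so once it returns, no worker
 * can still increment through the stale pointer and the inactive list
 * must stay stable.
 */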
static void test_membarrier(void)
{
        const int num_threads = opt_threads;
        struct test_membarrier_thread_args thread_args;
        pthread_t worker_threads[num_threads];
        pthread_t manager_thread;
        int i, ret;

        if (!membarrier_private_expedited_rseq_available()) {
                fprintf(stderr, "Membarrier private expedited rseq not available. "
                        "Skipping membarrier test.\n");
                return;
        }
        if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
                perror("sys_membarrier");
                abort();
        }

        thread_args.percpu_list_ptr = NULL;
        thread_args.stop = 0;
        ret = pthread_create(&manager_thread, NULL,
                             test_membarrier_manager_thread, &thread_args);
        if (ret) {
                errno = ret;
                perror("pthread_create");
                abort();
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&worker_threads[i], NULL,
                                     test_membarrier_worker_thread, &thread_args);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
                        abort();
                }
        }

        for (i = 0; i < num_threads; i++) {
                ret = pthread_join(worker_threads[i], NULL);
                if (ret) {
                        errno = ret;
                        perror("pthread_join");
                        abort();
                }
        }

        RSEQ_WRITE_ONCE(thread_args.stop, 1);
        ret = pthread_join(manager_thread, NULL);
        if (ret) {
                errno = ret;
                perror("pthread_join");
                abort();
        }
}
#else /* TEST_MEMBARRIER */

static void test_membarrier(void)
{
        if (!membarrier_private_expedited_rseq_available()) {
                fprintf(stderr, "Membarrier private expedited rseq not available. "
                        "Skipping membarrier test.\n");
                return;
        }
        fprintf(stderr, "rseq_load_add_load_load_add_store__ptr is not implemented on this architecture. "
                "Skipping membarrier test.\n");
}
#endif /* TEST_MEMBARRIER */
static void show_usage(char **argv)
{
        printf("Usage : %s <OPTIONS>\n",
                argv[0]);
        printf("OPTIONS:\n");
        printf("        [-1 loops] Number of loops for delay injection 1\n");
        printf("        [-2 loops] Number of loops for delay injection 2\n");
        printf("        [-3 loops] Number of loops for delay injection 3\n");
        printf("        [-4 loops] Number of loops for delay injection 4\n");
        printf("        [-5 loops] Number of loops for delay injection 5\n");
        printf("        [-6 loops] Number of loops for delay injection 6\n");
        printf("        [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
        printf("        [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
        printf("        [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
        printf("        [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
        printf("        [-y] Yield\n");
        printf("        [-k] Kill thread with signal\n");
        printf("        [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
        printf("        [-t N] Number of threads (default 200)\n");
        printf("        [-r N] Number of repetitions per thread (default 5000)\n");
        printf("        [-d] Disable rseq system call (no initialization)\n");
        printf("        [-D M] Disable rseq for each M threads\n");
        printf("        [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
        printf("        [-M] Push into buffer and memcpy buffer with memory barriers.\n");
        printf("        [-c] Check if the rseq syscall is available.\n");
        printf("        [-v] Verbose output.\n");
        printf("        [-h] Show this help.\n");
}
int main(int argc, char **argv)
{
        int i;

        for (i = 1; i < argc; i++) {
                if (argv[i][0] != '-')
                        continue;
                switch (argv[i][1]) {
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                        if (argc < i + 2) {
                                show_usage(argv);
                                goto error;
                        }
                        loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
                        i++;
                        break;
                case 'm':
                        if (argc < i + 2) {
                                show_usage(argv);
                                goto error;
                        }
                        opt_modulo = atol(argv[i + 1]);
                        if (opt_modulo < 0) {
                                show_usage(argv);
                                goto error;
                        }
                        i++;
                        break;
                case 'y':
                        opt_yield = 1;
                        break;
                case 's':
                        if (argc < i + 2) {
                                show_usage(argv);
                                goto error;
                        }
                        opt_sleep = atol(argv[i + 1]);
                        if (opt_sleep < 0) {
                                show_usage(argv);
                                goto error;
                        }
                        i++;
                        break;
                case 'k':
                        opt_signal = 1;
                        break;
                case 'd':
                        opt_disable_rseq = 1;
                        break;
                case 'D':
                        if (argc < i + 2) {
                                show_usage(argv);
                                goto error;
                        }
                        opt_disable_mod = atol(argv[i + 1]);
                        if (opt_disable_mod < 0) {
                                show_usage(argv);
                                goto error;
                        }
                        i++;
                        break;
                case 't':
                        if (argc < i + 2) {
                                show_usage(argv);
                                goto error;
                        }
                        opt_threads = atol(argv[i + 1]);
                        if (opt_threads < 0) {
                                show_usage(argv);
                                goto error;
                        }
                        i++;
                        break;
                case 'r':
                        if (argc < i + 2) {
                                show_usage(argv);
                                goto error;
                        }
                        opt_reps = atoll(argv[i + 1]);
                        if (opt_reps < 0) {
                                show_usage(argv);
                                goto error;
                        }
                        i++;
                        break;
                case 'T':
                        if (argc < i + 2) {
                                show_usage(argv);
                                goto error;
                        }
                        opt_test = *argv[i + 1];
                        i++;
                        break;
                case 'M':
                        opt_mo = RSEQ_MO_RELEASE;
                        break;
                case 'c':
                        if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
                                printf_verbose("The rseq syscall is available.\n");
                                return 0;
                        } else {
                                printf_verbose("The rseq syscall is unavailable.\n");
                                return 1;
                        }
                case 'v':
                        verbose = 1;
                        break;
                case 'h':
                        show_usage(argv);
                        return 0;
                default:
                        show_usage(argv);
                        goto error;
                }
        }

        loop_cnt_1 = loop_cnt[1];
        loop_cnt_2 = loop_cnt[2];
        loop_cnt_3 = loop_cnt[3];
        loop_cnt_4 = loop_cnt[4];
        loop_cnt_5 = loop_cnt[5];
        loop_cnt_6 = loop_cnt[6];

        if (set_signal_handler())
                goto error;
        if (!opt_disable_rseq && rseq_register_current_thread())
                goto error;
        if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
                printf_verbose("The rseq cpu id getter is unavailable\n");
                return 0;       /* Skip the test. */
        }
        switch (opt_test) {
        case 's':
                printf_verbose("spinlock\n");
                test_percpu_spinlock();
                break;
        case 'l':
                printf_verbose("linked list\n");
                test_percpu_list();
                break;
        case 'b':
                printf_verbose("buffer\n");
                test_percpu_buffer();
                break;
        case 'm':
                printf_verbose("memcpy buffer\n");
                test_percpu_memcpy_buffer();
                break;
        case 'i':
                printf_verbose("counter increment\n");
                test_percpu_inc();
                break;
        case 'r':
                printf_verbose("membarrier\n");
                test_membarrier();
                break;
        default:
                show_usage(argv);
                goto error;
        }
        if (!opt_disable_rseq && rseq_unregister_current_thread())
                abort();
        return 0;

error:
        return -1;
}