// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

#include <assert.h>
#include <errno.h>
#include <poll.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <linux/version.h>
#include <linux/membarrier.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>

#include <rseq/percpu-alloc.h>

#define PERCPU_POOL_LEN	(1024*1024)	/* 1MB */

#if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
enum {
	MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ			= (1 << 7),
	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ		= (1 << 8),
};

enum {
	MEMBARRIER_CMD_FLAG_CPU		= (1 << 0),
};
#endif
#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
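/*
 * The per-site loop counters above are given asm symbol names
 * (asm_loop_cnt_*) so that the x86 delay-injection assembly below can
 * address them directly instead of going through inline-asm operands.
 */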
static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's';

static long long opt_reps = 5000;

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;
static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}
#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)			\
	do {						\
		if (verbose)				\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)
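/*
 * The RSEQ_INJECT_* hooks below busy-wait inside rseq critical sections
 * for a configurable number of iterations (options -1 through -9).
 * This widens the race windows so that aborts, signal delivery and
 * preemption are actually exercised by the tests.
 */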
#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

/*
 * Use ip-relative addressing to get the loop counter.
 */
#define __RSEQ_INJECT_ASM(n, ref_ip, ref_label) \
	"movl " __rseq_str(ref_ip) ", %%" INJECT_ASM_REG "\n\t" \
	"leal ( asm_loop_cnt_" #n " - " __rseq_str(ref_label) "b)(%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
	"movl (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#define RSEQ_INJECT_ASM(n) \
	__RSEQ_INJECT_ASM(n, %[ref_ip], RSEQ_ASM_REF_LABEL)
#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"
#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"
#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"
#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	"	cbz	" INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	"	cbnz	" INJECT_ASM_REG ", 222b\n" \
	"333:\n"
#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"
#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"
#elif defined(__riscv)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"t1"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif
#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else \
			yield_mod_cnt++; \
	} \
}

#else /* BENCHMARK */

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */
#include <rseq/rseq.h>

static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
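/* glibc does not wrap membarrier(2); invoke it through syscall(2). */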
static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
#ifdef rseq_arch_has_load_cbne_load_add_store
#define TEST_MEMBARRIER
#endif

#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
# define RSEQ_PERCPU	RSEQ_PERCPU_MM_CID
static
int get_current_cpu_id(void)
{
	return rseq_current_mm_cid();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_mm_cid_available();
}
static
bool rseq_use_cpu_index(void)
{
	return false;	/* Use mm_cid */
}
# ifdef TEST_MEMBARRIER
/*
 * Membarrier does not currently support targeting a mm_cid, so
 * issue the barrier on all cpus.
 */
static
int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      0, 0);
}
# endif /* TEST_MEMBARRIER */
#else
# define RSEQ_PERCPU	RSEQ_PERCPU_CPU_ID
static
int get_current_cpu_id(void)
{
	return rseq_cpu_start();
}
static
bool rseq_validate_cpu_id(void)
{
	return rseq_current_cpu_raw() >= 0;
}
static
bool rseq_use_cpu_index(void)
{
	return true;	/* Use cpu_id as index. */
}
# ifdef TEST_MEMBARRIER
static
int rseq_membarrier_expedited(int cpu)
{
	return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			      MEMBARRIER_CMD_FLAG_CPU, cpu);
}
# endif /* TEST_MEMBARRIER */
#endif
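/*
 * Two indexing schemes: when built with BUILDOPT_RSEQ_PERCPU_MM_CID,
 * per-cpu data is indexed by the memory-map concurrency id (mm_cid),
 * which remains compact even on machines with many cpus; otherwise the
 * raw cpu number reported by rseq is used as the index.
 */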
struct percpu_lock {
	intptr_t v;
};

struct spinlock_test_data {
	struct percpu_lock lock;
	intptr_t count;
};

struct spinlock_thread_test_data {
	struct spinlock_test_data __rseq_percpu *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	intptr_t count;
};

struct inc_thread_test_data {
	struct inc_test_data __rseq_percpu *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list {
	struct percpu_list_node *head;
};

#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
};
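/*
 * Illustrative sketch (not part of the tests): the general usage
 * pattern for the percpu-alloc API with the structures above, assuming
 * a pool created with rseq_percpu_pool_create() as done in each test
 * below.
 */
#if 0
static void percpu_alloc_example(struct rseq_percpu_pool *pool)
{
	struct inc_test_data __rseq_percpu *c;

	/* Zero-initialized per-cpu allocation from the pool. */
	c = (struct inc_test_data __rseq_percpu *) rseq_percpu_zmalloc(pool);
	if (!c)
		abort();
	/* Per-cpu pointers must be resolved through rseq_percpu_ptr(). */
	rseq_percpu_ptr(c, get_current_cpu_id())->count++;
	rseq_percpu_free(c);
}
#endif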
/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock __rseq_percpu *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = get_current_cpu_id();
		if (cpu < 0) {
			fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
				getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
			abort();
		}
		ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						&rseq_percpu_ptr(lock, cpu)->v,
						0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}
static void rseq_percpu_unlock(struct percpu_lock __rseq_percpu *lock, int cpu)
{
	assert(rseq_percpu_ptr(lock, cpu)->v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&rseq_percpu_ptr(lock, cpu)->v, 0);
}
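/*
 * Usage sketch: a critical section protected by the per-cpu lock, as
 * done by test_percpu_spinlock_thread() below.
 *
 *	int cpu = rseq_this_cpu_lock(&data->lock);
 *	rseq_percpu_ptr(data, cpu)->count++;
 *	rseq_percpu_unlock(&data->lock, cpu);
 */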
static void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
	struct spinlock_test_data __rseq_percpu *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);
		rseq_percpu_ptr(data, cpu)->count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}
/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
static void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data __rseq_percpu *data;
	struct spinlock_thread_test_data thread_data[num_threads];
	struct rseq_percpu_pool *mempool;

	mempool = rseq_percpu_pool_create(sizeof(struct spinlock_test_data),
			PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
	if (!mempool) {
		perror("rseq_percpu_pool_create");
		abort();
	}
	data = (struct spinlock_test_data __rseq_percpu *) rseq_percpu_zmalloc(mempool);
	if (!data) {
		perror("rseq_percpu_zmalloc");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += rseq_percpu_ptr(data, i)->count;

	assert(sum == (uint64_t)opt_reps * num_threads);
	rseq_percpu_free(data);
	ret = rseq_percpu_pool_destroy(mempool);
	if (ret) {
		perror("rseq_percpu_pool_destroy");
		abort();
	}
}
static void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
	struct inc_test_data __rseq_percpu *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = get_current_cpu_id();
			ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					&rseq_percpu_ptr(data, cpu)->count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}
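/*
 * The do/while retry loop above is the canonical rseq usage pattern:
 * the per-cpu operation is re-attempted, possibly on a different cpu,
 * whenever the critical section aborts or the cpu check fails.
 */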
static void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data __rseq_percpu *data;
	struct inc_thread_test_data thread_data[num_threads];
	struct rseq_percpu_pool *mempool;

	mempool = rseq_percpu_pool_create(sizeof(struct inc_test_data),
			PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
	if (!mempool) {
		perror("rseq_percpu_pool_create");
		abort();
	}
	data = (struct inc_test_data __rseq_percpu *) rseq_percpu_zmalloc(mempool);
	if (!data) {
		perror("rseq_percpu_zmalloc");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += rseq_percpu_ptr(data, i)->count;

	assert(sum == (uint64_t)opt_reps * num_threads);
	rseq_percpu_free(data);
	ret = rseq_percpu_pool_destroy(mempool);
	if (ret) {
		perror("rseq_percpu_pool_destroy");
		abort();
	}
}
static void this_cpu_list_push(struct percpu_list __rseq_percpu *list,
			       struct percpu_list_node *node,
			       int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		struct percpu_list *cpulist;
		int ret;

		cpu = get_current_cpu_id();
		cpulist = rseq_percpu_ptr(list, cpu);
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(cpulist->head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&cpulist->head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}
/*
 * Unlike a traditional lock-less linked list, the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
static struct percpu_list_node *this_cpu_list_pop(struct percpu_list __rseq_percpu *list,
						  int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		struct percpu_list *cpulist;
		long offset;
		int ret;

		cpu = get_current_cpu_id();
		cpulist = rseq_percpu_ptr(list, cpu);
		targetptr = (intptr_t *)&cpulist->head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
							       targetptr, expectnot,
							       offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}
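/*
 * Note on the primitive used above:
 * rseq_load_cbeq_store_add_load_store__ptr() pops the head within a
 * single restartable sequence: it returns a positive value if the head
 * equals "expectnot" (empty list), otherwise it loads the head into
 * *load and stores head->next (found at "offset" within the node) as
 * the new head.
 */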
/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
static struct percpu_list_node *__percpu_list_pop(struct percpu_list __rseq_percpu *list, int cpu)
{
	struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
	struct percpu_list_node *node;

	node = cpulist->head;
	if (!node)
		return NULL;
	cpulist->head = node->next;
	return node;
}
static void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list __rseq_percpu *list = (struct percpu_list __rseq_percpu *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}
/* Simultaneous modification to a per-cpu linked list from many threads. */
static void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list __rseq_percpu *list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;
	struct rseq_percpu_pool *mempool;

	mempool = rseq_percpu_pool_create(sizeof(struct percpu_list),
			PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
	if (!mempool) {
		perror("rseq_percpu_pool_create");
		abort();
	}
	list = (struct percpu_list __rseq_percpu *) rseq_percpu_zmalloc(mempool);
	if (!list) {
		perror("rseq_percpu_zmalloc");
		abort();
	}

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
			struct percpu_list_node *node;

			expected_sum += j;

			node = (struct percpu_list_node *) malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = cpulist->head;
			cpulist->head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
	rseq_percpu_free(list);
	ret = rseq_percpu_pool_destroy(mempool);
	if (ret) {
		perror("rseq_percpu_pool_destroy");
		abort();
	}
}
static bool this_cpu_buffer_push(struct percpu_buffer __rseq_percpu *buffer,
				 struct percpu_buffer_node *node,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		struct percpu_buffer *cpubuffer;
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		cpubuffer = rseq_percpu_ptr(buffer, cpu);
		offset = RSEQ_READ_ONCE(cpubuffer->offset);
		if (offset == cpubuffer->buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&cpubuffer->array[offset];
		newval_final = offset + 1;
		targetptr_final = &cpubuffer->offset;
		ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
			targetptr_final, offset, targetptr_spec,
			newval_spec, newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
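/*
 * Note: the final store to cpubuffer->offset in the push above uses
 * opt_mo, so the -M option (RSEQ_MO_RELEASE) makes it a store-release,
 * ordering the array element store before publication of the new
 * offset.
 */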
static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
						      int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		struct percpu_buffer *cpubuffer;
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = get_current_cpu_id();
		cpubuffer = rseq_percpu_ptr(buffer, cpu);
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(cpubuffer->offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(cpubuffer->array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&cpubuffer->offset;
		ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr, offset,
			(intptr_t *)&cpubuffer->array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}
/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
						      int cpu)
{
	struct percpu_buffer *cpubuffer;
	struct percpu_buffer_node *head;
	intptr_t offset;

	cpubuffer = rseq_percpu_ptr(buffer, cpu);
	offset = cpubuffer->offset;
	if (offset == 0)
		return NULL;
	head = cpubuffer->array[offset - 1];
	cpubuffer->offset = offset - 1;
	return head;
}
static void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer __rseq_percpu *buffer = (struct percpu_buffer __rseq_percpu *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}
/* Simultaneous modification to a per-cpu buffer from many threads. */
static void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer __rseq_percpu *buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;
	struct rseq_percpu_pool *mempool;

	mempool = rseq_percpu_pool_create(sizeof(struct percpu_buffer),
			PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
	if (!mempool) {
		perror("rseq_percpu_pool_create");
		abort();
	}
	buffer = (struct percpu_buffer __rseq_percpu *) rseq_percpu_zmalloc(mempool);
	if (!buffer) {
		perror("rseq_percpu_zmalloc");
		abort();
	}

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer *cpubuffer;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		cpubuffer = rseq_percpu_ptr(buffer, i);
		/* Worst-case is every item in same CPU. */
		cpubuffer->array =
			(struct percpu_buffer_node **)
			malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(cpubuffer->array);
		cpubuffer->buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = (struct percpu_buffer_node *) malloc(sizeof(*node));
			assert(node);
			node->data = j;
			cpubuffer->array[j - 1] = node;
			cpubuffer->offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer *cpubuffer;
		struct percpu_buffer_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		cpubuffer = rseq_percpu_ptr(buffer, i);
		while ((node = __percpu_buffer_pop(buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(cpubuffer->array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
	rseq_percpu_free(buffer);
	ret = rseq_percpu_pool_destroy(mempool);
	if (ret) {
		perror("rseq_percpu_pool_destroy");
		abort();
	}
}
static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer __rseq_percpu *buffer,
					struct percpu_memcpy_buffer_node item,
					int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		struct percpu_memcpy_buffer *cpubuffer;
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		cpubuffer = rseq_percpu_ptr(buffer, cpu);
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(cpubuffer->offset);
		if (offset == cpubuffer->buflen)
			break;
		destptr = (char *)&cpubuffer->array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &cpubuffer->offset;
		ret = rseq_load_cbne_memcpy_store__ptr(
			opt_mo, RSEQ_PERCPU,
			targetptr_final, offset,
			destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
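/*
 * Note: "item" is passed by value, so the whole node is copied into
 * this cpu's array slot inside one rseq critical section; from a
 * consumer's point of view, the copy and the offset update are
 * published together or not at all.
 */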
static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
				       struct percpu_memcpy_buffer_node *item,
				       int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		struct percpu_memcpy_buffer *cpubuffer;
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = get_current_cpu_id();
		cpubuffer = rseq_percpu_ptr(buffer, cpu);
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(cpubuffer->offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&cpubuffer->array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &cpubuffer->offset;
		ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
			targetptr_final, offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}
/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
				       struct percpu_memcpy_buffer_node *item,
				       int cpu)
{
	struct percpu_memcpy_buffer *cpubuffer;
	intptr_t offset;

	cpubuffer = rseq_percpu_ptr(buffer, cpu);
	offset = cpubuffer->offset;
	if (offset == 0)
		return false;
	memcpy(item, &cpubuffer->array[offset - 1], sizeof(*item));
	cpubuffer->offset = offset - 1;
	return true;
}
static void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer __rseq_percpu *buffer = (struct percpu_memcpy_buffer __rseq_percpu *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}
/* Simultaneous modification to a per-cpu buffer from many threads. */
static void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer __rseq_percpu *buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;
	struct rseq_percpu_pool *mempool;

	mempool = rseq_percpu_pool_create(sizeof(struct percpu_memcpy_buffer),
			PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
	if (!mempool) {
		perror("rseq_percpu_pool_create");
		abort();
	}
	buffer = (struct percpu_memcpy_buffer __rseq_percpu *) rseq_percpu_zmalloc(mempool);
	if (!buffer) {
		perror("rseq_percpu_zmalloc");
		abort();
	}

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer *cpubuffer;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		cpubuffer = rseq_percpu_ptr(buffer, i);
		/* Worst-case is every item in same CPU. */
		cpubuffer->array =
			(struct percpu_memcpy_buffer_node *)
			malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(cpubuffer->array);
		cpubuffer->buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so use a two-field
			 * node instead.
			 */
			cpubuffer->array[j - 1].data1 = j;
			cpubuffer->array[j - 1].data2 = j + 1;
			cpubuffer->offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;
		struct percpu_memcpy_buffer *cpubuffer;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		cpubuffer = rseq_percpu_ptr(buffer, i);
		while (__percpu_memcpy_buffer_pop(buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(cpubuffer->array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
	rseq_percpu_free(buffer);
	ret = rseq_percpu_pool_destroy(mempool);
	if (ret) {
		perror("rseq_percpu_pool_destroy");
		abort();
	}
}
static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
{
	signals_delivered++;
}

static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}
static
bool membarrier_private_expedited_rseq_available(void)
{
	int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);

	if (status < 0) {
		perror("membarrier");
		return false;
	}
	if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
		return false;
	return true;
}
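/*
 * MEMBARRIER_CMD_QUERY returns a bitmask of the membarrier commands
 * supported by the running kernel, hence the bit test above.
 */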
/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
#ifdef TEST_MEMBARRIER
struct test_membarrier_thread_args {
	struct rseq_percpu_pool *mempool;
	struct percpu_list __rseq_percpu *percpu_list_ptr;
	intptr_t stop;
};
/* Worker threads modify data in their "active" percpu lists. */
static
void *test_membarrier_worker_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	const int iters = opt_reps;
	int i;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Wait for initialization. */
	while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }

	for (i = 0; i < iters; ++i) {
		int ret;

		do {
			int cpu = get_current_cpu_id();
			ptrdiff_t mempool_offset = rseq_percpu_pool_ptr_offset(args->mempool, cpu);

			ret = rseq_load_add_load_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
				(intptr_t *) &args->percpu_list_ptr,
				mempool_offset + offsetof(struct percpu_list, head),
				1, cpu);
		} while (rseq_unlikely(ret));
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}
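/*
 * About the operation above: within a single restartable sequence it
 * loads the currently published list pointer, offsets it to this cpu's
 * percpu_list head, dereferences the head node and adds 1 to its data
 * field. Combined with the manager's membarrier usage, increments can
 * only ever land in the currently "active" list.
 */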
static
struct percpu_list __rseq_percpu *test_membarrier_alloc_percpu_list(struct rseq_percpu_pool *mempool)
{
	struct percpu_list __rseq_percpu *list;
	int i;

	list = (struct percpu_list __rseq_percpu *) rseq_percpu_zmalloc(mempool);
	if (!list) {
		perror("rseq_percpu_zmalloc");
		return NULL;
	}
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
		struct percpu_list_node *node;

		node = (struct percpu_list_node *) malloc(sizeof(*node));
		assert(node);
		node->data = 0;
		node->next = NULL;
		cpulist->head = node;
	}
	return list;
}
static
void test_membarrier_free_percpu_list(struct percpu_list __rseq_percpu *list)
{
	int i;

	for (i = 0; i < CPU_SETSIZE; i++)
		free(rseq_percpu_ptr(list, i)->head);
	rseq_percpu_free(list);
}
/*
 * The manager thread swaps per-cpu lists that worker threads see,
 * and validates that there are no unexpected modifications.
 */
static
void *test_membarrier_manager_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	struct percpu_list __rseq_percpu *list_a, __rseq_percpu *list_b;
	intptr_t expect_a = 0, expect_b = 0;
	int cpu_a = 0, cpu_b = 0;
	struct rseq_percpu_pool *mempool;
	int ret;

	mempool = rseq_percpu_pool_create(sizeof(struct percpu_list),
			PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
	if (!mempool) {
		perror("rseq_percpu_pool_create");
		abort();
	}
	args->mempool = mempool;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Init lists. */
	list_a = test_membarrier_alloc_percpu_list(mempool);
	if (!list_a)
		abort();
	list_b = test_membarrier_alloc_percpu_list(mempool);
	if (!list_b)
		abort();

	/* Initialize lists before publishing them. */
	rseq_smp_wmb();

	RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);

	while (!RSEQ_READ_ONCE(args->stop)) {
		/* list_a is "active". */
		cpu_a = rand() % CPU_SETSIZE;
		/*
		 * As list_b is "inactive", we should never see changes
		 * to list_b.
		 */
		if (expect_b != RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_b "active". */
		RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_b);
		if (rseq_membarrier_expedited(cpu_a) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/*
		 * Cpu A should now only modify list_b, so the values
		 * in list_a should be stable.
		 */
		expect_a = RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data);

		cpu_b = rand() % CPU_SETSIZE;
		/*
		 * As list_a is "inactive", we should never see changes
		 * to list_a.
		 */
		if (expect_a != RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_a "active". */
		RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
		if (rseq_membarrier_expedited(cpu_b) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/* Remember a value from list_b. */
		expect_b = RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data);
	}

	test_membarrier_free_percpu_list(list_a);
	test_membarrier_free_percpu_list(list_b);

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	ret = rseq_percpu_pool_destroy(mempool);
	if (ret) {
		perror("rseq_percpu_pool_destroy");
		abort();
	}

	return NULL;
}
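/*
 * Why membarrier is needed above: a worker may still be inside an rseq
 * critical section that read the previous list pointer.
 * MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ restarts rseq critical sections
 * on the targeted cpu(s), so once it returns, no worker can commit an
 * update based on the list that was just made inactive.
 */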
static
void test_membarrier(void)
{
	const int num_threads = opt_threads;
	struct test_membarrier_thread_args thread_args;
	pthread_t worker_threads[num_threads];
	pthread_t manager_thread;
	int i, ret;

	if (!membarrier_private_expedited_rseq_available()) {
		fprintf(stderr, "Membarrier private expedited rseq not available. "
				"Skipping membarrier test.\n");
		return;
	}
	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
		perror("sys_membarrier");
		abort();
	}

	thread_args.percpu_list_ptr = NULL;
	thread_args.stop = 0;
	ret = pthread_create(&manager_thread, NULL,
			     test_membarrier_manager_thread, &thread_args);
	if (ret) {
		errno = ret;
		perror("pthread_create");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&worker_threads[i], NULL,
				     test_membarrier_worker_thread, &thread_args);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(worker_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	RSEQ_WRITE_ONCE(thread_args.stop, 1);
	ret = pthread_join(manager_thread, NULL);
	if (ret) {
		errno = ret;
		perror("pthread_join");
		abort();
	}
}
#else /* TEST_MEMBARRIER */
static
void test_membarrier(void)
{
	if (!membarrier_private_expedited_rseq_available()) {
		fprintf(stderr, "Membarrier private expedited rseq not available. "
				"Skipping membarrier test.\n");
		return;
	}
	fprintf(stderr, "rseq_load_add_load_load_add_store__ptr is not implemented on this architecture. "
			"Skipping membarrier test.\n");
}
#endif /* TEST_MEMBARRIER */
static void show_usage(char **argv)
{
	printf("Usage : %s <OPTIONS>\n",
		argv[0]);
	printf("OPTIONS:\n");
	printf("	[-1 loops] Number of loops for delay injection 1\n");
	printf("	[-2 loops] Number of loops for delay injection 2\n");
	printf("	[-3 loops] Number of loops for delay injection 3\n");
	printf("	[-4 loops] Number of loops for delay injection 4\n");
	printf("	[-5 loops] Number of loops for delay injection 5\n");
	printf("	[-6 loops] Number of loops for delay injection 6\n");
	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf("	[-y] Yield\n");
	printf("	[-k] Kill thread with signal\n");
	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf("	[-t N] Number of threads (default 200)\n");
	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
	printf("	[-d] Disable rseq system call (no initialization)\n");
	printf("	[-D M] Disable rseq for each M threads\n");
	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf("	[-c] Check if the rseq syscall is available.\n");
	printf("	[-v] Verbose output.\n");
	printf("	[-h] Show this help.\n");
	printf("\n");
}
int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argv);
				goto error;
			}
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
			case 'r':
				break;
			default:
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mo = RSEQ_MO_RELEASE;
			break;
		case 'c':
			if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
				printf_verbose("The rseq syscall is available.\n");
				goto end;
			} else {
				printf_verbose("The rseq syscall is unavailable.\n");
				goto no_rseq;
			}
		default:
			show_usage(argv);
			goto error;
		}
	}

	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
		printf_verbose("The rseq cpu id getter is unavailable\n");
		goto no_rseq;
	}
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	case 'r':
		printf_verbose("membarrier\n");
		test_membarrier();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;

no_rseq:
	return 2;
}