Implement parametrized mm_cid test
[librseq.git] / tests / param_test.c
CommitLineData
90702366 1// SPDX-License-Identifier: MIT
f2d7b530 2// SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
31b44ba2
MD
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE
5#endif
6#include <assert.h>
cb900b45 7#include <linux/version.h>
5368dcb4 8#include <linux/membarrier.h>
31b44ba2
MD
9#include <pthread.h>
10#include <sched.h>
11#include <stdint.h>
12#include <stdio.h>
13#include <stdlib.h>
14#include <string.h>
15#include <syscall.h>
16#include <unistd.h>
17#include <poll.h>
18#include <sys/types.h>
19#include <signal.h>
20#include <errno.h>
21#include <stddef.h>
369688a5 22#include <stdbool.h>
31b44ba2 23
cb900b45
MD
/*
 * Fallback definitions for builds against kernel headers older than 5.10:
 * the rseq-related membarrier commands and the per-cpu flag are defined
 * locally with the values below.
 * NOTE(review): presumably these match the values in newer
 * <linux/membarrier.h> -- confirm against the kernel UAPI header.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
enum {
	MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ			= (1 << 7),
	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ		= (1 << 8),
};

enum {
	MEMBARRIER_CMD_FLAG_CPU		= (1 << 0),
};
#endif
34
31b44ba2
MD
/*
 * Delay-injection loop counts.  RSEQ_INJECT_C(n) reads loop_cnt[n], and
 * the non-x86 assembly hooks take loop_cnt[1..6] as memory operands.
 * NOTE(review): the array is sized NR_INJECT + 1, presumably so that the
 * injection point number can be used directly as the index -- confirm.
 */
#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

/*
 * Counters given fixed assembler names (asm_loop_cnt_<n>) so that the x86
 * RSEQ_INJECT_ASM() fragments can reference them by symbol.
 * NOTE(review): where these are assigned is outside this chunk.
 */
static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

/*
 * opt_modulo: period (in RSEQ_INJECT_C() calls) of the yield/sleep/signal
 * disturbance.  verbose: enables printf_verbose() output.
 */
static int opt_modulo, verbose;

/*
 * Test options, as used in this file:
 *  - opt_yield:        call sched_yield() at disturbance points and
 *                      between pop and push in the list/buffer workers;
 *  - opt_signal:       raise SIGUSR1 at disturbance points;
 *  - opt_sleep:        poll() timeout used at disturbance points when > 0;
 *  - opt_disable_rseq: skip rseq registration/unregistration in workers;
 *  - opt_threads:      number of worker threads per test;
 *  - opt_disable_mod:  when > 0, threads whose index is a multiple of it
 *                      run unregistered (spinlock and inc tests);
 *  - opt_test:         NOTE(review): presumably the test selector; its
 *                      consumer is outside this chunk.
 */
static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's';

/* Number of iterations each worker thread performs. */
static long long opt_reps = 5000;

/* Per-thread count of SIGUSR1 deliveries, bumped by the signal handler. */
static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;
55
56#ifndef BENCHMARK
57
c6e1dc81
MD
/* Return the caller's thread id, obtained through the raw gettid syscall. */
static inline pid_t rseq_gettid(void)
{
	const long tid = syscall(__NR_gettid);

	return (pid_t) tid;
}
62
31b44ba2
MD
/*
 * Per-thread state used by the injection hooks in this file:
 * yield_mod_cnt counts RSEQ_INJECT_C() invocations towards opt_modulo,
 * nr_abort is incremented by RSEQ_INJECT_FAILED.
 */
static __thread __attribute__((tls_model("initial-exec"), unused))
int yield_mod_cnt, nr_abort;

/* printf() wrapper that only emits output when "verbose" is non-zero. */
#define printf_verbose(fmt, ...)			\
	do {						\
		if (verbose)				\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)
71
/*
 * Per-architecture delay-injection hooks.
 *
 * Every branch defines RSEQ_INJECT_ASM(n) as an assembly fragment that
 * loads the n-th loop count into a scratch register, branches forward to
 * local label 333 when it is zero, and otherwise decrements it in a busy
 * loop (local label 222) until it reaches zero.  RSEQ_INJECT_CLOBBER,
 * where defined, names the scratch register(s); RSEQ_INJECT_INPUT, where
 * defined, passes loop_cnt[1..6] as memory operands.
 *
 * NOTE(review): these macros are presumably consumed by the inline
 * assembly in <rseq/rseq.h>, which is included after this block; the
 * leading commas suggest they are appended to existing operand/clobber
 * lists -- confirm against the rseq headers.
 */
#ifdef __i386__

/* x86-32: the count is read directly from the asm_loop_cnt_<n> symbol. */
#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

/*
 * x86-64: the address of asm_loop_cnt_<n> is formed rip-relative in rax,
 * then the 32-bit count is loaded into eax.
 */
#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

/* s390: counts come from loop_cnt[] memory operands; scratch is r12. */
#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

/* 32-bit ARM: counts come from loop_cnt[] memory operands; scratch is r4. */
#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

/*
 * AArch64: counts come from loop_cnt[] memory operands.  No
 * RSEQ_INJECT_CLOBBER is defined in this branch; the scratch register is
 * RSEQ_ASM_TMP_REG32.
 * NOTE(review): RSEQ_ASM_TMP_REG32 is not defined in this file --
 * presumably provided (and already clobbered) by the rseq headers.
 */
#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	" ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	" cbz " INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	" sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	" cbnz " INJECT_ASM_REG ", 222b\n" \
	"333:\n"

#elif defined(__PPC__)

/* PowerPC: counts come from loop_cnt[] memory operands; scratch is r18. */
#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

/* MIPS: counts come from loop_cnt[] memory operands; scratch is $5. */
#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#elif defined(__riscv)

/* RISC-V: counts come from loop_cnt[] memory operands; scratch is t1. */
#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"t1"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif
248
/*
 * Hook that counts one more abort for the current thread.
 * NOTE(review): presumably expanded by the rseq headers on the abort /
 * failure path -- confirm against <rseq/rseq.h>.
 */
#define RSEQ_INJECT_FAILED \
	nr_abort++;

/*
 * C-level delay injection for point n.
 *
 * Spins loop_cnt[n] times on rseq_barrier().  When loop_cnt[n] is -1 and
 * opt_modulo is non-zero, every opt_modulo-th invocation in a given thread
 * (tracked by yield_mod_cnt) optionally sleeps via poll() for opt_sleep
 * milliseconds, calls sched_yield(), and/or raises SIGUSR1, depending on
 * the opt_sleep / opt_yield / opt_signal options.
 */
#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}
273
274#else
275
276#define printf_verbose(fmt, ...)
277
278#endif /* BENCHMARK */
279
280#include <rseq/rseq.h>
281
369688a5
MD
282static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
283
/*
 * Thin wrapper around the raw membarrier system call.
 *
 * Returns the syscall result narrowed to int: non-negative on success, or
 * -1 with errno set on failure.
 */
static int sys_membarrier(int cmd, int flags, int cpu_id)
{
	const long rc = syscall(__NR_membarrier, cmd, flags, cpu_id);

	return (int) rc;
}
288
289#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
290#define TEST_MEMBARRIER
291#endif
292
/*
 * Per-cpu indexing back-end.  Depending on BUILDOPT_RSEQ_PERCPU_MM_CID the
 * tests index their per-cpu data either by memory-map concurrency id
 * (mm_cid) or by cpu number; RSEQ_PERCPU selects the matching mode for
 * the rseq operations.
 */
#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
# define RSEQ_PERCPU	RSEQ_PERCPU_MM_CID

/* Index for per-cpu data: the current mm_cid. */
static
int get_current_cpu_id(void)
{
	int mm_cid = rseq_current_mm_cid();

	return mm_cid;
}

/* True when rseq reports that the mm_cid field is available. */
static
bool rseq_validate_cpu_id(void)
{
	bool available = rseq_mm_cid_available();

	return available;
}
# ifdef TEST_MEMBARRIER
/*
 * Membarrier does not currently support targeting a mm_cid, so
 * issue the barrier on all cpus.
 */
static
int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
{
	int rc = sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, 0, 0);

	return rc;
}
# endif /* TEST_MEMBARRIER */
#else
# define RSEQ_PERCPU	RSEQ_PERCPU_CPU_ID

/* Index for per-cpu data: the value returned by rseq_cpu_start(). */
static
int get_current_cpu_id(void)
{
	int cpu = rseq_cpu_start();

	return cpu;
}

/* True when the raw rseq cpu number is not negative. */
static
bool rseq_validate_cpu_id(void)
{
	return !(rseq_current_cpu_raw() < 0);
}
# ifdef TEST_MEMBARRIER
/* Issue the expedited rseq membarrier targeting only the given cpu. */
static
int rseq_membarrier_expedited(int cpu)
{
	int rc = sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
			MEMBARRIER_CMD_FLAG_CPU, cpu);

	return rc;
}
# endif /* TEST_MEMBARRIER */
#endif
338
31b44ba2
MD
/*
 * Per-cpu data structures shared by the tests.  Every "c" array has
 * CPU_SETSIZE entries and is indexed with the value returned by
 * get_current_cpu_id().
 * NOTE(review): the aligned(128) on each entry type presumably keeps
 * entries of different cpus on separate cache lines -- confirm intent.
 */

/* Lock word for one cpu: 0 when free, 1 when held. */
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

/* Array of per-cpu locks. */
struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

/* Counter for one cpu. */
struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

/* Shared state of the spinlock test: per-cpu locks and the counters they guard. */
struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

/* Per-thread arguments of the spinlock test. */
struct spinlock_thread_test_data {
	struct spinlock_test_data *data;	/* shared test state */
	long long reps;				/* iterations to perform */
	int reg;				/* non-zero: register with rseq */
};

/* Shared state of the increment test: per-cpu counters. */
struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

/* Per-thread arguments of the increment test. */
struct inc_thread_test_data {
	struct inc_test_data *data;	/* shared test state */
	long long reps;			/* iterations to perform */
	int reg;			/* non-zero: register with rseq */
};

/* Node of a per-cpu singly linked list. */
struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

/* Head of the list belonging to one cpu. */
struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

/* Array of per-cpu list heads. */
struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

/* Number of nodes initially placed in each allowed cpu's pointer buffer. */
#define BUFFER_ITEM_PER_CPU	100

/* Object referenced from a per-cpu pointer buffer. */
struct percpu_buffer_node {
	intptr_t data;
};

/* Stack of node pointers for one cpu. */
struct percpu_buffer_entry {
	intptr_t offset;			/* number of used slots */
	intptr_t buflen;			/* capacity of array, in slots */
	struct percpu_buffer_node **array;	/* heap-allocated slot storage */
} __attribute__((aligned(128)));

/* Array of per-cpu pointer buffers. */
struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

/* Number of items initially placed in each allowed cpu's memcpy buffer. */
#define MEMCPY_BUFFER_ITEM_PER_CPU	100

/* Item stored by value in a per-cpu memcpy buffer. */
struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

/* Stack of by-value items for one cpu. */
struct percpu_memcpy_buffer_entry {
	intptr_t offset;				/* number of used slots */
	intptr_t buflen;				/* capacity of array, in items */
	struct percpu_memcpy_buffer_node *array;	/* heap-allocated item storage */
} __attribute__((aligned(128)));

/* Array of per-cpu memcpy buffers. */
struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};
417
418/* A simple percpu spinlock. Grabs lock on current cpu. */
419static int rseq_this_cpu_lock(struct percpu_lock *lock)
420{
421 int cpu;
422
423 for (;;) {
424 int ret;
425
369688a5
MD
426 cpu = get_current_cpu_id();
427 ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
428 &lock->c[cpu].v,
31b44ba2
MD
429 0, 1, cpu);
430 if (rseq_likely(!ret))
431 break;
432 /* Retry if comparison fails or rseq aborts. */
433 }
434 /*
435 * Acquire semantic when taking lock after control dependency.
436 * Matches rseq_smp_store_release().
437 */
438 rseq_smp_acquire__after_ctrl_dep();
439 return cpu;
440}
441
442static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
443{
444 assert(lock->c[cpu].v == 1);
445 /*
446 * Release lock, with release semantic. Matches
447 * rseq_smp_acquire__after_ctrl_dep().
448 */
449 rseq_smp_store_release(&lock->c[cpu].v, 0);
450}
451
6e284b80 452static void *test_percpu_spinlock_thread(void *arg)
31b44ba2 453{
d268885a 454 struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
31b44ba2
MD
455 struct spinlock_test_data *data = thread_data->data;
456 long long i, reps;
457
458 if (!opt_disable_rseq && thread_data->reg &&
459 rseq_register_current_thread())
460 abort();
461 reps = thread_data->reps;
462 for (i = 0; i < reps; i++) {
af895f04 463 int cpu = rseq_this_cpu_lock(&data->lock);
31b44ba2
MD
464 data->c[cpu].count++;
465 rseq_percpu_unlock(&data->lock, cpu);
466#ifndef BENCHMARK
467 if (i != 0 && !(i % (reps / 10)))
468 printf_verbose("tid %d: count %lld\n",
469 (int) rseq_gettid(), i);
470#endif
471 }
472 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
473 (int) rseq_gettid(), nr_abort, signals_delivered);
474 if (!opt_disable_rseq && thread_data->reg &&
475 rseq_unregister_current_thread())
476 abort();
477 return NULL;
478}
479
480/*
481 * A simple test which implements a sharded counter using a per-cpu
482 * lock. Obviously real applications might prefer to simply use a
483 * per-cpu increment; however, this is reasonable for a test and the
484 * lock can be extended to synchronize more complicated operations.
485 */
6e284b80 486static void test_percpu_spinlock(void)
31b44ba2
MD
487{
488 const int num_threads = opt_threads;
489 int i, ret;
490 uint64_t sum;
491 pthread_t test_threads[num_threads];
492 struct spinlock_test_data data;
493 struct spinlock_thread_test_data thread_data[num_threads];
494
495 memset(&data, 0, sizeof(data));
496 for (i = 0; i < num_threads; i++) {
497 thread_data[i].reps = opt_reps;
498 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
499 thread_data[i].reg = 1;
500 else
501 thread_data[i].reg = 0;
502 thread_data[i].data = &data;
503 ret = pthread_create(&test_threads[i], NULL,
504 test_percpu_spinlock_thread,
505 &thread_data[i]);
506 if (ret) {
507 errno = ret;
508 perror("pthread_create");
509 abort();
510 }
511 }
512
513 for (i = 0; i < num_threads; i++) {
514 ret = pthread_join(test_threads[i], NULL);
515 if (ret) {
516 errno = ret;
517 perror("pthread_join");
518 abort();
519 }
520 }
521
522 sum = 0;
523 for (i = 0; i < CPU_SETSIZE; i++)
524 sum += data.c[i].count;
525
526 assert(sum == (uint64_t)opt_reps * num_threads);
527}
528
6e284b80 529static void *test_percpu_inc_thread(void *arg)
31b44ba2 530{
d268885a 531 struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
31b44ba2
MD
532 struct inc_test_data *data = thread_data->data;
533 long long i, reps;
534
535 if (!opt_disable_rseq && thread_data->reg &&
536 rseq_register_current_thread())
537 abort();
538 reps = thread_data->reps;
539 for (i = 0; i < reps; i++) {
540 int ret;
541
542 do {
543 int cpu;
544
369688a5
MD
545 cpu = get_current_cpu_id();
546 ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
547 &data->c[cpu].count, 1, cpu);
31b44ba2
MD
548 } while (rseq_unlikely(ret));
549#ifndef BENCHMARK
550 if (i != 0 && !(i % (reps / 10)))
551 printf_verbose("tid %d: count %lld\n",
552 (int) rseq_gettid(), i);
553#endif
554 }
555 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
556 (int) rseq_gettid(), nr_abort, signals_delivered);
557 if (!opt_disable_rseq && thread_data->reg &&
558 rseq_unregister_current_thread())
559 abort();
560 return NULL;
561}
562
6e284b80 563static void test_percpu_inc(void)
31b44ba2
MD
564{
565 const int num_threads = opt_threads;
566 int i, ret;
567 uint64_t sum;
568 pthread_t test_threads[num_threads];
569 struct inc_test_data data;
570 struct inc_thread_test_data thread_data[num_threads];
571
572 memset(&data, 0, sizeof(data));
573 for (i = 0; i < num_threads; i++) {
574 thread_data[i].reps = opt_reps;
575 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
576 thread_data[i].reg = 1;
577 else
578 thread_data[i].reg = 0;
579 thread_data[i].data = &data;
580 ret = pthread_create(&test_threads[i], NULL,
581 test_percpu_inc_thread,
582 &thread_data[i]);
583 if (ret) {
584 errno = ret;
585 perror("pthread_create");
586 abort();
587 }
588 }
589
590 for (i = 0; i < num_threads; i++) {
591 ret = pthread_join(test_threads[i], NULL);
592 if (ret) {
593 errno = ret;
594 perror("pthread_join");
595 abort();
596 }
597 }
598
599 sum = 0;
600 for (i = 0; i < CPU_SETSIZE; i++)
601 sum += data.c[i].count;
602
603 assert(sum == (uint64_t)opt_reps * num_threads);
604}
605
6e284b80 606static void this_cpu_list_push(struct percpu_list *list,
31b44ba2
MD
607 struct percpu_list_node *node,
608 int *_cpu)
609{
610 int cpu;
611
612 for (;;) {
613 intptr_t *targetptr, newval, expect;
614 int ret;
615
369688a5 616 cpu = get_current_cpu_id();
31b44ba2
MD
617 /* Load list->c[cpu].head with single-copy atomicity. */
618 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
619 newval = (intptr_t)node;
620 targetptr = (intptr_t *)&list->c[cpu].head;
621 node->next = (struct percpu_list_node *)expect;
369688a5
MD
622 ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
623 targetptr, expect, newval, cpu);
31b44ba2
MD
624 if (rseq_likely(!ret))
625 break;
626 /* Retry if comparison fails or rseq aborts. */
627 }
628 if (_cpu)
629 *_cpu = cpu;
630}
631
632/*
633 * Unlike a traditional lock-less linked list; the availability of a
634 * rseq primitive allows us to implement pop without concerns over
635 * ABA-type races.
636 */
6e284b80 637static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
31b44ba2
MD
638 int *_cpu)
639{
640 struct percpu_list_node *node = NULL;
641 int cpu;
642
643 for (;;) {
644 struct percpu_list_node *head;
645 intptr_t *targetptr, expectnot, *load;
d35eae6b
MD
646 long offset;
647 int ret;
31b44ba2 648
369688a5 649 cpu = get_current_cpu_id();
31b44ba2
MD
650 targetptr = (intptr_t *)&list->c[cpu].head;
651 expectnot = (intptr_t)NULL;
652 offset = offsetof(struct percpu_list_node, next);
653 load = (intptr_t *)&head;
369688a5
MD
654 ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
655 targetptr, expectnot,
656 offset, load, cpu);
31b44ba2
MD
657 if (rseq_likely(!ret)) {
658 node = head;
659 break;
660 }
661 if (ret > 0)
662 break;
663 /* Retry if rseq aborts. */
664 }
665 if (_cpu)
666 *_cpu = cpu;
667 return node;
668}
669
670/*
671 * __percpu_list_pop is not safe against concurrent accesses. Should
672 * only be used on lists that are not concurrently modified.
673 */
6e284b80 674static struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
31b44ba2
MD
675{
676 struct percpu_list_node *node;
677
678 node = list->c[cpu].head;
679 if (!node)
680 return NULL;
681 list->c[cpu].head = node->next;
682 return node;
683}
684
6e284b80 685static void *test_percpu_list_thread(void *arg)
31b44ba2
MD
686{
687 long long i, reps;
688 struct percpu_list *list = (struct percpu_list *)arg;
689
690 if (!opt_disable_rseq && rseq_register_current_thread())
691 abort();
692
693 reps = opt_reps;
694 for (i = 0; i < reps; i++) {
695 struct percpu_list_node *node;
696
697 node = this_cpu_list_pop(list, NULL);
698 if (opt_yield)
699 sched_yield(); /* encourage shuffling */
700 if (node)
701 this_cpu_list_push(list, node, NULL);
702 }
703
704 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
705 (int) rseq_gettid(), nr_abort, signals_delivered);
706 if (!opt_disable_rseq && rseq_unregister_current_thread())
707 abort();
708
709 return NULL;
710}
711
712/* Simultaneous modification to a per-cpu linked list from many threads. */
6e284b80 713static void test_percpu_list(void)
31b44ba2
MD
714{
715 const int num_threads = opt_threads;
716 int i, j, ret;
717 uint64_t sum = 0, expected_sum = 0;
718 struct percpu_list list;
719 pthread_t test_threads[num_threads];
720 cpu_set_t allowed_cpus;
721
722 memset(&list, 0, sizeof(list));
723
724 /* Generate list entries for every usable cpu. */
725 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
726 for (i = 0; i < CPU_SETSIZE; i++) {
727 if (!CPU_ISSET(i, &allowed_cpus))
728 continue;
729 for (j = 1; j <= 100; j++) {
730 struct percpu_list_node *node;
731
732 expected_sum += j;
733
d268885a 734 node = (struct percpu_list_node *) malloc(sizeof(*node));
31b44ba2
MD
735 assert(node);
736 node->data = j;
737 node->next = list.c[i].head;
738 list.c[i].head = node;
739 }
740 }
741
742 for (i = 0; i < num_threads; i++) {
743 ret = pthread_create(&test_threads[i], NULL,
744 test_percpu_list_thread, &list);
745 if (ret) {
746 errno = ret;
747 perror("pthread_create");
748 abort();
749 }
750 }
751
752 for (i = 0; i < num_threads; i++) {
753 ret = pthread_join(test_threads[i], NULL);
754 if (ret) {
755 errno = ret;
756 perror("pthread_join");
757 abort();
758 }
759 }
760
761 for (i = 0; i < CPU_SETSIZE; i++) {
762 struct percpu_list_node *node;
763
764 if (!CPU_ISSET(i, &allowed_cpus))
765 continue;
766
767 while ((node = __percpu_list_pop(&list, i))) {
768 sum += node->data;
769 free(node);
770 }
771 }
772
773 /*
774 * All entries should now be accounted for (unless some external
775 * actor is interfering with our allowed affinity while this
776 * test is running).
777 */
778 assert(sum == expected_sum);
779}
780
6e284b80 781static bool this_cpu_buffer_push(struct percpu_buffer *buffer,
31b44ba2
MD
782 struct percpu_buffer_node *node,
783 int *_cpu)
784{
785 bool result = false;
786 int cpu;
787
788 for (;;) {
789 intptr_t *targetptr_spec, newval_spec;
790 intptr_t *targetptr_final, newval_final;
791 intptr_t offset;
792 int ret;
793
369688a5 794 cpu = get_current_cpu_id();
31b44ba2
MD
795 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
796 if (offset == buffer->c[cpu].buflen)
797 break;
798 newval_spec = (intptr_t)node;
799 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
800 newval_final = offset + 1;
801 targetptr_final = &buffer->c[cpu].offset;
369688a5
MD
802 ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU,
803 targetptr_final, offset, targetptr_spec,
804 newval_spec, newval_final, cpu);
31b44ba2
MD
805 if (rseq_likely(!ret)) {
806 result = true;
807 break;
808 }
809 /* Retry if comparison fails or rseq aborts. */
810 }
811 if (_cpu)
812 *_cpu = cpu;
813 return result;
814}
815
6e284b80 816static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
31b44ba2
MD
817 int *_cpu)
818{
819 struct percpu_buffer_node *head;
820 int cpu;
821
822 for (;;) {
823 intptr_t *targetptr, newval;
824 intptr_t offset;
825 int ret;
826
369688a5 827 cpu = get_current_cpu_id();
31b44ba2
MD
828 /* Load offset with single-copy atomicity. */
829 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
830 if (offset == 0) {
831 head = NULL;
832 break;
833 }
834 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
835 newval = offset - 1;
836 targetptr = (intptr_t *)&buffer->c[cpu].offset;
369688a5
MD
837 ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
838 targetptr, offset,
31b44ba2
MD
839 (intptr_t *)&buffer->c[cpu].array[offset - 1],
840 (intptr_t)head, newval, cpu);
841 if (rseq_likely(!ret))
842 break;
843 /* Retry if comparison fails or rseq aborts. */
844 }
845 if (_cpu)
846 *_cpu = cpu;
847 return head;
848}
849
850/*
851 * __percpu_buffer_pop is not safe against concurrent accesses. Should
852 * only be used on buffers that are not concurrently modified.
853 */
6e284b80 854static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
31b44ba2
MD
855 int cpu)
856{
857 struct percpu_buffer_node *head;
858 intptr_t offset;
859
860 offset = buffer->c[cpu].offset;
861 if (offset == 0)
862 return NULL;
863 head = buffer->c[cpu].array[offset - 1];
864 buffer->c[cpu].offset = offset - 1;
865 return head;
866}
867
6e284b80 868static void *test_percpu_buffer_thread(void *arg)
31b44ba2
MD
869{
870 long long i, reps;
871 struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
872
873 if (!opt_disable_rseq && rseq_register_current_thread())
874 abort();
875
876 reps = opt_reps;
877 for (i = 0; i < reps; i++) {
878 struct percpu_buffer_node *node;
879
880 node = this_cpu_buffer_pop(buffer, NULL);
881 if (opt_yield)
882 sched_yield(); /* encourage shuffling */
883 if (node) {
884 if (!this_cpu_buffer_push(buffer, node, NULL)) {
885 /* Should increase buffer size. */
886 abort();
887 }
888 }
889 }
890
891 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
892 (int) rseq_gettid(), nr_abort, signals_delivered);
893 if (!opt_disable_rseq && rseq_unregister_current_thread())
894 abort();
895
896 return NULL;
897}
898
899/* Simultaneous modification to a per-cpu buffer from many threads. */
6e284b80 900static void test_percpu_buffer(void)
31b44ba2
MD
901{
902 const int num_threads = opt_threads;
903 int i, j, ret;
904 uint64_t sum = 0, expected_sum = 0;
905 struct percpu_buffer buffer;
906 pthread_t test_threads[num_threads];
907 cpu_set_t allowed_cpus;
908
909 memset(&buffer, 0, sizeof(buffer));
910
911 /* Generate list entries for every usable cpu. */
912 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
913 for (i = 0; i < CPU_SETSIZE; i++) {
914 if (!CPU_ISSET(i, &allowed_cpus))
915 continue;
916 /* Worse-case is every item in same CPU. */
917 buffer.c[i].array =
d268885a 918 (struct percpu_buffer_node **)
31b44ba2
MD
919 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
920 BUFFER_ITEM_PER_CPU);
921 assert(buffer.c[i].array);
922 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
923 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
924 struct percpu_buffer_node *node;
925
926 expected_sum += j;
927
928 /*
929 * We could theoretically put the word-sized
930 * "data" directly in the buffer. However, we
931 * want to model objects that would not fit
932 * within a single word, so allocate an object
933 * for each node.
934 */
d268885a 935 node = (struct percpu_buffer_node *) malloc(sizeof(*node));
31b44ba2
MD
936 assert(node);
937 node->data = j;
938 buffer.c[i].array[j - 1] = node;
939 buffer.c[i].offset++;
940 }
941 }
942
943 for (i = 0; i < num_threads; i++) {
944 ret = pthread_create(&test_threads[i], NULL,
945 test_percpu_buffer_thread, &buffer);
946 if (ret) {
947 errno = ret;
948 perror("pthread_create");
949 abort();
950 }
951 }
952
953 for (i = 0; i < num_threads; i++) {
954 ret = pthread_join(test_threads[i], NULL);
955 if (ret) {
956 errno = ret;
957 perror("pthread_join");
958 abort();
959 }
960 }
961
962 for (i = 0; i < CPU_SETSIZE; i++) {
963 struct percpu_buffer_node *node;
964
965 if (!CPU_ISSET(i, &allowed_cpus))
966 continue;
967
968 while ((node = __percpu_buffer_pop(&buffer, i))) {
969 sum += node->data;
970 free(node);
971 }
972 free(buffer.c[i].array);
973 }
974
975 /*
976 * All entries should now be accounted for (unless some external
977 * actor is interfering with our allowed affinity while this
978 * test is running).
979 */
980 assert(sum == expected_sum);
981}
982
6e284b80 983static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
31b44ba2
MD
984 struct percpu_memcpy_buffer_node item,
985 int *_cpu)
986{
987 bool result = false;
988 int cpu;
989
990 for (;;) {
991 intptr_t *targetptr_final, newval_final, offset;
992 char *destptr, *srcptr;
993 size_t copylen;
994 int ret;
995
369688a5 996 cpu = get_current_cpu_id();
31b44ba2
MD
997 /* Load offset with single-copy atomicity. */
998 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
999 if (offset == buffer->c[cpu].buflen)
1000 break;
1001 destptr = (char *)&buffer->c[cpu].array[offset];
1002 srcptr = (char *)&item;
1003 /* copylen must be <= 4kB. */
1004 copylen = sizeof(item);
1005 newval_final = offset + 1;
1006 targetptr_final = &buffer->c[cpu].offset;
369688a5
MD
1007 ret = rseq_cmpeqv_trymemcpy_storev(
1008 opt_mo, RSEQ_PERCPU,
1009 targetptr_final, offset,
1010 destptr, srcptr, copylen,
1011 newval_final, cpu);
31b44ba2
MD
1012 if (rseq_likely(!ret)) {
1013 result = true;
1014 break;
1015 }
1016 /* Retry if comparison fails or rseq aborts. */
1017 }
1018 if (_cpu)
1019 *_cpu = cpu;
1020 return result;
1021}
1022
6e284b80 1023static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
31b44ba2
MD
1024 struct percpu_memcpy_buffer_node *item,
1025 int *_cpu)
1026{
1027 bool result = false;
1028 int cpu;
1029
1030 for (;;) {
1031 intptr_t *targetptr_final, newval_final, offset;
1032 char *destptr, *srcptr;
1033 size_t copylen;
1034 int ret;
1035
369688a5 1036 cpu = get_current_cpu_id();
31b44ba2
MD
1037 /* Load offset with single-copy atomicity. */
1038 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
1039 if (offset == 0)
1040 break;
1041 destptr = (char *)item;
1042 srcptr = (char *)&buffer->c[cpu].array[offset - 1];
1043 /* copylen must be <= 4kB. */
1044 copylen = sizeof(*item);
1045 newval_final = offset - 1;
1046 targetptr_final = &buffer->c[cpu].offset;
369688a5
MD
1047 ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1048 targetptr_final, offset, destptr, srcptr, copylen,
31b44ba2
MD
1049 newval_final, cpu);
1050 if (rseq_likely(!ret)) {
1051 result = true;
1052 break;
1053 }
1054 /* Retry if comparison fails or rseq aborts. */
1055 }
1056 if (_cpu)
1057 *_cpu = cpu;
1058 return result;
1059}
1060
1061/*
1062 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1063 * only be used on buffers that are not concurrently modified.
1064 */
6e284b80 1065static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
31b44ba2
MD
1066 struct percpu_memcpy_buffer_node *item,
1067 int cpu)
1068{
1069 intptr_t offset;
1070
1071 offset = buffer->c[cpu].offset;
1072 if (offset == 0)
1073 return false;
1074 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
1075 buffer->c[cpu].offset = offset - 1;
1076 return true;
1077}
1078
6e284b80 1079static void *test_percpu_memcpy_buffer_thread(void *arg)
31b44ba2
MD
1080{
1081 long long i, reps;
1082 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
1083
1084 if (!opt_disable_rseq && rseq_register_current_thread())
1085 abort();
1086
1087 reps = opt_reps;
1088 for (i = 0; i < reps; i++) {
1089 struct percpu_memcpy_buffer_node item;
1090 bool result;
1091
1092 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1093 if (opt_yield)
1094 sched_yield(); /* encourage shuffling */
1095 if (result) {
1096 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1097 /* Should increase buffer size. */
1098 abort();
1099 }
1100 }
1101 }
1102
1103 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
1104 (int) rseq_gettid(), nr_abort, signals_delivered);
1105 if (!opt_disable_rseq && rseq_unregister_current_thread())
1106 abort();
1107
1108 return NULL;
1109}
1110
1111/* Simultaneous modification to a per-cpu buffer from many threads. */
6e284b80 1112static void test_percpu_memcpy_buffer(void)
31b44ba2
MD
1113{
1114 const int num_threads = opt_threads;
1115 int i, j, ret;
1116 uint64_t sum = 0, expected_sum = 0;
1117 struct percpu_memcpy_buffer buffer;
1118 pthread_t test_threads[num_threads];
1119 cpu_set_t allowed_cpus;
1120
1121 memset(&buffer, 0, sizeof(buffer));
1122
1123 /* Generate list entries for every usable cpu. */
1124 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1125 for (i = 0; i < CPU_SETSIZE; i++) {
1126 if (!CPU_ISSET(i, &allowed_cpus))
1127 continue;
1128 /* Worse-case is every item in same CPU. */
1129 buffer.c[i].array =
d268885a 1130 (struct percpu_memcpy_buffer_node *)
31b44ba2
MD
1131 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1132 MEMCPY_BUFFER_ITEM_PER_CPU);
1133 assert(buffer.c[i].array);
1134 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1135 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1136 expected_sum += 2 * j + 1;
1137
1138 /*
1139 * We could theoretically put the word-sized
1140 * "data" directly in the buffer. However, we
1141 * want to model objects that would not fit
1142 * within a single word, so allocate an object
1143 * for each node.
1144 */
1145 buffer.c[i].array[j - 1].data1 = j;
1146 buffer.c[i].array[j - 1].data2 = j + 1;
1147 buffer.c[i].offset++;
1148 }
1149 }
1150
1151 for (i = 0; i < num_threads; i++) {
1152 ret = pthread_create(&test_threads[i], NULL,
1153 test_percpu_memcpy_buffer_thread,
1154 &buffer);
1155 if (ret) {
1156 errno = ret;
1157 perror("pthread_create");
1158 abort();
1159 }
1160 }
1161
1162 for (i = 0; i < num_threads; i++) {
1163 ret = pthread_join(test_threads[i], NULL);
1164 if (ret) {
1165 errno = ret;
1166 perror("pthread_join");
1167 abort();
1168 }
1169 }
1170
1171 for (i = 0; i < CPU_SETSIZE; i++) {
1172 struct percpu_memcpy_buffer_node item;
1173
1174 if (!CPU_ISSET(i, &allowed_cpus))
1175 continue;
1176
1177 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1178 sum += item.data1;
1179 sum += item.data2;
1180 }
1181 free(buffer.c[i].array);
1182 }
1183
1184 /*
1185 * All entries should now be accounted for (unless some external
1186 * actor is interfering with our allowed affinity while this
1187 * test is running).
1188 */
1189 assert(sum == expected_sum);
1190}
1191
544cdc88
MJ
1192
1193static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
31b44ba2
MD
1194{
1195 signals_delivered++;
1196}
1197
1198static int set_signal_handler(void)
1199{
1200 int ret = 0;
1201 struct sigaction sa;
1202 sigset_t sigset;
1203
1204 ret = sigemptyset(&sigset);
1205 if (ret < 0) {
1206 perror("sigemptyset");
1207 return ret;
1208 }
1209
1210 sa.sa_handler = test_signal_interrupt_handler;
1211 sa.sa_mask = sigset;
1212 sa.sa_flags = 0;
1213 ret = sigaction(SIGUSR1, &sa, NULL);
1214 if (ret < 0) {
1215 perror("sigaction");
1216 return ret;
1217 }
1218
1219 printf_verbose("Signal handler set for SIGUSR1\n");
1220
1221 return ret;
1222}
1223
3664098e
MD
1224static
1225bool membarrier_private_expedited_rseq_available(void)
1226{
1227 int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
1228
1229 if (status < 0) {
1230 perror("membarrier");
1231 return false;
1232 }
1233 if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
1234 return false;
1235 return true;
1236}
1237
/* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command targeting a single CPU. */
369688a5 1239#ifdef TEST_MEMBARRIER
5368dcb4
MD
/* State shared between the membarrier test's manager and worker threads. */
struct test_membarrier_thread_args {
	/* Set non-zero to ask the manager thread to leave its loop. */
	int stop;
	/*
	 * Address of the currently "active" struct percpu_list, stored
	 * as an integer; 0 until the manager thread has published one.
	 */
	intptr_t percpu_list_ptr;
};
1244
1245/* Worker threads modify data in their "active" percpu lists. */
1246static
1247void *test_membarrier_worker_thread(void *arg)
1248{
1249 struct test_membarrier_thread_args *args =
1250 (struct test_membarrier_thread_args *)arg;
1251 const int iters = opt_reps;
1252 int i;
1253
1254 if (rseq_register_current_thread()) {
1255 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1256 errno, strerror(errno));
1257 abort();
1258 }
1259
1260 /* Wait for initialization. */
1261 while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }
1262
1263 for (i = 0; i < iters; ++i) {
1264 int ret;
1265
1266 do {
369688a5 1267 int cpu = get_current_cpu_id();
5368dcb4 1268
369688a5
MD
1269 ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1270 &args->percpu_list_ptr,
5368dcb4
MD
1271 sizeof(struct percpu_list_entry) * cpu, 1, cpu);
1272 } while (rseq_unlikely(ret));
1273 }
1274
1275 if (rseq_unregister_current_thread()) {
1276 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1277 errno, strerror(errno));
1278 abort();
1279 }
1280 return NULL;
1281}
1282
1283static
1284void test_membarrier_init_percpu_list(struct percpu_list *list)
1285{
1286 int i;
1287
1288 memset(list, 0, sizeof(*list));
1289 for (i = 0; i < CPU_SETSIZE; i++) {
1290 struct percpu_list_node *node;
1291
1292 node = (struct percpu_list_node *) malloc(sizeof(*node));
1293 assert(node);
1294 node->data = 0;
1295 node->next = NULL;
1296 list->c[i].head = node;
1297 }
1298}
1299
1300static
1301void test_membarrier_free_percpu_list(struct percpu_list *list)
1302{
1303 int i;
1304
1305 for (i = 0; i < CPU_SETSIZE; i++)
1306 free(list->c[i].head);
1307}
1308
5368dcb4
MD
1309/*
1310 * The manager thread swaps per-cpu lists that worker threads see,
1311 * and validates that there are no unexpected modifications.
1312 */
1313static
1314void *test_membarrier_manager_thread(void *arg)
1315{
1316 struct test_membarrier_thread_args *args =
1317 (struct test_membarrier_thread_args *)arg;
1318 struct percpu_list list_a, list_b;
1319 intptr_t expect_a = 0, expect_b = 0;
1320 int cpu_a = 0, cpu_b = 0;
1321
1322 if (rseq_register_current_thread()) {
1323 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1324 errno, strerror(errno));
1325 abort();
1326 }
1327
1328 /* Init lists. */
1329 test_membarrier_init_percpu_list(&list_a);
1330 test_membarrier_init_percpu_list(&list_b);
1331
1332 /* Initialize lists before publishing them. */
1333 rseq_smp_wmb();
1334
1335 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
1336
1337 while (!RSEQ_READ_ONCE(args->stop)) {
1338 /* list_a is "active". */
1339 cpu_a = rand() % CPU_SETSIZE;
1340 /*
1341 * As list_b is "inactive", we should never see changes
1342 * to list_b.
1343 */
1344 if (expect_b != RSEQ_READ_ONCE(list_b.c[cpu_b].head->data)) {
1345 fprintf(stderr, "Membarrier test failed\n");
1346 abort();
1347 }
1348
1349 /* Make list_b "active". */
1350 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_b);
369688a5 1351 if (rseq_membarrier_expedited(cpu_a) &&
5368dcb4
MD
1352 errno != ENXIO /* missing CPU */) {
1353 perror("sys_membarrier");
1354 abort();
1355 }
1356 /*
1357 * Cpu A should now only modify list_b, so the values
1358 * in list_a should be stable.
1359 */
1360 expect_a = RSEQ_READ_ONCE(list_a.c[cpu_a].head->data);
1361
1362 cpu_b = rand() % CPU_SETSIZE;
1363 /*
1364 * As list_a is "inactive", we should never see changes
1365 * to list_a.
1366 */
1367 if (expect_a != RSEQ_READ_ONCE(list_a.c[cpu_a].head->data)) {
1368 fprintf(stderr, "Membarrier test failed\n");
1369 abort();
1370 }
1371
1372 /* Make list_a "active". */
1373 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
369688a5 1374 if (rseq_membarrier_expedited(cpu_b) &&
5368dcb4
MD
1375 errno != ENXIO /* missing CPU */) {
1376 perror("sys_membarrier");
1377 abort();
1378 }
1379 /* Remember a value from list_b. */
1380 expect_b = RSEQ_READ_ONCE(list_b.c[cpu_b].head->data);
1381 }
1382
1383 test_membarrier_free_percpu_list(&list_a);
1384 test_membarrier_free_percpu_list(&list_b);
1385
1386 if (rseq_unregister_current_thread()) {
1387 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1388 errno, strerror(errno));
1389 abort();
1390 }
1391 return NULL;
1392}
1393
1394static
1395void test_membarrier(void)
1396{
1397 const int num_threads = opt_threads;
1398 struct test_membarrier_thread_args thread_args;
1399 pthread_t worker_threads[num_threads];
1400 pthread_t manager_thread;
1401 int i, ret;
1402
d4bff8ed
MD
1403 if (!membarrier_private_expedited_rseq_available()) {
1404 fprintf(stderr, "Membarrier private expedited rseq not available. "
1405 "Skipping membarrier test.\n");
1406 return;
1407 }
5368dcb4
MD
1408 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1409 perror("sys_membarrier");
1410 abort();
1411 }
1412
1413 thread_args.stop = 0;
1414 thread_args.percpu_list_ptr = 0;
1415 ret = pthread_create(&manager_thread, NULL,
1416 test_membarrier_manager_thread, &thread_args);
1417 if (ret) {
1418 errno = ret;
1419 perror("pthread_create");
1420 abort();
1421 }
1422
1423 for (i = 0; i < num_threads; i++) {
1424 ret = pthread_create(&worker_threads[i], NULL,
1425 test_membarrier_worker_thread, &thread_args);
1426 if (ret) {
1427 errno = ret;
1428 perror("pthread_create");
1429 abort();
1430 }
1431 }
1432
1433
1434 for (i = 0; i < num_threads; i++) {
1435 ret = pthread_join(worker_threads[i], NULL);
1436 if (ret) {
1437 errno = ret;
1438 perror("pthread_join");
1439 abort();
1440 }
1441 }
1442
1443 RSEQ_WRITE_ONCE(thread_args.stop, 1);
1444 ret = pthread_join(manager_thread, NULL);
1445 if (ret) {
1446 errno = ret;
1447 perror("pthread_join");
1448 abort();
1449 }
1450}
369688a5 1451#else /* TEST_MEMBARRIER */
5368dcb4
MD
/*
 * Stub used when TEST_MEMBARRIER is not defined: only prints to stderr
 * why the membarrier test is being skipped.
 */
static
void test_membarrier(void)
{
	if (membarrier_private_expedited_rseq_available()) {
		fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
				"Skipping membarrier test.\n");
		return;
	}
	fprintf(stderr, "Membarrier private expedited rseq not available. "
			"Skipping membarrier test.\n");
}
1463#endif
1464
/*
 * Print the command-line help to stdout.
 *
 * @argv: program argument vector; only argv[0] is used, as the program
 *        name in the usage line.
 */
static void show_usage(char **argv)
{
	static const char *const option_lines[] = {
		"OPTIONS:\n",
		" [-1 loops] Number of loops for delay injection 1\n",
		" [-2 loops] Number of loops for delay injection 2\n",
		" [-3 loops] Number of loops for delay injection 3\n",
		" [-4 loops] Number of loops for delay injection 4\n",
		" [-5 loops] Number of loops for delay injection 5\n",
		" [-6 loops] Number of loops for delay injection 6\n",
		" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n",
		" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n",
		" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n",
		" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n",
		" [-y] Yield\n",
		" [-k] Kill thread with signal\n",
		" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n",
		" [-t N] Number of threads (default 200)\n",
		" [-r N] Number of repetitions per thread (default 5000)\n",
		" [-d] Disable rseq system call (no initialization)\n",
		" [-D M] Disable rseq for each M threads\n",
		" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n",
		" [-M] Push into buffer and memcpy buffer with memory barriers.\n",
		" [-c] Check if the rseq syscall is available.\n",
		" [-v] Verbose output.\n",
		" [-h] Show this help.\n",
		"\n",
	};
	size_t line;

	printf("Usage : %s <OPTIONS>\n", argv[0]);
	for (line = 0; line < sizeof(option_lines) / sizeof(option_lines[0]); line++)
		fputs(option_lines[line], stdout);
}
1494
1495int main(int argc, char **argv)
1496{
1497 int i;
1498
1499 for (i = 1; i < argc; i++) {
1500 if (argv[i][0] != '-')
1501 continue;
1502 switch (argv[i][1]) {
1503 case '1':
1504 case '2':
1505 case '3':
1506 case '4':
1507 case '5':
1508 case '6':
1509 case '7':
1510 case '8':
1511 case '9':
1512 if (argc < i + 2) {
544cdc88 1513 show_usage(argv);
31b44ba2
MD
1514 goto error;
1515 }
1516 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1517 i++;
1518 break;
1519 case 'm':
1520 if (argc < i + 2) {
544cdc88 1521 show_usage(argv);
31b44ba2
MD
1522 goto error;
1523 }
1524 opt_modulo = atol(argv[i + 1]);
1525 if (opt_modulo < 0) {
544cdc88 1526 show_usage(argv);
31b44ba2
MD
1527 goto error;
1528 }
1529 i++;
1530 break;
1531 case 's':
1532 if (argc < i + 2) {
544cdc88 1533 show_usage(argv);
31b44ba2
MD
1534 goto error;
1535 }
1536 opt_sleep = atol(argv[i + 1]);
1537 if (opt_sleep < 0) {
544cdc88 1538 show_usage(argv);
31b44ba2
MD
1539 goto error;
1540 }
1541 i++;
1542 break;
1543 case 'y':
1544 opt_yield = 1;
1545 break;
1546 case 'k':
1547 opt_signal = 1;
1548 break;
1549 case 'd':
1550 opt_disable_rseq = 1;
1551 break;
1552 case 'D':
1553 if (argc < i + 2) {
544cdc88 1554 show_usage(argv);
31b44ba2
MD
1555 goto error;
1556 }
1557 opt_disable_mod = atol(argv[i + 1]);
1558 if (opt_disable_mod < 0) {
544cdc88 1559 show_usage(argv);
31b44ba2
MD
1560 goto error;
1561 }
1562 i++;
1563 break;
1564 case 't':
1565 if (argc < i + 2) {
544cdc88 1566 show_usage(argv);
31b44ba2
MD
1567 goto error;
1568 }
1569 opt_threads = atol(argv[i + 1]);
1570 if (opt_threads < 0) {
544cdc88 1571 show_usage(argv);
31b44ba2
MD
1572 goto error;
1573 }
1574 i++;
1575 break;
1576 case 'r':
1577 if (argc < i + 2) {
544cdc88 1578 show_usage(argv);
31b44ba2
MD
1579 goto error;
1580 }
1581 opt_reps = atoll(argv[i + 1]);
1582 if (opt_reps < 0) {
544cdc88 1583 show_usage(argv);
31b44ba2
MD
1584 goto error;
1585 }
1586 i++;
1587 break;
1588 case 'h':
544cdc88 1589 show_usage(argv);
31b44ba2
MD
1590 goto end;
1591 case 'T':
1592 if (argc < i + 2) {
544cdc88 1593 show_usage(argv);
31b44ba2
MD
1594 goto error;
1595 }
1596 opt_test = *argv[i + 1];
1597 switch (opt_test) {
1598 case 's':
1599 case 'l':
1600 case 'i':
1601 case 'b':
1602 case 'm':
5368dcb4 1603 case 'r':
31b44ba2
MD
1604 break;
1605 default:
544cdc88 1606 show_usage(argv);
31b44ba2
MD
1607 goto error;
1608 }
1609 i++;
1610 break;
1611 case 'v':
1612 verbose = 1;
1613 break;
1614 case 'M':
369688a5 1615 opt_mo = RSEQ_MO_RELEASE;
31b44ba2 1616 break;
d1cdec98 1617 case 'c':
8b34114a 1618 if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
d1cdec98
MJ
1619 printf_verbose("The rseq syscall is available.\n");
1620 goto end;
1621 } else {
1622 printf_verbose("The rseq syscall is unavailable.\n");
1623 goto no_rseq;
1624 }
31b44ba2 1625 default:
544cdc88 1626 show_usage(argv);
31b44ba2
MD
1627 goto error;
1628 }
1629 }
1630
1631 loop_cnt_1 = loop_cnt[1];
1632 loop_cnt_2 = loop_cnt[2];
1633 loop_cnt_3 = loop_cnt[3];
1634 loop_cnt_4 = loop_cnt[4];
1635 loop_cnt_5 = loop_cnt[5];
1636 loop_cnt_6 = loop_cnt[6];
1637
1638 if (set_signal_handler())
1639 goto error;
1640
1641 if (!opt_disable_rseq && rseq_register_current_thread())
1642 goto error;
369688a5
MD
1643 if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
1644 fprintf(stderr, "Error: cpu id getter unavailable\n");
1645 goto error;
1646 }
31b44ba2
MD
1647 switch (opt_test) {
1648 case 's':
1649 printf_verbose("spinlock\n");
1650 test_percpu_spinlock();
1651 break;
1652 case 'l':
1653 printf_verbose("linked list\n");
1654 test_percpu_list();
1655 break;
1656 case 'b':
1657 printf_verbose("buffer\n");
1658 test_percpu_buffer();
1659 break;
1660 case 'm':
1661 printf_verbose("memcpy buffer\n");
1662 test_percpu_memcpy_buffer();
1663 break;
1664 case 'i':
1665 printf_verbose("counter increment\n");
1666 test_percpu_inc();
1667 break;
5368dcb4
MD
1668 case 'r':
1669 printf_verbose("membarrier\n");
1670 test_membarrier();
1671 break;
31b44ba2
MD
1672 }
1673 if (!opt_disable_rseq && rseq_unregister_current_thread())
1674 abort();
1675end:
1676 return 0;
1677
1678error:
1679 return -1;
d1cdec98
MJ
1680
1681no_rseq:
1682 return 2;
31b44ba2 1683}
This page took 0.11114 seconds and 4 git commands to generate.