[librseq.git] / tests / param_test.c
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <assert.h>
#include <linux/version.h>
#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>

#if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
enum {
	MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ		= (1 << 7),
	MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ	= (1 << 8),
};

enum {
	MEMBARRIER_CMD_FLAG_CPU		= (1 << 0),
};
#endif

#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

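/*
 * Expose each loop counter under a fixed assembly symbol name
 * (asm_loop_cnt_1..6) so the x86 RSEQ_INJECT_ASM() variants below can
 * load them directly by symbol; the other architectures pass loop_cnt[]
 * through RSEQ_INJECT_INPUT operands instead.
 */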
static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's', opt_mb = 0;

#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
#else
static long long opt_reps = 100;
#endif

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static inline pid_t rseq_gettid(void)
{
	return syscall(__NR_gettid);
}

static __thread __attribute__((tls_model("initial-exec"), unused))
int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)		\
	do {					\
		if (verbose)			\
			printf(fmt, ## __VA_ARGS__); \
	} while (0)

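/*
 * Delay injection: each architecture defines RSEQ_INJECT_ASM(n) as a
 * small busy-wait loop spinning loop_cnt[n] times, which the per-arch
 * rseq headers expand at injection point n when building this test.
 * This widens the race windows so that preemption, signal delivery and
 * aborts are more likely to land inside the critical sections under test.
 */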
#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	"	cbz	" INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	"	cbnz	" INJECT_ASM_REG ", 222b\n" \
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#elif defined(__riscv)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"t1"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif

#define RSEQ_INJECT_FAILED \
	nr_abort++;

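/*
 * C-level injection hook: busy-wait loop_cnt[n] iterations, and when a
 * counter is set to -1 (see the -7/-8/-9 options), optionally sleep,
 * yield or send SIGUSR1 to self every opt_modulo iterations to disturb
 * the critical sections from C code as well.
 */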
#define RSEQ_INJECT_C(n)				\
{							\
	int loc_i, loc_nr_loops = loop_cnt[n];		\
							\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier();				\
	}						\
	if (loc_nr_loops == -1 && opt_modulo) {		\
		if (yield_mod_cnt == opt_modulo - 1) {	\
			if (opt_sleep > 0)		\
				poll(NULL, 0, opt_sleep); \
			if (opt_yield)			\
				sched_yield();		\
			if (opt_signal)			\
				raise(SIGUSR1);		\
			yield_mod_cnt = 0;		\
		} else {				\
			yield_mod_cnt++;		\
		}					\
	}						\
}

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include <rseq/rseq.h>

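/*
 * Per-cpu data entries are aligned on 128 bytes, presumably to keep each
 * CPU's entry on its own cache line(s) and avoid false sharing between CPUs.
 */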
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}

static void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
static void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

static void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = rseq_cpu_start();
			ret = rseq_addv(&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n",
				       (int) rseq_gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

static void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

static void this_cpu_list_push(struct percpu_list *list,
			       struct percpu_list_node *node,
			       int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
						  int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		long offset;
		int ret;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
static struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

static void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
static void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = (struct percpu_list_node *) malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

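/*
 * Push: within a single rseq critical section, speculatively store the
 * node pointer into array[offset], then commit by storing offset + 1 as
 * the final store (with release semantic when -M is used).
 */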
static bool this_cpu_buffer_push(struct percpu_buffer *buffer,
				 struct percpu_buffer_node *node,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trystorev_storev_release(
				targetptr_final, offset, targetptr_spec,
				newval_spec, newval_final, cpu);
		else
			ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
				offset, targetptr_spec, newval_spec,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
						      int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
						      int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}

static void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
static void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst-case is every item in same CPU. */
		buffer.c[i].array =
			(struct percpu_buffer_node **)
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = (struct percpu_buffer_node *) malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

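/*
 * Same push/pop scheme as above, but items are copied by value with
 * rseq_cmpeqv_trymemcpy_storev(), modelling objects larger than a
 * single word.
 */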
static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
					struct percpu_memcpy_buffer_node item,
					int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trymemcpy_storev_release(
				targetptr_final, offset,
				destptr, srcptr, copylen,
				newval_final, cpu);
		else
			ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
				offset, destptr, srcptr, copylen,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				       struct percpu_memcpy_buffer_node *item,
				       int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
			offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				       struct percpu_memcpy_buffer_node *item,
				       int cpu)
{
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return false;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
	return true;
}

static void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) rseq_gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
static void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst-case is every item in same CPU. */
		buffer.c[i].array =
			(struct percpu_memcpy_buffer_node *)
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
{
	signals_delivered++;
}

static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}

static
int sys_membarrier(int cmd, int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}

static
bool membarrier_private_expedited_rseq_available(void)
{
	int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);

	if (status < 0) {
		perror("membarrier");
		return false;
	}
	if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
		return false;
	return true;
}

/* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
struct test_membarrier_thread_args {
	int stop;
	intptr_t percpu_list_ptr;
};

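/*
 * Protocol: the manager publishes the currently "active" list through
 * percpu_list_ptr. Workers increment the head node's data for their
 * current CPU with rseq_offset_deref_addv(), dereferencing that pointer
 * inside the rseq critical section. After switching percpu_list_ptr,
 * the manager issues MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ targeting
 * the CPU of interest so that any critical section still using the old
 * pointer is restarted before the old list is checked for stability.
 */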
/* Worker threads modify data in their "active" percpu lists. */
static
void *test_membarrier_worker_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	const int iters = opt_reps;
	int i;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Wait for initialization. */
	while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }

	for (i = 0; i < iters; ++i) {
		int ret;

		do {
			int cpu = rseq_cpu_start();

			ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
		} while (rseq_unlikely(ret));
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

static
void test_membarrier_init_percpu_list(struct percpu_list *list)
{
	int i;

	memset(list, 0, sizeof(*list));
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		node = (struct percpu_list_node *) malloc(sizeof(*node));
		assert(node);
		node->data = 0;
		node->next = NULL;
		list->c[i].head = node;
	}
}

static
void test_membarrier_free_percpu_list(struct percpu_list *list)
{
	int i;

	for (i = 0; i < CPU_SETSIZE; i++)
		free(list->c[i].head);
}

/*
 * The manager thread swaps per-cpu lists that worker threads see,
 * and validates that there are no unexpected modifications.
 */
static
void *test_membarrier_manager_thread(void *arg)
{
	struct test_membarrier_thread_args *args =
		(struct test_membarrier_thread_args *)arg;
	struct percpu_list list_a, list_b;
	intptr_t expect_a = 0, expect_b = 0;
	int cpu_a = 0, cpu_b = 0;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	/* Init lists. */
	test_membarrier_init_percpu_list(&list_a);
	test_membarrier_init_percpu_list(&list_b);

	/* Initialize lists before publishing them. */
	rseq_smp_wmb();

	RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);

	while (!RSEQ_READ_ONCE(args->stop)) {
		/* list_a is "active". */
		cpu_a = rand() % CPU_SETSIZE;
		/*
		 * As list_b is "inactive", we should never see changes
		 * to list_b.
		 */
		if (expect_b != RSEQ_READ_ONCE(list_b.c[cpu_b].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_b "active". */
		RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_b);
		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
					MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/*
		 * CPU A should now only modify list_b, so the values
		 * in list_a should be stable.
		 */
		expect_a = RSEQ_READ_ONCE(list_a.c[cpu_a].head->data);

		cpu_b = rand() % CPU_SETSIZE;
		/*
		 * As list_a is "inactive", we should never see changes
		 * to list_a.
		 */
		if (expect_a != RSEQ_READ_ONCE(list_a.c[cpu_a].head->data)) {
			fprintf(stderr, "Membarrier test failed\n");
			abort();
		}

		/* Make list_a "active". */
		RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
					MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
				errno != ENXIO /* missing CPU */) {
			perror("sys_membarrier");
			abort();
		}
		/* Remember a value from list_b. */
		expect_b = RSEQ_READ_ONCE(list_b.c[cpu_b].head->data);
	}

	test_membarrier_free_percpu_list(&list_a);
	test_membarrier_free_percpu_list(&list_b);

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	return NULL;
}

static
void test_membarrier(void)
{
	const int num_threads = opt_threads;
	struct test_membarrier_thread_args thread_args;
	pthread_t worker_threads[num_threads];
	pthread_t manager_thread;
	int i, ret;

	if (!membarrier_private_expedited_rseq_available()) {
		fprintf(stderr, "Membarrier private expedited rseq not available. "
				"Skipping membarrier test.\n");
		return;
	}
	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
		perror("sys_membarrier");
		abort();
	}

	thread_args.stop = 0;
	thread_args.percpu_list_ptr = 0;
	ret = pthread_create(&manager_thread, NULL,
			     test_membarrier_manager_thread, &thread_args);
	if (ret) {
		errno = ret;
		perror("pthread_create");
		abort();
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&worker_threads[i], NULL,
				     test_membarrier_worker_thread, &thread_args);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(worker_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	RSEQ_WRITE_ONCE(thread_args.stop, 1);
	ret = pthread_join(manager_thread, NULL);
	if (ret) {
		errno = ret;
		perror("pthread_join");
		abort();
	}
}
#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
static
void test_membarrier(void)
{
	if (!membarrier_private_expedited_rseq_available()) {
		fprintf(stderr, "Membarrier private expedited rseq not available. "
				"Skipping membarrier test.\n");
		return;
	}
	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
			"Skipping membarrier test.\n");
}
#endif

static void show_usage(char **argv)
{
	printf("Usage : %s <OPTIONS>\n",
		argv[0]);
	printf("OPTIONS:\n");
	printf("	[-1 loops] Number of loops for delay injection 1\n");
	printf("	[-2 loops] Number of loops for delay injection 2\n");
	printf("	[-3 loops] Number of loops for delay injection 3\n");
	printf("	[-4 loops] Number of loops for delay injection 4\n");
	printf("	[-5 loops] Number of loops for delay injection 5\n");
	printf("	[-6 loops] Number of loops for delay injection 6\n");
	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf("	[-y] Yield\n");
	printf("	[-k] Kill thread with signal\n");
	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf("	[-t N] Number of threads (default 200)\n");
	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
	printf("	[-d] Disable rseq system call (no initialization)\n");
	printf("	[-D M] Disable rseq for each M threads\n");
	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf("	[-c] Check if the rseq syscall is available.\n");
	printf("	[-v] Verbose output.\n");
	printf("	[-h] Show this help.\n");
	printf("\n");
}

int main(int argc, char **argv)
{
	int i;

	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argv);
				goto error;
			}
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argv);
				goto error;
			}
			opt_reps = atoll(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
			case 'b':
			case 'm':
			case 'r':
				break;
			default:
				show_usage(argv);
				goto error;
			}
			i++;
			break;
		case 'v':
			verbose = 1;
			break;
		case 'M':
			opt_mb = 1;
			break;
		case 'c':
			if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
				printf_verbose("The rseq syscall is available.\n");
				goto end;
			} else {
				printf_verbose("The rseq syscall is unavailable.\n");
				goto no_rseq;
			}
		default:
			show_usage(argv);
			goto error;
		}
	}

	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	case 'r':
		printf_verbose("membarrier\n");
		test_membarrier();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;

no_rseq:
	return 2;
}