librseq.git: tests/param_test.c
90702366 1// SPDX-License-Identifier: MIT
f2d7b530 2// SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE
5#endif
6#include <assert.h>
cb900b45 7#include <linux/version.h>
5368dcb4 8#include <linux/membarrier.h>
9#include <pthread.h>
10#include <sched.h>
11#include <stdint.h>
12#include <stdio.h>
13#include <stdlib.h>
14#include <string.h>
15#include <syscall.h>
16#include <unistd.h>
17#include <poll.h>
18#include <sys/types.h>
19#include <signal.h>
20#include <errno.h>
21#include <stddef.h>
369688a5 22#include <stdbool.h>
31b44ba2 23
24#if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
25enum {
26 MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7),
27 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8),
28};
29
30enum {
31 MEMBARRIER_CMD_FLAG_CPU = (1 << 0),
32};
33#endif
34
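/*
 * Delay injection counters: loop_cnt[1..9] hold the number of busy-loop
 * iterations injected at each instrumentation point (set with the -1 .. -9
 * command line options). The asm_loop_cnt_* aliases below expose the first
 * six counters to the per-architecture inline assembly injection macros.
 */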
35#define NR_INJECT 9
36static int loop_cnt[NR_INJECT + 1];
37
38static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
39static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
40static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
41static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
42static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
43static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
44
45static int opt_modulo, verbose;
46
47static int opt_yield, opt_signal, opt_sleep,
48 opt_disable_rseq, opt_threads = 200,
369688a5 49 opt_disable_mod = 0, opt_test = 's';
31b44ba2 50
31b44ba2 51static long long opt_reps = 5000;
52
53static __thread __attribute__((tls_model("initial-exec")))
54unsigned int signals_delivered;
55
56static inline pid_t rseq_gettid(void)
57{
58 return syscall(__NR_gettid);
59}
60
61#ifndef BENCHMARK
62
63static __thread __attribute__((tls_model("initial-exec"), unused))
64int yield_mod_cnt, nr_abort;
65
66#define printf_verbose(fmt, ...) \
67 do { \
68 if (verbose) \
69 printf(fmt, ## __VA_ARGS__); \
70 } while (0)
71
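/*
 * Per-architecture delay injection. RSEQ_INJECT_ASM(n) emits a busy-wait
 * loop of loop_cnt[n] iterations inside the rseq critical sections, and
 * RSEQ_INJECT_CLOBBER/RSEQ_INJECT_INPUT declare the scratch register and
 * memory operands it needs. This widens the race windows so aborts and
 * preemption are actually exercised by the tests.
 */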
72#ifdef __i386__
73
74#define INJECT_ASM_REG "eax"
75
76#define RSEQ_INJECT_CLOBBER \
77 , INJECT_ASM_REG
78
79#define RSEQ_INJECT_ASM(n) \
80 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
81 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
82 "jz 333f\n\t" \
83 "222:\n\t" \
84 "dec %%" INJECT_ASM_REG "\n\t" \
85 "jnz 222b\n\t" \
86 "333:\n\t"
87
88#elif defined(__x86_64__)
89
90#define INJECT_ASM_REG_P "rax"
91#define INJECT_ASM_REG "eax"
92
93#define RSEQ_INJECT_CLOBBER \
94 , INJECT_ASM_REG_P \
95 , INJECT_ASM_REG
96
97#define RSEQ_INJECT_ASM(n) \
98 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
99 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
100 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
101 "jz 333f\n\t" \
102 "222:\n\t" \
103 "dec %%" INJECT_ASM_REG "\n\t" \
104 "jnz 222b\n\t" \
105 "333:\n\t"
106
107#elif defined(__s390__)
108
109#define RSEQ_INJECT_INPUT \
110 , [loop_cnt_1]"m"(loop_cnt[1]) \
111 , [loop_cnt_2]"m"(loop_cnt[2]) \
112 , [loop_cnt_3]"m"(loop_cnt[3]) \
113 , [loop_cnt_4]"m"(loop_cnt[4]) \
114 , [loop_cnt_5]"m"(loop_cnt[5]) \
115 , [loop_cnt_6]"m"(loop_cnt[6])
116
117#define INJECT_ASM_REG "r12"
118
119#define RSEQ_INJECT_CLOBBER \
120 , INJECT_ASM_REG
121
122#define RSEQ_INJECT_ASM(n) \
123 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
124 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
125 "je 333f\n\t" \
126 "222:\n\t" \
127 "ahi %%" INJECT_ASM_REG ", -1\n\t" \
128 "jnz 222b\n\t" \
129 "333:\n\t"
130
131#elif defined(__ARMEL__)
132
133#define RSEQ_INJECT_INPUT \
134 , [loop_cnt_1]"m"(loop_cnt[1]) \
135 , [loop_cnt_2]"m"(loop_cnt[2]) \
136 , [loop_cnt_3]"m"(loop_cnt[3]) \
137 , [loop_cnt_4]"m"(loop_cnt[4]) \
138 , [loop_cnt_5]"m"(loop_cnt[5]) \
139 , [loop_cnt_6]"m"(loop_cnt[6])
140
141#define INJECT_ASM_REG "r4"
142
143#define RSEQ_INJECT_CLOBBER \
144 , INJECT_ASM_REG
145
146#define RSEQ_INJECT_ASM(n) \
147 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
148 "cmp " INJECT_ASM_REG ", #0\n\t" \
149 "beq 333f\n\t" \
150 "222:\n\t" \
151 "subs " INJECT_ASM_REG ", #1\n\t" \
152 "bne 222b\n\t" \
153 "333:\n\t"
154
155#elif defined(__AARCH64EL__)
156
157#define RSEQ_INJECT_INPUT \
158 , [loop_cnt_1] "Qo" (loop_cnt[1]) \
159 , [loop_cnt_2] "Qo" (loop_cnt[2]) \
160 , [loop_cnt_3] "Qo" (loop_cnt[3]) \
161 , [loop_cnt_4] "Qo" (loop_cnt[4]) \
162 , [loop_cnt_5] "Qo" (loop_cnt[5]) \
163 , [loop_cnt_6] "Qo" (loop_cnt[6])
164
165#define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
166
167#define RSEQ_INJECT_ASM(n) \
168 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
169 " cbz " INJECT_ASM_REG ", 333f\n" \
170 "222:\n" \
171 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
172 " cbnz " INJECT_ASM_REG ", 222b\n" \
173 "333:\n"
174
f1c6b55b 175#elif defined(__PPC__)
176
177#define RSEQ_INJECT_INPUT \
178 , [loop_cnt_1]"m"(loop_cnt[1]) \
179 , [loop_cnt_2]"m"(loop_cnt[2]) \
180 , [loop_cnt_3]"m"(loop_cnt[3]) \
181 , [loop_cnt_4]"m"(loop_cnt[4]) \
182 , [loop_cnt_5]"m"(loop_cnt[5]) \
183 , [loop_cnt_6]"m"(loop_cnt[6])
184
185#define INJECT_ASM_REG "r18"
186
187#define RSEQ_INJECT_CLOBBER \
188 , INJECT_ASM_REG
189
190#define RSEQ_INJECT_ASM(n) \
191 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
192 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
193 "beq 333f\n\t" \
194 "222:\n\t" \
195 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
196 "bne 222b\n\t" \
197 "333:\n\t"
198
199#elif defined(__mips__)
200
201#define RSEQ_INJECT_INPUT \
202 , [loop_cnt_1]"m"(loop_cnt[1]) \
203 , [loop_cnt_2]"m"(loop_cnt[2]) \
204 , [loop_cnt_3]"m"(loop_cnt[3]) \
205 , [loop_cnt_4]"m"(loop_cnt[4]) \
206 , [loop_cnt_5]"m"(loop_cnt[5]) \
207 , [loop_cnt_6]"m"(loop_cnt[6])
208
209#define INJECT_ASM_REG "$5"
210
211#define RSEQ_INJECT_CLOBBER \
212 , INJECT_ASM_REG
213
214#define RSEQ_INJECT_ASM(n) \
215 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
216 "beqz " INJECT_ASM_REG ", 333f\n\t" \
217 "222:\n\t" \
218 "addiu " INJECT_ASM_REG ", -1\n\t" \
219 "bnez " INJECT_ASM_REG ", 222b\n\t" \
220 "333:\n\t"
221
222#elif defined(__riscv)
223
224#define RSEQ_INJECT_INPUT \
225 , [loop_cnt_1]"m"(loop_cnt[1]) \
226 , [loop_cnt_2]"m"(loop_cnt[2]) \
227 , [loop_cnt_3]"m"(loop_cnt[3]) \
228 , [loop_cnt_4]"m"(loop_cnt[4]) \
229 , [loop_cnt_5]"m"(loop_cnt[5]) \
230 , [loop_cnt_6]"m"(loop_cnt[6])
231
232#define INJECT_ASM_REG "t1"
233
234#define RSEQ_INJECT_CLOBBER \
235 , INJECT_ASM_REG
236
237#define RSEQ_INJECT_ASM(n) \
238 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
239 "beqz " INJECT_ASM_REG ", 333f\n\t" \
240 "222:\n\t" \
241 "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
242 "bnez " INJECT_ASM_REG ", 222b\n\t" \
243 "333:\n\t"
244
245#else
246#error unsupported target
247#endif
248
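/*
 * Hooks picked up by <rseq/rseq.h> (included below): RSEQ_INJECT_FAILED
 * counts each abort taken by a critical section, and RSEQ_INJECT_C(n)
 * injects C-level delays, yields, sleeps or signals according to the
 * command line options.
 */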
249#define RSEQ_INJECT_FAILED \
250 nr_abort++;
251
252#define RSEQ_INJECT_C(n) \
253{ \
254 int loc_i, loc_nr_loops = loop_cnt[n]; \
255 \
256 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
257 rseq_barrier(); \
258 } \
259 if (loc_nr_loops == -1 && opt_modulo) { \
260 if (yield_mod_cnt == opt_modulo - 1) { \
261 if (opt_sleep > 0) \
262 poll(NULL, 0, opt_sleep); \
263 if (opt_yield) \
264 sched_yield(); \
265 if (opt_signal) \
266 raise(SIGUSR1); \
267 yield_mod_cnt = 0; \
268 } else { \
269 yield_mod_cnt++; \
270 } \
271 } \
272}
273
274#else
275
276#define printf_verbose(fmt, ...)
277
278#endif /* BENCHMARK */
279
280#include <rseq/rseq.h>
281
282static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
283
284static int sys_membarrier(int cmd, int flags, int cpu_id)
285{
286 return syscall(__NR_membarrier, cmd, flags, cpu_id);
287}
288
289#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
290#define TEST_MEMBARRIER
291#endif
292
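/*
 * Select how per-CPU data is indexed: with BUILDOPT_RSEQ_PERCPU_MM_CID the
 * concurrency id (mm_cid) is used as the index and membarrier has to target
 * all CPUs; otherwise the raw cpu_id is used and the expedited membarrier
 * can target a single CPU.
 */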
293#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
294# define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
295static
296int get_current_cpu_id(void)
297{
298 return rseq_current_mm_cid();
299}
300static
301bool rseq_validate_cpu_id(void)
302{
303 return rseq_mm_cid_available();
304}
305static
306bool rseq_use_cpu_index(void)
307{
308 return false; /* Use mm_cid */
309}
310# ifdef TEST_MEMBARRIER
311/*
312 * Membarrier does not currently support targeting a mm_cid, so
313 * issue the barrier on all cpus.
314 */
315static
316int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
317{
318 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
319 0, 0);
320}
321# endif /* TEST_MEMBARRIER */
322#else
323# define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
324static
325int get_current_cpu_id(void)
326{
327 return rseq_cpu_start();
328}
329static
330bool rseq_validate_cpu_id(void)
331{
332 return rseq_current_cpu_raw() >= 0;
333}
334static
335bool rseq_use_cpu_index(void)
336{
337 return true; /* Use cpu_id as index. */
338}
339# ifdef TEST_MEMBARRIER
340static
341int rseq_membarrier_expedited(int cpu)
342{
343 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
344 MEMBARRIER_CMD_FLAG_CPU, cpu);
345}
346# endif /* TEST_MEMBARRIER */
347#endif
348
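/* Per-CPU entries are aligned on 128 bytes to avoid false sharing. */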
349struct percpu_lock_entry {
350 intptr_t v;
351} __attribute__((aligned(128)));
352
353struct percpu_lock {
354 struct percpu_lock_entry c[CPU_SETSIZE];
355};
356
357struct test_data_entry {
358 intptr_t count;
359} __attribute__((aligned(128)));
360
361struct spinlock_test_data {
362 struct percpu_lock lock;
363 struct test_data_entry c[CPU_SETSIZE];
364};
365
366struct spinlock_thread_test_data {
367 struct spinlock_test_data *data;
368 long long reps;
369 int reg;
370};
371
372struct inc_test_data {
373 struct test_data_entry c[CPU_SETSIZE];
374};
375
376struct inc_thread_test_data {
377 struct inc_test_data *data;
378 long long reps;
379 int reg;
380};
381
382struct percpu_list_node {
383 intptr_t data;
384 struct percpu_list_node *next;
385};
386
387struct percpu_list_entry {
388 struct percpu_list_node *head;
389} __attribute__((aligned(128)));
390
391struct percpu_list {
392 struct percpu_list_entry c[CPU_SETSIZE];
393};
394
395#define BUFFER_ITEM_PER_CPU 100
396
397struct percpu_buffer_node {
398 intptr_t data;
399};
400
401struct percpu_buffer_entry {
402 intptr_t offset;
403 intptr_t buflen;
404 struct percpu_buffer_node **array;
405} __attribute__((aligned(128)));
406
407struct percpu_buffer {
408 struct percpu_buffer_entry c[CPU_SETSIZE];
409};
410
411#define MEMCPY_BUFFER_ITEM_PER_CPU 100
412
413struct percpu_memcpy_buffer_node {
414 intptr_t data1;
415 uint64_t data2;
416};
417
418struct percpu_memcpy_buffer_entry {
419 intptr_t offset;
420 intptr_t buflen;
421 struct percpu_memcpy_buffer_node *array;
422} __attribute__((aligned(128)));
423
424struct percpu_memcpy_buffer {
425 struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
426};
427
428/* A simple per-cpu spinlock. Grabs the lock on the current CPU. */
429static int rseq_this_cpu_lock(struct percpu_lock *lock)
430{
431 int cpu;
432
433 for (;;) {
434 int ret;
435
369688a5 436 cpu = get_current_cpu_id();
437 if (cpu < 0) {
438 fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
439 getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
440 abort();
441 }
41149e28 442 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 443 &lock->c[cpu].v,
444 0, 1, cpu);
445 if (rseq_likely(!ret))
446 break;
447 /* Retry if comparison fails or rseq aborts. */
448 }
449 /*
450 * Acquire semantic when taking lock after control dependency.
451 * Matches rseq_smp_store_release().
452 */
453 rseq_smp_acquire__after_ctrl_dep();
454 return cpu;
455}
456
457static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
458{
459 assert(lock->c[cpu].v == 1);
460 /*
461 * Release lock, with release semantic. Matches
462 * rseq_smp_acquire__after_ctrl_dep().
463 */
464 rseq_smp_store_release(&lock->c[cpu].v, 0);
465}
466
6e284b80 467static void *test_percpu_spinlock_thread(void *arg)
31b44ba2 468{
d268885a 469 struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
470 struct spinlock_test_data *data = thread_data->data;
471 long long i, reps;
472
473 if (!opt_disable_rseq && thread_data->reg &&
474 rseq_register_current_thread())
475 abort();
476 reps = thread_data->reps;
477 for (i = 0; i < reps; i++) {
af895f04 478 int cpu = rseq_this_cpu_lock(&data->lock);
479 data->c[cpu].count++;
480 rseq_percpu_unlock(&data->lock, cpu);
481#ifndef BENCHMARK
482 if (i != 0 && !(i % (reps / 10)))
483 printf_verbose("tid %d: count %lld\n",
484 (int) rseq_gettid(), i);
485#endif
486 }
487 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
488 (int) rseq_gettid(), nr_abort, signals_delivered);
489 if (!opt_disable_rseq && thread_data->reg &&
490 rseq_unregister_current_thread())
491 abort();
492 return NULL;
493}
494
495/*
496 * A simple test which implements a sharded counter using a per-cpu
497 * lock. Obviously real applications might prefer to simply use a
498 * per-cpu increment; however, this is reasonable for a test and the
499 * lock can be extended to synchronize more complicated operations.
500 */
6e284b80 501static void test_percpu_spinlock(void)
502{
503 const int num_threads = opt_threads;
504 int i, ret;
505 uint64_t sum;
506 pthread_t test_threads[num_threads];
507 struct spinlock_test_data data;
508 struct spinlock_thread_test_data thread_data[num_threads];
509
510 memset(&data, 0, sizeof(data));
511 for (i = 0; i < num_threads; i++) {
512 thread_data[i].reps = opt_reps;
513 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
514 thread_data[i].reg = 1;
515 else
516 thread_data[i].reg = 0;
517 thread_data[i].data = &data;
518 ret = pthread_create(&test_threads[i], NULL,
519 test_percpu_spinlock_thread,
520 &thread_data[i]);
521 if (ret) {
522 errno = ret;
523 perror("pthread_create");
524 abort();
525 }
526 }
527
528 for (i = 0; i < num_threads; i++) {
529 ret = pthread_join(test_threads[i], NULL);
530 if (ret) {
531 errno = ret;
532 perror("pthread_join");
533 abort();
534 }
535 }
536
537 sum = 0;
538 for (i = 0; i < CPU_SETSIZE; i++)
539 sum += data.c[i].count;
540
541 assert(sum == (uint64_t)opt_reps * num_threads);
542}
543
6e284b80 544static void *test_percpu_inc_thread(void *arg)
31b44ba2 545{
d268885a 546 struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
547 struct inc_test_data *data = thread_data->data;
548 long long i, reps;
549
550 if (!opt_disable_rseq && thread_data->reg &&
551 rseq_register_current_thread())
552 abort();
553 reps = thread_data->reps;
554 for (i = 0; i < reps; i++) {
555 int ret;
556
557 do {
558 int cpu;
559
369688a5 560 cpu = get_current_cpu_id();
41149e28 561 ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 562 &data->c[cpu].count, 1, cpu);
563 } while (rseq_unlikely(ret));
564#ifndef BENCHMARK
565 if (i != 0 && !(i % (reps / 10)))
566 printf_verbose("tid %d: count %lld\n",
567 (int) rseq_gettid(), i);
568#endif
569 }
570 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
571 (int) rseq_gettid(), nr_abort, signals_delivered);
572 if (!opt_disable_rseq && thread_data->reg &&
573 rseq_unregister_current_thread())
574 abort();
575 return NULL;
576}
577
6e284b80 578static void test_percpu_inc(void)
579{
580 const int num_threads = opt_threads;
581 int i, ret;
582 uint64_t sum;
583 pthread_t test_threads[num_threads];
584 struct inc_test_data data;
585 struct inc_thread_test_data thread_data[num_threads];
586
587 memset(&data, 0, sizeof(data));
588 for (i = 0; i < num_threads; i++) {
589 thread_data[i].reps = opt_reps;
590 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
591 thread_data[i].reg = 1;
592 else
593 thread_data[i].reg = 0;
594 thread_data[i].data = &data;
595 ret = pthread_create(&test_threads[i], NULL,
596 test_percpu_inc_thread,
597 &thread_data[i]);
598 if (ret) {
599 errno = ret;
600 perror("pthread_create");
601 abort();
602 }
603 }
604
605 for (i = 0; i < num_threads; i++) {
606 ret = pthread_join(test_threads[i], NULL);
607 if (ret) {
608 errno = ret;
609 perror("pthread_join");
610 abort();
611 }
612 }
613
614 sum = 0;
615 for (i = 0; i < CPU_SETSIZE; i++)
616 sum += data.c[i].count;
617
618 assert(sum == (uint64_t)opt_reps * num_threads);
619}
620
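/*
 * Push a node onto this CPU's list. The head pointer is republished with a
 * compare-and-store rseq operation, retried on abort or comparison failure,
 * so concurrent pushes on the same CPU cannot lose updates.
 */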
6e284b80 621static void this_cpu_list_push(struct percpu_list *list,
622 struct percpu_list_node *node,
623 int *_cpu)
624{
625 int cpu;
626
627 for (;;) {
628 intptr_t *targetptr, newval, expect;
629 int ret;
630
369688a5 631 cpu = get_current_cpu_id();
632 /* Load list->c[cpu].head with single-copy atomicity. */
633 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
634 newval = (intptr_t)node;
635 targetptr = (intptr_t *)&list->c[cpu].head;
636 node->next = (struct percpu_list_node *)expect;
41149e28 637 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 638 targetptr, expect, newval, cpu);
639 if (rseq_likely(!ret))
640 break;
641 /* Retry if comparison fails or rseq aborts. */
642 }
643 if (_cpu)
644 *_cpu = cpu;
645}
646
647/*
648 * Unlike a traditional lock-less linked list, the availability of an
649 * rseq primitive allows us to implement pop without concerns about
650 * ABA-type races.
651 */
6e284b80 652static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
653 int *_cpu)
654{
655 struct percpu_list_node *node = NULL;
656 int cpu;
657
658 for (;;) {
659 struct percpu_list_node *head;
660 intptr_t *targetptr, expectnot, *load;
661 long offset;
662 int ret;
31b44ba2 663
369688a5 664 cpu = get_current_cpu_id();
665 targetptr = (intptr_t *)&list->c[cpu].head;
666 expectnot = (intptr_t)NULL;
667 offset = offsetof(struct percpu_list_node, next);
668 load = (intptr_t *)&head;
41149e28 669 ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
670 targetptr, expectnot,
671 offset, load, cpu);
672 if (rseq_likely(!ret)) {
673 node = head;
674 break;
675 }
676 if (ret > 0)
677 break;
678 /* Retry if rseq aborts. */
679 }
680 if (_cpu)
681 *_cpu = cpu;
682 return node;
683}
684
685/*
686 * __percpu_list_pop is not safe against concurrent accesses. Should
687 * only be used on lists that are not concurrently modified.
688 */
6e284b80 689static struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
690{
691 struct percpu_list_node *node;
692
693 node = list->c[cpu].head;
694 if (!node)
695 return NULL;
696 list->c[cpu].head = node->next;
697 return node;
698}
699
6e284b80 700static void *test_percpu_list_thread(void *arg)
701{
702 long long i, reps;
703 struct percpu_list *list = (struct percpu_list *)arg;
704
705 if (!opt_disable_rseq && rseq_register_current_thread())
706 abort();
707
708 reps = opt_reps;
709 for (i = 0; i < reps; i++) {
710 struct percpu_list_node *node;
711
712 node = this_cpu_list_pop(list, NULL);
713 if (opt_yield)
714 sched_yield(); /* encourage shuffling */
715 if (node)
716 this_cpu_list_push(list, node, NULL);
717 }
718
719 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
720 (int) rseq_gettid(), nr_abort, signals_delivered);
721 if (!opt_disable_rseq && rseq_unregister_current_thread())
722 abort();
723
724 return NULL;
725}
726
727/* Simultaneous modification to a per-cpu linked list from many threads. */
6e284b80 728static void test_percpu_list(void)
729{
730 const int num_threads = opt_threads;
731 int i, j, ret;
732 uint64_t sum = 0, expected_sum = 0;
733 struct percpu_list list;
734 pthread_t test_threads[num_threads];
735 cpu_set_t allowed_cpus;
736
737 memset(&list, 0, sizeof(list));
738
739 /* Generate list entries for every usable cpu. */
740 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
741 for (i = 0; i < CPU_SETSIZE; i++) {
40797ae3 742 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
743 continue;
744 for (j = 1; j <= 100; j++) {
745 struct percpu_list_node *node;
746
747 expected_sum += j;
748
d268885a 749 node = (struct percpu_list_node *) malloc(sizeof(*node));
750 assert(node);
751 node->data = j;
752 node->next = list.c[i].head;
753 list.c[i].head = node;
754 }
755 }
756
757 for (i = 0; i < num_threads; i++) {
758 ret = pthread_create(&test_threads[i], NULL,
759 test_percpu_list_thread, &list);
760 if (ret) {
761 errno = ret;
762 perror("pthread_create");
763 abort();
764 }
765 }
766
767 for (i = 0; i < num_threads; i++) {
768 ret = pthread_join(test_threads[i], NULL);
769 if (ret) {
770 errno = ret;
771 perror("pthread_join");
772 abort();
773 }
774 }
775
776 for (i = 0; i < CPU_SETSIZE; i++) {
777 struct percpu_list_node *node;
778
40797ae3 779 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
31b44ba2
MD
780 continue;
781
782 while ((node = __percpu_list_pop(&list, i))) {
783 sum += node->data;
784 free(node);
785 }
786 }
787
788 /*
789 * All entries should now be accounted for (unless some external
790 * actor is interfering with our allowed affinity while this
791 * test is running).
792 */
793 assert(sum == expected_sum);
794}
795
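/*
 * Push onto this CPU's pointer buffer. The node pointer is stored
 * speculatively into array[offset], and the push only becomes visible
 * through the final store to offset; both stores are performed within a
 * single rseq critical section.
 */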
6e284b80 796static bool this_cpu_buffer_push(struct percpu_buffer *buffer,
797 struct percpu_buffer_node *node,
798 int *_cpu)
799{
800 bool result = false;
801 int cpu;
802
803 for (;;) {
804 intptr_t *targetptr_spec, newval_spec;
805 intptr_t *targetptr_final, newval_final;
806 intptr_t offset;
807 int ret;
808
369688a5 809 cpu = get_current_cpu_id();
810 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
811 if (offset == buffer->c[cpu].buflen)
812 break;
813 newval_spec = (intptr_t)node;
814 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
815 newval_final = offset + 1;
816 targetptr_final = &buffer->c[cpu].offset;
41149e28 817 ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
818 targetptr_final, offset, targetptr_spec,
819 newval_spec, newval_final, cpu);
820 if (rseq_likely(!ret)) {
821 result = true;
822 break;
823 }
824 /* Retry if comparison fails or rseq aborts. */
825 }
826 if (_cpu)
827 *_cpu = cpu;
828 return result;
829}
830
6e284b80 831static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
832 int *_cpu)
833{
834 struct percpu_buffer_node *head;
835 int cpu;
836
837 for (;;) {
838 intptr_t *targetptr, newval;
839 intptr_t offset;
840 int ret;
841
369688a5 842 cpu = get_current_cpu_id();
843 /* Load offset with single-copy atomicity. */
844 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
845 if (offset == 0) {
846 head = NULL;
847 break;
848 }
849 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
850 newval = offset - 1;
851 targetptr = (intptr_t *)&buffer->c[cpu].offset;
41149e28 852 ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 853 targetptr, offset,
854 (intptr_t *)&buffer->c[cpu].array[offset - 1],
855 (intptr_t)head, newval, cpu);
856 if (rseq_likely(!ret))
857 break;
858 /* Retry if comparison fails or rseq aborts. */
859 }
860 if (_cpu)
861 *_cpu = cpu;
862 return head;
863}
864
865/*
866 * __percpu_buffer_pop is not safe against concurrent accesses. Should
867 * only be used on buffers that are not concurrently modified.
868 */
6e284b80 869static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
870 int cpu)
871{
872 struct percpu_buffer_node *head;
873 intptr_t offset;
874
875 offset = buffer->c[cpu].offset;
876 if (offset == 0)
877 return NULL;
878 head = buffer->c[cpu].array[offset - 1];
879 buffer->c[cpu].offset = offset - 1;
880 return head;
881}
882
6e284b80 883static void *test_percpu_buffer_thread(void *arg)
884{
885 long long i, reps;
886 struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
887
888 if (!opt_disable_rseq && rseq_register_current_thread())
889 abort();
890
891 reps = opt_reps;
892 for (i = 0; i < reps; i++) {
893 struct percpu_buffer_node *node;
894
895 node = this_cpu_buffer_pop(buffer, NULL);
896 if (opt_yield)
897 sched_yield(); /* encourage shuffling */
898 if (node) {
899 if (!this_cpu_buffer_push(buffer, node, NULL)) {
900 /* Should increase buffer size. */
901 abort();
902 }
903 }
904 }
905
906 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
907 (int) rseq_gettid(), nr_abort, signals_delivered);
908 if (!opt_disable_rseq && rseq_unregister_current_thread())
909 abort();
910
911 return NULL;
912}
913
914/* Simultaneous modification to a per-cpu buffer from many threads. */
6e284b80 915static void test_percpu_buffer(void)
916{
917 const int num_threads = opt_threads;
918 int i, j, ret;
919 uint64_t sum = 0, expected_sum = 0;
920 struct percpu_buffer buffer;
921 pthread_t test_threads[num_threads];
922 cpu_set_t allowed_cpus;
923
924 memset(&buffer, 0, sizeof(buffer));
925
926 /* Generate buffer entries for every usable cpu. */
927 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
928 for (i = 0; i < CPU_SETSIZE; i++) {
40797ae3 929 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
930 continue;
931 /* Worst case is every item in the same CPU. */
932 buffer.c[i].array =
d268885a 933 (struct percpu_buffer_node **)
934 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
935 BUFFER_ITEM_PER_CPU);
936 assert(buffer.c[i].array);
937 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
938 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
939 struct percpu_buffer_node *node;
940
941 expected_sum += j;
942
943 /*
944 * We could theoretically put the word-sized
945 * "data" directly in the buffer. However, we
946 * want to model objects that would not fit
947 * within a single word, so allocate an object
948 * for each node.
949 */
d268885a 950 node = (struct percpu_buffer_node *) malloc(sizeof(*node));
951 assert(node);
952 node->data = j;
953 buffer.c[i].array[j - 1] = node;
954 buffer.c[i].offset++;
955 }
956 }
957
958 for (i = 0; i < num_threads; i++) {
959 ret = pthread_create(&test_threads[i], NULL,
960 test_percpu_buffer_thread, &buffer);
961 if (ret) {
962 errno = ret;
963 perror("pthread_create");
964 abort();
965 }
966 }
967
968 for (i = 0; i < num_threads; i++) {
969 ret = pthread_join(test_threads[i], NULL);
970 if (ret) {
971 errno = ret;
972 perror("pthread_join");
973 abort();
974 }
975 }
976
977 for (i = 0; i < CPU_SETSIZE; i++) {
978 struct percpu_buffer_node *node;
979
40797ae3 980 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
981 continue;
982
983 while ((node = __percpu_buffer_pop(&buffer, i))) {
984 sum += node->data;
985 free(node);
986 }
987 free(buffer.c[i].array);
988 }
989
990 /*
991 * All entries should now be accounted for (unless some external
992 * actor is interfering with our allowed affinity while this
993 * test is running).
994 */
995 assert(sum == expected_sum);
996}
997
6e284b80 998static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
999 struct percpu_memcpy_buffer_node item,
1000 int *_cpu)
1001{
1002 bool result = false;
1003 int cpu;
1004
1005 for (;;) {
1006 intptr_t *targetptr_final, newval_final, offset;
1007 char *destptr, *srcptr;
1008 size_t copylen;
1009 int ret;
1010
369688a5 1011 cpu = get_current_cpu_id();
1012 /* Load offset with single-copy atomicity. */
1013 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
1014 if (offset == buffer->c[cpu].buflen)
1015 break;
1016 destptr = (char *)&buffer->c[cpu].array[offset];
1017 srcptr = (char *)&item;
1018 /* copylen must be <= 4kB. */
1019 copylen = sizeof(item);
1020 newval_final = offset + 1;
1021 targetptr_final = &buffer->c[cpu].offset;
41149e28 1022 ret = rseq_load_cbne_memcpy_store__ptr(
1023 opt_mo, RSEQ_PERCPU,
1024 targetptr_final, offset,
1025 destptr, srcptr, copylen,
1026 newval_final, cpu);
1027 if (rseq_likely(!ret)) {
1028 result = true;
1029 break;
1030 }
1031 /* Retry if comparison fails or rseq aborts. */
1032 }
1033 if (_cpu)
1034 *_cpu = cpu;
1035 return result;
1036}
1037
6e284b80 1038static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1039 struct percpu_memcpy_buffer_node *item,
1040 int *_cpu)
1041{
1042 bool result = false;
1043 int cpu;
1044
1045 for (;;) {
1046 intptr_t *targetptr_final, newval_final, offset;
1047 char *destptr, *srcptr;
1048 size_t copylen;
1049 int ret;
1050
369688a5 1051 cpu = get_current_cpu_id();
1052 /* Load offset with single-copy atomicity. */
1053 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
1054 if (offset == 0)
1055 break;
1056 destptr = (char *)item;
1057 srcptr = (char *)&buffer->c[cpu].array[offset - 1];
1058 /* copylen must be <= 4kB. */
1059 copylen = sizeof(*item);
1060 newval_final = offset - 1;
1061 targetptr_final = &buffer->c[cpu].offset;
41149e28 1062 ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 1063 targetptr_final, offset, destptr, srcptr, copylen,
1064 newval_final, cpu);
1065 if (rseq_likely(!ret)) {
1066 result = true;
1067 break;
1068 }
1069 /* Retry if comparison fails or rseq aborts. */
1070 }
1071 if (_cpu)
1072 *_cpu = cpu;
1073 return result;
1074}
1075
1076/*
1077 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1078 * only be used on buffers that are not concurrently modified.
1079 */
6e284b80 1080static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1081 struct percpu_memcpy_buffer_node *item,
1082 int cpu)
1083{
1084 intptr_t offset;
1085
1086 offset = buffer->c[cpu].offset;
1087 if (offset == 0)
1088 return false;
1089 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
1090 buffer->c[cpu].offset = offset - 1;
1091 return true;
1092}
1093
6e284b80 1094static void *test_percpu_memcpy_buffer_thread(void *arg)
1095{
1096 long long i, reps;
1097 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
1098
1099 if (!opt_disable_rseq && rseq_register_current_thread())
1100 abort();
1101
1102 reps = opt_reps;
1103 for (i = 0; i < reps; i++) {
1104 struct percpu_memcpy_buffer_node item;
1105 bool result;
1106
1107 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1108 if (opt_yield)
1109 sched_yield(); /* encourage shuffling */
1110 if (result) {
1111 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1112 /* Should increase buffer size. */
1113 abort();
1114 }
1115 }
1116 }
1117
1118 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
1119 (int) rseq_gettid(), nr_abort, signals_delivered);
1120 if (!opt_disable_rseq && rseq_unregister_current_thread())
1121 abort();
1122
1123 return NULL;
1124}
1125
1126/* Simultaneous modification to a per-cpu buffer from many threads. */
6e284b80 1127static void test_percpu_memcpy_buffer(void)
1128{
1129 const int num_threads = opt_threads;
1130 int i, j, ret;
1131 uint64_t sum = 0, expected_sum = 0;
1132 struct percpu_memcpy_buffer buffer;
1133 pthread_t test_threads[num_threads];
1134 cpu_set_t allowed_cpus;
1135
1136 memset(&buffer, 0, sizeof(buffer));
1137
1138 /* Generate buffer entries for every usable cpu. */
1139 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1140 for (i = 0; i < CPU_SETSIZE; i++) {
40797ae3 1141 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1142 continue;
1143 /* Worst case is every item in the same CPU. */
1144 buffer.c[i].array =
d268885a 1145 (struct percpu_memcpy_buffer_node *)
1146 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1147 MEMCPY_BUFFER_ITEM_PER_CPU);
1148 assert(buffer.c[i].array);
1149 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1150 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1151 expected_sum += 2 * j + 1;
1152
1153 /*
1154 * We could theoretically put the word-sized
1155 * "data" directly in the buffer. However, we
1156 * want to model objects that would not fit
1157 * within a single word, so allocate an object
1158 * for each node.
1159 */
1160 buffer.c[i].array[j - 1].data1 = j;
1161 buffer.c[i].array[j - 1].data2 = j + 1;
1162 buffer.c[i].offset++;
1163 }
1164 }
1165
1166 for (i = 0; i < num_threads; i++) {
1167 ret = pthread_create(&test_threads[i], NULL,
1168 test_percpu_memcpy_buffer_thread,
1169 &buffer);
1170 if (ret) {
1171 errno = ret;
1172 perror("pthread_create");
1173 abort();
1174 }
1175 }
1176
1177 for (i = 0; i < num_threads; i++) {
1178 ret = pthread_join(test_threads[i], NULL);
1179 if (ret) {
1180 errno = ret;
1181 perror("pthread_join");
1182 abort();
1183 }
1184 }
1185
1186 for (i = 0; i < CPU_SETSIZE; i++) {
1187 struct percpu_memcpy_buffer_node item;
1188
40797ae3 1189 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
31b44ba2
MD
1190 continue;
1191
1192 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1193 sum += item.data1;
1194 sum += item.data2;
1195 }
1196 free(buffer.c[i].array);
1197 }
1198
1199 /*
1200 * All entries should now be accounted for (unless some external
1201 * actor is interfering with our allowed affinity while this
1202 * test is running).
1203 */
1204 assert(sum == expected_sum);
1205}
1206
1207
1208static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
1209{
1210 signals_delivered++;
1211}
1212
1213static int set_signal_handler(void)
1214{
1215 int ret = 0;
1216 struct sigaction sa;
1217 sigset_t sigset;
1218
1219 ret = sigemptyset(&sigset);
1220 if (ret < 0) {
1221 perror("sigemptyset");
1222 return ret;
1223 }
1224
1225 sa.sa_handler = test_signal_interrupt_handler;
1226 sa.sa_mask = sigset;
1227 sa.sa_flags = 0;
1228 ret = sigaction(SIGUSR1, &sa, NULL);
1229 if (ret < 0) {
1230 perror("sigaction");
1231 return ret;
1232 }
1233
1234 printf_verbose("Signal handler set for SIGUSR1\n");
1235
1236 return ret;
1237}
1238
1239static
1240bool membarrier_private_expedited_rseq_available(void)
1241{
1242 int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
1243
1244 if (status < 0) {
1245 perror("membarrier");
1246 return false;
1247 }
1248 if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
1249 return false;
1250 return true;
1251}
1252
5368dcb4 1253/* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
369688a5 1254#ifdef TEST_MEMBARRIER
1255struct test_membarrier_thread_args {
1256 int stop;
1257 intptr_t percpu_list_ptr;
1258};
1259
1260/* Worker threads modify data in their "active" percpu lists. */
1261static
1262void *test_membarrier_worker_thread(void *arg)
1263{
1264 struct test_membarrier_thread_args *args =
1265 (struct test_membarrier_thread_args *)arg;
1266 const int iters = opt_reps;
1267 int i;
1268
1269 if (rseq_register_current_thread()) {
1270 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1271 errno, strerror(errno));
1272 abort();
1273 }
1274
1275 /* Wait for initialization. */
1276 while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }
1277
1278 for (i = 0; i < iters; ++i) {
1279 int ret;
1280
1281 do {
369688a5 1282 int cpu = get_current_cpu_id();
5368dcb4 1283
41149e28 1284 ret = rseq_load_add_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 1285 &args->percpu_list_ptr,
1286 sizeof(struct percpu_list_entry) * cpu, 1, cpu);
1287 } while (rseq_unlikely(ret));
1288 }
1289
1290 if (rseq_unregister_current_thread()) {
1291 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1292 errno, strerror(errno));
1293 abort();
1294 }
1295 return NULL;
1296}
1297
1298static
1299void test_membarrier_init_percpu_list(struct percpu_list *list)
1300{
1301 int i;
1302
1303 memset(list, 0, sizeof(*list));
1304 for (i = 0; i < CPU_SETSIZE; i++) {
1305 struct percpu_list_node *node;
1306
1307 node = (struct percpu_list_node *) malloc(sizeof(*node));
1308 assert(node);
1309 node->data = 0;
1310 node->next = NULL;
1311 list->c[i].head = node;
1312 }
1313}
1314
1315static
1316void test_membarrier_free_percpu_list(struct percpu_list *list)
1317{
1318 int i;
1319
1320 for (i = 0; i < CPU_SETSIZE; i++)
1321 free(list->c[i].head);
1322}
1323
1324/*
1325 * The manager thread swaps per-cpu lists that worker threads see,
1326 * and validates that there are no unexpected modifications.
1327 */
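/*
 * After swapping the active list pointer, the manager issues an
 * rseq-expedited membarrier targeting the CPU it is about to observe. This
 * restarts any worker critical section still using the old pointer, so once
 * the barrier returns the "inactive" list can no longer be modified and its
 * observed values must remain stable.
 */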
1328static
1329void *test_membarrier_manager_thread(void *arg)
1330{
1331 struct test_membarrier_thread_args *args =
1332 (struct test_membarrier_thread_args *)arg;
1333 struct percpu_list list_a, list_b;
1334 intptr_t expect_a = 0, expect_b = 0;
1335 int cpu_a = 0, cpu_b = 0;
1336
1337 if (rseq_register_current_thread()) {
1338 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1339 errno, strerror(errno));
1340 abort();
1341 }
1342
1343 /* Init lists. */
1344 test_membarrier_init_percpu_list(&list_a);
1345 test_membarrier_init_percpu_list(&list_b);
1346
1347 /* Initialize lists before publishing them. */
1348 rseq_smp_wmb();
1349
1350 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
1351
1352 while (!RSEQ_READ_ONCE(args->stop)) {
1353 /* list_a is "active". */
1354 cpu_a = rand() % CPU_SETSIZE;
1355 /*
1356 * As list_b is "inactive", we should never see changes
1357 * to list_b.
1358 */
1359 if (expect_b != RSEQ_READ_ONCE(list_b.c[cpu_b].head->data)) {
1360 fprintf(stderr, "Membarrier test failed\n");
1361 abort();
1362 }
1363
1364 /* Make list_b "active". */
1365 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_b);
369688a5 1366 if (rseq_membarrier_expedited(cpu_a) &&
1367 errno != ENXIO /* missing CPU */) {
1368 perror("sys_membarrier");
1369 abort();
1370 }
1371 /*
1372 * Cpu A should now only modify list_b, so the values
1373 * in list_a should be stable.
1374 */
1375 expect_a = RSEQ_READ_ONCE(list_a.c[cpu_a].head->data);
1376
1377 cpu_b = rand() % CPU_SETSIZE;
1378 /*
1379 * As list_a is "inactive", we should never see changes
1380 * to list_a.
1381 */
1382 if (expect_a != RSEQ_READ_ONCE(list_a.c[cpu_a].head->data)) {
1383 fprintf(stderr, "Membarrier test failed\n");
1384 abort();
1385 }
1386
1387 /* Make list_a "active". */
1388 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
369688a5 1389 if (rseq_membarrier_expedited(cpu_b) &&
1390 errno != ENXIO /* missing CPU */) {
1391 perror("sys_membarrier");
1392 abort();
1393 }
1394 /* Remember a value from list_b. */
1395 expect_b = RSEQ_READ_ONCE(list_b.c[cpu_b].head->data);
1396 }
1397
1398 test_membarrier_free_percpu_list(&list_a);
1399 test_membarrier_free_percpu_list(&list_b);
1400
1401 if (rseq_unregister_current_thread()) {
1402 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1403 errno, strerror(errno));
1404 abort();
1405 }
1406 return NULL;
1407}
1408
1409static
1410void test_membarrier(void)
1411{
1412 const int num_threads = opt_threads;
1413 struct test_membarrier_thread_args thread_args;
1414 pthread_t worker_threads[num_threads];
1415 pthread_t manager_thread;
1416 int i, ret;
1417
1418 if (!membarrier_private_expedited_rseq_available()) {
1419 fprintf(stderr, "Membarrier private expedited rseq not available. "
1420 "Skipping membarrier test.\n");
1421 return;
1422 }
1423 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1424 perror("sys_membarrier");
1425 abort();
1426 }
1427
1428 thread_args.stop = 0;
1429 thread_args.percpu_list_ptr = 0;
1430 ret = pthread_create(&manager_thread, NULL,
1431 test_membarrier_manager_thread, &thread_args);
1432 if (ret) {
1433 errno = ret;
1434 perror("pthread_create");
1435 abort();
1436 }
1437
1438 for (i = 0; i < num_threads; i++) {
1439 ret = pthread_create(&worker_threads[i], NULL,
1440 test_membarrier_worker_thread, &thread_args);
1441 if (ret) {
1442 errno = ret;
1443 perror("pthread_create");
1444 abort();
1445 }
1446 }
1447
1448
1449 for (i = 0; i < num_threads; i++) {
1450 ret = pthread_join(worker_threads[i], NULL);
1451 if (ret) {
1452 errno = ret;
1453 perror("pthread_join");
1454 abort();
1455 }
1456 }
1457
1458 RSEQ_WRITE_ONCE(thread_args.stop, 1);
1459 ret = pthread_join(manager_thread, NULL);
1460 if (ret) {
1461 errno = ret;
1462 perror("pthread_join");
1463 abort();
1464 }
1465}
369688a5 1466#else /* TEST_MEMBARRIER */
1467static
1468void test_membarrier(void)
1469{
1470 if (!membarrier_private_expedited_rseq_available()) {
1471 fprintf(stderr, "Membarrier private expedited rseq not available. "
1472 "Skipping membarrier test.\n");
1473 return;
1474 }
41149e28 1475 fprintf(stderr, "rseq_load_add_load_add_store__ptr is not implemented on this architecture. "
1476 "Skipping membarrier test.\n");
1477}
1478#endif
1479
544cdc88 1480static void show_usage(char **argv)
1481{
1482 printf("Usage : %s <OPTIONS>\n",
1483 argv[0]);
1484 printf("OPTIONS:\n");
1485 printf(" [-1 loops] Number of loops for delay injection 1\n");
1486 printf(" [-2 loops] Number of loops for delay injection 2\n");
1487 printf(" [-3 loops] Number of loops for delay injection 3\n");
1488 printf(" [-4 loops] Number of loops for delay injection 4\n");
1489 printf(" [-5 loops] Number of loops for delay injection 5\n");
1490 printf(" [-6 loops] Number of loops for delay injection 6\n");
1491 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1492 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1493 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1494 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1495 printf(" [-y] Yield\n");
1496 printf(" [-k] Kill thread with signal\n");
1497 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1498 printf(" [-t N] Number of threads (default 200)\n");
1499 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1500 printf(" [-d] Disable rseq system call (no initialization)\n");
1501 printf(" [-D M] Disable rseq for each M threads\n");
5368dcb4 1502 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
31b44ba2 1503 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
d1cdec98 1504 printf(" [-c] Check if the rseq syscall is available.\n");
1505 printf(" [-v] Verbose output.\n");
1506 printf(" [-h] Show this help.\n");
1507 printf("\n");
1508}
1509
1510int main(int argc, char **argv)
1511{
1512 int i;
1513
1514 for (i = 1; i < argc; i++) {
1515 if (argv[i][0] != '-')
1516 continue;
1517 switch (argv[i][1]) {
1518 case '1':
1519 case '2':
1520 case '3':
1521 case '4':
1522 case '5':
1523 case '6':
1524 case '7':
1525 case '8':
1526 case '9':
1527 if (argc < i + 2) {
544cdc88 1528 show_usage(argv);
1529 goto error;
1530 }
1531 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1532 i++;
1533 break;
1534 case 'm':
1535 if (argc < i + 2) {
544cdc88 1536 show_usage(argv);
1537 goto error;
1538 }
1539 opt_modulo = atol(argv[i + 1]);
1540 if (opt_modulo < 0) {
544cdc88 1541 show_usage(argv);
1542 goto error;
1543 }
1544 i++;
1545 break;
1546 case 's':
1547 if (argc < i + 2) {
544cdc88 1548 show_usage(argv);
1549 goto error;
1550 }
1551 opt_sleep = atol(argv[i + 1]);
1552 if (opt_sleep < 0) {
544cdc88 1553 show_usage(argv);
1554 goto error;
1555 }
1556 i++;
1557 break;
1558 case 'y':
1559 opt_yield = 1;
1560 break;
1561 case 'k':
1562 opt_signal = 1;
1563 break;
1564 case 'd':
1565 opt_disable_rseq = 1;
1566 break;
1567 case 'D':
1568 if (argc < i + 2) {
544cdc88 1569 show_usage(argv);
1570 goto error;
1571 }
1572 opt_disable_mod = atol(argv[i + 1]);
1573 if (opt_disable_mod < 0) {
544cdc88 1574 show_usage(argv);
1575 goto error;
1576 }
1577 i++;
1578 break;
1579 case 't':
1580 if (argc < i + 2) {
544cdc88 1581 show_usage(argv);
1582 goto error;
1583 }
1584 opt_threads = atol(argv[i + 1]);
1585 if (opt_threads < 0) {
544cdc88 1586 show_usage(argv);
1587 goto error;
1588 }
1589 i++;
1590 break;
1591 case 'r':
1592 if (argc < i + 2) {
544cdc88 1593 show_usage(argv);
1594 goto error;
1595 }
1596 opt_reps = atoll(argv[i + 1]);
1597 if (opt_reps < 0) {
544cdc88 1598 show_usage(argv);
1599 goto error;
1600 }
1601 i++;
1602 break;
1603 case 'h':
544cdc88 1604 show_usage(argv);
1605 goto end;
1606 case 'T':
1607 if (argc < i + 2) {
544cdc88 1608 show_usage(argv);
1609 goto error;
1610 }
1611 opt_test = *argv[i + 1];
1612 switch (opt_test) {
1613 case 's':
1614 case 'l':
1615 case 'i':
1616 case 'b':
1617 case 'm':
5368dcb4 1618 case 'r':
1619 break;
1620 default:
544cdc88 1621 show_usage(argv);
1622 goto error;
1623 }
1624 i++;
1625 break;
1626 case 'v':
1627 verbose = 1;
1628 break;
1629 case 'M':
369688a5 1630 opt_mo = RSEQ_MO_RELEASE;
31b44ba2 1631 break;
d1cdec98 1632 case 'c':
8b34114a 1633 if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
1634 printf_verbose("The rseq syscall is available.\n");
1635 goto end;
1636 } else {
1637 printf_verbose("The rseq syscall is unavailable.\n");
1638 goto no_rseq;
1639 }
31b44ba2 1640 default:
544cdc88 1641 show_usage(argv);
1642 goto error;
1643 }
1644 }
1645
1646 loop_cnt_1 = loop_cnt[1];
1647 loop_cnt_2 = loop_cnt[2];
1648 loop_cnt_3 = loop_cnt[3];
1649 loop_cnt_4 = loop_cnt[4];
1650 loop_cnt_5 = loop_cnt[5];
1651 loop_cnt_6 = loop_cnt[6];
1652
1653 if (set_signal_handler())
1654 goto error;
1655
1656 if (!opt_disable_rseq && rseq_register_current_thread())
1657 goto error;
369688a5 1658 if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
1659 printf_verbose("The rseq cpu id getter is unavailable\n");
1660 goto no_rseq;
369688a5 1661 }
1662 switch (opt_test) {
1663 case 's':
1664 printf_verbose("spinlock\n");
1665 test_percpu_spinlock();
1666 break;
1667 case 'l':
1668 printf_verbose("linked list\n");
1669 test_percpu_list();
1670 break;
1671 case 'b':
1672 printf_verbose("buffer\n");
1673 test_percpu_buffer();
1674 break;
1675 case 'm':
1676 printf_verbose("memcpy buffer\n");
1677 test_percpu_memcpy_buffer();
1678 break;
1679 case 'i':
1680 printf_verbose("counter increment\n");
1681 test_percpu_inc();
1682 break;
1683 case 'r':
1684 printf_verbose("membarrier\n");
1685 test_membarrier();
1686 break;
1687 }
1688 if (!opt_disable_rseq && rseq_unregister_current_thread())
1689 abort();
1690end:
1691 return 0;
1692
1693error:
1694 return -1;
1695
1696no_rseq:
1697 return 2;
31b44ba2 1698}