Update rseq critical section identifiers to match pseudo-code
[librseq.git] / tests / param_test.c
1 // SPDX-License-Identifier: MIT
2 // SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE
5 #endif
6 #include <assert.h>
7 #include <linux/version.h>
8 #include <linux/membarrier.h>
9 #include <pthread.h>
10 #include <sched.h>
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <syscall.h>
16 #include <unistd.h>
17 #include <poll.h>
18 #include <sys/types.h>
19 #include <signal.h>
20 #include <errno.h>
21 #include <stddef.h>
22 #include <stdbool.h>
23
24 #if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
25 enum {
26 MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7),
27 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8),
28 };
29
30 enum {
31 MEMBARRIER_CMD_FLAG_CPU = (1 << 0),
32 };
33 #endif
34
35 #define NR_INJECT 9
36 static int loop_cnt[NR_INJECT + 1];
37
38 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
39 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
40 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
41 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
42 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
43 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
44
45 static int opt_modulo, verbose;
46
47 static int opt_yield, opt_signal, opt_sleep,
48 opt_disable_rseq, opt_threads = 200,
49 opt_disable_mod = 0, opt_test = 's';
50
51 static long long opt_reps = 5000;
52
53 static __thread __attribute__((tls_model("initial-exec")))
54 unsigned int signals_delivered;
55
56 static inline pid_t rseq_gettid(void)
57 {
58 return syscall(__NR_gettid);
59 }
60
61 #ifndef BENCHMARK
62
63 static __thread __attribute__((tls_model("initial-exec"), unused))
64 int yield_mod_cnt, nr_abort;
65
66 #define printf_verbose(fmt, ...) \
67 do { \
68 if (verbose) \
69 printf(fmt, ## __VA_ARGS__); \
70 } while (0)
71
72 #ifdef __i386__
73
74 #define INJECT_ASM_REG "eax"
75
76 #define RSEQ_INJECT_CLOBBER \
77 , INJECT_ASM_REG
78
79 #define RSEQ_INJECT_ASM(n) \
80 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
81 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
82 "jz 333f\n\t" \
83 "222:\n\t" \
84 "dec %%" INJECT_ASM_REG "\n\t" \
85 "jnz 222b\n\t" \
86 "333:\n\t"
87
88 #elif defined(__x86_64__)
89
90 #define INJECT_ASM_REG_P "rax"
91 #define INJECT_ASM_REG "eax"
92
93 #define RSEQ_INJECT_CLOBBER \
94 , INJECT_ASM_REG_P \
95 , INJECT_ASM_REG
96
97 #define RSEQ_INJECT_ASM(n) \
98 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
99 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
100 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
101 "jz 333f\n\t" \
102 "222:\n\t" \
103 "dec %%" INJECT_ASM_REG "\n\t" \
104 "jnz 222b\n\t" \
105 "333:\n\t"
106
107 #elif defined(__s390__)
108
109 #define RSEQ_INJECT_INPUT \
110 , [loop_cnt_1]"m"(loop_cnt[1]) \
111 , [loop_cnt_2]"m"(loop_cnt[2]) \
112 , [loop_cnt_3]"m"(loop_cnt[3]) \
113 , [loop_cnt_4]"m"(loop_cnt[4]) \
114 , [loop_cnt_5]"m"(loop_cnt[5]) \
115 , [loop_cnt_6]"m"(loop_cnt[6])
116
117 #define INJECT_ASM_REG "r12"
118
119 #define RSEQ_INJECT_CLOBBER \
120 , INJECT_ASM_REG
121
122 #define RSEQ_INJECT_ASM(n) \
123 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
124 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
125 "je 333f\n\t" \
126 "222:\n\t" \
127 "ahi %%" INJECT_ASM_REG ", -1\n\t" \
128 "jnz 222b\n\t" \
129 "333:\n\t"
130
131 #elif defined(__ARMEL__)
132
133 #define RSEQ_INJECT_INPUT \
134 , [loop_cnt_1]"m"(loop_cnt[1]) \
135 , [loop_cnt_2]"m"(loop_cnt[2]) \
136 , [loop_cnt_3]"m"(loop_cnt[3]) \
137 , [loop_cnt_4]"m"(loop_cnt[4]) \
138 , [loop_cnt_5]"m"(loop_cnt[5]) \
139 , [loop_cnt_6]"m"(loop_cnt[6])
140
141 #define INJECT_ASM_REG "r4"
142
143 #define RSEQ_INJECT_CLOBBER \
144 , INJECT_ASM_REG
145
146 #define RSEQ_INJECT_ASM(n) \
147 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
148 "cmp " INJECT_ASM_REG ", #0\n\t" \
149 "beq 333f\n\t" \
150 "222:\n\t" \
151 "subs " INJECT_ASM_REG ", #1\n\t" \
152 "bne 222b\n\t" \
153 "333:\n\t"
154
155 #elif defined(__AARCH64EL__)
156
157 #define RSEQ_INJECT_INPUT \
158 , [loop_cnt_1] "Qo" (loop_cnt[1]) \
159 , [loop_cnt_2] "Qo" (loop_cnt[2]) \
160 , [loop_cnt_3] "Qo" (loop_cnt[3]) \
161 , [loop_cnt_4] "Qo" (loop_cnt[4]) \
162 , [loop_cnt_5] "Qo" (loop_cnt[5]) \
163 , [loop_cnt_6] "Qo" (loop_cnt[6])
164
165 #define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
166
167 #define RSEQ_INJECT_ASM(n) \
168 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
169 " cbz " INJECT_ASM_REG ", 333f\n" \
170 "222:\n" \
171 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
172 " cbnz " INJECT_ASM_REG ", 222b\n" \
173 "333:\n"
174
175 #elif defined(__PPC__)
176
177 #define RSEQ_INJECT_INPUT \
178 , [loop_cnt_1]"m"(loop_cnt[1]) \
179 , [loop_cnt_2]"m"(loop_cnt[2]) \
180 , [loop_cnt_3]"m"(loop_cnt[3]) \
181 , [loop_cnt_4]"m"(loop_cnt[4]) \
182 , [loop_cnt_5]"m"(loop_cnt[5]) \
183 , [loop_cnt_6]"m"(loop_cnt[6])
184
185 #define INJECT_ASM_REG "r18"
186
187 #define RSEQ_INJECT_CLOBBER \
188 , INJECT_ASM_REG
189
190 #define RSEQ_INJECT_ASM(n) \
191 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
192 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
193 "beq 333f\n\t" \
194 "222:\n\t" \
195 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
196 "bne 222b\n\t" \
197 "333:\n\t"
198
199 #elif defined(__mips__)
200
201 #define RSEQ_INJECT_INPUT \
202 , [loop_cnt_1]"m"(loop_cnt[1]) \
203 , [loop_cnt_2]"m"(loop_cnt[2]) \
204 , [loop_cnt_3]"m"(loop_cnt[3]) \
205 , [loop_cnt_4]"m"(loop_cnt[4]) \
206 , [loop_cnt_5]"m"(loop_cnt[5]) \
207 , [loop_cnt_6]"m"(loop_cnt[6])
208
209 #define INJECT_ASM_REG "$5"
210
211 #define RSEQ_INJECT_CLOBBER \
212 , INJECT_ASM_REG
213
214 #define RSEQ_INJECT_ASM(n) \
215 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
216 "beqz " INJECT_ASM_REG ", 333f\n\t" \
217 "222:\n\t" \
218 "addiu " INJECT_ASM_REG ", -1\n\t" \
219 "bnez " INJECT_ASM_REG ", 222b\n\t" \
220 "333:\n\t"
221
222 #elif defined(__riscv)
223
224 #define RSEQ_INJECT_INPUT \
225 , [loop_cnt_1]"m"(loop_cnt[1]) \
226 , [loop_cnt_2]"m"(loop_cnt[2]) \
227 , [loop_cnt_3]"m"(loop_cnt[3]) \
228 , [loop_cnt_4]"m"(loop_cnt[4]) \
229 , [loop_cnt_5]"m"(loop_cnt[5]) \
230 , [loop_cnt_6]"m"(loop_cnt[6])
231
232 #define INJECT_ASM_REG "t1"
233
234 #define RSEQ_INJECT_CLOBBER \
235 , INJECT_ASM_REG
236
237 #define RSEQ_INJECT_ASM(n) \
238 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
239 "beqz " INJECT_ASM_REG ", 333f\n\t" \
240 "222:\n\t" \
241 "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
242 "bnez " INJECT_ASM_REG ", 222b\n\t" \
243 "333:\n\t"
244
245 #else
246 #error unsupported target
247 #endif
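
/*
 * Each per-architecture RSEQ_INJECT_ASM(n) above implements the same
 * delay-injection busy loop. The sketch below (helper name made up for
 * illustration, never called by the tests; the real injection must stay in
 * inline assembly so the delay executes inside the rseq critical sections)
 * shows the equivalent C for a non-negative loop count:
 */
static inline __attribute__((unused))
void rseq_inject_delay_sketch(int n)
{
	int i = loop_cnt[n];	/* 0 disables injection at this point. */

	while (i > 0)
		i--;		/* Busy-wait for the configured iteration count. */
}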
248
249 #define RSEQ_INJECT_FAILED \
250 nr_abort++;
251
252 #define RSEQ_INJECT_C(n) \
253 { \
254 int loc_i, loc_nr_loops = loop_cnt[n]; \
255 \
256 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
257 rseq_barrier(); \
258 } \
259 if (loc_nr_loops == -1 && opt_modulo) { \
260 if (yield_mod_cnt == opt_modulo - 1) { \
261 if (opt_sleep > 0) \
262 poll(NULL, 0, opt_sleep); \
263 if (opt_yield) \
264 sched_yield(); \
265 if (opt_signal) \
266 raise(SIGUSR1); \
267 yield_mod_cnt = 0; \
268 } else { \
269 yield_mod_cnt++; \
270 } \
271 } \
272 }
273
274 #else
275
276 #define printf_verbose(fmt, ...)
277
278 #endif /* BENCHMARK */
279
280 #include <rseq/rseq.h>
281
282 static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
283
284 static int sys_membarrier(int cmd, int flags, int cpu_id)
285 {
286 return syscall(__NR_membarrier, cmd, flags, cpu_id);
287 }
288
289 #ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
290 #define TEST_MEMBARRIER
291 #endif
292
293 #ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
294 # define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
295 static
296 int get_current_cpu_id(void)
297 {
298 return rseq_current_mm_cid();
299 }
300 static
301 bool rseq_validate_cpu_id(void)
302 {
303 return rseq_mm_cid_available();
304 }
305 static
306 bool rseq_use_cpu_index(void)
307 {
308 return false; /* Use mm_cid */
309 }
310 # ifdef TEST_MEMBARRIER
311 /*
312 * Membarrier does not currently support targeting a mm_cid, so
313 * issue the barrier on all cpus.
314 */
315 static
316 int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
317 {
318 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
319 0, 0);
320 }
321 # endif /* TEST_MEMBARRIER */
322 #else
323 # define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
324 static
325 int get_current_cpu_id(void)
326 {
327 return rseq_cpu_start();
328 }
329 static
330 bool rseq_validate_cpu_id(void)
331 {
332 return rseq_current_cpu_raw() >= 0;
333 }
334 static
335 bool rseq_use_cpu_index(void)
336 {
337 return true; /* Use cpu_id as index. */
338 }
339 # ifdef TEST_MEMBARRIER
340 static
341 int rseq_membarrier_expedited(int cpu)
342 {
343 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
344 MEMBARRIER_CMD_FLAG_CPU, cpu);
345 }
346 # endif /* TEST_MEMBARRIER */
347 #endif
348
349 struct percpu_lock_entry {
350 intptr_t v;
351 } __attribute__((aligned(128)));
352
353 struct percpu_lock {
354 struct percpu_lock_entry c[CPU_SETSIZE];
355 };
356
357 struct test_data_entry {
358 intptr_t count;
359 } __attribute__((aligned(128)));
360
361 struct spinlock_test_data {
362 struct percpu_lock lock;
363 struct test_data_entry c[CPU_SETSIZE];
364 };
365
366 struct spinlock_thread_test_data {
367 struct spinlock_test_data *data;
368 long long reps;
369 int reg;
370 };
371
372 struct inc_test_data {
373 struct test_data_entry c[CPU_SETSIZE];
374 };
375
376 struct inc_thread_test_data {
377 struct inc_test_data *data;
378 long long reps;
379 int reg;
380 };
381
382 struct percpu_list_node {
383 intptr_t data;
384 struct percpu_list_node *next;
385 };
386
387 struct percpu_list_entry {
388 struct percpu_list_node *head;
389 } __attribute__((aligned(128)));
390
391 struct percpu_list {
392 struct percpu_list_entry c[CPU_SETSIZE];
393 };
394
395 #define BUFFER_ITEM_PER_CPU 100
396
397 struct percpu_buffer_node {
398 intptr_t data;
399 };
400
401 struct percpu_buffer_entry {
402 intptr_t offset;
403 intptr_t buflen;
404 struct percpu_buffer_node **array;
405 } __attribute__((aligned(128)));
406
407 struct percpu_buffer {
408 struct percpu_buffer_entry c[CPU_SETSIZE];
409 };
410
411 #define MEMCPY_BUFFER_ITEM_PER_CPU 100
412
413 struct percpu_memcpy_buffer_node {
414 intptr_t data1;
415 uint64_t data2;
416 };
417
418 struct percpu_memcpy_buffer_entry {
419 intptr_t offset;
420 intptr_t buflen;
421 struct percpu_memcpy_buffer_node *array;
422 } __attribute__((aligned(128)));
423
424 struct percpu_memcpy_buffer {
425 struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
426 };
427
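/*
 * The per-cpu data structures above are updated with librseq load/store
 * primitives. The simplest one used below, rseq_load_cbne_store__ptr(),
 * behaves roughly like the following C, executed as a single restartable
 * step pinned to @cpu (sketch only, derived from the naming convention
 * "load, compare-branch-if-not-equal, store"; see rseq.h for the
 * authoritative semantics):
 *
 *	if (*v != expect)
 *		return 1;	(comparison failed)
 *	*v = newv;		(commit)
 *	return 0;
 *
 * A negative return value means the critical section was aborted (e.g. by
 * preemption or signal delivery); callers in this file simply retry.
 */
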
428 /* A simple percpu spinlock. Grabs lock on current cpu. */
429 static int rseq_this_cpu_lock(struct percpu_lock *lock)
430 {
431 int cpu;
432
433 for (;;) {
434 int ret;
435
436 cpu = get_current_cpu_id();
437 if (cpu < 0) {
438 fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
439 getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
440 abort();
441 }
442 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
443 &lock->c[cpu].v,
444 0, 1, cpu);
445 if (rseq_likely(!ret))
446 break;
447 /* Retry if comparison fails or rseq aborts. */
448 }
449 /*
450 * Acquire semantic when taking lock after control dependency.
451 * Matches rseq_smp_store_release().
452 */
453 rseq_smp_acquire__after_ctrl_dep();
454 return cpu;
455 }
456
457 static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
458 {
459 assert(lock->c[cpu].v == 1);
460 /*
461 * Release lock, with release semantic. Matches
462 * rseq_smp_acquire__after_ctrl_dep().
463 */
464 rseq_smp_store_release(&lock->c[cpu].v, 0);
465 }
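
/*
 * Minimal usage sketch for the per-cpu lock above (illustrative only, never
 * called by the tests; the helper name is made up): lock the current cpu,
 * update that cpu's data, then unlock the same cpu.
 */
static __attribute__((unused))
void percpu_lock_usage_sketch(struct percpu_lock *lock, struct test_data_entry *c)
{
	int cpu = rseq_this_cpu_lock(lock);	/* Returns the locked cpu index. */

	c[cpu].count++;		/* Critical section protected by the per-cpu lock. */
	rseq_percpu_unlock(lock, cpu);
}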
466
467 static void *test_percpu_spinlock_thread(void *arg)
468 {
469 struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
470 struct spinlock_test_data *data = thread_data->data;
471 long long i, reps;
472
473 if (!opt_disable_rseq && thread_data->reg &&
474 rseq_register_current_thread())
475 abort();
476 reps = thread_data->reps;
477 for (i = 0; i < reps; i++) {
478 int cpu = rseq_this_cpu_lock(&data->lock);
479 data->c[cpu].count++;
480 rseq_percpu_unlock(&data->lock, cpu);
481 #ifndef BENCHMARK
482 if (i != 0 && !(i % (reps / 10)))
483 printf_verbose("tid %d: count %lld\n",
484 (int) rseq_gettid(), i);
485 #endif
486 }
487 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
488 (int) rseq_gettid(), nr_abort, signals_delivered);
489 if (!opt_disable_rseq && thread_data->reg &&
490 rseq_unregister_current_thread())
491 abort();
492 return NULL;
493 }
494
495 /*
496 * A simple test which implements a sharded counter using a per-cpu
497 * lock. Obviously real applications might prefer to simply use a
498 * per-cpu increment; however, this is reasonable for a test and the
499 * lock can be extended to synchronize more complicated operations.
500 */
501 static void test_percpu_spinlock(void)
502 {
503 const int num_threads = opt_threads;
504 int i, ret;
505 uint64_t sum;
506 pthread_t test_threads[num_threads];
507 struct spinlock_test_data data;
508 struct spinlock_thread_test_data thread_data[num_threads];
509
510 memset(&data, 0, sizeof(data));
511 for (i = 0; i < num_threads; i++) {
512 thread_data[i].reps = opt_reps;
513 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
514 thread_data[i].reg = 1;
515 else
516 thread_data[i].reg = 0;
517 thread_data[i].data = &data;
518 ret = pthread_create(&test_threads[i], NULL,
519 test_percpu_spinlock_thread,
520 &thread_data[i]);
521 if (ret) {
522 errno = ret;
523 perror("pthread_create");
524 abort();
525 }
526 }
527
528 for (i = 0; i < num_threads; i++) {
529 ret = pthread_join(test_threads[i], NULL);
530 if (ret) {
531 errno = ret;
532 perror("pthread_join");
533 abort();
534 }
535 }
536
537 sum = 0;
538 for (i = 0; i < CPU_SETSIZE; i++)
539 sum += data.c[i].count;
540
541 assert(sum == (uint64_t)opt_reps * num_threads);
542 }
543
544 static void *test_percpu_inc_thread(void *arg)
545 {
546 struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
547 struct inc_test_data *data = thread_data->data;
548 long long i, reps;
549
550 if (!opt_disable_rseq && thread_data->reg &&
551 rseq_register_current_thread())
552 abort();
553 reps = thread_data->reps;
554 for (i = 0; i < reps; i++) {
555 int ret;
556
557 do {
558 int cpu;
559
560 cpu = get_current_cpu_id();
561 ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
562 &data->c[cpu].count, 1, cpu);
563 } while (rseq_unlikely(ret));
564 #ifndef BENCHMARK
565 if (i != 0 && !(i % (reps / 10)))
566 printf_verbose("tid %d: count %lld\n",
567 (int) rseq_gettid(), i);
568 #endif
569 }
570 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
571 (int) rseq_gettid(), nr_abort, signals_delivered);
572 if (!opt_disable_rseq && thread_data->reg &&
573 rseq_unregister_current_thread())
574 abort();
575 return NULL;
576 }
577
578 static void test_percpu_inc(void)
579 {
580 const int num_threads = opt_threads;
581 int i, ret;
582 uint64_t sum;
583 pthread_t test_threads[num_threads];
584 struct inc_test_data data;
585 struct inc_thread_test_data thread_data[num_threads];
586
587 memset(&data, 0, sizeof(data));
588 for (i = 0; i < num_threads; i++) {
589 thread_data[i].reps = opt_reps;
590 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
591 thread_data[i].reg = 1;
592 else
593 thread_data[i].reg = 0;
594 thread_data[i].data = &data;
595 ret = pthread_create(&test_threads[i], NULL,
596 test_percpu_inc_thread,
597 &thread_data[i]);
598 if (ret) {
599 errno = ret;
600 perror("pthread_create");
601 abort();
602 }
603 }
604
605 for (i = 0; i < num_threads; i++) {
606 ret = pthread_join(test_threads[i], NULL);
607 if (ret) {
608 errno = ret;
609 perror("pthread_join");
610 abort();
611 }
612 }
613
614 sum = 0;
615 for (i = 0; i < CPU_SETSIZE; i++)
616 sum += data.c[i].count;
617
618 assert(sum == (uint64_t)opt_reps * num_threads);
619 }
620
621 static void this_cpu_list_push(struct percpu_list *list,
622 struct percpu_list_node *node,
623 int *_cpu)
624 {
625 int cpu;
626
627 for (;;) {
628 intptr_t *targetptr, newval, expect;
629 int ret;
630
631 cpu = get_current_cpu_id();
632 /* Load list->c[cpu].head with single-copy atomicity. */
633 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
634 newval = (intptr_t)node;
635 targetptr = (intptr_t *)&list->c[cpu].head;
636 node->next = (struct percpu_list_node *)expect;
637 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
638 targetptr, expect, newval, cpu);
639 if (rseq_likely(!ret))
640 break;
641 /* Retry if comparison fails or rseq aborts. */
642 }
643 if (_cpu)
644 *_cpu = cpu;
645 }
646
647 /*
648 * Unlike a traditional lock-less linked list, the availability of a
649 * rseq primitive allows us to implement pop without concerns over
650 * ABA-type races.
651 */
652 static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
653 int *_cpu)
654 {
655 struct percpu_list_node *node = NULL;
656 int cpu;
657
658 for (;;) {
659 struct percpu_list_node *head;
660 intptr_t *targetptr, expectnot, *load;
661 long offset;
662 int ret;
663
664 cpu = get_current_cpu_id();
665 targetptr = (intptr_t *)&list->c[cpu].head;
666 expectnot = (intptr_t)NULL;
667 offset = offsetof(struct percpu_list_node, next);
668 load = (intptr_t *)&head;
669 ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
670 targetptr, expectnot,
671 offset, load, cpu);
672 if (rseq_likely(!ret)) {
673 node = head;
674 break;
675 }
676 if (ret > 0)
677 break;
678 /* Retry if rseq aborts. */
679 }
680 if (_cpu)
681 *_cpu = cpu;
682 return node;
683 }
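
/*
 * The pop above relies on rseq_load_cbeq_store_add_load_store__ptr(), which
 * performs roughly the following as a single restartable step on @cpu
 * (rough sketch; see rseq.h for the authoritative semantics):
 *
 *	head = *targetptr;
 *	if (head == expectnot)
 *		return 1;			(list is empty)
 *	*load = head;				(record the popped node)
 *	*targetptr = *(head + offset);		(head = head->next)
 *	return 0;
 *
 * Because loading the head and replacing it with its next pointer happen in
 * one critical section, no ABA hazard can occur.
 */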
684
685 /*
686 * __percpu_list_pop is not safe against concurrent accesses. Should
687 * only be used on lists that are not concurrently modified.
688 */
689 static struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
690 {
691 struct percpu_list_node *node;
692
693 node = list->c[cpu].head;
694 if (!node)
695 return NULL;
696 list->c[cpu].head = node->next;
697 return node;
698 }
699
700 static void *test_percpu_list_thread(void *arg)
701 {
702 long long i, reps;
703 struct percpu_list *list = (struct percpu_list *)arg;
704
705 if (!opt_disable_rseq && rseq_register_current_thread())
706 abort();
707
708 reps = opt_reps;
709 for (i = 0; i < reps; i++) {
710 struct percpu_list_node *node;
711
712 node = this_cpu_list_pop(list, NULL);
713 if (opt_yield)
714 sched_yield(); /* encourage shuffling */
715 if (node)
716 this_cpu_list_push(list, node, NULL);
717 }
718
719 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
720 (int) rseq_gettid(), nr_abort, signals_delivered);
721 if (!opt_disable_rseq && rseq_unregister_current_thread())
722 abort();
723
724 return NULL;
725 }
726
727 /* Simultaneous modification to a per-cpu linked list from many threads. */
728 static void test_percpu_list(void)
729 {
730 const int num_threads = opt_threads;
731 int i, j, ret;
732 uint64_t sum = 0, expected_sum = 0;
733 struct percpu_list list;
734 pthread_t test_threads[num_threads];
735 cpu_set_t allowed_cpus;
736
737 memset(&list, 0, sizeof(list));
738
739 /* Generate list entries for every usable cpu. */
740 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
741 for (i = 0; i < CPU_SETSIZE; i++) {
742 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
743 continue;
744 for (j = 1; j <= 100; j++) {
745 struct percpu_list_node *node;
746
747 expected_sum += j;
748
749 node = (struct percpu_list_node *) malloc(sizeof(*node));
750 assert(node);
751 node->data = j;
752 node->next = list.c[i].head;
753 list.c[i].head = node;
754 }
755 }
756
757 for (i = 0; i < num_threads; i++) {
758 ret = pthread_create(&test_threads[i], NULL,
759 test_percpu_list_thread, &list);
760 if (ret) {
761 errno = ret;
762 perror("pthread_create");
763 abort();
764 }
765 }
766
767 for (i = 0; i < num_threads; i++) {
768 ret = pthread_join(test_threads[i], NULL);
769 if (ret) {
770 errno = ret;
771 perror("pthread_join");
772 abort();
773 }
774 }
775
776 for (i = 0; i < CPU_SETSIZE; i++) {
777 struct percpu_list_node *node;
778
779 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
780 continue;
781
782 while ((node = __percpu_list_pop(&list, i))) {
783 sum += node->data;
784 free(node);
785 }
786 }
787
788 /*
789 * All entries should now be accounted for (unless some external
790 * actor is interfering with our allowed affinity while this
791 * test is running).
792 */
793 assert(sum == expected_sum);
794 }
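
/*
 * The buffer push below uses rseq_load_cbne_store_store__ptr(): within one
 * restartable step it re-checks that the offset is still the value it
 * loaded, stores the node into the array slot, and only then commits the
 * new offset (with release semantics when -M is used). Rough sketch (see
 * rseq.h for the authoritative semantics):
 *
 *	if (*targetptr_final != offset)
 *		return 1;			(offset moved, caller retries)
 *	*targetptr_spec = newval_spec;		(array[offset] = node)
 *	*targetptr_final = newval_final;	(offset = offset + 1)
 *	return 0;
 */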
795
796 static bool this_cpu_buffer_push(struct percpu_buffer *buffer,
797 struct percpu_buffer_node *node,
798 int *_cpu)
799 {
800 bool result = false;
801 int cpu;
802
803 for (;;) {
804 intptr_t *targetptr_spec, newval_spec;
805 intptr_t *targetptr_final, newval_final;
806 intptr_t offset;
807 int ret;
808
809 cpu = get_current_cpu_id();
810 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
811 if (offset == buffer->c[cpu].buflen)
812 break;
813 newval_spec = (intptr_t)node;
814 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
815 newval_final = offset + 1;
816 targetptr_final = &buffer->c[cpu].offset;
817 ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
818 targetptr_final, offset, targetptr_spec,
819 newval_spec, newval_final, cpu);
820 if (rseq_likely(!ret)) {
821 result = true;
822 break;
823 }
824 /* Retry if comparison fails or rseq aborts. */
825 }
826 if (_cpu)
827 *_cpu = cpu;
828 return result;
829 }
830
831 static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
832 int *_cpu)
833 {
834 struct percpu_buffer_node *head;
835 int cpu;
836
837 for (;;) {
838 intptr_t *targetptr, newval;
839 intptr_t offset;
840 int ret;
841
842 cpu = get_current_cpu_id();
843 /* Load offset with single-copy atomicity. */
844 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
845 if (offset == 0) {
846 head = NULL;
847 break;
848 }
849 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
850 newval = offset - 1;
851 targetptr = (intptr_t *)&buffer->c[cpu].offset;
852 ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
853 targetptr, offset,
854 (intptr_t *)&buffer->c[cpu].array[offset - 1],
855 (intptr_t)head, newval, cpu);
856 if (rseq_likely(!ret))
857 break;
858 /* Retry if comparison fails or rseq aborts. */
859 }
860 if (_cpu)
861 *_cpu = cpu;
862 return head;
863 }
864
865 /*
866 * __percpu_buffer_pop is not safe against concurrent accesses. Should
867 * only be used on buffers that are not concurrently modified.
868 */
869 static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
870 int cpu)
871 {
872 struct percpu_buffer_node *head;
873 intptr_t offset;
874
875 offset = buffer->c[cpu].offset;
876 if (offset == 0)
877 return NULL;
878 head = buffer->c[cpu].array[offset - 1];
879 buffer->c[cpu].offset = offset - 1;
880 return head;
881 }
882
883 static void *test_percpu_buffer_thread(void *arg)
884 {
885 long long i, reps;
886 struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
887
888 if (!opt_disable_rseq && rseq_register_current_thread())
889 abort();
890
891 reps = opt_reps;
892 for (i = 0; i < reps; i++) {
893 struct percpu_buffer_node *node;
894
895 node = this_cpu_buffer_pop(buffer, NULL);
896 if (opt_yield)
897 sched_yield(); /* encourage shuffling */
898 if (node) {
899 if (!this_cpu_buffer_push(buffer, node, NULL)) {
900 /* Should increase buffer size. */
901 abort();
902 }
903 }
904 }
905
906 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
907 (int) rseq_gettid(), nr_abort, signals_delivered);
908 if (!opt_disable_rseq && rseq_unregister_current_thread())
909 abort();
910
911 return NULL;
912 }
913
914 /* Simultaneous modification to a per-cpu buffer from many threads. */
915 static void test_percpu_buffer(void)
916 {
917 const int num_threads = opt_threads;
918 int i, j, ret;
919 uint64_t sum = 0, expected_sum = 0;
920 struct percpu_buffer buffer;
921 pthread_t test_threads[num_threads];
922 cpu_set_t allowed_cpus;
923
924 memset(&buffer, 0, sizeof(buffer));
925
926 /* Generate buffer entries for every usable cpu. */
927 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
928 for (i = 0; i < CPU_SETSIZE; i++) {
929 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
930 continue;
931 /* Worst case is every item in the same CPU. */
932 buffer.c[i].array =
933 (struct percpu_buffer_node **)
934 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
935 BUFFER_ITEM_PER_CPU);
936 assert(buffer.c[i].array);
937 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
938 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
939 struct percpu_buffer_node *node;
940
941 expected_sum += j;
942
943 /*
944 * We could theoretically put the word-sized
945 * "data" directly in the buffer. However, we
946 * want to model objects that would not fit
947 * within a single word, so allocate an object
948 * for each node.
949 */
950 node = (struct percpu_buffer_node *) malloc(sizeof(*node));
951 assert(node);
952 node->data = j;
953 buffer.c[i].array[j - 1] = node;
954 buffer.c[i].offset++;
955 }
956 }
957
958 for (i = 0; i < num_threads; i++) {
959 ret = pthread_create(&test_threads[i], NULL,
960 test_percpu_buffer_thread, &buffer);
961 if (ret) {
962 errno = ret;
963 perror("pthread_create");
964 abort();
965 }
966 }
967
968 for (i = 0; i < num_threads; i++) {
969 ret = pthread_join(test_threads[i], NULL);
970 if (ret) {
971 errno = ret;
972 perror("pthread_join");
973 abort();
974 }
975 }
976
977 for (i = 0; i < CPU_SETSIZE; i++) {
978 struct percpu_buffer_node *node;
979
980 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
981 continue;
982
983 while ((node = __percpu_buffer_pop(&buffer, i))) {
984 sum += node->data;
985 free(node);
986 }
987 free(buffer.c[i].array);
988 }
989
990 /*
991 * All entries should now be accounted for (unless some external
992 * actor is interfering with our allowed affinity while this
993 * test is running).
994 */
995 assert(sum == expected_sum);
996 }
997
998 static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
999 struct percpu_memcpy_buffer_node item,
1000 int *_cpu)
1001 {
1002 bool result = false;
1003 int cpu;
1004
1005 for (;;) {
1006 intptr_t *targetptr_final, newval_final, offset;
1007 char *destptr, *srcptr;
1008 size_t copylen;
1009 int ret;
1010
1011 cpu = get_current_cpu_id();
1012 /* Load offset with single-copy atomicity. */
1013 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
1014 if (offset == buffer->c[cpu].buflen)
1015 break;
1016 destptr = (char *)&buffer->c[cpu].array[offset];
1017 srcptr = (char *)&item;
1018 /* copylen must be <= 4kB. */
1019 copylen = sizeof(item);
1020 newval_final = offset + 1;
1021 targetptr_final = &buffer->c[cpu].offset;
1022 ret = rseq_load_cbne_memcpy_store__ptr(
1023 opt_mo, RSEQ_PERCPU,
1024 targetptr_final, offset,
1025 destptr, srcptr, copylen,
1026 newval_final, cpu);
1027 if (rseq_likely(!ret)) {
1028 result = true;
1029 break;
1030 }
1031 /* Retry if comparison fails or rseq aborts. */
1032 }
1033 if (_cpu)
1034 *_cpu = cpu;
1035 return result;
1036 }
1037
1038 static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1039 struct percpu_memcpy_buffer_node *item,
1040 int *_cpu)
1041 {
1042 bool result = false;
1043 int cpu;
1044
1045 for (;;) {
1046 intptr_t *targetptr_final, newval_final, offset;
1047 char *destptr, *srcptr;
1048 size_t copylen;
1049 int ret;
1050
1051 cpu = get_current_cpu_id();
1052 /* Load offset with single-copy atomicity. */
1053 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
1054 if (offset == 0)
1055 break;
1056 destptr = (char *)item;
1057 srcptr = (char *)&buffer->c[cpu].array[offset - 1];
1058 /* copylen must be <= 4kB. */
1059 copylen = sizeof(*item);
1060 newval_final = offset - 1;
1061 targetptr_final = &buffer->c[cpu].offset;
1062 ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1063 targetptr_final, offset, destptr, srcptr, copylen,
1064 newval_final, cpu);
1065 if (rseq_likely(!ret)) {
1066 result = true;
1067 break;
1068 }
1069 /* Retry if comparison fails or rseq aborts. */
1070 }
1071 if (_cpu)
1072 *_cpu = cpu;
1073 return result;
1074 }
1075
1076 /*
1077 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1078 * only be used on buffers that are not concurrently modified.
1079 */
1080 static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1081 struct percpu_memcpy_buffer_node *item,
1082 int cpu)
1083 {
1084 intptr_t offset;
1085
1086 offset = buffer->c[cpu].offset;
1087 if (offset == 0)
1088 return false;
1089 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
1090 buffer->c[cpu].offset = offset - 1;
1091 return true;
1092 }
1093
1094 static void *test_percpu_memcpy_buffer_thread(void *arg)
1095 {
1096 long long i, reps;
1097 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
1098
1099 if (!opt_disable_rseq && rseq_register_current_thread())
1100 abort();
1101
1102 reps = opt_reps;
1103 for (i = 0; i < reps; i++) {
1104 struct percpu_memcpy_buffer_node item;
1105 bool result;
1106
1107 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1108 if (opt_yield)
1109 sched_yield(); /* encourage shuffling */
1110 if (result) {
1111 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1112 /* Should increase buffer size. */
1113 abort();
1114 }
1115 }
1116 }
1117
1118 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
1119 (int) rseq_gettid(), nr_abort, signals_delivered);
1120 if (!opt_disable_rseq && rseq_unregister_current_thread())
1121 abort();
1122
1123 return NULL;
1124 }
1125
1126 /* Simultaneous modification to a per-cpu buffer from many threads. */
1127 static void test_percpu_memcpy_buffer(void)
1128 {
1129 const int num_threads = opt_threads;
1130 int i, j, ret;
1131 uint64_t sum = 0, expected_sum = 0;
1132 struct percpu_memcpy_buffer buffer;
1133 pthread_t test_threads[num_threads];
1134 cpu_set_t allowed_cpus;
1135
1136 memset(&buffer, 0, sizeof(buffer));
1137
1138 /* Generate buffer entries for every usable cpu. */
1139 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1140 for (i = 0; i < CPU_SETSIZE; i++) {
1141 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1142 continue;
1143 /* Worst case is every item in the same CPU. */
1144 buffer.c[i].array =
1145 (struct percpu_memcpy_buffer_node *)
1146 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1147 MEMCPY_BUFFER_ITEM_PER_CPU);
1148 assert(buffer.c[i].array);
1149 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1150 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1151 expected_sum += 2 * j + 1;
1152
1153 /*
1154 * We could theoretically put the word-sized
1155 * "data" directly in the buffer. However, we
1156 * want to model objects that would not fit
1157 * within a single word, so allocate an object
1158 * for each node.
1159 */
1160 buffer.c[i].array[j - 1].data1 = j;
1161 buffer.c[i].array[j - 1].data2 = j + 1;
1162 buffer.c[i].offset++;
1163 }
1164 }
1165
1166 for (i = 0; i < num_threads; i++) {
1167 ret = pthread_create(&test_threads[i], NULL,
1168 test_percpu_memcpy_buffer_thread,
1169 &buffer);
1170 if (ret) {
1171 errno = ret;
1172 perror("pthread_create");
1173 abort();
1174 }
1175 }
1176
1177 for (i = 0; i < num_threads; i++) {
1178 ret = pthread_join(test_threads[i], NULL);
1179 if (ret) {
1180 errno = ret;
1181 perror("pthread_join");
1182 abort();
1183 }
1184 }
1185
1186 for (i = 0; i < CPU_SETSIZE; i++) {
1187 struct percpu_memcpy_buffer_node item;
1188
1189 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1190 continue;
1191
1192 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1193 sum += item.data1;
1194 sum += item.data2;
1195 }
1196 free(buffer.c[i].array);
1197 }
1198
1199 /*
1200 * All entries should now be accounted for (unless some external
1201 * actor is interfering with our allowed affinity while this
1202 * test is running).
1203 */
1204 assert(sum == expected_sum);
1205 }
1206
1207
1208 static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
1209 {
1210 signals_delivered++;
1211 }
1212
1213 static int set_signal_handler(void)
1214 {
1215 int ret = 0;
1216 struct sigaction sa;
1217 sigset_t sigset;
1218
1219 ret = sigemptyset(&sigset);
1220 if (ret < 0) {
1221 perror("sigemptyset");
1222 return ret;
1223 }
1224
1225 sa.sa_handler = test_signal_interrupt_handler;
1226 sa.sa_mask = sigset;
1227 sa.sa_flags = 0;
1228 ret = sigaction(SIGUSR1, &sa, NULL);
1229 if (ret < 0) {
1230 perror("sigaction");
1231 return ret;
1232 }
1233
1234 printf_verbose("Signal handler set for SIGUSR1\n");
1235
1236 return ret;
1237 }
1238
1239 static
1240 bool membarrier_private_expedited_rseq_available(void)
1241 {
1242 int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
1243
1244 if (status < 0) {
1245 perror("membarrier");
1246 return false;
1247 }
1248 if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
1249 return false;
1250 return true;
1251 }
1252
1253 /* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
1254 #ifdef TEST_MEMBARRIER
1255 struct test_membarrier_thread_args {
1256 int stop;
1257 intptr_t percpu_list_ptr;
1258 };
1259
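/*
 * The worker threads use rseq_load_add_load_add_store__ptr(), which, as one
 * restartable step on @cpu, loads the currently published list pointer,
 * offsets it to the per-cpu entry, dereferences the head pointer stored
 * there and increments the value it points to. Rough sketch (see rseq.h
 * for the authoritative semantics):
 *
 *	intptr_t list = *ptr;				(active percpu_list)
 *	intptr_t head = *(intptr_t *)(list + off);	(list->c[cpu].head)
 *	*(intptr_t *)head += inc;			(head->data += inc)
 *
 * The manager thread then issues the expedited rseq membarrier command to
 * restart any critical section still running on the targeted cpu, so no
 * increment based on the old list pointer can commit after the switch.
 */
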
1260 /* Worker threads modify data in their "active" percpu lists. */
1261 static
1262 void *test_membarrier_worker_thread(void *arg)
1263 {
1264 struct test_membarrier_thread_args *args =
1265 (struct test_membarrier_thread_args *)arg;
1266 const int iters = opt_reps;
1267 int i;
1268
1269 if (rseq_register_current_thread()) {
1270 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1271 errno, strerror(errno));
1272 abort();
1273 }
1274
1275 /* Wait for initialization. */
1276 while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }
1277
1278 for (i = 0; i < iters; ++i) {
1279 int ret;
1280
1281 do {
1282 int cpu = get_current_cpu_id();
1283
1284 ret = rseq_load_add_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1285 &args->percpu_list_ptr,
1286 sizeof(struct percpu_list_entry) * cpu, 1, cpu);
1287 } while (rseq_unlikely(ret));
1288 }
1289
1290 if (rseq_unregister_current_thread()) {
1291 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1292 errno, strerror(errno));
1293 abort();
1294 }
1295 return NULL;
1296 }
1297
1298 static
1299 void test_membarrier_init_percpu_list(struct percpu_list *list)
1300 {
1301 int i;
1302
1303 memset(list, 0, sizeof(*list));
1304 for (i = 0; i < CPU_SETSIZE; i++) {
1305 struct percpu_list_node *node;
1306
1307 node = (struct percpu_list_node *) malloc(sizeof(*node));
1308 assert(node);
1309 node->data = 0;
1310 node->next = NULL;
1311 list->c[i].head = node;
1312 }
1313 }
1314
1315 static
1316 void test_membarrier_free_percpu_list(struct percpu_list *list)
1317 {
1318 int i;
1319
1320 for (i = 0; i < CPU_SETSIZE; i++)
1321 free(list->c[i].head);
1322 }
1323
1324 /*
1325 * The manager thread swaps per-cpu lists that worker threads see,
1326 * and validates that there are no unexpected modifications.
1327 */
1328 static
1329 void *test_membarrier_manager_thread(void *arg)
1330 {
1331 struct test_membarrier_thread_args *args =
1332 (struct test_membarrier_thread_args *)arg;
1333 struct percpu_list list_a, list_b;
1334 intptr_t expect_a = 0, expect_b = 0;
1335 int cpu_a = 0, cpu_b = 0;
1336
1337 if (rseq_register_current_thread()) {
1338 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1339 errno, strerror(errno));
1340 abort();
1341 }
1342
1343 /* Init lists. */
1344 test_membarrier_init_percpu_list(&list_a);
1345 test_membarrier_init_percpu_list(&list_b);
1346
1347 /* Initialize lists before publishing them. */
1348 rseq_smp_wmb();
1349
1350 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
1351
1352 while (!RSEQ_READ_ONCE(args->stop)) {
1353 /* list_a is "active". */
1354 cpu_a = rand() % CPU_SETSIZE;
1355 /*
1356 * As list_b is "inactive", we should never see changes
1357 * to list_b.
1358 */
1359 if (expect_b != RSEQ_READ_ONCE(list_b.c[cpu_b].head->data)) {
1360 fprintf(stderr, "Membarrier test failed\n");
1361 abort();
1362 }
1363
1364 /* Make list_b "active". */
1365 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_b);
1366 if (rseq_membarrier_expedited(cpu_a) &&
1367 errno != ENXIO /* missing CPU */) {
1368 perror("sys_membarrier");
1369 abort();
1370 }
1371 /*
1372 * Cpu A should now only modify list_b, so the values
1373 * in list_a should be stable.
1374 */
1375 expect_a = RSEQ_READ_ONCE(list_a.c[cpu_a].head->data);
1376
1377 cpu_b = rand() % CPU_SETSIZE;
1378 /*
1379 * As list_a is "inactive", we should never see changes
1380 * to list_a.
1381 */
1382 if (expect_a != RSEQ_READ_ONCE(list_a.c[cpu_a].head->data)) {
1383 fprintf(stderr, "Membarrier test failed\n");
1384 abort();
1385 }
1386
1387 /* Make list_a "active". */
1388 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
1389 if (rseq_membarrier_expedited(cpu_b) &&
1390 errno != ENXIO /* missing CPU */) {
1391 perror("sys_membarrier");
1392 abort();
1393 }
1394 /* Remember a value from list_b. */
1395 expect_b = RSEQ_READ_ONCE(list_b.c[cpu_b].head->data);
1396 }
1397
1398 test_membarrier_free_percpu_list(&list_a);
1399 test_membarrier_free_percpu_list(&list_b);
1400
1401 if (rseq_unregister_current_thread()) {
1402 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1403 errno, strerror(errno));
1404 abort();
1405 }
1406 return NULL;
1407 }
1408
1409 static
1410 void test_membarrier(void)
1411 {
1412 const int num_threads = opt_threads;
1413 struct test_membarrier_thread_args thread_args;
1414 pthread_t worker_threads[num_threads];
1415 pthread_t manager_thread;
1416 int i, ret;
1417
1418 if (!membarrier_private_expedited_rseq_available()) {
1419 fprintf(stderr, "Membarrier private expedited rseq not available. "
1420 "Skipping membarrier test.\n");
1421 return;
1422 }
1423 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1424 perror("sys_membarrier");
1425 abort();
1426 }
1427
1428 thread_args.stop = 0;
1429 thread_args.percpu_list_ptr = 0;
1430 ret = pthread_create(&manager_thread, NULL,
1431 test_membarrier_manager_thread, &thread_args);
1432 if (ret) {
1433 errno = ret;
1434 perror("pthread_create");
1435 abort();
1436 }
1437
1438 for (i = 0; i < num_threads; i++) {
1439 ret = pthread_create(&worker_threads[i], NULL,
1440 test_membarrier_worker_thread, &thread_args);
1441 if (ret) {
1442 errno = ret;
1443 perror("pthread_create");
1444 abort();
1445 }
1446 }
1447
1448
1449 for (i = 0; i < num_threads; i++) {
1450 ret = pthread_join(worker_threads[i], NULL);
1451 if (ret) {
1452 errno = ret;
1453 perror("pthread_join");
1454 abort();
1455 }
1456 }
1457
1458 RSEQ_WRITE_ONCE(thread_args.stop, 1);
1459 ret = pthread_join(manager_thread, NULL);
1460 if (ret) {
1461 errno = ret;
1462 perror("pthread_join");
1463 abort();
1464 }
1465 }
1466 #else /* TEST_MEMBARRIER */
1467 static
1468 void test_membarrier(void)
1469 {
1470 if (!membarrier_private_expedited_rseq_available()) {
1471 fprintf(stderr, "Membarrier private expedited rseq not available. "
1472 "Skipping membarrier test.\n");
1473 return;
1474 }
1475 fprintf(stderr, "rseq_load_add_load_add_store__ptr is not implemented on this architecture. "
1476 "Skipping membarrier test.\n");
1477 }
1478 #endif
1479
1480 static void show_usage(char **argv)
1481 {
1482 printf("Usage : %s <OPTIONS>\n",
1483 argv[0]);
1484 printf("OPTIONS:\n");
1485 printf(" [-1 loops] Number of loops for delay injection 1\n");
1486 printf(" [-2 loops] Number of loops for delay injection 2\n");
1487 printf(" [-3 loops] Number of loops for delay injection 3\n");
1488 printf(" [-4 loops] Number of loops for delay injection 4\n");
1489 printf(" [-5 loops] Number of loops for delay injection 5\n");
1490 printf(" [-6 loops] Number of loops for delay injection 6\n");
1491 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1492 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1493 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1494 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1495 printf(" [-y] Yield\n");
1496 printf(" [-k] Kill thread with signal\n");
1497 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1498 printf(" [-t N] Number of threads (default 200)\n");
1499 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1500 printf(" [-d] Disable rseq system call (no initialization)\n");
1501 printf(" [-D M] Disable rseq for each M threads\n");
1502 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
1503 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
1504 printf(" [-c] Check if the rseq syscall is available.\n");
1505 printf(" [-v] Verbose output.\n");
1506 printf(" [-h] Show this help.\n");
1507 printf("\n");
1508 }
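
/*
 * Example invocations (assuming the test binary built from this file is
 * named param_test; adjust to the actual build output):
 *
 *	./param_test -T s -t 16 -r 100000 -v	(spinlock test, 16 threads, verbose)
 *	./param_test -T l -y			(list test with yield injection)
 *	./param_test -T m -M			(memcpy buffer test, pushes use RSEQ_MO_RELEASE)
 */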
1509
1510 int main(int argc, char **argv)
1511 {
1512 int i;
1513
1514 for (i = 1; i < argc; i++) {
1515 if (argv[i][0] != '-')
1516 continue;
1517 switch (argv[i][1]) {
1518 case '1':
1519 case '2':
1520 case '3':
1521 case '4':
1522 case '5':
1523 case '6':
1524 case '7':
1525 case '8':
1526 case '9':
1527 if (argc < i + 2) {
1528 show_usage(argv);
1529 goto error;
1530 }
1531 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1532 i++;
1533 break;
1534 case 'm':
1535 if (argc < i + 2) {
1536 show_usage(argv);
1537 goto error;
1538 }
1539 opt_modulo = atol(argv[i + 1]);
1540 if (opt_modulo < 0) {
1541 show_usage(argv);
1542 goto error;
1543 }
1544 i++;
1545 break;
1546 case 's':
1547 if (argc < i + 2) {
1548 show_usage(argv);
1549 goto error;
1550 }
1551 opt_sleep = atol(argv[i + 1]);
1552 if (opt_sleep < 0) {
1553 show_usage(argv);
1554 goto error;
1555 }
1556 i++;
1557 break;
1558 case 'y':
1559 opt_yield = 1;
1560 break;
1561 case 'k':
1562 opt_signal = 1;
1563 break;
1564 case 'd':
1565 opt_disable_rseq = 1;
1566 break;
1567 case 'D':
1568 if (argc < i + 2) {
1569 show_usage(argv);
1570 goto error;
1571 }
1572 opt_disable_mod = atol(argv[i + 1]);
1573 if (opt_disable_mod < 0) {
1574 show_usage(argv);
1575 goto error;
1576 }
1577 i++;
1578 break;
1579 case 't':
1580 if (argc < i + 2) {
1581 show_usage(argv);
1582 goto error;
1583 }
1584 opt_threads = atol(argv[i + 1]);
1585 if (opt_threads < 0) {
1586 show_usage(argv);
1587 goto error;
1588 }
1589 i++;
1590 break;
1591 case 'r':
1592 if (argc < i + 2) {
1593 show_usage(argv);
1594 goto error;
1595 }
1596 opt_reps = atoll(argv[i + 1]);
1597 if (opt_reps < 0) {
1598 show_usage(argv);
1599 goto error;
1600 }
1601 i++;
1602 break;
1603 case 'h':
1604 show_usage(argv);
1605 goto end;
1606 case 'T':
1607 if (argc < i + 2) {
1608 show_usage(argv);
1609 goto error;
1610 }
1611 opt_test = *argv[i + 1];
1612 switch (opt_test) {
1613 case 's':
1614 case 'l':
1615 case 'i':
1616 case 'b':
1617 case 'm':
1618 case 'r':
1619 break;
1620 default:
1621 show_usage(argv);
1622 goto error;
1623 }
1624 i++;
1625 break;
1626 case 'v':
1627 verbose = 1;
1628 break;
1629 case 'M':
1630 opt_mo = RSEQ_MO_RELEASE;
1631 break;
1632 case 'c':
1633 if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
1634 printf_verbose("The rseq syscall is available.\n");
1635 goto end;
1636 } else {
1637 printf_verbose("The rseq syscall is unavailable.\n");
1638 goto no_rseq;
1639 }
1640 default:
1641 show_usage(argv);
1642 goto error;
1643 }
1644 }
1645
1646 loop_cnt_1 = loop_cnt[1];
1647 loop_cnt_2 = loop_cnt[2];
1648 loop_cnt_3 = loop_cnt[3];
1649 loop_cnt_4 = loop_cnt[4];
1650 loop_cnt_5 = loop_cnt[5];
1651 loop_cnt_6 = loop_cnt[6];
1652
1653 if (set_signal_handler())
1654 goto error;
1655
1656 if (!opt_disable_rseq && rseq_register_current_thread())
1657 goto error;
1658 if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
1659 printf_verbose("The rseq cpu id getter is unavailable\n");
1660 goto no_rseq;
1661 }
1662 switch (opt_test) {
1663 case 's':
1664 printf_verbose("spinlock\n");
1665 test_percpu_spinlock();
1666 break;
1667 case 'l':
1668 printf_verbose("linked list\n");
1669 test_percpu_list();
1670 break;
1671 case 'b':
1672 printf_verbose("buffer\n");
1673 test_percpu_buffer();
1674 break;
1675 case 'm':
1676 printf_verbose("memcpy buffer\n");
1677 test_percpu_memcpy_buffer();
1678 break;
1679 case 'i':
1680 printf_verbose("counter increment\n");
1681 test_percpu_inc();
1682 break;
1683 case 'r':
1684 printf_verbose("membarrier\n");
1685 test_membarrier();
1686 break;
1687 }
1688 if (!opt_disable_rseq && rseq_unregister_current_thread())
1689 abort();
1690 end:
1691 return 0;
1692
1693 error:
1694 return -1;
1695
1696 no_rseq:
1697 return 2;
1698 }