Add RISC-V rseq support
[librseq.git] / tests / param_test.c
1 // SPDX-License-Identifier: LGPL-2.1-only
2 #ifndef _GNU_SOURCE
3 #define _GNU_SOURCE
4 #endif
5 #include <assert.h>
6 #include <linux/version.h>
7 #include <linux/membarrier.h>
8 #include <pthread.h>
9 #include <sched.h>
10 #include <stdint.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <syscall.h>
15 #include <unistd.h>
16 #include <poll.h>
17 #include <sys/types.h>
18 #include <signal.h>
19 #include <errno.h>
20 #include <stddef.h>
21
22 #if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
23 enum {
24 MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7),
25 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8),
26 };
27
28 enum {
29 MEMBARRIER_CMD_FLAG_CPU = (1 << 0),
30 };
31 #endif
32
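/*
 * Delay injection: the -1 .. -9 options fill loop_cnt[], whose values are
 * consumed by the RSEQ_INJECT_ASM busy-wait loops below (either through the
 * asm_loop_cnt_N globals or the RSEQ_INJECT_INPUT memory operands) and by
 * the C-level RSEQ_INJECT_C hook. The injected delays widen the race windows
 * inside rseq critical sections so the abort/retry paths get exercised.
 */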
33 #define NR_INJECT 9
34 static int loop_cnt[NR_INJECT + 1];
35
36 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
37 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
38 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
39 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
40 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
41 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
42
43 static int opt_modulo, verbose;
44
45 static int opt_yield, opt_signal, opt_sleep,
46 opt_disable_rseq, opt_threads = 200,
47 opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
48
49 #ifndef RSEQ_SKIP_FASTPATH
50 static long long opt_reps = 5000;
51 #else
52 static long long opt_reps = 100;
53 #endif
54
55 static __thread __attribute__((tls_model("initial-exec")))
56 unsigned int signals_delivered;
57
58 #ifndef BENCHMARK
59
60 static inline pid_t rseq_gettid(void)
61 {
62 return syscall(__NR_gettid);
63 }
64
65 static __thread __attribute__((tls_model("initial-exec"), unused))
66 int yield_mod_cnt, nr_abort;
67
68 #define printf_verbose(fmt, ...) \
69 do { \
70 if (verbose) \
71 printf(fmt, ## __VA_ARGS__); \
72 } while (0)
73
74 #ifdef __i386__
75
76 #define INJECT_ASM_REG "eax"
77
78 #define RSEQ_INJECT_CLOBBER \
79 , INJECT_ASM_REG
80
81 #define RSEQ_INJECT_ASM(n) \
82 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
83 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
84 "jz 333f\n\t" \
85 "222:\n\t" \
86 "dec %%" INJECT_ASM_REG "\n\t" \
87 "jnz 222b\n\t" \
88 "333:\n\t"
89
90 #elif defined(__x86_64__)
91
92 #define INJECT_ASM_REG_P "rax"
93 #define INJECT_ASM_REG "eax"
94
95 #define RSEQ_INJECT_CLOBBER \
96 , INJECT_ASM_REG_P \
97 , INJECT_ASM_REG
98
99 #define RSEQ_INJECT_ASM(n) \
100 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
101 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
102 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
103 "jz 333f\n\t" \
104 "222:\n\t" \
105 "dec %%" INJECT_ASM_REG "\n\t" \
106 "jnz 222b\n\t" \
107 "333:\n\t"
108
109 #elif defined(__s390__)
110
111 #define RSEQ_INJECT_INPUT \
112 , [loop_cnt_1]"m"(loop_cnt[1]) \
113 , [loop_cnt_2]"m"(loop_cnt[2]) \
114 , [loop_cnt_3]"m"(loop_cnt[3]) \
115 , [loop_cnt_4]"m"(loop_cnt[4]) \
116 , [loop_cnt_5]"m"(loop_cnt[5]) \
117 , [loop_cnt_6]"m"(loop_cnt[6])
118
119 #define INJECT_ASM_REG "r12"
120
121 #define RSEQ_INJECT_CLOBBER \
122 , INJECT_ASM_REG
123
124 #define RSEQ_INJECT_ASM(n) \
125 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
126 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
127 "je 333f\n\t" \
128 "222:\n\t" \
129 "ahi %%" INJECT_ASM_REG ", -1\n\t" \
130 "jnz 222b\n\t" \
131 "333:\n\t"
132
133 #elif defined(__ARMEL__)
134
135 #define RSEQ_INJECT_INPUT \
136 , [loop_cnt_1]"m"(loop_cnt[1]) \
137 , [loop_cnt_2]"m"(loop_cnt[2]) \
138 , [loop_cnt_3]"m"(loop_cnt[3]) \
139 , [loop_cnt_4]"m"(loop_cnt[4]) \
140 , [loop_cnt_5]"m"(loop_cnt[5]) \
141 , [loop_cnt_6]"m"(loop_cnt[6])
142
143 #define INJECT_ASM_REG "r4"
144
145 #define RSEQ_INJECT_CLOBBER \
146 , INJECT_ASM_REG
147
148 #define RSEQ_INJECT_ASM(n) \
149 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
150 "cmp " INJECT_ASM_REG ", #0\n\t" \
151 "beq 333f\n\t" \
152 "222:\n\t" \
153 "subs " INJECT_ASM_REG ", #1\n\t" \
154 "bne 222b\n\t" \
155 "333:\n\t"
156
157 #elif defined(__AARCH64EL__)
158
159 #define RSEQ_INJECT_INPUT \
160 , [loop_cnt_1] "Qo" (loop_cnt[1]) \
161 , [loop_cnt_2] "Qo" (loop_cnt[2]) \
162 , [loop_cnt_3] "Qo" (loop_cnt[3]) \
163 , [loop_cnt_4] "Qo" (loop_cnt[4]) \
164 , [loop_cnt_5] "Qo" (loop_cnt[5]) \
165 , [loop_cnt_6] "Qo" (loop_cnt[6])
166
167 #define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
168
169 #define RSEQ_INJECT_ASM(n) \
170 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
171 " cbz " INJECT_ASM_REG ", 333f\n" \
172 "222:\n" \
173 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
174 " cbnz " INJECT_ASM_REG ", 222b\n" \
175 "333:\n"
176
177 #elif defined(__PPC__)
178
179 #define RSEQ_INJECT_INPUT \
180 , [loop_cnt_1]"m"(loop_cnt[1]) \
181 , [loop_cnt_2]"m"(loop_cnt[2]) \
182 , [loop_cnt_3]"m"(loop_cnt[3]) \
183 , [loop_cnt_4]"m"(loop_cnt[4]) \
184 , [loop_cnt_5]"m"(loop_cnt[5]) \
185 , [loop_cnt_6]"m"(loop_cnt[6])
186
187 #define INJECT_ASM_REG "r18"
188
189 #define RSEQ_INJECT_CLOBBER \
190 , INJECT_ASM_REG
191
192 #define RSEQ_INJECT_ASM(n) \
193 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
194 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
195 "beq 333f\n\t" \
196 "222:\n\t" \
197 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
198 "bne 222b\n\t" \
199 "333:\n\t"
200
201 #elif defined(__mips__)
202
203 #define RSEQ_INJECT_INPUT \
204 , [loop_cnt_1]"m"(loop_cnt[1]) \
205 , [loop_cnt_2]"m"(loop_cnt[2]) \
206 , [loop_cnt_3]"m"(loop_cnt[3]) \
207 , [loop_cnt_4]"m"(loop_cnt[4]) \
208 , [loop_cnt_5]"m"(loop_cnt[5]) \
209 , [loop_cnt_6]"m"(loop_cnt[6])
210
211 #define INJECT_ASM_REG "$5"
212
213 #define RSEQ_INJECT_CLOBBER \
214 , INJECT_ASM_REG
215
216 #define RSEQ_INJECT_ASM(n) \
217 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
218 "beqz " INJECT_ASM_REG ", 333f\n\t" \
219 "222:\n\t" \
220 "addiu " INJECT_ASM_REG ", -1\n\t" \
221 "bnez " INJECT_ASM_REG ", 222b\n\t" \
222 "333:\n\t"
223
224 #elif defined(__riscv)
225
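/*
 * RISC-V delay injection: t1 is used as a scratch register for a simple
 * countdown loop (lw/beqz/addi/bnez), mirroring the loops used on the other
 * architectures above.
 */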
226 #define RSEQ_INJECT_INPUT \
227 , [loop_cnt_1]"m"(loop_cnt[1]) \
228 , [loop_cnt_2]"m"(loop_cnt[2]) \
229 , [loop_cnt_3]"m"(loop_cnt[3]) \
230 , [loop_cnt_4]"m"(loop_cnt[4]) \
231 , [loop_cnt_5]"m"(loop_cnt[5]) \
232 , [loop_cnt_6]"m"(loop_cnt[6])
233
234 #define INJECT_ASM_REG "t1"
235
236 #define RSEQ_INJECT_CLOBBER \
237 , INJECT_ASM_REG
238
239 #define RSEQ_INJECT_ASM(n) \
240 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
241 "beqz " INJECT_ASM_REG ", 333f\n\t" \
242 "222:\n\t" \
243 "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
244 "bnez " INJECT_ASM_REG ", 222b\n\t" \
245 "333:\n\t"
246
247 #else
248 #error unsupported target
249 #endif
250
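/*
 * RSEQ_INJECT_FAILED is invoked by the rseq fast paths whenever a critical
 * section aborts, so nr_abort counts aborts per thread. RSEQ_INJECT_C(n)
 * busy-loops loop_cnt[n] times on the C side; a count of -1 combined with
 * -m triggers the yield/sleep/signal disturbance every opt_modulo passes.
 */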
251 #define RSEQ_INJECT_FAILED \
252 nr_abort++;
253
254 #define RSEQ_INJECT_C(n) \
255 { \
256 int loc_i, loc_nr_loops = loop_cnt[n]; \
257 \
258 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
259 rseq_barrier(); \
260 } \
261 if (loc_nr_loops == -1 && opt_modulo) { \
262 if (yield_mod_cnt == opt_modulo - 1) { \
263 if (opt_sleep > 0) \
264 poll(NULL, 0, opt_sleep); \
265 if (opt_yield) \
266 sched_yield(); \
267 if (opt_signal) \
268 raise(SIGUSR1); \
269 yield_mod_cnt = 0; \
270 } else { \
271 yield_mod_cnt++; \
272 } \
273 } \
274 }
275
276 #else
277
278 #define printf_verbose(fmt, ...)
279
280 #endif /* BENCHMARK */
281
282 #include <rseq/rseq.h>
283
284 struct percpu_lock_entry {
285 intptr_t v;
286 } __attribute__((aligned(128)));
287
288 struct percpu_lock {
289 struct percpu_lock_entry c[CPU_SETSIZE];
290 };
291
292 struct test_data_entry {
293 intptr_t count;
294 } __attribute__((aligned(128)));
295
296 struct spinlock_test_data {
297 struct percpu_lock lock;
298 struct test_data_entry c[CPU_SETSIZE];
299 };
300
301 struct spinlock_thread_test_data {
302 struct spinlock_test_data *data;
303 long long reps;
304 int reg;
305 };
306
307 struct inc_test_data {
308 struct test_data_entry c[CPU_SETSIZE];
309 };
310
311 struct inc_thread_test_data {
312 struct inc_test_data *data;
313 long long reps;
314 int reg;
315 };
316
317 struct percpu_list_node {
318 intptr_t data;
319 struct percpu_list_node *next;
320 };
321
322 struct percpu_list_entry {
323 struct percpu_list_node *head;
324 } __attribute__((aligned(128)));
325
326 struct percpu_list {
327 struct percpu_list_entry c[CPU_SETSIZE];
328 };
329
330 #define BUFFER_ITEM_PER_CPU 100
331
332 struct percpu_buffer_node {
333 intptr_t data;
334 };
335
336 struct percpu_buffer_entry {
337 intptr_t offset;
338 intptr_t buflen;
339 struct percpu_buffer_node **array;
340 } __attribute__((aligned(128)));
341
342 struct percpu_buffer {
343 struct percpu_buffer_entry c[CPU_SETSIZE];
344 };
345
346 #define MEMCPY_BUFFER_ITEM_PER_CPU 100
347
348 struct percpu_memcpy_buffer_node {
349 intptr_t data1;
350 uint64_t data2;
351 };
352
353 struct percpu_memcpy_buffer_entry {
354 intptr_t offset;
355 intptr_t buflen;
356 struct percpu_memcpy_buffer_node *array;
357 } __attribute__((aligned(128)));
358
359 struct percpu_memcpy_buffer {
360 struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
361 };
362
363 /* A simple percpu spinlock. Grabs lock on current cpu. */
364 static int rseq_this_cpu_lock(struct percpu_lock *lock)
365 {
366 int cpu;
367
368 for (;;) {
369 int ret;
370
371 cpu = rseq_cpu_start();
372 ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
373 0, 1, cpu);
374 if (rseq_likely(!ret))
375 break;
376 /* Retry if comparison fails or rseq aborts. */
377 }
378 /*
379 * Acquire semantic when taking lock after control dependency.
380 * Matches rseq_smp_store_release().
381 */
382 rseq_smp_acquire__after_ctrl_dep();
383 return cpu;
384 }
385
386 static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
387 {
388 assert(lock->c[cpu].v == 1);
389 /*
390 * Release lock, with release semantic. Matches
391 * rseq_smp_acquire__after_ctrl_dep().
392 */
393 rseq_smp_store_release(&lock->c[cpu].v, 0);
394 }
395
396 static void *test_percpu_spinlock_thread(void *arg)
397 {
398 struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
399 struct spinlock_test_data *data = thread_data->data;
400 long long i, reps;
401
402 if (!opt_disable_rseq && thread_data->reg &&
403 rseq_register_current_thread())
404 abort();
405 reps = thread_data->reps;
406 for (i = 0; i < reps; i++) {
407 int cpu = rseq_this_cpu_lock(&data->lock);
408 data->c[cpu].count++;
409 rseq_percpu_unlock(&data->lock, cpu);
410 #ifndef BENCHMARK
411 if (i != 0 && reps > 9 && !(i % (reps / 10)))
412 printf_verbose("tid %d: count %lld\n",
413 (int) rseq_gettid(), i);
414 #endif
415 }
416 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
417 (int) rseq_gettid(), nr_abort, signals_delivered);
418 if (!opt_disable_rseq && thread_data->reg &&
419 rseq_unregister_current_thread())
420 abort();
421 return NULL;
422 }
423
424 /*
425 * A simple test which implements a sharded counter using a per-cpu
426 * lock. Obviously real applications might prefer to simply use a
427 * per-cpu increment; however, this is reasonable for a test and the
428 * lock can be extended to synchronize more complicated operations.
429 */
430 static void test_percpu_spinlock(void)
431 {
432 const int num_threads = opt_threads;
433 int i, ret;
434 uint64_t sum;
435 pthread_t test_threads[num_threads];
436 struct spinlock_test_data data;
437 struct spinlock_thread_test_data thread_data[num_threads];
438
439 memset(&data, 0, sizeof(data));
440 for (i = 0; i < num_threads; i++) {
441 thread_data[i].reps = opt_reps;
442 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
443 thread_data[i].reg = 1;
444 else
445 thread_data[i].reg = 0;
446 thread_data[i].data = &data;
447 ret = pthread_create(&test_threads[i], NULL,
448 test_percpu_spinlock_thread,
449 &thread_data[i]);
450 if (ret) {
451 errno = ret;
452 perror("pthread_create");
453 abort();
454 }
455 }
456
457 for (i = 0; i < num_threads; i++) {
458 ret = pthread_join(test_threads[i], NULL);
459 if (ret) {
460 errno = ret;
461 perror("pthread_join");
462 abort();
463 }
464 }
465
466 sum = 0;
467 for (i = 0; i < CPU_SETSIZE; i++)
468 sum += data.c[i].count;
469
470 assert(sum == (uint64_t)opt_reps * num_threads);
471 }
472
473 static void *test_percpu_inc_thread(void *arg)
474 {
475 struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
476 struct inc_test_data *data = thread_data->data;
477 long long i, reps;
478
479 if (!opt_disable_rseq && thread_data->reg &&
480 rseq_register_current_thread())
481 abort();
482 reps = thread_data->reps;
483 for (i = 0; i < reps; i++) {
484 int ret;
485
486 do {
487 int cpu;
488
489 cpu = rseq_cpu_start();
490 ret = rseq_addv(&data->c[cpu].count, 1, cpu);
491 } while (rseq_unlikely(ret));
492 #ifndef BENCHMARK
493 if (i != 0 && reps > 9 && !(i % (reps / 10)))
494 printf_verbose("tid %d: count %lld\n",
495 (int) rseq_gettid(), i);
496 #endif
497 }
498 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
499 (int) rseq_gettid(), nr_abort, signals_delivered);
500 if (!opt_disable_rseq && thread_data->reg &&
501 rseq_unregister_current_thread())
502 abort();
503 return NULL;
504 }
505
506 static void test_percpu_inc(void)
507 {
508 const int num_threads = opt_threads;
509 int i, ret;
510 uint64_t sum;
511 pthread_t test_threads[num_threads];
512 struct inc_test_data data;
513 struct inc_thread_test_data thread_data[num_threads];
514
515 memset(&data, 0, sizeof(data));
516 for (i = 0; i < num_threads; i++) {
517 thread_data[i].reps = opt_reps;
518 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
519 thread_data[i].reg = 1;
520 else
521 thread_data[i].reg = 0;
522 thread_data[i].data = &data;
523 ret = pthread_create(&test_threads[i], NULL,
524 test_percpu_inc_thread,
525 &thread_data[i]);
526 if (ret) {
527 errno = ret;
528 perror("pthread_create");
529 abort();
530 }
531 }
532
533 for (i = 0; i < num_threads; i++) {
534 ret = pthread_join(test_threads[i], NULL);
535 if (ret) {
536 errno = ret;
537 perror("pthread_join");
538 abort();
539 }
540 }
541
542 sum = 0;
543 for (i = 0; i < CPU_SETSIZE; i++)
544 sum += data.c[i].count;
545
546 assert(sum == (uint64_t)opt_reps * num_threads);
547 }
548
549 static void this_cpu_list_push(struct percpu_list *list,
550 struct percpu_list_node *node,
551 int *_cpu)
552 {
553 int cpu;
554
555 for (;;) {
556 intptr_t *targetptr, newval, expect;
557 int ret;
558
559 cpu = rseq_cpu_start();
560 /* Load list->c[cpu].head with single-copy atomicity. */
561 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
562 newval = (intptr_t)node;
563 targetptr = (intptr_t *)&list->c[cpu].head;
564 node->next = (struct percpu_list_node *)expect;
565 ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
566 if (rseq_likely(!ret))
567 break;
568 /* Retry if comparison fails or rseq aborts. */
569 }
570 if (_cpu)
571 *_cpu = cpu;
572 }
573
574 /*
575 * Unlike a traditional lock-less linked list, the availability of an
576 * rseq primitive allows us to implement pop without concern over
577 * ABA-type races.
578 */
579 static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
580 int *_cpu)
581 {
582 struct percpu_list_node *node = NULL;
583 int cpu;
584
585 for (;;) {
586 struct percpu_list_node *head;
587 intptr_t *targetptr, expectnot, *load;
588 long offset;
589 int ret;
590
591 cpu = rseq_cpu_start();
592 targetptr = (intptr_t *)&list->c[cpu].head;
593 expectnot = (intptr_t)NULL;
594 offset = offsetof(struct percpu_list_node, next);
595 load = (intptr_t *)&head;
596 ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
597 offset, load, cpu);
598 if (rseq_likely(!ret)) {
599 node = head;
600 break;
601 }
602 if (ret > 0)
603 break;
604 /* Retry if rseq aborts. */
605 }
606 if (_cpu)
607 *_cpu = cpu;
608 return node;
609 }
610
611 /*
612 * __percpu_list_pop is not safe against concurrent accesses. Should
613 * only be used on lists that are not concurrently modified.
614 */
615 static struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
616 {
617 struct percpu_list_node *node;
618
619 node = list->c[cpu].head;
620 if (!node)
621 return NULL;
622 list->c[cpu].head = node->next;
623 return node;
624 }
625
626 static void *test_percpu_list_thread(void *arg)
627 {
628 long long i, reps;
629 struct percpu_list *list = (struct percpu_list *)arg;
630
631 if (!opt_disable_rseq && rseq_register_current_thread())
632 abort();
633
634 reps = opt_reps;
635 for (i = 0; i < reps; i++) {
636 struct percpu_list_node *node;
637
638 node = this_cpu_list_pop(list, NULL);
639 if (opt_yield)
640 sched_yield(); /* encourage shuffling */
641 if (node)
642 this_cpu_list_push(list, node, NULL);
643 }
644
645 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
646 (int) rseq_gettid(), nr_abort, signals_delivered);
647 if (!opt_disable_rseq && rseq_unregister_current_thread())
648 abort();
649
650 return NULL;
651 }
652
653 /* Simultaneous modification to a per-cpu linked list from many threads. */
654 static void test_percpu_list(void)
655 {
656 const int num_threads = opt_threads;
657 int i, j, ret;
658 uint64_t sum = 0, expected_sum = 0;
659 struct percpu_list list;
660 pthread_t test_threads[num_threads];
661 cpu_set_t allowed_cpus;
662
663 memset(&list, 0, sizeof(list));
664
665 /* Generate list entries for every usable cpu. */
666 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
667 for (i = 0; i < CPU_SETSIZE; i++) {
668 if (!CPU_ISSET(i, &allowed_cpus))
669 continue;
670 for (j = 1; j <= 100; j++) {
671 struct percpu_list_node *node;
672
673 expected_sum += j;
674
675 node = (struct percpu_list_node *) malloc(sizeof(*node));
676 assert(node);
677 node->data = j;
678 node->next = list.c[i].head;
679 list.c[i].head = node;
680 }
681 }
682
683 for (i = 0; i < num_threads; i++) {
684 ret = pthread_create(&test_threads[i], NULL,
685 test_percpu_list_thread, &list);
686 if (ret) {
687 errno = ret;
688 perror("pthread_create");
689 abort();
690 }
691 }
692
693 for (i = 0; i < num_threads; i++) {
694 ret = pthread_join(test_threads[i], NULL);
695 if (ret) {
696 errno = ret;
697 perror("pthread_join");
698 abort();
699 }
700 }
701
702 for (i = 0; i < CPU_SETSIZE; i++) {
703 struct percpu_list_node *node;
704
705 if (!CPU_ISSET(i, &allowed_cpus))
706 continue;
707
708 while ((node = __percpu_list_pop(&list, i))) {
709 sum += node->data;
710 free(node);
711 }
712 }
713
714 /*
715 * All entries should now be accounted for (unless some external
716 * actor is interfering with our allowed affinity while this
717 * test is running).
718 */
719 assert(sum == expected_sum);
720 }
721
722 static bool this_cpu_buffer_push(struct percpu_buffer *buffer,
723 struct percpu_buffer_node *node,
724 int *_cpu)
725 {
726 bool result = false;
727 int cpu;
728
729 for (;;) {
730 intptr_t *targetptr_spec, newval_spec;
731 intptr_t *targetptr_final, newval_final;
732 intptr_t offset;
733 int ret;
734
735 cpu = rseq_cpu_start();
736 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
737 if (offset == buffer->c[cpu].buflen)
738 break;
739 newval_spec = (intptr_t)node;
740 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
741 newval_final = offset + 1;
742 targetptr_final = &buffer->c[cpu].offset;
743 if (opt_mb)
744 ret = rseq_cmpeqv_trystorev_storev_release(
745 targetptr_final, offset, targetptr_spec,
746 newval_spec, newval_final, cpu);
747 else
748 ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
749 offset, targetptr_spec, newval_spec,
750 newval_final, cpu);
751 if (rseq_likely(!ret)) {
752 result = true;
753 break;
754 }
755 /* Retry if comparison fails or rseq aborts. */
756 }
757 if (_cpu)
758 *_cpu = cpu;
759 return result;
760 }
761
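/*
 * Pop uses rseq_cmpeqv_cmpeqv_storev(): the decremented offset is only
 * committed if both the offset and the array slot it indexes still hold the
 * values that were loaded, so if the thread was preempted between the loads
 * and the commit and another thread reused the slot, the operation retries
 * instead of returning a stale node.
 */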
762 static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
763 int *_cpu)
764 {
765 struct percpu_buffer_node *head;
766 int cpu;
767
768 for (;;) {
769 intptr_t *targetptr, newval;
770 intptr_t offset;
771 int ret;
772
773 cpu = rseq_cpu_start();
774 /* Load offset with single-copy atomicity. */
775 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
776 if (offset == 0) {
777 head = NULL;
778 break;
779 }
780 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
781 newval = offset - 1;
782 targetptr = (intptr_t *)&buffer->c[cpu].offset;
783 ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
784 (intptr_t *)&buffer->c[cpu].array[offset - 1],
785 (intptr_t)head, newval, cpu);
786 if (rseq_likely(!ret))
787 break;
788 /* Retry if comparison fails or rseq aborts. */
789 }
790 if (_cpu)
791 *_cpu = cpu;
792 return head;
793 }
794
795 /*
796 * __percpu_buffer_pop is not safe against concurrent accesses. Should
797 * only be used on buffers that are not concurrently modified.
798 */
799 static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
800 int cpu)
801 {
802 struct percpu_buffer_node *head;
803 intptr_t offset;
804
805 offset = buffer->c[cpu].offset;
806 if (offset == 0)
807 return NULL;
808 head = buffer->c[cpu].array[offset - 1];
809 buffer->c[cpu].offset = offset - 1;
810 return head;
811 }
812
813 static void *test_percpu_buffer_thread(void *arg)
814 {
815 long long i, reps;
816 struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
817
818 if (!opt_disable_rseq && rseq_register_current_thread())
819 abort();
820
821 reps = opt_reps;
822 for (i = 0; i < reps; i++) {
823 struct percpu_buffer_node *node;
824
825 node = this_cpu_buffer_pop(buffer, NULL);
826 if (opt_yield)
827 sched_yield(); /* encourage shuffling */
828 if (node) {
829 if (!this_cpu_buffer_push(buffer, node, NULL)) {
830 /* Should increase buffer size. */
831 abort();
832 }
833 }
834 }
835
836 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
837 (int) rseq_gettid(), nr_abort, signals_delivered);
838 if (!opt_disable_rseq && rseq_unregister_current_thread())
839 abort();
840
841 return NULL;
842 }
843
844 /* Simultaneous modification to a per-cpu buffer from many threads. */
845 static void test_percpu_buffer(void)
846 {
847 const int num_threads = opt_threads;
848 int i, j, ret;
849 uint64_t sum = 0, expected_sum = 0;
850 struct percpu_buffer buffer;
851 pthread_t test_threads[num_threads];
852 cpu_set_t allowed_cpus;
853
854 memset(&buffer, 0, sizeof(buffer));
855
856 /* Generate buffer entries for every usable cpu. */
857 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
858 for (i = 0; i < CPU_SETSIZE; i++) {
859 if (!CPU_ISSET(i, &allowed_cpus))
860 continue;
861 /* Worst-case is every item in the same CPU. */
862 buffer.c[i].array =
863 (struct percpu_buffer_node **)
864 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
865 BUFFER_ITEM_PER_CPU);
866 assert(buffer.c[i].array);
867 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
868 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
869 struct percpu_buffer_node *node;
870
871 expected_sum += j;
872
873 /*
874 * We could theoretically put the word-sized
875 * "data" directly in the buffer. However, we
876 * want to model objects that would not fit
877 * within a single word, so allocate an object
878 * for each node.
879 */
880 node = (struct percpu_buffer_node *) malloc(sizeof(*node));
881 assert(node);
882 node->data = j;
883 buffer.c[i].array[j - 1] = node;
884 buffer.c[i].offset++;
885 }
886 }
887
888 for (i = 0; i < num_threads; i++) {
889 ret = pthread_create(&test_threads[i], NULL,
890 test_percpu_buffer_thread, &buffer);
891 if (ret) {
892 errno = ret;
893 perror("pthread_create");
894 abort();
895 }
896 }
897
898 for (i = 0; i < num_threads; i++) {
899 ret = pthread_join(test_threads[i], NULL);
900 if (ret) {
901 errno = ret;
902 perror("pthread_join");
903 abort();
904 }
905 }
906
907 for (i = 0; i < CPU_SETSIZE; i++) {
908 struct percpu_buffer_node *node;
909
910 if (!CPU_ISSET(i, &allowed_cpus))
911 continue;
912
913 while ((node = __percpu_buffer_pop(&buffer, i))) {
914 sum += node->data;
915 free(node);
916 }
917 free(buffer.c[i].array);
918 }
919
920 /*
921 * All entries should now be accounted for (unless some external
922 * actor is interfering with our allowed affinity while this
923 * test is running).
924 */
925 assert(sum == expected_sum);
926 }
927
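/*
 * Same ring-buffer idea as above, but the nodes are stored by value:
 * rseq_cmpeqv_trymemcpy_storev() copies the whole item into the per-cpu
 * array slot and publishes the new offset within a single restartable
 * sequence, which is why copylen is bounded (see the call sites below).
 */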
928 static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
929 struct percpu_memcpy_buffer_node item,
930 int *_cpu)
931 {
932 bool result = false;
933 int cpu;
934
935 for (;;) {
936 intptr_t *targetptr_final, newval_final, offset;
937 char *destptr, *srcptr;
938 size_t copylen;
939 int ret;
940
941 cpu = rseq_cpu_start();
942 /* Load offset with single-copy atomicity. */
943 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
944 if (offset == buffer->c[cpu].buflen)
945 break;
946 destptr = (char *)&buffer->c[cpu].array[offset];
947 srcptr = (char *)&item;
948 /* copylen must be <= 4kB. */
949 copylen = sizeof(item);
950 newval_final = offset + 1;
951 targetptr_final = &buffer->c[cpu].offset;
952 if (opt_mb)
953 ret = rseq_cmpeqv_trymemcpy_storev_release(
954 targetptr_final, offset,
955 destptr, srcptr, copylen,
956 newval_final, cpu);
957 else
958 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
959 offset, destptr, srcptr, copylen,
960 newval_final, cpu);
961 if (rseq_likely(!ret)) {
962 result = true;
963 break;
964 }
965 /* Retry if comparison fails or rseq aborts. */
966 }
967 if (_cpu)
968 *_cpu = cpu;
969 return result;
970 }
971
972 static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
973 struct percpu_memcpy_buffer_node *item,
974 int *_cpu)
975 {
976 bool result = false;
977 int cpu;
978
979 for (;;) {
980 intptr_t *targetptr_final, newval_final, offset;
981 char *destptr, *srcptr;
982 size_t copylen;
983 int ret;
984
985 cpu = rseq_cpu_start();
986 /* Load offset with single-copy atomicity. */
987 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
988 if (offset == 0)
989 break;
990 destptr = (char *)item;
991 srcptr = (char *)&buffer->c[cpu].array[offset - 1];
992 /* copylen must be <= 4kB. */
993 copylen = sizeof(*item);
994 newval_final = offset - 1;
995 targetptr_final = &buffer->c[cpu].offset;
996 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
997 offset, destptr, srcptr, copylen,
998 newval_final, cpu);
999 if (rseq_likely(!ret)) {
1000 result = true;
1001 break;
1002 }
1003 /* Retry if comparison fails or rseq aborts. */
1004 }
1005 if (_cpu)
1006 *_cpu = cpu;
1007 return result;
1008 }
1009
1010 /*
1011 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1012 * only be used on buffers that are not concurrently modified.
1013 */
1014 static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1015 struct percpu_memcpy_buffer_node *item,
1016 int cpu)
1017 {
1018 intptr_t offset;
1019
1020 offset = buffer->c[cpu].offset;
1021 if (offset == 0)
1022 return false;
1023 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
1024 buffer->c[cpu].offset = offset - 1;
1025 return true;
1026 }
1027
1028 static void *test_percpu_memcpy_buffer_thread(void *arg)
1029 {
1030 long long i, reps;
1031 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
1032
1033 if (!opt_disable_rseq && rseq_register_current_thread())
1034 abort();
1035
1036 reps = opt_reps;
1037 for (i = 0; i < reps; i++) {
1038 struct percpu_memcpy_buffer_node item;
1039 bool result;
1040
1041 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1042 if (opt_yield)
1043 sched_yield(); /* encourage shuffling */
1044 if (result) {
1045 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1046 /* Should increase buffer size. */
1047 abort();
1048 }
1049 }
1050 }
1051
1052 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
1053 (int) rseq_gettid(), nr_abort, signals_delivered);
1054 if (!opt_disable_rseq && rseq_unregister_current_thread())
1055 abort();
1056
1057 return NULL;
1058 }
1059
1060 /* Simultaneous modification to a per-cpu buffer from many threads. */
1061 static void test_percpu_memcpy_buffer(void)
1062 {
1063 const int num_threads = opt_threads;
1064 int i, j, ret;
1065 uint64_t sum = 0, expected_sum = 0;
1066 struct percpu_memcpy_buffer buffer;
1067 pthread_t test_threads[num_threads];
1068 cpu_set_t allowed_cpus;
1069
1070 memset(&buffer, 0, sizeof(buffer));
1071
1072 /* Generate memcpy buffer entries for every usable cpu. */
1073 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1074 for (i = 0; i < CPU_SETSIZE; i++) {
1075 if (!CPU_ISSET(i, &allowed_cpus))
1076 continue;
1077 /* Worst-case is every item in the same CPU. */
1078 buffer.c[i].array =
1079 (struct percpu_memcpy_buffer_node *)
1080 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1081 MEMCPY_BUFFER_ITEM_PER_CPU);
1082 assert(buffer.c[i].array);
1083 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1084 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1085 expected_sum += 2 * j + 1;
1086
1087 /*
1088 * We could theoretically put the word-sized
1089 * "data" directly in the buffer. However, we
1090 * want to model objects that would not fit
1091 * within a single word, so allocate an object
1092 * for each node.
1093 */
1094 buffer.c[i].array[j - 1].data1 = j;
1095 buffer.c[i].array[j - 1].data2 = j + 1;
1096 buffer.c[i].offset++;
1097 }
1098 }
1099
1100 for (i = 0; i < num_threads; i++) {
1101 ret = pthread_create(&test_threads[i], NULL,
1102 test_percpu_memcpy_buffer_thread,
1103 &buffer);
1104 if (ret) {
1105 errno = ret;
1106 perror("pthread_create");
1107 abort();
1108 }
1109 }
1110
1111 for (i = 0; i < num_threads; i++) {
1112 ret = pthread_join(test_threads[i], NULL);
1113 if (ret) {
1114 errno = ret;
1115 perror("pthread_join");
1116 abort();
1117 }
1118 }
1119
1120 for (i = 0; i < CPU_SETSIZE; i++) {
1121 struct percpu_memcpy_buffer_node item;
1122
1123 if (!CPU_ISSET(i, &allowed_cpus))
1124 continue;
1125
1126 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1127 sum += item.data1;
1128 sum += item.data2;
1129 }
1130 free(buffer.c[i].array);
1131 }
1132
1133 /*
1134 * All entries should now be accounted for (unless some external
1135 * actor is interfering with our allowed affinity while this
1136 * test is running).
1137 */
1138 assert(sum == expected_sum);
1139 }
1140
1141
1142 static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
1143 {
1144 signals_delivered++;
1145 }
1146
1147 static int set_signal_handler(void)
1148 {
1149 int ret = 0;
1150 struct sigaction sa;
1151 sigset_t sigset;
1152
1153 ret = sigemptyset(&sigset);
1154 if (ret < 0) {
1155 perror("sigemptyset");
1156 return ret;
1157 }
1158
1159 sa.sa_handler = test_signal_interrupt_handler;
1160 sa.sa_mask = sigset;
1161 sa.sa_flags = 0;
1162 ret = sigaction(SIGUSR1, &sa, NULL);
1163 if (ret < 0) {
1164 perror("sigaction");
1165 return ret;
1166 }
1167
1168 printf_verbose("Signal handler set for SIGUSR1\n");
1169
1170 return ret;
1171 }
1172
1173 static
1174 int sys_membarrier(int cmd, int flags, int cpu_id)
1175 {
1176 return syscall(__NR_membarrier, cmd, flags, cpu_id);
1177 }
1178
1179 static
1180 bool membarrier_private_expedited_rseq_available(void)
1181 {
1182 int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
1183
1184 if (status < 0) {
1185 perror("membarrier");
1186 return false;
1187 }
1188 if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
1189 return false;
1190 return true;
1191 }
1192
1193 /* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
1194 #ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
1195 struct test_membarrier_thread_args {
1196 int stop;
1197 intptr_t percpu_list_ptr;
1198 };
1199
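/*
 * Membarrier test protocol: each worker increments, through the pointer
 * currently published in percpu_list_ptr, the data field of its CPU's head
 * node (rseq_offset_deref_addv() follows the published list pointer at the
 * offset of the current CPU's entry). The manager repeatedly swaps which
 * list is published and issues MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ on the
 * CPU of interest, aborting any rseq critical section still running with
 * the old pointer; the now-inactive list must therefore stay unchanged,
 * which the manager verifies.
 */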
1200 /* Worker threads modify data in their "active" percpu lists. */
1201 static
1202 void *test_membarrier_worker_thread(void *arg)
1203 {
1204 struct test_membarrier_thread_args *args =
1205 (struct test_membarrier_thread_args *)arg;
1206 const int iters = opt_reps;
1207 int i;
1208
1209 if (rseq_register_current_thread()) {
1210 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1211 errno, strerror(errno));
1212 abort();
1213 }
1214
1215 /* Wait for initialization. */
1216 while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }
1217
1218 for (i = 0; i < iters; ++i) {
1219 int ret;
1220
1221 do {
1222 int cpu = rseq_cpu_start();
1223
1224 ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
1225 sizeof(struct percpu_list_entry) * cpu, 1, cpu);
1226 } while (rseq_unlikely(ret));
1227 }
1228
1229 if (rseq_unregister_current_thread()) {
1230 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1231 errno, strerror(errno));
1232 abort();
1233 }
1234 return NULL;
1235 }
1236
1237 static
1238 void test_membarrier_init_percpu_list(struct percpu_list *list)
1239 {
1240 int i;
1241
1242 memset(list, 0, sizeof(*list));
1243 for (i = 0; i < CPU_SETSIZE; i++) {
1244 struct percpu_list_node *node;
1245
1246 node = (struct percpu_list_node *) malloc(sizeof(*node));
1247 assert(node);
1248 node->data = 0;
1249 node->next = NULL;
1250 list->c[i].head = node;
1251 }
1252 }
1253
1254 static
1255 void test_membarrier_free_percpu_list(struct percpu_list *list)
1256 {
1257 int i;
1258
1259 for (i = 0; i < CPU_SETSIZE; i++)
1260 free(list->c[i].head);
1261 }
1262
1263 /*
1264 * The manager thread swaps per-cpu lists that worker threads see,
1265 * and validates that there are no unexpected modifications.
1266 */
1267 static
1268 void *test_membarrier_manager_thread(void *arg)
1269 {
1270 struct test_membarrier_thread_args *args =
1271 (struct test_membarrier_thread_args *)arg;
1272 struct percpu_list list_a, list_b;
1273 intptr_t expect_a = 0, expect_b = 0;
1274 int cpu_a = 0, cpu_b = 0;
1275
1276 if (rseq_register_current_thread()) {
1277 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1278 errno, strerror(errno));
1279 abort();
1280 }
1281
1282 /* Init lists. */
1283 test_membarrier_init_percpu_list(&list_a);
1284 test_membarrier_init_percpu_list(&list_b);
1285
1286 /* Initialize lists before publishing them. */
1287 rseq_smp_wmb();
1288
1289 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
1290
1291 while (!RSEQ_READ_ONCE(args->stop)) {
1292 /* list_a is "active". */
1293 cpu_a = rand() % CPU_SETSIZE;
1294 /*
1295 * As list_b is "inactive", we should never see changes
1296 * to list_b.
1297 */
1298 if (expect_b != RSEQ_READ_ONCE(list_b.c[cpu_b].head->data)) {
1299 fprintf(stderr, "Membarrier test failed\n");
1300 abort();
1301 }
1302
1303 /* Make list_b "active". */
1304 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_b);
1305 if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
1306 MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
1307 errno != ENXIO /* missing CPU */) {
1308 perror("sys_membarrier");
1309 abort();
1310 }
1311 /*
1312 * CPU A should now only modify list_b, so the values
1313 * in list_a should be stable.
1314 */
1315 expect_a = RSEQ_READ_ONCE(list_a.c[cpu_a].head->data);
1316
1317 cpu_b = rand() % CPU_SETSIZE;
1318 /*
1319 * As list_a is "inactive", we should never see changes
1320 * to list_a.
1321 */
1322 if (expect_a != RSEQ_READ_ONCE(list_a.c[cpu_a].head->data)) {
1323 fprintf(stderr, "Membarrier test failed\n");
1324 abort();
1325 }
1326
1327 /* Make list_a "active". */
1328 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
1329 if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
1330 MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
1331 errno != ENXIO /* missing CPU */) {
1332 perror("sys_membarrier");
1333 abort();
1334 }
1335 /* Remember a value from list_b. */
1336 expect_b = RSEQ_READ_ONCE(list_b.c[cpu_b].head->data);
1337 }
1338
1339 test_membarrier_free_percpu_list(&list_a);
1340 test_membarrier_free_percpu_list(&list_b);
1341
1342 if (rseq_unregister_current_thread()) {
1343 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1344 errno, strerror(errno));
1345 abort();
1346 }
1347 return NULL;
1348 }
1349
1350 static
1351 void test_membarrier(void)
1352 {
1353 const int num_threads = opt_threads;
1354 struct test_membarrier_thread_args thread_args;
1355 pthread_t worker_threads[num_threads];
1356 pthread_t manager_thread;
1357 int i, ret;
1358
1359 if (!membarrier_private_expedited_rseq_available()) {
1360 fprintf(stderr, "Membarrier private expedited rseq not available. "
1361 "Skipping membarrier test.\n");
1362 return;
1363 }
1364 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1365 perror("sys_membarrier");
1366 abort();
1367 }
1368
1369 thread_args.stop = 0;
1370 thread_args.percpu_list_ptr = 0;
1371 ret = pthread_create(&manager_thread, NULL,
1372 test_membarrier_manager_thread, &thread_args);
1373 if (ret) {
1374 errno = ret;
1375 perror("pthread_create");
1376 abort();
1377 }
1378
1379 for (i = 0; i < num_threads; i++) {
1380 ret = pthread_create(&worker_threads[i], NULL,
1381 test_membarrier_worker_thread, &thread_args);
1382 if (ret) {
1383 errno = ret;
1384 perror("pthread_create");
1385 abort();
1386 }
1387 }
1388
1389
1390 for (i = 0; i < num_threads; i++) {
1391 ret = pthread_join(worker_threads[i], NULL);
1392 if (ret) {
1393 errno = ret;
1394 perror("pthread_join");
1395 abort();
1396 }
1397 }
1398
1399 RSEQ_WRITE_ONCE(thread_args.stop, 1);
1400 ret = pthread_join(manager_thread, NULL);
1401 if (ret) {
1402 errno = ret;
1403 perror("pthread_join");
1404 abort();
1405 }
1406 }
1407 #else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
1408 static
1409 void test_membarrier(void)
1410 {
1411 if (!membarrier_private_expedited_rseq_available()) {
1412 fprintf(stderr, "Membarrier private expedited rseq not available. "
1413 "Skipping membarrier test.\n");
1414 return;
1415 }
1416 fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
1417 "Skipping membarrier test.\n");
1418 }
1419 #endif
1420
1421 static void show_usage(char **argv)
1422 {
1423 printf("Usage : %s <OPTIONS>\n",
1424 argv[0]);
1425 printf("OPTIONS:\n");
1426 printf(" [-1 loops] Number of loops for delay injection 1\n");
1427 printf(" [-2 loops] Number of loops for delay injection 2\n");
1428 printf(" [-3 loops] Number of loops for delay injection 3\n");
1429 printf(" [-4 loops] Number of loops for delay injection 4\n");
1430 printf(" [-5 loops] Number of loops for delay injection 5\n");
1431 printf(" [-6 loops] Number of loops for delay injection 6\n");
1432 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1433 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1434 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1435 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1436 printf(" [-y] Yield\n");
1437 printf(" [-k] Kill thread with signal\n");
1438 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1439 printf(" [-t N] Number of threads (default 200)\n");
1440 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1441 printf(" [-d] Disable rseq system call (no initialization)\n");
1442 printf(" [-D M] Disable rseq for each M threads\n");
1443 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
1444 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
1445 printf(" [-c] Check if the rseq syscall is available.\n");
1446 printf(" [-v] Verbose output.\n");
1447 printf(" [-h] Show this help.\n");
1448 printf("\n");
1449 }
1450
1451 int main(int argc, char **argv)
1452 {
1453 int i;
1454
1455 for (i = 1; i < argc; i++) {
1456 if (argv[i][0] != '-')
1457 continue;
1458 switch (argv[i][1]) {
1459 case '1':
1460 case '2':
1461 case '3':
1462 case '4':
1463 case '5':
1464 case '6':
1465 case '7':
1466 case '8':
1467 case '9':
1468 if (argc < i + 2) {
1469 show_usage(argv);
1470 goto error;
1471 }
1472 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1473 i++;
1474 break;
1475 case 'm':
1476 if (argc < i + 2) {
1477 show_usage(argv);
1478 goto error;
1479 }
1480 opt_modulo = atol(argv[i + 1]);
1481 if (opt_modulo < 0) {
1482 show_usage(argv);
1483 goto error;
1484 }
1485 i++;
1486 break;
1487 case 's':
1488 if (argc < i + 2) {
1489 show_usage(argv);
1490 goto error;
1491 }
1492 opt_sleep = atol(argv[i + 1]);
1493 if (opt_sleep < 0) {
1494 show_usage(argv);
1495 goto error;
1496 }
1497 i++;
1498 break;
1499 case 'y':
1500 opt_yield = 1;
1501 break;
1502 case 'k':
1503 opt_signal = 1;
1504 break;
1505 case 'd':
1506 opt_disable_rseq = 1;
1507 break;
1508 case 'D':
1509 if (argc < i + 2) {
1510 show_usage(argv);
1511 goto error;
1512 }
1513 opt_disable_mod = atol(argv[i + 1]);
1514 if (opt_disable_mod < 0) {
1515 show_usage(argv);
1516 goto error;
1517 }
1518 i++;
1519 break;
1520 case 't':
1521 if (argc < i + 2) {
1522 show_usage(argv);
1523 goto error;
1524 }
1525 opt_threads = atol(argv[i + 1]);
1526 if (opt_threads < 0) {
1527 show_usage(argv);
1528 goto error;
1529 }
1530 i++;
1531 break;
1532 case 'r':
1533 if (argc < i + 2) {
1534 show_usage(argv);
1535 goto error;
1536 }
1537 opt_reps = atoll(argv[i + 1]);
1538 if (opt_reps < 0) {
1539 show_usage(argv);
1540 goto error;
1541 }
1542 i++;
1543 break;
1544 case 'h':
1545 show_usage(argv);
1546 goto end;
1547 case 'T':
1548 if (argc < i + 2) {
1549 show_usage(argv);
1550 goto error;
1551 }
1552 opt_test = *argv[i + 1];
1553 switch (opt_test) {
1554 case 's':
1555 case 'l':
1556 case 'i':
1557 case 'b':
1558 case 'm':
1559 case 'r':
1560 break;
1561 default:
1562 show_usage(argv);
1563 goto error;
1564 }
1565 i++;
1566 break;
1567 case 'v':
1568 verbose = 1;
1569 break;
1570 case 'M':
1571 opt_mb = 1;
1572 break;
1573 case 'c':
1574 if (rseq_available()) {
1575 printf_verbose("The rseq syscall is available.\n");
1576 goto end;
1577 } else {
1578 printf_verbose("The rseq syscall is unavailable.\n");
1579 goto no_rseq;
1580 }
1581 default:
1582 show_usage(argv);
1583 goto error;
1584 }
1585 }
1586
1587 loop_cnt_1 = loop_cnt[1];
1588 loop_cnt_2 = loop_cnt[2];
1589 loop_cnt_3 = loop_cnt[3];
1590 loop_cnt_4 = loop_cnt[4];
1591 loop_cnt_5 = loop_cnt[5];
1592 loop_cnt_6 = loop_cnt[6];
1593
1594 if (set_signal_handler())
1595 goto error;
1596
1597 if (!opt_disable_rseq && rseq_register_current_thread())
1598 goto error;
1599 switch (opt_test) {
1600 case 's':
1601 printf_verbose("spinlock\n");
1602 test_percpu_spinlock();
1603 break;
1604 case 'l':
1605 printf_verbose("linked list\n");
1606 test_percpu_list();
1607 break;
1608 case 'b':
1609 printf_verbose("buffer\n");
1610 test_percpu_buffer();
1611 break;
1612 case 'm':
1613 printf_verbose("memcpy buffer\n");
1614 test_percpu_memcpy_buffer();
1615 break;
1616 case 'i':
1617 printf_verbose("counter increment\n");
1618 test_percpu_inc();
1619 break;
1620 case 'r':
1621 printf_verbose("membarrier\n");
1622 test_membarrier();
1623 break;
1624 }
1625 if (!opt_disable_rseq && rseq_unregister_current_thread())
1626 abort();
1627 end:
1628 return 0;
1629
1630 error:
1631 return -1;
1632
1633 no_rseq:
1634 return 2;
1635 }