Fix: work-around asm goto compiler bugs
[librseq.git] / tests / param_test.c
1// SPDX-License-Identifier: LGPL-2.1-only
2#ifndef _GNU_SOURCE
3#define _GNU_SOURCE
4#endif
5#include <assert.h>
6#include <pthread.h>
7#include <sched.h>
8#include <stdint.h>
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <syscall.h>
13#include <unistd.h>
14#include <poll.h>
15#include <sys/types.h>
16#include <signal.h>
17#include <errno.h>
18#include <stddef.h>
19
20#define NR_INJECT 9
21static int loop_cnt[NR_INJECT + 1];
22
23static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
24static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
25static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
26static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
27static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
28static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
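/*
 * loop_cnt_1..6 mirror loop_cnt[1..6] (copied in main()) under fixed asm
 * symbol names, so the x86 RSEQ_INJECT_ASM variants below can load the
 * counters by symbol instead of receiving them as inline asm operands.
 */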
29
30static int opt_modulo, verbose;
31
32static int opt_yield, opt_signal, opt_sleep,
33 opt_disable_rseq, opt_threads = 200,
34 opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
35
36#ifndef RSEQ_SKIP_FASTPATH
37static long long opt_reps = 5000;
38#else
39static long long opt_reps = 100;
40#endif
41
42static __thread __attribute__((tls_model("initial-exec")))
43unsigned int signals_delivered;
44
45#ifndef BENCHMARK
46
47static inline pid_t rseq_gettid(void)
48{
49 return syscall(__NR_gettid);
50}
51
52static __thread __attribute__((tls_model("initial-exec"), unused))
53int yield_mod_cnt, nr_abort;
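/*
 * yield_mod_cnt drives the modulo-based injection in RSEQ_INJECT_C();
 * nr_abort is bumped by RSEQ_INJECT_FAILED whenever an rseq critical
 * section aborts.
 */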
54
55#define printf_verbose(fmt, ...) \
56 do { \
57 if (verbose) \
58 printf(fmt, ## __VA_ARGS__); \
59 } while (0)
60
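/*
 * Each per-architecture RSEQ_INJECT_ASM(n) below implements the same delay
 * loop: load the nth loop counter, skip ahead if it is zero, otherwise spin
 * decrementing it, so delay can be injected at chosen points of the rseq
 * fast path.
 */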
61#ifdef __i386__
62
63#define INJECT_ASM_REG "eax"
64
65#define RSEQ_INJECT_CLOBBER \
66 , INJECT_ASM_REG
67
68#define RSEQ_INJECT_ASM(n) \
69 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
70 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
71 "jz 333f\n\t" \
72 "222:\n\t" \
73 "dec %%" INJECT_ASM_REG "\n\t" \
74 "jnz 222b\n\t" \
75 "333:\n\t"
76
77#elif defined(__x86_64__)
78
79#define INJECT_ASM_REG_P "rax"
80#define INJECT_ASM_REG "eax"
81
82#define RSEQ_INJECT_CLOBBER \
83 , INJECT_ASM_REG_P \
84 , INJECT_ASM_REG
85
86#define RSEQ_INJECT_ASM(n) \
87 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
88 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
89 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
90 "jz 333f\n\t" \
91 "222:\n\t" \
92 "dec %%" INJECT_ASM_REG "\n\t" \
93 "jnz 222b\n\t" \
94 "333:\n\t"
95
96#elif defined(__s390__)
97
98#define RSEQ_INJECT_INPUT \
99 , [loop_cnt_1]"m"(loop_cnt[1]) \
100 , [loop_cnt_2]"m"(loop_cnt[2]) \
101 , [loop_cnt_3]"m"(loop_cnt[3]) \
102 , [loop_cnt_4]"m"(loop_cnt[4]) \
103 , [loop_cnt_5]"m"(loop_cnt[5]) \
104 , [loop_cnt_6]"m"(loop_cnt[6])
105
106#define INJECT_ASM_REG "r12"
107
108#define RSEQ_INJECT_CLOBBER \
109 , INJECT_ASM_REG
110
111#define RSEQ_INJECT_ASM(n) \
112 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
113 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
114 "je 333f\n\t" \
115 "222:\n\t" \
116 "ahi %%" INJECT_ASM_REG ", -1\n\t" \
117 "jnz 222b\n\t" \
118 "333:\n\t"
119
120#elif defined(__ARMEL__)
121
122#define RSEQ_INJECT_INPUT \
123 , [loop_cnt_1]"m"(loop_cnt[1]) \
124 , [loop_cnt_2]"m"(loop_cnt[2]) \
125 , [loop_cnt_3]"m"(loop_cnt[3]) \
126 , [loop_cnt_4]"m"(loop_cnt[4]) \
127 , [loop_cnt_5]"m"(loop_cnt[5]) \
128 , [loop_cnt_6]"m"(loop_cnt[6])
129
130#define INJECT_ASM_REG "r4"
131
132#define RSEQ_INJECT_CLOBBER \
133 , INJECT_ASM_REG
134
135#define RSEQ_INJECT_ASM(n) \
136 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
137 "cmp " INJECT_ASM_REG ", #0\n\t" \
138 "beq 333f\n\t" \
139 "222:\n\t" \
140 "subs " INJECT_ASM_REG ", #1\n\t" \
141 "bne 222b\n\t" \
142 "333:\n\t"
143
144#elif defined(__AARCH64EL__)
145
146#define RSEQ_INJECT_INPUT \
147 , [loop_cnt_1] "Qo" (loop_cnt[1]) \
148 , [loop_cnt_2] "Qo" (loop_cnt[2]) \
149 , [loop_cnt_3] "Qo" (loop_cnt[3]) \
150 , [loop_cnt_4] "Qo" (loop_cnt[4]) \
151 , [loop_cnt_5] "Qo" (loop_cnt[5]) \
152 , [loop_cnt_6] "Qo" (loop_cnt[6])
153
154#define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
155
156#define RSEQ_INJECT_ASM(n) \
157 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
158 " cbz " INJECT_ASM_REG ", 333f\n" \
159 "222:\n" \
160 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
161 " cbnz " INJECT_ASM_REG ", 222b\n" \
162 "333:\n"
163
164#elif __PPC__
165
166#define RSEQ_INJECT_INPUT \
167 , [loop_cnt_1]"m"(loop_cnt[1]) \
168 , [loop_cnt_2]"m"(loop_cnt[2]) \
169 , [loop_cnt_3]"m"(loop_cnt[3]) \
170 , [loop_cnt_4]"m"(loop_cnt[4]) \
171 , [loop_cnt_5]"m"(loop_cnt[5]) \
172 , [loop_cnt_6]"m"(loop_cnt[6])
173
174#define INJECT_ASM_REG "r18"
175
176#define RSEQ_INJECT_CLOBBER \
177 , INJECT_ASM_REG
178
179#define RSEQ_INJECT_ASM(n) \
180 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
181 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
182 "beq 333f\n\t" \
183 "222:\n\t" \
184 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
185 "bne 222b\n\t" \
186 "333:\n\t"
187
188#elif defined(__mips__)
189
190#define RSEQ_INJECT_INPUT \
191 , [loop_cnt_1]"m"(loop_cnt[1]) \
192 , [loop_cnt_2]"m"(loop_cnt[2]) \
193 , [loop_cnt_3]"m"(loop_cnt[3]) \
194 , [loop_cnt_4]"m"(loop_cnt[4]) \
195 , [loop_cnt_5]"m"(loop_cnt[5]) \
196 , [loop_cnt_6]"m"(loop_cnt[6])
197
198#define INJECT_ASM_REG "$5"
199
200#define RSEQ_INJECT_CLOBBER \
201 , INJECT_ASM_REG
202
203#define RSEQ_INJECT_ASM(n) \
204 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
205 "beqz " INJECT_ASM_REG ", 333f\n\t" \
206 "222:\n\t" \
207 "addiu " INJECT_ASM_REG ", -1\n\t" \
208 "bnez " INJECT_ASM_REG ", 222b\n\t" \
209 "333:\n\t"
210
211#else
212#error unsupported target
213#endif
214
215#define RSEQ_INJECT_FAILED \
216 nr_abort++;
217
218#define RSEQ_INJECT_C(n) \
219{ \
220 int loc_i, loc_nr_loops = loop_cnt[n]; \
221 \
222 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
223 rseq_barrier(); \
224 } \
225 if (loc_nr_loops == -1 && opt_modulo) { \
226 if (yield_mod_cnt == opt_modulo - 1) { \
227 if (opt_sleep > 0) \
228 poll(NULL, 0, opt_sleep); \
229 if (opt_yield) \
230 sched_yield(); \
231 if (opt_signal) \
232 raise(SIGUSR1); \
233 yield_mod_cnt = 0; \
234 } else { \
235 yield_mod_cnt++; \
236 } \
237 } \
238}
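/*
 * RSEQ_INJECT_C(n) is the C-level counterpart: it spins loop_cnt[n] times,
 * and when loop_cnt[n] is -1 (options -7/-8/-9) it instead sleeps, yields or
 * raises SIGUSR1 every opt_modulo-th invocation, as selected by -s/-y/-k.
 */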
239
240#else
241
242#define printf_verbose(fmt, ...)
243
244#endif /* BENCHMARK */
245
246#include <rseq/rseq.h>
247
248struct percpu_lock_entry {
249 intptr_t v;
250} __attribute__((aligned(128)));
251
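/*
 * Per-cpu entries below are aligned on 128 bytes, presumably so each cpu's
 * data sits on its own cache lines and concurrent updates from different
 * cpus do not false-share.
 */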
252struct percpu_lock {
253 struct percpu_lock_entry c[CPU_SETSIZE];
254};
255
256struct test_data_entry {
257 intptr_t count;
258} __attribute__((aligned(128)));
259
260struct spinlock_test_data {
261 struct percpu_lock lock;
262 struct test_data_entry c[CPU_SETSIZE];
263};
264
265struct spinlock_thread_test_data {
266 struct spinlock_test_data *data;
267 long long reps;
268 int reg;
269};
270
271struct inc_test_data {
272 struct test_data_entry c[CPU_SETSIZE];
273};
274
275struct inc_thread_test_data {
276 struct inc_test_data *data;
277 long long reps;
278 int reg;
279};
280
281struct percpu_list_node {
282 intptr_t data;
283 struct percpu_list_node *next;
284};
285
286struct percpu_list_entry {
287 struct percpu_list_node *head;
288} __attribute__((aligned(128)));
289
290struct percpu_list {
291 struct percpu_list_entry c[CPU_SETSIZE];
292};
293
294#define BUFFER_ITEM_PER_CPU 100
295
296struct percpu_buffer_node {
297 intptr_t data;
298};
299
300struct percpu_buffer_entry {
301 intptr_t offset;
302 intptr_t buflen;
303 struct percpu_buffer_node **array;
304} __attribute__((aligned(128)));
305
306struct percpu_buffer {
307 struct percpu_buffer_entry c[CPU_SETSIZE];
308};
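/*
 * Each percpu_buffer_entry behaves as a per-cpu stack: array[] holds node
 * pointers, offset is the number of occupied slots, buflen the capacity.
 */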
309
310#define MEMCPY_BUFFER_ITEM_PER_CPU 100
311
312struct percpu_memcpy_buffer_node {
313 intptr_t data1;
314 uint64_t data2;
315};
316
317struct percpu_memcpy_buffer_entry {
318 intptr_t offset;
319 intptr_t buflen;
320 struct percpu_memcpy_buffer_node *array;
321} __attribute__((aligned(128)));
322
323struct percpu_memcpy_buffer {
324 struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
325};
326
327/* A simple percpu spinlock. Grabs lock on current cpu. */
328static int rseq_this_cpu_lock(struct percpu_lock *lock)
329{
330 int cpu;
331
332 for (;;) {
333 int ret;
334
335 cpu = rseq_cpu_start();
336 ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
337 0, 1, cpu);
338 if (rseq_likely(!ret))
339 break;
340 /* Retry if comparison fails or rseq aborts. */
341 }
342 /*
343 * Acquire semantic when taking lock after control dependency.
344 * Matches rseq_smp_store_release().
345 */
346 rseq_smp_acquire__after_ctrl_dep();
347 return cpu;
348}
349
350static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
351{
352 assert(lock->c[cpu].v == 1);
353 /*
354 * Release lock, with release semantic. Matches
355 * rseq_smp_acquire__after_ctrl_dep().
356 */
357 rseq_smp_store_release(&lock->c[cpu].v, 0);
358}
359
360static void *test_percpu_spinlock_thread(void *arg)
361{
362 struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
363 struct spinlock_test_data *data = thread_data->data;
364 long long i, reps;
365
366 if (!opt_disable_rseq && thread_data->reg &&
367 rseq_register_current_thread())
368 abort();
369 reps = thread_data->reps;
370 for (i = 0; i < reps; i++) {
371 int cpu = rseq_this_cpu_lock(&data->lock);
372 data->c[cpu].count++;
373 rseq_percpu_unlock(&data->lock, cpu);
374#ifndef BENCHMARK
375 if (i != 0 && !(i % (reps / 10)))
376 printf_verbose("tid %d: count %lld\n",
377 (int) rseq_gettid(), i);
378#endif
379 }
380 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
381 (int) rseq_gettid(), nr_abort, signals_delivered);
382 if (!opt_disable_rseq && thread_data->reg &&
383 rseq_unregister_current_thread())
384 abort();
385 return NULL;
386}
387
388/*
389 * A simple test which implements a sharded counter using a per-cpu
390 * lock. Obviously real applications might prefer to simply use a
391 * per-cpu increment; however, this is reasonable for a test and the
392 * lock can be extended to synchronize more complicated operations.
393 */
394static void test_percpu_spinlock(void)
395{
396 const int num_threads = opt_threads;
397 int i, ret;
398 uint64_t sum;
399 pthread_t test_threads[num_threads];
400 struct spinlock_test_data data;
401 struct spinlock_thread_test_data thread_data[num_threads];
402
403 memset(&data, 0, sizeof(data));
404 for (i = 0; i < num_threads; i++) {
405 thread_data[i].reps = opt_reps;
406 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
407 thread_data[i].reg = 1;
408 else
409 thread_data[i].reg = 0;
410 thread_data[i].data = &data;
411 ret = pthread_create(&test_threads[i], NULL,
412 test_percpu_spinlock_thread,
413 &thread_data[i]);
414 if (ret) {
415 errno = ret;
416 perror("pthread_create");
417 abort();
418 }
419 }
420
421 for (i = 0; i < num_threads; i++) {
422 ret = pthread_join(test_threads[i], NULL);
423 if (ret) {
424 errno = ret;
425 perror("pthread_join");
426 abort();
427 }
428 }
429
430 sum = 0;
431 for (i = 0; i < CPU_SETSIZE; i++)
432 sum += data.c[i].count;
433
434 assert(sum == (uint64_t)opt_reps * num_threads);
435}
436
437static void *test_percpu_inc_thread(void *arg)
438{
439 struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
440 struct inc_test_data *data = thread_data->data;
441 long long i, reps;
442
443 if (!opt_disable_rseq && thread_data->reg &&
444 rseq_register_current_thread())
445 abort();
446 reps = thread_data->reps;
447 for (i = 0; i < reps; i++) {
448 int ret;
449
450 do {
451 int cpu;
452
453 cpu = rseq_cpu_start();
454 ret = rseq_addv(&data->c[cpu].count, 1, cpu);
455 } while (rseq_unlikely(ret));
456#ifndef BENCHMARK
457 if (i != 0 && !(i % (reps / 10)))
458 printf_verbose("tid %d: count %lld\n",
459 (int) rseq_gettid(), i);
460#endif
461 }
462 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
463 (int) rseq_gettid(), nr_abort, signals_delivered);
464 if (!opt_disable_rseq && thread_data->reg &&
465 rseq_unregister_current_thread())
466 abort();
467 return NULL;
468}
469
470static void test_percpu_inc(void)
471{
472 const int num_threads = opt_threads;
473 int i, ret;
474 uint64_t sum;
475 pthread_t test_threads[num_threads];
476 struct inc_test_data data;
477 struct inc_thread_test_data thread_data[num_threads];
478
479 memset(&data, 0, sizeof(data));
480 for (i = 0; i < num_threads; i++) {
481 thread_data[i].reps = opt_reps;
482 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
483 thread_data[i].reg = 1;
484 else
485 thread_data[i].reg = 0;
486 thread_data[i].data = &data;
487 ret = pthread_create(&test_threads[i], NULL,
488 test_percpu_inc_thread,
489 &thread_data[i]);
490 if (ret) {
491 errno = ret;
492 perror("pthread_create");
493 abort();
494 }
495 }
496
497 for (i = 0; i < num_threads; i++) {
498 ret = pthread_join(test_threads[i], NULL);
499 if (ret) {
500 errno = ret;
501 perror("pthread_join");
502 abort();
503 }
504 }
505
506 sum = 0;
507 for (i = 0; i < CPU_SETSIZE; i++)
508 sum += data.c[i].count;
509
510 assert(sum == (uint64_t)opt_reps * num_threads);
511}
512
513static void this_cpu_list_push(struct percpu_list *list,
514 struct percpu_list_node *node,
515 int *_cpu)
516{
517 int cpu;
518
519 for (;;) {
520 intptr_t *targetptr, newval, expect;
521 int ret;
522
523 cpu = rseq_cpu_start();
524 /* Load list->c[cpu].head with single-copy atomicity. */
525 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
526 newval = (intptr_t)node;
527 targetptr = (intptr_t *)&list->c[cpu].head;
528 node->next = (struct percpu_list_node *)expect;
529 ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
530 if (rseq_likely(!ret))
531 break;
532 /* Retry if comparison fails or rseq aborts. */
533 }
534 if (_cpu)
535 *_cpu = cpu;
536}
537
538/*
539 * Unlike a traditional lock-less linked list; the availability of a
540 * rseq primitive allows us to implement pop without concerns over
541 * ABA-type races.
542 */
543static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
544 int *_cpu)
545{
546 struct percpu_list_node *node = NULL;
547 int cpu;
548
549 for (;;) {
550 struct percpu_list_node *head;
551 intptr_t *targetptr, expectnot, *load;
552 off_t offset;
553 int ret;
554
555 cpu = rseq_cpu_start();
556 targetptr = (intptr_t *)&list->c[cpu].head;
557 expectnot = (intptr_t)NULL;
558 offset = offsetof(struct percpu_list_node, next);
559 load = (intptr_t *)&head;
560 ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
561 offset, load, cpu);
562 if (rseq_likely(!ret)) {
563 node = head;
564 break;
565 }
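		/* A positive return means the head matched expectnot (NULL): the list is empty. */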
566 if (ret > 0)
567 break;
568 /* Retry if rseq aborts. */
569 }
570 if (_cpu)
571 *_cpu = cpu;
572 return node;
573}
574
575/*
576 * __percpu_list_pop is not safe against concurrent accesses. Should
577 * only be used on lists that are not concurrently modified.
578 */
579static struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
580{
581 struct percpu_list_node *node;
582
583 node = list->c[cpu].head;
584 if (!node)
585 return NULL;
586 list->c[cpu].head = node->next;
587 return node;
588}
589
590static void *test_percpu_list_thread(void *arg)
591{
592 long long i, reps;
593 struct percpu_list *list = (struct percpu_list *)arg;
594
595 if (!opt_disable_rseq && rseq_register_current_thread())
596 abort();
597
598 reps = opt_reps;
599 for (i = 0; i < reps; i++) {
600 struct percpu_list_node *node;
601
602 node = this_cpu_list_pop(list, NULL);
603 if (opt_yield)
604 sched_yield(); /* encourage shuffling */
605 if (node)
606 this_cpu_list_push(list, node, NULL);
607 }
608
609 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
610 (int) rseq_gettid(), nr_abort, signals_delivered);
611 if (!opt_disable_rseq && rseq_unregister_current_thread())
612 abort();
613
614 return NULL;
615}
616
617/* Simultaneous modification to a per-cpu linked list from many threads. */
618static void test_percpu_list(void)
619{
620 const int num_threads = opt_threads;
621 int i, j, ret;
622 uint64_t sum = 0, expected_sum = 0;
623 struct percpu_list list;
624 pthread_t test_threads[num_threads];
625 cpu_set_t allowed_cpus;
626
627 memset(&list, 0, sizeof(list));
628
629 /* Generate list entries for every usable cpu. */
630 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
631 for (i = 0; i < CPU_SETSIZE; i++) {
632 if (!CPU_ISSET(i, &allowed_cpus))
633 continue;
634 for (j = 1; j <= 100; j++) {
635 struct percpu_list_node *node;
636
637 expected_sum += j;
638
639 node = (struct percpu_list_node *) malloc(sizeof(*node));
640 assert(node);
641 node->data = j;
642 node->next = list.c[i].head;
643 list.c[i].head = node;
644 }
645 }
646
647 for (i = 0; i < num_threads; i++) {
648 ret = pthread_create(&test_threads[i], NULL,
649 test_percpu_list_thread, &list);
650 if (ret) {
651 errno = ret;
652 perror("pthread_create");
653 abort();
654 }
655 }
656
657 for (i = 0; i < num_threads; i++) {
658 ret = pthread_join(test_threads[i], NULL);
659 if (ret) {
660 errno = ret;
661 perror("pthread_join");
662 abort();
663 }
664 }
665
666 for (i = 0; i < CPU_SETSIZE; i++) {
667 struct percpu_list_node *node;
668
669 if (!CPU_ISSET(i, &allowed_cpus))
670 continue;
671
672 while ((node = __percpu_list_pop(&list, i))) {
673 sum += node->data;
674 free(node);
675 }
676 }
677
678 /*
679 * All entries should now be accounted for (unless some external
680 * actor is interfering with our allowed affinity while this
681 * test is running).
682 */
683 assert(sum == expected_sum);
684}
685
686static bool this_cpu_buffer_push(struct percpu_buffer *buffer,
687 struct percpu_buffer_node *node,
688 int *_cpu)
689{
690 bool result = false;
691 int cpu;
692
693 for (;;) {
694 intptr_t *targetptr_spec, newval_spec;
695 intptr_t *targetptr_final, newval_final;
696 intptr_t offset;
697 int ret;
698
699 cpu = rseq_cpu_start();
700 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
701 if (offset == buffer->c[cpu].buflen)
702 break;
703 newval_spec = (intptr_t)node;
704 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
705 newval_final = offset + 1;
706 targetptr_final = &buffer->c[cpu].offset;
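		/*
		 * Speculatively store the node pointer into the array slot, then
		 * commit by storing the incremented offset; -M (opt_mb) selects
		 * the store-release variant of the commit.
		 */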
707 if (opt_mb)
708 ret = rseq_cmpeqv_trystorev_storev_release(
709 targetptr_final, offset, targetptr_spec,
710 newval_spec, newval_final, cpu);
711 else
712 ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
713 offset, targetptr_spec, newval_spec,
714 newval_final, cpu);
715 if (rseq_likely(!ret)) {
716 result = true;
717 break;
718 }
719 /* Retry if comparison fails or rseq aborts. */
720 }
721 if (_cpu)
722 *_cpu = cpu;
723 return result;
724}
725
726static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
727 int *_cpu)
728{
729 struct percpu_buffer_node *head;
730 int cpu;
731
732 for (;;) {
733 intptr_t *targetptr, newval;
734 intptr_t offset;
735 int ret;
736
737 cpu = rseq_cpu_start();
738 /* Load offset with single-copy atomicity. */
739 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
740 if (offset == 0) {
741 head = NULL;
742 break;
743 }
744 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
745 newval = offset - 1;
746 targetptr = (intptr_t *)&buffer->c[cpu].offset;
747 ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
748 (intptr_t *)&buffer->c[cpu].array[offset - 1],
749 (intptr_t)head, newval, cpu);
750 if (rseq_likely(!ret))
751 break;
752 /* Retry if comparison fails or rseq aborts. */
753 }
754 if (_cpu)
755 *_cpu = cpu;
756 return head;
757}
758
759/*
760 * __percpu_buffer_pop is not safe against concurrent accesses. Should
761 * only be used on buffers that are not concurrently modified.
762 */
763static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
764 int cpu)
765{
766 struct percpu_buffer_node *head;
767 intptr_t offset;
768
769 offset = buffer->c[cpu].offset;
770 if (offset == 0)
771 return NULL;
772 head = buffer->c[cpu].array[offset - 1];
773 buffer->c[cpu].offset = offset - 1;
774 return head;
775}
776
777static void *test_percpu_buffer_thread(void *arg)
778{
779 long long i, reps;
780 struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
781
782 if (!opt_disable_rseq && rseq_register_current_thread())
783 abort();
784
785 reps = opt_reps;
786 for (i = 0; i < reps; i++) {
787 struct percpu_buffer_node *node;
788
789 node = this_cpu_buffer_pop(buffer, NULL);
790 if (opt_yield)
791 sched_yield(); /* encourage shuffling */
792 if (node) {
793 if (!this_cpu_buffer_push(buffer, node, NULL)) {
794 /* Should increase buffer size. */
795 abort();
796 }
797 }
798 }
799
800 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
801 (int) rseq_gettid(), nr_abort, signals_delivered);
802 if (!opt_disable_rseq && rseq_unregister_current_thread())
803 abort();
804
805 return NULL;
806}
807
808/* Simultaneous modification to a per-cpu buffer from many threads. */
809static void test_percpu_buffer(void)
810{
811 const int num_threads = opt_threads;
812 int i, j, ret;
813 uint64_t sum = 0, expected_sum = 0;
814 struct percpu_buffer buffer;
815 pthread_t test_threads[num_threads];
816 cpu_set_t allowed_cpus;
817
818 memset(&buffer, 0, sizeof(buffer));
819
820 /* Generate list entries for every usable cpu. */
821 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
822 for (i = 0; i < CPU_SETSIZE; i++) {
823 if (!CPU_ISSET(i, &allowed_cpus))
824 continue;
825 /* Worst case is every item in the same CPU. */
826 buffer.c[i].array =
827 (struct percpu_buffer_node **)
828 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
829 BUFFER_ITEM_PER_CPU);
830 assert(buffer.c[i].array);
831 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
832 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
833 struct percpu_buffer_node *node;
834
835 expected_sum += j;
836
837 /*
838 * We could theoretically put the word-sized
839 * "data" directly in the buffer. However, we
840 * want to model objects that would not fit
841 * within a single word, so allocate an object
842 * for each node.
843 */
844 node = (struct percpu_buffer_node *) malloc(sizeof(*node));
845 assert(node);
846 node->data = j;
847 buffer.c[i].array[j - 1] = node;
848 buffer.c[i].offset++;
849 }
850 }
851
852 for (i = 0; i < num_threads; i++) {
853 ret = pthread_create(&test_threads[i], NULL,
854 test_percpu_buffer_thread, &buffer);
855 if (ret) {
856 errno = ret;
857 perror("pthread_create");
858 abort();
859 }
860 }
861
862 for (i = 0; i < num_threads; i++) {
863 ret = pthread_join(test_threads[i], NULL);
864 if (ret) {
865 errno = ret;
866 perror("pthread_join");
867 abort();
868 }
869 }
870
871 for (i = 0; i < CPU_SETSIZE; i++) {
872 struct percpu_buffer_node *node;
873
874 if (!CPU_ISSET(i, &allowed_cpus))
875 continue;
876
877 while ((node = __percpu_buffer_pop(&buffer, i))) {
878 sum += node->data;
879 free(node);
880 }
881 free(buffer.c[i].array);
882 }
883
884 /*
885 * All entries should now be accounted for (unless some external
886 * actor is interfering with our allowed affinity while this
887 * test is running).
888 */
889 assert(sum == expected_sum);
890}
891
892static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
893 struct percpu_memcpy_buffer_node item,
894 int *_cpu)
895{
896 bool result = false;
897 int cpu;
898
899 for (;;) {
900 intptr_t *targetptr_final, newval_final, offset;
901 char *destptr, *srcptr;
902 size_t copylen;
903 int ret;
904
905 cpu = rseq_cpu_start();
906 /* Load offset with single-copy atomicity. */
907 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
908 if (offset == buffer->c[cpu].buflen)
909 break;
910 destptr = (char *)&buffer->c[cpu].array[offset];
911 srcptr = (char *)&item;
912 /* copylen must be <= 4kB. */
913 copylen = sizeof(item);
914 newval_final = offset + 1;
915 targetptr_final = &buffer->c[cpu].offset;
916 if (opt_mb)
917 ret = rseq_cmpeqv_trymemcpy_storev_release(
918 targetptr_final, offset,
919 destptr, srcptr, copylen,
920 newval_final, cpu);
921 else
922 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
923 offset, destptr, srcptr, copylen,
924 newval_final, cpu);
925 if (rseq_likely(!ret)) {
926 result = true;
927 break;
928 }
929 /* Retry if comparison fails or rseq aborts. */
930 }
931 if (_cpu)
932 *_cpu = cpu;
933 return result;
934}
935
936static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
937 struct percpu_memcpy_buffer_node *item,
938 int *_cpu)
939{
940 bool result = false;
941 int cpu;
942
943 for (;;) {
944 intptr_t *targetptr_final, newval_final, offset;
945 char *destptr, *srcptr;
946 size_t copylen;
947 int ret;
948
949 cpu = rseq_cpu_start();
950 /* Load offset with single-copy atomicity. */
951 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
952 if (offset == 0)
953 break;
954 destptr = (char *)item;
955 srcptr = (char *)&buffer->c[cpu].array[offset - 1];
956 /* copylen must be <= 4kB. */
957 copylen = sizeof(*item);
958 newval_final = offset - 1;
959 targetptr_final = &buffer->c[cpu].offset;
960 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
961 offset, destptr, srcptr, copylen,
962 newval_final, cpu);
963 if (rseq_likely(!ret)) {
964 result = true;
965 break;
966 }
967 /* Retry if comparison fails or rseq aborts. */
968 }
969 if (_cpu)
970 *_cpu = cpu;
971 return result;
972}
973
974/*
975 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
976 * only be used on buffers that are not concurrently modified.
977 */
978static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
979 struct percpu_memcpy_buffer_node *item,
980 int cpu)
981{
982 intptr_t offset;
983
984 offset = buffer->c[cpu].offset;
985 if (offset == 0)
986 return false;
987 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
988 buffer->c[cpu].offset = offset - 1;
989 return true;
990}
991
992static void *test_percpu_memcpy_buffer_thread(void *arg)
993{
994 long long i, reps;
995 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
996
997 if (!opt_disable_rseq && rseq_register_current_thread())
998 abort();
999
1000 reps = opt_reps;
1001 for (i = 0; i < reps; i++) {
1002 struct percpu_memcpy_buffer_node item;
1003 bool result;
1004
1005 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1006 if (opt_yield)
1007 sched_yield(); /* encourage shuffling */
1008 if (result) {
1009 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1010 /* Should increase buffer size. */
1011 abort();
1012 }
1013 }
1014 }
1015
1016 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
1017 (int) rseq_gettid(), nr_abort, signals_delivered);
1018 if (!opt_disable_rseq && rseq_unregister_current_thread())
1019 abort();
1020
1021 return NULL;
1022}
1023
1024/* Simultaneous modification to a per-cpu buffer from many threads. */
1025static void test_percpu_memcpy_buffer(void)
1026{
1027 const int num_threads = opt_threads;
1028 int i, j, ret;
1029 uint64_t sum = 0, expected_sum = 0;
1030 struct percpu_memcpy_buffer buffer;
1031 pthread_t test_threads[num_threads];
1032 cpu_set_t allowed_cpus;
1033
1034 memset(&buffer, 0, sizeof(buffer));
1035
1036 /* Generate list entries for every usable cpu. */
1037 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1038 for (i = 0; i < CPU_SETSIZE; i++) {
1039 if (!CPU_ISSET(i, &allowed_cpus))
1040 continue;
1041 /* Worst case is every item in the same CPU. */
1042 buffer.c[i].array =
1043 (struct percpu_memcpy_buffer_node *)
1044 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1045 MEMCPY_BUFFER_ITEM_PER_CPU);
1046 assert(buffer.c[i].array);
1047 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1048 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1049 expected_sum += 2 * j + 1;
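			/* Each node contributes data1 (j) + data2 (j + 1) = 2 * j + 1. */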
1050
1051 /*
1052 * We could theoretically put the word-sized
1053 * "data" directly in the buffer. However, we
1054 * want to model objects that would not fit
1055 * within a single word, so allocate an object
1056 * for each node.
1057 */
1058 buffer.c[i].array[j - 1].data1 = j;
1059 buffer.c[i].array[j - 1].data2 = j + 1;
1060 buffer.c[i].offset++;
1061 }
1062 }
1063
1064 for (i = 0; i < num_threads; i++) {
1065 ret = pthread_create(&test_threads[i], NULL,
1066 test_percpu_memcpy_buffer_thread,
1067 &buffer);
1068 if (ret) {
1069 errno = ret;
1070 perror("pthread_create");
1071 abort();
1072 }
1073 }
1074
1075 for (i = 0; i < num_threads; i++) {
1076 ret = pthread_join(test_threads[i], NULL);
1077 if (ret) {
1078 errno = ret;
1079 perror("pthread_join");
1080 abort();
1081 }
1082 }
1083
1084 for (i = 0; i < CPU_SETSIZE; i++) {
1085 struct percpu_memcpy_buffer_node item;
1086
1087 if (!CPU_ISSET(i, &allowed_cpus))
1088 continue;
1089
1090 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1091 sum += item.data1;
1092 sum += item.data2;
1093 }
1094 free(buffer.c[i].array);
1095 }
1096
1097 /*
1098 * All entries should now be accounted for (unless some external
1099 * actor is interfering with our allowed affinity while this
1100 * test is running).
1101 */
1102 assert(sum == expected_sum);
1103}
1104
1105
1106static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
1107{
1108 signals_delivered++;
1109}
1110
1111static int set_signal_handler(void)
1112{
1113 int ret = 0;
1114 struct sigaction sa;
1115 sigset_t sigset;
1116
1117 ret = sigemptyset(&sigset);
1118 if (ret < 0) {
1119 perror("sigemptyset");
1120 return ret;
1121 }
1122
1123 sa.sa_handler = test_signal_interrupt_handler;
1124 sa.sa_mask = sigset;
1125 sa.sa_flags = 0;
1126 ret = sigaction(SIGUSR1, &sa, NULL);
1127 if (ret < 0) {
1128 perror("sigaction");
1129 return ret;
1130 }
1131
1132 printf_verbose("Signal handler set for SIGUSR1\n");
1133
1134 return ret;
1135}
1136
1137static void show_usage(char **argv)
1138{
1139 printf("Usage : %s <OPTIONS>\n",
1140 argv[0]);
1141 printf("OPTIONS:\n");
1142 printf(" [-1 loops] Number of loops for delay injection 1\n");
1143 printf(" [-2 loops] Number of loops for delay injection 2\n");
1144 printf(" [-3 loops] Number of loops for delay injection 3\n");
1145 printf(" [-4 loops] Number of loops for delay injection 4\n");
1146 printf(" [-5 loops] Number of loops for delay injection 5\n");
1147 printf(" [-6 loops] Number of loops for delay injection 6\n");
1148 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1149 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1150 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1151 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1152 printf(" [-y] Yield\n");
1153 printf(" [-k] Kill thread with signal\n");
1154 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1155 printf(" [-t N] Number of threads (default 200)\n");
1156 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1157 printf(" [-d] Disable rseq system call (no initialization)\n");
1158 printf(" [-D M] Disable rseq for each M threads\n");
1159 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
1160 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
1161 printf(" [-c] Check if the rseq syscall is available.\n");
1162 printf(" [-v] Verbose output.\n");
1163 printf(" [-h] Show this help.\n");
1164 printf("\n");
1165}
1166
1167int main(int argc, char **argv)
1168{
1169 int i;
1170
1171 for (i = 1; i < argc; i++) {
1172 if (argv[i][0] != '-')
1173 continue;
1174 switch (argv[i][1]) {
1175 case '1':
1176 case '2':
1177 case '3':
1178 case '4':
1179 case '5':
1180 case '6':
1181 case '7':
1182 case '8':
1183 case '9':
1184 if (argc < i + 2) {
1185 show_usage(argv);
1186 goto error;
1187 }
1188 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1189 i++;
1190 break;
1191 case 'm':
1192 if (argc < i + 2) {
1193 show_usage(argv);
1194 goto error;
1195 }
1196 opt_modulo = atol(argv[i + 1]);
1197 if (opt_modulo < 0) {
1198 show_usage(argv);
1199 goto error;
1200 }
1201 i++;
1202 break;
1203 case 's':
1204 if (argc < i + 2) {
1205 show_usage(argv);
1206 goto error;
1207 }
1208 opt_sleep = atol(argv[i + 1]);
1209 if (opt_sleep < 0) {
1210 show_usage(argv);
1211 goto error;
1212 }
1213 i++;
1214 break;
1215 case 'y':
1216 opt_yield = 1;
1217 break;
1218 case 'k':
1219 opt_signal = 1;
1220 break;
1221 case 'd':
1222 opt_disable_rseq = 1;
1223 break;
1224 case 'D':
1225 if (argc < i + 2) {
1226 show_usage(argv);
1227 goto error;
1228 }
1229 opt_disable_mod = atol(argv[i + 1]);
1230 if (opt_disable_mod < 0) {
1231 show_usage(argv);
1232 goto error;
1233 }
1234 i++;
1235 break;
1236 case 't':
1237 if (argc < i + 2) {
1238 show_usage(argv);
1239 goto error;
1240 }
1241 opt_threads = atol(argv[i + 1]);
1242 if (opt_threads < 0) {
1243 show_usage(argv);
1244 goto error;
1245 }
1246 i++;
1247 break;
1248 case 'r':
1249 if (argc < i + 2) {
1250 show_usage(argv);
1251 goto error;
1252 }
1253 opt_reps = atoll(argv[i + 1]);
1254 if (opt_reps < 0) {
1255 show_usage(argv);
1256 goto error;
1257 }
1258 i++;
1259 break;
1260 case 'h':
1261 show_usage(argv);
1262 goto end;
1263 case 'T':
1264 if (argc < i + 2) {
1265 show_usage(argv);
1266 goto error;
1267 }
1268 opt_test = *argv[i + 1];
1269 switch (opt_test) {
1270 case 's':
1271 case 'l':
1272 case 'i':
1273 case 'b':
1274 case 'm':
1275 break;
1276 default:
1277 show_usage(argv);
1278 goto error;
1279 }
1280 i++;
1281 break;
1282 case 'v':
1283 verbose = 1;
1284 break;
1285 case 'M':
1286 opt_mb = 1;
1287 break;
1288 case 'c':
1289 if (rseq_available()) {
1290 printf_verbose("The rseq syscall is available.\n");
1291 goto end;
1292 } else {
1293 printf_verbose("The rseq syscall is unavailable.\n");
1294 goto no_rseq;
1295 }
1296 default:
1297 show_usage(argv);
1298 goto error;
1299 }
1300 }
1301
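	/* Expose the parsed loop counts to the asm-visible injection counters. */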
1302 loop_cnt_1 = loop_cnt[1];
1303 loop_cnt_2 = loop_cnt[2];
1304 loop_cnt_3 = loop_cnt[3];
1305 loop_cnt_4 = loop_cnt[4];
1306 loop_cnt_5 = loop_cnt[5];
1307 loop_cnt_6 = loop_cnt[6];
1308
1309 if (set_signal_handler())
1310 goto error;
1311
1312 if (!opt_disable_rseq && rseq_register_current_thread())
1313 goto error;
1314 switch (opt_test) {
1315 case 's':
1316 printf_verbose("spinlock\n");
1317 test_percpu_spinlock();
1318 break;
1319 case 'l':
1320 printf_verbose("linked list\n");
1321 test_percpu_list();
1322 break;
1323 case 'b':
1324 printf_verbose("buffer\n");
1325 test_percpu_buffer();
1326 break;
1327 case 'm':
1328 printf_verbose("memcpy buffer\n");
1329 test_percpu_memcpy_buffer();
1330 break;
1331 case 'i':
1332 printf_verbose("counter increment\n");
1333 test_percpu_inc();
1334 break;
1335 }
1336 if (!opt_disable_rseq && rseq_unregister_current_thread())
1337 abort();
1338end:
1339 return 0;
1340
1341error:
1342 return -1;
1343
1344no_rseq:
1345 return 2;
1346}