1 // SPDX-License-Identifier: LGPL-2.1-only
2 #ifndef _GNU_SOURCE
3 #define _GNU_SOURCE
4 #endif
5 #include <assert.h>
6 #include <pthread.h>
7 #include <sched.h>
8 #include <stdint.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <syscall.h>
13 #include <unistd.h>
14 #include <poll.h>
15 #include <sys/types.h>
16 #include <signal.h>
17 #include <errno.h>
18 #include <stddef.h>
19
20 #define NR_INJECT 9
21 static int loop_cnt[NR_INJECT + 1];
22
23 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
24 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
25 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
26 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
27 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
28 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
29
30 static int opt_modulo, verbose;
31
32 static int opt_yield, opt_signal, opt_sleep,
33 opt_disable_rseq, opt_threads = 200,
34 opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
35
36 #ifndef RSEQ_SKIP_FASTPATH
37 static long long opt_reps = 5000;
38 #else
39 static long long opt_reps = 100;
40 #endif
41
42 static __thread __attribute__((tls_model("initial-exec")))
43 unsigned int signals_delivered;
44
45 #ifndef BENCHMARK
46
47 static inline pid_t rseq_gettid(void)
48 {
49 return syscall(__NR_gettid);
50 }
51
52 static __thread __attribute__((tls_model("initial-exec"), unused))
53 int yield_mod_cnt, nr_abort;
54
55 #define printf_verbose(fmt, ...) \
56 do { \
57 if (verbose) \
58 printf(fmt, ## __VA_ARGS__); \
59 } while (0)
60
61 #ifdef __i386__
62
63 #define INJECT_ASM_REG "eax"
64
65 #define RSEQ_INJECT_CLOBBER \
66 , INJECT_ASM_REG
67
68 #define RSEQ_INJECT_ASM(n) \
69 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
70 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
71 "jz 333f\n\t" \
72 "222:\n\t" \
73 "dec %%" INJECT_ASM_REG "\n\t" \
74 "jnz 222b\n\t" \
75 "333:\n\t"
76
77 #elif defined(__x86_64__)
78
79 #define INJECT_ASM_REG_P "rax"
80 #define INJECT_ASM_REG "eax"
81
82 #define RSEQ_INJECT_CLOBBER \
83 , INJECT_ASM_REG_P \
84 , INJECT_ASM_REG
85
86 #define RSEQ_INJECT_ASM(n) \
87 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
88 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
89 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
90 "jz 333f\n\t" \
91 "222:\n\t" \
92 "dec %%" INJECT_ASM_REG "\n\t" \
93 "jnz 222b\n\t" \
94 "333:\n\t"
95
96 #elif defined(__s390__)
97
98 #define RSEQ_INJECT_INPUT \
99 , [loop_cnt_1]"m"(loop_cnt[1]) \
100 , [loop_cnt_2]"m"(loop_cnt[2]) \
101 , [loop_cnt_3]"m"(loop_cnt[3]) \
102 , [loop_cnt_4]"m"(loop_cnt[4]) \
103 , [loop_cnt_5]"m"(loop_cnt[5]) \
104 , [loop_cnt_6]"m"(loop_cnt[6])
105
106 #define INJECT_ASM_REG "r12"
107
108 #define RSEQ_INJECT_CLOBBER \
109 , INJECT_ASM_REG
110
111 #define RSEQ_INJECT_ASM(n) \
112 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
113 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
114 "je 333f\n\t" \
115 "222:\n\t" \
116 "ahi %%" INJECT_ASM_REG ", -1\n\t" \
117 "jnz 222b\n\t" \
118 "333:\n\t"
119
120 #elif defined(__ARMEL__)
121
122 #define RSEQ_INJECT_INPUT \
123 , [loop_cnt_1]"m"(loop_cnt[1]) \
124 , [loop_cnt_2]"m"(loop_cnt[2]) \
125 , [loop_cnt_3]"m"(loop_cnt[3]) \
126 , [loop_cnt_4]"m"(loop_cnt[4]) \
127 , [loop_cnt_5]"m"(loop_cnt[5]) \
128 , [loop_cnt_6]"m"(loop_cnt[6])
129
130 #define INJECT_ASM_REG "r4"
131
132 #define RSEQ_INJECT_CLOBBER \
133 , INJECT_ASM_REG
134
135 #define RSEQ_INJECT_ASM(n) \
136 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
137 "cmp " INJECT_ASM_REG ", #0\n\t" \
138 "beq 333f\n\t" \
139 "222:\n\t" \
140 "subs " INJECT_ASM_REG ", #1\n\t" \
141 "bne 222b\n\t" \
142 "333:\n\t"
143
144 #elif defined(__AARCH64EL__)
145
146 #define RSEQ_INJECT_INPUT \
147 , [loop_cnt_1] "Qo" (loop_cnt[1]) \
148 , [loop_cnt_2] "Qo" (loop_cnt[2]) \
149 , [loop_cnt_3] "Qo" (loop_cnt[3]) \
150 , [loop_cnt_4] "Qo" (loop_cnt[4]) \
151 , [loop_cnt_5] "Qo" (loop_cnt[5]) \
152 , [loop_cnt_6] "Qo" (loop_cnt[6])
153
154 #define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
155
156 #define RSEQ_INJECT_ASM(n) \
157 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
158 " cbz " INJECT_ASM_REG ", 333f\n" \
159 "222:\n" \
160 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
161 " cbnz " INJECT_ASM_REG ", 222b\n" \
162 "333:\n"
163
164 #elif defined(__PPC__)
165
166 #define RSEQ_INJECT_INPUT \
167 , [loop_cnt_1]"m"(loop_cnt[1]) \
168 , [loop_cnt_2]"m"(loop_cnt[2]) \
169 , [loop_cnt_3]"m"(loop_cnt[3]) \
170 , [loop_cnt_4]"m"(loop_cnt[4]) \
171 , [loop_cnt_5]"m"(loop_cnt[5]) \
172 , [loop_cnt_6]"m"(loop_cnt[6])
173
174 #define INJECT_ASM_REG "r18"
175
176 #define RSEQ_INJECT_CLOBBER \
177 , INJECT_ASM_REG
178
179 #define RSEQ_INJECT_ASM(n) \
180 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
181 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
182 "beq 333f\n\t" \
183 "222:\n\t" \
184 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
185 "bne 222b\n\t" \
186 "333:\n\t"
187
188 #elif defined(__mips__)
189
190 #define RSEQ_INJECT_INPUT \
191 , [loop_cnt_1]"m"(loop_cnt[1]) \
192 , [loop_cnt_2]"m"(loop_cnt[2]) \
193 , [loop_cnt_3]"m"(loop_cnt[3]) \
194 , [loop_cnt_4]"m"(loop_cnt[4]) \
195 , [loop_cnt_5]"m"(loop_cnt[5]) \
196 , [loop_cnt_6]"m"(loop_cnt[6])
197
198 #define INJECT_ASM_REG "$5"
199
200 #define RSEQ_INJECT_CLOBBER \
201 , INJECT_ASM_REG
202
203 #define RSEQ_INJECT_ASM(n) \
204 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
205 "beqz " INJECT_ASM_REG ", 333f\n\t" \
206 "222:\n\t" \
207 "addiu " INJECT_ASM_REG ", -1\n\t" \
208 "bnez " INJECT_ASM_REG ", 222b\n\t" \
209 "333:\n\t"
210
211 #else
212 #error unsupported target
213 #endif
214
215 #define RSEQ_INJECT_FAILED \
216 nr_abort++;
217
218 #define RSEQ_INJECT_C(n) \
219 { \
220 int loc_i, loc_nr_loops = loop_cnt[n]; \
221 \
222 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
223 rseq_barrier(); \
224 } \
225 if (loc_nr_loops == -1 && opt_modulo) { \
226 if (yield_mod_cnt == opt_modulo - 1) { \
227 if (opt_sleep > 0) \
228 poll(NULL, 0, opt_sleep); \
229 if (opt_yield) \
230 sched_yield(); \
231 if (opt_signal) \
232 raise(SIGUSR1); \
233 yield_mod_cnt = 0; \
234 } else { \
235 yield_mod_cnt++; \
236 } \
237 } \
238 }
239
240 #else
241
242 #define printf_verbose(fmt, ...)
243
244 #endif /* BENCHMARK */
245
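/*
 * The RSEQ_INJECT_* hooks defined above (in the non-BENCHMARK build) are
 * consumed by the rseq headers included below: RSEQ_INJECT_ASM(n) spins
 * for loop_cnt[n] iterations inside the assembly critical sections,
 * RSEQ_INJECT_C(n) adds the yield/sleep/signal disturbances controlled by
 * the -m, -s, -y and -k options, and RSEQ_INJECT_FAILED counts aborts.
 * Defining them before including <rseq/rseq.h> enables these test paths.
 */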
246 #include <rseq/rseq.h>
247
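/*
 * The per-cpu entry structures below are aligned on 128 bytes, presumably
 * so that each cpu's data sits on its own cache lines and the tests do
 * not measure false sharing between cpus.
 */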
248 struct percpu_lock_entry {
249 intptr_t v;
250 } __attribute__((aligned(128)));
251
252 struct percpu_lock {
253 struct percpu_lock_entry c[CPU_SETSIZE];
254 };
255
256 struct test_data_entry {
257 intptr_t count;
258 } __attribute__((aligned(128)));
259
260 struct spinlock_test_data {
261 struct percpu_lock lock;
262 struct test_data_entry c[CPU_SETSIZE];
263 };
264
265 struct spinlock_thread_test_data {
266 struct spinlock_test_data *data;
267 long long reps;
268 int reg;
269 };
270
271 struct inc_test_data {
272 struct test_data_entry c[CPU_SETSIZE];
273 };
274
275 struct inc_thread_test_data {
276 struct inc_test_data *data;
277 long long reps;
278 int reg;
279 };
280
281 struct percpu_list_node {
282 intptr_t data;
283 struct percpu_list_node *next;
284 };
285
286 struct percpu_list_entry {
287 struct percpu_list_node *head;
288 } __attribute__((aligned(128)));
289
290 struct percpu_list {
291 struct percpu_list_entry c[CPU_SETSIZE];
292 };
293
294 #define BUFFER_ITEM_PER_CPU 100
295
296 struct percpu_buffer_node {
297 intptr_t data;
298 };
299
300 struct percpu_buffer_entry {
301 intptr_t offset;
302 intptr_t buflen;
303 struct percpu_buffer_node **array;
304 } __attribute__((aligned(128)));
305
306 struct percpu_buffer {
307 struct percpu_buffer_entry c[CPU_SETSIZE];
308 };
309
310 #define MEMCPY_BUFFER_ITEM_PER_CPU 100
311
312 struct percpu_memcpy_buffer_node {
313 intptr_t data1;
314 uint64_t data2;
315 };
316
317 struct percpu_memcpy_buffer_entry {
318 intptr_t offset;
319 intptr_t buflen;
320 struct percpu_memcpy_buffer_node *array;
321 } __attribute__((aligned(128)));
322
323 struct percpu_memcpy_buffer {
324 struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
325 };
326
327 /* A simple percpu spinlock. Grabs the lock on the current cpu. */
328 static int rseq_this_cpu_lock(struct percpu_lock *lock)
329 {
330 int cpu;
331
332 for (;;) {
333 int ret;
334
335 cpu = rseq_cpu_start();
336 ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
337 0, 1, cpu);
338 if (rseq_likely(!ret))
339 break;
340 /* Retry if comparison fails or rseq aborts. */
341 }
342 /*
343 * Acquire semantic when taking lock after control dependency.
344 * Matches rseq_smp_store_release().
345 */
346 rseq_smp_acquire__after_ctrl_dep();
347 return cpu;
348 }
349
350 static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
351 {
352 assert(lock->c[cpu].v == 1);
353 /*
354 * Release lock, with release semantic. Matches
355 * rseq_smp_acquire__after_ctrl_dep().
356 */
357 rseq_smp_store_release(&lock->c[cpu].v, 0);
358 }
359
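/*
 * Worker thread for the spinlock test: take the lock of the current cpu,
 * increment that cpu's counter, release the lock, and repeat for the
 * requested number of repetitions.
 */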
360 void *test_percpu_spinlock_thread(void *arg)
361 {
362 struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
363 struct spinlock_test_data *data = thread_data->data;
364 long long i, reps;
365
366 if (!opt_disable_rseq && thread_data->reg &&
367 rseq_register_current_thread())
368 abort();
369 reps = thread_data->reps;
370 for (i = 0; i < reps; i++) {
371 int cpu;
372 
373 cpu = rseq_this_cpu_lock(&data->lock);
374 data->c[cpu].count++;
375 rseq_percpu_unlock(&data->lock, cpu);
376 #ifndef BENCHMARK
377 if (i != 0 && !(i % (reps / 10)))
378 printf_verbose("tid %d: count %lld\n",
379 (int) rseq_gettid(), i);
380 #endif
381 }
382 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
383 (int) rseq_gettid(), nr_abort, signals_delivered);
384 if (!opt_disable_rseq && thread_data->reg &&
385 rseq_unregister_current_thread())
386 abort();
387 return NULL;
388 }
389
390 /*
391 * A simple test which implements a sharded counter using a per-cpu
392 * lock. Obviously real applications might prefer to simply use a
393 * per-cpu increment; however, this is reasonable for a test and the
394 * lock can be extended to synchronize more complicated operations.
395 */
396 void test_percpu_spinlock(void)
397 {
398 const int num_threads = opt_threads;
399 int i, ret;
400 uint64_t sum;
401 pthread_t test_threads[num_threads];
402 struct spinlock_test_data data;
403 struct spinlock_thread_test_data thread_data[num_threads];
404
405 memset(&data, 0, sizeof(data));
406 for (i = 0; i < num_threads; i++) {
407 thread_data[i].reps = opt_reps;
408 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
409 thread_data[i].reg = 1;
410 else
411 thread_data[i].reg = 0;
412 thread_data[i].data = &data;
413 ret = pthread_create(&test_threads[i], NULL,
414 test_percpu_spinlock_thread,
415 &thread_data[i]);
416 if (ret) {
417 errno = ret;
418 perror("pthread_create");
419 abort();
420 }
421 }
422
423 for (i = 0; i < num_threads; i++) {
424 ret = pthread_join(test_threads[i], NULL);
425 if (ret) {
426 errno = ret;
427 perror("pthread_join");
428 abort();
429 }
430 }
431
432 sum = 0;
433 for (i = 0; i < CPU_SETSIZE; i++)
434 sum += data.c[i].count;
435
436 assert(sum == (uint64_t)opt_reps * num_threads);
437 }
438
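/*
 * Worker thread for the counter increment test: add 1 to the current
 * cpu's counter with rseq_addv(), retrying whenever the thread migrates
 * or the rseq critical section aborts.
 */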
439 void *test_percpu_inc_thread(void *arg)
440 {
441 struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
442 struct inc_test_data *data = thread_data->data;
443 long long i, reps;
444
445 if (!opt_disable_rseq && thread_data->reg &&
446 rseq_register_current_thread())
447 abort();
448 reps = thread_data->reps;
449 for (i = 0; i < reps; i++) {
450 int ret;
451
452 do {
453 int cpu;
454
455 cpu = rseq_cpu_start();
456 ret = rseq_addv(&data->c[cpu].count, 1, cpu);
457 } while (rseq_unlikely(ret));
458 #ifndef BENCHMARK
459 if (i != 0 && !(i % (reps / 10)))
460 printf_verbose("tid %d: count %lld\n",
461 (int) rseq_gettid(), i);
462 #endif
463 }
464 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
465 (int) rseq_gettid(), nr_abort, signals_delivered);
466 if (!opt_disable_rseq && thread_data->reg &&
467 rseq_unregister_current_thread())
468 abort();
469 return NULL;
470 }
471
472 void test_percpu_inc(void)
473 {
474 const int num_threads = opt_threads;
475 int i, ret;
476 uint64_t sum;
477 pthread_t test_threads[num_threads];
478 struct inc_test_data data;
479 struct inc_thread_test_data thread_data[num_threads];
480
481 memset(&data, 0, sizeof(data));
482 for (i = 0; i < num_threads; i++) {
483 thread_data[i].reps = opt_reps;
484 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
485 thread_data[i].reg = 1;
486 else
487 thread_data[i].reg = 0;
488 thread_data[i].data = &data;
489 ret = pthread_create(&test_threads[i], NULL,
490 test_percpu_inc_thread,
491 &thread_data[i]);
492 if (ret) {
493 errno = ret;
494 perror("pthread_create");
495 abort();
496 }
497 }
498
499 for (i = 0; i < num_threads; i++) {
500 ret = pthread_join(test_threads[i], NULL);
501 if (ret) {
502 errno = ret;
503 perror("pthread_join");
504 abort();
505 }
506 }
507
508 sum = 0;
509 for (i = 0; i < CPU_SETSIZE; i++)
510 sum += data.c[i].count;
511
512 assert(sum == (uint64_t)opt_reps * num_threads);
513 }
514
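/*
 * Push a node onto the list of the current cpu: link the node in front of
 * the observed head and publish it with rseq_cmpeqv_storev(), retrying if
 * the head changed or the rseq critical section aborted. The cpu used is
 * reported through *_cpu when it is non-NULL.
 */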
515 void this_cpu_list_push(struct percpu_list *list,
516 struct percpu_list_node *node,
517 int *_cpu)
518 {
519 int cpu;
520
521 for (;;) {
522 intptr_t *targetptr, newval, expect;
523 int ret;
524
525 cpu = rseq_cpu_start();
526 /* Load list->c[cpu].head with single-copy atomicity. */
527 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
528 newval = (intptr_t)node;
529 targetptr = (intptr_t *)&list->c[cpu].head;
530 node->next = (struct percpu_list_node *)expect;
531 ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
532 if (rseq_likely(!ret))
533 break;
534 /* Retry if comparison fails or rseq aborts. */
535 }
536 if (_cpu)
537 *_cpu = cpu;
538 }
539
540 /*
541  * Unlike a traditional lock-less linked list, the availability of an
542 * rseq primitive allows us to implement pop without concerns over
543 * ABA-type races.
544 */
545 struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
546 int *_cpu)
547 {
548 struct percpu_list_node *node = NULL;
549 int cpu;
550
551 for (;;) {
552 struct percpu_list_node *head;
553 intptr_t *targetptr, expectnot, *load;
554 off_t offset;
555 int ret;
556
557 cpu = rseq_cpu_start();
558 targetptr = (intptr_t *)&list->c[cpu].head;
559 expectnot = (intptr_t)NULL;
560 offset = offsetof(struct percpu_list_node, next);
561 load = (intptr_t *)&head;
562 ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
563 offset, load, cpu);
564 if (rseq_likely(!ret)) {
565 node = head;
566 break;
567 }
568 if (ret > 0)
569 break;
570 /* Retry if rseq aborts. */
571 }
572 if (_cpu)
573 *_cpu = cpu;
574 return node;
575 }
576
577 /*
578 * __percpu_list_pop is not safe against concurrent accesses. Should
579 * only be used on lists that are not concurrently modified.
580 */
581 struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
582 {
583 struct percpu_list_node *node;
584
585 node = list->c[cpu].head;
586 if (!node)
587 return NULL;
588 list->c[cpu].head = node->next;
589 return node;
590 }
591
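/*
 * Worker thread for the list test: pop a node from the current cpu's list
 * and push it back (possibly onto another cpu's list after a migration),
 * optionally yielding in between to encourage shuffling.
 */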
592 void *test_percpu_list_thread(void *arg)
593 {
594 long long i, reps;
595 struct percpu_list *list = (struct percpu_list *)arg;
596
597 if (!opt_disable_rseq && rseq_register_current_thread())
598 abort();
599
600 reps = opt_reps;
601 for (i = 0; i < reps; i++) {
602 struct percpu_list_node *node;
603
604 node = this_cpu_list_pop(list, NULL);
605 if (opt_yield)
606 sched_yield(); /* encourage shuffling */
607 if (node)
608 this_cpu_list_push(list, node, NULL);
609 }
610
611 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
612 (int) rseq_gettid(), nr_abort, signals_delivered);
613 if (!opt_disable_rseq && rseq_unregister_current_thread())
614 abort();
615
616 return NULL;
617 }
618
619 /* Simultaneous modification to a per-cpu linked list from many threads. */
620 void test_percpu_list(void)
621 {
622 const int num_threads = opt_threads;
623 int i, j, ret;
624 uint64_t sum = 0, expected_sum = 0;
625 struct percpu_list list;
626 pthread_t test_threads[num_threads];
627 cpu_set_t allowed_cpus;
628
629 memset(&list, 0, sizeof(list));
630
631 /* Generate list entries for every usable cpu. */
632 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
633 for (i = 0; i < CPU_SETSIZE; i++) {
634 if (!CPU_ISSET(i, &allowed_cpus))
635 continue;
636 for (j = 1; j <= 100; j++) {
637 struct percpu_list_node *node;
638
639 expected_sum += j;
640
641 node = (struct percpu_list_node *) malloc(sizeof(*node));
642 assert(node);
643 node->data = j;
644 node->next = list.c[i].head;
645 list.c[i].head = node;
646 }
647 }
648
649 for (i = 0; i < num_threads; i++) {
650 ret = pthread_create(&test_threads[i], NULL,
651 test_percpu_list_thread, &list);
652 if (ret) {
653 errno = ret;
654 perror("pthread_create");
655 abort();
656 }
657 }
658
659 for (i = 0; i < num_threads; i++) {
660 ret = pthread_join(test_threads[i], NULL);
661 if (ret) {
662 errno = ret;
663 perror("pthread_join");
664 abort();
665 }
666 }
667
668 for (i = 0; i < CPU_SETSIZE; i++) {
669 struct percpu_list_node *node;
670
671 if (!CPU_ISSET(i, &allowed_cpus))
672 continue;
673
674 while ((node = __percpu_list_pop(&list, i))) {
675 sum += node->data;
676 free(node);
677 }
678 }
679
680 /*
681 * All entries should now be accounted for (unless some external
682 * actor is interfering with our allowed affinity while this
683 * test is running).
684 */
685 assert(sum == expected_sum);
686 }
687
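/*
 * Push a node onto the current cpu's buffer: speculatively store the node
 * pointer into array[offset], then commit the incremented offset, both
 * within a single rseq critical section. With -M (opt_mb) the commit uses
 * the store-release variant. Returns false when the cpu's buffer is full.
 */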
688 bool this_cpu_buffer_push(struct percpu_buffer *buffer,
689 struct percpu_buffer_node *node,
690 int *_cpu)
691 {
692 bool result = false;
693 int cpu;
694
695 for (;;) {
696 intptr_t *targetptr_spec, newval_spec;
697 intptr_t *targetptr_final, newval_final;
698 intptr_t offset;
699 int ret;
700
701 cpu = rseq_cpu_start();
702 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
703 if (offset == buffer->c[cpu].buflen)
704 break;
705 newval_spec = (intptr_t)node;
706 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
707 newval_final = offset + 1;
708 targetptr_final = &buffer->c[cpu].offset;
709 if (opt_mb)
710 ret = rseq_cmpeqv_trystorev_storev_release(
711 targetptr_final, offset, targetptr_spec,
712 newval_spec, newval_final, cpu);
713 else
714 ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
715 offset, targetptr_spec, newval_spec,
716 newval_final, cpu);
717 if (rseq_likely(!ret)) {
718 result = true;
719 break;
720 }
721 /* Retry if comparison fails or rseq aborts. */
722 }
723 if (_cpu)
724 *_cpu = cpu;
725 return result;
726 }
727
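/*
 * Pop the last node from the current cpu's buffer: the commit of the
 * decremented offset only succeeds if both the offset and the top array
 * slot still hold the values that were read, otherwise the operation is
 * retried. Returns NULL when the buffer is empty.
 */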
728 struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
729 int *_cpu)
730 {
731 struct percpu_buffer_node *head;
732 int cpu;
733
734 for (;;) {
735 intptr_t *targetptr, newval;
736 intptr_t offset;
737 int ret;
738
739 cpu = rseq_cpu_start();
740 /* Load offset with single-copy atomicity. */
741 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
742 if (offset == 0) {
743 head = NULL;
744 break;
745 }
746 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
747 newval = offset - 1;
748 targetptr = (intptr_t *)&buffer->c[cpu].offset;
749 ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
750 (intptr_t *)&buffer->c[cpu].array[offset - 1],
751 (intptr_t)head, newval, cpu);
752 if (rseq_likely(!ret))
753 break;
754 /* Retry if comparison fails or rseq aborts. */
755 }
756 if (_cpu)
757 *_cpu = cpu;
758 return head;
759 }
760
761 /*
762 * __percpu_buffer_pop is not safe against concurrent accesses. Should
763 * only be used on buffers that are not concurrently modified.
764 */
765 struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
766 int cpu)
767 {
768 struct percpu_buffer_node *head;
769 intptr_t offset;
770
771 offset = buffer->c[cpu].offset;
772 if (offset == 0)
773 return NULL;
774 head = buffer->c[cpu].array[offset - 1];
775 buffer->c[cpu].offset = offset - 1;
776 return head;
777 }
778
779 void *test_percpu_buffer_thread(void *arg)
780 {
781 long long i, reps;
782 struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
783
784 if (!opt_disable_rseq && rseq_register_current_thread())
785 abort();
786
787 reps = opt_reps;
788 for (i = 0; i < reps; i++) {
789 struct percpu_buffer_node *node;
790
791 node = this_cpu_buffer_pop(buffer, NULL);
792 if (opt_yield)
793 sched_yield(); /* encourage shuffling */
794 if (node) {
795 if (!this_cpu_buffer_push(buffer, node, NULL)) {
796 /* Should increase buffer size. */
797 abort();
798 }
799 }
800 }
801
802 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
803 (int) rseq_gettid(), nr_abort, signals_delivered);
804 if (!opt_disable_rseq && rseq_unregister_current_thread())
805 abort();
806
807 return NULL;
808 }
809
810 /* Simultaneous modification to a per-cpu buffer from many threads. */
811 void test_percpu_buffer(void)
812 {
813 const int num_threads = opt_threads;
814 int i, j, ret;
815 uint64_t sum = 0, expected_sum = 0;
816 struct percpu_buffer buffer;
817 pthread_t test_threads[num_threads];
818 cpu_set_t allowed_cpus;
819
820 memset(&buffer, 0, sizeof(buffer));
821
822 /* Generate buffer entries for every usable cpu. */
823 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
824 for (i = 0; i < CPU_SETSIZE; i++) {
825 if (!CPU_ISSET(i, &allowed_cpus))
826 continue;
827 /* Worst case is every item in the same CPU. */
828 buffer.c[i].array =
829 (struct percpu_buffer_node **)
830 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
831 BUFFER_ITEM_PER_CPU);
832 assert(buffer.c[i].array);
833 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
834 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
835 struct percpu_buffer_node *node;
836
837 expected_sum += j;
838
839 /*
840 * We could theoretically put the word-sized
841 * "data" directly in the buffer. However, we
842 * want to model objects that would not fit
843 * within a single word, so allocate an object
844 * for each node.
845 */
846 node = (struct percpu_buffer_node *) malloc(sizeof(*node));
847 assert(node);
848 node->data = j;
849 buffer.c[i].array[j - 1] = node;
850 buffer.c[i].offset++;
851 }
852 }
853
854 for (i = 0; i < num_threads; i++) {
855 ret = pthread_create(&test_threads[i], NULL,
856 test_percpu_buffer_thread, &buffer);
857 if (ret) {
858 errno = ret;
859 perror("pthread_create");
860 abort();
861 }
862 }
863
864 for (i = 0; i < num_threads; i++) {
865 ret = pthread_join(test_threads[i], NULL);
866 if (ret) {
867 errno = ret;
868 perror("pthread_join");
869 abort();
870 }
871 }
872
873 for (i = 0; i < CPU_SETSIZE; i++) {
874 struct percpu_buffer_node *node;
875
876 if (!CPU_ISSET(i, &allowed_cpus))
877 continue;
878
879 while ((node = __percpu_buffer_pop(&buffer, i))) {
880 sum += node->data;
881 free(node);
882 }
883 free(buffer.c[i].array);
884 }
885
886 /*
887 * All entries should now be accounted for (unless some external
888 * actor is interfering with our allowed affinity while this
889 * test is running).
890 */
891 assert(sum == expected_sum);
892 }
893
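/*
 * Push a copy of the item into the current cpu's buffer using
 * rseq_cmpeqv_trymemcpy_storev(): the byte copy and the offset update
 * belong to the same rseq critical section, so a copy interrupted by
 * preemption or a signal is discarded and retried. Returns false when the
 * cpu's buffer is full.
 */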
894 bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
895 struct percpu_memcpy_buffer_node item,
896 int *_cpu)
897 {
898 bool result = false;
899 int cpu;
900
901 for (;;) {
902 intptr_t *targetptr_final, newval_final, offset;
903 char *destptr, *srcptr;
904 size_t copylen;
905 int ret;
906
907 cpu = rseq_cpu_start();
908 /* Load offset with single-copy atomicity. */
909 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
910 if (offset == buffer->c[cpu].buflen)
911 break;
912 destptr = (char *)&buffer->c[cpu].array[offset];
913 srcptr = (char *)&item;
914 /* copylen must be <= 4kB. */
915 copylen = sizeof(item);
916 newval_final = offset + 1;
917 targetptr_final = &buffer->c[cpu].offset;
918 if (opt_mb)
919 ret = rseq_cmpeqv_trymemcpy_storev_release(
920 targetptr_final, offset,
921 destptr, srcptr, copylen,
922 newval_final, cpu);
923 else
924 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
925 offset, destptr, srcptr, copylen,
926 newval_final, cpu);
927 if (rseq_likely(!ret)) {
928 result = true;
929 break;
930 }
931 /* Retry if comparison fails or rseq aborts. */
932 }
933 if (_cpu)
934 *_cpu = cpu;
935 return result;
936 }
937
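/*
 * Pop the last item out of the current cpu's buffer by copying it into
 * *item and committing the decremented offset within one rseq critical
 * section. Returns false when the buffer is empty.
 */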
938 bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
939 struct percpu_memcpy_buffer_node *item,
940 int *_cpu)
941 {
942 bool result = false;
943 int cpu;
944
945 for (;;) {
946 intptr_t *targetptr_final, newval_final, offset;
947 char *destptr, *srcptr;
948 size_t copylen;
949 int ret;
950
951 cpu = rseq_cpu_start();
952 /* Load offset with single-copy atomicity. */
953 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
954 if (offset == 0)
955 break;
956 destptr = (char *)item;
957 srcptr = (char *)&buffer->c[cpu].array[offset - 1];
958 /* copylen must be <= 4kB. */
959 copylen = sizeof(*item);
960 newval_final = offset - 1;
961 targetptr_final = &buffer->c[cpu].offset;
962 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
963 offset, destptr, srcptr, copylen,
964 newval_final, cpu);
965 if (rseq_likely(!ret)) {
966 result = true;
967 break;
968 }
969 /* Retry if comparison fails or rseq aborts. */
970 }
971 if (_cpu)
972 *_cpu = cpu;
973 return result;
974 }
975
976 /*
977 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
978 * only be used on buffers that are not concurrently modified.
979 */
980 bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
981 struct percpu_memcpy_buffer_node *item,
982 int cpu)
983 {
984 intptr_t offset;
985
986 offset = buffer->c[cpu].offset;
987 if (offset == 0)
988 return false;
989 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
990 buffer->c[cpu].offset = offset - 1;
991 return true;
992 }
993
994 void *test_percpu_memcpy_buffer_thread(void *arg)
995 {
996 long long i, reps;
997 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
998
999 if (!opt_disable_rseq && rseq_register_current_thread())
1000 abort();
1001
1002 reps = opt_reps;
1003 for (i = 0; i < reps; i++) {
1004 struct percpu_memcpy_buffer_node item;
1005 bool result;
1006
1007 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1008 if (opt_yield)
1009 sched_yield(); /* encourage shuffling */
1010 if (result) {
1011 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1012 /* Should increase buffer size. */
1013 abort();
1014 }
1015 }
1016 }
1017
1018 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
1019 (int) rseq_gettid(), nr_abort, signals_delivered);
1020 if (!opt_disable_rseq && rseq_unregister_current_thread())
1021 abort();
1022
1023 return NULL;
1024 }
1025
1026 /* Simultaneous modification to a per-cpu buffer from many threads. */
1027 void test_percpu_memcpy_buffer(void)
1028 {
1029 const int num_threads = opt_threads;
1030 int i, j, ret;
1031 uint64_t sum = 0, expected_sum = 0;
1032 struct percpu_memcpy_buffer buffer;
1033 pthread_t test_threads[num_threads];
1034 cpu_set_t allowed_cpus;
1035
1036 memset(&buffer, 0, sizeof(buffer));
1037
1038 /* Generate buffer entries for every usable cpu. */
1039 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1040 for (i = 0; i < CPU_SETSIZE; i++) {
1041 if (!CPU_ISSET(i, &allowed_cpus))
1042 continue;
1043 /* Worst case is every item in the same CPU. */
1044 buffer.c[i].array =
1045 (struct percpu_memcpy_buffer_node *)
1046 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1047 MEMCPY_BUFFER_ITEM_PER_CPU);
1048 assert(buffer.c[i].array);
1049 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1050 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1051 expected_sum += 2 * j + 1;
1052
1053 /*
1054 * We could theoretically put the word-sized
1055 * "data" directly in the buffer. However, we
1056 * want to model objects that would not fit
1057 * within a single word, so allocate an object
1058 * for each node.
1059 */
1060 buffer.c[i].array[j - 1].data1 = j;
1061 buffer.c[i].array[j - 1].data2 = j + 1;
1062 buffer.c[i].offset++;
1063 }
1064 }
1065
1066 for (i = 0; i < num_threads; i++) {
1067 ret = pthread_create(&test_threads[i], NULL,
1068 test_percpu_memcpy_buffer_thread,
1069 &buffer);
1070 if (ret) {
1071 errno = ret;
1072 perror("pthread_create");
1073 abort();
1074 }
1075 }
1076
1077 for (i = 0; i < num_threads; i++) {
1078 ret = pthread_join(test_threads[i], NULL);
1079 if (ret) {
1080 errno = ret;
1081 perror("pthread_join");
1082 abort();
1083 }
1084 }
1085
1086 for (i = 0; i < CPU_SETSIZE; i++) {
1087 struct percpu_memcpy_buffer_node item;
1088
1089 if (!CPU_ISSET(i, &allowed_cpus))
1090 continue;
1091
1092 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1093 sum += item.data1;
1094 sum += item.data2;
1095 }
1096 free(buffer.c[i].array);
1097 }
1098
1099 /*
1100 * All entries should now be accounted for (unless some external
1101 * actor is interfering with our allowed affinity while this
1102 * test is running).
1103 */
1104 assert(sum == expected_sum);
1105 }
1106
1107
1108 static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
1109 {
1110 signals_delivered++;
1111 }
1112
1113 static int set_signal_handler(void)
1114 {
1115 int ret = 0;
1116 struct sigaction sa;
1117 sigset_t sigset;
1118
1119 ret = sigemptyset(&sigset);
1120 if (ret < 0) {
1121 perror("sigemptyset");
1122 return ret;
1123 }
1124
1125 sa.sa_handler = test_signal_interrupt_handler;
1126 sa.sa_mask = sigset;
1127 sa.sa_flags = 0;
1128 ret = sigaction(SIGUSR1, &sa, NULL);
1129 if (ret < 0) {
1130 perror("sigaction");
1131 return ret;
1132 }
1133
1134 printf_verbose("Signal handler set for SIGUSR1\n");
1135
1136 return ret;
1137 }
1138
1139 static void show_usage(char **argv)
1140 {
1141 printf("Usage : %s <OPTIONS>\n",
1142 argv[0]);
1143 printf("OPTIONS:\n");
1144 printf(" [-1 loops] Number of loops for delay injection 1\n");
1145 printf(" [-2 loops] Number of loops for delay injection 2\n");
1146 printf(" [-3 loops] Number of loops for delay injection 3\n");
1147 printf(" [-4 loops] Number of loops for delay injection 4\n");
1148 printf(" [-5 loops] Number of loops for delay injection 5\n");
1149 printf(" [-6 loops] Number of loops for delay injection 6\n");
1150 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1151 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1152 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1153 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1154 printf(" [-y] Yield\n");
1155 printf(" [-k] Kill thread with signal\n");
1156 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1157 printf(" [-t N] Number of threads (default 200)\n");
1158 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1159 printf(" [-d] Disable rseq system call (no initialization)\n");
1160 printf(" [-D M] Disable rseq for each M threads\n");
1161 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
1162 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
1163 printf(" [-c] Check if the rseq syscall is available.\n");
1164 printf(" [-v] Verbose output.\n");
1165 printf(" [-h] Show this help.\n");
1166 printf("\n");
1167 }
1168
1169 int main(int argc, char **argv)
1170 {
1171 int i;
1172
1173 for (i = 1; i < argc; i++) {
1174 if (argv[i][0] != '-')
1175 continue;
1176 switch (argv[i][1]) {
1177 case '1':
1178 case '2':
1179 case '3':
1180 case '4':
1181 case '5':
1182 case '6':
1183 case '7':
1184 case '8':
1185 case '9':
1186 if (argc < i + 2) {
1187 show_usage(argv);
1188 goto error;
1189 }
1190 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1191 i++;
1192 break;
1193 case 'm':
1194 if (argc < i + 2) {
1195 show_usage(argv);
1196 goto error;
1197 }
1198 opt_modulo = atol(argv[i + 1]);
1199 if (opt_modulo < 0) {
1200 show_usage(argv);
1201 goto error;
1202 }
1203 i++;
1204 break;
1205 case 's':
1206 if (argc < i + 2) {
1207 show_usage(argv);
1208 goto error;
1209 }
1210 opt_sleep = atol(argv[i + 1]);
1211 if (opt_sleep < 0) {
1212 show_usage(argv);
1213 goto error;
1214 }
1215 i++;
1216 break;
1217 case 'y':
1218 opt_yield = 1;
1219 break;
1220 case 'k':
1221 opt_signal = 1;
1222 break;
1223 case 'd':
1224 opt_disable_rseq = 1;
1225 break;
1226 case 'D':
1227 if (argc < i + 2) {
1228 show_usage(argv);
1229 goto error;
1230 }
1231 opt_disable_mod = atol(argv[i + 1]);
1232 if (opt_disable_mod < 0) {
1233 show_usage(argv);
1234 goto error;
1235 }
1236 i++;
1237 break;
1238 case 't':
1239 if (argc < i + 2) {
1240 show_usage(argv);
1241 goto error;
1242 }
1243 opt_threads = atol(argv[i + 1]);
1244 if (opt_threads < 0) {
1245 show_usage(argv);
1246 goto error;
1247 }
1248 i++;
1249 break;
1250 case 'r':
1251 if (argc < i + 2) {
1252 show_usage(argv);
1253 goto error;
1254 }
1255 opt_reps = atoll(argv[i + 1]);
1256 if (opt_reps < 0) {
1257 show_usage(argv);
1258 goto error;
1259 }
1260 i++;
1261 break;
1262 case 'h':
1263 show_usage(argv);
1264 goto end;
1265 case 'T':
1266 if (argc < i + 2) {
1267 show_usage(argv);
1268 goto error;
1269 }
1270 opt_test = *argv[i + 1];
1271 switch (opt_test) {
1272 case 's':
1273 case 'l':
1274 case 'i':
1275 case 'b':
1276 case 'm':
1277 break;
1278 default:
1279 show_usage(argv);
1280 goto error;
1281 }
1282 i++;
1283 break;
1284 case 'v':
1285 verbose = 1;
1286 break;
1287 case 'M':
1288 opt_mb = 1;
1289 break;
1290 case 'c':
1291 if (rseq_available()) {
1292 printf_verbose("The rseq syscall is available.\n");
1293 goto end;
1294 } else {
1295 printf_verbose("The rseq syscall is unavailable.\n");
1296 goto no_rseq;
1297 }
1298 default:
1299 show_usage(argv);
1300 goto error;
1301 }
1302 }
1303
1304 loop_cnt_1 = loop_cnt[1];
1305 loop_cnt_2 = loop_cnt[2];
1306 loop_cnt_3 = loop_cnt[3];
1307 loop_cnt_4 = loop_cnt[4];
1308 loop_cnt_5 = loop_cnt[5];
1309 loop_cnt_6 = loop_cnt[6];
1310
1311 if (set_signal_handler())
1312 goto error;
1313
1314 if (!opt_disable_rseq && rseq_register_current_thread())
1315 goto error;
1316 switch (opt_test) {
1317 case 's':
1318 printf_verbose("spinlock\n");
1319 test_percpu_spinlock();
1320 break;
1321 case 'l':
1322 printf_verbose("linked list\n");
1323 test_percpu_list();
1324 break;
1325 case 'b':
1326 printf_verbose("buffer\n");
1327 test_percpu_buffer();
1328 break;
1329 case 'm':
1330 printf_verbose("memcpy buffer\n");
1331 test_percpu_memcpy_buffer();
1332 break;
1333 case 'i':
1334 printf_verbose("counter increment\n");
1335 test_percpu_inc();
1336 break;
1337 }
1338 if (!opt_disable_rseq && rseq_unregister_current_thread())
1339 abort();
1340 end:
1341 return 0;
1342
1343 error:
1344 return -1;
1345
1346 no_rseq:
1347 return 2;
1348 }