Restartable sequences: self-tests
tools/testing/selftests/rseq/param_test.c
#define _GNU_SOURCE
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>

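/* glibc does not provide a gettid() wrapper; invoke the system call directly. */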
static inline pid_t gettid(void)
{
	return syscall(__NR_gettid);
}

#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

static int opt_modulo;

static int opt_yield, opt_signal, opt_sleep, opt_fallback_cnt = 3,
		opt_disable_rseq, opt_threads = 200,
		opt_reps = 5000, opt_disable_mod = 0, opt_test = 's';

static __thread unsigned int signals_delivered;

static struct rseq_lock rseq_lock;

#ifndef BENCHMARK

static __thread unsigned int yield_mod_cnt, nr_retry;

#define printf_nobench(fmt, ...)	printf(fmt, ## __VA_ARGS__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4])

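/*
 * Assembly-level delay injection: RSEQ_INJECT_ASM(n) spins for
 * loop_cnt[n] iterations inside the rseq critical section, widening
 * the window in which preemption or signal delivery can abort the
 * sequence.
 */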
#if defined(__x86_64__) || defined(__i386__)

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov %[loop_cnt_" #n "], %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif

#define RSEQ_INJECT_FAILED \
	nr_retry++;

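/*
 * C-level injection point: spin for loop_cnt[n] iterations, or, when
 * loop_cnt[n] == -1 and -m is set, yield/sleep/raise SIGUSR1 on every
 * opt_modulo-th execution instead.
 */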
#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}

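/*
 * Number of aborted attempts after which do_rseq() falls back to the
 * lock-based slow path (tunable with -f, as implemented in the
 * accompanying rseq.h).
 */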
#define RSEQ_FALLBACK_CNT \
	opt_fallback_cnt

#else

#define printf_nobench(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"

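/*
 * Per-cpu entries are padded to 128 bytes so that entries belonging to
 * different CPUs sit on distinct cache lines, avoiding false sharing.
 */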
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;	/* intptr_t: committed through an intptr_t * by do_rseq() */
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	int reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	int reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

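/*
 * As used throughout this file, do_rseq() runs the following statement
 * block on the current CPU (reported through "cpu") and then attempts
 * to commit newval to *targetptr as a single restartable sequence. The
 * block can set "result" to false to decline the commit; on preemption,
 * migration, or signal delivery the sequence is aborted and retried,
 * eventually falling back to taking rseq_lock after RSEQ_FALLBACK_CNT
 * attempts. (A summary of the do_rseq() contract from the accompanying
 * rseq.h.)
 */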
/* A simple per-cpu spinlock. Returns the CPU on which the lock was acquired. */
static int rseq_percpu_lock(struct percpu_lock *lock)
{
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

	for (;;) {
		do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
			{
				if (unlikely(lock->c[cpu].v)) {
					result = false;
				} else {
					newval = 1;
					targetptr = (intptr_t *)&lock->c[cpu].v;
				}
			});
		if (likely(result))
			break;
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches smp_store_release().
	 */
	smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * smp_acquire__after_ctrl_dep().
	 */
	smp_store_release(&lock->c[cpu].v, 0);
}

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	int i, cpu;

	if (!opt_disable_rseq && thread_data->reg
			&& rseq_init_current_thread())
		abort();
	for (i = 0; i < thread_data->reps; i++) {
		cpu = rseq_percpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		/* Guard the modulo against division by zero when reps < 10. */
		if (i != 0 && thread_data->reps >= 10
				&& !(i % (thread_data->reps / 10)))
			printf("tid %d: count %d\n", (int) gettid(), i);
#endif
	}
	printf_nobench("tid %d: number of retries: %u, signals delivered: %u, nr_fallback %u, nr_fallback_wait %u\n",
		       (int) gettid(), nr_retry, signals_delivered,
		       __rseq_thread_state.fallback_cnt,
		       __rseq_thread_state.fallback_wait_cnt);
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, sum, ret;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				test_percpu_spinlock_thread, &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == opt_reps * num_threads);
}

void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	int i;

	if (!opt_disable_rseq && thread_data->reg
			&& rseq_init_current_thread())
		abort();
	for (i = 0; i < thread_data->reps; i++) {
		struct rseq_state rseq_state;
		intptr_t *targetptr, newval;
		int cpu;
		bool result;

		do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
			{
				newval = (intptr_t)data->c[cpu].count + 1;
				targetptr = (intptr_t *)&data->c[cpu].count;
			});

#ifndef BENCHMARK
		/* Guard the modulo against division by zero when reps < 10. */
		if (i != 0 && thread_data->reps >= 10
				&& !(i % (thread_data->reps / 10)))
			printf("tid %d: count %d\n", (int) gettid(), i);
#endif
	}
	printf_nobench("tid %d: number of retries: %u, signals delivered: %u, nr_fallback %u, nr_fallback_wait %u\n",
		       (int) gettid(), nr_retry, signals_delivered,
		       __rseq_thread_state.fallback_cnt,
		       __rseq_thread_state.fallback_wait_cnt);
	return NULL;
}

void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, sum, ret;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				test_percpu_inc_thread, &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == opt_reps * num_threads);
}

int percpu_list_push(struct percpu_list *list, struct percpu_list_node *node)
{
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

	do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
		{
			newval = (intptr_t)node;
			targetptr = (intptr_t *)&list->c[cpu].head;
			node->next = list->c[cpu].head;
		});

	return cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of
 * the rseq primitive allows us to implement pop without concern over
 * ABA-type races.
 */
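/*
 * Concretely (a sketch of the race avoided here): with a cmpxchg-based
 * pop, a thread could load head == A and next == B, be preempted while
 * other threads pop A and B and push A back, and then successfully
 * cmpxchg head from A to the now-stale pointer B. Because the rseq
 * sequence is aborted and restarted on preemption, the stale next
 * value is never committed.
 */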
struct percpu_list_node *percpu_list_pop(struct percpu_list *list)
{
	struct percpu_list_node *head, *next;
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

	do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
		{
			head = list->c[cpu].head;
			if (!head) {
				result = false;
			} else {
				next = head->next;
				newval = (intptr_t) next;
				targetptr = (intptr_t *) &list->c[cpu].head;
			}
		});

	return head;
}

void *test_percpu_list_thread(void *arg)
{
	int i;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (rseq_init_current_thread())
		abort();

	for (i = 0; i < opt_reps; i++) {
		struct percpu_list_node *node = percpu_list_pop(list);

		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node)
			percpu_list_push(list, node);
	}

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	long sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		cpu_set_t pin_mask;
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		CPU_ZERO(&pin_mask);
		CPU_SET(i, &pin_mask);
		sched_setaffinity(0, sizeof(pin_mask), &pin_mask);

		while ((node = percpu_list_pop(&list))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}

static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_nobench("Signal handler set for SIGUSR1\n");

	return ret;
}

static void show_usage(int argc, char **argv)
{
	printf("Usage: %s <OPTIONS>\n",
		argv[0]);
	printf("OPTIONS:\n");
	printf("	[-1 loops] Number of loops for delay injection 1\n");
	printf("	[-2 loops] Number of loops for delay injection 2\n");
	printf("	[-3 loops] Number of loops for delay injection 3\n");
	printf("	[-4 loops] Number of loops for delay injection 4\n");
	printf("	[-5 loops] Number of loops for delay injection 5 (-1 to enable -m)\n");
	printf("	[-6 loops] Number of loops for delay injection 6 (-1 to enable -m)\n");
	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf("	[-y] Yield\n");
	printf("	[-k] Kill thread with signal\n");
	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf("	[-f N] Use fallback every N failures (>= 1)\n");
	printf("	[-t N] Number of threads (default 200)\n");
	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
	printf("	[-d] Disable rseq system call (no initialization)\n");
	printf("	[-D M] Disable rseq for one thread in every M threads\n");
	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (i)ncrement\n");
	printf("	[-h] Show this help.\n");
	printf("\n");
}

int main(int argc, char **argv)
{
	int i;

	if (rseq_init_lock(&rseq_lock)) {
		perror("rseq_init_lock");
		return -1;
	}
	if (set_signal_handler())
		goto error;
	for (i = 1; i < argc; i++) {
		if (argv[i][0] != '-')
			continue;
		switch (argv[i][1]) {
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
			i++;
			break;
		case 'm':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_modulo = atol(argv[i + 1]);
			if (opt_modulo < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 's':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_sleep = atol(argv[i + 1]);
			if (opt_sleep < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'y':
			opt_yield = 1;
			break;
		case 'k':
			opt_signal = 1;
			break;
		case 'd':
			opt_disable_rseq = 1;
			break;
		case 'D':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_disable_mod = atol(argv[i + 1]);
			if (opt_disable_mod < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'f':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_fallback_cnt = atol(argv[i + 1]);
			if (opt_fallback_cnt < 1) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 't':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_threads = atol(argv[i + 1]);
			if (opt_threads < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'r':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_reps = atol(argv[i + 1]);
			if (opt_reps < 0) {
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		case 'h':
			show_usage(argc, argv);
			goto end;
		case 'T':
			if (argc < i + 2) {
				show_usage(argc, argv);
				goto error;
			}
			opt_test = *argv[i + 1];
			switch (opt_test) {
			case 's':
			case 'l':
			case 'i':
				break;
			default:
				show_usage(argc, argv);
				goto error;
			}
			i++;
			break;
		default:
			show_usage(argc, argv);
			goto error;
		}
	}

	if (!opt_disable_rseq && rseq_init_current_thread())
		goto error;
	switch (opt_test) {
	case 's':
		printf_nobench("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_nobench("linked list\n");
		test_percpu_list();
		break;
	case 'i':
		printf_nobench("counter increment\n");
		test_percpu_inc();
		break;
	}
end:
	return 0;

error:
	if (rseq_destroy_lock(&rseq_lock))
		perror("rseq_destroy_lock");
	return -1;
}
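
Example invocation (illustrative option values): ./param_test -T l -t 16 -r 10000 -y
runs the per-cpu linked-list test with 16 threads, 10000 repetitions per
thread, and yield injection enabled to encourage list shuffling.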