Adapt spinlock param test to percpu alloc
[librseq.git] / tests / param_test.c
1 // SPDX-License-Identifier: MIT
2 // SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE
5 #endif
6 #include <assert.h>
7 #include <linux/version.h>
8 #include <linux/membarrier.h>
9 #include <pthread.h>
10 #include <sched.h>
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <syscall.h>
16 #include <unistd.h>
17 #include <poll.h>
18 #include <sys/types.h>
#include <sys/mman.h>	/* PROT_* / MAP_* flags used for the rseq percpu pool. */
19 #include <signal.h>
20 #include <errno.h>
21 #include <stddef.h>
22 #include <stdbool.h>
23 #include <rseq/percpu-alloc.h>
24
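/*
 * Length of the per-CPU memory area backing the rseq percpu pool created
 * in test_percpu_spinlock() below (second argument to
 * rseq_percpu_pool_create()).
 */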
25 #define PERCPU_POOL_LEN (1024*1024) /* 1MB */
26
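/*
 * Fallback definitions matching the membarrier UAPI introduced in Linux
 * 5.10, so the test also builds against older kernel headers.
 */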
27 #if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
28 enum {
29 MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7),
30 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8),
31 };
32
33 enum {
34 MEMBARRIER_CMD_FLAG_CPU = (1 << 0),
35 };
36 #endif
37
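/* Delay-injection loop counts, set through the -1 .. -9 command line options. */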
38 #define NR_INJECT 9
39 static int loop_cnt[NR_INJECT + 1];
40
41 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
42 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
43 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
44 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
45 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
46 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
47
48 static int opt_modulo, verbose;
49
50 static int opt_yield, opt_signal, opt_sleep,
51 opt_disable_rseq, opt_threads = 200,
52 opt_disable_mod = 0, opt_test = 's';
53
54 static long long opt_reps = 5000;
55
56 static __thread __attribute__((tls_model("initial-exec")))
57 unsigned int signals_delivered;
58
59 static inline pid_t rseq_gettid(void)
60 {
61 return syscall(__NR_gettid);
62 }
63
64 #ifndef BENCHMARK
65
66 static __thread __attribute__((tls_model("initial-exec"), unused))
67 int yield_mod_cnt, nr_abort;
68
69 #define printf_verbose(fmt, ...) \
70 do { \
71 if (verbose) \
72 printf(fmt, ## __VA_ARGS__); \
73 } while (0)
74
75 #ifdef __i386__
76
77 #define INJECT_ASM_REG "eax"
78
79 #define RSEQ_INJECT_CLOBBER \
80 , INJECT_ASM_REG
81
82 #define RSEQ_INJECT_ASM(n) \
83 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
84 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
85 "jz 333f\n\t" \
86 "222:\n\t" \
87 "dec %%" INJECT_ASM_REG "\n\t" \
88 "jnz 222b\n\t" \
89 "333:\n\t"
90
91 #elif defined(__x86_64__)
92
93 #define INJECT_ASM_REG_P "rax"
94 #define INJECT_ASM_REG "eax"
95
96 #define RSEQ_INJECT_CLOBBER \
97 , INJECT_ASM_REG_P \
98 , INJECT_ASM_REG
99
100 #define RSEQ_INJECT_ASM(n) \
101 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
102 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
103 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
104 "jz 333f\n\t" \
105 "222:\n\t" \
106 "dec %%" INJECT_ASM_REG "\n\t" \
107 "jnz 222b\n\t" \
108 "333:\n\t"
109
110 #elif defined(__s390__)
111
112 #define RSEQ_INJECT_INPUT \
113 , [loop_cnt_1]"m"(loop_cnt[1]) \
114 , [loop_cnt_2]"m"(loop_cnt[2]) \
115 , [loop_cnt_3]"m"(loop_cnt[3]) \
116 , [loop_cnt_4]"m"(loop_cnt[4]) \
117 , [loop_cnt_5]"m"(loop_cnt[5]) \
118 , [loop_cnt_6]"m"(loop_cnt[6])
119
120 #define INJECT_ASM_REG "r12"
121
122 #define RSEQ_INJECT_CLOBBER \
123 , INJECT_ASM_REG
124
125 #define RSEQ_INJECT_ASM(n) \
126 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
127 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
128 "je 333f\n\t" \
129 "222:\n\t" \
130 "ahi %%" INJECT_ASM_REG ", -1\n\t" \
131 "jnz 222b\n\t" \
132 "333:\n\t"
133
134 #elif defined(__ARMEL__)
135
136 #define RSEQ_INJECT_INPUT \
137 , [loop_cnt_1]"m"(loop_cnt[1]) \
138 , [loop_cnt_2]"m"(loop_cnt[2]) \
139 , [loop_cnt_3]"m"(loop_cnt[3]) \
140 , [loop_cnt_4]"m"(loop_cnt[4]) \
141 , [loop_cnt_5]"m"(loop_cnt[5]) \
142 , [loop_cnt_6]"m"(loop_cnt[6])
143
144 #define INJECT_ASM_REG "r4"
145
146 #define RSEQ_INJECT_CLOBBER \
147 , INJECT_ASM_REG
148
149 #define RSEQ_INJECT_ASM(n) \
150 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
151 "cmp " INJECT_ASM_REG ", #0\n\t" \
152 "beq 333f\n\t" \
153 "222:\n\t" \
154 "subs " INJECT_ASM_REG ", #1\n\t" \
155 "bne 222b\n\t" \
156 "333:\n\t"
157
158 #elif defined(__AARCH64EL__)
159
160 #define RSEQ_INJECT_INPUT \
161 , [loop_cnt_1] "Qo" (loop_cnt[1]) \
162 , [loop_cnt_2] "Qo" (loop_cnt[2]) \
163 , [loop_cnt_3] "Qo" (loop_cnt[3]) \
164 , [loop_cnt_4] "Qo" (loop_cnt[4]) \
165 , [loop_cnt_5] "Qo" (loop_cnt[5]) \
166 , [loop_cnt_6] "Qo" (loop_cnt[6])
167
168 #define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
169
170 #define RSEQ_INJECT_ASM(n) \
171 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
172 " cbz " INJECT_ASM_REG ", 333f\n" \
173 "222:\n" \
174 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
175 " cbnz " INJECT_ASM_REG ", 222b\n" \
176 "333:\n"
177
178 #elif defined(__PPC__)
179
180 #define RSEQ_INJECT_INPUT \
181 , [loop_cnt_1]"m"(loop_cnt[1]) \
182 , [loop_cnt_2]"m"(loop_cnt[2]) \
183 , [loop_cnt_3]"m"(loop_cnt[3]) \
184 , [loop_cnt_4]"m"(loop_cnt[4]) \
185 , [loop_cnt_5]"m"(loop_cnt[5]) \
186 , [loop_cnt_6]"m"(loop_cnt[6])
187
188 #define INJECT_ASM_REG "r18"
189
190 #define RSEQ_INJECT_CLOBBER \
191 , INJECT_ASM_REG
192
193 #define RSEQ_INJECT_ASM(n) \
194 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
195 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
196 "beq 333f\n\t" \
197 "222:\n\t" \
198 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
199 "bne 222b\n\t" \
200 "333:\n\t"
201
202 #elif defined(__mips__)
203
204 #define RSEQ_INJECT_INPUT \
205 , [loop_cnt_1]"m"(loop_cnt[1]) \
206 , [loop_cnt_2]"m"(loop_cnt[2]) \
207 , [loop_cnt_3]"m"(loop_cnt[3]) \
208 , [loop_cnt_4]"m"(loop_cnt[4]) \
209 , [loop_cnt_5]"m"(loop_cnt[5]) \
210 , [loop_cnt_6]"m"(loop_cnt[6])
211
212 #define INJECT_ASM_REG "$5"
213
214 #define RSEQ_INJECT_CLOBBER \
215 , INJECT_ASM_REG
216
217 #define RSEQ_INJECT_ASM(n) \
218 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
219 "beqz " INJECT_ASM_REG ", 333f\n\t" \
220 "222:\n\t" \
221 "addiu " INJECT_ASM_REG ", -1\n\t" \
222 "bnez " INJECT_ASM_REG ", 222b\n\t" \
223 "333:\n\t"
224
225 #elif defined(__riscv)
226
227 #define RSEQ_INJECT_INPUT \
228 , [loop_cnt_1]"m"(loop_cnt[1]) \
229 , [loop_cnt_2]"m"(loop_cnt[2]) \
230 , [loop_cnt_3]"m"(loop_cnt[3]) \
231 , [loop_cnt_4]"m"(loop_cnt[4]) \
232 , [loop_cnt_5]"m"(loop_cnt[5]) \
233 , [loop_cnt_6]"m"(loop_cnt[6])
234
235 #define INJECT_ASM_REG "t1"
236
237 #define RSEQ_INJECT_CLOBBER \
238 , INJECT_ASM_REG
239
240 #define RSEQ_INJECT_ASM(n) \
241 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
242 "beqz " INJECT_ASM_REG ", 333f\n\t" \
243 "222:\n\t" \
244 "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
245 "bnez " INJECT_ASM_REG ", 222b\n\t" \
246 "333:\n\t"
247
248 #else
249 #error unsupported target
250 #endif
251
252 #define RSEQ_INJECT_FAILED \
253 nr_abort++;
254
255 #define RSEQ_INJECT_C(n) \
256 { \
257 int loc_i, loc_nr_loops = loop_cnt[n]; \
258 \
259 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
260 rseq_barrier(); \
261 } \
262 if (loc_nr_loops == -1 && opt_modulo) { \
263 if (yield_mod_cnt == opt_modulo - 1) { \
264 if (opt_sleep > 0) \
265 poll(NULL, 0, opt_sleep); \
266 if (opt_yield) \
267 sched_yield(); \
268 if (opt_signal) \
269 raise(SIGUSR1); \
270 yield_mod_cnt = 0; \
271 } else { \
272 yield_mod_cnt++; \
273 } \
274 } \
275 }
276
277 #else
278
279 #define printf_verbose(fmt, ...)
280
281 #endif /* BENCHMARK */
282
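/*
 * The RSEQ_INJECT_* hooks defined above are picked up by the rseq headers
 * included below: RSEQ_INJECT_ASM() injects delay loops inside the
 * critical sections, RSEQ_INJECT_C() injects delays/yields/signals from C
 * code, and RSEQ_INJECT_FAILED runs on abort (here: incrementing nr_abort).
 */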
283 #include <rseq/rseq.h>
284
285 static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
286
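/* membarrier(2) has no libc wrapper; invoke it through syscall(2). */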
287 static int sys_membarrier(int cmd, int flags, int cpu_id)
288 {
289 return syscall(__NR_membarrier, cmd, flags, cpu_id);
290 }
291
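/*
 * The membarrier test relies on rseq_load_add_load_add_store__ptr(),
 * which is only implemented on architectures providing the
 * offset-deref-addv operation.
 */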
292 #ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
293 #define TEST_MEMBARRIER
294 #endif
295
296 #ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
297 # define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
298 static
299 int get_current_cpu_id(void)
300 {
301 return rseq_current_mm_cid();
302 }
303 static
304 bool rseq_validate_cpu_id(void)
305 {
306 return rseq_mm_cid_available();
307 }
308 static
309 bool rseq_use_cpu_index(void)
310 {
311 return false; /* Use mm_cid */
312 }
313 # ifdef TEST_MEMBARRIER
314 /*
315 * Membarrier does not currently support targeting a mm_cid, so
316 * issue the barrier on all cpus.
317 */
318 static
319 int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
320 {
321 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
322 0, 0);
323 }
324 # endif /* TEST_MEMBARRIER */
325 #else
326 # define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
327 static
328 int get_current_cpu_id(void)
329 {
330 return rseq_cpu_start();
331 }
332 static
333 bool rseq_validate_cpu_id(void)
334 {
335 return rseq_current_cpu_raw() >= 0;
336 }
337 static
338 bool rseq_use_cpu_index(void)
339 {
340 return true; /* Use cpu_id as index. */
341 }
342 # ifdef TEST_MEMBARRIER
343 static
344 int rseq_membarrier_expedited(int cpu)
345 {
346 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
347 MEMBARRIER_CMD_FLAG_CPU, cpu);
348 }
349 # endif /* TEST_MEMBARRIER */
350 #endif
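
/*
 * Hypothetical example (not part of the test): the RSEQ_PERCPU mode
 * selected above is passed to each rseq critical section helper together
 * with the index returned by get_current_cpu_id(). "counters" is assumed
 * to be an array indexed by cpu number or mm concurrency id, and rseq is
 * assumed to be registered for the current thread.
 */
static __attribute__((unused))
int example_percpu_add(intptr_t *counters)
{
	int ret, cpu;

	do {
		cpu = get_current_cpu_id();
		/* Increment the slot owned by the current cpu/mm_cid. */
		ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
				&counters[cpu], 1, cpu);
	} while (rseq_unlikely(ret));
	return cpu;
}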
351
352 struct percpu_lock {
353 intptr_t v;
354 };
355
356 struct test_data_entry {
357 intptr_t count;
358 };
359
360 struct spinlock_test_data {
361 struct percpu_lock lock;
362 intptr_t count;
363 };
364
365 struct spinlock_thread_test_data {
366 struct spinlock_test_data *data; /* Per-cpu pointer */
367 long long reps;
368 int reg;
369 };
370
371 struct inc_test_data {
372 struct test_data_entry c[CPU_SETSIZE];
373 };
374
375 struct inc_thread_test_data {
376 struct inc_test_data *data;
377 long long reps;
378 int reg;
379 };
380
381 struct percpu_list_node {
382 intptr_t data;
383 struct percpu_list_node *next;
384 };
385
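/*
 * Per-cpu entries are aligned on 128 bytes, presumably to keep neighbouring
 * entries on distinct cache lines and avoid false sharing.
 */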
386 struct percpu_list_entry {
387 struct percpu_list_node *head;
388 } __attribute__((aligned(128)));
389
390 struct percpu_list {
391 struct percpu_list_entry c[CPU_SETSIZE];
392 };
393
394 #define BUFFER_ITEM_PER_CPU 100
395
396 struct percpu_buffer_node {
397 intptr_t data;
398 };
399
400 struct percpu_buffer_entry {
401 intptr_t offset;
402 intptr_t buflen;
403 struct percpu_buffer_node **array;
404 } __attribute__((aligned(128)));
405
406 struct percpu_buffer {
407 struct percpu_buffer_entry c[CPU_SETSIZE];
408 };
409
410 #define MEMCPY_BUFFER_ITEM_PER_CPU 100
411
412 struct percpu_memcpy_buffer_node {
413 intptr_t data1;
414 uint64_t data2;
415 };
416
417 struct percpu_memcpy_buffer_entry {
418 intptr_t offset;
419 intptr_t buflen;
420 struct percpu_memcpy_buffer_node *array;
421 } __attribute__((aligned(128)));
422
423 struct percpu_memcpy_buffer {
424 struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
425 };
426
427 /* A simple percpu spinlock. Grabs lock on current cpu. */
428 static int rseq_this_cpu_lock(struct percpu_lock *lock /* Per-cpu pointer */)
429 {
430 int cpu;
431
432 for (;;) {
433 int ret;
434
435 cpu = get_current_cpu_id();
436 if (cpu < 0) {
437 fprintf(stderr, "pid: %d, tid: %d, cpu: %d, cid: %d\n",
438 getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
439 abort();
440 }
441 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
442 &rseq_percpu_ptr(lock, cpu)->v,
443 0, 1, cpu);
444 if (rseq_likely(!ret))
445 break;
446 /* Retry if comparison fails or rseq aborts. */
447 }
448 /*
449 * Acquire semantic when taking lock after control dependency.
450 * Matches rseq_smp_store_release().
451 */
452 rseq_smp_acquire__after_ctrl_dep();
453 return cpu;
454 }
455
456 static void rseq_percpu_unlock(struct percpu_lock *lock /* Per-cpu pointer */, int cpu)
457 {
458 assert(rseq_percpu_ptr(lock, cpu)->v == 1);
459 /*
460 * Release lock, with release semantic. Matches
461 * rseq_smp_acquire__after_ctrl_dep().
462 */
463 rseq_smp_store_release(&rseq_percpu_ptr(lock, cpu)->v, 0);
464 }
465
466 static void *test_percpu_spinlock_thread(void *arg)
467 {
468 struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
469 struct spinlock_test_data *data = thread_data->data; /* Per-cpu pointer */
470 long long i, reps;
471
472 if (!opt_disable_rseq && thread_data->reg &&
473 rseq_register_current_thread())
474 abort();
475 reps = thread_data->reps;
476 for (i = 0; i < reps; i++) {
477 int cpu = rseq_this_cpu_lock(&data->lock);
478 rseq_percpu_ptr(data, cpu)->count++;
479 rseq_percpu_unlock(&data->lock, cpu);
480 #ifndef BENCHMARK
481 if (i != 0 && reps >= 10 && !(i % (reps / 10)))
482 printf_verbose("tid %d: count %lld\n",
483 (int) rseq_gettid(), i);
484 #endif
485 }
486 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
487 (int) rseq_gettid(), nr_abort, signals_delivered);
488 if (!opt_disable_rseq && thread_data->reg &&
489 rseq_unregister_current_thread())
490 abort();
491 return NULL;
492 }
493
494 /*
495 * A simple test which implements a sharded counter using a per-cpu
496 * lock. Obviously real applications might prefer to simply use a
497 * per-cpu increment; however, this is reasonable for a test and the
498 * lock can be extended to synchronize more complicated operations.
499 */
500 static void test_percpu_spinlock(void)
501 {
502 const int num_threads = opt_threads;
503 int i, ret;
504 uint64_t sum;
505 pthread_t test_threads[num_threads];
506 struct spinlock_test_data *data; /* Per-cpu pointer */
507 struct spinlock_thread_test_data thread_data[num_threads];
508 struct rseq_percpu_pool *mempool;
509
510 mempool = rseq_percpu_pool_create(sizeof(struct spinlock_test_data),
511 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
512 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
513 if (!mempool) {
514 perror("rseq_percpu_pool_create");
515 abort();
516 }
517 data = (struct spinlock_test_data *)rseq_percpu_zmalloc(mempool);
518 if (!data) {
519 perror("rseq_percpu_zmalloc");
520 abort();
521 }
522
523 for (i = 0; i < num_threads; i++) {
524 thread_data[i].reps = opt_reps;
525 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
526 thread_data[i].reg = 1;
527 else
528 thread_data[i].reg = 0;
529 thread_data[i].data = data;
530 ret = pthread_create(&test_threads[i], NULL,
531 test_percpu_spinlock_thread,
532 &thread_data[i]);
533 if (ret) {
534 errno = ret;
535 perror("pthread_create");
536 abort();
537 }
538 }
539
540 for (i = 0; i < num_threads; i++) {
541 ret = pthread_join(test_threads[i], NULL);
542 if (ret) {
543 errno = ret;
544 perror("pthread_join");
545 abort();
546 }
547 }
548
549 sum = 0;
550 for (i = 0; i < CPU_SETSIZE; i++)
551 sum += rseq_percpu_ptr(data, i)->count;
552
553 assert(sum == (uint64_t)opt_reps * num_threads);
554 rseq_percpu_free(data);
555 ret = rseq_percpu_pool_destroy(mempool);
556 if (ret) {
557 perror("rseq_percpu_pool_destroy");
558 abort();
559 }
560 }
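
/*
 * Hypothetical helper (never called): condensed view of the percpu
 * allocator lifecycle exercised by test_percpu_spinlock() above, with
 * error handling reduced to abort().
 */
static __attribute__((unused))
void example_percpu_pool_roundtrip(void)
{
	struct rseq_percpu_pool *pool;
	struct spinlock_test_data *data;

	pool = rseq_percpu_pool_create(sizeof(*data), PERCPU_POOL_LEN,
			CPU_SETSIZE, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
	if (!pool)
		abort();
	data = (struct spinlock_test_data *) rseq_percpu_zmalloc(pool);
	if (!data)
		abort();
	rseq_percpu_ptr(data, 0)->count++;	/* Access the CPU 0 instance. */
	rseq_percpu_free(data);
	if (rseq_percpu_pool_destroy(pool))
		abort();
}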
561
562 static void *test_percpu_inc_thread(void *arg)
563 {
564 struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
565 struct inc_test_data *data = thread_data->data;
566 long long i, reps;
567
568 if (!opt_disable_rseq && thread_data->reg &&
569 rseq_register_current_thread())
570 abort();
571 reps = thread_data->reps;
572 for (i = 0; i < reps; i++) {
573 int ret;
574
575 do {
576 int cpu;
577
578 cpu = get_current_cpu_id();
579 ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
580 &data->c[cpu].count, 1, cpu);
581 } while (rseq_unlikely(ret));
582 #ifndef BENCHMARK
583 if (i != 0 && reps >= 10 && !(i % (reps / 10)))
584 printf_verbose("tid %d: count %lld\n",
585 (int) rseq_gettid(), i);
586 #endif
587 }
588 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
589 (int) rseq_gettid(), nr_abort, signals_delivered);
590 if (!opt_disable_rseq && thread_data->reg &&
591 rseq_unregister_current_thread())
592 abort();
593 return NULL;
594 }
595
596 static void test_percpu_inc(void)
597 {
598 const int num_threads = opt_threads;
599 int i, ret;
600 uint64_t sum;
601 pthread_t test_threads[num_threads];
602 struct inc_test_data data;
603 struct inc_thread_test_data thread_data[num_threads];
604
605 memset(&data, 0, sizeof(data));
606 for (i = 0; i < num_threads; i++) {
607 thread_data[i].reps = opt_reps;
608 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
609 thread_data[i].reg = 1;
610 else
611 thread_data[i].reg = 0;
612 thread_data[i].data = &data;
613 ret = pthread_create(&test_threads[i], NULL,
614 test_percpu_inc_thread,
615 &thread_data[i]);
616 if (ret) {
617 errno = ret;
618 perror("pthread_create");
619 abort();
620 }
621 }
622
623 for (i = 0; i < num_threads; i++) {
624 ret = pthread_join(test_threads[i], NULL);
625 if (ret) {
626 errno = ret;
627 perror("pthread_join");
628 abort();
629 }
630 }
631
632 sum = 0;
633 for (i = 0; i < CPU_SETSIZE; i++)
634 sum += data.c[i].count;
635
636 assert(sum == (uint64_t)opt_reps * num_threads);
637 }
638
639 static void this_cpu_list_push(struct percpu_list *list,
640 struct percpu_list_node *node,
641 int *_cpu)
642 {
643 int cpu;
644
645 for (;;) {
646 intptr_t *targetptr, newval, expect;
647 int ret;
648
649 cpu = get_current_cpu_id();
650 /* Load list->c[cpu].head with single-copy atomicity. */
651 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
652 newval = (intptr_t)node;
653 targetptr = (intptr_t *)&list->c[cpu].head;
654 node->next = (struct percpu_list_node *)expect;
655 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
656 targetptr, expect, newval, cpu);
657 if (rseq_likely(!ret))
658 break;
659 /* Retry if comparison fails or rseq aborts. */
660 }
661 if (_cpu)
662 *_cpu = cpu;
663 }
664
665 /*
666 * Unlike a traditional lock-less linked list, the availability of an
667 * rseq primitive lets us implement pop without concern for ABA-type
668 * races: the whole pop runs within a single restartable critical section.
669 */
670 static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
671 int *_cpu)
672 {
673 struct percpu_list_node *node = NULL;
674 int cpu;
675
676 for (;;) {
677 struct percpu_list_node *head;
678 intptr_t *targetptr, expectnot, *load;
679 long offset;
680 int ret;
681
682 cpu = get_current_cpu_id();
683 targetptr = (intptr_t *)&list->c[cpu].head;
684 expectnot = (intptr_t)NULL;
685 offset = offsetof(struct percpu_list_node, next);
686 load = (intptr_t *)&head;
687 ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
688 targetptr, expectnot,
689 offset, load, cpu);
690 if (rseq_likely(!ret)) {
691 node = head;
692 break;
693 }
694 if (ret > 0)
695 break;
696 /* Retry if rseq aborts. */
697 }
698 if (_cpu)
699 *_cpu = cpu;
700 return node;
701 }
702
703 /*
704 * __percpu_list_pop is not safe against concurrent accesses. Should
705 * only be used on lists that are not concurrently modified.
706 */
707 static struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
708 {
709 struct percpu_list_node *node;
710
711 node = list->c[cpu].head;
712 if (!node)
713 return NULL;
714 list->c[cpu].head = node->next;
715 return node;
716 }
717
718 static void *test_percpu_list_thread(void *arg)
719 {
720 long long i, reps;
721 struct percpu_list *list = (struct percpu_list *)arg;
722
723 if (!opt_disable_rseq && rseq_register_current_thread())
724 abort();
725
726 reps = opt_reps;
727 for (i = 0; i < reps; i++) {
728 struct percpu_list_node *node;
729
730 node = this_cpu_list_pop(list, NULL);
731 if (opt_yield)
732 sched_yield(); /* encourage shuffling */
733 if (node)
734 this_cpu_list_push(list, node, NULL);
735 }
736
737 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
738 (int) rseq_gettid(), nr_abort, signals_delivered);
739 if (!opt_disable_rseq && rseq_unregister_current_thread())
740 abort();
741
742 return NULL;
743 }
744
745 /* Simultaneous modification to a per-cpu linked list from many threads. */
746 static void test_percpu_list(void)
747 {
748 const int num_threads = opt_threads;
749 int i, j, ret;
750 uint64_t sum = 0, expected_sum = 0;
751 struct percpu_list list;
752 pthread_t test_threads[num_threads];
753 cpu_set_t allowed_cpus;
754
755 memset(&list, 0, sizeof(list));
756
757 /* Generate list entries for every usable cpu. */
758 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
759 for (i = 0; i < CPU_SETSIZE; i++) {
760 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
761 continue;
762 for (j = 1; j <= 100; j++) {
763 struct percpu_list_node *node;
764
765 expected_sum += j;
766
767 node = (struct percpu_list_node *) malloc(sizeof(*node));
768 assert(node);
769 node->data = j;
770 node->next = list.c[i].head;
771 list.c[i].head = node;
772 }
773 }
774
775 for (i = 0; i < num_threads; i++) {
776 ret = pthread_create(&test_threads[i], NULL,
777 test_percpu_list_thread, &list);
778 if (ret) {
779 errno = ret;
780 perror("pthread_create");
781 abort();
782 }
783 }
784
785 for (i = 0; i < num_threads; i++) {
786 ret = pthread_join(test_threads[i], NULL);
787 if (ret) {
788 errno = ret;
789 perror("pthread_join");
790 abort();
791 }
792 }
793
794 for (i = 0; i < CPU_SETSIZE; i++) {
795 struct percpu_list_node *node;
796
797 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
798 continue;
799
800 while ((node = __percpu_list_pop(&list, i))) {
801 sum += node->data;
802 free(node);
803 }
804 }
805
806 /*
807 * All entries should now be accounted for (unless some external
808 * actor is interfering with our allowed affinity while this
809 * test is running).
810 */
811 assert(sum == expected_sum);
812 }
813
814 static bool this_cpu_buffer_push(struct percpu_buffer *buffer,
815 struct percpu_buffer_node *node,
816 int *_cpu)
817 {
818 bool result = false;
819 int cpu;
820
821 for (;;) {
822 intptr_t *targetptr_spec, newval_spec;
823 intptr_t *targetptr_final, newval_final;
824 intptr_t offset;
825 int ret;
826
827 cpu = get_current_cpu_id();
828 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
829 if (offset == buffer->c[cpu].buflen)
830 break;
831 newval_spec = (intptr_t)node;
832 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
833 newval_final = offset + 1;
834 targetptr_final = &buffer->c[cpu].offset;
835 ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
836 targetptr_final, offset, targetptr_spec,
837 newval_spec, newval_final, cpu);
838 if (rseq_likely(!ret)) {
839 result = true;
840 break;
841 }
842 /* Retry if comparison fails or rseq aborts. */
843 }
844 if (_cpu)
845 *_cpu = cpu;
846 return result;
847 }
848
849 static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
850 int *_cpu)
851 {
852 struct percpu_buffer_node *head;
853 int cpu;
854
855 for (;;) {
856 intptr_t *targetptr, newval;
857 intptr_t offset;
858 int ret;
859
860 cpu = get_current_cpu_id();
861 /* Load offset with single-copy atomicity. */
862 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
863 if (offset == 0) {
864 head = NULL;
865 break;
866 }
867 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
868 newval = offset - 1;
869 targetptr = (intptr_t *)&buffer->c[cpu].offset;
870 ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
871 targetptr, offset,
872 (intptr_t *)&buffer->c[cpu].array[offset - 1],
873 (intptr_t)head, newval, cpu);
874 if (rseq_likely(!ret))
875 break;
876 /* Retry if comparison fails or rseq aborts. */
877 }
878 if (_cpu)
879 *_cpu = cpu;
880 return head;
881 }
882
883 /*
884 * __percpu_buffer_pop is not safe against concurrent accesses. Should
885 * only be used on buffers that are not concurrently modified.
886 */
887 static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
888 int cpu)
889 {
890 struct percpu_buffer_node *head;
891 intptr_t offset;
892
893 offset = buffer->c[cpu].offset;
894 if (offset == 0)
895 return NULL;
896 head = buffer->c[cpu].array[offset - 1];
897 buffer->c[cpu].offset = offset - 1;
898 return head;
899 }
900
901 static void *test_percpu_buffer_thread(void *arg)
902 {
903 long long i, reps;
904 struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
905
906 if (!opt_disable_rseq && rseq_register_current_thread())
907 abort();
908
909 reps = opt_reps;
910 for (i = 0; i < reps; i++) {
911 struct percpu_buffer_node *node;
912
913 node = this_cpu_buffer_pop(buffer, NULL);
914 if (opt_yield)
915 sched_yield(); /* encourage shuffling */
916 if (node) {
917 if (!this_cpu_buffer_push(buffer, node, NULL)) {
918 /* Should increase buffer size. */
919 abort();
920 }
921 }
922 }
923
924 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
925 (int) rseq_gettid(), nr_abort, signals_delivered);
926 if (!opt_disable_rseq && rseq_unregister_current_thread())
927 abort();
928
929 return NULL;
930 }
931
932 /* Simultaneous modification to a per-cpu buffer from many threads. */
933 static void test_percpu_buffer(void)
934 {
935 const int num_threads = opt_threads;
936 int i, j, ret;
937 uint64_t sum = 0, expected_sum = 0;
938 struct percpu_buffer buffer;
939 pthread_t test_threads[num_threads];
940 cpu_set_t allowed_cpus;
941
942 memset(&buffer, 0, sizeof(buffer));
943
944 /* Generate buffer entries for every usable cpu. */
945 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
946 for (i = 0; i < CPU_SETSIZE; i++) {
947 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
948 continue;
949 /* Worst-case is every item in the same CPU. */
950 buffer.c[i].array =
951 (struct percpu_buffer_node **)
952 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
953 BUFFER_ITEM_PER_CPU);
954 assert(buffer.c[i].array);
955 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
956 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
957 struct percpu_buffer_node *node;
958
959 expected_sum += j;
960
961 /*
962 * We could theoretically put the word-sized
963 * "data" directly in the buffer. However, we
964 * want to model objects that would not fit
965 * within a single word, so allocate an object
966 * for each node.
967 */
968 node = (struct percpu_buffer_node *) malloc(sizeof(*node));
969 assert(node);
970 node->data = j;
971 buffer.c[i].array[j - 1] = node;
972 buffer.c[i].offset++;
973 }
974 }
975
976 for (i = 0; i < num_threads; i++) {
977 ret = pthread_create(&test_threads[i], NULL,
978 test_percpu_buffer_thread, &buffer);
979 if (ret) {
980 errno = ret;
981 perror("pthread_create");
982 abort();
983 }
984 }
985
986 for (i = 0; i < num_threads; i++) {
987 ret = pthread_join(test_threads[i], NULL);
988 if (ret) {
989 errno = ret;
990 perror("pthread_join");
991 abort();
992 }
993 }
994
995 for (i = 0; i < CPU_SETSIZE; i++) {
996 struct percpu_buffer_node *node;
997
998 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
999 continue;
1000
1001 while ((node = __percpu_buffer_pop(&buffer, i))) {
1002 sum += node->data;
1003 free(node);
1004 }
1005 free(buffer.c[i].array);
1006 }
1007
1008 /*
1009 * All entries should now be accounted for (unless some external
1010 * actor is interfering with our allowed affinity while this
1011 * test is running).
1012 */
1013 assert(sum == expected_sum);
1014 }
1015
1016 static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
1017 struct percpu_memcpy_buffer_node item,
1018 int *_cpu)
1019 {
1020 bool result = false;
1021 int cpu;
1022
1023 for (;;) {
1024 intptr_t *targetptr_final, newval_final, offset;
1025 char *destptr, *srcptr;
1026 size_t copylen;
1027 int ret;
1028
1029 cpu = get_current_cpu_id();
1030 /* Load offset with single-copy atomicity. */
1031 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
1032 if (offset == buffer->c[cpu].buflen)
1033 break;
1034 destptr = (char *)&buffer->c[cpu].array[offset];
1035 srcptr = (char *)&item;
1036 /* copylen must be <= 4kB. */
1037 copylen = sizeof(item);
1038 newval_final = offset + 1;
1039 targetptr_final = &buffer->c[cpu].offset;
1040 ret = rseq_load_cbne_memcpy_store__ptr(
1041 opt_mo, RSEQ_PERCPU,
1042 targetptr_final, offset,
1043 destptr, srcptr, copylen,
1044 newval_final, cpu);
1045 if (rseq_likely(!ret)) {
1046 result = true;
1047 break;
1048 }
1049 /* Retry if comparison fails or rseq aborts. */
1050 }
1051 if (_cpu)
1052 *_cpu = cpu;
1053 return result;
1054 }
1055
1056 static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1057 struct percpu_memcpy_buffer_node *item,
1058 int *_cpu)
1059 {
1060 bool result = false;
1061 int cpu;
1062
1063 for (;;) {
1064 intptr_t *targetptr_final, newval_final, offset;
1065 char *destptr, *srcptr;
1066 size_t copylen;
1067 int ret;
1068
1069 cpu = get_current_cpu_id();
1070 /* Load offset with single-copy atomicity. */
1071 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
1072 if (offset == 0)
1073 break;
1074 destptr = (char *)item;
1075 srcptr = (char *)&buffer->c[cpu].array[offset - 1];
1076 /* copylen must be <= 4kB. */
1077 copylen = sizeof(*item);
1078 newval_final = offset - 1;
1079 targetptr_final = &buffer->c[cpu].offset;
1080 ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1081 targetptr_final, offset, destptr, srcptr, copylen,
1082 newval_final, cpu);
1083 if (rseq_likely(!ret)) {
1084 result = true;
1085 break;
1086 }
1087 /* Retry if comparison fails or rseq aborts. */
1088 }
1089 if (_cpu)
1090 *_cpu = cpu;
1091 return result;
1092 }
1093
1094 /*
1095 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1096 * only be used on buffers that are not concurrently modified.
1097 */
1098 static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1099 struct percpu_memcpy_buffer_node *item,
1100 int cpu)
1101 {
1102 intptr_t offset;
1103
1104 offset = buffer->c[cpu].offset;
1105 if (offset == 0)
1106 return false;
1107 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
1108 buffer->c[cpu].offset = offset - 1;
1109 return true;
1110 }
1111
1112 static void *test_percpu_memcpy_buffer_thread(void *arg)
1113 {
1114 long long i, reps;
1115 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
1116
1117 if (!opt_disable_rseq && rseq_register_current_thread())
1118 abort();
1119
1120 reps = opt_reps;
1121 for (i = 0; i < reps; i++) {
1122 struct percpu_memcpy_buffer_node item;
1123 bool result;
1124
1125 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1126 if (opt_yield)
1127 sched_yield(); /* encourage shuffling */
1128 if (result) {
1129 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1130 /* Should increase buffer size. */
1131 abort();
1132 }
1133 }
1134 }
1135
1136 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
1137 (int) rseq_gettid(), nr_abort, signals_delivered);
1138 if (!opt_disable_rseq && rseq_unregister_current_thread())
1139 abort();
1140
1141 return NULL;
1142 }
1143
1144 /* Simultaneous modification to a per-cpu buffer from many threads. */
1145 static void test_percpu_memcpy_buffer(void)
1146 {
1147 const int num_threads = opt_threads;
1148 int i, j, ret;
1149 uint64_t sum = 0, expected_sum = 0;
1150 struct percpu_memcpy_buffer buffer;
1151 pthread_t test_threads[num_threads];
1152 cpu_set_t allowed_cpus;
1153
1154 memset(&buffer, 0, sizeof(buffer));
1155
1156 /* Generate list entries for every usable cpu. */
1157 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1158 for (i = 0; i < CPU_SETSIZE; i++) {
1159 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1160 continue;
1161 /* Worst-case is every item in the same CPU. */
1162 buffer.c[i].array =
1163 (struct percpu_memcpy_buffer_node *)
1164 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1165 MEMCPY_BUFFER_ITEM_PER_CPU);
1166 assert(buffer.c[i].array);
1167 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1168 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1169 expected_sum += 2 * j + 1;
1170
1171 /*
1172 * We could theoretically put the word-sized
1173 * "data" directly in the buffer. However, we
1174 * want to model objects that would not fit
1175 * within a single word, so allocate an object
1176 * for each node.
1177 */
1178 buffer.c[i].array[j - 1].data1 = j;
1179 buffer.c[i].array[j - 1].data2 = j + 1;
1180 buffer.c[i].offset++;
1181 }
1182 }
1183
1184 for (i = 0; i < num_threads; i++) {
1185 ret = pthread_create(&test_threads[i], NULL,
1186 test_percpu_memcpy_buffer_thread,
1187 &buffer);
1188 if (ret) {
1189 errno = ret;
1190 perror("pthread_create");
1191 abort();
1192 }
1193 }
1194
1195 for (i = 0; i < num_threads; i++) {
1196 ret = pthread_join(test_threads[i], NULL);
1197 if (ret) {
1198 errno = ret;
1199 perror("pthread_join");
1200 abort();
1201 }
1202 }
1203
1204 for (i = 0; i < CPU_SETSIZE; i++) {
1205 struct percpu_memcpy_buffer_node item;
1206
1207 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1208 continue;
1209
1210 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1211 sum += item.data1;
1212 sum += item.data2;
1213 }
1214 free(buffer.c[i].array);
1215 }
1216
1217 /*
1218 * All entries should now be accounted for (unless some external
1219 * actor is interfering with our allowed affinity while this
1220 * test is running).
1221 */
1222 assert(sum == expected_sum);
1223 }
1224
1226 static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
1227 {
1228 signals_delivered++;
1229 }
1230
1231 static int set_signal_handler(void)
1232 {
1233 int ret = 0;
1234 struct sigaction sa;
1235 sigset_t sigset;
1236
1237 ret = sigemptyset(&sigset);
1238 if (ret < 0) {
1239 perror("sigemptyset");
1240 return ret;
1241 }
1242
1243 sa.sa_handler = test_signal_interrupt_handler;
1244 sa.sa_mask = sigset;
1245 sa.sa_flags = 0;
1246 ret = sigaction(SIGUSR1, &sa, NULL);
1247 if (ret < 0) {
1248 perror("sigaction");
1249 return ret;
1250 }
1251
1252 printf_verbose("Signal handler set for SIGUSR1\n");
1253
1254 return ret;
1255 }
1256
1257 static
1258 bool membarrier_private_expedited_rseq_available(void)
1259 {
1260 int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
1261
1262 if (status < 0) {
1263 perror("membarrier");
1264 return false;
1265 }
1266 if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
1267 return false;
1268 return true;
1269 }
1270
1271 /* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
1272 #ifdef TEST_MEMBARRIER
1273 struct test_membarrier_thread_args {
1274 int stop;
1275 intptr_t percpu_list_ptr;
1276 };
1277
1278 /* Worker threads modify data in their "active" percpu lists. */
1279 static
1280 void *test_membarrier_worker_thread(void *arg)
1281 {
1282 struct test_membarrier_thread_args *args =
1283 (struct test_membarrier_thread_args *)arg;
1284 const long long iters = opt_reps;
1285 long long i;
1286
1287 if (rseq_register_current_thread()) {
1288 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1289 errno, strerror(errno));
1290 abort();
1291 }
1292
1293 /* Wait for initialization. */
1294 while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }
1295
1296 for (i = 0; i < iters; ++i) {
1297 int ret;
1298
1299 do {
1300 int cpu = get_current_cpu_id();
1301
1302 ret = rseq_load_add_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1303 &args->percpu_list_ptr,
1304 sizeof(struct percpu_list_entry) * cpu, 1, cpu);
1305 } while (rseq_unlikely(ret));
1306 }
1307
1308 if (rseq_unregister_current_thread()) {
1309 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1310 errno, strerror(errno));
1311 abort();
1312 }
1313 return NULL;
1314 }
1315
1316 static
1317 void test_membarrier_init_percpu_list(struct percpu_list *list)
1318 {
1319 int i;
1320
1321 memset(list, 0, sizeof(*list));
1322 for (i = 0; i < CPU_SETSIZE; i++) {
1323 struct percpu_list_node *node;
1324
1325 node = (struct percpu_list_node *) malloc(sizeof(*node));
1326 assert(node);
1327 node->data = 0;
1328 node->next = NULL;
1329 list->c[i].head = node;
1330 }
1331 }
1332
1333 static
1334 void test_membarrier_free_percpu_list(struct percpu_list *list)
1335 {
1336 int i;
1337
1338 for (i = 0; i < CPU_SETSIZE; i++)
1339 free(list->c[i].head);
1340 }
1341
1342 /*
1343 * The manager thread swaps per-cpu lists that worker threads see,
1344 * and validates that there are no unexpected modifications.
1345 */
1346 static
1347 void *test_membarrier_manager_thread(void *arg)
1348 {
1349 struct test_membarrier_thread_args *args =
1350 (struct test_membarrier_thread_args *)arg;
1351 struct percpu_list list_a, list_b;
1352 intptr_t expect_a = 0, expect_b = 0;
1353 int cpu_a = 0, cpu_b = 0;
1354
1355 if (rseq_register_current_thread()) {
1356 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1357 errno, strerror(errno));
1358 abort();
1359 }
1360
1361 /* Init lists. */
1362 test_membarrier_init_percpu_list(&list_a);
1363 test_membarrier_init_percpu_list(&list_b);
1364
1365 /* Initialize lists before publishing them. */
1366 rseq_smp_wmb();
1367
1368 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
1369
1370 while (!RSEQ_READ_ONCE(args->stop)) {
1371 /* list_a is "active". */
1372 cpu_a = rand() % CPU_SETSIZE;
1373 /*
1374 * As list_b is "inactive", we should never see changes
1375 * to list_b.
1376 */
1377 if (expect_b != RSEQ_READ_ONCE(list_b.c[cpu_b].head->data)) {
1378 fprintf(stderr, "Membarrier test failed\n");
1379 abort();
1380 }
1381
1382 /* Make list_b "active". */
1383 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_b);
1384 if (rseq_membarrier_expedited(cpu_a) &&
1385 errno != ENXIO /* missing CPU */) {
1386 perror("sys_membarrier");
1387 abort();
1388 }
1389 /*
1390 * Cpu A should now only modify list_b, so the values
1391 * in list_a should be stable.
1392 */
1393 expect_a = RSEQ_READ_ONCE(list_a.c[cpu_a].head->data);
1394
1395 cpu_b = rand() % CPU_SETSIZE;
1396 /*
1397 * As list_a is "inactive", we should never see changes
1398 * to list_a.
1399 */
1400 if (expect_a != RSEQ_READ_ONCE(list_a.c[cpu_a].head->data)) {
1401 fprintf(stderr, "Membarrier test failed\n");
1402 abort();
1403 }
1404
1405 /* Make list_a "active". */
1406 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
1407 if (rseq_membarrier_expedited(cpu_b) &&
1408 errno != ENXIO /* missing CPU */) {
1409 perror("sys_membarrier");
1410 abort();
1411 }
1412 /* Remember a value from list_b. */
1413 expect_b = RSEQ_READ_ONCE(list_b.c[cpu_b].head->data);
1414 }
1415
1416 test_membarrier_free_percpu_list(&list_a);
1417 test_membarrier_free_percpu_list(&list_b);
1418
1419 if (rseq_unregister_current_thread()) {
1420 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1421 errno, strerror(errno));
1422 abort();
1423 }
1424 return NULL;
1425 }
1426
1427 static
1428 void test_membarrier(void)
1429 {
1430 const int num_threads = opt_threads;
1431 struct test_membarrier_thread_args thread_args;
1432 pthread_t worker_threads[num_threads];
1433 pthread_t manager_thread;
1434 int i, ret;
1435
1436 if (!membarrier_private_expedited_rseq_available()) {
1437 fprintf(stderr, "Membarrier private expedited rseq not available. "
1438 "Skipping membarrier test.\n");
1439 return;
1440 }
1441 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1442 perror("sys_membarrier");
1443 abort();
1444 }
1445
1446 thread_args.stop = 0;
1447 thread_args.percpu_list_ptr = 0;
1448 ret = pthread_create(&manager_thread, NULL,
1449 test_membarrier_manager_thread, &thread_args);
1450 if (ret) {
1451 errno = ret;
1452 perror("pthread_create");
1453 abort();
1454 }
1455
1456 for (i = 0; i < num_threads; i++) {
1457 ret = pthread_create(&worker_threads[i], NULL,
1458 test_membarrier_worker_thread, &thread_args);
1459 if (ret) {
1460 errno = ret;
1461 perror("pthread_create");
1462 abort();
1463 }
1464 }
1465
1467 for (i = 0; i < num_threads; i++) {
1468 ret = pthread_join(worker_threads[i], NULL);
1469 if (ret) {
1470 errno = ret;
1471 perror("pthread_join");
1472 abort();
1473 }
1474 }
1475
1476 RSEQ_WRITE_ONCE(thread_args.stop, 1);
1477 ret = pthread_join(manager_thread, NULL);
1478 if (ret) {
1479 errno = ret;
1480 perror("pthread_join");
1481 abort();
1482 }
1483 }
1484 #else /* TEST_MEMBARRIER */
1485 static
1486 void test_membarrier(void)
1487 {
1488 if (!membarrier_private_expedited_rseq_available()) {
1489 fprintf(stderr, "Membarrier private expedited rseq not available. "
1490 "Skipping membarrier test.\n");
1491 return;
1492 }
1493 fprintf(stderr, "rseq_load_add_load_add_store__ptr is not implemented on this architecture. "
1494 "Skipping membarrier test.\n");
1495 }
1496 #endif
1497
1498 static void show_usage(char **argv)
1499 {
1500 printf("Usage : %s <OPTIONS>\n",
1501 argv[0]);
1502 printf("OPTIONS:\n");
1503 printf(" [-1 loops] Number of loops for delay injection 1\n");
1504 printf(" [-2 loops] Number of loops for delay injection 2\n");
1505 printf(" [-3 loops] Number of loops for delay injection 3\n");
1506 printf(" [-4 loops] Number of loops for delay injection 4\n");
1507 printf(" [-5 loops] Number of loops for delay injection 5\n");
1508 printf(" [-6 loops] Number of loops for delay injection 6\n");
1509 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1510 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1511 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1512 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1513 printf(" [-y] Yield\n");
1514 printf(" [-k] Kill thread with signal\n");
1515 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1516 printf(" [-t N] Number of threads (default 200)\n");
1517 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1518 printf(" [-d] Disable rseq system call (no initialization)\n");
1519 printf(" [-D M] Disable rseq for each M threads\n");
1520 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
1521 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
1522 printf(" [-c] Check if the rseq syscall is available.\n");
1523 printf(" [-v] Verbose output.\n");
1524 printf(" [-h] Show this help.\n");
1525 printf("\n");
1526 }
1527
1528 int main(int argc, char **argv)
1529 {
1530 int i;
1531
1532 for (i = 1; i < argc; i++) {
1533 if (argv[i][0] != '-')
1534 continue;
1535 switch (argv[i][1]) {
1536 case '1':
1537 case '2':
1538 case '3':
1539 case '4':
1540 case '5':
1541 case '6':
1542 case '7':
1543 case '8':
1544 case '9':
1545 if (argc < i + 2) {
1546 show_usage(argv);
1547 goto error;
1548 }
1549 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1550 i++;
1551 break;
1552 case 'm':
1553 if (argc < i + 2) {
1554 show_usage(argv);
1555 goto error;
1556 }
1557 opt_modulo = atol(argv[i + 1]);
1558 if (opt_modulo < 0) {
1559 show_usage(argv);
1560 goto error;
1561 }
1562 i++;
1563 break;
1564 case 's':
1565 if (argc < i + 2) {
1566 show_usage(argv);
1567 goto error;
1568 }
1569 opt_sleep = atol(argv[i + 1]);
1570 if (opt_sleep < 0) {
1571 show_usage(argv);
1572 goto error;
1573 }
1574 i++;
1575 break;
1576 case 'y':
1577 opt_yield = 1;
1578 break;
1579 case 'k':
1580 opt_signal = 1;
1581 break;
1582 case 'd':
1583 opt_disable_rseq = 1;
1584 break;
1585 case 'D':
1586 if (argc < i + 2) {
1587 show_usage(argv);
1588 goto error;
1589 }
1590 opt_disable_mod = atol(argv[i + 1]);
1591 if (opt_disable_mod < 0) {
1592 show_usage(argv);
1593 goto error;
1594 }
1595 i++;
1596 break;
1597 case 't':
1598 if (argc < i + 2) {
1599 show_usage(argv);
1600 goto error;
1601 }
1602 opt_threads = atol(argv[i + 1]);
1603 if (opt_threads < 0) {
1604 show_usage(argv);
1605 goto error;
1606 }
1607 i++;
1608 break;
1609 case 'r':
1610 if (argc < i + 2) {
1611 show_usage(argv);
1612 goto error;
1613 }
1614 opt_reps = atoll(argv[i + 1]);
1615 if (opt_reps < 0) {
1616 show_usage(argv);
1617 goto error;
1618 }
1619 i++;
1620 break;
1621 case 'h':
1622 show_usage(argv);
1623 goto end;
1624 case 'T':
1625 if (argc < i + 2) {
1626 show_usage(argv);
1627 goto error;
1628 }
1629 opt_test = *argv[i + 1];
1630 switch (opt_test) {
1631 case 's':
1632 case 'l':
1633 case 'i':
1634 case 'b':
1635 case 'm':
1636 case 'r':
1637 break;
1638 default:
1639 show_usage(argv);
1640 goto error;
1641 }
1642 i++;
1643 break;
1644 case 'v':
1645 verbose = 1;
1646 break;
1647 case 'M':
1648 opt_mo = RSEQ_MO_RELEASE;
1649 break;
1650 case 'c':
1651 if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
1652 printf_verbose("The rseq syscall is available.\n");
1653 goto end;
1654 } else {
1655 printf_verbose("The rseq syscall is unavailable.\n");
1656 goto no_rseq;
1657 }
1658 default:
1659 show_usage(argv);
1660 goto error;
1661 }
1662 }
1663
1664 loop_cnt_1 = loop_cnt[1];
1665 loop_cnt_2 = loop_cnt[2];
1666 loop_cnt_3 = loop_cnt[3];
1667 loop_cnt_4 = loop_cnt[4];
1668 loop_cnt_5 = loop_cnt[5];
1669 loop_cnt_6 = loop_cnt[6];
1670
1671 if (set_signal_handler())
1672 goto error;
1673
1674 if (!opt_disable_rseq && rseq_register_current_thread())
1675 goto error;
1676 if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
1677 printf_verbose("The rseq cpu id getter is unavailable\n");
1678 goto no_rseq;
1679 }
1680 switch (opt_test) {
1681 case 's':
1682 printf_verbose("spinlock\n");
1683 test_percpu_spinlock();
1684 break;
1685 case 'l':
1686 printf_verbose("linked list\n");
1687 test_percpu_list();
1688 break;
1689 case 'b':
1690 printf_verbose("buffer\n");
1691 test_percpu_buffer();
1692 break;
1693 case 'm':
1694 printf_verbose("memcpy buffer\n");
1695 test_percpu_memcpy_buffer();
1696 break;
1697 case 'i':
1698 printf_verbose("counter increment\n");
1699 test_percpu_inc();
1700 break;
1701 case 'r':
1702 printf_verbose("membarrier\n");
1703 test_membarrier();
1704 break;
1705 }
1706 if (!opt_disable_rseq && rseq_unregister_current_thread())
1707 abort();
1708 end:
1709 return 0;
1710
1711 error:
1712 return -1;
1713
1714 no_rseq:
1715 return 2;
1716 }