param test: counter increment: use percpu alloc
[librseq.git] / tests / param_test.c
90702366 1// SPDX-License-Identifier: MIT
f2d7b530 2// SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE
5#endif
6#include <assert.h>
cb900b45 7#include <linux/version.h>
5368dcb4 8#include <linux/membarrier.h>
9#include <pthread.h>
10#include <sched.h>
11#include <stdint.h>
12#include <stdio.h>
13#include <stdlib.h>
14#include <string.h>
15#include <syscall.h>
16#include <unistd.h>
17#include <poll.h>
18#include <sys/types.h>
19#include <signal.h>
20#include <errno.h>
21#include <stddef.h>
369688a5 22#include <stdbool.h>
23#include <rseq/percpu-alloc.h>
24
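/*
 * Size of the rseq per-cpu memory pool used by the tests below. Each test
 * creates a pool with rseq_percpu_pool_create(), allocates its per-cpu test
 * data from it with rseq_percpu_zmalloc(), and reaches a given CPU's copy
 * through rseq_percpu_ptr(ptr, cpu).
 */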
25#define PERCPU_POOL_LEN (1024*1024) /* 1MB */
31b44ba2 26
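/*
 * Fallback definitions of the rseq-related membarrier commands and flags for
 * builds against pre-5.10 kernel headers, which do not provide them.
 */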
27#if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
28enum {
29 MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7),
30 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8),
31};
32
33enum {
34 MEMBARRIER_CMD_FLAG_CPU = (1 << 0),
35};
36#endif
37
38#define NR_INJECT 9
39static int loop_cnt[NR_INJECT + 1];
40
41static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
42static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
43static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
44static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
45static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
46static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
47
48static int opt_modulo, verbose;
49
50static int opt_yield, opt_signal, opt_sleep,
51 opt_disable_rseq, opt_threads = 200,
369688a5 52 opt_disable_mod = 0, opt_test = 's';
31b44ba2 53
31b44ba2 54static long long opt_reps = 5000;
55
56static __thread __attribute__((tls_model("initial-exec")))
57unsigned int signals_delivered;
58
59static inline pid_t rseq_gettid(void)
60{
61 return syscall(__NR_gettid);
62}
63
64#ifndef BENCHMARK
65
66static __thread __attribute__((tls_model("initial-exec"), unused))
67int yield_mod_cnt, nr_abort;
68
69#define printf_verbose(fmt, ...) \
70 do { \
71 if (verbose) \
72 printf(fmt, ## __VA_ARGS__); \
73 } while (0)
74
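/*
 * Per-architecture delay injection used inside the rseq critical sections:
 * RSEQ_INJECT_ASM(n) spins a scratch register down from loop_cnt[n]
 * (exposed to the asm either as the asm_loop_cnt_<n> alias or as a memory
 * operand), widening the window in which a preemption, migration or signal
 * can abort the critical section.
 */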
75#ifdef __i386__
76
77#define INJECT_ASM_REG "eax"
78
79#define RSEQ_INJECT_CLOBBER \
80 , INJECT_ASM_REG
81
82#define RSEQ_INJECT_ASM(n) \
83 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
84 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
85 "jz 333f\n\t" \
86 "222:\n\t" \
87 "dec %%" INJECT_ASM_REG "\n\t" \
88 "jnz 222b\n\t" \
89 "333:\n\t"
90
91#elif defined(__x86_64__)
92
93#define INJECT_ASM_REG_P "rax"
94#define INJECT_ASM_REG "eax"
95
96#define RSEQ_INJECT_CLOBBER \
97 , INJECT_ASM_REG_P \
98 , INJECT_ASM_REG
99
100#define RSEQ_INJECT_ASM(n) \
101 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
102 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
103 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
104 "jz 333f\n\t" \
105 "222:\n\t" \
106 "dec %%" INJECT_ASM_REG "\n\t" \
107 "jnz 222b\n\t" \
108 "333:\n\t"
109
110#elif defined(__s390__)
111
112#define RSEQ_INJECT_INPUT \
113 , [loop_cnt_1]"m"(loop_cnt[1]) \
114 , [loop_cnt_2]"m"(loop_cnt[2]) \
115 , [loop_cnt_3]"m"(loop_cnt[3]) \
116 , [loop_cnt_4]"m"(loop_cnt[4]) \
117 , [loop_cnt_5]"m"(loop_cnt[5]) \
118 , [loop_cnt_6]"m"(loop_cnt[6])
119
120#define INJECT_ASM_REG "r12"
121
122#define RSEQ_INJECT_CLOBBER \
123 , INJECT_ASM_REG
124
125#define RSEQ_INJECT_ASM(n) \
126 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
127 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
128 "je 333f\n\t" \
129 "222:\n\t" \
130 "ahi %%" INJECT_ASM_REG ", -1\n\t" \
131 "jnz 222b\n\t" \
132 "333:\n\t"
133
134#elif defined(__ARMEL__)
135
136#define RSEQ_INJECT_INPUT \
137 , [loop_cnt_1]"m"(loop_cnt[1]) \
138 , [loop_cnt_2]"m"(loop_cnt[2]) \
139 , [loop_cnt_3]"m"(loop_cnt[3]) \
140 , [loop_cnt_4]"m"(loop_cnt[4]) \
141 , [loop_cnt_5]"m"(loop_cnt[5]) \
142 , [loop_cnt_6]"m"(loop_cnt[6])
143
144#define INJECT_ASM_REG "r4"
145
146#define RSEQ_INJECT_CLOBBER \
147 , INJECT_ASM_REG
148
149#define RSEQ_INJECT_ASM(n) \
150 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
151 "cmp " INJECT_ASM_REG ", #0\n\t" \
152 "beq 333f\n\t" \
153 "222:\n\t" \
154 "subs " INJECT_ASM_REG ", #1\n\t" \
155 "bne 222b\n\t" \
156 "333:\n\t"
157
158#elif defined(__AARCH64EL__)
159
160#define RSEQ_INJECT_INPUT \
161 , [loop_cnt_1] "Qo" (loop_cnt[1]) \
162 , [loop_cnt_2] "Qo" (loop_cnt[2]) \
163 , [loop_cnt_3] "Qo" (loop_cnt[3]) \
164 , [loop_cnt_4] "Qo" (loop_cnt[4]) \
165 , [loop_cnt_5] "Qo" (loop_cnt[5]) \
166 , [loop_cnt_6] "Qo" (loop_cnt[6])
167
168#define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
169
170#define RSEQ_INJECT_ASM(n) \
171 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
172 " cbz " INJECT_ASM_REG ", 333f\n" \
173 "222:\n" \
174 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
175 " cbnz " INJECT_ASM_REG ", 222b\n" \
176 "333:\n"
177
f1c6b55b 178#elif defined(__PPC__)
179
180#define RSEQ_INJECT_INPUT \
181 , [loop_cnt_1]"m"(loop_cnt[1]) \
182 , [loop_cnt_2]"m"(loop_cnt[2]) \
183 , [loop_cnt_3]"m"(loop_cnt[3]) \
184 , [loop_cnt_4]"m"(loop_cnt[4]) \
185 , [loop_cnt_5]"m"(loop_cnt[5]) \
186 , [loop_cnt_6]"m"(loop_cnt[6])
187
188#define INJECT_ASM_REG "r18"
189
190#define RSEQ_INJECT_CLOBBER \
191 , INJECT_ASM_REG
192
193#define RSEQ_INJECT_ASM(n) \
194 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
195 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
196 "beq 333f\n\t" \
197 "222:\n\t" \
198 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
199 "bne 222b\n\t" \
200 "333:\n\t"
201
202#elif defined(__mips__)
203
204#define RSEQ_INJECT_INPUT \
205 , [loop_cnt_1]"m"(loop_cnt[1]) \
206 , [loop_cnt_2]"m"(loop_cnt[2]) \
207 , [loop_cnt_3]"m"(loop_cnt[3]) \
208 , [loop_cnt_4]"m"(loop_cnt[4]) \
209 , [loop_cnt_5]"m"(loop_cnt[5]) \
210 , [loop_cnt_6]"m"(loop_cnt[6])
211
212#define INJECT_ASM_REG "$5"
213
214#define RSEQ_INJECT_CLOBBER \
215 , INJECT_ASM_REG
216
217#define RSEQ_INJECT_ASM(n) \
218 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
219 "beqz " INJECT_ASM_REG ", 333f\n\t" \
220 "222:\n\t" \
221 "addiu " INJECT_ASM_REG ", -1\n\t" \
222 "bnez " INJECT_ASM_REG ", 222b\n\t" \
223 "333:\n\t"
224
225#elif defined(__riscv)
226
227#define RSEQ_INJECT_INPUT \
228 , [loop_cnt_1]"m"(loop_cnt[1]) \
229 , [loop_cnt_2]"m"(loop_cnt[2]) \
230 , [loop_cnt_3]"m"(loop_cnt[3]) \
231 , [loop_cnt_4]"m"(loop_cnt[4]) \
232 , [loop_cnt_5]"m"(loop_cnt[5]) \
233 , [loop_cnt_6]"m"(loop_cnt[6])
234
235#define INJECT_ASM_REG "t1"
236
237#define RSEQ_INJECT_CLOBBER \
238 , INJECT_ASM_REG
239
240#define RSEQ_INJECT_ASM(n) \
241 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
242 "beqz " INJECT_ASM_REG ", 333f\n\t" \
243 "222:\n\t" \
244 "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
245 "bnez " INJECT_ASM_REG ", 222b\n\t" \
246 "333:\n\t"
247
248#else
249#error unsupported target
250#endif
251
252#define RSEQ_INJECT_FAILED \
253 nr_abort++;
254
255#define RSEQ_INJECT_C(n) \
256{ \
257 int loc_i, loc_nr_loops = loop_cnt[n]; \
258 \
259 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
260 rseq_barrier(); \
261 } \
262 if (loc_nr_loops == -1 && opt_modulo) { \
263 if (yield_mod_cnt == opt_modulo - 1) { \
264 if (opt_sleep > 0) \
265 poll(NULL, 0, opt_sleep); \
266 if (opt_yield) \
267 sched_yield(); \
268 if (opt_signal) \
269 raise(SIGUSR1); \
270 yield_mod_cnt = 0; \
271 } else { \
272 yield_mod_cnt++; \
273 } \
274 } \
275}
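/*
 * RSEQ_INJECT_C(n) is the C-side injection point: it busy-waits for
 * loop_cnt[n] iterations and, when loop_cnt[n] is -1 and -m is given,
 * periodically sleeps (-s), yields (-y) or raises SIGUSR1 (-k) to disturb
 * the rseq critical sections.
 */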
276
277#else
278
279#define printf_verbose(fmt, ...)
280
281#endif /* BENCHMARK */
282
283#include <rseq/rseq.h>
284
285static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
286
287static int sys_membarrier(int cmd, int flags, int cpu_id)
288{
289 return syscall(__NR_membarrier, cmd, flags, cpu_id);
290}
291
292#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
293#define TEST_MEMBARRIER
294#endif
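/*
 * The membarrier test relies on rseq_load_add_load_add_store__ptr(), which is
 * only implemented on architectures providing the offset-deref-addv
 * operation, hence the RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV gate above.
 */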
295
296#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
297# define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
298static
299int get_current_cpu_id(void)
300{
301 return rseq_current_mm_cid();
302}
303static
304bool rseq_validate_cpu_id(void)
305{
306 return rseq_mm_cid_available();
307}
308static
309bool rseq_use_cpu_index(void)
310{
311 return false; /* Use mm_cid */
312}
313# ifdef TEST_MEMBARRIER
314/*
315 * Membarrier does not currently support targeting a mm_cid, so
316 * issue the barrier on all cpus.
317 */
318static
319int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
320{
321 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
322 0, 0);
323}
324# endif /* TEST_MEMBARRIER */
325#else
326# define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
327static
328int get_current_cpu_id(void)
329{
330 return rseq_cpu_start();
331}
332static
333bool rseq_validate_cpu_id(void)
334{
335 return rseq_current_cpu_raw() >= 0;
336}
337static
338bool rseq_use_cpu_index(void)
339{
340 return true; /* Use cpu_id as index. */
341}
342# ifdef TEST_MEMBARRIER
343static
344int rseq_membarrier_expedited(int cpu)
345{
346 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
347 MEMBARRIER_CMD_FLAG_CPU, cpu);
348}
349# endif /* TEST_MEMBARRIER */
350#endif
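/*
 * Summary of the two indexing modes selected above: with
 * BUILDOPT_RSEQ_PERCPU_MM_CID the tests index per-cpu data by the
 * concurrency id (mm_cid) and membarrier targets all cpus, while the default
 * build indexes by cpu_id and can direct membarrier at a single cpu.
 */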
351
31b44ba2 352struct percpu_lock {
324633af 353 intptr_t v;
354};
355
356struct spinlock_test_data {
357 struct percpu_lock lock;
324633af 358 intptr_t count;
359};
360
361struct spinlock_thread_test_data {
fe7f954a 362 struct spinlock_test_data __rseq_percpu *data;
363 long long reps;
364 int reg;
365};
366
367struct inc_test_data {
c8278da8 368 intptr_t count;
369};
370
371struct inc_thread_test_data {
c8278da8 372 struct inc_test_data __rseq_percpu *data;
373 long long reps;
374 int reg;
375};
376
377struct percpu_list_node {
378 intptr_t data;
379 struct percpu_list_node *next;
380};
381
382struct percpu_list_entry {
383 struct percpu_list_node *head;
384} __attribute__((aligned(128)));
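/*
 * The 128-byte alignment above gives each per-cpu entry its own cache lines,
 * presumably to avoid false sharing between cpus.
 */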
385
386struct percpu_list {
387 struct percpu_list_entry c[CPU_SETSIZE];
388};
389
390#define BUFFER_ITEM_PER_CPU 100
391
392struct percpu_buffer_node {
393 intptr_t data;
394};
395
396struct percpu_buffer_entry {
397 intptr_t offset;
398 intptr_t buflen;
399 struct percpu_buffer_node **array;
400} __attribute__((aligned(128)));
401
402struct percpu_buffer {
403 struct percpu_buffer_entry c[CPU_SETSIZE];
404};
405
406#define MEMCPY_BUFFER_ITEM_PER_CPU 100
407
408struct percpu_memcpy_buffer_node {
409 intptr_t data1;
410 uint64_t data2;
411};
412
413struct percpu_memcpy_buffer_entry {
414 intptr_t offset;
415 intptr_t buflen;
416 struct percpu_memcpy_buffer_node *array;
417} __attribute__((aligned(128)));
418
419struct percpu_memcpy_buffer {
420 struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
421};
422
423/* A simple percpu spinlock. Grabs lock on current cpu. */
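/*
 * The returned cpu index identifies which per-cpu lock was taken, so the
 * caller must pass it back to rseq_percpu_unlock() even if the thread has
 * since migrated to another cpu.
 */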
fe7f954a 424static int rseq_this_cpu_lock(struct percpu_lock __rseq_percpu *lock)
425{
426 int cpu;
427
428 for (;;) {
429 int ret;
430
369688a5 431 cpu = get_current_cpu_id();
432 if (cpu < 0) {
433 fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
434 getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
435 abort();
436 }
41149e28 437 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
324633af 438 &rseq_percpu_ptr(lock, cpu)->v,
439 0, 1, cpu);
440 if (rseq_likely(!ret))
441 break;
442 /* Retry if comparison fails or rseq aborts. */
443 }
444 /*
445 * Acquire semantic when taking lock after control dependency.
446 * Matches rseq_smp_store_release().
447 */
448 rseq_smp_acquire__after_ctrl_dep();
449 return cpu;
450}
451
fe7f954a 452static void rseq_percpu_unlock(struct percpu_lock __rseq_percpu *lock, int cpu)
31b44ba2 453{
324633af 454 assert(rseq_percpu_ptr(lock, cpu)->v == 1);
455 /*
456 * Release lock, with release semantic. Matches
457 * rseq_smp_acquire__after_ctrl_dep().
458 */
324633af 459 rseq_smp_store_release(&rseq_percpu_ptr(lock, cpu)->v, 0);
460}
461
6e284b80 462static void *test_percpu_spinlock_thread(void *arg)
31b44ba2 463{
d268885a 464 struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
fe7f954a 465 struct spinlock_test_data __rseq_percpu *data = thread_data->data;
466 long long i, reps;
467
468 if (!opt_disable_rseq && thread_data->reg &&
469 rseq_register_current_thread())
470 abort();
471 reps = thread_data->reps;
472 for (i = 0; i < reps; i++) {
af895f04 473 int cpu = rseq_this_cpu_lock(&data->lock);
324633af 474 rseq_percpu_ptr(data, cpu)->count++;
475 rseq_percpu_unlock(&data->lock, cpu);
476#ifndef BENCHMARK
477 if (i != 0 && !(i % (reps / 10)))
478 printf_verbose("tid %d: count %lld\n",
479 (int) rseq_gettid(), i);
480#endif
481 }
482 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
483 (int) rseq_gettid(), nr_abort, signals_delivered);
484 if (!opt_disable_rseq && thread_data->reg &&
485 rseq_unregister_current_thread())
486 abort();
487 return NULL;
488}
489
490/*
491 * A simple test which implements a sharded counter using a per-cpu
492 * lock. Obviously real applications might prefer to simply use a
493 * per-cpu increment; however, this is reasonable for a test and the
494 * lock can be extended to synchronize more complicated operations.
495 */
6e284b80 496static void test_percpu_spinlock(void)
497{
498 const int num_threads = opt_threads;
499 int i, ret;
500 uint64_t sum;
501 pthread_t test_threads[num_threads];
fe7f954a 502 struct spinlock_test_data __rseq_percpu *data;
31b44ba2 503 struct spinlock_thread_test_data thread_data[num_threads];
504 struct rseq_percpu_pool *mempool;
505
506 mempool = rseq_percpu_pool_create(sizeof(struct spinlock_test_data),
507 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
508 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
509 if (!mempool) {
510 perror("rseq_percpu_pool_create");
511 abort();
512 }
fe7f954a 513 data = (struct spinlock_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
514 if (!data) {
515 perror("rseq_percpu_zmalloc");
516 abort();
517 }
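	/*
	 * A single rseq_percpu_zmalloc() call hands out one zero-initialized
	 * instance of struct spinlock_test_data per cpu. Worker threads reach
	 * their cpu's instance through the same pointer, e.g.:
	 *
	 *   rseq_percpu_ptr(data, cpu)->count++;
	 *
	 * and the memory is returned to the pool with rseq_percpu_free() once
	 * all threads are joined.
	 */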
31b44ba2 518
519 for (i = 0; i < num_threads; i++) {
520 thread_data[i].reps = opt_reps;
521 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
522 thread_data[i].reg = 1;
523 else
524 thread_data[i].reg = 0;
324633af 525 thread_data[i].data = data;
526 ret = pthread_create(&test_threads[i], NULL,
527 test_percpu_spinlock_thread,
528 &thread_data[i]);
529 if (ret) {
530 errno = ret;
531 perror("pthread_create");
532 abort();
533 }
534 }
535
536 for (i = 0; i < num_threads; i++) {
537 ret = pthread_join(test_threads[i], NULL);
538 if (ret) {
539 errno = ret;
540 perror("pthread_join");
541 abort();
542 }
543 }
544
545 sum = 0;
546 for (i = 0; i < CPU_SETSIZE; i++)
324633af 547 sum += rseq_percpu_ptr(data, i)->count;
548
549 assert(sum == (uint64_t)opt_reps * num_threads);
550 rseq_percpu_free(data);
551 ret = rseq_percpu_pool_destroy(mempool);
552 if (ret) {
553 perror("rseq_percpu_pool_destroy");
554 abort();
555 }
556}
557
6e284b80 558static void *test_percpu_inc_thread(void *arg)
31b44ba2 559{
d268885a 560 struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
c8278da8 561 struct inc_test_data __rseq_percpu *data = thread_data->data;
562 long long i, reps;
563
564 if (!opt_disable_rseq && thread_data->reg &&
565 rseq_register_current_thread())
566 abort();
567 reps = thread_data->reps;
568 for (i = 0; i < reps; i++) {
569 int ret;
570
571 do {
572 int cpu;
573
369688a5 574 cpu = get_current_cpu_id();
41149e28 575 ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
c8278da8 576 &rseq_percpu_ptr(data, cpu)->count, 1, cpu);
577 } while (rseq_unlikely(ret));
578#ifndef BENCHMARK
579 if (i != 0 && !(i % (reps / 10)))
580 printf_verbose("tid %d: count %lld\n",
581 (int) rseq_gettid(), i);
582#endif
583 }
584 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
585 (int) rseq_gettid(), nr_abort, signals_delivered);
586 if (!opt_disable_rseq && thread_data->reg &&
587 rseq_unregister_current_thread())
588 abort();
589 return NULL;
590}
591
6e284b80 592static void test_percpu_inc(void)
593{
594 const int num_threads = opt_threads;
595 int i, ret;
596 uint64_t sum;
597 pthread_t test_threads[num_threads];
c8278da8 598 struct inc_test_data __rseq_percpu *data;
31b44ba2 599 struct inc_thread_test_data thread_data[num_threads];
600 struct rseq_percpu_pool *mempool;
601
602 mempool = rseq_percpu_pool_create(sizeof(struct inc_test_data),
603 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
604 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
605 if (!mempool) {
606 perror("rseq_percpu_pool_create");
607 abort();
608 }
609 data = (struct inc_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
610 if (!data) {
611 perror("rseq_percpu_zmalloc");
612 abort();
613 }
31b44ba2 614
615 for (i = 0; i < num_threads; i++) {
616 thread_data[i].reps = opt_reps;
617 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
618 thread_data[i].reg = 1;
619 else
620 thread_data[i].reg = 0;
c8278da8 621 thread_data[i].data = data;
622 ret = pthread_create(&test_threads[i], NULL,
623 test_percpu_inc_thread,
624 &thread_data[i]);
625 if (ret) {
626 errno = ret;
627 perror("pthread_create");
628 abort();
629 }
630 }
631
632 for (i = 0; i < num_threads; i++) {
633 ret = pthread_join(test_threads[i], NULL);
634 if (ret) {
635 errno = ret;
636 perror("pthread_join");
637 abort();
638 }
639 }
640
641 sum = 0;
642 for (i = 0; i < CPU_SETSIZE; i++)
c8278da8 643 sum += rseq_percpu_ptr(data, i)->count;
644
645 assert(sum == (uint64_t)opt_reps * num_threads);
646 rseq_percpu_free(data);
647 ret = rseq_percpu_pool_destroy(mempool);
648 if (ret) {
649 perror("rseq_percpu_pool_destroy");
650 abort();
651 }
652}
653
6e284b80 654static void this_cpu_list_push(struct percpu_list *list,
655 struct percpu_list_node *node,
656 int *_cpu)
657{
658 int cpu;
659
660 for (;;) {
661 intptr_t *targetptr, newval, expect;
662 int ret;
663
369688a5 664 cpu = get_current_cpu_id();
665 /* Load list->c[cpu].head with single-copy atomicity. */
666 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
667 newval = (intptr_t)node;
668 targetptr = (intptr_t *)&list->c[cpu].head;
669 node->next = (struct percpu_list_node *)expect;
41149e28 670 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 671 targetptr, expect, newval, cpu);
672 if (rseq_likely(!ret))
673 break;
674 /* Retry if comparison fails or rseq aborts. */
675 }
676 if (_cpu)
677 *_cpu = cpu;
678}
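/*
 * The push above is one rseq critical section per attempt: it re-checks that
 * list->c[cpu].head still equals the value loaded just before and, only if
 * so, publishes the new node as the head. A preemption, migration or signal
 * aborts the section and the loop retries on the (possibly new) current cpu.
 */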
679
680/*
681 * Unlike a traditional lock-less linked list, the availability of a
682 * rseq primitive allows us to implement pop without concerns over
683 * ABA-type races.
684 */
6e284b80 685static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
686 int *_cpu)
687{
688 struct percpu_list_node *node = NULL;
689 int cpu;
690
691 for (;;) {
692 struct percpu_list_node *head;
693 intptr_t *targetptr, expectnot, *load;
694 long offset;
695 int ret;
31b44ba2 696
369688a5 697 cpu = get_current_cpu_id();
698 targetptr = (intptr_t *)&list->c[cpu].head;
699 expectnot = (intptr_t)NULL;
700 offset = offsetof(struct percpu_list_node, next);
701 load = (intptr_t *)&head;
41149e28 702 ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
703 targetptr, expectnot,
704 offset, load, cpu);
705 if (rseq_likely(!ret)) {
706 node = head;
707 break;
708 }
709 if (ret > 0)
710 break;
711 /* Retry if rseq aborts. */
712 }
713 if (_cpu)
714 *_cpu = cpu;
715 return node;
716}
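/*
 * The pop above loads the head, checks that it is non-NULL, reads head->next
 * through the "next" field offset and stores it as the new head, all within
 * a single critical section on the owning cpu. Because the dereference and
 * the update cannot be interleaved with another pop, the usual ABA hazard of
 * lock-less pop does not arise.
 */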
717
718/*
719 * __percpu_list_pop is not safe against concurrent accesses. Should
720 * only be used on lists that are not concurrently modified.
721 */
6e284b80 722static struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
723{
724 struct percpu_list_node *node;
725
726 node = list->c[cpu].head;
727 if (!node)
728 return NULL;
729 list->c[cpu].head = node->next;
730 return node;
731}
732
6e284b80 733static void *test_percpu_list_thread(void *arg)
734{
735 long long i, reps;
736 struct percpu_list *list = (struct percpu_list *)arg;
737
738 if (!opt_disable_rseq && rseq_register_current_thread())
739 abort();
740
741 reps = opt_reps;
742 for (i = 0; i < reps; i++) {
743 struct percpu_list_node *node;
744
745 node = this_cpu_list_pop(list, NULL);
746 if (opt_yield)
747 sched_yield(); /* encourage shuffling */
748 if (node)
749 this_cpu_list_push(list, node, NULL);
750 }
751
752 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
753 (int) rseq_gettid(), nr_abort, signals_delivered);
754 if (!opt_disable_rseq && rseq_unregister_current_thread())
755 abort();
756
757 return NULL;
758}
759
760/* Simultaneous modification to a per-cpu linked list from many threads. */
6e284b80 761static void test_percpu_list(void)
762{
763 const int num_threads = opt_threads;
764 int i, j, ret;
765 uint64_t sum = 0, expected_sum = 0;
766 struct percpu_list list;
767 pthread_t test_threads[num_threads];
768 cpu_set_t allowed_cpus;
769
770 memset(&list, 0, sizeof(list));
771
772 /* Generate list entries for every usable cpu. */
773 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
774 for (i = 0; i < CPU_SETSIZE; i++) {
40797ae3 775 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
776 continue;
777 for (j = 1; j <= 100; j++) {
778 struct percpu_list_node *node;
779
780 expected_sum += j;
781
d268885a 782 node = (struct percpu_list_node *) malloc(sizeof(*node));
783 assert(node);
784 node->data = j;
785 node->next = list.c[i].head;
786 list.c[i].head = node;
787 }
788 }
789
790 for (i = 0; i < num_threads; i++) {
791 ret = pthread_create(&test_threads[i], NULL,
792 test_percpu_list_thread, &list);
793 if (ret) {
794 errno = ret;
795 perror("pthread_create");
796 abort();
797 }
798 }
799
800 for (i = 0; i < num_threads; i++) {
801 ret = pthread_join(test_threads[i], NULL);
802 if (ret) {
803 errno = ret;
804 perror("pthread_join");
805 abort();
806 }
807 }
808
809 for (i = 0; i < CPU_SETSIZE; i++) {
810 struct percpu_list_node *node;
811
40797ae3 812 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
813 continue;
814
815 while ((node = __percpu_list_pop(&list, i))) {
816 sum += node->data;
817 free(node);
818 }
819 }
820
821 /*
822 * All entries should now be accounted for (unless some external
823 * actor is interfering with our allowed affinity while this
824 * test is running).
825 */
826 assert(sum == expected_sum);
827}
828
6e284b80 829static bool this_cpu_buffer_push(struct percpu_buffer *buffer,
830 struct percpu_buffer_node *node,
831 int *_cpu)
832{
833 bool result = false;
834 int cpu;
835
836 for (;;) {
837 intptr_t *targetptr_spec, newval_spec;
838 intptr_t *targetptr_final, newval_final;
839 intptr_t offset;
840 int ret;
841
369688a5 842 cpu = get_current_cpu_id();
843 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
844 if (offset == buffer->c[cpu].buflen)
845 break;
846 newval_spec = (intptr_t)node;
847 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
848 newval_final = offset + 1;
849 targetptr_final = &buffer->c[cpu].offset;
41149e28 850 ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
851 targetptr_final, offset, targetptr_spec,
852 newval_spec, newval_final, cpu);
853 if (rseq_likely(!ret)) {
854 result = true;
855 break;
856 }
857 /* Retry if comparison fails or rseq aborts. */
858 }
859 if (_cpu)
860 *_cpu = cpu;
861 return result;
862}
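/*
 * The push above performs two stores in one critical section: a speculative
 * store of the node pointer into the array slot at the current offset,
 * followed by the final store publishing the incremented offset. With -M,
 * opt_mo upgrades the final store to RSEQ_MO_RELEASE so that a reader
 * observing the new offset also sees the slot contents.
 */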
863
6e284b80 864static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
865 int *_cpu)
866{
867 struct percpu_buffer_node *head;
868 int cpu;
869
870 for (;;) {
871 intptr_t *targetptr, newval;
872 intptr_t offset;
873 int ret;
874
369688a5 875 cpu = get_current_cpu_id();
876 /* Load offset with single-copy atomicity. */
877 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
878 if (offset == 0) {
879 head = NULL;
880 break;
881 }
882 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
883 newval = offset - 1;
884 targetptr = (intptr_t *)&buffer->c[cpu].offset;
41149e28 885 ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 886 targetptr, offset,
887 (intptr_t *)&buffer->c[cpu].array[offset - 1],
888 (intptr_t)head, newval, cpu);
889 if (rseq_likely(!ret))
890 break;
891 /* Retry if comparison fails or rseq aborts. */
892 }
893 if (_cpu)
894 *_cpu = cpu;
895 return head;
896}
897
898/*
899 * __percpu_buffer_pop is not safe against concurrent accesses. Should
900 * only be used on buffers that are not concurrently modified.
901 */
6e284b80 902static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
903 int cpu)
904{
905 struct percpu_buffer_node *head;
906 intptr_t offset;
907
908 offset = buffer->c[cpu].offset;
909 if (offset == 0)
910 return NULL;
911 head = buffer->c[cpu].array[offset - 1];
912 buffer->c[cpu].offset = offset - 1;
913 return head;
914}
915
6e284b80 916static void *test_percpu_buffer_thread(void *arg)
917{
918 long long i, reps;
919 struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
920
921 if (!opt_disable_rseq && rseq_register_current_thread())
922 abort();
923
924 reps = opt_reps;
925 for (i = 0; i < reps; i++) {
926 struct percpu_buffer_node *node;
927
928 node = this_cpu_buffer_pop(buffer, NULL);
929 if (opt_yield)
930 sched_yield(); /* encourage shuffling */
931 if (node) {
932 if (!this_cpu_buffer_push(buffer, node, NULL)) {
933 /* Should increase buffer size. */
934 abort();
935 }
936 }
937 }
938
939 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
940 (int) rseq_gettid(), nr_abort, signals_delivered);
941 if (!opt_disable_rseq && rseq_unregister_current_thread())
942 abort();
943
944 return NULL;
945}
946
947/* Simultaneous modification to a per-cpu buffer from many threads. */
6e284b80 948static void test_percpu_buffer(void)
949{
950 const int num_threads = opt_threads;
951 int i, j, ret;
952 uint64_t sum = 0, expected_sum = 0;
953 struct percpu_buffer buffer;
954 pthread_t test_threads[num_threads];
955 cpu_set_t allowed_cpus;
956
957 memset(&buffer, 0, sizeof(buffer));
958
959 /* Generate buffer entries for every usable cpu. */
960 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
961 for (i = 0; i < CPU_SETSIZE; i++) {
40797ae3 962 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
963 continue;
964 /* Worst case is every item on the same CPU. */
965 buffer.c[i].array =
d268885a 966 (struct percpu_buffer_node **)
967 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
968 BUFFER_ITEM_PER_CPU);
969 assert(buffer.c[i].array);
970 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
971 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
972 struct percpu_buffer_node *node;
973
974 expected_sum += j;
975
976 /*
977 * We could theoretically put the word-sized
978 * "data" directly in the buffer. However, we
979 * want to model objects that would not fit
980 * within a single word, so allocate an object
981 * for each node.
982 */
d268885a 983 node = (struct percpu_buffer_node *) malloc(sizeof(*node));
984 assert(node);
985 node->data = j;
986 buffer.c[i].array[j - 1] = node;
987 buffer.c[i].offset++;
988 }
989 }
990
991 for (i = 0; i < num_threads; i++) {
992 ret = pthread_create(&test_threads[i], NULL,
993 test_percpu_buffer_thread, &buffer);
994 if (ret) {
995 errno = ret;
996 perror("pthread_create");
997 abort();
998 }
999 }
1000
1001 for (i = 0; i < num_threads; i++) {
1002 ret = pthread_join(test_threads[i], NULL);
1003 if (ret) {
1004 errno = ret;
1005 perror("pthread_join");
1006 abort();
1007 }
1008 }
1009
1010 for (i = 0; i < CPU_SETSIZE; i++) {
1011 struct percpu_buffer_node *node;
1012
40797ae3 1013 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1014 continue;
1015
1016 while ((node = __percpu_buffer_pop(&buffer, i))) {
1017 sum += node->data;
1018 free(node);
1019 }
1020 free(buffer.c[i].array);
1021 }
1022
1023 /*
1024 * All entries should now be accounted for (unless some external
1025 * actor is interfering with our allowed affinity while this
1026 * test is running).
1027 */
1028 assert(sum == expected_sum);
1029}
1030
6e284b80 1031static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
1032 struct percpu_memcpy_buffer_node item,
1033 int *_cpu)
1034{
1035 bool result = false;
1036 int cpu;
1037
1038 for (;;) {
1039 intptr_t *targetptr_final, newval_final, offset;
1040 char *destptr, *srcptr;
1041 size_t copylen;
1042 int ret;
1043
369688a5 1044 cpu = get_current_cpu_id();
1045 /* Load offset with single-copy atomicity. */
1046 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
1047 if (offset == buffer->c[cpu].buflen)
1048 break;
1049 destptr = (char *)&buffer->c[cpu].array[offset];
1050 srcptr = (char *)&item;
1051 /* copylen must be <= 4kB. */
1052 copylen = sizeof(item);
1053 newval_final = offset + 1;
1054 targetptr_final = &buffer->c[cpu].offset;
41149e28 1055 ret = rseq_load_cbne_memcpy_store__ptr(
1056 opt_mo, RSEQ_PERCPU,
1057 targetptr_final, offset,
1058 destptr, srcptr, copylen,
1059 newval_final, cpu);
1060 if (rseq_likely(!ret)) {
1061 result = true;
1062 break;
1063 }
1064 /* Retry if comparison fails or rseq aborts. */
1065 }
1066 if (_cpu)
1067 *_cpu = cpu;
1068 return result;
1069}
1070
6e284b80 1071static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1072 struct percpu_memcpy_buffer_node *item,
1073 int *_cpu)
1074{
1075 bool result = false;
1076 int cpu;
1077
1078 for (;;) {
1079 intptr_t *targetptr_final, newval_final, offset;
1080 char *destptr, *srcptr;
1081 size_t copylen;
1082 int ret;
1083
369688a5 1084 cpu = get_current_cpu_id();
1085 /* Load offset with single-copy atomicity. */
1086 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
1087 if (offset == 0)
1088 break;
1089 destptr = (char *)item;
1090 srcptr = (char *)&buffer->c[cpu].array[offset - 1];
1091 /* copylen must be <= 4kB. */
1092 copylen = sizeof(*item);
1093 newval_final = offset - 1;
1094 targetptr_final = &buffer->c[cpu].offset;
41149e28 1095 ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 1096 targetptr_final, offset, destptr, srcptr, copylen,
1097 newval_final, cpu);
1098 if (rseq_likely(!ret)) {
1099 result = true;
1100 break;
1101 }
1102 /* Retry if comparison fails or rseq aborts. */
1103 }
1104 if (_cpu)
1105 *_cpu = cpu;
1106 return result;
1107}
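/*
 * The memcpy push/pop variants above copy the whole two-word node in or out
 * of the per-cpu array with rseq_load_cbne_memcpy_store__ptr() before
 * publishing the updated offset, still within a single critical section.
 */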
1108
1109/*
1110 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1111 * only be used on buffers that are not concurrently modified.
1112 */
6e284b80 1113static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
1114 struct percpu_memcpy_buffer_node *item,
1115 int cpu)
1116{
1117 intptr_t offset;
1118
1119 offset = buffer->c[cpu].offset;
1120 if (offset == 0)
1121 return false;
1122 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
1123 buffer->c[cpu].offset = offset - 1;
1124 return true;
1125}
1126
6e284b80 1127static void *test_percpu_memcpy_buffer_thread(void *arg)
1128{
1129 long long i, reps;
1130 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
1131
1132 if (!opt_disable_rseq && rseq_register_current_thread())
1133 abort();
1134
1135 reps = opt_reps;
1136 for (i = 0; i < reps; i++) {
1137 struct percpu_memcpy_buffer_node item;
1138 bool result;
1139
1140 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1141 if (opt_yield)
1142 sched_yield(); /* encourage shuffling */
1143 if (result) {
1144 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1145 /* Should increase buffer size. */
1146 abort();
1147 }
1148 }
1149 }
1150
1151 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
1152 (int) rseq_gettid(), nr_abort, signals_delivered);
1153 if (!opt_disable_rseq && rseq_unregister_current_thread())
1154 abort();
1155
1156 return NULL;
1157}
1158
1159/* Simultaneous modification to a per-cpu buffer from many threads. */
6e284b80 1160static void test_percpu_memcpy_buffer(void)
1161{
1162 const int num_threads = opt_threads;
1163 int i, j, ret;
1164 uint64_t sum = 0, expected_sum = 0;
1165 struct percpu_memcpy_buffer buffer;
1166 pthread_t test_threads[num_threads];
1167 cpu_set_t allowed_cpus;
1168
1169 memset(&buffer, 0, sizeof(buffer));
1170
1171 /* Generate memcpy buffer entries for every usable cpu. */
1172 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1173 for (i = 0; i < CPU_SETSIZE; i++) {
40797ae3 1174 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1175 continue;
1176 /* Worst case is every item on the same CPU. */
1177 buffer.c[i].array =
d268885a 1178 (struct percpu_memcpy_buffer_node *)
1179 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1180 MEMCPY_BUFFER_ITEM_PER_CPU);
1181 assert(buffer.c[i].array);
1182 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1183 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1184 expected_sum += 2 * j + 1;
1185
1186 /*
1187 * We could theoretically put the word-sized
1188 * "data" directly in the buffer. However, we
1189 * want to model objects that would not fit
1190 * within a single word, so allocate an object
1191 * for each node.
1192 */
1193 buffer.c[i].array[j - 1].data1 = j;
1194 buffer.c[i].array[j - 1].data2 = j + 1;
1195 buffer.c[i].offset++;
1196 }
1197 }
1198
1199 for (i = 0; i < num_threads; i++) {
1200 ret = pthread_create(&test_threads[i], NULL,
1201 test_percpu_memcpy_buffer_thread,
1202 &buffer);
1203 if (ret) {
1204 errno = ret;
1205 perror("pthread_create");
1206 abort();
1207 }
1208 }
1209
1210 for (i = 0; i < num_threads; i++) {
1211 ret = pthread_join(test_threads[i], NULL);
1212 if (ret) {
1213 errno = ret;
1214 perror("pthread_join");
1215 abort();
1216 }
1217 }
1218
1219 for (i = 0; i < CPU_SETSIZE; i++) {
1220 struct percpu_memcpy_buffer_node item;
1221
40797ae3 1222 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
1223 continue;
1224
1225 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1226 sum += item.data1;
1227 sum += item.data2;
1228 }
1229 free(buffer.c[i].array);
1230 }
1231
1232 /*
1233 * All entries should now be accounted for (unless some external
1234 * actor is interfering with our allowed affinity while this
1235 * test is running).
1236 */
1237 assert(sum == expected_sum);
1238}
1239
1240
1241static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
1242{
1243 signals_delivered++;
1244}
1245
1246static int set_signal_handler(void)
1247{
1248 int ret = 0;
1249 struct sigaction sa;
1250 sigset_t sigset;
1251
1252 ret = sigemptyset(&sigset);
1253 if (ret < 0) {
1254 perror("sigemptyset");
1255 return ret;
1256 }
1257
1258 sa.sa_handler = test_signal_interrupt_handler;
1259 sa.sa_mask = sigset;
1260 sa.sa_flags = 0;
1261 ret = sigaction(SIGUSR1, &sa, NULL);
1262 if (ret < 0) {
1263 perror("sigaction");
1264 return ret;
1265 }
1266
1267 printf_verbose("Signal handler set for SIGUSR1\n");
1268
1269 return ret;
1270}
1271
1272static
1273bool membarrier_private_expedited_rseq_available(void)
1274{
1275 int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
1276
1277 if (status < 0) {
1278 perror("membarrier");
1279 return false;
1280 }
1281 if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
1282 return false;
1283 return true;
1284}
1285
5368dcb4 1286/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
369688a5 1287#ifdef TEST_MEMBARRIER
1288struct test_membarrier_thread_args {
1289 int stop;
1290 intptr_t percpu_list_ptr;
1291};
1292
1293/* Worker threads modify data in their "active" percpu lists. */
1294static
1295void *test_membarrier_worker_thread(void *arg)
1296{
1297 struct test_membarrier_thread_args *args =
1298 (struct test_membarrier_thread_args *)arg;
1299 const int iters = opt_reps;
1300 int i;
1301
1302 if (rseq_register_current_thread()) {
1303 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1304 errno, strerror(errno));
1305 abort();
1306 }
1307
1308 /* Wait for initialization. */
1309 while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }
1310
1311 for (i = 0; i < iters; ++i) {
1312 int ret;
1313
1314 do {
369688a5 1315 int cpu = get_current_cpu_id();
5368dcb4 1316
41149e28 1317 ret = rseq_load_add_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 1318 &args->percpu_list_ptr,
1319 sizeof(struct percpu_list_entry) * cpu, 1, cpu);
1320 } while (rseq_unlikely(ret));
1321 }
1322
1323 if (rseq_unregister_current_thread()) {
1324 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1325 errno, strerror(errno));
1326 abort();
1327 }
1328 return NULL;
1329}
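/*
 * Each worker increments the data field of its cpu's list head through one
 * critical section that first loads args->percpu_list_ptr and then
 * dereferences it at the cpu's entry offset. The manager relies on
 * membarrier to guarantee that no worker is still inside a section that read
 * the old pointer after it has been swapped.
 */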
1330
1331static
1332void test_membarrier_init_percpu_list(struct percpu_list *list)
1333{
1334 int i;
1335
1336 memset(list, 0, sizeof(*list));
1337 for (i = 0; i < CPU_SETSIZE; i++) {
1338 struct percpu_list_node *node;
1339
1340 node = (struct percpu_list_node *) malloc(sizeof(*node));
1341 assert(node);
1342 node->data = 0;
1343 node->next = NULL;
1344 list->c[i].head = node;
1345 }
1346}
1347
1348static
1349void test_membarrier_free_percpu_list(struct percpu_list *list)
1350{
1351 int i;
1352
1353 for (i = 0; i < CPU_SETSIZE; i++)
1354 free(list->c[i].head);
1355}
1356
1357/*
1358 * The manager thread swaps per-cpu lists that worker threads see,
1359 * and validates that there are no unexpected modifications.
1360 */
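/*
 * After publishing the new active list pointer, the manager issues an
 * expedited rseq membarrier towards the cpu whose entry of the now-inactive
 * list it is about to sample. This aborts any critical section on that cpu
 * still using the stale pointer, so the sampled value must stay unchanged
 * until the next swap.
 */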
1361static
1362void *test_membarrier_manager_thread(void *arg)
1363{
1364 struct test_membarrier_thread_args *args =
1365 (struct test_membarrier_thread_args *)arg;
1366 struct percpu_list list_a, list_b;
1367 intptr_t expect_a = 0, expect_b = 0;
1368 int cpu_a = 0, cpu_b = 0;
1369
1370 if (rseq_register_current_thread()) {
1371 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1372 errno, strerror(errno));
1373 abort();
1374 }
1375
1376 /* Init lists. */
1377 test_membarrier_init_percpu_list(&list_a);
1378 test_membarrier_init_percpu_list(&list_b);
1379
1380 /* Initialize lists before publishing them. */
1381 rseq_smp_wmb();
1382
1383 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
1384
1385 while (!RSEQ_READ_ONCE(args->stop)) {
1386 /* list_a is "active". */
1387 cpu_a = rand() % CPU_SETSIZE;
1388 /*
1389 * As list_b is "inactive", we should never see changes
1390 * to list_b.
1391 */
1392 if (expect_b != RSEQ_READ_ONCE(list_b.c[cpu_b].head->data)) {
1393 fprintf(stderr, "Membarrier test failed\n");
1394 abort();
1395 }
1396
1397 /* Make list_b "active". */
1398 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_b);
369688a5 1399 if (rseq_membarrier_expedited(cpu_a) &&
1400 errno != ENXIO /* missing CPU */) {
1401 perror("sys_membarrier");
1402 abort();
1403 }
1404 /*
1405 * Cpu A should now only modify list_b, so the values
1406 * in list_a should be stable.
1407 */
1408 expect_a = RSEQ_READ_ONCE(list_a.c[cpu_a].head->data);
1409
1410 cpu_b = rand() % CPU_SETSIZE;
1411 /*
1412 * As list_a is "inactive", we should never see changes
1413 * to list_a.
1414 */
1415 if (expect_a != RSEQ_READ_ONCE(list_a.c[cpu_a].head->data)) {
1416 fprintf(stderr, "Membarrier test failed\n");
1417 abort();
1418 }
1419
1420 /* Make list_a "active". */
1421 RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
369688a5 1422 if (rseq_membarrier_expedited(cpu_b) &&
1423 errno != ENXIO /* missing CPU */) {
1424 perror("sys_membarrier");
1425 abort();
1426 }
1427 /* Remember a value from list_b. */
1428 expect_b = RSEQ_READ_ONCE(list_b.c[cpu_b].head->data);
1429 }
1430
1431 test_membarrier_free_percpu_list(&list_a);
1432 test_membarrier_free_percpu_list(&list_b);
1433
1434 if (rseq_unregister_current_thread()) {
1435 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1436 errno, strerror(errno));
1437 abort();
1438 }
1439 return NULL;
1440}
1441
1442static
1443void test_membarrier(void)
1444{
1445 const int num_threads = opt_threads;
1446 struct test_membarrier_thread_args thread_args;
1447 pthread_t worker_threads[num_threads];
1448 pthread_t manager_thread;
1449 int i, ret;
1450
1451 if (!membarrier_private_expedited_rseq_available()) {
1452 fprintf(stderr, "Membarrier private expedited rseq not available. "
1453 "Skipping membarrier test.\n");
1454 return;
1455 }
1456 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1457 perror("sys_membarrier");
1458 abort();
1459 }
1460
1461 thread_args.stop = 0;
1462 thread_args.percpu_list_ptr = 0;
1463 ret = pthread_create(&manager_thread, NULL,
1464 test_membarrier_manager_thread, &thread_args);
1465 if (ret) {
1466 errno = ret;
1467 perror("pthread_create");
1468 abort();
1469 }
1470
1471 for (i = 0; i < num_threads; i++) {
1472 ret = pthread_create(&worker_threads[i], NULL,
1473 test_membarrier_worker_thread, &thread_args);
1474 if (ret) {
1475 errno = ret;
1476 perror("pthread_create");
1477 abort();
1478 }
1479 }
1480
1481
1482 for (i = 0; i < num_threads; i++) {
1483 ret = pthread_join(worker_threads[i], NULL);
1484 if (ret) {
1485 errno = ret;
1486 perror("pthread_join");
1487 abort();
1488 }
1489 }
1490
1491 RSEQ_WRITE_ONCE(thread_args.stop, 1);
1492 ret = pthread_join(manager_thread, NULL);
1493 if (ret) {
1494 errno = ret;
1495 perror("pthread_join");
1496 abort();
1497 }
1498}
369688a5 1499#else /* TEST_MEMBARRIER */
1500static
1501void test_membarrier(void)
1502{
1503 if (!membarrier_private_expedited_rseq_available()) {
1504 fprintf(stderr, "Membarrier private expedited rseq not available. "
1505 "Skipping membarrier test.\n");
1506 return;
1507 }
41149e28 1508 fprintf(stderr, "rseq_load_add_load_add_store__ptr is not implemented on this architecture. "
1509 "Skipping membarrier test.\n");
1510}
1511#endif
1512
544cdc88 1513static void show_usage(char **argv)
1514{
1515 printf("Usage : %s <OPTIONS>\n",
1516 argv[0]);
1517 printf("OPTIONS:\n");
1518 printf(" [-1 loops] Number of loops for delay injection 1\n");
1519 printf(" [-2 loops] Number of loops for delay injection 2\n");
1520 printf(" [-3 loops] Number of loops for delay injection 3\n");
1521 printf(" [-4 loops] Number of loops for delay injection 4\n");
1522 printf(" [-5 loops] Number of loops for delay injection 5\n");
1523 printf(" [-6 loops] Number of loops for delay injection 6\n");
1524 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1525 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1526 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1527 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1528 printf(" [-y] Yield\n");
1529 printf(" [-k] Kill thread with signal\n");
1530 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1531 printf(" [-t N] Number of threads (default 200)\n");
1532 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1533 printf(" [-d] Disable rseq system call (no initialization)\n");
1534 printf(" [-D M] Disable rseq for each M threads\n");
5368dcb4 1535 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
31b44ba2 1536 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
d1cdec98 1537 printf(" [-c] Check if the rseq syscall is available.\n");
1538 printf(" [-v] Verbose output.\n");
1539 printf(" [-h] Show this help.\n");
1540 printf("\n");
1541}
1542
1543int main(int argc, char **argv)
1544{
1545 int i;
1546
1547 for (i = 1; i < argc; i++) {
1548 if (argv[i][0] != '-')
1549 continue;
1550 switch (argv[i][1]) {
1551 case '1':
1552 case '2':
1553 case '3':
1554 case '4':
1555 case '5':
1556 case '6':
1557 case '7':
1558 case '8':
1559 case '9':
1560 if (argc < i + 2) {
544cdc88 1561 show_usage(argv);
1562 goto error;
1563 }
1564 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1565 i++;
1566 break;
1567 case 'm':
1568 if (argc < i + 2) {
544cdc88 1569 show_usage(argv);
1570 goto error;
1571 }
1572 opt_modulo = atol(argv[i + 1]);
1573 if (opt_modulo < 0) {
544cdc88 1574 show_usage(argv);
1575 goto error;
1576 }
1577 i++;
1578 break;
1579 case 's':
1580 if (argc < i + 2) {
544cdc88 1581 show_usage(argv);
1582 goto error;
1583 }
1584 opt_sleep = atol(argv[i + 1]);
1585 if (opt_sleep < 0) {
544cdc88 1586 show_usage(argv);
1587 goto error;
1588 }
1589 i++;
1590 break;
1591 case 'y':
1592 opt_yield = 1;
1593 break;
1594 case 'k':
1595 opt_signal = 1;
1596 break;
1597 case 'd':
1598 opt_disable_rseq = 1;
1599 break;
1600 case 'D':
1601 if (argc < i + 2) {
544cdc88 1602 show_usage(argv);
1603 goto error;
1604 }
1605 opt_disable_mod = atol(argv[i + 1]);
1606 if (opt_disable_mod < 0) {
544cdc88 1607 show_usage(argv);
1608 goto error;
1609 }
1610 i++;
1611 break;
1612 case 't':
1613 if (argc < i + 2) {
544cdc88 1614 show_usage(argv);
1615 goto error;
1616 }
1617 opt_threads = atol(argv[i + 1]);
1618 if (opt_threads < 0) {
544cdc88 1619 show_usage(argv);
1620 goto error;
1621 }
1622 i++;
1623 break;
1624 case 'r':
1625 if (argc < i + 2) {
544cdc88 1626 show_usage(argv);
1627 goto error;
1628 }
1629 opt_reps = atoll(argv[i + 1]);
1630 if (opt_reps < 0) {
544cdc88 1631 show_usage(argv);
1632 goto error;
1633 }
1634 i++;
1635 break;
1636 case 'h':
544cdc88 1637 show_usage(argv);
1638 goto end;
1639 case 'T':
1640 if (argc < i + 2) {
544cdc88 1641 show_usage(argv);
31b44ba2
MD
1642 goto error;
1643 }
1644 opt_test = *argv[i + 1];
1645 switch (opt_test) {
1646 case 's':
1647 case 'l':
1648 case 'i':
1649 case 'b':
1650 case 'm':
5368dcb4 1651 case 'r':
1652 break;
1653 default:
544cdc88 1654 show_usage(argv);
1655 goto error;
1656 }
1657 i++;
1658 break;
1659 case 'v':
1660 verbose = 1;
1661 break;
1662 case 'M':
369688a5 1663 opt_mo = RSEQ_MO_RELEASE;
31b44ba2 1664 break;
d1cdec98 1665 case 'c':
8b34114a 1666 if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
1667 printf_verbose("The rseq syscall is available.\n");
1668 goto end;
1669 } else {
1670 printf_verbose("The rseq syscall is unavailable.\n");
1671 goto no_rseq;
1672 }
31b44ba2 1673 default:
544cdc88 1674 show_usage(argv);
1675 goto error;
1676 }
1677 }
1678
1679 loop_cnt_1 = loop_cnt[1];
1680 loop_cnt_2 = loop_cnt[2];
1681 loop_cnt_3 = loop_cnt[3];
1682 loop_cnt_4 = loop_cnt[4];
1683 loop_cnt_5 = loop_cnt[5];
1684 loop_cnt_6 = loop_cnt[6];
1685
1686 if (set_signal_handler())
1687 goto error;
1688
1689 if (!opt_disable_rseq && rseq_register_current_thread())
1690 goto error;
369688a5 1691 if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
1692 printf_verbose("The rseq cpu id getter is unavailable\n");
1693 goto no_rseq;
369688a5 1694 }
1695 switch (opt_test) {
1696 case 's':
1697 printf_verbose("spinlock\n");
1698 test_percpu_spinlock();
1699 break;
1700 case 'l':
1701 printf_verbose("linked list\n");
1702 test_percpu_list();
1703 break;
1704 case 'b':
1705 printf_verbose("buffer\n");
1706 test_percpu_buffer();
1707 break;
1708 case 'm':
1709 printf_verbose("memcpy buffer\n");
1710 test_percpu_memcpy_buffer();
1711 break;
1712 case 'i':
1713 printf_verbose("counter increment\n");
1714 test_percpu_inc();
1715 break;
1716 case 'r':
1717 printf_verbose("membarrier\n");
1718 test_membarrier();
1719 break;
1720 }
1721 if (!opt_disable_rseq && rseq_unregister_current_thread())
1722 abort();
1723end:
1724 return 0;
1725
1726error:
1727 return -1;
1728
1729no_rseq:
1730 return 2;
31b44ba2 1731}