param tests: percpu list: use percpu alloc
[librseq.git] / tests / param_test.c
90702366 1// SPDX-License-Identifier: MIT
f2d7b530 2// SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
31b44ba2
MD
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE
5#endif
6#include <assert.h>
cb900b45 7#include <linux/version.h>
5368dcb4 8#include <linux/membarrier.h>
31b44ba2
MD
9#include <pthread.h>
10#include <sched.h>
11#include <stdint.h>
12#include <stdio.h>
13#include <stdlib.h>
14#include <string.h>
15#include <syscall.h>
16#include <unistd.h>
17#include <poll.h>
18#include <sys/types.h>
19#include <signal.h>
20#include <errno.h>
21#include <stddef.h>
369688a5 22#include <stdbool.h>
324633af
MD
23#include <rseq/percpu-alloc.h>
24
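/*
 * Length of each rseq per-cpu memory pool handed to
 * rseq_percpu_pool_create() below. The spinlock, increment and list
 * tests allocate their per-cpu data from such pools rather than from
 * static CPU_SETSIZE-sized arrays.
 */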
25#define PERCPU_POOL_LEN (1024*1024) /* 1MB */
31b44ba2 26
cb900b45
MD
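/*
 * Fallback definitions for build environments whose kernel headers
 * predate Linux 5.10 and therefore lack the rseq membarrier commands.
 */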
27#if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
28enum {
29 MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7),
30 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8),
31};
32
33enum {
34 MEMBARRIER_CMD_FLAG_CPU = (1 << 0),
35};
36#endif
37
31b44ba2
MD
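/*
 * Delay injection state: loop_cnt[n] holds the number of busy-loop
 * iterations injected at injection point <n> of the rseq critical
 * sections (see RSEQ_INJECT_ASM and RSEQ_INJECT_C below). The values
 * are set from the -1 .. -9 command line options; injecting delays
 * makes preemption-induced aborts far more likely, which is what this
 * stress test is after.
 */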
38#define NR_INJECT 9
39static int loop_cnt[NR_INJECT + 1];
40
41static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
42static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
43static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
44static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
45static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
46static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
47
48static int opt_modulo, verbose;
49
50static int opt_yield, opt_signal, opt_sleep,
51 opt_disable_rseq, opt_threads = 200,
369688a5 52 opt_disable_mod = 0, opt_test = 's';
31b44ba2 53
31b44ba2 54static long long opt_reps = 5000;
31b44ba2
MD
55
56static __thread __attribute__((tls_model("initial-exec")))
57unsigned int signals_delivered;
58
c6e1dc81
MD
59static inline pid_t rseq_gettid(void)
60{
61 return syscall(__NR_gettid);
62}
63
3726b9f1
MD
64#ifndef BENCHMARK
65
31b44ba2
MD
66static __thread __attribute__((tls_model("initial-exec"), unused))
67int yield_mod_cnt, nr_abort;
68
69#define printf_verbose(fmt, ...) \
70 do { \
71 if (verbose) \
72 printf(fmt, ## __VA_ARGS__); \
73 } while (0)
74
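/*
 * Each architecture below provides RSEQ_INJECT_ASM(n): a short
 * assembly delay loop spinning loop_cnt[n] times on a scratch
 * register at injection point <n> of the rseq critical sections,
 * along with the clobber/input declarations the inline asm requires.
 */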
75#ifdef __i386__
76
77#define INJECT_ASM_REG "eax"
78
79#define RSEQ_INJECT_CLOBBER \
80 , INJECT_ASM_REG
81
82#define RSEQ_INJECT_ASM(n) \
83 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
84 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
85 "jz 333f\n\t" \
86 "222:\n\t" \
87 "dec %%" INJECT_ASM_REG "\n\t" \
88 "jnz 222b\n\t" \
89 "333:\n\t"
90
91#elif defined(__x86_64__)
92
93#define INJECT_ASM_REG_P "rax"
94#define INJECT_ASM_REG "eax"
95
96#define RSEQ_INJECT_CLOBBER \
97 , INJECT_ASM_REG_P \
98 , INJECT_ASM_REG
99
100#define RSEQ_INJECT_ASM(n) \
101 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
102 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
103 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
104 "jz 333f\n\t" \
105 "222:\n\t" \
106 "dec %%" INJECT_ASM_REG "\n\t" \
107 "jnz 222b\n\t" \
108 "333:\n\t"
109
110#elif defined(__s390__)
111
112#define RSEQ_INJECT_INPUT \
113 , [loop_cnt_1]"m"(loop_cnt[1]) \
114 , [loop_cnt_2]"m"(loop_cnt[2]) \
115 , [loop_cnt_3]"m"(loop_cnt[3]) \
116 , [loop_cnt_4]"m"(loop_cnt[4]) \
117 , [loop_cnt_5]"m"(loop_cnt[5]) \
118 , [loop_cnt_6]"m"(loop_cnt[6])
119
120#define INJECT_ASM_REG "r12"
121
122#define RSEQ_INJECT_CLOBBER \
123 , INJECT_ASM_REG
124
125#define RSEQ_INJECT_ASM(n) \
126 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
127 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
128 "je 333f\n\t" \
129 "222:\n\t" \
130 "ahi %%" INJECT_ASM_REG ", -1\n\t" \
131 "jnz 222b\n\t" \
132 "333:\n\t"
133
134#elif defined(__ARMEL__)
135
136#define RSEQ_INJECT_INPUT \
137 , [loop_cnt_1]"m"(loop_cnt[1]) \
138 , [loop_cnt_2]"m"(loop_cnt[2]) \
139 , [loop_cnt_3]"m"(loop_cnt[3]) \
140 , [loop_cnt_4]"m"(loop_cnt[4]) \
141 , [loop_cnt_5]"m"(loop_cnt[5]) \
142 , [loop_cnt_6]"m"(loop_cnt[6])
143
144#define INJECT_ASM_REG "r4"
145
146#define RSEQ_INJECT_CLOBBER \
147 , INJECT_ASM_REG
148
149#define RSEQ_INJECT_ASM(n) \
150 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
151 "cmp " INJECT_ASM_REG ", #0\n\t" \
152 "beq 333f\n\t" \
153 "222:\n\t" \
154 "subs " INJECT_ASM_REG ", #1\n\t" \
155 "bne 222b\n\t" \
156 "333:\n\t"
157
158#elif defined(__AARCH64EL__)
159
160#define RSEQ_INJECT_INPUT \
161 , [loop_cnt_1] "Qo" (loop_cnt[1]) \
162 , [loop_cnt_2] "Qo" (loop_cnt[2]) \
163 , [loop_cnt_3] "Qo" (loop_cnt[3]) \
164 , [loop_cnt_4] "Qo" (loop_cnt[4]) \
165 , [loop_cnt_5] "Qo" (loop_cnt[5]) \
166 , [loop_cnt_6] "Qo" (loop_cnt[6])
167
168#define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
169
170#define RSEQ_INJECT_ASM(n) \
171 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
172 " cbz " INJECT_ASM_REG ", 333f\n" \
173 "222:\n" \
174 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
175 " cbnz " INJECT_ASM_REG ", 222b\n" \
176 "333:\n"
177
f1c6b55b 178#elif defined(__PPC__)
31b44ba2
MD
179
180#define RSEQ_INJECT_INPUT \
181 , [loop_cnt_1]"m"(loop_cnt[1]) \
182 , [loop_cnt_2]"m"(loop_cnt[2]) \
183 , [loop_cnt_3]"m"(loop_cnt[3]) \
184 , [loop_cnt_4]"m"(loop_cnt[4]) \
185 , [loop_cnt_5]"m"(loop_cnt[5]) \
186 , [loop_cnt_6]"m"(loop_cnt[6])
187
188#define INJECT_ASM_REG "r18"
189
190#define RSEQ_INJECT_CLOBBER \
191 , INJECT_ASM_REG
192
193#define RSEQ_INJECT_ASM(n) \
194 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
195 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
196 "beq 333f\n\t" \
197 "222:\n\t" \
198 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
199 "bne 222b\n\t" \
200 "333:\n\t"
201
202#elif defined(__mips__)
203
204#define RSEQ_INJECT_INPUT \
205 , [loop_cnt_1]"m"(loop_cnt[1]) \
206 , [loop_cnt_2]"m"(loop_cnt[2]) \
207 , [loop_cnt_3]"m"(loop_cnt[3]) \
208 , [loop_cnt_4]"m"(loop_cnt[4]) \
209 , [loop_cnt_5]"m"(loop_cnt[5]) \
210 , [loop_cnt_6]"m"(loop_cnt[6])
211
212#define INJECT_ASM_REG "$5"
213
214#define RSEQ_INJECT_CLOBBER \
215 , INJECT_ASM_REG
216
217#define RSEQ_INJECT_ASM(n) \
218 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
219 "beqz " INJECT_ASM_REG ", 333f\n\t" \
220 "222:\n\t" \
221 "addiu " INJECT_ASM_REG ", -1\n\t" \
222 "bnez " INJECT_ASM_REG ", 222b\n\t" \
223 "333:\n\t"
224
074b1077
MJ
225#elif defined(__riscv)
226
227#define RSEQ_INJECT_INPUT \
228 , [loop_cnt_1]"m"(loop_cnt[1]) \
229 , [loop_cnt_2]"m"(loop_cnt[2]) \
230 , [loop_cnt_3]"m"(loop_cnt[3]) \
231 , [loop_cnt_4]"m"(loop_cnt[4]) \
232 , [loop_cnt_5]"m"(loop_cnt[5]) \
233 , [loop_cnt_6]"m"(loop_cnt[6])
234
235#define INJECT_ASM_REG "t1"
236
237#define RSEQ_INJECT_CLOBBER \
238 , INJECT_ASM_REG
239
240#define RSEQ_INJECT_ASM(n) \
241 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
242 "beqz " INJECT_ASM_REG ", 333f\n\t" \
243 "222:\n\t" \
244 "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
245 "bnez " INJECT_ASM_REG ", 222b\n\t" \
246 "333:\n\t"
247
31b44ba2
MD
248#else
249#error unsupported target
250#endif
251
252#define RSEQ_INJECT_FAILED \
253 nr_abort++;
254
255#define RSEQ_INJECT_C(n) \
256{ \
257 int loc_i, loc_nr_loops = loop_cnt[n]; \
258 \
259 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
260 rseq_barrier(); \
261 } \
262 if (loc_nr_loops == -1 && opt_modulo) { \
263 if (yield_mod_cnt == opt_modulo - 1) { \
264 if (opt_sleep > 0) \
265 poll(NULL, 0, opt_sleep); \
266 if (opt_yield) \
267 sched_yield(); \
268 if (opt_signal) \
269 raise(SIGUSR1); \
270 yield_mod_cnt = 0; \
271 } else { \
272 yield_mod_cnt++; \
273 } \
274 } \
275}
276
277#else
278
279#define printf_verbose(fmt, ...)
280
281#endif /* BENCHMARK */
282
283#include <rseq/rseq.h>
284
369688a5
MD
285static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
286
287static int sys_membarrier(int cmd, int flags, int cpu_id)
288{
289 return syscall(__NR_membarrier, cmd, flags, cpu_id);
290}
291
b08be829 292#ifdef rseq_arch_has_load_cbne_load_add_store
369688a5
MD
293#define TEST_MEMBARRIER
294#endif
295
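/*
 * Per-cpu data is indexed either by mm_cid (memory map concurrency ID)
 * or by cpu_id, depending on the build. rseq_use_cpu_index() tells the
 * test code whether the index corresponds to an actual CPU number, in
 * which case iteration over the data can be restricted to the allowed
 * CPU affinity mask.
 */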
296#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
297# define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
298static
299int get_current_cpu_id(void)
300{
301 return rseq_current_mm_cid();
302}
303static
304bool rseq_validate_cpu_id(void)
305{
306 return rseq_mm_cid_available();
307}
40797ae3
MD
308static
309bool rseq_use_cpu_index(void)
310{
311 return false; /* Use mm_cid */
312}
369688a5
MD
313# ifdef TEST_MEMBARRIER
314/*
315 * Membarrier does not currently support targeting a mm_cid, so
316 * issue the barrier on all cpus.
317 */
318static
319int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
320{
321 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
322 0, 0);
323}
324# endif /* TEST_MEMBARRIER */
325#else
326# define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
327static
328int get_current_cpu_id(void)
329{
330 return rseq_cpu_start();
331}
332static
333bool rseq_validate_cpu_id(void)
334{
335 return rseq_current_cpu_raw() >= 0;
336}
40797ae3
MD
337static
338bool rseq_use_cpu_index(void)
339{
340 return true; /* Use cpu_id as index. */
341}
369688a5
MD
342# ifdef TEST_MEMBARRIER
343static
344int rseq_membarrier_expedited(int cpu)
345{
346 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
347 MEMBARRIER_CMD_FLAG_CPU, cpu);
348}
349# endif /* TEST_MEMBARRIER */
350#endif
351
31b44ba2 352struct percpu_lock {
324633af 353 intptr_t v;
31b44ba2
MD
354};
355
31b44ba2
MD
356struct spinlock_test_data {
357 struct percpu_lock lock;
324633af 358 intptr_t count;
31b44ba2
MD
359};
360
361struct spinlock_thread_test_data {
fe7f954a 362 struct spinlock_test_data __rseq_percpu *data;
31b44ba2
MD
363 long long reps;
364 int reg;
365};
366
367struct inc_test_data {
c8278da8 368 intptr_t count;
31b44ba2
MD
369};
370
371struct inc_thread_test_data {
c8278da8 372 struct inc_test_data __rseq_percpu *data;
31b44ba2
MD
373 long long reps;
374 int reg;
375};
376
377struct percpu_list_node {
378 intptr_t data;
379 struct percpu_list_node *next;
380};
381
31b44ba2 382struct percpu_list {
b08be829 383 struct percpu_list_node *head;
31b44ba2
MD
384};
385
386#define BUFFER_ITEM_PER_CPU 100
387
388struct percpu_buffer_node {
389 intptr_t data;
390};
391
392struct percpu_buffer_entry {
393 intptr_t offset;
394 intptr_t buflen;
395 struct percpu_buffer_node **array;
396} __attribute__((aligned(128)));
397
398struct percpu_buffer {
399 struct percpu_buffer_entry c[CPU_SETSIZE];
400};
401
402#define MEMCPY_BUFFER_ITEM_PER_CPU 100
403
404struct percpu_memcpy_buffer_node {
405 intptr_t data1;
406 uint64_t data2;
407};
408
409struct percpu_memcpy_buffer_entry {
410 intptr_t offset;
411 intptr_t buflen;
412 struct percpu_memcpy_buffer_node *array;
413} __attribute__((aligned(128)));
414
415struct percpu_memcpy_buffer {
416 struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
417};
418
419/* A simple percpu spinlock. Grabs lock on current cpu. */
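/*
 * The lock word is set to 1 with rseq_load_cbne_store__ptr(): the
 * store only commits if the word still reads 0 and the thread is still
 * running on `cpu`. A concurrent owner or a preemption/migration makes
 * the operation fail or abort, and the loop retries.
 */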
fe7f954a 420static int rseq_this_cpu_lock(struct percpu_lock __rseq_percpu *lock)
31b44ba2
MD
421{
422 int cpu;
423
424 for (;;) {
425 int ret;
426
369688a5 427 cpu = get_current_cpu_id();
3726b9f1
MD
428 if (cpu < 0) {
429 fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
430 getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
431 abort();
432 }
41149e28 433 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
324633af 434 &rseq_percpu_ptr(lock, cpu)->v,
31b44ba2
MD
435 0, 1, cpu);
436 if (rseq_likely(!ret))
437 break;
438 /* Retry if comparison fails or rseq aborts. */
439 }
440 /*
441 * Acquire semantic when taking lock after control dependency.
442 * Matches rseq_smp_store_release().
443 */
444 rseq_smp_acquire__after_ctrl_dep();
445 return cpu;
446}
447
fe7f954a 448static void rseq_percpu_unlock(struct percpu_lock __rseq_percpu *lock, int cpu)
31b44ba2 449{
324633af 450 assert(rseq_percpu_ptr(lock, cpu)->v == 1);
31b44ba2
MD
451 /*
452 * Release lock, with release semantic. Matches
453 * rseq_smp_acquire__after_ctrl_dep().
454 */
324633af 455 rseq_smp_store_release(&rseq_percpu_ptr(lock, cpu)->v, 0);
31b44ba2
MD
456}
457
6e284b80 458static void *test_percpu_spinlock_thread(void *arg)
31b44ba2 459{
d268885a 460 struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
fe7f954a 461 struct spinlock_test_data __rseq_percpu *data = thread_data->data;
31b44ba2
MD
462 long long i, reps;
463
464 if (!opt_disable_rseq && thread_data->reg &&
465 rseq_register_current_thread())
466 abort();
467 reps = thread_data->reps;
468 for (i = 0; i < reps; i++) {
af895f04 469 int cpu = rseq_this_cpu_lock(&data->lock);
324633af 470 rseq_percpu_ptr(data, cpu)->count++;
31b44ba2
MD
471 rseq_percpu_unlock(&data->lock, cpu);
472#ifndef BENCHMARK
473 if (i != 0 && !(i % (reps / 10)))
474 printf_verbose("tid %d: count %lld\n",
475 (int) rseq_gettid(), i);
476#endif
477 }
 478 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
479 (int) rseq_gettid(), nr_abort, signals_delivered);
480 if (!opt_disable_rseq && thread_data->reg &&
481 rseq_unregister_current_thread())
482 abort();
483 return NULL;
484}
485
486/*
487 * A simple test which implements a sharded counter using a per-cpu
488 * lock. Obviously real applications might prefer to simply use a
489 * per-cpu increment; however, this is reasonable for a test and the
490 * lock can be extended to synchronize more complicated operations.
491 */
6e284b80 492static void test_percpu_spinlock(void)
31b44ba2
MD
493{
494 const int num_threads = opt_threads;
495 int i, ret;
496 uint64_t sum;
497 pthread_t test_threads[num_threads];
fe7f954a 498 struct spinlock_test_data __rseq_percpu *data;
31b44ba2 499 struct spinlock_thread_test_data thread_data[num_threads];
324633af
MD
500 struct rseq_percpu_pool *mempool;
501
502 mempool = rseq_percpu_pool_create(sizeof(struct spinlock_test_data),
503 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
504 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
505 if (!mempool) {
506 perror("rseq_percpu_pool_create");
507 abort();
508 }
fe7f954a 509 data = (struct spinlock_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
324633af
MD
510 if (!data) {
511 perror("rseq_percpu_zmalloc");
512 abort();
513 }
31b44ba2 514
31b44ba2
MD
515 for (i = 0; i < num_threads; i++) {
516 thread_data[i].reps = opt_reps;
517 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
518 thread_data[i].reg = 1;
519 else
520 thread_data[i].reg = 0;
324633af 521 thread_data[i].data = data;
31b44ba2
MD
522 ret = pthread_create(&test_threads[i], NULL,
523 test_percpu_spinlock_thread,
524 &thread_data[i]);
525 if (ret) {
526 errno = ret;
527 perror("pthread_create");
528 abort();
529 }
530 }
531
532 for (i = 0; i < num_threads; i++) {
533 ret = pthread_join(test_threads[i], NULL);
534 if (ret) {
535 errno = ret;
536 perror("pthread_join");
537 abort();
538 }
539 }
540
541 sum = 0;
542 for (i = 0; i < CPU_SETSIZE; i++)
324633af 543 sum += rseq_percpu_ptr(data, i)->count;
31b44ba2
MD
544
545 assert(sum == (uint64_t)opt_reps * num_threads);
324633af
MD
546 rseq_percpu_free(data);
547 ret = rseq_percpu_pool_destroy(mempool);
548 if (ret) {
549 perror("rseq_percpu_pool_destroy");
550 abort();
551 }
31b44ba2
MD
552}
553
6e284b80 554static void *test_percpu_inc_thread(void *arg)
31b44ba2 555{
d268885a 556 struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
c8278da8 557 struct inc_test_data __rseq_percpu *data = thread_data->data;
31b44ba2
MD
558 long long i, reps;
559
560 if (!opt_disable_rseq && thread_data->reg &&
561 rseq_register_current_thread())
562 abort();
563 reps = thread_data->reps;
564 for (i = 0; i < reps; i++) {
565 int ret;
566
567 do {
568 int cpu;
569
369688a5 570 cpu = get_current_cpu_id();
41149e28 571 ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
c8278da8 572 &rseq_percpu_ptr(data, cpu)->count, 1, cpu);
31b44ba2
MD
573 } while (rseq_unlikely(ret));
574#ifndef BENCHMARK
575 if (i != 0 && !(i % (reps / 10)))
576 printf_verbose("tid %d: count %lld\n",
577 (int) rseq_gettid(), i);
578#endif
579 }
 580 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
581 (int) rseq_gettid(), nr_abort, signals_delivered);
582 if (!opt_disable_rseq && thread_data->reg &&
583 rseq_unregister_current_thread())
584 abort();
585 return NULL;
586}
587
6e284b80 588static void test_percpu_inc(void)
31b44ba2
MD
589{
590 const int num_threads = opt_threads;
591 int i, ret;
592 uint64_t sum;
593 pthread_t test_threads[num_threads];
c8278da8 594 struct inc_test_data __rseq_percpu *data;
31b44ba2 595 struct inc_thread_test_data thread_data[num_threads];
c8278da8
MD
596 struct rseq_percpu_pool *mempool;
597
598 mempool = rseq_percpu_pool_create(sizeof(struct inc_test_data),
599 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
600 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
601 if (!mempool) {
602 perror("rseq_percpu_pool_create");
603 abort();
604 }
605 data = (struct inc_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
606 if (!data) {
607 perror("rseq_percpu_zmalloc");
608 abort();
609 }
31b44ba2 610
31b44ba2
MD
611 for (i = 0; i < num_threads; i++) {
612 thread_data[i].reps = opt_reps;
613 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
614 thread_data[i].reg = 1;
615 else
616 thread_data[i].reg = 0;
c8278da8 617 thread_data[i].data = data;
31b44ba2
MD
618 ret = pthread_create(&test_threads[i], NULL,
619 test_percpu_inc_thread,
620 &thread_data[i]);
621 if (ret) {
622 errno = ret;
623 perror("pthread_create");
624 abort();
625 }
626 }
627
628 for (i = 0; i < num_threads; i++) {
629 ret = pthread_join(test_threads[i], NULL);
630 if (ret) {
631 errno = ret;
632 perror("pthread_join");
633 abort();
634 }
635 }
636
637 sum = 0;
638 for (i = 0; i < CPU_SETSIZE; i++)
c8278da8 639 sum += rseq_percpu_ptr(data, i)->count;
31b44ba2
MD
640
641 assert(sum == (uint64_t)opt_reps * num_threads);
c8278da8
MD
642 rseq_percpu_free(data);
643 ret = rseq_percpu_pool_destroy(mempool);
644 if (ret) {
645 perror("rseq_percpu_pool_destroy");
646 abort();
647 }
31b44ba2
MD
648}
649
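/*
 * Push a node onto this cpu's list: link node->next to the current
 * head, then publish the node with a compare-and-store that only
 * succeeds if the head is unchanged and the thread is still on `cpu`.
 */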
b08be829 650static void this_cpu_list_push(struct percpu_list __rseq_percpu *list,
31b44ba2
MD
651 struct percpu_list_node *node,
652 int *_cpu)
653{
654 int cpu;
655
656 for (;;) {
657 intptr_t *targetptr, newval, expect;
b08be829 658 struct percpu_list *cpulist;
31b44ba2
MD
659 int ret;
660
369688a5 661 cpu = get_current_cpu_id();
b08be829 662 cpulist = rseq_percpu_ptr(list, cpu);
31b44ba2 663 /* Load list->c[cpu].head with single-copy atomicity. */
b08be829 664 expect = (intptr_t)RSEQ_READ_ONCE(cpulist->head);
31b44ba2 665 newval = (intptr_t)node;
b08be829 666 targetptr = (intptr_t *)&cpulist->head;
31b44ba2 667 node->next = (struct percpu_list_node *)expect;
41149e28 668 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 669 targetptr, expect, newval, cpu);
31b44ba2
MD
670 if (rseq_likely(!ret))
671 break;
672 /* Retry if comparison fails or rseq aborts. */
673 }
674 if (_cpu)
675 *_cpu = cpu;
676}
677
678/*
 679 * Unlike a traditional lock-less linked list, the availability of an
 680 * rseq primitive allows us to implement pop without concern over
 681 * ABA-type races.
682 */
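/*
 * The pop operation loads the current head (failing if it equals
 * expectnot, i.e. NULL), stores it into `load`, and replaces the head
 * with the node's `next` pointer (read through `offset`), all within a
 * single restartable sequence on `cpu`. Since the dereference of
 * head->next and the update of the head cannot be interleaved with
 * another commit on the same cpu, node address reuse cannot corrupt
 * the list the way it can with a classic lock-free stack.
 */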
b08be829 683static struct percpu_list_node *this_cpu_list_pop(struct percpu_list __rseq_percpu *list,
31b44ba2
MD
684 int *_cpu)
685{
686 struct percpu_list_node *node = NULL;
687 int cpu;
688
689 for (;;) {
690 struct percpu_list_node *head;
691 intptr_t *targetptr, expectnot, *load;
b08be829 692 struct percpu_list *cpulist;
d35eae6b
MD
693 long offset;
694 int ret;
31b44ba2 695
369688a5 696 cpu = get_current_cpu_id();
b08be829
MD
697 cpulist = rseq_percpu_ptr(list, cpu);
698 targetptr = (intptr_t *)&cpulist->head;
31b44ba2
MD
699 expectnot = (intptr_t)NULL;
700 offset = offsetof(struct percpu_list_node, next);
701 load = (intptr_t *)&head;
41149e28 702 ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5
MD
703 targetptr, expectnot,
704 offset, load, cpu);
31b44ba2
MD
705 if (rseq_likely(!ret)) {
706 node = head;
707 break;
708 }
709 if (ret > 0)
710 break;
711 /* Retry if rseq aborts. */
712 }
713 if (_cpu)
714 *_cpu = cpu;
715 return node;
716}
717
718/*
719 * __percpu_list_pop is not safe against concurrent accesses. Should
720 * only be used on lists that are not concurrently modified.
721 */
b08be829 722static struct percpu_list_node *__percpu_list_pop(struct percpu_list __rseq_percpu *list, int cpu)
31b44ba2 723{
b08be829 724 struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
31b44ba2
MD
725 struct percpu_list_node *node;
726
b08be829 727 node = cpulist->head;
31b44ba2
MD
728 if (!node)
729 return NULL;
b08be829 730 cpulist->head = node->next;
31b44ba2
MD
731 return node;
732}
733
6e284b80 734static void *test_percpu_list_thread(void *arg)
31b44ba2
MD
735{
736 long long i, reps;
b08be829 737 struct percpu_list __rseq_percpu *list = (struct percpu_list __rseq_percpu *)arg;
31b44ba2
MD
738
739 if (!opt_disable_rseq && rseq_register_current_thread())
740 abort();
741
742 reps = opt_reps;
743 for (i = 0; i < reps; i++) {
744 struct percpu_list_node *node;
745
746 node = this_cpu_list_pop(list, NULL);
747 if (opt_yield)
748 sched_yield(); /* encourage shuffling */
749 if (node)
750 this_cpu_list_push(list, node, NULL);
751 }
752
 753 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
754 (int) rseq_gettid(), nr_abort, signals_delivered);
755 if (!opt_disable_rseq && rseq_unregister_current_thread())
756 abort();
757
758 return NULL;
759}
760
761/* Simultaneous modification to a per-cpu linked list from many threads. */
6e284b80 762static void test_percpu_list(void)
31b44ba2
MD
763{
764 const int num_threads = opt_threads;
765 int i, j, ret;
766 uint64_t sum = 0, expected_sum = 0;
b08be829 767 struct percpu_list __rseq_percpu *list;
31b44ba2
MD
768 pthread_t test_threads[num_threads];
769 cpu_set_t allowed_cpus;
b08be829 770 struct rseq_percpu_pool *mempool;
31b44ba2 771
b08be829
MD
772 mempool = rseq_percpu_pool_create(sizeof(struct percpu_list),
773 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
774 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
775 if (!mempool) {
776 perror("rseq_percpu_pool_create");
777 abort();
778 }
779 list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool);
780 if (!list) {
781 perror("rseq_percpu_zmalloc");
782 abort();
783 }
31b44ba2
MD
784
785 /* Generate list entries for every usable cpu. */
786 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
787 for (i = 0; i < CPU_SETSIZE; i++) {
40797ae3 788 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
31b44ba2
MD
789 continue;
790 for (j = 1; j <= 100; j++) {
b08be829 791 struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
31b44ba2
MD
792 struct percpu_list_node *node;
793
794 expected_sum += j;
795
d268885a 796 node = (struct percpu_list_node *) malloc(sizeof(*node));
31b44ba2
MD
797 assert(node);
798 node->data = j;
b08be829
MD
799 node->next = cpulist->head;
800 cpulist->head = node;
31b44ba2
MD
801 }
802 }
803
804 for (i = 0; i < num_threads; i++) {
805 ret = pthread_create(&test_threads[i], NULL,
b08be829 806 test_percpu_list_thread, list);
31b44ba2
MD
807 if (ret) {
808 errno = ret;
809 perror("pthread_create");
810 abort();
811 }
812 }
813
814 for (i = 0; i < num_threads; i++) {
815 ret = pthread_join(test_threads[i], NULL);
816 if (ret) {
817 errno = ret;
818 perror("pthread_join");
819 abort();
820 }
821 }
822
823 for (i = 0; i < CPU_SETSIZE; i++) {
824 struct percpu_list_node *node;
825
40797ae3 826 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
31b44ba2
MD
827 continue;
828
b08be829 829 while ((node = __percpu_list_pop(list, i))) {
31b44ba2
MD
830 sum += node->data;
831 free(node);
832 }
833 }
834
835 /*
836 * All entries should now be accounted for (unless some external
837 * actor is interfering with our allowed affinity while this
838 * test is running).
839 */
840 assert(sum == expected_sum);
b08be829
MD
841 rseq_percpu_free(list);
842 ret = rseq_percpu_pool_destroy(mempool);
843 if (ret) {
844 perror("rseq_percpu_pool_destroy");
845 abort();
846 }
31b44ba2
MD
847}
848
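/*
 * Push into the per-cpu buffer with rseq_load_cbne_store_store__ptr():
 * the node pointer is speculatively stored into array[offset], and the
 * operation commits by storing offset + 1 into the offset word,
 * provided the offset has not changed and the thread is still on
 * `cpu`.
 */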
6e284b80 849static bool this_cpu_buffer_push(struct percpu_buffer *buffer,
31b44ba2
MD
850 struct percpu_buffer_node *node,
851 int *_cpu)
852{
853 bool result = false;
854 int cpu;
855
856 for (;;) {
857 intptr_t *targetptr_spec, newval_spec;
858 intptr_t *targetptr_final, newval_final;
859 intptr_t offset;
860 int ret;
861
369688a5 862 cpu = get_current_cpu_id();
31b44ba2
MD
863 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
864 if (offset == buffer->c[cpu].buflen)
865 break;
866 newval_spec = (intptr_t)node;
867 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
868 newval_final = offset + 1;
869 targetptr_final = &buffer->c[cpu].offset;
41149e28 870 ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
369688a5
MD
871 targetptr_final, offset, targetptr_spec,
872 newval_spec, newval_final, cpu);
31b44ba2
MD
873 if (rseq_likely(!ret)) {
874 result = true;
875 break;
876 }
877 /* Retry if comparison fails or rseq aborts. */
878 }
879 if (_cpu)
880 *_cpu = cpu;
881 return result;
882}
883
6e284b80 884static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
31b44ba2
MD
885 int *_cpu)
886{
887 struct percpu_buffer_node *head;
888 int cpu;
889
890 for (;;) {
891 intptr_t *targetptr, newval;
892 intptr_t offset;
893 int ret;
894
369688a5 895 cpu = get_current_cpu_id();
31b44ba2
MD
896 /* Load offset with single-copy atomicity. */
897 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
898 if (offset == 0) {
899 head = NULL;
900 break;
901 }
902 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
903 newval = offset - 1;
904 targetptr = (intptr_t *)&buffer->c[cpu].offset;
41149e28 905 ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 906 targetptr, offset,
31b44ba2
MD
907 (intptr_t *)&buffer->c[cpu].array[offset - 1],
908 (intptr_t)head, newval, cpu);
909 if (rseq_likely(!ret))
910 break;
911 /* Retry if comparison fails or rseq aborts. */
912 }
913 if (_cpu)
914 *_cpu = cpu;
915 return head;
916}
917
918/*
919 * __percpu_buffer_pop is not safe against concurrent accesses. Should
920 * only be used on buffers that are not concurrently modified.
921 */
6e284b80 922static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
31b44ba2
MD
923 int cpu)
924{
925 struct percpu_buffer_node *head;
926 intptr_t offset;
927
928 offset = buffer->c[cpu].offset;
929 if (offset == 0)
930 return NULL;
931 head = buffer->c[cpu].array[offset - 1];
932 buffer->c[cpu].offset = offset - 1;
933 return head;
934}
935
6e284b80 936static void *test_percpu_buffer_thread(void *arg)
31b44ba2
MD
937{
938 long long i, reps;
939 struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
940
941 if (!opt_disable_rseq && rseq_register_current_thread())
942 abort();
943
944 reps = opt_reps;
945 for (i = 0; i < reps; i++) {
946 struct percpu_buffer_node *node;
947
948 node = this_cpu_buffer_pop(buffer, NULL);
949 if (opt_yield)
950 sched_yield(); /* encourage shuffling */
951 if (node) {
952 if (!this_cpu_buffer_push(buffer, node, NULL)) {
953 /* Should increase buffer size. */
954 abort();
955 }
956 }
957 }
958
 959 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
960 (int) rseq_gettid(), nr_abort, signals_delivered);
961 if (!opt_disable_rseq && rseq_unregister_current_thread())
962 abort();
963
964 return NULL;
965}
966
967/* Simultaneous modification to a per-cpu buffer from many threads. */
6e284b80 968static void test_percpu_buffer(void)
31b44ba2
MD
969{
970 const int num_threads = opt_threads;
971 int i, j, ret;
972 uint64_t sum = 0, expected_sum = 0;
973 struct percpu_buffer buffer;
974 pthread_t test_threads[num_threads];
975 cpu_set_t allowed_cpus;
976
977 memset(&buffer, 0, sizeof(buffer));
978
979 /* Generate list entries for every usable cpu. */
980 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
981 for (i = 0; i < CPU_SETSIZE; i++) {
40797ae3 982 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
31b44ba2
MD
983 continue;
 984 /* Worst case: every item in the same CPU. */
985 buffer.c[i].array =
d268885a 986 (struct percpu_buffer_node **)
31b44ba2
MD
987 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
988 BUFFER_ITEM_PER_CPU);
989 assert(buffer.c[i].array);
990 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
991 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
992 struct percpu_buffer_node *node;
993
994 expected_sum += j;
995
996 /*
997 * We could theoretically put the word-sized
998 * "data" directly in the buffer. However, we
999 * want to model objects that would not fit
1000 * within a single word, so allocate an object
1001 * for each node.
1002 */
d268885a 1003 node = (struct percpu_buffer_node *) malloc(sizeof(*node));
31b44ba2
MD
1004 assert(node);
1005 node->data = j;
1006 buffer.c[i].array[j - 1] = node;
1007 buffer.c[i].offset++;
1008 }
1009 }
1010
1011 for (i = 0; i < num_threads; i++) {
1012 ret = pthread_create(&test_threads[i], NULL,
1013 test_percpu_buffer_thread, &buffer);
1014 if (ret) {
1015 errno = ret;
1016 perror("pthread_create");
1017 abort();
1018 }
1019 }
1020
1021 for (i = 0; i < num_threads; i++) {
1022 ret = pthread_join(test_threads[i], NULL);
1023 if (ret) {
1024 errno = ret;
1025 perror("pthread_join");
1026 abort();
1027 }
1028 }
1029
1030 for (i = 0; i < CPU_SETSIZE; i++) {
1031 struct percpu_buffer_node *node;
1032
40797ae3 1033 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
31b44ba2
MD
1034 continue;
1035
1036 while ((node = __percpu_buffer_pop(&buffer, i))) {
1037 sum += node->data;
1038 free(node);
1039 }
1040 free(buffer.c[i].array);
1041 }
1042
1043 /*
1044 * All entries should now be accounted for (unless some external
1045 * actor is interfering with our allowed affinity while this
1046 * test is running).
1047 */
1048 assert(sum == expected_sum);
1049}
1050
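/*
 * Same idea as the pointer buffer above, except the whole item is
 * copied by memcpy within the critical section
 * (rseq_load_cbne_memcpy_store__ptr) before the offset word is
 * updated, modelling objects larger than a single word.
 */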
6e284b80 1051static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
31b44ba2
MD
1052 struct percpu_memcpy_buffer_node item,
1053 int *_cpu)
1054{
1055 bool result = false;
1056 int cpu;
1057
1058 for (;;) {
1059 intptr_t *targetptr_final, newval_final, offset;
1060 char *destptr, *srcptr;
1061 size_t copylen;
1062 int ret;
1063
369688a5 1064 cpu = get_current_cpu_id();
31b44ba2
MD
1065 /* Load offset with single-copy atomicity. */
1066 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
1067 if (offset == buffer->c[cpu].buflen)
1068 break;
1069 destptr = (char *)&buffer->c[cpu].array[offset];
1070 srcptr = (char *)&item;
1071 /* copylen must be <= 4kB. */
1072 copylen = sizeof(item);
1073 newval_final = offset + 1;
1074 targetptr_final = &buffer->c[cpu].offset;
41149e28 1075 ret = rseq_load_cbne_memcpy_store__ptr(
369688a5
MD
1076 opt_mo, RSEQ_PERCPU,
1077 targetptr_final, offset,
1078 destptr, srcptr, copylen,
1079 newval_final, cpu);
31b44ba2
MD
1080 if (rseq_likely(!ret)) {
1081 result = true;
1082 break;
1083 }
1084 /* Retry if comparison fails or rseq aborts. */
1085 }
1086 if (_cpu)
1087 *_cpu = cpu;
1088 return result;
1089}
1090
6e284b80 1091static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
31b44ba2
MD
1092 struct percpu_memcpy_buffer_node *item,
1093 int *_cpu)
1094{
1095 bool result = false;
1096 int cpu;
1097
1098 for (;;) {
1099 intptr_t *targetptr_final, newval_final, offset;
1100 char *destptr, *srcptr;
1101 size_t copylen;
1102 int ret;
1103
369688a5 1104 cpu = get_current_cpu_id();
31b44ba2
MD
1105 /* Load offset with single-copy atomicity. */
1106 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
1107 if (offset == 0)
1108 break;
1109 destptr = (char *)item;
1110 srcptr = (char *)&buffer->c[cpu].array[offset - 1];
1111 /* copylen must be <= 4kB. */
1112 copylen = sizeof(*item);
1113 newval_final = offset - 1;
1114 targetptr_final = &buffer->c[cpu].offset;
41149e28 1115 ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 1116 targetptr_final, offset, destptr, srcptr, copylen,
31b44ba2
MD
1117 newval_final, cpu);
1118 if (rseq_likely(!ret)) {
1119 result = true;
1120 break;
1121 }
1122 /* Retry if comparison fails or rseq aborts. */
1123 }
1124 if (_cpu)
1125 *_cpu = cpu;
1126 return result;
1127}
1128
1129/*
1130 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1131 * only be used on buffers that are not concurrently modified.
1132 */
6e284b80 1133static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
31b44ba2
MD
1134 struct percpu_memcpy_buffer_node *item,
1135 int cpu)
1136{
1137 intptr_t offset;
1138
1139 offset = buffer->c[cpu].offset;
1140 if (offset == 0)
1141 return false;
1142 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
1143 buffer->c[cpu].offset = offset - 1;
1144 return true;
1145}
1146
6e284b80 1147static void *test_percpu_memcpy_buffer_thread(void *arg)
31b44ba2
MD
1148{
1149 long long i, reps;
1150 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
1151
1152 if (!opt_disable_rseq && rseq_register_current_thread())
1153 abort();
1154
1155 reps = opt_reps;
1156 for (i = 0; i < reps; i++) {
1157 struct percpu_memcpy_buffer_node item;
1158 bool result;
1159
1160 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1161 if (opt_yield)
1162 sched_yield(); /* encourage shuffling */
1163 if (result) {
1164 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1165 /* Should increase buffer size. */
1166 abort();
1167 }
1168 }
1169 }
1170
 1171 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
1172 (int) rseq_gettid(), nr_abort, signals_delivered);
1173 if (!opt_disable_rseq && rseq_unregister_current_thread())
1174 abort();
1175
1176 return NULL;
1177}
1178
1179/* Simultaneous modification to a per-cpu buffer from many threads. */
6e284b80 1180static void test_percpu_memcpy_buffer(void)
31b44ba2
MD
1181{
1182 const int num_threads = opt_threads;
1183 int i, j, ret;
1184 uint64_t sum = 0, expected_sum = 0;
1185 struct percpu_memcpy_buffer buffer;
1186 pthread_t test_threads[num_threads];
1187 cpu_set_t allowed_cpus;
1188
1189 memset(&buffer, 0, sizeof(buffer));
1190
1191 /* Generate list entries for every usable cpu. */
1192 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1193 for (i = 0; i < CPU_SETSIZE; i++) {
40797ae3 1194 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
31b44ba2
MD
1195 continue;
 1196 /* Worst case: every item in the same CPU. */
1197 buffer.c[i].array =
d268885a 1198 (struct percpu_memcpy_buffer_node *)
31b44ba2
MD
1199 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1200 MEMCPY_BUFFER_ITEM_PER_CPU);
1201 assert(buffer.c[i].array);
1202 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1203 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1204 expected_sum += 2 * j + 1;
1205
1206 /*
1207 * We could theoretically put the word-sized
1208 * "data" directly in the buffer. However, we
1209 * want to model objects that would not fit
1210 * within a single word, so allocate an object
1211 * for each node.
1212 */
1213 buffer.c[i].array[j - 1].data1 = j;
1214 buffer.c[i].array[j - 1].data2 = j + 1;
1215 buffer.c[i].offset++;
1216 }
1217 }
1218
1219 for (i = 0; i < num_threads; i++) {
1220 ret = pthread_create(&test_threads[i], NULL,
1221 test_percpu_memcpy_buffer_thread,
1222 &buffer);
1223 if (ret) {
1224 errno = ret;
1225 perror("pthread_create");
1226 abort();
1227 }
1228 }
1229
1230 for (i = 0; i < num_threads; i++) {
1231 ret = pthread_join(test_threads[i], NULL);
1232 if (ret) {
1233 errno = ret;
1234 perror("pthread_join");
1235 abort();
1236 }
1237 }
1238
1239 for (i = 0; i < CPU_SETSIZE; i++) {
1240 struct percpu_memcpy_buffer_node item;
1241
40797ae3 1242 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
31b44ba2
MD
1243 continue;
1244
1245 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1246 sum += item.data1;
1247 sum += item.data2;
1248 }
1249 free(buffer.c[i].array);
1250 }
1251
1252 /*
1253 * All entries should now be accounted for (unless some external
1254 * actor is interfering with our allowed affinity while this
1255 * test is running).
1256 */
1257 assert(sum == expected_sum);
1258}
1259
544cdc88
MJ
1260
1261static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
31b44ba2
MD
1262{
1263 signals_delivered++;
1264}
1265
1266static int set_signal_handler(void)
1267{
1268 int ret = 0;
1269 struct sigaction sa;
1270 sigset_t sigset;
1271
1272 ret = sigemptyset(&sigset);
1273 if (ret < 0) {
1274 perror("sigemptyset");
1275 return ret;
1276 }
1277
1278 sa.sa_handler = test_signal_interrupt_handler;
1279 sa.sa_mask = sigset;
1280 sa.sa_flags = 0;
1281 ret = sigaction(SIGUSR1, &sa, NULL);
1282 if (ret < 0) {
1283 perror("sigaction");
1284 return ret;
1285 }
1286
1287 printf_verbose("Signal handler set for SIGUSR1\n");
1288
1289 return ret;
1290}
1291
3664098e
MD
1292static
1293bool membarrier_private_expedited_rseq_available(void)
1294{
1295 int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
1296
1297 if (status < 0) {
1298 perror("membarrier");
1299 return false;
1300 }
1301 if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
1302 return false;
1303 return true;
1304}
1305
5368dcb4 1306/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
369688a5 1307#ifdef TEST_MEMBARRIER
5368dcb4 1308struct test_membarrier_thread_args {
b08be829 1309 struct percpu_list __rseq_percpu *percpu_list_ptr;
5368dcb4 1310 int stop;
5368dcb4
MD
1311};
1312
1313/* Worker threads modify data in their "active" percpu lists. */
1314static
1315void *test_membarrier_worker_thread(void *arg)
1316{
1317 struct test_membarrier_thread_args *args =
1318 (struct test_membarrier_thread_args *)arg;
1319 const int iters = opt_reps;
1320 int i;
1321
1322 if (rseq_register_current_thread()) {
1323 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1324 errno, strerror(errno));
1325 abort();
1326 }
1327
1328 /* Wait for initialization. */
1329 while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }
1330
1331 for (i = 0; i < iters; ++i) {
1332 int ret;
1333
1334 do {
369688a5 1335 int cpu = get_current_cpu_id();
b08be829
MD
1336 struct percpu_list __rseq_percpu *list = RSEQ_READ_ONCE(args->percpu_list_ptr);
1337 struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
5368dcb4 1338
b08be829
MD
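			/*
			 * Add 1 to the head node's counter for this cpu, but
			 * only if percpu_list_ptr still points at `list`. If
			 * the manager thread swapped the pointer (or this
			 * thread migrated), the sequence fails or aborts and
			 * we retry against the freshly loaded list.
			 */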
1339 ret = rseq_load_cbne_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1340 (intptr_t *) &args->percpu_list_ptr, (intptr_t) list,
1341 &cpulist->head->data, 1, cpu);
5368dcb4
MD
1342 } while (rseq_unlikely(ret));
1343 }
1344
1345 if (rseq_unregister_current_thread()) {
1346 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1347 errno, strerror(errno));
1348 abort();
1349 }
1350 return NULL;
1351}
1352
1353static
b08be829 1354struct percpu_list __rseq_percpu *test_membarrier_alloc_percpu_list(struct rseq_percpu_pool *mempool)
5368dcb4 1355{
b08be829 1356 struct percpu_list __rseq_percpu *list;
5368dcb4
MD
1357 int i;
1358
b08be829
MD
1359 list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool);
1360 if (!list) {
1361 perror("rseq_percpu_zmalloc");
1362 return NULL;
1363 }
5368dcb4 1364 for (i = 0; i < CPU_SETSIZE; i++) {
b08be829 1365 struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
5368dcb4
MD
1366 struct percpu_list_node *node;
1367
1368 node = (struct percpu_list_node *) malloc(sizeof(*node));
1369 assert(node);
1370 node->data = 0;
1371 node->next = NULL;
b08be829 1372 cpulist->head = node;
5368dcb4 1373 }
b08be829 1374 return list;
5368dcb4
MD
1375}
1376
1377static
b08be829 1378void test_membarrier_free_percpu_list(struct percpu_list __rseq_percpu *list)
5368dcb4
MD
1379{
1380 int i;
1381
1382 for (i = 0; i < CPU_SETSIZE; i++)
b08be829
MD
1383 free(rseq_percpu_ptr(list, i)->head);
1384 rseq_percpu_free(list);
5368dcb4
MD
1385}
1386
5368dcb4
MD
1387/*
1388 * The manager thread swaps per-cpu lists that worker threads see,
1389 * and validates that there are no unexpected modifications.
1390 */
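/*
 * After publishing a new active list pointer, the manager issues an
 * rseq-expedited membarrier targeting the CPU that was using the old
 * list. This restarts any rseq critical section in flight there, so
 * workers re-read percpu_list_ptr (which the critical section compares
 * against) and can no longer commit increments into the retired list,
 * whose per-cpu values must then remain stable.
 */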
1391static
1392void *test_membarrier_manager_thread(void *arg)
1393{
1394 struct test_membarrier_thread_args *args =
1395 (struct test_membarrier_thread_args *)arg;
b08be829 1396 struct percpu_list __rseq_percpu *list_a, __rseq_percpu *list_b;
5368dcb4
MD
1397 intptr_t expect_a = 0, expect_b = 0;
1398 int cpu_a = 0, cpu_b = 0;
b08be829
MD
1399 struct rseq_percpu_pool *mempool;
1400 int ret;
1401
1402 mempool = rseq_percpu_pool_create(sizeof(struct percpu_list),
1403 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
1404 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
1405 if (!mempool) {
1406 perror("rseq_percpu_pool_create");
1407 abort();
1408 }
5368dcb4
MD
1409
1410 if (rseq_register_current_thread()) {
1411 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1412 errno, strerror(errno));
1413 abort();
1414 }
1415
1416 /* Init lists. */
b08be829
MD
1417 list_a = test_membarrier_alloc_percpu_list(mempool);
1418 assert(list_a);
1419 list_b = test_membarrier_alloc_percpu_list(mempool);
1420 assert(list_b);
5368dcb4
MD
1421
1422 /* Initialize lists before publishing them. */
1423 rseq_smp_wmb();
1424
b08be829 1425 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
5368dcb4
MD
1426
1427 while (!RSEQ_READ_ONCE(args->stop)) {
1428 /* list_a is "active". */
1429 cpu_a = rand() % CPU_SETSIZE;
1430 /*
1431 * As list_b is "inactive", we should never see changes
1432 * to list_b.
1433 */
b08be829 1434 if (expect_b != RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data)) {
5368dcb4
MD
1435 fprintf(stderr, "Membarrier test failed\n");
1436 abort();
1437 }
1438
1439 /* Make list_b "active". */
b08be829 1440 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_b);
369688a5 1441 if (rseq_membarrier_expedited(cpu_a) &&
5368dcb4
MD
1442 errno != ENXIO /* missing CPU */) {
1443 perror("sys_membarrier");
1444 abort();
1445 }
1446 /*
1447 * Cpu A should now only modify list_b, so the values
1448 * in list_a should be stable.
1449 */
b08be829 1450 expect_a = RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data);
5368dcb4
MD
1451
1452 cpu_b = rand() % CPU_SETSIZE;
1453 /*
1454 * As list_a is "inactive", we should never see changes
1455 * to list_a.
1456 */
b08be829 1457 if (expect_a != RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data)) {
5368dcb4
MD
1458 fprintf(stderr, "Membarrier test failed\n");
1459 abort();
1460 }
1461
1462 /* Make list_a "active". */
b08be829 1463 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
369688a5 1464 if (rseq_membarrier_expedited(cpu_b) &&
5368dcb4
MD
1465 errno != ENXIO /* missing CPU */) {
1466 perror("sys_membarrier");
1467 abort();
1468 }
1469 /* Remember a value from list_b. */
b08be829 1470 expect_b = RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data);
5368dcb4
MD
1471 }
1472
b08be829
MD
1473 test_membarrier_free_percpu_list(list_a);
1474 test_membarrier_free_percpu_list(list_b);
5368dcb4
MD
1475
1476 if (rseq_unregister_current_thread()) {
1477 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1478 errno, strerror(errno));
1479 abort();
1480 }
b08be829
MD
1481 ret = rseq_percpu_pool_destroy(mempool);
1482 if (ret) {
1483 perror("rseq_percpu_pool_destroy");
1484 abort();
1485 }
1486
5368dcb4
MD
1487 return NULL;
1488}
1489
1490static
1491void test_membarrier(void)
1492{
1493 const int num_threads = opt_threads;
1494 struct test_membarrier_thread_args thread_args;
1495 pthread_t worker_threads[num_threads];
1496 pthread_t manager_thread;
1497 int i, ret;
1498
d4bff8ed
MD
1499 if (!membarrier_private_expedited_rseq_available()) {
1500 fprintf(stderr, "Membarrier private expedited rseq not available. "
1501 "Skipping membarrier test.\n");
1502 return;
1503 }
5368dcb4
MD
1504 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1505 perror("sys_membarrier");
1506 abort();
1507 }
1508
b08be829 1509 thread_args.percpu_list_ptr = NULL;
5368dcb4 1510 thread_args.stop = 0;
5368dcb4
MD
1511 ret = pthread_create(&manager_thread, NULL,
1512 test_membarrier_manager_thread, &thread_args);
1513 if (ret) {
1514 errno = ret;
1515 perror("pthread_create");
1516 abort();
1517 }
1518
1519 for (i = 0; i < num_threads; i++) {
1520 ret = pthread_create(&worker_threads[i], NULL,
1521 test_membarrier_worker_thread, &thread_args);
1522 if (ret) {
1523 errno = ret;
1524 perror("pthread_create");
1525 abort();
1526 }
1527 }
1528
1529
1530 for (i = 0; i < num_threads; i++) {
1531 ret = pthread_join(worker_threads[i], NULL);
1532 if (ret) {
1533 errno = ret;
1534 perror("pthread_join");
1535 abort();
1536 }
1537 }
1538
1539 RSEQ_WRITE_ONCE(thread_args.stop, 1);
1540 ret = pthread_join(manager_thread, NULL);
1541 if (ret) {
1542 errno = ret;
1543 perror("pthread_join");
1544 abort();
1545 }
1546}
369688a5 1547#else /* TEST_MEMBARRIER */
5368dcb4
MD
1548static
1549void test_membarrier(void)
1550{
d4bff8ed
MD
1551 if (!membarrier_private_expedited_rseq_available()) {
1552 fprintf(stderr, "Membarrier private expedited rseq not available. "
1553 "Skipping membarrier test.\n");
1554 return;
1555 }
3cde2ee2 1556 fprintf(stderr, "rseq_load_cbne_load_add_store__ptr is not implemented on this architecture. "
5368dcb4
MD
1557 "Skipping membarrier test.\n");
1558}
1559#endif
1560
544cdc88 1561static void show_usage(char **argv)
31b44ba2
MD
1562{
1563 printf("Usage : %s <OPTIONS>\n",
1564 argv[0]);
1565 printf("OPTIONS:\n");
1566 printf(" [-1 loops] Number of loops for delay injection 1\n");
1567 printf(" [-2 loops] Number of loops for delay injection 2\n");
1568 printf(" [-3 loops] Number of loops for delay injection 3\n");
1569 printf(" [-4 loops] Number of loops for delay injection 4\n");
1570 printf(" [-5 loops] Number of loops for delay injection 5\n");
1571 printf(" [-6 loops] Number of loops for delay injection 6\n");
1572 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1573 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1574 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1575 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1576 printf(" [-y] Yield\n");
1577 printf(" [-k] Kill thread with signal\n");
1578 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1579 printf(" [-t N] Number of threads (default 200)\n");
1580 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1581 printf(" [-d] Disable rseq system call (no initialization)\n");
1582 printf(" [-D M] Disable rseq for each M threads\n");
5368dcb4 1583 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
31b44ba2 1584 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
d1cdec98 1585 printf(" [-c] Check if the rseq syscall is available.\n");
31b44ba2
MD
1586 printf(" [-v] Verbose output.\n");
1587 printf(" [-h] Show this help.\n");
1588 printf("\n");
1589}
1590
1591int main(int argc, char **argv)
1592{
1593 int i;
1594
1595 for (i = 1; i < argc; i++) {
1596 if (argv[i][0] != '-')
1597 continue;
1598 switch (argv[i][1]) {
1599 case '1':
1600 case '2':
1601 case '3':
1602 case '4':
1603 case '5':
1604 case '6':
1605 case '7':
1606 case '8':
1607 case '9':
1608 if (argc < i + 2) {
544cdc88 1609 show_usage(argv);
31b44ba2
MD
1610 goto error;
1611 }
1612 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1613 i++;
1614 break;
1615 case 'm':
1616 if (argc < i + 2) {
544cdc88 1617 show_usage(argv);
31b44ba2
MD
1618 goto error;
1619 }
1620 opt_modulo = atol(argv[i + 1]);
1621 if (opt_modulo < 0) {
544cdc88 1622 show_usage(argv);
31b44ba2
MD
1623 goto error;
1624 }
1625 i++;
1626 break;
1627 case 's':
1628 if (argc < i + 2) {
544cdc88 1629 show_usage(argv);
31b44ba2
MD
1630 goto error;
1631 }
1632 opt_sleep = atol(argv[i + 1]);
1633 if (opt_sleep < 0) {
544cdc88 1634 show_usage(argv);
31b44ba2
MD
1635 goto error;
1636 }
1637 i++;
1638 break;
1639 case 'y':
1640 opt_yield = 1;
1641 break;
1642 case 'k':
1643 opt_signal = 1;
1644 break;
1645 case 'd':
1646 opt_disable_rseq = 1;
1647 break;
1648 case 'D':
1649 if (argc < i + 2) {
544cdc88 1650 show_usage(argv);
31b44ba2
MD
1651 goto error;
1652 }
1653 opt_disable_mod = atol(argv[i + 1]);
1654 if (opt_disable_mod < 0) {
544cdc88 1655 show_usage(argv);
31b44ba2
MD
1656 goto error;
1657 }
1658 i++;
1659 break;
1660 case 't':
1661 if (argc < i + 2) {
544cdc88 1662 show_usage(argv);
31b44ba2
MD
1663 goto error;
1664 }
1665 opt_threads = atol(argv[i + 1]);
1666 if (opt_threads < 0) {
544cdc88 1667 show_usage(argv);
31b44ba2
MD
1668 goto error;
1669 }
1670 i++;
1671 break;
1672 case 'r':
1673 if (argc < i + 2) {
544cdc88 1674 show_usage(argv);
31b44ba2
MD
1675 goto error;
1676 }
1677 opt_reps = atoll(argv[i + 1]);
1678 if (opt_reps < 0) {
544cdc88 1679 show_usage(argv);
31b44ba2
MD
1680 goto error;
1681 }
1682 i++;
1683 break;
1684 case 'h':
544cdc88 1685 show_usage(argv);
31b44ba2
MD
1686 goto end;
1687 case 'T':
1688 if (argc < i + 2) {
544cdc88 1689 show_usage(argv);
31b44ba2
MD
1690 goto error;
1691 }
1692 opt_test = *argv[i + 1];
1693 switch (opt_test) {
1694 case 's':
1695 case 'l':
1696 case 'i':
1697 case 'b':
1698 case 'm':
5368dcb4 1699 case 'r':
31b44ba2
MD
1700 break;
1701 default:
544cdc88 1702 show_usage(argv);
31b44ba2
MD
1703 goto error;
1704 }
1705 i++;
1706 break;
1707 case 'v':
1708 verbose = 1;
1709 break;
1710 case 'M':
369688a5 1711 opt_mo = RSEQ_MO_RELEASE;
31b44ba2 1712 break;
d1cdec98 1713 case 'c':
8b34114a 1714 if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
d1cdec98
MJ
1715 printf_verbose("The rseq syscall is available.\n");
1716 goto end;
1717 } else {
1718 printf_verbose("The rseq syscall is unavailable.\n");
1719 goto no_rseq;
1720 }
31b44ba2 1721 default:
544cdc88 1722 show_usage(argv);
31b44ba2
MD
1723 goto error;
1724 }
1725 }
1726
1727 loop_cnt_1 = loop_cnt[1];
1728 loop_cnt_2 = loop_cnt[2];
1729 loop_cnt_3 = loop_cnt[3];
1730 loop_cnt_4 = loop_cnt[4];
1731 loop_cnt_5 = loop_cnt[5];
1732 loop_cnt_6 = loop_cnt[6];
1733
1734 if (set_signal_handler())
1735 goto error;
1736
1737 if (!opt_disable_rseq && rseq_register_current_thread())
1738 goto error;
369688a5 1739 if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
04bb9984
MD
1740 printf_verbose("The rseq cpu id getter is unavailable\n");
1741 goto no_rseq;
369688a5 1742 }
31b44ba2
MD
1743 switch (opt_test) {
1744 case 's':
1745 printf_verbose("spinlock\n");
1746 test_percpu_spinlock();
1747 break;
1748 case 'l':
1749 printf_verbose("linked list\n");
1750 test_percpu_list();
1751 break;
1752 case 'b':
1753 printf_verbose("buffer\n");
1754 test_percpu_buffer();
1755 break;
1756 case 'm':
1757 printf_verbose("memcpy buffer\n");
1758 test_percpu_memcpy_buffer();
1759 break;
1760 case 'i':
1761 printf_verbose("counter increment\n");
1762 test_percpu_inc();
1763 break;
5368dcb4
MD
1764 case 'r':
1765 printf_verbose("membarrier\n");
1766 test_membarrier();
1767 break;
31b44ba2
MD
1768 }
1769 if (!opt_disable_rseq && rseq_unregister_current_thread())
1770 abort();
1771end:
1772 return 0;
1773
1774error:
1775 return -1;
d1cdec98
MJ
1776
1777no_rseq:
1778 return 2;
31b44ba2 1779}