x86-32: Save ip reference for ip-relative addressing
[librseq.git] / tests / param_test.c
90702366 1// SPDX-License-Identifier: MIT
f2d7b530 2// SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
31b44ba2
MD
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE
5#endif
6#include <assert.h>
cb900b45 7#include <linux/version.h>
5368dcb4 8#include <linux/membarrier.h>
31b44ba2
MD
9#include <pthread.h>
10#include <sched.h>
11#include <stdint.h>
12#include <stdio.h>
13#include <stdlib.h>
14#include <string.h>
15#include <syscall.h>
16#include <unistd.h>
17#include <poll.h>
18#include <sys/types.h>
19#include <signal.h>
20#include <errno.h>
21#include <stddef.h>
369688a5 22#include <stdbool.h>
324633af
MD
23#include <rseq/percpu-alloc.h>
24
25#define PERCPU_POOL_LEN (1024*1024) /* 1MB */
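/*
 * Summary of how the per-cpu allocator is used below (see
 * <rseq/percpu-alloc.h> for the authoritative API): each test creates
 * a pool with rseq_percpu_pool_create(), which reserves
 * PERCPU_POOL_LEN bytes per CPU (up to CPU_SETSIZE CPUs here);
 * rseq_percpu_zmalloc() returns a zeroed item replicated across those
 * per-cpu ranges, and rseq_percpu_ptr(item, cpu) resolves the instance
 * belonging to a given cpu (or mm_cid) index.
 */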
31b44ba2 26
cb900b45
MD
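/*
 * Fallback definitions matching the membarrier UAPI values introduced
 * in Linux 5.10, so the test still builds against older kernel
 * headers. Runtime availability is checked separately via
 * MEMBARRIER_CMD_QUERY before the membarrier test runs.
 */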
27#if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
28enum {
29 MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7),
30 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8),
31};
32
33enum {
34 MEMBARRIER_CMD_FLAG_CPU = (1 << 0),
35};
36#endif
37
31b44ba2
MD
38#define NR_INJECT 9
39static int loop_cnt[NR_INJECT + 1];
40
41static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
42static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
43static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
44static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
45static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
46static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
47
48static int opt_modulo, verbose;
49
50static int opt_yield, opt_signal, opt_sleep,
51 opt_disable_rseq, opt_threads = 200,
369688a5 52 opt_disable_mod = 0, opt_test = 's';
31b44ba2 53
31b44ba2 54static long long opt_reps = 5000;
31b44ba2
MD
55
56static __thread __attribute__((tls_model("initial-exec")))
57unsigned int signals_delivered;
58
c6e1dc81
MD
59static inline pid_t rseq_gettid(void)
60{
61 return syscall(__NR_gettid);
62}
63
3726b9f1
MD
64#ifndef BENCHMARK
65
31b44ba2
MD
66static __thread __attribute__((tls_model("initial-exec"), unused))
67int yield_mod_cnt, nr_abort;
68
69#define printf_verbose(fmt, ...) \
70 do { \
71 if (verbose) \
72 printf(fmt, ## __VA_ARGS__); \
73 } while (0)
74
75#ifdef __i386__
76
77#define INJECT_ASM_REG "eax"
78
79#define RSEQ_INJECT_CLOBBER \
80 , INJECT_ASM_REG
81
9b6b5311
MD
82/*
83 * "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" causes the
84 * following linker warning:
85 *
86 * /usr/bin/ld: param_test.o: warning: relocation in read-only section `.text'
87 * /usr/bin/ld: warning: creating DT_TEXTREL in a PIE
88 */
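/*
 * Background: an absolute data reference from .text forces the dynamic
 * linker to patch the text segment at load time (DT_TEXTREL), which
 * makes those pages copy-on-write (no longer shared between processes)
 * and is rejected outright by some hardened toolchains, hence the
 * warning quoted above.
 */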
31b44ba2
MD
89#define RSEQ_INJECT_ASM(n) \
90 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
91 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
92 "jz 333f\n\t" \
93 "222:\n\t" \
94 "dec %%" INJECT_ASM_REG "\n\t" \
95 "jnz 222b\n\t" \
96 "333:\n\t"
97
98#elif defined(__x86_64__)
99
100#define INJECT_ASM_REG_P "rax"
101#define INJECT_ASM_REG "eax"
102
103#define RSEQ_INJECT_CLOBBER \
104 , INJECT_ASM_REG_P \
105 , INJECT_ASM_REG
106
107#define RSEQ_INJECT_ASM(n) \
108 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
109 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
110 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
111 "jz 333f\n\t" \
112 "222:\n\t" \
113 "dec %%" INJECT_ASM_REG "\n\t" \
114 "jnz 222b\n\t" \
115 "333:\n\t"
116
117#elif defined(__s390__)
118
119#define RSEQ_INJECT_INPUT \
120 , [loop_cnt_1]"m"(loop_cnt[1]) \
121 , [loop_cnt_2]"m"(loop_cnt[2]) \
122 , [loop_cnt_3]"m"(loop_cnt[3]) \
123 , [loop_cnt_4]"m"(loop_cnt[4]) \
124 , [loop_cnt_5]"m"(loop_cnt[5]) \
125 , [loop_cnt_6]"m"(loop_cnt[6])
126
127#define INJECT_ASM_REG "r12"
128
129#define RSEQ_INJECT_CLOBBER \
130 , INJECT_ASM_REG
131
132#define RSEQ_INJECT_ASM(n) \
133 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
134 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
135 "je 333f\n\t" \
136 "222:\n\t" \
137 "ahi %%" INJECT_ASM_REG ", -1\n\t" \
138 "jnz 222b\n\t" \
139 "333:\n\t"
140
141#elif defined(__ARMEL__)
142
143#define RSEQ_INJECT_INPUT \
144 , [loop_cnt_1]"m"(loop_cnt[1]) \
145 , [loop_cnt_2]"m"(loop_cnt[2]) \
146 , [loop_cnt_3]"m"(loop_cnt[3]) \
147 , [loop_cnt_4]"m"(loop_cnt[4]) \
148 , [loop_cnt_5]"m"(loop_cnt[5]) \
149 , [loop_cnt_6]"m"(loop_cnt[6])
150
151#define INJECT_ASM_REG "r4"
152
153#define RSEQ_INJECT_CLOBBER \
154 , INJECT_ASM_REG
155
156#define RSEQ_INJECT_ASM(n) \
157 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
158 "cmp " INJECT_ASM_REG ", #0\n\t" \
159 "beq 333f\n\t" \
160 "222:\n\t" \
161 "subs " INJECT_ASM_REG ", #1\n\t" \
162 "bne 222b\n\t" \
163 "333:\n\t"
164
165#elif defined(__AARCH64EL__)
166
167#define RSEQ_INJECT_INPUT \
168 , [loop_cnt_1] "Qo" (loop_cnt[1]) \
169 , [loop_cnt_2] "Qo" (loop_cnt[2]) \
170 , [loop_cnt_3] "Qo" (loop_cnt[3]) \
171 , [loop_cnt_4] "Qo" (loop_cnt[4]) \
172 , [loop_cnt_5] "Qo" (loop_cnt[5]) \
173 , [loop_cnt_6] "Qo" (loop_cnt[6])
174
175#define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
176
177#define RSEQ_INJECT_ASM(n) \
178 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
179 " cbz " INJECT_ASM_REG ", 333f\n" \
180 "222:\n" \
181 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
182 " cbnz " INJECT_ASM_REG ", 222b\n" \
183 "333:\n"
184
f1c6b55b 185#elif defined(__PPC__)
31b44ba2
MD
186
187#define RSEQ_INJECT_INPUT \
188 , [loop_cnt_1]"m"(loop_cnt[1]) \
189 , [loop_cnt_2]"m"(loop_cnt[2]) \
190 , [loop_cnt_3]"m"(loop_cnt[3]) \
191 , [loop_cnt_4]"m"(loop_cnt[4]) \
192 , [loop_cnt_5]"m"(loop_cnt[5]) \
193 , [loop_cnt_6]"m"(loop_cnt[6])
194
195#define INJECT_ASM_REG "r18"
196
197#define RSEQ_INJECT_CLOBBER \
198 , INJECT_ASM_REG
199
200#define RSEQ_INJECT_ASM(n) \
201 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
202 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
203 "beq 333f\n\t" \
204 "222:\n\t" \
205 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
206 "bne 222b\n\t" \
207 "333:\n\t"
208
209#elif defined(__mips__)
210
211#define RSEQ_INJECT_INPUT \
212 , [loop_cnt_1]"m"(loop_cnt[1]) \
213 , [loop_cnt_2]"m"(loop_cnt[2]) \
214 , [loop_cnt_3]"m"(loop_cnt[3]) \
215 , [loop_cnt_4]"m"(loop_cnt[4]) \
216 , [loop_cnt_5]"m"(loop_cnt[5]) \
217 , [loop_cnt_6]"m"(loop_cnt[6])
218
219#define INJECT_ASM_REG "$5"
220
221#define RSEQ_INJECT_CLOBBER \
222 , INJECT_ASM_REG
223
224#define RSEQ_INJECT_ASM(n) \
225 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
226 "beqz " INJECT_ASM_REG ", 333f\n\t" \
227 "222:\n\t" \
228 "addiu " INJECT_ASM_REG ", -1\n\t" \
229 "bnez " INJECT_ASM_REG ", 222b\n\t" \
230 "333:\n\t"
231
074b1077
MJ
232#elif defined(__riscv)
233
234#define RSEQ_INJECT_INPUT \
235 , [loop_cnt_1]"m"(loop_cnt[1]) \
236 , [loop_cnt_2]"m"(loop_cnt[2]) \
237 , [loop_cnt_3]"m"(loop_cnt[3]) \
238 , [loop_cnt_4]"m"(loop_cnt[4]) \
239 , [loop_cnt_5]"m"(loop_cnt[5]) \
240 , [loop_cnt_6]"m"(loop_cnt[6])
241
242#define INJECT_ASM_REG "t1"
243
244#define RSEQ_INJECT_CLOBBER \
245 , INJECT_ASM_REG
246
247#define RSEQ_INJECT_ASM(n) \
248 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
249 "beqz " INJECT_ASM_REG ", 333f\n\t" \
250 "222:\n\t" \
251 "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
252 "bnez " INJECT_ASM_REG ", 222b\n\t" \
253 "333:\n\t"
254
31b44ba2
MD
255#else
256#error unsupported target
257#endif
258
259#define RSEQ_INJECT_FAILED \
260 nr_abort++;
261
262#define RSEQ_INJECT_C(n) \
263{ \
264 int loc_i, loc_nr_loops = loop_cnt[n]; \
265 \
266 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
267 rseq_barrier(); \
268 } \
269 if (loc_nr_loops == -1 && opt_modulo) { \
270 if (yield_mod_cnt == opt_modulo - 1) { \
271 if (opt_sleep > 0) \
272 poll(NULL, 0, opt_sleep); \
273 if (opt_yield) \
274 sched_yield(); \
275 if (opt_signal) \
276 raise(SIGUSR1); \
277 yield_mod_cnt = 0; \
278 } else { \
279 yield_mod_cnt++; \
280 } \
281 } \
282}
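/*
 * These RSEQ_INJECT_* hooks are consumed by the rseq headers included
 * below (<rseq/rseq.h>), which expand them inside their critical
 * sections: the asm hook spins loop_cnt[n] times at injection point n
 * to widen race windows, RSEQ_INJECT_C() additionally yields, sleeps
 * or raises SIGUSR1 every opt_modulo passes when the count is -1, and
 * RSEQ_INJECT_FAILED counts aborts. Example invocation (illustrative):
 * "param_test -T s -7 -1 -m 5 -k" raises SIGUSR1 on every 5th pass
 * through injection point 7.
 */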
283
284#else
285
286#define printf_verbose(fmt, ...)
287
288#endif /* BENCHMARK */
289
290#include <rseq/rseq.h>
291
369688a5
MD
292static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
293
294static int sys_membarrier(int cmd, int flags, int cpu_id)
295{
296 return syscall(__NR_membarrier, cmd, flags, cpu_id);
297}
298
b08be829 299#ifdef rseq_arch_has_load_cbne_load_add_store
369688a5
MD
300#define TEST_MEMBARRIER
301#endif
302
303#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
304# define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
305static
306int get_current_cpu_id(void)
307{
308 return rseq_current_mm_cid();
309}
310static
311bool rseq_validate_cpu_id(void)
312{
313 return rseq_mm_cid_available();
314}
40797ae3
MD
315static
316bool rseq_use_cpu_index(void)
317{
318 return false; /* Use mm_cid */
319}
369688a5
MD
320# ifdef TEST_MEMBARRIER
321/*
322 * Membarrier does not currently support targeting a mm_cid, so
323 * issue the barrier on all cpus.
324 */
325static
326int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
327{
328 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
329 0, 0);
330}
331# endif /* TEST_MEMBARRIER */
332#else
333# define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
334static
335int get_current_cpu_id(void)
336{
337 return rseq_cpu_start();
338}
339static
340bool rseq_validate_cpu_id(void)
341{
342 return rseq_current_cpu_raw() >= 0;
343}
40797ae3
MD
344static
345bool rseq_use_cpu_index(void)
346{
347 return true; /* Use cpu_id as index. */
348}
369688a5
MD
349# ifdef TEST_MEMBARRIER
350static
351int rseq_membarrier_expedited(int cpu)
352{
353 return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
354 MEMBARRIER_CMD_FLAG_CPU, cpu);
355}
356# endif /* TEST_MEMBARRIER */
357#endif
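/*
 * Summary of the two indexing modes selected above: with
 * BUILDOPT_RSEQ_PERCPU_MM_CID, per-cpu data is indexed by the
 * per-process concurrency id (mm_cid), which stays compact regardless
 * of which CPUs the threads run on; otherwise the raw cpu_id is used,
 * and rseq_use_cpu_index() tells the tests to restrict initialization
 * and final accounting to CPUs present in the allowed affinity mask.
 */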
358
31b44ba2 359struct percpu_lock {
324633af 360 intptr_t v;
31b44ba2
MD
361};
362
31b44ba2
MD
363struct spinlock_test_data {
364 struct percpu_lock lock;
324633af 365 intptr_t count;
31b44ba2
MD
366};
367
368struct spinlock_thread_test_data {
fe7f954a 369 struct spinlock_test_data __rseq_percpu *data;
31b44ba2
MD
370 long long reps;
371 int reg;
372};
373
374struct inc_test_data {
c8278da8 375 intptr_t count;
31b44ba2
MD
376};
377
378struct inc_thread_test_data {
c8278da8 379 struct inc_test_data __rseq_percpu *data;
31b44ba2
MD
380 long long reps;
381 int reg;
382};
383
384struct percpu_list_node {
385 intptr_t data;
386 struct percpu_list_node *next;
387};
388
31b44ba2 389struct percpu_list {
b08be829 390 struct percpu_list_node *head;
31b44ba2
MD
391};
392
393#define BUFFER_ITEM_PER_CPU 100
394
395struct percpu_buffer_node {
396 intptr_t data;
397};
398
bac8cd24 399struct percpu_buffer {
31b44ba2
MD
400 intptr_t offset;
401 intptr_t buflen;
402 struct percpu_buffer_node **array;
31b44ba2
MD
403};
404
405#define MEMCPY_BUFFER_ITEM_PER_CPU 100
406
407struct percpu_memcpy_buffer_node {
408 intptr_t data1;
409 uint64_t data2;
410};
411
78adbd91 412struct percpu_memcpy_buffer {
31b44ba2
MD
413 intptr_t offset;
414 intptr_t buflen;
415 struct percpu_memcpy_buffer_node *array;
31b44ba2
MD
416};
417
418/* A simple percpu spinlock. Grabs lock on current cpu. */
fe7f954a 419static int rseq_this_cpu_lock(struct percpu_lock __rseq_percpu *lock)
31b44ba2
MD
420{
421 int cpu;
422
423 for (;;) {
424 int ret;
425
369688a5 426 cpu = get_current_cpu_id();
3726b9f1
MD
427 if (cpu < 0) {
428 fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
429 getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
430 abort();
431 }
41149e28 432 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
324633af 433 &rseq_percpu_ptr(lock, cpu)->v,
31b44ba2
MD
434 0, 1, cpu);
435 if (rseq_likely(!ret))
436 break;
437 /* Retry if comparison fails or rseq aborts. */
438 }
439 /*
440 * Acquire semantic when taking lock after control dependency.
441 * Matches rseq_smp_store_release().
442 */
443 rseq_smp_acquire__after_ctrl_dep();
444 return cpu;
445}
446
fe7f954a 447static void rseq_percpu_unlock(struct percpu_lock __rseq_percpu *lock, int cpu)
31b44ba2 448{
324633af 449 assert(rseq_percpu_ptr(lock, cpu)->v == 1);
31b44ba2
MD
450 /*
451 * Release lock, with release semantic. Matches
452 * rseq_smp_acquire__after_ctrl_dep().
453 */
324633af 454 rseq_smp_store_release(&rseq_percpu_ptr(lock, cpu)->v, 0);
31b44ba2
MD
455}
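/*
 * Illustrative usage sketch (not called by the tests): a critical
 * section protected by the per-cpu lock updates only the per-cpu
 * instance selected by the index returned from rseq_this_cpu_lock().
 */
static inline __attribute__((unused))
void example_percpu_locked_inc(struct spinlock_test_data __rseq_percpu *data)
{
	int cpu = rseq_this_cpu_lock(&data->lock);	/* lock instance for current cpu/mm_cid */

	rseq_percpu_ptr(data, cpu)->count++;		/* update protected by that lock */
	rseq_percpu_unlock(&data->lock, cpu);		/* release with store-release */
}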
456
6e284b80 457static void *test_percpu_spinlock_thread(void *arg)
31b44ba2 458{
d268885a 459 struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
fe7f954a 460 struct spinlock_test_data __rseq_percpu *data = thread_data->data;
31b44ba2
MD
461 long long i, reps;
462
463 if (!opt_disable_rseq && thread_data->reg &&
464 rseq_register_current_thread())
465 abort();
466 reps = thread_data->reps;
467 for (i = 0; i < reps; i++) {
af895f04 468 int cpu = rseq_this_cpu_lock(&data->lock);
324633af 469 rseq_percpu_ptr(data, cpu)->count++;
31b44ba2
MD
470 rseq_percpu_unlock(&data->lock, cpu);
471#ifndef BENCHMARK
472 if (i != 0 && !(i % (reps / 10)))
473 printf_verbose("tid %d: count %lld\n",
474 (int) rseq_gettid(), i);
475#endif
476 }
477 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
478 (int) rseq_gettid(), nr_abort, signals_delivered);
479 if (!opt_disable_rseq && thread_data->reg &&
480 rseq_unregister_current_thread())
481 abort();
482 return NULL;
483}
484
485/*
486 * A simple test which implements a sharded counter using a per-cpu
487 * lock. Obviously real applications might prefer to simply use a
488 * per-cpu increment; however, this is reasonable for a test and the
489 * lock can be extended to synchronize more complicated operations.
490 */
6e284b80 491static void test_percpu_spinlock(void)
31b44ba2
MD
492{
493 const int num_threads = opt_threads;
494 int i, ret;
495 uint64_t sum;
496 pthread_t test_threads[num_threads];
fe7f954a 497 struct spinlock_test_data __rseq_percpu *data;
31b44ba2 498 struct spinlock_thread_test_data thread_data[num_threads];
324633af
MD
499 struct rseq_percpu_pool *mempool;
500
501 mempool = rseq_percpu_pool_create(sizeof(struct spinlock_test_data),
502 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
503 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
504 if (!mempool) {
505 perror("rseq_percpu_pool_create");
506 abort();
507 }
fe7f954a 508 data = (struct spinlock_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
324633af
MD
509 if (!data) {
510 perror("rseq_percpu_zmalloc");
511 abort();
512 }
31b44ba2 513
31b44ba2
MD
514 for (i = 0; i < num_threads; i++) {
515 thread_data[i].reps = opt_reps;
516 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
517 thread_data[i].reg = 1;
518 else
519 thread_data[i].reg = 0;
324633af 520 thread_data[i].data = data;
31b44ba2
MD
521 ret = pthread_create(&test_threads[i], NULL,
522 test_percpu_spinlock_thread,
523 &thread_data[i]);
524 if (ret) {
525 errno = ret;
526 perror("pthread_create");
527 abort();
528 }
529 }
530
531 for (i = 0; i < num_threads; i++) {
532 ret = pthread_join(test_threads[i], NULL);
533 if (ret) {
534 errno = ret;
535 perror("pthread_join");
536 abort();
537 }
538 }
539
540 sum = 0;
541 for (i = 0; i < CPU_SETSIZE; i++)
324633af 542 sum += rseq_percpu_ptr(data, i)->count;
31b44ba2
MD
543
544 assert(sum == (uint64_t)opt_reps * num_threads);
324633af
MD
545 rseq_percpu_free(data);
546 ret = rseq_percpu_pool_destroy(mempool);
547 if (ret) {
548 perror("rseq_percpu_pool_destroy");
549 abort();
550 }
31b44ba2
MD
551}
552
6e284b80 553static void *test_percpu_inc_thread(void *arg)
31b44ba2 554{
d268885a 555 struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
c8278da8 556 struct inc_test_data __rseq_percpu *data = thread_data->data;
31b44ba2
MD
557 long long i, reps;
558
559 if (!opt_disable_rseq && thread_data->reg &&
560 rseq_register_current_thread())
561 abort();
562 reps = thread_data->reps;
563 for (i = 0; i < reps; i++) {
564 int ret;
565
566 do {
567 int cpu;
568
369688a5 569 cpu = get_current_cpu_id();
41149e28 570 ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
c8278da8 571 &rseq_percpu_ptr(data, cpu)->count, 1, cpu);
31b44ba2
MD
572 } while (rseq_unlikely(ret));
573#ifndef BENCHMARK
574 if (i != 0 && !(i % (reps / 10)))
575 printf_verbose("tid %d: count %lld\n",
576 (int) rseq_gettid(), i);
577#endif
578 }
579 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
580 (int) rseq_gettid(), nr_abort, signals_delivered);
581 if (!opt_disable_rseq && thread_data->reg &&
582 rseq_unregister_current_thread())
583 abort();
584 return NULL;
585}
586
6e284b80 587static void test_percpu_inc(void)
31b44ba2
MD
588{
589 const int num_threads = opt_threads;
590 int i, ret;
591 uint64_t sum;
592 pthread_t test_threads[num_threads];
c8278da8 593 struct inc_test_data __rseq_percpu *data;
31b44ba2 594 struct inc_thread_test_data thread_data[num_threads];
c8278da8
MD
595 struct rseq_percpu_pool *mempool;
596
597 mempool = rseq_percpu_pool_create(sizeof(struct inc_test_data),
598 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
599 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
600 if (!mempool) {
601 perror("rseq_percpu_pool_create");
602 abort();
603 }
604 data = (struct inc_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
605 if (!data) {
606 perror("rseq_percpu_zmalloc");
607 abort();
608 }
31b44ba2 609
31b44ba2
MD
610 for (i = 0; i < num_threads; i++) {
611 thread_data[i].reps = opt_reps;
612 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
613 thread_data[i].reg = 1;
614 else
615 thread_data[i].reg = 0;
c8278da8 616 thread_data[i].data = data;
31b44ba2
MD
617 ret = pthread_create(&test_threads[i], NULL,
618 test_percpu_inc_thread,
619 &thread_data[i]);
620 if (ret) {
621 errno = ret;
622 perror("pthread_create");
623 abort();
624 }
625 }
626
627 for (i = 0; i < num_threads; i++) {
628 ret = pthread_join(test_threads[i], NULL);
629 if (ret) {
630 errno = ret;
631 perror("pthread_join");
632 abort();
633 }
634 }
635
636 sum = 0;
637 for (i = 0; i < CPU_SETSIZE; i++)
c8278da8 638 sum += rseq_percpu_ptr(data, i)->count;
31b44ba2
MD
639
640 assert(sum == (uint64_t)opt_reps * num_threads);
c8278da8
MD
641 rseq_percpu_free(data);
642 ret = rseq_percpu_pool_destroy(mempool);
643 if (ret) {
644 perror("rseq_percpu_pool_destroy");
645 abort();
646 }
31b44ba2
MD
647}
648
b08be829 649static void this_cpu_list_push(struct percpu_list __rseq_percpu *list,
31b44ba2
MD
650 struct percpu_list_node *node,
651 int *_cpu)
652{
653 int cpu;
654
655 for (;;) {
656 intptr_t *targetptr, newval, expect;
b08be829 657 struct percpu_list *cpulist;
31b44ba2
MD
658 int ret;
659
369688a5 660 cpu = get_current_cpu_id();
b08be829 661 cpulist = rseq_percpu_ptr(list, cpu);
31b44ba2 662 /* Load cpulist->head with single-copy atomicity. */
b08be829 663 expect = (intptr_t)RSEQ_READ_ONCE(cpulist->head);
31b44ba2 664 newval = (intptr_t)node;
b08be829 665 targetptr = (intptr_t *)&cpulist->head;
31b44ba2 666 node->next = (struct percpu_list_node *)expect;
41149e28 667 ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 668 targetptr, expect, newval, cpu);
31b44ba2
MD
669 if (rseq_likely(!ret))
670 break;
671 /* Retry if comparison fails or rseq aborts. */
672 }
673 if (_cpu)
674 *_cpu = cpu;
675}
676
677/*
 678 * Unlike a traditional lock-less linked list, the availability of an
 679 * rseq primitive allows us to implement pop without concerns over
680 * ABA-type races.
681 */
b08be829 682static struct percpu_list_node *this_cpu_list_pop(struct percpu_list __rseq_percpu *list,
31b44ba2
MD
683 int *_cpu)
684{
685 struct percpu_list_node *node = NULL;
686 int cpu;
687
688 for (;;) {
689 struct percpu_list_node *head;
690 intptr_t *targetptr, expectnot, *load;
b08be829 691 struct percpu_list *cpulist;
d35eae6b
MD
692 long offset;
693 int ret;
31b44ba2 694
369688a5 695 cpu = get_current_cpu_id();
b08be829
MD
696 cpulist = rseq_percpu_ptr(list, cpu);
697 targetptr = (intptr_t *)&cpulist->head;
31b44ba2
MD
698 expectnot = (intptr_t)NULL;
699 offset = offsetof(struct percpu_list_node, next);
700 load = (intptr_t *)&head;
41149e28 701 ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5
MD
702 targetptr, expectnot,
703 offset, load, cpu);
31b44ba2
MD
704 if (rseq_likely(!ret)) {
705 node = head;
706 break;
707 }
708 if (ret > 0)
709 break;
710 /* Retry if rseq aborts. */
711 }
712 if (_cpu)
713 *_cpu = cpu;
714 return node;
715}
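/*
 * Why ABA is not a concern here: the load of the head pointer, the
 * dereference of head->next and the final store to the list head all
 * execute within a single rseq critical section bound to the owning
 * cpu (or mm_cid). Any preemption, signal delivery or migration aborts
 * the sequence, which is simply retried, so the head cannot be popped
 * and re-pushed by another thread between the load and the commit, as
 * it could with a cmpxchg-based lock-less pop.
 */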
716
717/*
718 * __percpu_list_pop is not safe against concurrent accesses. Should
719 * only be used on lists that are not concurrently modified.
720 */
b08be829 721static struct percpu_list_node *__percpu_list_pop(struct percpu_list __rseq_percpu *list, int cpu)
31b44ba2 722{
b08be829 723 struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
31b44ba2
MD
724 struct percpu_list_node *node;
725
b08be829 726 node = cpulist->head;
31b44ba2
MD
727 if (!node)
728 return NULL;
b08be829 729 cpulist->head = node->next;
31b44ba2
MD
730 return node;
731}
732
6e284b80 733static void *test_percpu_list_thread(void *arg)
31b44ba2
MD
734{
735 long long i, reps;
b08be829 736 struct percpu_list __rseq_percpu *list = (struct percpu_list __rseq_percpu *)arg;
31b44ba2
MD
737
738 if (!opt_disable_rseq && rseq_register_current_thread())
739 abort();
740
741 reps = opt_reps;
742 for (i = 0; i < reps; i++) {
743 struct percpu_list_node *node;
744
745 node = this_cpu_list_pop(list, NULL);
746 if (opt_yield)
747 sched_yield(); /* encourage shuffling */
748 if (node)
749 this_cpu_list_push(list, node, NULL);
750 }
751
752 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
753 (int) rseq_gettid(), nr_abort, signals_delivered);
754 if (!opt_disable_rseq && rseq_unregister_current_thread())
755 abort();
756
757 return NULL;
758}
759
760/* Simultaneous modification to a per-cpu linked list from many threads. */
6e284b80 761static void test_percpu_list(void)
31b44ba2
MD
762{
763 const int num_threads = opt_threads;
764 int i, j, ret;
765 uint64_t sum = 0, expected_sum = 0;
b08be829 766 struct percpu_list __rseq_percpu *list;
31b44ba2
MD
767 pthread_t test_threads[num_threads];
768 cpu_set_t allowed_cpus;
b08be829 769 struct rseq_percpu_pool *mempool;
31b44ba2 770
b08be829
MD
771 mempool = rseq_percpu_pool_create(sizeof(struct percpu_list),
772 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
773 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
774 if (!mempool) {
775 perror("rseq_percpu_pool_create");
776 abort();
777 }
778 list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool);
779 if (!list) {
780 perror("rseq_percpu_zmalloc");
781 abort();
782 }
31b44ba2
MD
783
784 /* Generate list entries for every usable cpu. */
785 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
786 for (i = 0; i < CPU_SETSIZE; i++) {
40797ae3 787 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
31b44ba2
MD
788 continue;
789 for (j = 1; j <= 100; j++) {
b08be829 790 struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
31b44ba2
MD
791 struct percpu_list_node *node;
792
793 expected_sum += j;
794
d268885a 795 node = (struct percpu_list_node *) malloc(sizeof(*node));
31b44ba2
MD
796 assert(node);
797 node->data = j;
b08be829
MD
798 node->next = cpulist->head;
799 cpulist->head = node;
31b44ba2
MD
800 }
801 }
802
803 for (i = 0; i < num_threads; i++) {
804 ret = pthread_create(&test_threads[i], NULL,
b08be829 805 test_percpu_list_thread, list);
31b44ba2
MD
806 if (ret) {
807 errno = ret;
808 perror("pthread_create");
809 abort();
810 }
811 }
812
813 for (i = 0; i < num_threads; i++) {
814 ret = pthread_join(test_threads[i], NULL);
815 if (ret) {
816 errno = ret;
817 perror("pthread_join");
818 abort();
819 }
820 }
821
822 for (i = 0; i < CPU_SETSIZE; i++) {
823 struct percpu_list_node *node;
824
40797ae3 825 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
31b44ba2
MD
826 continue;
827
b08be829 828 while ((node = __percpu_list_pop(list, i))) {
31b44ba2
MD
829 sum += node->data;
830 free(node);
831 }
832 }
833
834 /*
835 * All entries should now be accounted for (unless some external
836 * actor is interfering with our allowed affinity while this
837 * test is running).
838 */
839 assert(sum == expected_sum);
b08be829
MD
840 rseq_percpu_free(list);
841 ret = rseq_percpu_pool_destroy(mempool);
842 if (ret) {
843 perror("rseq_percpu_pool_destroy");
844 abort();
845 }
31b44ba2
MD
846}
847
bac8cd24 848static bool this_cpu_buffer_push(struct percpu_buffer __rseq_percpu *buffer,
31b44ba2
MD
849 struct percpu_buffer_node *node,
850 int *_cpu)
851{
852 bool result = false;
853 int cpu;
854
855 for (;;) {
bac8cd24 856 struct percpu_buffer *cpubuffer;
31b44ba2
MD
857 intptr_t *targetptr_spec, newval_spec;
858 intptr_t *targetptr_final, newval_final;
859 intptr_t offset;
860 int ret;
861
369688a5 862 cpu = get_current_cpu_id();
bac8cd24
MD
863 cpubuffer = rseq_percpu_ptr(buffer, cpu);
864 offset = RSEQ_READ_ONCE(cpubuffer->offset);
865 if (offset == cpubuffer->buflen)
31b44ba2
MD
866 break;
867 newval_spec = (intptr_t)node;
bac8cd24 868 targetptr_spec = (intptr_t *)&cpubuffer->array[offset];
31b44ba2 869 newval_final = offset + 1;
bac8cd24 870 targetptr_final = &cpubuffer->offset;
41149e28 871 ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
369688a5
MD
872 targetptr_final, offset, targetptr_spec,
873 newval_spec, newval_final, cpu);
31b44ba2
MD
874 if (rseq_likely(!ret)) {
875 result = true;
876 break;
877 }
878 /* Retry if comparison fails or rseq aborts. */
879 }
880 if (_cpu)
881 *_cpu = cpu;
882 return result;
883}
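/*
 * Note on the push operation above: the node pointer is stored
 * speculatively into array[offset], and the increment of offset is the
 * final commit store. If the sequence aborts before that commit, the
 * speculative store is harmless because offset still excludes the
 * slot. The -M option (opt_mo == RSEQ_MO_RELEASE) turns the commit
 * into a store-release, exercising the release variants of the
 * operations.
 */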
884
bac8cd24 885static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
31b44ba2
MD
886 int *_cpu)
887{
888 struct percpu_buffer_node *head;
889 int cpu;
890
891 for (;;) {
bac8cd24 892 struct percpu_buffer *cpubuffer;
31b44ba2
MD
893 intptr_t *targetptr, newval;
894 intptr_t offset;
895 int ret;
896
369688a5 897 cpu = get_current_cpu_id();
bac8cd24 898 cpubuffer = rseq_percpu_ptr(buffer, cpu);
31b44ba2 899 /* Load offset with single-copy atomicity. */
bac8cd24 900 offset = RSEQ_READ_ONCE(cpubuffer->offset);
31b44ba2
MD
901 if (offset == 0) {
902 head = NULL;
903 break;
904 }
bac8cd24 905 head = RSEQ_READ_ONCE(cpubuffer->array[offset - 1]);
31b44ba2 906 newval = offset - 1;
bac8cd24 907 targetptr = (intptr_t *)&cpubuffer->offset;
41149e28 908 ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 909 targetptr, offset,
bac8cd24 910 (intptr_t *)&cpubuffer->array[offset - 1],
31b44ba2
MD
911 (intptr_t)head, newval, cpu);
912 if (rseq_likely(!ret))
913 break;
914 /* Retry if comparison fails or rseq aborts. */
915 }
916 if (_cpu)
917 *_cpu = cpu;
918 return head;
919}
920
921/*
922 * __percpu_buffer_pop is not safe against concurrent accesses. Should
923 * only be used on buffers that are not concurrently modified.
924 */
bac8cd24 925static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
31b44ba2
MD
926 int cpu)
927{
bac8cd24 928 struct percpu_buffer *cpubuffer;
31b44ba2
MD
929 struct percpu_buffer_node *head;
930 intptr_t offset;
931
bac8cd24
MD
932 cpubuffer = rseq_percpu_ptr(buffer, cpu);
933 offset = cpubuffer->offset;
31b44ba2
MD
934 if (offset == 0)
935 return NULL;
bac8cd24
MD
936 head = cpubuffer->array[offset - 1];
937 cpubuffer->offset = offset - 1;
31b44ba2
MD
938 return head;
939}
940
6e284b80 941static void *test_percpu_buffer_thread(void *arg)
31b44ba2
MD
942{
943 long long i, reps;
bac8cd24 944 struct percpu_buffer __rseq_percpu *buffer = (struct percpu_buffer __rseq_percpu *)arg;
31b44ba2
MD
945
946 if (!opt_disable_rseq && rseq_register_current_thread())
947 abort();
948
949 reps = opt_reps;
950 for (i = 0; i < reps; i++) {
951 struct percpu_buffer_node *node;
952
953 node = this_cpu_buffer_pop(buffer, NULL);
954 if (opt_yield)
955 sched_yield(); /* encourage shuffling */
956 if (node) {
957 if (!this_cpu_buffer_push(buffer, node, NULL)) {
958 /* Should increase buffer size. */
959 abort();
960 }
961 }
962 }
963
964 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
965 (int) rseq_gettid(), nr_abort, signals_delivered);
966 if (!opt_disable_rseq && rseq_unregister_current_thread())
967 abort();
968
969 return NULL;
970}
971
972/* Simultaneous modification to a per-cpu buffer from many threads. */
6e284b80 973static void test_percpu_buffer(void)
31b44ba2
MD
974{
975 const int num_threads = opt_threads;
976 int i, j, ret;
977 uint64_t sum = 0, expected_sum = 0;
bac8cd24 978 struct percpu_buffer __rseq_percpu *buffer;
31b44ba2
MD
979 pthread_t test_threads[num_threads];
980 cpu_set_t allowed_cpus;
bac8cd24 981 struct rseq_percpu_pool *mempool;
31b44ba2 982
bac8cd24
MD
983 mempool = rseq_percpu_pool_create(sizeof(struct percpu_buffer),
984 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
985 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
986 if (!mempool) {
987 perror("rseq_percpu_pool_create");
988 abort();
989 }
990 buffer = (struct percpu_buffer __rseq_percpu *)rseq_percpu_zmalloc(mempool);
991 if (!buffer) {
992 perror("rseq_percpu_zmalloc");
993 abort();
994 }
31b44ba2
MD
995
 996 /* Generate buffer items for every usable cpu. */
997 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
998 for (i = 0; i < CPU_SETSIZE; i++) {
bac8cd24
MD
999 struct percpu_buffer *cpubuffer;
1000
40797ae3 1001 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
31b44ba2 1002 continue;
bac8cd24 1003 cpubuffer = rseq_percpu_ptr(buffer, i);
31b44ba2 1004 /* Worst-case is every item in the same CPU. */
bac8cd24 1005 cpubuffer->array =
d268885a 1006 (struct percpu_buffer_node **)
bac8cd24 1007 malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
31b44ba2 1008 BUFFER_ITEM_PER_CPU);
bac8cd24
MD
1009 assert(cpubuffer->array);
1010 cpubuffer->buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
31b44ba2
MD
1011 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
1012 struct percpu_buffer_node *node;
1013
1014 expected_sum += j;
1015
1016 /*
1017 * We could theoretically put the word-sized
1018 * "data" directly in the buffer. However, we
1019 * want to model objects that would not fit
1020 * within a single word, so allocate an object
1021 * for each node.
1022 */
d268885a 1023 node = (struct percpu_buffer_node *) malloc(sizeof(*node));
31b44ba2
MD
1024 assert(node);
1025 node->data = j;
bac8cd24
MD
1026 cpubuffer->array[j - 1] = node;
1027 cpubuffer->offset++;
31b44ba2
MD
1028 }
1029 }
1030
1031 for (i = 0; i < num_threads; i++) {
1032 ret = pthread_create(&test_threads[i], NULL,
bac8cd24 1033 test_percpu_buffer_thread, buffer);
31b44ba2
MD
1034 if (ret) {
1035 errno = ret;
1036 perror("pthread_create");
1037 abort();
1038 }
1039 }
1040
1041 for (i = 0; i < num_threads; i++) {
1042 ret = pthread_join(test_threads[i], NULL);
1043 if (ret) {
1044 errno = ret;
1045 perror("pthread_join");
1046 abort();
1047 }
1048 }
1049
1050 for (i = 0; i < CPU_SETSIZE; i++) {
bac8cd24 1051 struct percpu_buffer *cpubuffer;
31b44ba2
MD
1052 struct percpu_buffer_node *node;
1053
40797ae3 1054 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
31b44ba2
MD
1055 continue;
1056
bac8cd24
MD
1057 cpubuffer = rseq_percpu_ptr(buffer, i);
1058 while ((node = __percpu_buffer_pop(buffer, i))) {
31b44ba2
MD
1059 sum += node->data;
1060 free(node);
1061 }
bac8cd24 1062 free(cpubuffer->array);
31b44ba2
MD
1063 }
1064
1065 /*
1066 * All entries should now be accounted for (unless some external
1067 * actor is interfering with our allowed affinity while this
1068 * test is running).
1069 */
1070 assert(sum == expected_sum);
bac8cd24
MD
1071 rseq_percpu_free(buffer);
1072 ret = rseq_percpu_pool_destroy(mempool);
1073 if (ret) {
1074 perror("rseq_percpu_pool_destroy");
1075 abort();
1076 }
31b44ba2
MD
1077}
1078
78adbd91 1079static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer __rseq_percpu *buffer,
31b44ba2
MD
1080 struct percpu_memcpy_buffer_node item,
1081 int *_cpu)
1082{
1083 bool result = false;
1084 int cpu;
1085
1086 for (;;) {
78adbd91 1087 struct percpu_memcpy_buffer *cpubuffer;
31b44ba2
MD
1088 intptr_t *targetptr_final, newval_final, offset;
1089 char *destptr, *srcptr;
1090 size_t copylen;
1091 int ret;
1092
369688a5 1093 cpu = get_current_cpu_id();
78adbd91 1094 cpubuffer = rseq_percpu_ptr(buffer, cpu);
31b44ba2 1095 /* Load offset with single-copy atomicity. */
78adbd91
MD
1096 offset = RSEQ_READ_ONCE(cpubuffer->offset);
1097 if (offset == cpubuffer->buflen)
31b44ba2 1098 break;
78adbd91 1099 destptr = (char *)&cpubuffer->array[offset];
31b44ba2
MD
1100 srcptr = (char *)&item;
1101 /* copylen must be <= 4kB. */
1102 copylen = sizeof(item);
1103 newval_final = offset + 1;
78adbd91 1104 targetptr_final = &cpubuffer->offset;
41149e28 1105 ret = rseq_load_cbne_memcpy_store__ptr(
369688a5
MD
1106 opt_mo, RSEQ_PERCPU,
1107 targetptr_final, offset,
1108 destptr, srcptr, copylen,
1109 newval_final, cpu);
31b44ba2
MD
1110 if (rseq_likely(!ret)) {
1111 result = true;
1112 break;
1113 }
1114 /* Retry if comparison fails or rseq aborts. */
1115 }
1116 if (_cpu)
1117 *_cpu = cpu;
1118 return result;
1119}
1120
78adbd91 1121static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
31b44ba2
MD
1122 struct percpu_memcpy_buffer_node *item,
1123 int *_cpu)
1124{
1125 bool result = false;
1126 int cpu;
1127
1128 for (;;) {
78adbd91 1129 struct percpu_memcpy_buffer *cpubuffer;
31b44ba2
MD
1130 intptr_t *targetptr_final, newval_final, offset;
1131 char *destptr, *srcptr;
1132 size_t copylen;
1133 int ret;
1134
369688a5 1135 cpu = get_current_cpu_id();
78adbd91 1136 cpubuffer = rseq_percpu_ptr(buffer, cpu);
31b44ba2 1137 /* Load offset with single-copy atomicity. */
78adbd91 1138 offset = RSEQ_READ_ONCE(cpubuffer->offset);
31b44ba2
MD
1139 if (offset == 0)
1140 break;
1141 destptr = (char *)item;
78adbd91 1142 srcptr = (char *)&cpubuffer->array[offset - 1];
31b44ba2
MD
1143 /* copylen must be <= 4kB. */
1144 copylen = sizeof(*item);
1145 newval_final = offset - 1;
78adbd91 1146 targetptr_final = &cpubuffer->offset;
41149e28 1147 ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
369688a5 1148 targetptr_final, offset, destptr, srcptr, copylen,
31b44ba2
MD
1149 newval_final, cpu);
1150 if (rseq_likely(!ret)) {
1151 result = true;
1152 break;
1153 }
1154 /* Retry if comparison fails or rseq aborts. */
1155 }
1156 if (_cpu)
1157 *_cpu = cpu;
1158 return result;
1159}
1160
1161/*
1162 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
1163 * only be used on buffers that are not concurrently modified.
1164 */
78adbd91 1165static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
31b44ba2
MD
1166 struct percpu_memcpy_buffer_node *item,
1167 int cpu)
1168{
78adbd91 1169 struct percpu_memcpy_buffer *cpubuffer;
31b44ba2
MD
1170 intptr_t offset;
1171
78adbd91
MD
1172 cpubuffer = rseq_percpu_ptr(buffer, cpu);
1173 offset = cpubuffer->offset;
31b44ba2
MD
1174 if (offset == 0)
1175 return false;
78adbd91
MD
1176 memcpy(item, &cpubuffer->array[offset - 1], sizeof(*item));
1177 cpubuffer->offset = offset - 1;
31b44ba2
MD
1178 return true;
1179}
1180
6e284b80 1181static void *test_percpu_memcpy_buffer_thread(void *arg)
31b44ba2
MD
1182{
1183 long long i, reps;
78adbd91 1184 struct percpu_memcpy_buffer __rseq_percpu *buffer = (struct percpu_memcpy_buffer __rseq_percpu *)arg;
31b44ba2
MD
1185
1186 if (!opt_disable_rseq && rseq_register_current_thread())
1187 abort();
1188
1189 reps = opt_reps;
1190 for (i = 0; i < reps; i++) {
1191 struct percpu_memcpy_buffer_node item;
1192 bool result;
1193
1194 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1195 if (opt_yield)
1196 sched_yield(); /* encourage shuffling */
1197 if (result) {
1198 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1199 /* Should increase buffer size. */
1200 abort();
1201 }
1202 }
1203 }
1204
1205 printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
1206 (int) rseq_gettid(), nr_abort, signals_delivered);
1207 if (!opt_disable_rseq && rseq_unregister_current_thread())
1208 abort();
1209
1210 return NULL;
1211}
1212
1213/* Simultaneous modification to a per-cpu buffer from many threads. */
6e284b80 1214static void test_percpu_memcpy_buffer(void)
31b44ba2
MD
1215{
1216 const int num_threads = opt_threads;
1217 int i, j, ret;
1218 uint64_t sum = 0, expected_sum = 0;
78adbd91 1219 struct percpu_memcpy_buffer __rseq_percpu *buffer;
31b44ba2
MD
1220 pthread_t test_threads[num_threads];
1221 cpu_set_t allowed_cpus;
78adbd91 1222 struct rseq_percpu_pool *mempool;
31b44ba2 1223
78adbd91
MD
1224 mempool = rseq_percpu_pool_create(sizeof(struct percpu_memcpy_buffer),
1225 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
1226 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
1227 if (!mempool) {
1228 perror("rseq_percpu_pool_create");
1229 abort();
1230 }
1231 buffer = (struct percpu_memcpy_buffer __rseq_percpu *)rseq_percpu_zmalloc(mempool);
1232 if (!buffer) {
1233 perror("rseq_percpu_zmalloc");
1234 abort();
1235 }
31b44ba2
MD
1236
 1237 /* Generate buffer items for every usable cpu. */
1238 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1239 for (i = 0; i < CPU_SETSIZE; i++) {
78adbd91
MD
1240 struct percpu_memcpy_buffer *cpubuffer;
1241
40797ae3 1242 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
31b44ba2 1243 continue;
78adbd91 1244 cpubuffer = rseq_percpu_ptr(buffer, i);
31b44ba2 1245 /* Worst-case is every item in the same CPU. */
78adbd91 1246 cpubuffer->array =
d268885a 1247 (struct percpu_memcpy_buffer_node *)
78adbd91 1248 malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
31b44ba2 1249 MEMCPY_BUFFER_ITEM_PER_CPU);
78adbd91
MD
1250 assert(cpubuffer->array);
1251 cpubuffer->buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
31b44ba2
MD
1252 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1253 expected_sum += 2 * j + 1;
1254
1255 /*
1256 * We could theoretically put the word-sized
1257 * "data" directly in the buffer. However, we
1258 * want to model objects that would not fit
1259 * within a single word, so allocate an object
1260 * for each node.
1261 */
78adbd91
MD
1262 cpubuffer->array[j - 1].data1 = j;
1263 cpubuffer->array[j - 1].data2 = j + 1;
1264 cpubuffer->offset++;
31b44ba2
MD
1265 }
1266 }
1267
1268 for (i = 0; i < num_threads; i++) {
1269 ret = pthread_create(&test_threads[i], NULL,
1270 test_percpu_memcpy_buffer_thread,
78adbd91 1271 buffer);
31b44ba2
MD
1272 if (ret) {
1273 errno = ret;
1274 perror("pthread_create");
1275 abort();
1276 }
1277 }
1278
1279 for (i = 0; i < num_threads; i++) {
1280 ret = pthread_join(test_threads[i], NULL);
1281 if (ret) {
1282 errno = ret;
1283 perror("pthread_join");
1284 abort();
1285 }
1286 }
1287
1288 for (i = 0; i < CPU_SETSIZE; i++) {
1289 struct percpu_memcpy_buffer_node item;
78adbd91 1290 struct percpu_memcpy_buffer *cpubuffer;
31b44ba2 1291
40797ae3 1292 if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
31b44ba2
MD
1293 continue;
1294
78adbd91
MD
1295 cpubuffer = rseq_percpu_ptr(buffer, i);
1296 while (__percpu_memcpy_buffer_pop(buffer, &item, i)) {
31b44ba2
MD
1297 sum += item.data1;
1298 sum += item.data2;
1299 }
78adbd91 1300 free(cpubuffer->array);
31b44ba2
MD
1301 }
1302
1303 /*
1304 * All entries should now be accounted for (unless some external
1305 * actor is interfering with our allowed affinity while this
1306 * test is running).
1307 */
1308 assert(sum == expected_sum);
78adbd91
MD
1309 rseq_percpu_free(buffer);
1310 ret = rseq_percpu_pool_destroy(mempool);
1311 if (ret) {
1312 perror("rseq_percpu_pool_destroy");
1313 abort();
1314 }
31b44ba2
MD
1315}
1316
544cdc88 1317static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
31b44ba2
MD
1318{
1319 signals_delivered++;
1320}
1321
1322static int set_signal_handler(void)
1323{
1324 int ret = 0;
1325 struct sigaction sa;
1326 sigset_t sigset;
1327
1328 ret = sigemptyset(&sigset);
1329 if (ret < 0) {
1330 perror("sigemptyset");
1331 return ret;
1332 }
1333
1334 sa.sa_handler = test_signal_interrupt_handler;
1335 sa.sa_mask = sigset;
1336 sa.sa_flags = 0;
1337 ret = sigaction(SIGUSR1, &sa, NULL);
1338 if (ret < 0) {
1339 perror("sigaction");
1340 return ret;
1341 }
1342
1343 printf_verbose("Signal handler set for SIGUSR1\n");
1344
1345 return ret;
1346}
1347
3664098e
MD
1348static
1349bool membarrier_private_expedited_rseq_available(void)
1350{
1351 int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
1352
1353 if (status < 0) {
1354 perror("membarrier");
1355 return false;
1356 }
1357 if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
1358 return false;
1359 return true;
1360}
1361
5368dcb4 1362/* Test MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
369688a5 1363#ifdef TEST_MEMBARRIER
5368dcb4 1364struct test_membarrier_thread_args {
b08be829 1365 struct percpu_list __rseq_percpu *percpu_list_ptr;
5368dcb4 1366 int stop;
5368dcb4
MD
1367};
1368
1369/* Worker threads modify data in their "active" percpu lists. */
1370static
1371void *test_membarrier_worker_thread(void *arg)
1372{
1373 struct test_membarrier_thread_args *args =
1374 (struct test_membarrier_thread_args *)arg;
1375 const int iters = opt_reps;
1376 int i;
1377
1378 if (rseq_register_current_thread()) {
1379 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1380 errno, strerror(errno));
1381 abort();
1382 }
1383
1384 /* Wait for initialization. */
1385 while (!rseq_smp_load_acquire(&args->percpu_list_ptr)) { }
1386
1387 for (i = 0; i < iters; ++i) {
1388 int ret;
1389
1390 do {
369688a5 1391 int cpu = get_current_cpu_id();
b08be829
MD
1392 struct percpu_list __rseq_percpu *list = RSEQ_READ_ONCE(args->percpu_list_ptr);
1393 struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
5368dcb4 1394
b08be829
MD
1395 ret = rseq_load_cbne_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
1396 (intptr_t *) &args->percpu_list_ptr, (intptr_t) list,
1397 &cpulist->head->data, 1, cpu);
5368dcb4
MD
1398 } while (rseq_unlikely(ret));
1399 }
1400
1401 if (rseq_unregister_current_thread()) {
1402 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1403 errno, strerror(errno));
1404 abort();
1405 }
1406 return NULL;
1407}
1408
1409static
b08be829 1410struct percpu_list __rseq_percpu *test_membarrier_alloc_percpu_list(struct rseq_percpu_pool *mempool)
5368dcb4 1411{
b08be829 1412 struct percpu_list __rseq_percpu *list;
5368dcb4
MD
1413 int i;
1414
b08be829
MD
1415 list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool);
1416 if (!list) {
1417 perror("rseq_percpu_zmalloc");
1418 return NULL;
1419 }
5368dcb4 1420 for (i = 0; i < CPU_SETSIZE; i++) {
b08be829 1421 struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
5368dcb4
MD
1422 struct percpu_list_node *node;
1423
1424 node = (struct percpu_list_node *) malloc(sizeof(*node));
1425 assert(node);
1426 node->data = 0;
1427 node->next = NULL;
b08be829 1428 cpulist->head = node;
5368dcb4 1429 }
b08be829 1430 return list;
5368dcb4
MD
1431}
1432
1433static
b08be829 1434void test_membarrier_free_percpu_list(struct percpu_list __rseq_percpu *list)
5368dcb4
MD
1435{
1436 int i;
1437
1438 for (i = 0; i < CPU_SETSIZE; i++)
b08be829
MD
1439 free(rseq_percpu_ptr(list, i)->head);
1440 rseq_percpu_free(list);
5368dcb4
MD
1441}
1442
5368dcb4
MD
1443/*
1444 * The manager thread swaps per-cpu lists that worker threads see,
1445 * and validates that there are no unexpected modifications.
1446 */
1447static
1448void *test_membarrier_manager_thread(void *arg)
1449{
1450 struct test_membarrier_thread_args *args =
1451 (struct test_membarrier_thread_args *)arg;
b08be829 1452 struct percpu_list __rseq_percpu *list_a, __rseq_percpu *list_b;
5368dcb4
MD
1453 intptr_t expect_a = 0, expect_b = 0;
1454 int cpu_a = 0, cpu_b = 0;
b08be829
MD
1455 struct rseq_percpu_pool *mempool;
1456 int ret;
1457
1458 mempool = rseq_percpu_pool_create(sizeof(struct percpu_list),
1459 PERCPU_POOL_LEN, CPU_SETSIZE, PROT_READ | PROT_WRITE,
1460 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, 0);
1461 if (!mempool) {
1462 perror("rseq_percpu_pool_create");
1463 abort();
1464 }
5368dcb4
MD
1465
1466 if (rseq_register_current_thread()) {
1467 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1468 errno, strerror(errno));
1469 abort();
1470 }
1471
1472 /* Init lists. */
b08be829
MD
1473 list_a = test_membarrier_alloc_percpu_list(mempool);
1474 assert(list_a);
1475 list_b = test_membarrier_alloc_percpu_list(mempool);
1476 assert(list_b);
5368dcb4
MD
1477
1478 /* Initialize lists before publishing them. */
1479 rseq_smp_wmb();
1480
b08be829 1481 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
5368dcb4
MD
1482
1483 while (!RSEQ_READ_ONCE(args->stop)) {
1484 /* list_a is "active". */
1485 cpu_a = rand() % CPU_SETSIZE;
1486 /*
1487 * As list_b is "inactive", we should never see changes
1488 * to list_b.
1489 */
b08be829 1490 if (expect_b != RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data)) {
5368dcb4
MD
1491 fprintf(stderr, "Membarrier test failed\n");
1492 abort();
1493 }
1494
1495 /* Make list_b "active". */
b08be829 1496 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_b);
369688a5 1497 if (rseq_membarrier_expedited(cpu_a) &&
5368dcb4
MD
1498 errno != ENXIO /* missing CPU */) {
1499 perror("sys_membarrier");
1500 abort();
1501 }
1502 /*
1503 * Cpu A should now only modify list_b, so the values
1504 * in list_a should be stable.
1505 */
b08be829 1506 expect_a = RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data);
5368dcb4
MD
1507
1508 cpu_b = rand() % CPU_SETSIZE;
1509 /*
1510 * As list_a is "inactive", we should never see changes
1511 * to list_a.
1512 */
b08be829 1513 if (expect_a != RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data)) {
5368dcb4
MD
1514 fprintf(stderr, "Membarrier test failed\n");
1515 abort();
1516 }
1517
1518 /* Make list_a "active". */
b08be829 1519 RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
369688a5 1520 if (rseq_membarrier_expedited(cpu_b) &&
5368dcb4
MD
1521 errno != ENXIO /* missing CPU */) {
1522 perror("sys_membarrier");
1523 abort();
1524 }
1525 /* Remember a value from list_b. */
b08be829 1526 expect_b = RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data);
5368dcb4
MD
1527 }
1528
b08be829
MD
1529 test_membarrier_free_percpu_list(list_a);
1530 test_membarrier_free_percpu_list(list_b);
5368dcb4
MD
1531
1532 if (rseq_unregister_current_thread()) {
1533 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1534 errno, strerror(errno));
1535 abort();
1536 }
b08be829
MD
1537 ret = rseq_percpu_pool_destroy(mempool);
1538 if (ret) {
1539 perror("rseq_percpu_pool_destroy");
1540 abort();
1541 }
1542
5368dcb4
MD
1543 return NULL;
1544}
1545
1546static
1547void test_membarrier(void)
1548{
1549 const int num_threads = opt_threads;
1550 struct test_membarrier_thread_args thread_args;
1551 pthread_t worker_threads[num_threads];
1552 pthread_t manager_thread;
1553 int i, ret;
1554
d4bff8ed
MD
1555 if (!membarrier_private_expedited_rseq_available()) {
1556 fprintf(stderr, "Membarrier private expedited rseq not available. "
1557 "Skipping membarrier test.\n");
1558 return;
1559 }
5368dcb4
MD
1560 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1561 perror("sys_membarrier");
1562 abort();
1563 }
1564
b08be829 1565 thread_args.percpu_list_ptr = NULL;
5368dcb4 1566 thread_args.stop = 0;
5368dcb4
MD
1567 ret = pthread_create(&manager_thread, NULL,
1568 test_membarrier_manager_thread, &thread_args);
1569 if (ret) {
1570 errno = ret;
1571 perror("pthread_create");
1572 abort();
1573 }
1574
1575 for (i = 0; i < num_threads; i++) {
1576 ret = pthread_create(&worker_threads[i], NULL,
1577 test_membarrier_worker_thread, &thread_args);
1578 if (ret) {
1579 errno = ret;
1580 perror("pthread_create");
1581 abort();
1582 }
1583 }
1584
1585
1586 for (i = 0; i < num_threads; i++) {
1587 ret = pthread_join(worker_threads[i], NULL);
1588 if (ret) {
1589 errno = ret;
1590 perror("pthread_join");
1591 abort();
1592 }
1593 }
1594
1595 RSEQ_WRITE_ONCE(thread_args.stop, 1);
1596 ret = pthread_join(manager_thread, NULL);
1597 if (ret) {
1598 errno = ret;
1599 perror("pthread_join");
1600 abort();
1601 }
1602}
369688a5 1603#else /* TEST_MEMBARRIER */
5368dcb4
MD
1604static
1605void test_membarrier(void)
1606{
d4bff8ed
MD
1607 if (!membarrier_private_expedited_rseq_available()) {
1608 fprintf(stderr, "Membarrier private expedited rseq not available. "
1609 "Skipping membarrier test.\n");
1610 return;
1611 }
3cde2ee2 1612 fprintf(stderr, "rseq_load_cbne_load_add_store__ptr is not implemented on this architecture. "
5368dcb4
MD
1613 "Skipping membarrier test.\n");
1614}
1615#endif
1616
544cdc88 1617static void show_usage(char **argv)
31b44ba2
MD
1618{
1619 printf("Usage : %s <OPTIONS>\n",
1620 argv[0]);
1621 printf("OPTIONS:\n");
1622 printf(" [-1 loops] Number of loops for delay injection 1\n");
1623 printf(" [-2 loops] Number of loops for delay injection 2\n");
1624 printf(" [-3 loops] Number of loops for delay injection 3\n");
1625 printf(" [-4 loops] Number of loops for delay injection 4\n");
1626 printf(" [-5 loops] Number of loops for delay injection 5\n");
1627 printf(" [-6 loops] Number of loops for delay injection 6\n");
1628 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1629 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1630 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1631 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1632 printf(" [-y] Yield\n");
1633 printf(" [-k] Kill thread with signal\n");
1634 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1635 printf(" [-t N] Number of threads (default 200)\n");
1636 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1637 printf(" [-d] Disable rseq system call (no initialization)\n");
1638 printf(" [-D M] Disable rseq for each M threads\n");
5368dcb4 1639 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
31b44ba2 1640 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
d1cdec98 1641 printf(" [-c] Check if the rseq syscall is available.\n");
31b44ba2
MD
1642 printf(" [-v] Verbose output.\n");
1643 printf(" [-h] Show this help.\n");
1644 printf("\n");
1645}
1646
1647int main(int argc, char **argv)
1648{
1649 int i;
1650
1651 for (i = 1; i < argc; i++) {
1652 if (argv[i][0] != '-')
1653 continue;
1654 switch (argv[i][1]) {
1655 case '1':
1656 case '2':
1657 case '3':
1658 case '4':
1659 case '5':
1660 case '6':
1661 case '7':
1662 case '8':
1663 case '9':
1664 if (argc < i + 2) {
544cdc88 1665 show_usage(argv);
31b44ba2
MD
1666 goto error;
1667 }
1668 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1669 i++;
1670 break;
1671 case 'm':
1672 if (argc < i + 2) {
544cdc88 1673 show_usage(argv);
31b44ba2
MD
1674 goto error;
1675 }
1676 opt_modulo = atol(argv[i + 1]);
1677 if (opt_modulo < 0) {
544cdc88 1678 show_usage(argv);
31b44ba2
MD
1679 goto error;
1680 }
1681 i++;
1682 break;
1683 case 's':
1684 if (argc < i + 2) {
544cdc88 1685 show_usage(argv);
31b44ba2
MD
1686 goto error;
1687 }
1688 opt_sleep = atol(argv[i + 1]);
1689 if (opt_sleep < 0) {
544cdc88 1690 show_usage(argv);
31b44ba2
MD
1691 goto error;
1692 }
1693 i++;
1694 break;
1695 case 'y':
1696 opt_yield = 1;
1697 break;
1698 case 'k':
1699 opt_signal = 1;
1700 break;
1701 case 'd':
1702 opt_disable_rseq = 1;
1703 break;
1704 case 'D':
1705 if (argc < i + 2) {
544cdc88 1706 show_usage(argv);
31b44ba2
MD
1707 goto error;
1708 }
1709 opt_disable_mod = atol(argv[i + 1]);
1710 if (opt_disable_mod < 0) {
544cdc88 1711 show_usage(argv);
31b44ba2
MD
1712 goto error;
1713 }
1714 i++;
1715 break;
1716 case 't':
1717 if (argc < i + 2) {
544cdc88 1718 show_usage(argv);
31b44ba2
MD
1719 goto error;
1720 }
1721 opt_threads = atol(argv[i + 1]);
1722 if (opt_threads < 0) {
544cdc88 1723 show_usage(argv);
31b44ba2
MD
1724 goto error;
1725 }
1726 i++;
1727 break;
1728 case 'r':
1729 if (argc < i + 2) {
544cdc88 1730 show_usage(argv);
31b44ba2
MD
1731 goto error;
1732 }
1733 opt_reps = atoll(argv[i + 1]);
1734 if (opt_reps < 0) {
544cdc88 1735 show_usage(argv);
31b44ba2
MD
1736 goto error;
1737 }
1738 i++;
1739 break;
1740 case 'h':
544cdc88 1741 show_usage(argv);
31b44ba2
MD
1742 goto end;
1743 case 'T':
1744 if (argc < i + 2) {
544cdc88 1745 show_usage(argv);
31b44ba2
MD
1746 goto error;
1747 }
1748 opt_test = *argv[i + 1];
1749 switch (opt_test) {
1750 case 's':
1751 case 'l':
1752 case 'i':
1753 case 'b':
1754 case 'm':
5368dcb4 1755 case 'r':
31b44ba2
MD
1756 break;
1757 default:
544cdc88 1758 show_usage(argv);
31b44ba2
MD
1759 goto error;
1760 }
1761 i++;
1762 break;
1763 case 'v':
1764 verbose = 1;
1765 break;
1766 case 'M':
369688a5 1767 opt_mo = RSEQ_MO_RELEASE;
31b44ba2 1768 break;
d1cdec98 1769 case 'c':
8b34114a 1770 if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
d1cdec98
MJ
1771 printf_verbose("The rseq syscall is available.\n");
1772 goto end;
1773 } else {
1774 printf_verbose("The rseq syscall is unavailable.\n");
1775 goto no_rseq;
1776 }
31b44ba2 1777 default:
544cdc88 1778 show_usage(argv);
31b44ba2
MD
1779 goto error;
1780 }
1781 }
1782
1783 loop_cnt_1 = loop_cnt[1];
1784 loop_cnt_2 = loop_cnt[2];
1785 loop_cnt_3 = loop_cnt[3];
1786 loop_cnt_4 = loop_cnt[4];
1787 loop_cnt_5 = loop_cnt[5];
1788 loop_cnt_6 = loop_cnt[6];
1789
1790 if (set_signal_handler())
1791 goto error;
1792
1793 if (!opt_disable_rseq && rseq_register_current_thread())
1794 goto error;
369688a5 1795 if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
04bb9984
MD
1796 printf_verbose("The rseq cpu id getter is unavailable\n");
1797 goto no_rseq;
369688a5 1798 }
31b44ba2
MD
1799 switch (opt_test) {
1800 case 's':
1801 printf_verbose("spinlock\n");
1802 test_percpu_spinlock();
1803 break;
1804 case 'l':
1805 printf_verbose("linked list\n");
1806 test_percpu_list();
1807 break;
1808 case 'b':
1809 printf_verbose("buffer\n");
1810 test_percpu_buffer();
1811 break;
1812 case 'm':
1813 printf_verbose("memcpy buffer\n");
1814 test_percpu_memcpy_buffer();
1815 break;
1816 case 'i':
1817 printf_verbose("counter increment\n");
1818 test_percpu_inc();
1819 break;
5368dcb4
MD
1820 case 'r':
1821 printf_verbose("membarrier\n");
1822 test_membarrier();
1823 break;
31b44ba2
MD
1824 }
1825 if (!opt_disable_rseq && rseq_unregister_current_thread())
1826 abort();
1827end:
1828 return 0;
1829
1830error:
1831 return -1;
d1cdec98
MJ
1832
1833no_rseq:
1834 return 2;
31b44ba2 1835}