Add debug output to basic_percpu_ops_test
tests/basic_percpu_ops_test.c
// SPDX-License-Identifier: LGPL-2.1-only
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <errno.h>

#include <rseq/rseq.h>

#include "tap.h"
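
/*
 * Four TAP test points: thread registration, the spinlock counter sum,
 * the per-cpu list sum, and thread unregistration.
 */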
#define NR_TESTS 4

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
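
/*
 * Note: the 128-byte alignment on the per-cpu entry structures below is
 * presumably meant to keep each entry on its own pair of cache lines,
 * so that entries owned by different CPUs do not false-share.
 */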
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
	int reps;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Returns the cpu the lock was acquired on. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
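		/*
		 * Take the lock owned by the cpu we are running on:
		 * compare its lock word against 0 and store 1, within a
		 * single restartable sequence on that cpu.
		 */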
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}

static void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_test_data *data = (struct spinlock_test_data *) arg;
	int i, cpu;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	for (i = 0; i < data->reps; i++) {
		cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
	}
	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
static void test_percpu_spinlock(void)
{
	const int num_threads = 200;
	int i;
	uint64_t sum, expected_sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;

	diag("spinlock");

	memset(&data, 0, sizeof(data));
	data.reps = 5000;

	for (i = 0; i < num_threads; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_spinlock_thread, &data);

	for (i = 0; i < num_threads; i++)
		pthread_join(test_threads[i], NULL);

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	expected_sum = (uint64_t)data.reps * num_threads;

	ok(sum == expected_sum, "spinlock - sum (%" PRIu64 " == %" PRIu64 ")", sum, expected_sum);
}
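
/*
 * Illustrative sketch only (not exercised by this test, and assuming the
 * rseq_addv() helper from the rseq per-cpu operation headers is
 * available): the simpler per-cpu increment mentioned in the comment
 * above test_percpu_spinlock() would retry a single restartable add
 * instead of taking a lock.
 */
static inline __attribute__((unused))
void this_cpu_count_inc(struct test_data_entry *c)
{
	for (;;) {
		int cpu = rseq_cpu_start();

		/* Add 1 to the counter owned by the cpu we run on. */
		if (rseq_likely(!rseq_addv(&c[cpu].count, 1, cpu)))
			break;
		/* Retry if rseq aborts. */
	}
}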

static void this_cpu_list_push(struct percpu_list *list,
			       struct percpu_list_node *node,
			       int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
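		/* Link the new node to the current head before publishing it. */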
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races: the head is loaded, dereferenced and replaced within
 * a single restartable critical section, so preemption, migration or
 * signal delivery between those steps simply restarts the sequence.
 */
static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
						  int *_cpu)
{
	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		off_t offset;
		int ret, cpu;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
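		/*
		 * Pop in one restartable critical section: if the head is
		 * not NULL, store it into *load (our local head) and make
		 * head->next the new list head.
		 */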
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			if (_cpu)
				*_cpu = cpu;
			return head;
		}
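		/* A positive return means the head was NULL: the list is empty. */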
		if (ret > 0)
			return NULL;
		/* Retry if rseq aborts. */
	}
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
static struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

static void *test_percpu_list_thread(void *arg)
{
	int i;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	for (i = 0; i < 100000; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		sched_yield(); /* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
static void test_percpu_list(void)
{
	int i, j;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[200];
	cpu_set_t allowed_cpus;

	diag("percpu_list");

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = (struct percpu_list_node *) malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < 200; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_list_thread, &list);

	for (i = 0; i < 200; i++)
		pthread_join(test_threads[i], NULL);

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	ok(sum == expected_sum, "percpu_list - sum (%" PRIu64 " == %" PRIu64 ")", sum, expected_sum);
}

int main(void)
{
	plan_tests(NR_TESTS);
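
	/* Skip every planned test when the kernel does not provide rseq. */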
	if (!rseq_available()) {
		skip(NR_TESTS, "The rseq syscall is unavailable");
		goto end;
	}

	if (rseq_register_current_thread()) {
		fail("rseq_register_current_thread(...) failed(%d): %s\n",
		     errno, strerror(errno));
		goto end;
	} else {
		pass("Registered current thread with rseq");
	}

	test_percpu_spinlock();
	test_percpu_list();

	if (rseq_unregister_current_thread()) {
		fail("rseq_unregister_current_thread(...) failed(%d): %s\n",
		     errno, strerror(errno));
		goto end;
	} else {
		pass("Unregistered current thread with rseq");
	}

end:
	exit(exit_status());
}