Fix: use long rather than int for offsets
librseq.git: tests/basic_percpu_ops_test.c
// SPDX-License-Identifier: LGPL-2.1-only
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

#include <rseq/rseq.h>

#include "tap.h"

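/*
 * Four TAP test points are reported: rseq registration of the main
 * thread, the spinlock counter sum, the per-cpu list sum, and rseq
 * unregistration of the main thread.
 */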
#define NR_TESTS 4

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

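/*
 * Per-cpu entries are aligned on 128 bytes so that each cpu's data
 * sits on its own cache line(s), which avoids false sharing between
 * cpus updating their entries concurrently.
 */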
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
	int reps;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Returns the cpu on which the lock was acquired. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

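		/*
		 * Try to acquire the lock for the current cpu:
		 * rseq_cmpeqv_storev() stores 1 into the per-cpu lock word
		 * only if it still contains 0 and we are still on @cpu.
		 * A non-zero return means the comparison failed (lock held)
		 * or the critical section was aborted; retry either way.
		 */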
		cpu = rseq_cpu_start();
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}

static void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_test_data *data = (struct spinlock_test_data *) arg;
	int i, cpu;

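	/*
	 * Each thread must register itself with rseq before it can use
	 * rseq critical sections such as rseq_this_cpu_lock().
	 */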
	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	for (i = 0; i < data->reps; i++) {
		cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
	}
	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
static void test_percpu_spinlock(void)
{
	const int num_threads = 200;
	int i;
	uint64_t sum, expected_sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;

	diag("spinlock");

	memset(&data, 0, sizeof(data));
	data.reps = 5000;

	for (i = 0; i < num_threads; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_spinlock_thread, &data);

	for (i = 0; i < num_threads; i++)
		pthread_join(test_threads[i], NULL);

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	expected_sum = (uint64_t)data.reps * num_threads;

	ok(sum == expected_sum, "spinlock - sum (%" PRIu64 " == %" PRIu64 ")", sum, expected_sum);
}

static void this_cpu_list_push(struct percpu_list *list,
			       struct percpu_list_node *node,
			       int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

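		/*
		 * Link the new node in front of the current per-cpu head,
		 * then publish it with rseq_cmpeqv_storev(): the store only
		 * happens if the head has not changed underneath us and we
		 * are still on @cpu; otherwise retry.
		 */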
		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
						  int *_cpu)
{
	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		long offset;
		int ret, cpu;

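		/*
		 * rseq_cmpnev_storeoffp_load() checks that the per-cpu head
		 * is not NULL, loads it into @head, and stores the pointer
		 * found @offset bytes into it (i.e. head->next) back as the
		 * new head. A positive return means the list was empty.
		 * The byte offset is kept in a long rather than an int to
		 * match the offset parameter type of the primitive.
		 */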
		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			if (_cpu)
				*_cpu = cpu;
			return head;
		}
		if (ret > 0)
			return NULL;
		/* Retry if rseq aborts. */
	}
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
static struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

static void *test_percpu_list_thread(void *arg)
{
	int i;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	for (i = 0; i < 100000; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		sched_yield(); /* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
static void test_percpu_list(void)
{
	int i, j;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[200];
	cpu_set_t allowed_cpus;

	diag("percpu_list");

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = (struct percpu_list_node *) malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < 200; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_list_thread, &list);

	for (i = 0; i < 200; i++)
		pthread_join(test_threads[i], NULL);

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	ok(sum == expected_sum, "percpu_list - sum (%" PRIu64 " == %" PRIu64 ")", sum, expected_sum);
}

int main(void)
{
	plan_tests(NR_TESTS);

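	/*
	 * Skip every test point if the kernel does not provide the rseq
	 * system call.
	 */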
	if (!rseq_available()) {
		skip(NR_TESTS, "The rseq syscall is unavailable");
		goto end;
	}

	if (rseq_register_current_thread()) {
		fail("rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto end;
	} else {
		pass("Registered current thread with rseq");
	}

	test_percpu_spinlock();
	test_percpu_list();

	if (rseq_unregister_current_thread()) {
		fail("rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto end;
	} else {
		pass("Unregistered current thread with rseq");
	}

end:
	exit(exit_status());
}