Fix: use int rather than off_t for offsets
[librseq.git] / tests / basic_percpu_ops_test.c
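A hedged sketch of the change the title describes (the previous revision is not shown on this page): the offset local in this_cpu_list_pop(), which is passed to rseq_cmpnev_storeoffp_load(), is assumed to have been declared as off_t and is now a plain int, roughly:

-		off_t offset;
+		int offset;

In the file below, offset appears merged into the "int offset, ret, cpu;" declaration.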
// SPDX-License-Identifier: LGPL-2.1-only
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

#include <rseq/rseq.h>

#include "tap.h"

#define NR_TESTS 4

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
	int reps;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

/* A simple per-cpu spinlock. Returns the cpu on which the lock was acquired. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
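		/*
		 * Attempt to compare-and-store 0 -> 1 on this cpu's lock
		 * word. rseq_cmpeqv_storev() performs the comparison and
		 * the store within an rseq critical section bound to @cpu,
		 * so the update cannot be applied after migrating away
		 * from that cpu.
		 */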
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}

static void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_test_data *data = (struct spinlock_test_data *) arg;
	int i, cpu;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	for (i = 0; i < data->reps; i++) {
		cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
	}
	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 * (A sketch of such a per-cpu increment follows this test function.)
 */
static void test_percpu_spinlock(void)
{
	const int num_threads = 200;
	int i;
	uint64_t sum, expected_sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;

	diag("spinlock");

	memset(&data, 0, sizeof(data));
	data.reps = 5000;

	for (i = 0; i < num_threads; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_spinlock_thread, &data);

	for (i = 0; i < num_threads; i++)
		pthread_join(test_threads[i], NULL);

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	expected_sum = (uint64_t)data.reps * num_threads;

	ok(sum == expected_sum, "spinlock - sum (%" PRIu64 " == %" PRIu64 ")", sum, expected_sum);
}
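
/*
 * Minimal sketch of the plain per-cpu increment mentioned in the comment
 * above; it is not exercised by this test. It assumes an rseq_addv(v,
 * count, cpu) helper that adds @count to *@v within an rseq critical
 * section bound to @cpu and returns non-zero when the sequence aborts.
 */
static inline void this_cpu_count_inc(struct spinlock_test_data *data)
{
	for (;;) {
		int cpu = rseq_cpu_start();

		if (rseq_likely(!rseq_addv(&data->c[cpu].count, 1, cpu)))
			break;
		/* Retry if rseq aborts (e.g. migration or preemption). */
	}
}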

static void this_cpu_list_push(struct percpu_list *list,
			       struct percpu_list_node *node,
			       int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 */
static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
						  int *_cpu)
{
	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		int offset, ret, cpu;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
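		/*
		 * rseq_cmpnev_storeoffp_load() compares the list head
		 * against expectnot (NULL) and, if different, loads the
		 * head pointer into @load and stores the value found at
		 * @offset inside the head node (i.e. head->next) back into
		 * the list head, all within an rseq critical section bound
		 * to @cpu. It is expected to return 0 on success, a
		 * positive value when the head equals expectnot (empty
		 * list), and a negative value on rseq abort.
		 */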
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			if (_cpu)
				*_cpu = cpu;
			return head;
		}
		if (ret > 0)
			return NULL;
		/* Retry if rseq aborts. */
	}
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
static struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

static void *test_percpu_list_thread(void *arg)
{
	int i;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	for (i = 0; i < 100000; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		sched_yield(); /* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/* Simultaneous modification of a per-cpu linked list from many threads. */
static void test_percpu_list(void)
{
	const int num_threads = 200;
	int i, j;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	diag("percpu_list");

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = (struct percpu_list_node *) malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_list_thread, &list);

	for (i = 0; i < num_threads; i++)
		pthread_join(test_threads[i], NULL);

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	ok(sum == expected_sum, "percpu_list - sum (%" PRIu64 " == %" PRIu64 ")", sum, expected_sum);
}

int main(void)
{
	plan_tests(NR_TESTS);

	if (!rseq_available()) {
		skip(NR_TESTS, "The rseq syscall is unavailable");
		goto end;
	}

	if (rseq_register_current_thread()) {
		fail("rseq_register_current_thread(...) failed(%d): %s\n",
		     errno, strerror(errno));
		goto end;
	} else {
		pass("Registered current thread with rseq");
	}

	test_percpu_spinlock();
	test_percpu_list();

	if (rseq_unregister_current_thread()) {
		fail("rseq_unregister_current_thread(...) failed(%d): %s\n",
		     errno, strerror(errno));
		goto end;
	} else {
		pass("Unregistered current thread with rseq");
	}

end:
	exit(exit_status());
}