Add basic percpu ops test
[librseq.git] / tests / basic_percpu_ops_test.c
// SPDX-License-Identifier: LGPL-2.1
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

#include <rseq/rseq.h>

#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof((arr)[0]))

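/*
 * The per-cpu entry structures below are aligned on 128 bytes so that
 * each cpu's slot sits on its own cache line(s), avoiding false sharing
 * between cpus that update adjacent entries.
 */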
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
	int reps;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Returns the cpu the lock was acquired on. */
int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}
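
/*
 * Typical usage of the pair above (the test threads below do exactly
 * this):
 *
 *	cpu = rseq_this_cpu_lock(&data->lock);
 *	... update per-cpu state owned by "cpu" ...
 *	rseq_percpu_unlock(&data->lock, cpu);
 */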

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_test_data *data = arg;
	int i, cpu;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	for (i = 0; i < data->reps; i++) {
		cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
	}
	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment (see the sketch after this test); however, this is
 * reasonable for a test and the lock can be extended to synchronize
 * more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = 200;
	int i;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;

	memset(&data, 0, sizeof(data));
	data.reps = 5000;

	for (i = 0; i < num_threads; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_spinlock_thread, &data);

	for (i = 0; i < num_threads; i++)
		pthread_join(test_threads[i], NULL);

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)data.reps * num_threads);
}
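
/*
 * The lock-free per-cpu increment mentioned above could look like the
 * sketch below. It assumes an rseq_addv(v, count, cpu) helper is
 * available from <rseq/rseq.h> alongside the other single-word
 * primitives used in this file; it is not exercised by this test.
 */
static inline void this_cpu_count_inc(struct spinlock_test_data *data)
{
	for (;;) {
		int cpu = rseq_cpu_start();

		/* Add 1 to this cpu's counter; retry if rseq aborts. */
		if (rseq_likely(!rseq_addv(&data->c[cpu].count, 1, cpu)))
			break;
	}
}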

void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races: the head load and the dependent store both happen
 * within a single restartable sequence, which aborts if the thread is
 * preempted, migrated, or interrupted by a signal before the store
 * commits.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		off_t offset;
		int ret, cpu;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			if (_cpu)
				*_cpu = cpu;
			return head;
		}
		/* A positive return means the head was NULL: list empty. */
		if (ret > 0)
			return NULL;
		/* Retry if rseq aborts. */
	}
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}
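
/*
 * In this test, __percpu_list_pop() is only called from
 * test_percpu_list() below, after every worker thread has been joined,
 * so the single-threaded restriction above is respected.
 */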

void *test_percpu_list_thread(void *arg)
{
	int i;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	for (i = 0; i < 100000; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	int i, j;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[200];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < 200; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_list_thread, &list);

	for (i = 0; i < 200; i++)
		pthread_join(test_threads[i], NULL);

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

int main(void)
{
	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto error;
	}
	printf("spinlock\n");
	test_percpu_spinlock();
	printf("percpu_list\n");
	test_percpu_list();
	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto error;
	}
	return 0;

error:
	return -1;
}
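
/*
 * Build sketch (library name and header path are assumptions; adjust to
 * the local librseq install):
 *
 *	cc -O2 -o basic_percpu_ops_test basic_percpu_ops_test.c -lrseq -lpthread
 */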