// SPDX-License-Identifier: LGPL-2.1
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

#include <rseq/rseq.h>

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
	int reps;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

/* A simple per-cpu spinlock. Returns the cpu the lock was acquired on. */
int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
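		/*
		 * rseq_cmpeqv_storev() runs as a restartable sequence
		 * bound to @cpu: it stores 1 to the lock word only if
		 * the word still reads 0. It returns 0 on success, a
		 * positive value when the comparison fails, and a
		 * negative value when the sequence is aborted (e.g. by
		 * preemption, signal delivery, or cpu migration).
		 */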
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v, 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_test_data *data = arg;
	int i, cpu;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	for (i = 0; i < data->reps; i++) {
		cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
	}
	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment (see the illustrative sketch after this test);
 * however, this is reasonable for a test, and the lock can be
 * extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = 200;
	int i;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;

	memset(&data, 0, sizeof(data));
	data.reps = 5000;

	for (i = 0; i < num_threads; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_spinlock_thread, &data);

	for (i = 0; i < num_threads; i++)
		pthread_join(test_threads[i], NULL);

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)data.reps * num_threads);
}
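
/*
 * For comparison with the lock-based counter above: a lock-free
 * per-cpu increment built directly on the rseq_addv() helper from
 * rseq.h. This is an illustrative sketch only (hence the unused
 * attribute); it is not exercised by main().
 */
__attribute__((unused))
static void this_cpu_count_inc(struct test_data_entry *c)
{
	for (;;) {
		int cpu = rseq_cpu_start();

		/* Add 1 to this cpu's counter within a restartable sequence. */
		if (rseq_likely(!rseq_addv(&c[cpu].count, 1, cpu)))
			break;
		/* Retry if rseq aborts. */
	}
}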
136 | ||
137 | void this_cpu_list_push(struct percpu_list *list, | |
138 | struct percpu_list_node *node, | |
139 | int *_cpu) | |
140 | { | |
141 | int cpu; | |
142 | ||
143 | for (;;) { | |
144 | intptr_t *targetptr, newval, expect; | |
145 | int ret; | |
146 | ||
147 | cpu = rseq_cpu_start(); | |
148 | /* Load list->c[cpu].head with single-copy atomicity. */ | |
149 | expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head); | |
150 | newval = (intptr_t)node; | |
151 | targetptr = (intptr_t *)&list->c[cpu].head; | |
152 | node->next = (struct percpu_list_node *)expect; | |
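		/*
		 * Publish @node as the new head only if the head has
		 * not changed since it was loaded above; the sequence
		 * aborts rather than storing if this thread is
		 * preempted or migrates to another cpu part-way through.
		 */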
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races: the head is loaded and replaced within a single
 * restartable sequence on one cpu, so no other thread can pop and
 * reuse the node in between.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		off_t offset;
		int ret, cpu;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
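		/*
		 * rseq_cmpnev_storeoffp_load(): when *targetptr differs
		 * from @expectnot (i.e. the list is non-empty), load the
		 * old head into *load and store the node's next pointer
		 * (found at @offset within it) back to *targetptr, all
		 * within one restartable sequence. Returns 0 on success,
		 * a positive value when *targetptr equals @expectnot,
		 * and a negative value on abort.
		 */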
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			if (_cpu)
				*_cpu = cpu;
			return head;
		}
		if (ret > 0)
			return NULL;
		/* Retry if rseq aborts. */
	}
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	int i;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	for (i = 0; i < 100000; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		sched_yield();  /* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	int i, j;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[200];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < 200; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_list_thread, &list);

	for (i = 0; i < 200; i++)
		pthread_join(test_threads[i], NULL);

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

int main(void)
{
	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto error;
	}
	printf("spinlock\n");
	test_percpu_spinlock();
	printf("percpu_list\n");
	test_percpu_list();
	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto error;
	}
	return 0;

error:
	return -1;
}
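
/*
 * Build note (an assumption, not part of the original file): with
 * librseq installed, something along the lines of
 *
 *	cc -O2 -o basic_percpu_ops_test basic_percpu_ops_test.c -lrseq -lpthread
 *
 * should link this test; the kernel selftests instead compile their
 * local rseq.c support code directly into the test binary.
 */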