// SPDX-License-Identifier: MIT
/*
 * Copyright 2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 */

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/membarrier.h>
#include <linux/futex.h>

#include <rseq/rseq.h>	/* librseq: rseq_available(), rseq_prepare_unload() */

#include "rcu.h"	/* struct side_rcu_gp_state, futex() helper */
#include "smp.h"	/* get_possible_cpus_array_len() */

/*
 * If both rseq (with glibc support) and membarrier system calls are
 * available, use them to replace barriers and atomics on the fast-path.
 */
unsigned int side_rcu_rseq_membarrier_available;

static int
membarrier(int cmd, unsigned int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
/*
 * Wait/wakeup scheme with single waiter/many wakers.
 */
static
void wait_gp_prepare(struct side_rcu_gp_state *gp_state)
{
	__atomic_store_n(&gp_state->futex, -1, __ATOMIC_RELAXED);
	/*
	 * This memory barrier (H) pairs with memory barrier (F). It
	 * orders store to futex before load of RCU reader's counter
	 * state, thus ensuring that load of RCU reader's counters does
	 * not leak outside of futex state=-1.
	 */
	if (side_rcu_rseq_membarrier_available) {
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
			perror("membarrier");
			abort();
		}
	} else {
		__atomic_thread_fence(__ATOMIC_SEQ_CST);
	}
}
static
void wait_gp_end(struct side_rcu_gp_state *gp_state)
{
	/*
	 * This memory barrier (G) pairs with memory barrier (F). It
	 * orders load of RCU reader's counter state before storing the
	 * futex value, thus ensuring that load of RCU reader's counters
	 * does not leak outside of futex state=-1.
	 */
	if (side_rcu_rseq_membarrier_available) {
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
			perror("membarrier");
			abort();
		}
	} else {
		__atomic_thread_fence(__ATOMIC_SEQ_CST);
	}
	__atomic_store_n(&gp_state->futex, 0, __ATOMIC_RELAXED);
}
static
void wait_gp(struct side_rcu_gp_state *gp_state)
{
	/*
	 * This memory barrier (G) pairs with memory barrier (F). It
	 * orders load of RCU reader's counter state before loading the
	 * futex value.
	 */
	if (side_rcu_rseq_membarrier_available) {
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
			perror("membarrier");
			abort();
		}
	} else {
		__atomic_thread_fence(__ATOMIC_SEQ_CST);
	}
	if (__atomic_load_n(&gp_state->futex, __ATOMIC_RELAXED) != -1)
		return;
	while (futex(&gp_state->futex, FUTEX_WAIT, -1, NULL, NULL, 0)) {
		switch (errno) {
		case EWOULDBLOCK:
			/* Value already changed. */
			return;
		case EINTR:
			/* Retry if interrupted by signal. */
			break;	/* Get out of switch. */
		default:
			/* Unexpected error. */
			abort();
		}
	}
}
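
/*
 * How the three helpers above fit together (descriptive sketch only):
 * the grace period thread announces its intent to sleep by storing -1
 * to the futex word (wait_gp_prepare), re-checks reader activity, and
 * then either clears the futex without sleeping (wait_gp_end) or blocks
 * in FUTEX_WAIT until the futex value changes (wait_gp). The wakeup is
 * assumed to come from the read-side exit path, which is expected to
 * reset the futex to 0 and issue FUTEX_WAKE when it observes the -1
 * value; that code lives outside this file.
 */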
/* active_readers is an input/output parameter. */
static
void check_active_readers(struct side_rcu_gp_state *gp_state, bool *active_readers)
{
	uintptr_t sum[2] = { 0, 0 };	/* begin - end */
	int i;

	for (i = 0; i < gp_state->nr_cpus; i++) {
		struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];

		if (active_readers[0]) {
			sum[0] -= __atomic_load_n(&cpu_state->count[0].end, __ATOMIC_RELAXED);
			sum[0] -= __atomic_load_n(&cpu_state->count[0].rseq_end, __ATOMIC_RELAXED);
		}
		if (active_readers[1]) {
			sum[1] -= __atomic_load_n(&cpu_state->count[1].end, __ATOMIC_RELAXED);
			sum[1] -= __atomic_load_n(&cpu_state->count[1].rseq_end, __ATOMIC_RELAXED);
		}
	}

	/*
	 * This memory barrier (C) pairs with either of memory barriers
	 * (A) or (B) (one is sufficient).
	 *
	 * Read end counts before begin counts. Reading "end" before
	 * "begin" counts ensures we never see an "end" without having
	 * seen its associated "begin", because "begin" is always
	 * incremented before "end", as guaranteed by memory barriers
	 * (A) or (B).
	 */
	if (side_rcu_rseq_membarrier_available) {
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
			perror("membarrier");
			abort();
		}
	} else {
		__atomic_thread_fence(__ATOMIC_SEQ_CST);
	}

	for (i = 0; i < gp_state->nr_cpus; i++) {
		struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];

		if (active_readers[0]) {
			sum[0] += __atomic_load_n(&cpu_state->count[0].begin, __ATOMIC_RELAXED);
			sum[0] += __atomic_load_n(&cpu_state->count[0].rseq_begin, __ATOMIC_RELAXED);
		}
		if (active_readers[1]) {
			sum[1] += __atomic_load_n(&cpu_state->count[1].begin, __ATOMIC_RELAXED);
			sum[1] += __atomic_load_n(&cpu_state->count[1].rseq_begin, __ATOMIC_RELAXED);
		}
	}
	if (active_readers[0])
		active_readers[0] = sum[0];
	if (active_readers[1])
		active_readers[1] = sum[1];
}
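
/*
 * Illustrative example (made-up numbers): with two CPUs and period 0
 * still marked active, suppose the combined (count + rseq count) values
 * are begin=2, end=1 on CPU 0 and begin=3, end=3 on CPU 1. The scan
 * computes sum[0] = -(1 + 3) + (2 + 3) = 1, so active_readers[0]
 * remains true: one reader incremented "begin" but has not yet
 * incremented "end". active_readers[0] only becomes false once the
 * begin and end sums match, i.e. sum[0] == 0.
 */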
/*
 * Wait for previous period to have no active readers.
 *
 * active_readers is an input/output parameter.
 */
static
void wait_for_prev_period_readers(struct side_rcu_gp_state *gp_state, bool *active_readers)
{
	unsigned int prev_period = gp_state->period ^ 1;

	/*
	 * If a prior active readers scan already observed that no
	 * readers are present for the previous period, there is no need
	 * to scan again.
	 */
	if (!active_readers[prev_period])
		return;
	/*
	 * Wait for the sum of CPU begin/end counts to match for the
	 * previous period.
	 */
	for (;;) {
		wait_gp_prepare(gp_state);
		check_active_readers(gp_state, active_readers);
		if (!active_readers[prev_period]) {
			wait_gp_end(gp_state);
			break;
		}
		wait_gp(gp_state);
	}
}
/*
 * The grace period completes when it observes that there are no active
 * readers within each of the periods.
 *
 * The active_readers state is initially true for each period, until the
 * grace period observes that no readers are present for each given
 * period, at which point the active_readers state becomes false.
 */
void side_rcu_wait_grace_period(struct side_rcu_gp_state *gp_state)
{
	bool active_readers[2] = { true, true };

	/*
	 * This memory barrier (D) pairs with memory barriers (A) and
	 * (B) on the read-side.
	 *
	 * It orders prior loads and stores before the "end"/"begin"
	 * reader state loads. In other words, it orders prior loads and
	 * stores before observation of active readers quiescence,
	 * effectively ensuring that read-side critical sections which
	 * exist after the grace period completes are ordered after
	 * loads and stores performed before the grace period.
	 */
	if (side_rcu_rseq_membarrier_available) {
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
			perror("membarrier");
			abort();
		}
	} else {
		__atomic_thread_fence(__ATOMIC_SEQ_CST);
	}

	/*
	 * First scan through all cpus, for both periods. If no readers
	 * are accounted for, we have observed quiescence and can
	 * complete the grace period immediately.
	 */
	check_active_readers(gp_state, active_readers);
	if (!active_readers[0] && !active_readers[1])
		goto end;

	pthread_mutex_lock(&gp_state->gp_lock);

	wait_for_prev_period_readers(gp_state, active_readers);
	/*
	 * If the reader scan detected that there are no readers in the
	 * current period as well, we can complete the grace period
	 * immediately.
	 */
	if (!active_readers[gp_state->period])
		goto unlock;

	/* Flip period: 0 -> 1, 1 -> 0. */
	(void) __atomic_xor_fetch(&gp_state->period, 1, __ATOMIC_RELAXED);

	wait_for_prev_period_readers(gp_state, active_readers);
unlock:
	pthread_mutex_unlock(&gp_state->gp_lock);
end:
	/*
	 * This memory barrier (E) pairs with memory barriers (A) and
	 * (B) on the read-side.
	 *
	 * It orders the "end"/"begin" reader state loads before
	 * following loads and stores. In other words, it orders
	 * observation of active readers quiescence before following
	 * loads and stores, effectively ensuring that read-side
	 * critical sections which existed prior to the grace period
	 * are ordered before loads and stores performed after the grace
	 * period.
	 */
	if (side_rcu_rseq_membarrier_available) {
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
			perror("membarrier");
			abort();
		}
	} else {
		__atomic_thread_fence(__ATOMIC_SEQ_CST);
	}
}
void side_rcu_gp_init(struct side_rcu_gp_state *rcu_gp)
{
	bool has_membarrier = false, has_rseq = false;

	memset(rcu_gp, 0, sizeof(*rcu_gp));
	rcu_gp->nr_cpus = get_possible_cpus_array_len();
	if (!rcu_gp->nr_cpus)
		abort();
	pthread_mutex_init(&rcu_gp->gp_lock, NULL);
	rcu_gp->percpu_state = (struct side_rcu_cpu_gp_state *)
		calloc(rcu_gp->nr_cpus, sizeof(struct side_rcu_cpu_gp_state));
	if (!rcu_gp->percpu_state)
		abort();
	if (!membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0, 0))
		has_membarrier = true;
	if (rseq_available(RSEQ_AVAILABLE_QUERY_LIBC))
		has_rseq = true;
	if (has_membarrier && has_rseq)
		side_rcu_rseq_membarrier_available = 1;
}
void side_rcu_gp_exit(struct side_rcu_gp_state *rcu_gp)
{
	rseq_prepare_unload();
	pthread_mutex_destroy(&rcu_gp->gp_lock);
	free(rcu_gp->percpu_state);
}