RCU: update barrier comments
[libside.git] / src / rcu.h
1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright 2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 */
5
6 #include <sched.h>
7 #include <stdint.h>
8 #include <pthread.h>
9 #include <stdbool.h>
10 #include <poll.h>
11
/*
 * Alignment used to keep each per-CPU counter pair on its own cache
 * line(s), avoiding false sharing between CPUs updating their counters.
 */
#define SIDE_CACHE_LINE_SIZE 256

/*
 * Per-CPU reader counter pair for one grace period slot.
 * "begin" is incremented by side_rcu_read_begin(), "end" by
 * side_rcu_read_end(). Readers are quiescent for a period when the sum
 * of "begin" counts equals the sum of "end" counts across all CPUs.
 */
struct side_rcu_percpu_count {
	uintptr_t begin;
	uintptr_t end;
} __attribute__((__aligned__(SIDE_CACHE_LINE_SIZE)));

/* Per-CPU state: one counter pair per period slot (period 0 and 1). */
struct side_rcu_cpu_gp_state {
	struct side_rcu_percpu_count count[2];
};
22
/*
 * Global grace period state.
 */
struct side_rcu_gp_state {
	struct side_rcu_cpu_gp_state *percpu_state;	/* array of nr_cpus entries */
	int nr_cpus;
	unsigned int period;		/* current period slot: 0 or 1 */
	pthread_mutex_t gp_lock;	/* serializes grace periods */
};
29
30 //TODO: replace atomics by rseq (when available)
31 //TODO: replace acquire/release by membarrier+compiler barrier (when available)
32 //TODO: implement wait/wakeup for grace period using sys_futex
/*
 * Enter an RCU read-side critical section.
 *
 * Samples the current period and increments this CPU's "begin" counter
 * for that period. Returns the sampled period, which the caller must
 * pass unchanged to the matching side_rcu_read_end().
 */
static inline
unsigned int side_rcu_read_begin(struct side_rcu_gp_state *gp_state)
{
	int cpu = sched_getcpu();
	unsigned int period = __atomic_load_n(&gp_state->period, __ATOMIC_RELAXED);

	/* sched_getcpu() returns -1 on error; fall back to CPU 0. */
	if (cpu < 0)
		cpu = 0;
	/*
	 * This memory barrier (A) ensures that the contents of the
	 * read-side critical section does not leak before the "begin"
	 * counter increment. It pairs with memory barriers (D) and (E).
	 *
	 * This memory barrier (A) also ensures that the "begin"
	 * increment is before the "end" increment. It pairs with memory
	 * barrier (C). It is redundant with memory barrier (B) for that
	 * purpose.
	 */
	(void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].begin, 1, __ATOMIC_SEQ_CST);
	return period;
}
54
/*
 * Exit an RCU read-side critical section.
 *
 * @period must be the value returned by the matching
 * side_rcu_read_begin(). Increments this CPU's "end" counter for that
 * period; the increment may land on a different CPU than "begin" did
 * (after migration), which is fine since counters are summed over all
 * CPUs.
 */
static inline
void side_rcu_read_end(struct side_rcu_gp_state *gp_state, unsigned int period)
{
	int cpu = sched_getcpu();

	/* sched_getcpu() returns -1 on error; fall back to CPU 0. */
	if (cpu < 0)
		cpu = 0;
	/*
	 * This memory barrier (B) ensures that the contents of the
	 * read-side critical section does not leak after the "end"
	 * counter increment. It pairs with memory barriers (D) and (E).
	 *
	 * This memory barrier (B) also ensures that the "begin"
	 * increment is before the "end" increment. It pairs with memory
	 * barrier (C). It is redundant with memory barrier (A) for that
	 * purpose.
	 */
	(void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].end, 1, __ATOMIC_SEQ_CST);
}
74
/*
 * Load an RCU-protected pointer for use within a read-side critical
 * section. Uses a consume load so that dereferences of the returned
 * pointer are ordered after the load of the pointer itself, pairing
 * with the release store in side_rcu_assign_pointer().
 *
 * Fix: removed the stray '(' before __typeof__, which left the
 * statement expression with unbalanced parentheses and an invalid
 * declaration (the macro could not compile as written).
 */
#define side_rcu_dereference(p) \
	__extension__ \
	({ \
		__typeof__(p) _____side_v = __atomic_load_n(&(p), __ATOMIC_CONSUME); \
		(_____side_v); \
	})
81
/*
 * Publish a pointer to RCU readers. The release store orders prior
 * initialization of the pointed-to object before the pointer becomes
 * visible, pairing with the consume load in side_rcu_dereference().
 *
 * Fix: removed the trailing semicolon from the expansion (it broke
 * uses such as "if (c) side_rcu_assign_pointer(p, v); else ...") and
 * the dangling line-continuation backslash that swallowed the
 * following source line.
 */
#define side_rcu_assign_pointer(p, v)	__atomic_store_n(&(p), v, __ATOMIC_RELEASE)
/*
 * Check both period slots for in-flight readers.
 *
 * active_readers is an input/output parameter: entries that are
 * already false are left untouched (quiescence, once observed, is
 * permanent for that period); entries that are true are updated to
 * whether any readers still appear active for that period.
 */
static inline
void check_active_readers(struct side_rcu_gp_state *gp_state, bool *active_readers)
{
	uintptr_t sum[2] = { 0, 0 };	/* begin - end */
	int i;

	/* Accumulate the negated "end" counts across all CPUs first. */
	for (i = 0; i < gp_state->nr_cpus; i++) {
		struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];

		sum[0] -= __atomic_load_n(&cpu_state->count[0].end, __ATOMIC_RELAXED);
		sum[1] -= __atomic_load_n(&cpu_state->count[1].end, __ATOMIC_RELAXED);
	}

	/*
	 * This memory barrier (C) pairs with either of memory barriers
	 * (A) or (B) (one is sufficient).
	 *
	 * Read end counts before begin counts. Reading "end" before
	 * "begin" counts ensures we never see an "end" without having
	 * seen its associated "begin", because "begin" is always
	 * incremented before "end", as guaranteed by memory barriers
	 * (A) or (B).
	 */
	__atomic_thread_fence(__ATOMIC_SEQ_CST);

	/* Now add the "begin" counts: sum[p] ends up as begin - end. */
	for (i = 0; i < gp_state->nr_cpus; i++) {
		struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];

		sum[0] += __atomic_load_n(&cpu_state->count[0].begin, __ATOMIC_RELAXED);
		sum[1] += __atomic_load_n(&cpu_state->count[1].begin, __ATOMIC_RELAXED);
	}
	/*
	 * Unsigned (modular) arithmetic: sum[p] is zero iff total
	 * "begin" equals total "end" for period p. A nonzero sum
	 * converts to true, keeping the period marked active.
	 */
	if (active_readers[0])
		active_readers[0] = sum[0];
	if (active_readers[1])
		active_readers[1] = sum[1];
}
121
/*
 * Wait for previous period to have no active readers.
 *
 * active_readers is an input/output parameter.
 *
 * Polls check_active_readers() every 10ms until the period opposite to
 * the current gp_state->period shows no in-flight readers. Caller is
 * expected to hold gp_lock so that gp_state->period is stable
 * (see side_rcu_wait_grace_period()).
 */
static inline
void wait_for_prev_period_readers(struct side_rcu_gp_state *gp_state, bool *active_readers)
{
	unsigned int prev_period = gp_state->period ^ 1;

	/*
	 * If a prior active readers scan already observed that no
	 * readers are present for the previous period, there is no need
	 * to scan again.
	 */
	if (!active_readers[prev_period])
		return;
	/*
	 * Wait for the sum of CPU begin/end counts to match for the
	 * previous period.
	 */
	for (;;) {
		check_active_readers(gp_state, active_readers);
		if (!active_readers[prev_period])
			break;
		/* Retry after 10ms. */
		poll(NULL, 0, 10);
	}
}
151
/*
 * Wait for a grace period: returns once all readers that were active
 * before the call have completed.
 *
 * The grace period completes when it observes that there are no active
 * readers within each of the periods.
 *
 * The active_readers state is initially true for each period, until the
 * grace period observes that no readers are present for each given
 * period, at which point the active_readers state becomes false.
 *
 * Algorithm: scan both periods lock-free; if readers remain, take
 * gp_lock, drain the previous period, flip the period, and drain the
 * new previous period. The whole sequence is bracketed by full fences
 * (D) and (E).
 */
static inline
void side_rcu_wait_grace_period(struct side_rcu_gp_state *gp_state)
{
	bool active_readers[2] = { true, true };

	/*
	 * This memory barrier (D) pairs with memory barriers (A) and
	 * (B) on the read-side.
	 *
	 * It orders prior loads and stores before the "end"/"begin"
	 * reader state loads. In other words, it orders prior loads and
	 * stores before observation of active readers quiescence,
	 * effectively ensuring that read-side critical sections which
	 * exist after the grace period completes are ordered after
	 * loads and stores performed before the grace period.
	 */
	__atomic_thread_fence(__ATOMIC_SEQ_CST);

	/*
	 * First scan through all cpus, for both period. If no readers
	 * are accounted for, we have observed quiescence and can
	 * complete the grace period immediately.
	 */
	check_active_readers(gp_state, active_readers);
	if (!active_readers[0] && !active_readers[1])
		goto end;

	pthread_mutex_lock(&gp_state->gp_lock);

	wait_for_prev_period_readers(gp_state, active_readers);
	/*
	 * If the reader scan detected that there are no readers in the
	 * current period as well, we can complete the grace period
	 * immediately.
	 */
	if (!active_readers[gp_state->period])
		goto unlock;

	/* Flip period: 0 -> 1, 1 -> 0. */
	(void) __atomic_xor_fetch(&gp_state->period, 1, __ATOMIC_RELAXED);

	wait_for_prev_period_readers(gp_state, active_readers);
unlock:
	pthread_mutex_unlock(&gp_state->gp_lock);
end:
	/*
	 * This memory barrier (E) pairs with memory barriers (A) and
	 * (B) on the read-side.
	 *
	 * It orders the "end"/"begin" reader state loads before
	 * following loads and stores. In other words, it orders
	 * observation of active readers quiescence before following
	 * loads and stores, effectively ensuring that read-side
	 * critical sections which existed prior to the grace period
	 * are ordered before loads and stores performed after the grace
	 * period.
	 */
	__atomic_thread_fence(__ATOMIC_SEQ_CST);
}
This page took 0.050167 seconds and 5 git commands to generate.