// SPDX-License-Identifier: MIT
/*
 * Copyright 2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <poll.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/membarrier.h>

/*
 * Project-local declarations for struct side_rcu_gp_state,
 * get_possible_cpus_array_len() and the rseq helpers used below
 * (header names assumed).
 */
#include "rcu.h"
#include "smp.h"
/*
 * If both rseq (with glibc support) and membarrier system calls are
 * available, use them to replace barriers and atomics on the fast-path.
 */
unsigned int side_rcu_rseq_membarrier_available;
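
/*
 * Note: glibc does not provide a membarrier() wrapper, so the system
 * call is invoked directly through syscall(2).
 */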
static int
membarrier(int cmd, unsigned int flags, int cpu_id)
{
	return syscall(__NR_membarrier, cmd, flags, cpu_id);
}
/* active_readers is an input/output parameter. */
static
void check_active_readers(struct side_rcu_gp_state *gp_state, bool *active_readers)
{
	uintptr_t sum[2] = { 0, 0 };	/* begin - end */
	int i;
	for (i = 0; i < gp_state->nr_cpus; i++) {
		struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];

		if (active_readers[0]) {
			sum[0] -= __atomic_load_n(&cpu_state->count[0].end, __ATOMIC_RELAXED);
			sum[0] -= __atomic_load_n(&cpu_state->count[0].rseq_end, __ATOMIC_RELAXED);
		}
		if (active_readers[1]) {
			sum[1] -= __atomic_load_n(&cpu_state->count[1].end, __ATOMIC_RELAXED);
			sum[1] -= __atomic_load_n(&cpu_state->count[1].rseq_end, __ATOMIC_RELAXED);
		}
	}
	/*
	 * This memory barrier (C) pairs with either of memory barriers
	 * (A) or (B) (one is sufficient).
	 *
	 * Read end counts before begin counts. Reading "end" before
	 * "begin" counts ensures we never see an "end" without having
	 * seen its associated "begin", because "begin" is always
	 * incremented before "end", as guaranteed by memory barriers
	 * (A) or (B).
	 */
	if (side_rcu_rseq_membarrier_available) {
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
			perror("membarrier");
			abort();
		}
	} else {
		__atomic_thread_fence(__ATOMIC_SEQ_CST);
	}
	for (i = 0; i < gp_state->nr_cpus; i++) {
		struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];

		if (active_readers[0]) {
			sum[0] += __atomic_load_n(&cpu_state->count[0].begin, __ATOMIC_RELAXED);
			sum[0] += __atomic_load_n(&cpu_state->count[0].rseq_begin, __ATOMIC_RELAXED);
		}
		if (active_readers[1]) {
			sum[1] += __atomic_load_n(&cpu_state->count[1].begin, __ATOMIC_RELAXED);
			sum[1] += __atomic_load_n(&cpu_state->count[1].rseq_begin, __ATOMIC_RELAXED);
		}
	}
	if (active_readers[0])
		active_readers[0] = sum[0];
	if (active_readers[1])
		active_readers[1] = sum[1];
}
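
/*
 * Note on check_active_readers(): for each period, sum[period] accumulates
 * the per-CPU "begin" counts minus the per-CPU "end" counts. When the two
 * match (the sum is zero), every recorded read-side critical section for
 * that period has completed, and the period's active_readers flag is
 * cleared to record the observed quiescence.
 */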
/*
 * Wait for previous period to have no active readers.
 *
 * active_readers is an input/output parameter.
 */
static
void wait_for_prev_period_readers(struct side_rcu_gp_state *gp_state, bool *active_readers)
{
	unsigned int prev_period = gp_state->period ^ 1;
	/*
	 * If a prior active readers scan already observed that no
	 * readers are present for the previous period, there is no need
	 * to scan again.
	 */
	if (!active_readers[prev_period])
		return;
	/*
	 * Wait for the sum of CPU begin/end counts to match for the
	 * previous period.
	 */
	for (;;) {
		check_active_readers(gp_state, active_readers);
		if (!active_readers[prev_period])
			break;
		/* Retry after 10ms. */
		poll(NULL, 0, 10);
	}
}
/*
 * The grace period completes when it observes that there are no active
 * readers within each of the periods.
 *
 * The active_readers state is initially true for each period, until the
 * grace period observes that no readers are present for each given
 * period, at which point the active_readers state becomes false.
 */
void side_rcu_wait_grace_period(struct side_rcu_gp_state *gp_state)
{
	bool active_readers[2] = { true, true };
	/*
	 * This memory barrier (D) pairs with memory barriers (A) and
	 * (B) on the read-side.
	 *
	 * It orders prior loads and stores before the "end"/"begin"
	 * reader state loads. In other words, it orders prior loads and
	 * stores before observation of active readers quiescence,
	 * effectively ensuring that read-side critical sections which
	 * exist after the grace period completes are ordered after
	 * loads and stores performed before the grace period.
	 */
	if (side_rcu_rseq_membarrier_available) {
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
			perror("membarrier");
			abort();
		}
	} else {
		__atomic_thread_fence(__ATOMIC_SEQ_CST);
	}
	/*
	 * First scan through all cpus, for both periods. If no readers
	 * are accounted for, we have observed quiescence and can
	 * complete the grace period immediately.
	 */
	check_active_readers(gp_state, active_readers);
	if (!active_readers[0] && !active_readers[1])
		goto end;
	pthread_mutex_lock(&gp_state->gp_lock);

	wait_for_prev_period_readers(gp_state, active_readers);
	/*
	 * If the reader scan detected that there are no readers in the
	 * current period as well, we can complete the grace period
	 * immediately.
	 */
	if (!active_readers[gp_state->period])
		goto unlock;

	/* Flip period: 0 -> 1, 1 -> 0. */
	(void) __atomic_xor_fetch(&gp_state->period, 1, __ATOMIC_RELAXED);

	wait_for_prev_period_readers(gp_state, active_readers);
unlock:
	pthread_mutex_unlock(&gp_state->gp_lock);
end:
	/*
	 * This memory barrier (E) pairs with memory barriers (A) and
	 * (B) on the read-side.
	 *
	 * It orders the "end"/"begin" reader state loads before
	 * following loads and stores. In other words, it orders
	 * observation of active readers quiescence before following
	 * loads and stores, effectively ensuring that read-side
	 * critical sections which existed prior to the grace period
	 * are ordered before loads and stores performed after the grace
	 * period.
	 */
	if (side_rcu_rseq_membarrier_available) {
		if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
			perror("membarrier");
			abort();
		}
	} else {
		__atomic_thread_fence(__ATOMIC_SEQ_CST);
	}
}
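
/*
 * Illustrative writer-side usage (a sketch, not part of the original file;
 * reader-side helpers such as side_rcu_read_begin()/side_rcu_read_end()
 * are assumed to be provided by the project header):
 *
 *	struct foo *old = protected_ptr;
 *
 *	__atomic_store_n(&protected_ptr, new_version, __ATOMIC_RELEASE);
 *	side_rcu_wait_grace_period(&gp_state);
 *	free(old);	// no pre-existing reader can still reference "old"
 */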
void side_rcu_gp_init(struct side_rcu_gp_state *rcu_gp)
{
	bool has_membarrier = false, has_rseq = false;

	memset(rcu_gp, 0, sizeof(*rcu_gp));
	rcu_gp->nr_cpus = get_possible_cpus_array_len();
	if (!rcu_gp->nr_cpus)
		abort();
	pthread_mutex_init(&rcu_gp->gp_lock, NULL);
	rcu_gp->percpu_state = calloc(rcu_gp->nr_cpus, sizeof(struct side_rcu_cpu_gp_state));
	if (!rcu_gp->percpu_state)
		abort();
	/*
	 * Registering with MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED is
	 * required before MEMBARRIER_CMD_PRIVATE_EXPEDITED can be used
	 * by this process.
	 */
	if (!membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0, 0))
		has_membarrier = true;
	if (rseq_available(RSEQ_AVAILABLE_QUERY_LIBC))
		has_rseq = true;
	if (has_membarrier && has_rseq)
		side_rcu_rseq_membarrier_available = 1;
}
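
/*
 * Tear down the state set up by side_rcu_gp_init(). Callers are expected
 * to ensure that no readers or grace-period waiters remain at this point
 * (an assumption about usage, not stated in this file).
 */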
void side_rcu_gp_exit(struct side_rcu_gp_state *rcu_gp)
{
	rseq_prepare_unload();
	pthread_mutex_destroy(&rcu_gp->gp_lock);
	free(rcu_gp->percpu_state);
}