RCU: update barrier comments
[libside.git] / src / rcu.h
1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright 2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
4 */
5
6 #include <sched.h>
7 #include <stdint.h>
8 #include <pthread.h>
9 #include <stdbool.h>
10 #include <poll.h>
11
/*
 * Alignment used to keep each per-CPU counter pair on its own cache
 * line(s), avoiding false sharing between CPUs updating their counters.
 */
#define SIDE_CACHE_LINE_SIZE 256

/*
 * Per-CPU reader counter pair for one grace period slot.
 * "begin" is incremented by side_rcu_read_begin(), "end" by
 * side_rcu_read_end(). Readers are quiescent for a period when the sum
 * of "begin" counts equals the sum of "end" counts across all CPUs.
 */
struct side_rcu_percpu_count {
	uintptr_t begin;
	uintptr_t end;
} __attribute__((__aligned__(SIDE_CACHE_LINE_SIZE)));

/* Per-CPU state: one counter pair per period slot (period 0 and 1). */
struct side_rcu_cpu_gp_state {
	struct side_rcu_percpu_count count[2];
};
22
/*
 * Global grace period state.
 */
struct side_rcu_gp_state {
	struct side_rcu_cpu_gp_state *percpu_state;	/* array of nr_cpus entries */
	int nr_cpus;
	unsigned int period;		/* current period slot: 0 or 1 */
	pthread_mutex_t gp_lock;	/* serializes grace periods */
};
29
30 //TODO: replace atomics by rseq (when available)
31 //TODO: replace acquire/release by membarrier+compiler barrier (when available)
32 //TODO: implement wait/wakeup for grace period using sys_futex
/*
 * Enter an RCU read-side critical section.
 *
 * Samples the current period and increments this CPU's "begin" counter
 * for that period. Returns the sampled period, which the caller must
 * pass unchanged to the matching side_rcu_read_end().
 */
static inline
unsigned int side_rcu_read_begin(struct side_rcu_gp_state *gp_state)
{
	int cpu = sched_getcpu();
	unsigned int period = __atomic_load_n(&gp_state->period, __ATOMIC_RELAXED);

	/* sched_getcpu() returns -1 on error; fall back to CPU 0. */
	if (cpu < 0)
		cpu = 0;
	/*
	 * This memory barrier (A) ensures that the contents of the
	 * read-side critical section does not leak before the "begin"
	 * counter increment. It pairs with memory barriers (D) and (E).
	 *
	 * This memory barrier (A) also ensures that the "begin"
	 * increment is before the "end" increment. It pairs with memory
	 * barrier (C). It is redundant with memory barrier (B) for that
	 * purpose.
	 */
	(void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].begin, 1, __ATOMIC_SEQ_CST);
	return period;
}
54
/*
 * Exit an RCU read-side critical section.
 *
 * @period must be the value returned by the matching
 * side_rcu_read_begin(). Increments this CPU's "end" counter for that
 * period; the increment may land on a different CPU than "begin" did
 * (after migration), which is fine since counters are summed over all
 * CPUs.
 */
static inline
void side_rcu_read_end(struct side_rcu_gp_state *gp_state, unsigned int period)
{
	int cpu = sched_getcpu();

	/* sched_getcpu() returns -1 on error; fall back to CPU 0. */
	if (cpu < 0)
		cpu = 0;
	/*
	 * This memory barrier (B) ensures that the contents of the
	 * read-side critical section does not leak after the "end"
	 * counter increment. It pairs with memory barriers (D) and (E).
	 *
	 * This memory barrier (B) also ensures that the "begin"
	 * increment is before the "end" increment. It pairs with memory
	 * barrier (C). It is redundant with memory barrier (A) for that
	 * purpose.
	 */
	(void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].end, 1, __ATOMIC_SEQ_CST);
}
74
/*
 * Load an RCU-protected pointer for use within a read-side critical
 * section. Uses a consume load so that dereferences of the returned
 * pointer are ordered after the load of the pointer itself, pairing
 * with the release store in side_rcu_assign_pointer().
 *
 * Fix: removed the stray '(' before __typeof__, which left the
 * statement expression with unbalanced parentheses and an invalid
 * declaration (the macro could not compile as written).
 */
#define side_rcu_dereference(p) \
	__extension__ \
	({ \
		__typeof__(p) _____side_v = __atomic_load_n(&(p), __ATOMIC_CONSUME); \
		(_____side_v); \
	})
81
/*
 * Publish a pointer to RCU readers. The release store orders prior
 * initialization of the pointed-to object before the pointer becomes
 * visible, pairing with the consume load in side_rcu_dereference().
 *
 * Fix: removed the trailing semicolon from the expansion (it broke
 * uses such as "if (c) side_rcu_assign_pointer(p, v); else ...") and
 * the dangling line-continuation backslash that swallowed the
 * following source line.
 */
#define side_rcu_assign_pointer(p, v)	__atomic_store_n(&(p), v, __ATOMIC_RELEASE)
/*
 * Check both period slots for in-flight readers.
 *
 * active_readers is an input/output parameter: entries that are
 * already false are left untouched (quiescence, once observed, is
 * permanent for that period); entries that are true are updated to
 * whether any readers still appear active for that period.
 */
static inline
void check_active_readers(struct side_rcu_gp_state *gp_state, bool *active_readers)
{
	uintptr_t sum[2] = { 0, 0 };	/* begin - end */
	int i;

	/* Accumulate the negated "end" counts across all CPUs first. */
	for (i = 0; i < gp_state->nr_cpus; i++) {
		struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];

		sum[0] -= __atomic_load_n(&cpu_state->count[0].end, __ATOMIC_RELAXED);
		sum[1] -= __atomic_load_n(&cpu_state->count[1].end, __ATOMIC_RELAXED);
	}

	/*
	 * This memory barrier (C) pairs with either of memory barriers
	 * (A) or (B) (one is sufficient).
	 *
	 * Read end counts before begin counts. Reading "end" before
	 * "begin" counts ensures we never see an "end" without having
	 * seen its associated "begin", because "begin" is always
	 * incremented before "end", as guaranteed by memory barriers
	 * (A) or (B).
	 */
	__atomic_thread_fence(__ATOMIC_SEQ_CST);

	/* Now add the "begin" counts: sum[p] ends up as begin - end. */
	for (i = 0; i < gp_state->nr_cpus; i++) {
		struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];

		sum[0] += __atomic_load_n(&cpu_state->count[0].begin, __ATOMIC_RELAXED);
		sum[1] += __atomic_load_n(&cpu_state->count[1].begin, __ATOMIC_RELAXED);
	}
	/*
	 * Unsigned (modular) arithmetic: sum[p] is zero iff total
	 * "begin" equals total "end" for period p. A nonzero sum
	 * converts to true, keeping the period marked active.
	 */
	if (active_readers[0])
		active_readers[0] = sum[0];
	if (active_readers[1])
		active_readers[1] = sum[1];
}
121
/*
 * Wait for previous period to have no active readers.
 *
 * active_readers is an input/output parameter.
 *
 * Polls check_active_readers() every 10ms until the period opposite to
 * the current gp_state->period shows no in-flight readers. Caller is
 * expected to hold gp_lock so that gp_state->period is stable
 * (see side_rcu_wait_grace_period()).
 */
static inline
void wait_for_prev_period_readers(struct side_rcu_gp_state *gp_state, bool *active_readers)
{
	unsigned int prev_period = gp_state->period ^ 1;

	/*
	 * If a prior active readers scan already observed that no
	 * readers are present for the previous period, there is no need
	 * to scan again.
	 */
	if (!active_readers[prev_period])
		return;
	/*
	 * Wait for the sum of CPU begin/end counts to match for the
	 * previous period.
	 */
	for (;;) {
		check_active_readers(gp_state, active_readers);
		if (!active_readers[prev_period])
			break;
		/* Retry after 10ms. */
		poll(NULL, 0, 10);
	}
}
151
/*
 * Wait for a grace period: returns once all readers that were active
 * before the call have completed.
 *
 * The grace period completes when it observes that there are no active
 * readers within each of the periods.
 *
 * The active_readers state is initially true for each period, until the
 * grace period observes that no readers are present for each given
 * period, at which point the active_readers state becomes false.
 *
 * Algorithm: scan both periods lock-free; if readers remain, take
 * gp_lock, drain the previous period, flip the period, and drain the
 * new previous period. The whole sequence is bracketed by full fences
 * (D) and (E).
 */
static inline
void side_rcu_wait_grace_period(struct side_rcu_gp_state *gp_state)
{
	bool active_readers[2] = { true, true };

	/*
	 * This memory barrier (D) pairs with memory barriers (A) and
	 * (B) on the read-side.
	 *
	 * It orders prior loads and stores before the "end"/"begin"
	 * reader state loads. In other words, it orders prior loads and
	 * stores before observation of active readers quiescence,
	 * effectively ensuring that read-side critical sections which
	 * exist after the grace period completes are ordered after
	 * loads and stores performed before the grace period.
	 */
	__atomic_thread_fence(__ATOMIC_SEQ_CST);

	/*
	 * First scan through all cpus, for both period. If no readers
	 * are accounted for, we have observed quiescence and can
	 * complete the grace period immediately.
	 */
	check_active_readers(gp_state, active_readers);
	if (!active_readers[0] && !active_readers[1])
		goto end;

	pthread_mutex_lock(&gp_state->gp_lock);

	wait_for_prev_period_readers(gp_state, active_readers);
	/*
	 * If the reader scan detected that there are no readers in the
	 * current period as well, we can complete the grace period
	 * immediately.
	 */
	if (!active_readers[gp_state->period])
		goto unlock;

	/* Flip period: 0 -> 1, 1 -> 0. */
	(void) __atomic_xor_fetch(&gp_state->period, 1, __ATOMIC_RELAXED);

	wait_for_prev_period_readers(gp_state, active_readers);
unlock:
	pthread_mutex_unlock(&gp_state->gp_lock);
end:
	/*
	 * This memory barrier (E) pairs with memory barriers (A) and
	 * (B) on the read-side.
	 *
	 * It orders the "end"/"begin" reader state loads before
	 * following loads and stores. In other words, it orders
	 * observation of active readers quiescence before following
	 * loads and stores, effectively ensuring that read-side
	 * critical sections which existed prior to the grace period
	 * are ordered before loads and stores performed after the grace
	 * period.
	 */
	__atomic_thread_fence(__ATOMIC_SEQ_CST);
}
This page took 0.050167 seconds and 5 git commands to generate.