X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=src%2Frcu.h;h=4db3500566abc002042d9f63a6b617d4c24d76de;hb=873bbf16c6bcfe2c11fca7e76dd7284c5afbee99;hp=2831294b546bb4e8edaae0e9ea850146c54a57ff;hpb=1107e7d6d969e82d163674b41763eb0e7e360096;p=libside.git

diff --git a/src/rcu.h b/src/rcu.h
index 2831294..4db3500 100644
--- a/src/rcu.h
+++ b/src/rcu.h
@@ -3,137 +3,165 @@
  * Copyright 2022 Mathieu Desnoyers
  */
 
+#ifndef _SIDE_RCU_H
+#define _SIDE_RCU_H
+
 #include <sched.h>
 #include <stdint.h>
 #include <pthread.h>
+#include <stdbool.h>
 #include <poll.h>
+#include <rseq/rseq.h>
+#include <linux/futex.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <side/macros.h>
 
 #define SIDE_CACHE_LINE_SIZE 256
-#define SIDE_RCU_PERCPU_ARRAY_SIZE 2
 
 struct side_rcu_percpu_count {
 	uintptr_t begin;
+	uintptr_t rseq_begin;
 	uintptr_t end;
-} __attribute__((__aligned__(SIDE_CACHE_LINE_SIZE)));
+	uintptr_t rseq_end;
+};
 
 struct side_rcu_cpu_gp_state {
-	struct side_rcu_percpu_count count[SIDE_RCU_PERCPU_ARRAY_SIZE];
-};
+	struct side_rcu_percpu_count count[2];
+} __attribute__((__aligned__(SIDE_CACHE_LINE_SIZE)));
 
 struct side_rcu_gp_state {
 	struct side_rcu_cpu_gp_state *percpu_state;
 	int nr_cpus;
+	int32_t futex;
 	unsigned int period;
 	pthread_mutex_t gp_lock;
 };
 
-//TODO: replace atomics by rseq (when available)
-//TODO: replace acquire/release by membarrier+compiler barrier (when available)
-//TODO: implement wait/wakeup for grace period using sys_futex
+struct side_rcu_read_state {
+	struct side_rcu_percpu_count *percpu_count;
+	int cpu;
+};
+
+extern unsigned int side_rcu_rseq_membarrier_available __attribute__((visibility("hidden")));
+
 static inline
-unsigned int side_rcu_read_begin(struct side_rcu_gp_state *gp_state)
+int futex(int32_t *uaddr, int op, int32_t val,
+	const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
 {
-	int cpu = sched_getcpu();
-	unsigned int period = __atomic_load_n(&gp_state->period, __ATOMIC_RELAXED);
-
-	if (cpu < 0)
-		cpu = 0;
-	/*
-	 * This acquire MO pairs with the release fence at the end of
-	 * side_rcu_wait_grace_period().
-	 */
-	(void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].begin, 1, __ATOMIC_ACQUIRE);
-	return period;
+	return syscall(__NR_futex, uaddr, op, val, timeout, uaddr2, val3);
 }
 
+/*
+ * Wake-up side_rcu_wait_grace_period. Called concurrently from many
+ * threads.
+ */
 static inline
-void side_rcu_read_end(struct side_rcu_gp_state *gp_state, unsigned int period)
+void side_rcu_wake_up_gp(struct side_rcu_gp_state *gp_state)
 {
-	int cpu = sched_getcpu();
+	if (side_unlikely(__atomic_load_n(&gp_state->futex, __ATOMIC_RELAXED) == -1)) {
+		__atomic_store_n(&gp_state->futex, 0, __ATOMIC_RELAXED);
+		/* TODO: handle futex return values. */
+		(void) futex(&gp_state->futex, FUTEX_WAKE, 1, NULL, NULL, 0);
+	}
+}
 
-	if (cpu < 0)
+static inline
+void side_rcu_read_begin(struct side_rcu_gp_state *gp_state, struct side_rcu_read_state *read_state)
+{
+	struct side_rcu_percpu_count *begin_cpu_count;
+	struct side_rcu_cpu_gp_state *cpu_gp_state;
+	unsigned int period;
+	int cpu;
+
+	cpu = rseq_cpu_start();
+	period = __atomic_load_n(&gp_state->period, __ATOMIC_RELAXED);
+	cpu_gp_state = &gp_state->percpu_state[cpu];
+	read_state->percpu_count = begin_cpu_count = &cpu_gp_state->count[period];
+	read_state->cpu = cpu;
+	if (side_likely(side_rcu_rseq_membarrier_available &&
+			!rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU_CPU_ID,
+				(intptr_t *)&begin_cpu_count->rseq_begin, 1, cpu))) {
+		/*
+		 * This compiler barrier (A) is paired with membarrier() at (C),
+		 * (D), (E). It effectively upgrades this compiler barrier to a
+		 * SEQ_CST fence with respect to the paired barriers.
+		 *
+		 * This barrier (A) ensures that the contents of the read-side
+		 * critical section does not leak before the "begin" counter
+		 * increment. It pairs with memory barriers (D) and (E).
+		 *
+		 * This barrier (A) also ensures that the "begin" increment is
+		 * before the "end" increment. It pairs with memory barrier (C).
+		 * It is redundant with barrier (B) for that purpose.
+		 */
+		rseq_barrier();
+		return;
+	}
+	/* Fallback to atomic increment and SEQ_CST. */
+	cpu = sched_getcpu();
+	if (side_unlikely(cpu < 0))
 		cpu = 0;
-	/*
-	 * This release MO pairs with the acquire fence at the beginning
-	 * of side_rcu_wait_grace_period().
-	 */
-	(void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].end, 1, __ATOMIC_RELEASE);
+	read_state->cpu = cpu;
+	cpu_gp_state = &gp_state->percpu_state[cpu];
+	read_state->percpu_count = begin_cpu_count = &cpu_gp_state->count[period];
+	(void) __atomic_add_fetch(&begin_cpu_count->begin, 1, __ATOMIC_SEQ_CST);
 }
 
-#define side_rcu_dereference(p) \
-	__extension__ \
-	({ \
-		(__typeof__(p) _____side_v = __atomic_load_n(&(p), __ATOMIC_CONSUME); \
-		(_____side_v); \
-	})
-
-#define side_rcu_assign_pointer(p, v)	__atomic_store_n(&(p), v, __ATOMIC_RELEASE); \
-
 static inline
-void wait_for_cpus(struct side_rcu_gp_state *gp_state)
+void side_rcu_read_end(struct side_rcu_gp_state *gp_state, struct side_rcu_read_state *read_state)
 {
-	unsigned int prev_period = gp_state->period ^ 1;
+	struct side_rcu_percpu_count *begin_cpu_count = read_state->percpu_count;
+	int cpu = read_state->cpu;
 
 	/*
-	 * Wait for the sum of CPU begin/end counts to match for the
-	 * previous period.
+	 * This compiler barrier (B) is paired with membarrier() at (C),
+	 * (D), (E). It effectively upgrades this compiler barrier to a
+	 * SEQ_CST fence with respect to the paired barriers.
+	 *
+	 * This barrier (B) ensures that the contents of the read-side
+	 * critical section does not leak after the "end" counter
+	 * increment. It pairs with memory barriers (D) and (E).
+	 *
+	 * This barrier (B) also ensures that the "begin" increment is
+	 * before the "end" increment. It pairs with memory barrier (C).
+	 * It is redundant with barrier (A) for that purpose.
 	 */
-	for (;;) {
-		uintptr_t sum = 0;	/* begin - end */
-		int i;
-
-		for (i = 0; i < gp_state->nr_cpus; i++) {
-			struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
-
-			sum -= __atomic_load_n(&cpu_state->count[prev_period].end, __ATOMIC_RELAXED);
-		}
-
+	rseq_barrier();
+	if (side_likely(side_rcu_rseq_membarrier_available &&
+			!rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU_CPU_ID,
+				(intptr_t *)&begin_cpu_count->rseq_end, 1, cpu))) {
 		/*
-		 * Read end counts before begin counts. Reading end
-		 * before begin count ensures we never see an end
-		 * without having seen its associated begin, in case of
-		 * a thread migration during the traversal over each
-		 * cpu.
+		 * This barrier (F) is paired with membarrier()
+		 * at (G). It orders increment of the begin/end
+		 * counters before load/store to the futex.
 		 */
-		__atomic_thread_fence(__ATOMIC_SEQ_CST);
-
-		for (i = 0; i < gp_state->nr_cpus; i++) {
-			struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
-
-			sum += __atomic_load_n(&cpu_state->count[prev_period].begin, __ATOMIC_RELAXED);
-		}
-		if (!sum) {
-			break;
-		} else {
-			/* Retry after 10ms. */
-			poll(NULL, 0, 10);
-		}
+		rseq_barrier();
+		goto end;
 	}
-}
-
-static inline
-void side_rcu_wait_grace_period(struct side_rcu_gp_state *gp_state)
-{
+	/* Fallback to atomic increment and SEQ_CST. */
+	(void) __atomic_add_fetch(&begin_cpu_count->end, 1, __ATOMIC_SEQ_CST);
 	/*
-	 * This fence pairs with the acquire MO __atomic_add_fetch in
-	 * side_rcu_read_begin().
+	 * This barrier (F) implied by SEQ_CST is paired with SEQ_CST
+	 * barrier or membarrier() at (G). It orders increment of the
+	 * begin/end counters before load/store to the futex.
 	 */
-	__atomic_thread_fence(__ATOMIC_SEQ_CST);
-
-	pthread_mutex_lock(&gp_state->gp_lock);
-
-	wait_for_cpus(gp_state);
+end:
+	side_rcu_wake_up_gp(gp_state);
+}
 
-	/* Flip period: 0 -> 1, 1 -> 0. */
-	(void) __atomic_xor_fetch(&gp_state->period, 1, __ATOMIC_SEQ_CST);
+#define side_rcu_dereference(p) \
+	__extension__ \
+	({ \
+		__typeof__(p) _____side_v = __atomic_load_n(&(p), __ATOMIC_CONSUME); \
+		(_____side_v); \
+	})
 
-	wait_for_cpus(gp_state);
+#define side_rcu_assign_pointer(p, v)	__atomic_store_n(&(p), v, __ATOMIC_RELEASE);
 
-	pthread_mutex_unlock(&gp_state->gp_lock);
+void side_rcu_wait_grace_period(struct side_rcu_gp_state *gp_state) __attribute__((visibility("hidden")));
+void side_rcu_gp_init(struct side_rcu_gp_state *rcu_gp) __attribute__((visibility("hidden")));
+void side_rcu_gp_exit(struct side_rcu_gp_state *rcu_gp) __attribute__((visibility("hidden")));
 
-	/*
-	 * This fence pairs with the release MO __atomic_add_fetch in
-	 * side_rcu_read_end().
-	 */
-	__atomic_thread_fence(__ATOMIC_SEQ_CST);
-}
+#endif /* _SIDE_RCU_H */
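
For reference, below is a minimal usage sketch of the read/update API exposed by this patch, not part of the change itself. The protected "struct entry", the global variables, the function names and the inclusion of the header as "rcu.h" are illustrative assumptions; it also assumes side_rcu_gp_init() has been called on the grace-period state, and a real updater would need its own serialization if several writers can race.

/* Illustrative sketch only; assumes this header is available as "rcu.h"
 * and that side_rcu_gp_init(&rcu_gp) ran during initialization. */
#include <stdlib.h>
#include "rcu.h"

struct entry {
	int value;
};

static struct side_rcu_gp_state rcu_gp;
static struct entry *current_entry;	/* RCU-protected pointer (assumed). */

static int read_value(void)
{
	struct side_rcu_read_state read_state;
	struct entry *e;
	int v = -1;

	side_rcu_read_begin(&rcu_gp, &read_state);
	e = side_rcu_dereference(current_entry);
	if (e)
		v = e->value;	/* Valid until side_rcu_read_end(). */
	side_rcu_read_end(&rcu_gp, &read_state);
	return v;
}

static void replace_entry(struct entry *new_entry)
{
	struct entry *old = current_entry;

	side_rcu_assign_pointer(current_entry, new_entry);
	/* Wait for pre-existing readers before reclaiming the old entry. */
	side_rcu_wait_grace_period(&rcu_gp);
	free(old);
}

Note that side_rcu_read_end() increments the "end" counter of the same per-CPU slot recorded in read_state by side_rcu_read_begin(), so a reader that migrates between CPUs still balances the begin/end counts summed by the grace-period waiter.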