for (i = 0; i < gp_state->nr_cpus; i++) {
struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
- if (active_readers[0])
+ if (active_readers[0]) {
sum[0] -= __atomic_load_n(&cpu_state->count[0].end, __ATOMIC_RELAXED);
- if (active_readers[1])
+ sum[0] -= __atomic_load_n(&cpu_state->count[0].rseq_end, __ATOMIC_RELAXED);
+ }
+ if (active_readers[1]) {
sum[1] -= __atomic_load_n(&cpu_state->count[1].end, __ATOMIC_RELAXED);
+ sum[1] -= __atomic_load_n(&cpu_state->count[1].rseq_end, __ATOMIC_RELAXED);
+ }
}
for (i = 0; i < gp_state->nr_cpus; i++) {
struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
- if (active_readers[0])
+ if (active_readers[0]) {
sum[0] += __atomic_load_n(&cpu_state->count[0].begin, __ATOMIC_RELAXED);
- if (active_readers[1])
+ sum[0] += __atomic_load_n(&cpu_state->count[0].rseq_begin, __ATOMIC_RELAXED);
+ }
+ if (active_readers[1]) {
sum[1] += __atomic_load_n(&cpu_state->count[1].begin, __ATOMIC_RELAXED);
+ sum[1] += __atomic_load_n(&cpu_state->count[1].rseq_begin, __ATOMIC_RELAXED);
+ }
}
if (active_readers[0])
active_readers[0] = sum[0];
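Taken together, the two loops above compute, for each period, the per-CPU "begin" counts (atomic and rseq flavours) minus the "end" counts; a nonzero difference means readers may still be inside that period. A condensed, compilable sketch of that check, with illustrative names rather than the patch's own:

/*
 * Hedged sketch of the reader-detection pattern implemented by the two
 * loops above; "demo_count" and "period_has_active_readers" are
 * illustrative names, not part of the patch.
 */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define NCPUS 4

struct demo_count {
	uintptr_t begin;	/* atomic fallback increments */
	uintptr_t rseq_begin;	/* rseq fast-path increments */
	uintptr_t end;
	uintptr_t rseq_end;
};

static struct demo_count demo[NCPUS];

/*
 * Nonzero sum => readers may still be inside this period, so the grace
 * period must keep waiting. Ends are subtracted in a first pass and
 * begins added in a second pass (the surrounding code separates the two
 * passes with a full memory barrier), which guarantees that every
 * counted "end" has its matching "begin" counted too, so the sum cannot
 * go negative.
 */
static bool period_has_active_readers(void)
{
	uintptr_t sum = 0;
	int i;

	for (i = 0; i < NCPUS; i++)
		sum -= demo[i].end + demo[i].rseq_end;
	for (i = 0; i < NCPUS; i++)
		sum += demo[i].begin + demo[i].rseq_begin;
	return sum != 0;
}

int main(void)
{
	demo[1].rseq_begin = 2;	/* two readers entered on CPU 1 */
	demo[3].end = 1;	/* one of them exited from CPU 3 */
	printf("active readers: %s\n",
		period_has_active_readers() ? "yes" : "no");
	return 0;
}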
void side_rcu_gp_exit(struct side_rcu_gp_state *rcu_gp)
{
+ rseq_prepare_unload();
pthread_mutex_destroy(&rcu_gp->gp_lock);
free(rcu_gp->percpu_state);
}
#include <pthread.h>
#include <stdbool.h>
#include <poll.h>
+#include <side/trace.h>
+#include <rseq/rseq.h>
#define SIDE_CACHE_LINE_SIZE 256
struct side_rcu_percpu_count {
uintptr_t begin;
+ uintptr_t rseq_begin;
uintptr_t end;
+ uintptr_t rseq_end;
} __attribute__((__aligned__(SIDE_CACHE_LINE_SIZE)));
struct side_rcu_cpu_gp_state {
struct side_rcu_percpu_count count[2];
};
struct side_rcu_gp_state {
struct side_rcu_cpu_gp_state *percpu_state;
int nr_cpus;
pthread_mutex_t gp_lock;
unsigned int period;
};
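For reference, the 256-byte alignment on struct side_rcu_percpu_count above keeps each CPU's counters on their own cache lines, so readers on different CPUs do not false-share. A quick layout check (illustrative only, not part of the patch):

#include <stdint.h>
#include <stdio.h>

#define SIDE_CACHE_LINE_SIZE 256

/*
 * Same layout as the patched struct above; the aligned attribute also
 * pads sizeof() up to the alignment, so an array of these places each
 * CPU's counters 256 bytes apart.
 */
struct side_rcu_percpu_count {
	uintptr_t begin;
	uintptr_t rseq_begin;
	uintptr_t end;
	uintptr_t rseq_end;
} __attribute__((__aligned__(SIDE_CACHE_LINE_SIZE)));

int main(void)
{
	printf("sizeof=%zu alignof=%zu\n",
		sizeof(struct side_rcu_percpu_count),
		_Alignof(struct side_rcu_percpu_count));
	return 0;
}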
-//TODO: replace atomics by rseq (when available)
//TODO: replace acquire/release by membarrier+compiler barrier (when available)
//TODO: implement wait/wakeup for grace period using sys_futex
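The remaining TODO about membarrier refers to the Linux membarrier(2) system call: the read side would then only need a compiler barrier, while the grace period broadcasts a full barrier to every running thread. A hedged sketch of that direction (not part of this patch; requires a kernel providing MEMBARRIER_CMD_PRIVATE_EXPEDITED, i.e. 4.14 or later):

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

static int membarrier(int cmd, unsigned int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
	/* One-time registration, required before the expedited command. */
	if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0)) {
		perror("membarrier register");
		return 1;
	}
	/*
	 * Grace-period side: forces a full memory barrier on every running
	 * thread of the process, pairing with plain compiler barriers on
	 * the read side.
	 */
	if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0)) {
		perror("membarrier");
		return 1;
	}
	printf("expedited membarrier issued\n");
	return 0;
}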
static inline
unsigned int side_rcu_read_begin(struct side_rcu_gp_state *gp_state)
{
- int cpu = sched_getcpu();
unsigned int period = __atomic_load_n(&gp_state->period, __ATOMIC_RELAXED);
+ struct side_rcu_cpu_gp_state *cpu_gp_state;
+ int cpu;
- if (cpu < 0)
+ if (side_likely(rseq_offset > 0)) {
+ cpu = rseq_cpu_start();
+ cpu_gp_state = &gp_state->percpu_state[cpu];
+ if (!rseq_addv((intptr_t *)&cpu_gp_state->count[period].rseq_begin, 1, cpu))
+ goto fence;
+ }
+ cpu = sched_getcpu();
+ if (side_unlikely(cpu < 0))
cpu = 0;
+ cpu_gp_state = &gp_state->percpu_state[cpu];
+ (void) __atomic_add_fetch(&cpu_gp_state->count[period].begin, 1, __ATOMIC_RELAXED);
+fence:
/*
* This memory barrier (A) ensures that the contents of the
* read-side critical section does not leak before the "begin"
* counter increment, pairing with memory barrier (C). It is
* redundant with memory barrier (B) for that purpose.
*/
- (void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].begin, 1, __ATOMIC_SEQ_CST);
+ __atomic_thread_fence(__ATOMIC_SEQ_CST);
return period;
}
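The pattern introduced here (try an rseq per-CPU add first, fall back to an atomic add on sched_getcpu()'s CPU when the restartable sequence aborts) is the core of the patch. A standalone sketch of the same pattern, assuming a librseq that provides rseq_offset, rseq_cpu_start() and rseq_addv() as used above and that thread registration is handled at startup; build with -lrseq, and all other names are illustrative:

#include <rseq/rseq.h>	/* librseq: rseq_offset, rseq_cpu_start(), rseq_addv() */
#include <sched.h>
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS_MAX 4096

/*
 * One counter per possible CPU; the wrapper struct keeps each counter on
 * its own cache lines, mirroring the aligned per-CPU layout in rcu.h.
 */
struct demo_counter {
	intptr_t v;
} __attribute__((__aligned__(256)));

static struct demo_counter counters[NR_CPUS_MAX];

static void percpu_inc(void)
{
	int cpu;

	if (rseq_offset > 0) {	/* same availability check the patch uses */
		cpu = rseq_cpu_start();
		/*
		 * rseq_addv() returns 0 when the add committed on "cpu";
		 * nonzero means the restartable sequence aborted (e.g. the
		 * thread migrated), so fall through to the atomic slow path.
		 */
		if (!rseq_addv(&counters[cpu].v, 1, cpu))
			return;
	}
	cpu = sched_getcpu();
	if (cpu < 0)
		cpu = 0;
	(void) __atomic_add_fetch(&counters[cpu].v, 1, __ATOMIC_RELAXED);
}

int main(void)
{
	percpu_inc();
	printf("incremented a per-CPU counter\n");
	return 0;
}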
static inline
void side_rcu_read_end(struct side_rcu_gp_state *gp_state, unsigned int period)
{
- int cpu = sched_getcpu();
+ struct side_rcu_cpu_gp_state *cpu_gp_state;
+ int cpu;
- if (cpu < 0)
- cpu = 0;
/*
* This memory barrier (B) ensures that the contents of the
* read-side critical section does not leak after the "end"
* counter increment, pairing with memory barrier (C). It is
* redundant with memory barrier (A) for that purpose.
*/
- (void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].end, 1, __ATOMIC_SEQ_CST);
+ __atomic_thread_fence(__ATOMIC_SEQ_CST);
+
+ if (side_likely(rseq_offset > 0)) {
+ cpu = rseq_cpu_start();
+ cpu_gp_state = &gp_state->percpu_state[cpu];
+ if (!rseq_addv((intptr_t *)&cpu_gp_state->count[period].rseq_end, 1, cpu))
+ return;
+ }
+ cpu = sched_getcpu();
+ if (side_unlikely(cpu < 0))
+ cpu = 0;
+ cpu_gp_state = &gp_state->percpu_state[cpu];
+ (void) __atomic_add_fetch(&cpu_gp_state->count[period].end, 1, __ATOMIC_RELAXED);
}
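Reader usage is unchanged by this patch; a minimal sketch of a read-side critical section against this API, using the side_rcu_dereference() macro defined just below ("my_gp_state", "shared_ptr" and "struct mydata" are illustrative names, not part of the patch):

struct mydata {
	int value;
};

extern struct side_rcu_gp_state my_gp_state;
extern struct mydata *shared_ptr;	/* pointer published by writers */

static int read_value(void)
{
	unsigned int period;
	struct mydata *p;
	int v;

	period = side_rcu_read_begin(&my_gp_state);	/* marks "begin" for this period */
	p = side_rcu_dereference(shared_ptr);		/* load the RCU-protected pointer */
	v = p ? p->value : -1;
	side_rcu_read_end(&my_gp_state, period);	/* marks "end" for the same period */
	return v;
}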
#define side_rcu_dereference(p) \