X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=src%2Frcu.h;h=4db3500566abc002042d9f63a6b617d4c24d76de;hb=873bbf16c6bcfe2c11fca7e76dd7284c5afbee99;hp=2831294b546bb4e8edaae0e9ea850146c54a57ff;hpb=1107e7d6d969e82d163674b41763eb0e7e360096;p=libside.git

diff --git a/src/rcu.h b/src/rcu.h
index 2831294..4db3500 100644
--- a/src/rcu.h
+++ b/src/rcu.h
@@ -3,137 +3,165 @@
  * Copyright 2022 Mathieu Desnoyers
  */
 
+#ifndef _SIDE_RCU_H
+#define _SIDE_RCU_H
+
 #include <sched.h>
 #include <stdint.h>
 #include <pthread.h>
+#include <stdbool.h>
 #include <poll.h>
+#include <rseq/rseq.h>
+#include <linux/futex.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <side/macros.h>
 
 #define SIDE_CACHE_LINE_SIZE 256
-#define SIDE_RCU_PERCPU_ARRAY_SIZE 2
 
 struct side_rcu_percpu_count {
 	uintptr_t begin;
+	uintptr_t rseq_begin;
 	uintptr_t end;
-} __attribute__((__aligned__(SIDE_CACHE_LINE_SIZE)));
+	uintptr_t rseq_end;
+};
 
 struct side_rcu_cpu_gp_state {
-	struct side_rcu_percpu_count count[SIDE_RCU_PERCPU_ARRAY_SIZE];
-};
+	struct side_rcu_percpu_count count[2];
+} __attribute__((__aligned__(SIDE_CACHE_LINE_SIZE)));
 
 struct side_rcu_gp_state {
 	struct side_rcu_cpu_gp_state *percpu_state;
 	int nr_cpus;
+	int32_t futex;
 	unsigned int period;
 	pthread_mutex_t gp_lock;
 };
 
-//TODO: replace atomics by rseq (when available)
-//TODO: replace acquire/release by membarrier+compiler barrier (when available)
-//TODO: implement wait/wakeup for grace period using sys_futex
+struct side_rcu_read_state {
+	struct side_rcu_percpu_count *percpu_count;
+	int cpu;
+};
+
+extern unsigned int side_rcu_rseq_membarrier_available __attribute__((visibility("hidden")));
+
 static inline
-unsigned int side_rcu_read_begin(struct side_rcu_gp_state *gp_state)
+int futex(int32_t *uaddr, int op, int32_t val,
+	const struct timespec *timeout, int32_t *uaddr2, int32_t val3)
 {
-	int cpu = sched_getcpu();
-	unsigned int period = __atomic_load_n(&gp_state->period, __ATOMIC_RELAXED);
-
-	if (cpu < 0)
-		cpu = 0;
-	/*
-	 * This acquire MO pairs with the release fence at the end of
-	 * side_rcu_wait_grace_period().
-	 */
-	(void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].begin, 1, __ATOMIC_ACQUIRE);
-	return period;
+	return syscall(__NR_futex, uaddr, op, val, timeout, uaddr2, val3);
 }
 
+/*
+ * Wake-up side_rcu_wait_grace_period. Called concurrently from many
+ * threads.
+ */
 static inline
-void side_rcu_read_end(struct side_rcu_gp_state *gp_state, unsigned int period)
+void side_rcu_wake_up_gp(struct side_rcu_gp_state *gp_state)
 {
-	int cpu = sched_getcpu();
+	if (side_unlikely(__atomic_load_n(&gp_state->futex, __ATOMIC_RELAXED) == -1)) {
+		__atomic_store_n(&gp_state->futex, 0, __ATOMIC_RELAXED);
+		/* TODO: handle futex return values. */
+		(void) futex(&gp_state->futex, FUTEX_WAKE, 1, NULL, NULL, 0);
+	}
+}
 
-	if (cpu < 0)
+static inline
+void side_rcu_read_begin(struct side_rcu_gp_state *gp_state, struct side_rcu_read_state *read_state)
+{
+	struct side_rcu_percpu_count *begin_cpu_count;
+	struct side_rcu_cpu_gp_state *cpu_gp_state;
+	unsigned int period;
+	int cpu;
+
+	cpu = rseq_cpu_start();
+	period = __atomic_load_n(&gp_state->period, __ATOMIC_RELAXED);
+	cpu_gp_state = &gp_state->percpu_state[cpu];
+	read_state->percpu_count = begin_cpu_count = &cpu_gp_state->count[period];
+	read_state->cpu = cpu;
+	if (side_likely(side_rcu_rseq_membarrier_available &&
+			!rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU_CPU_ID,
+				(intptr_t *)&begin_cpu_count->rseq_begin, 1, cpu))) {
+		/*
+		 * This compiler barrier (A) is paired with membarrier() at (C),
+		 * (D), (E). It effectively upgrades this compiler barrier to a
+		 * SEQ_CST fence with respect to the paired barriers.
+		 *
+		 * This barrier (A) ensures that the contents of the read-side
+		 * critical section does not leak before the "begin" counter
+		 * increment. It pairs with memory barriers (D) and (E).
+		 *
+		 * This barrier (A) also ensures that the "begin" increment is
+		 * before the "end" increment. It pairs with memory barrier (C).
+		 * It is redundant with barrier (B) for that purpose.
+		 */
+		rseq_barrier();
+		return;
+	}
+	/* Fallback to atomic increment and SEQ_CST. */
+	cpu = sched_getcpu();
+	if (side_unlikely(cpu < 0))
 		cpu = 0;
-	/*
-	 * This release MO pairs with the acquire fence at the beginning
-	 * of side_rcu_wait_grace_period().
-	 */
-	(void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].end, 1, __ATOMIC_RELEASE);
+	read_state->cpu = cpu;
+	cpu_gp_state = &gp_state->percpu_state[cpu];
+	read_state->percpu_count = begin_cpu_count = &cpu_gp_state->count[period];
+	(void) __atomic_add_fetch(&begin_cpu_count->begin, 1, __ATOMIC_SEQ_CST);
 }
 
-#define side_rcu_dereference(p) \
-	__extension__ \
-	({ \
-		(__typeof__(p) _____side_v = __atomic_load_n(&(p), __ATOMIC_CONSUME); \
-		(_____side_v); \
-	})
-
-#define side_rcu_assign_pointer(p, v)	__atomic_store_n(&(p), v, __ATOMIC_RELEASE); \
-
 static inline
-void wait_for_cpus(struct side_rcu_gp_state *gp_state)
+void side_rcu_read_end(struct side_rcu_gp_state *gp_state, struct side_rcu_read_state *read_state)
 {
-	unsigned int prev_period = gp_state->period ^ 1;
+	struct side_rcu_percpu_count *begin_cpu_count = read_state->percpu_count;
+	int cpu = read_state->cpu;
 
 	/*
-	 * Wait for the sum of CPU begin/end counts to match for the
-	 * previous period.
+	 * This compiler barrier (B) is paired with membarrier() at (C),
+	 * (D), (E). It effectively upgrades this compiler barrier to a
+	 * SEQ_CST fence with respect to the paired barriers.
+	 *
+	 * This barrier (B) ensures that the contents of the read-side
+	 * critical section does not leak after the "end" counter
+	 * increment. It pairs with memory barriers (D) and (E).
+	 *
+	 * This barrier (B) also ensures that the "begin" increment is
+	 * before the "end" increment. It pairs with memory barrier (C).
+	 * It is redundant with barrier (A) for that purpose.
 	 */
-	for (;;) {
-		uintptr_t sum = 0;	/* begin - end */
-		int i;
-
-		for (i = 0; i < gp_state->nr_cpus; i++) {
-			struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
-
-			sum -= __atomic_load_n(&cpu_state->count[prev_period].end, __ATOMIC_RELAXED);
-		}
-
+	rseq_barrier();
+	if (side_likely(side_rcu_rseq_membarrier_available &&
+			!rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU_CPU_ID,
+				(intptr_t *)&begin_cpu_count->rseq_end, 1, cpu))) {
 		/*
-		 * Read end counts before begin counts. Reading end
-		 * before begin count ensures we never see an end
-		 * without having seen its associated begin, in case of
-		 * a thread migration during the traversal over each
-		 * cpu.
+		 * This barrier (F) is paired with membarrier()
+		 * at (G). It orders increment of the begin/end
+		 * counters before load/store to the futex.
 		 */
-		__atomic_thread_fence(__ATOMIC_SEQ_CST);
-
-		for (i = 0; i < gp_state->nr_cpus; i++) {
-			struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
-
-			sum += __atomic_load_n(&cpu_state->count[prev_period].begin, __ATOMIC_RELAXED);
-		}
-		if (!sum) {
-			break;
-		} else {
-			/* Retry after 10ms. */
-			poll(NULL, 0, 10);
-		}
+		rseq_barrier();
+		goto end;
 	}
-}
-
-static inline
-void side_rcu_wait_grace_period(struct side_rcu_gp_state *gp_state)
-{
+	/* Fallback to atomic increment and SEQ_CST. */
+	(void) __atomic_add_fetch(&begin_cpu_count->end, 1, __ATOMIC_SEQ_CST);
 	/*
-	 * This fence pairs with the acquire MO __atomic_add_fetch in
-	 * side_rcu_read_begin().
+	 * This barrier (F) implied by SEQ_CST is paired with SEQ_CST
+	 * barrier or membarrier() at (G). It orders increment of the
+	 * begin/end counters before load/store to the futex.
 	 */
-	__atomic_thread_fence(__ATOMIC_SEQ_CST);
-
-	pthread_mutex_lock(&gp_state->gp_lock);
-
-	wait_for_cpus(gp_state);
+end:
+	side_rcu_wake_up_gp(gp_state);
+}
 
-	/* Flip period: 0 -> 1, 1 -> 0. */
-	(void) __atomic_xor_fetch(&gp_state->period, 1, __ATOMIC_SEQ_CST);
+#define side_rcu_dereference(p) \
+	__extension__ \
+	({ \
+		__typeof__(p) _____side_v = __atomic_load_n(&(p), __ATOMIC_CONSUME); \
+		(_____side_v); \
+	})
 
-	wait_for_cpus(gp_state);
+#define side_rcu_assign_pointer(p, v)	__atomic_store_n(&(p), v, __ATOMIC_RELEASE);
 
-	pthread_mutex_unlock(&gp_state->gp_lock);
+void side_rcu_wait_grace_period(struct side_rcu_gp_state *gp_state) __attribute__((visibility("hidden")));
+void side_rcu_gp_init(struct side_rcu_gp_state *rcu_gp) __attribute__((visibility("hidden")));
+void side_rcu_gp_exit(struct side_rcu_gp_state *rcu_gp) __attribute__((visibility("hidden")));
 
-	/*
-	 * This fence pairs with the release MO __atomic_add_fetch in
-	 * side_rcu_read_end().
-	 */
-	__atomic_thread_fence(__ATOMIC_SEQ_CST);
-}
+#endif /* _SIDE_RCU_H */
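
For reference, below is a minimal usage sketch of the read/update API exposed by this patch, not part of the change itself. The protected "struct entry", the global variables, the function names and the inclusion of the header as "rcu.h" are illustrative assumptions; it also assumes side_rcu_gp_init() has been called on the grace-period state, and a real updater would need its own serialization if several writers can race.

/* Illustrative sketch only; assumes this header is available as "rcu.h"
 * and that side_rcu_gp_init(&rcu_gp) ran during initialization. */
#include <stdlib.h>
#include "rcu.h"

struct entry {
	int value;
};

static struct side_rcu_gp_state rcu_gp;
static struct entry *current_entry;	/* RCU-protected pointer (assumed). */

static int read_value(void)
{
	struct side_rcu_read_state read_state;
	struct entry *e;
	int v = -1;

	side_rcu_read_begin(&rcu_gp, &read_state);
	e = side_rcu_dereference(current_entry);
	if (e)
		v = e->value;	/* Valid until side_rcu_read_end(). */
	side_rcu_read_end(&rcu_gp, &read_state);
	return v;
}

static void replace_entry(struct entry *new_entry)
{
	struct entry *old = current_entry;

	side_rcu_assign_pointer(current_entry, new_entry);
	/* Wait for pre-existing readers before reclaiming the old entry. */
	side_rcu_wait_grace_period(&rcu_gp);
	free(old);
}

Note that side_rcu_read_end() increments the "end" counter of the same per-CPU slot recorded in read_state by side_rcu_read_begin(), so a reader that migrates between CPUs still balances the begin/end counts summed by the grace-period waiter.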