for (i = 0; i < gp_state->nr_cpus; i++) {
struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
- if (active_readers[0])
+ if (active_readers[0]) {
sum[0] -= __atomic_load_n(&cpu_state->count[0].end, __ATOMIC_RELAXED);
- if (active_readers[1])
+ sum[0] -= __atomic_load_n(&cpu_state->count[0].rseq_end, __ATOMIC_RELAXED);
+ }
+ if (active_readers[1]) {
sum[1] -= __atomic_load_n(&cpu_state->count[1].end, __ATOMIC_RELAXED);
+ sum[1] -= __atomic_load_n(&cpu_state->count[1].rseq_end, __ATOMIC_RELAXED);
+ }
}
for (i = 0; i < gp_state->nr_cpus; i++) {
struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
- if (active_readers[0])
+ if (active_readers[0]) {
sum[0] += __atomic_load_n(&cpu_state->count[0].begin, __ATOMIC_RELAXED);
- if (active_readers[1])
+ sum[0] += __atomic_load_n(&cpu_state->count[0].rseq_begin, __ATOMIC_RELAXED);
+ }
+ if (active_readers[1]) {
sum[1] += __atomic_load_n(&cpu_state->count[1].begin, __ATOMIC_RELAXED);
+ sum[1] += __atomic_load_n(&cpu_state->count[1].rseq_begin, __ATOMIC_RELAXED);
+ }
}
if (active_readers[0])
active_readers[0] = sum[0];
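Taken together, the two loops above compute, for each period, the per-CPU "begin" counts (atomic and rseq flavours) minus the "end" counts; a nonzero difference means readers may still be inside that period. A condensed, compilable sketch of that check, with illustrative names rather than the patch's own:

/*
 * Hedged sketch of the reader-detection pattern implemented by the two
 * loops above; "demo_count" and "period_has_active_readers" are
 * illustrative names, not part of the patch.
 */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

#define NCPUS 4

struct demo_count {
	uintptr_t begin;	/* atomic fallback increments */
	uintptr_t rseq_begin;	/* rseq fast-path increments */
	uintptr_t end;
	uintptr_t rseq_end;
};

static struct demo_count demo[NCPUS];

/*
 * Nonzero sum => readers may still be inside this period, so the grace
 * period must keep waiting. Ends are subtracted in a first pass and
 * begins added in a second pass (the surrounding code separates the two
 * passes with a full memory barrier), which guarantees that every
 * counted "end" has its matching "begin" counted too, so the sum cannot
 * go negative.
 */
static bool period_has_active_readers(void)
{
	uintptr_t sum = 0;
	int i;

	for (i = 0; i < NCPUS; i++)
		sum -= demo[i].end + demo[i].rseq_end;
	for (i = 0; i < NCPUS; i++)
		sum += demo[i].begin + demo[i].rseq_begin;
	return sum != 0;
}

int main(void)
{
	demo[1].rseq_begin = 2;	/* two readers entered on CPU 1 */
	demo[3].end = 1;	/* one of them exited from CPU 3 */
	printf("active readers: %s\n",
		period_has_active_readers() ? "yes" : "no");
	return 0;
}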
void side_rcu_gp_exit(struct side_rcu_gp_state *rcu_gp)
{
+ rseq_prepare_unload();
pthread_mutex_destroy(&rcu_gp->gp_lock);
free(rcu_gp->percpu_state);
}
#include <pthread.h>
#include <stdbool.h>
#include <poll.h>
+#include <side/trace.h>
+#include <rseq/rseq.h>
#define SIDE_CACHE_LINE_SIZE 256
struct side_rcu_percpu_count {
uintptr_t begin;
+ uintptr_t rseq_begin;
uintptr_t end;
+ uintptr_t rseq_end;
} __attribute__((__aligned__(SIDE_CACHE_LINE_SIZE)));
struct side_rcu_cpu_gp_state {
struct side_rcu_percpu_count count[2];
};
struct side_rcu_gp_state {
struct side_rcu_cpu_gp_state *percpu_state;
int nr_cpus;
pthread_mutex_t gp_lock;
unsigned int period;
};
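For reference, the 256-byte alignment on struct side_rcu_percpu_count above keeps each CPU's counters on their own cache lines, so readers on different CPUs do not false-share. A quick layout check (illustrative only, not part of the patch):

#include <stdint.h>
#include <stdio.h>

#define SIDE_CACHE_LINE_SIZE 256

/*
 * Same layout as the patched struct above; the aligned attribute also
 * pads sizeof() up to the alignment, so an array of these places each
 * CPU's counters 256 bytes apart.
 */
struct side_rcu_percpu_count {
	uintptr_t begin;
	uintptr_t rseq_begin;
	uintptr_t end;
	uintptr_t rseq_end;
} __attribute__((__aligned__(SIDE_CACHE_LINE_SIZE)));

int main(void)
{
	printf("sizeof=%zu alignof=%zu\n",
		sizeof(struct side_rcu_percpu_count),
		_Alignof(struct side_rcu_percpu_count));
	return 0;
}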
-//TODO: replace atomics by rseq (when available)
//TODO: replace acquire/release by membarrier+compiler barrier (when available)
//TODO: implement wait/wakeup for grace period using sys_futex
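The remaining TODO about membarrier refers to the Linux membarrier(2) system call: the read side would then only need a compiler barrier, while the grace period broadcasts a full barrier to every running thread. A hedged sketch of that direction (not part of this patch; requires a kernel providing MEMBARRIER_CMD_PRIVATE_EXPEDITED, i.e. 4.14 or later):

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

static int membarrier(int cmd, unsigned int flags)
{
	return syscall(__NR_membarrier, cmd, flags);
}

int main(void)
{
	/* One-time registration, required before the expedited command. */
	if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0)) {
		perror("membarrier register");
		return 1;
	}
	/*
	 * Grace-period side: forces a full memory barrier on every running
	 * thread of the process, pairing with plain compiler barriers on
	 * the read side.
	 */
	if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0)) {
		perror("membarrier");
		return 1;
	}
	printf("expedited membarrier issued\n");
	return 0;
}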
static inline
unsigned int side_rcu_read_begin(struct side_rcu_gp_state *gp_state)
{
- int cpu = sched_getcpu();
unsigned int period = __atomic_load_n(&gp_state->period, __ATOMIC_RELAXED);
+ struct side_rcu_cpu_gp_state *cpu_gp_state;
+ int cpu;
- if (cpu < 0)
+ if (side_likely(rseq_offset > 0)) {
+ cpu = rseq_cpu_start();
+ cpu_gp_state = &gp_state->percpu_state[cpu];
+ if (!rseq_addv((intptr_t *)&cpu_gp_state->count[period].rseq_begin, 1, cpu))
+ goto fence;
+ }
+ cpu = sched_getcpu();
+ if (side_unlikely(cpu < 0))
cpu = 0;
+ cpu_gp_state = &gp_state->percpu_state[cpu];
+ (void) __atomic_add_fetch(&cpu_gp_state->count[period].begin, 1, __ATOMIC_RELAXED);
+fence:
/*
* This memory barrier (A) ensures that the contents of the
* read-side critical section does not leak before the "begin"
* counter increment, pairing with memory barrier (C). It is
* redundant with memory barrier (B) for that purpose.
*/
- (void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].begin, 1, __ATOMIC_SEQ_CST);
+ __atomic_thread_fence(__ATOMIC_SEQ_CST);
return period;
}
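The pattern introduced here (try an rseq per-CPU add first, fall back to an atomic add on sched_getcpu()'s CPU when the restartable sequence aborts) is the core of the patch. A standalone sketch of the same pattern, assuming a librseq that provides rseq_offset, rseq_cpu_start() and rseq_addv() as used above and that thread registration is handled at startup; build with -lrseq, and all other names are illustrative:

#include <rseq/rseq.h>	/* librseq: rseq_offset, rseq_cpu_start(), rseq_addv() */
#include <sched.h>
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS_MAX 4096

/*
 * One counter per possible CPU; the wrapper struct keeps each counter on
 * its own cache lines, mirroring the aligned per-CPU layout in rcu.h.
 */
struct demo_counter {
	intptr_t v;
} __attribute__((__aligned__(256)));

static struct demo_counter counters[NR_CPUS_MAX];

static void percpu_inc(void)
{
	int cpu;

	if (rseq_offset > 0) {	/* same availability check the patch uses */
		cpu = rseq_cpu_start();
		/*
		 * rseq_addv() returns 0 when the add committed on "cpu";
		 * nonzero means the restartable sequence aborted (e.g. the
		 * thread migrated), so fall through to the atomic slow path.
		 */
		if (!rseq_addv(&counters[cpu].v, 1, cpu))
			return;
	}
	cpu = sched_getcpu();
	if (cpu < 0)
		cpu = 0;
	(void) __atomic_add_fetch(&counters[cpu].v, 1, __ATOMIC_RELAXED);
}

int main(void)
{
	percpu_inc();
	printf("incremented a per-CPU counter\n");
	return 0;
}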
static inline
void side_rcu_read_end(struct side_rcu_gp_state *gp_state, unsigned int period)
{
- int cpu = sched_getcpu();
+ struct side_rcu_cpu_gp_state *cpu_gp_state;
+ int cpu;
- if (cpu < 0)
- cpu = 0;
/*
* This memory barrier (B) ensures that the contents of the
* read-side critical section does not leak after the "end"
* counter increment, pairing with memory barrier (C). It is
* redundant with memory barrier (A) for that purpose.
*/
- (void) __atomic_add_fetch(&gp_state->percpu_state[cpu].count[period].end, 1, __ATOMIC_SEQ_CST);
+ __atomic_thread_fence(__ATOMIC_SEQ_CST);
+
+ if (side_likely(rseq_offset > 0)) {
+ cpu = rseq_cpu_start();
+ cpu_gp_state = &gp_state->percpu_state[cpu];
+ if (!rseq_addv((intptr_t *)&cpu_gp_state->count[period].rseq_end, 1, cpu))
+ return;
+ }
+ cpu = sched_getcpu();
+ if (side_unlikely(cpu < 0))
+ cpu = 0;
+ cpu_gp_state = &gp_state->percpu_state[cpu];
+ (void) __atomic_add_fetch(&cpu_gp_state->count[period].end, 1, __ATOMIC_RELAXED);
}
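Reader usage is unchanged by this patch; a minimal sketch of a read-side critical section against this API, using the side_rcu_dereference() macro defined just below ("my_gp_state", "shared_ptr" and "struct mydata" are illustrative names, not part of the patch):

struct mydata {
	int value;
};

extern struct side_rcu_gp_state my_gp_state;
extern struct mydata *shared_ptr;	/* pointer published by writers */

static int read_value(void)
{
	unsigned int period;
	struct mydata *p;
	int v;

	period = side_rcu_read_begin(&my_gp_state);	/* marks "begin" for this period */
	p = side_rcu_dereference(shared_ptr);		/* load the RCU-protected pointer */
	v = p ? p->value : -1;
	side_rcu_read_end(&my_gp_state, period);	/* marks "end" for the same period */
	return v;
}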
#define side_rcu_dereference(p) \