Use RCU for statedump
index bb3bfed60b37efb1d8458732c836a23ba985dad9..9652725fdc1f037e9bf2eada107fdf954c489b90 100644
--- a/src/rcu.c
+++ b/src/rcu.c
 #include <stdbool.h>
 #include <poll.h>
 #include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/syscall.h>
+#include <linux/membarrier.h>
 
 #include "rcu.h"
 #include "smp.h"
 
+/*
+ * If both rseq (with glibc support) and membarrier system calls are
+ * available, use them to replace barriers and atomics on the fast-path.
+ */
+unsigned int side_rcu_rseq_membarrier_available;
+
+static int
+membarrier(int cmd, unsigned int flags, int cpu_id)
+{
+       return syscall(__NR_membarrier, cmd, flags, cpu_id);
+}
+
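+/*
+ * The expedited private membarrier command issues a memory barrier on
+ * every running thread of this process. Issuing it on the (slow) grace
+ * period path therefore pairs with plain per-cpu counter updates on the
+ * reader fast path (the rseq_begin/rseq_end counters summed below,
+ * maintained by the reader code in rcu.h), removing the need for
+ * explicit fences or atomics on that fast path.
+ */
+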
+/*
+ * Wait/wakeup scheme with single waiter/many wakers.
+ */
+static
+void wait_gp_prepare(struct side_rcu_gp_state *gp_state)
+{
+       __atomic_store_n(&gp_state->futex, -1, __ATOMIC_RELAXED);
+       /*
+        * This memory barrier (H) pairs with memory barrier (F). It
+        * orders store to futex before load of RCU reader's counter
+        * state, thus ensuring that load of RCU reader's counters does
+        * not leak outside of futex state=-1.
+        */
+       if (side_rcu_rseq_membarrier_available) {
+               if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
+                       perror("membarrier");
+                       abort();
+               }
+       } else {
+               __atomic_thread_fence(__ATOMIC_SEQ_CST);
+       }
+}
+
+static
+void wait_gp_end(struct side_rcu_gp_state *gp_state)
+{
+       /*
+        * This memory barrier (G) pairs with memory barrier (F). It
+        * orders load of RCU reader's counter state before storing the
+        * futex value, thus ensuring that load of RCU reader's counters
+        * does not leak outside of futex state=-1.
+        */
+       if (side_rcu_rseq_membarrier_available) {
+               if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
+                       perror("membarrier");
+                       abort();
+               }
+       } else {
+               __atomic_thread_fence(__ATOMIC_SEQ_CST);
+       }
+       __atomic_store_n(&gp_state->futex, 0, __ATOMIC_RELAXED);
+}
+
+static
+void wait_gp(struct side_rcu_gp_state *gp_state)
+{
+       /*
+        * This memory barrier (G) pairs with memory barrier (F). It
+        * orders load of RCU reader's counter state before loading the
+        * futex value.
+        */
+       if (side_rcu_rseq_membarrier_available) {
+               if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
+                       perror("membarrier");
+                       abort();
+               }
+       } else {
+               __atomic_thread_fence(__ATOMIC_SEQ_CST);
+       }
+       while (__atomic_load_n(&gp_state->futex, __ATOMIC_RELAXED) == -1) {
+               if (!futex(&gp_state->futex, FUTEX_WAIT, -1, NULL, NULL, 0)) {
+                       /*
+                        * May be awakened either by a spurious wake up
+                        * or because the state is now as expected.
+                        */
+                       continue;
+               }
+               switch (errno) {
+               case EWOULDBLOCK:
+                       /* Value already changed. */
+                       return;
+               case EINTR:
+                       /* Retry if interrupted by signal. */
+                       break;  /* Get out of switch. */
+               default:
+                       /* Unexpected error. */
+                       abort();
+               }
+       }
+       return;
+}
+
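+/*
+ * Waker side (sketch, not part of this file): when a reader leaving its
+ * critical section observes futex == -1, it is expected to reset the
+ * futex to 0 and call futex(&gp_state->futex, FUTEX_WAKE, 1, ...) so
+ * that the single grace period waiter blocked in wait_gp() is woken up.
+ */
+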
 /* active_readers is an input/output parameter. */
 static
 void check_active_readers(struct side_rcu_gp_state *gp_state, bool *active_readers)
@@ -24,10 +122,14 @@ void check_active_readers(struct side_rcu_gp_state *gp_state, bool *active_reade
        for (i = 0; i < gp_state->nr_cpus; i++) {
                struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
 
-               if (active_readers[0])
+               if (active_readers[0]) {
                        sum[0] -= __atomic_load_n(&cpu_state->count[0].end, __ATOMIC_RELAXED);
-               if (active_readers[1])
+                       sum[0] -= __atomic_load_n(&cpu_state->count[0].rseq_end, __ATOMIC_RELAXED);
+               }
+               if (active_readers[1]) {
                        sum[1] -= __atomic_load_n(&cpu_state->count[1].end, __ATOMIC_RELAXED);
+                       sum[1] -= __atomic_load_n(&cpu_state->count[1].rseq_end, __ATOMIC_RELAXED);
+               }
        }
 
        /*
@@ -40,15 +142,26 @@ void check_active_readers(struct side_rcu_gp_state *gp_state, bool *active_reade
         * incremented before "end", as guaranteed by memory barriers
         * (A) or (B).
         */
-       __atomic_thread_fence(__ATOMIC_SEQ_CST);
+       if (side_rcu_rseq_membarrier_available) {
+               if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
+                       perror("membarrier");
+                       abort();
+               }
+       } else {
+               __atomic_thread_fence(__ATOMIC_SEQ_CST);
+       }
 
        for (i = 0; i < gp_state->nr_cpus; i++) {
                struct side_rcu_cpu_gp_state *cpu_state = &gp_state->percpu_state[i];
 
-               if (active_readers[0])
+               if (active_readers[0]) {
                        sum[0] += __atomic_load_n(&cpu_state->count[0].begin, __ATOMIC_RELAXED);
-               if (active_readers[1])
+                       sum[0] += __atomic_load_n(&cpu_state->count[0].rseq_begin, __ATOMIC_RELAXED);
+               }
+               if (active_readers[1]) {
                        sum[1] += __atomic_load_n(&cpu_state->count[1].begin, __ATOMIC_RELAXED);
+                       sum[1] += __atomic_load_n(&cpu_state->count[1].rseq_begin, __ATOMIC_RELAXED);
+               }
        }
        if (active_readers[0])
                active_readers[0] = sum[0];
@@ -78,11 +191,13 @@ void wait_for_prev_period_readers(struct side_rcu_gp_state *gp_state, bool *acti
         * previous period.
         */
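+       /*
+        * Futex protocol: arm the futex (set it to -1) and issue the
+        * matching barrier, then re-check the reader counters. If no
+        * reader remains for the previous period, disarm the futex and
+        * stop waiting; otherwise block in wait_gp() until a reader
+        * wakes us up, then retry.
+        */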
        for (;;) {
+               wait_gp_prepare(gp_state);
                check_active_readers(gp_state, active_readers);
-               if (!active_readers[prev_period])
+               if (!active_readers[prev_period]) {
+                       wait_gp_end(gp_state);
                        break;
-               /* Retry after 10ms. */
-               poll(NULL, 0, 10);
+               }
+               wait_gp(gp_state);
        }
 }
 
@@ -109,7 +224,14 @@ void side_rcu_wait_grace_period(struct side_rcu_gp_state *gp_state)
         * exist after the grace period completes are ordered after
         * loads and stores performed before the grace period.
         */
-       __atomic_thread_fence(__ATOMIC_SEQ_CST);
+       if (side_rcu_rseq_membarrier_available) {
+               if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
+                       perror("membarrier");
+                       abort();
+               }
+       } else {
+               __atomic_thread_fence(__ATOMIC_SEQ_CST);
+       }
 
        /*
         * First scan through all cpus, for both period. If no readers
@@ -150,23 +272,40 @@ end:
         * are ordered before loads and stores performed after the grace
         * period.
         */
-       __atomic_thread_fence(__ATOMIC_SEQ_CST);
+       if (side_rcu_rseq_membarrier_available) {
+               if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0)) {
+                       perror("membarrier");
+                       abort();
+               }
+       } else {
+               __atomic_thread_fence(__ATOMIC_SEQ_CST);
+       }
 }
 
 void side_rcu_gp_init(struct side_rcu_gp_state *rcu_gp)
 {
+       bool has_membarrier = false, has_rseq = false;
+
        memset(rcu_gp, 0, sizeof(*rcu_gp));
        rcu_gp->nr_cpus = get_possible_cpus_array_len();
        if (!rcu_gp->nr_cpus)
                abort();
        pthread_mutex_init(&rcu_gp->gp_lock, NULL);
-       rcu_gp->percpu_state = calloc(rcu_gp->nr_cpus, sizeof(struct side_rcu_cpu_gp_state));
+       rcu_gp->percpu_state = (struct side_rcu_cpu_gp_state *)
+               calloc(rcu_gp->nr_cpus, sizeof(struct side_rcu_cpu_gp_state));
        if (!rcu_gp->percpu_state)
                abort();
+       if (!membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0, 0))
+               has_membarrier = true;
+       if (rseq_available(RSEQ_AVAILABLE_QUERY_LIBC))
+               has_rseq = true;
+       if (has_membarrier && has_rseq)
+               side_rcu_rseq_membarrier_available = 1;
 }
 
 void side_rcu_gp_exit(struct side_rcu_gp_state *rcu_gp)
 {
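+       /*
+        * librseq hook meant to be invoked before code containing rseq
+        * critical sections may be unloaded; it presumably clears
+        * per-thread rseq critical section state so it no longer points
+        * into soon-to-be unmapped code.
+        */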
+       rseq_prepare_unload();
        pthread_mutex_destroy(&rcu_gp->gp_lock);
        free(rcu_gp->percpu_state);
 }