mempool: Introduce optional stride parameter
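
The param_test per-CPU data structures no longer embed fixed CPU_SETSIZE-sized arrays;
each test allocates its per-CPU data from an rseq mempool and reaches a given CPU's
instance through rseq_percpu_ptr(). Below is a rough sketch of the allocation pattern
used throughout the test, with the argument order taken from the calls in the diff;
treating the third rseq_mempool_create() argument as the new stride, with 0 assumed to
select the default stride, is an interpretation based on the commit title rather than
something stated in the test itself:

	struct rseq_mempool *pool;
	struct spinlock_test_data __rseq_percpu *data;
	int cpu;

	/* Third argument is the stride; 0 is assumed to pick the default. */
	pool = rseq_mempool_create("spinlock_test_data",
			sizeof(struct spinlock_test_data),
			0, CPU_SETSIZE, NULL);
	if (!pool)
		abort();
	/* Zeroed per-CPU (or per-mm_cid) item allocated from the pool. */
	data = (struct spinlock_test_data __rseq_percpu *) rseq_percpu_zmalloc(pool);
	if (!data)
		abort();

	cpu = get_current_cpu_id();
	/* Address of this CPU's instance within the pool. */
	rseq_percpu_ptr(data, cpu)->count++;

	rseq_percpu_free(data);
	if (rseq_mempool_destroy(pool))
		abort();

Whether the per-CPU index is the raw cpu_id or the mm_cid is chosen at build time via
BUILDOPT_RSEQ_PERCPU_MM_CID through the get_current_cpu_id(), rseq_validate_cpu_id()
and rseq_use_cpu_index() helpers, and the -M option now selects RSEQ_MO_RELEASE for
the rseq operations instead of the former opt_mb flag.
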
diff --git a/tests/param_test.c b/tests/param_test.c
index 4a45b5ca9758ec9b4f736e0069b79ab5215a4da4..bb8b15a3957bb984902ad34051a1d79b90d40854 100644
--- a/tests/param_test.c
+++ b/tests/param_test.c
@@ -1,4 +1,5 @@
-// SPDX-License-Identifier: LGPL-2.1-only
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
 #ifndef _GNU_SOURCE
 #define _GNU_SOURCE
 #endif
@@ -18,6 +19,8 @@
 #include <signal.h>
 #include <errno.h>
 #include <stddef.h>
+#include <stdbool.h>
+#include <rseq/mempool.h>
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
 enum {
@@ -44,24 +47,20 @@ static int opt_modulo, verbose;
 
 static int opt_yield, opt_signal, opt_sleep,
                opt_disable_rseq, opt_threads = 200,
-               opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
+               opt_disable_mod = 0, opt_test = 's';
 
-#ifndef RSEQ_SKIP_FASTPATH
 static long long opt_reps = 5000;
-#else
-static long long opt_reps = 100;
-#endif
 
 static __thread __attribute__((tls_model("initial-exec")))
 unsigned int signals_delivered;
 
-#ifndef BENCHMARK
-
 static inline pid_t rseq_gettid(void)
 {
        return syscall(__NR_gettid);
 }
 
+#ifndef BENCHMARK
+
 static __thread __attribute__((tls_model("initial-exec"), unused))
 int yield_mod_cnt, nr_abort;
 
@@ -78,8 +77,13 @@ int yield_mod_cnt, nr_abort;
 #define RSEQ_INJECT_CLOBBER \
        , INJECT_ASM_REG
 
-#define RSEQ_INJECT_ASM(n) \
-       "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
+/*
+ * Use ip-relative addressing to get the loop counter.
+ */
+#define __RSEQ_INJECT_ASM(n, ref_ip, ref_label) \
+       "movl " __rseq_str(ref_ip) ", %%" INJECT_ASM_REG "\n\t" \
+       "leal ( asm_loop_cnt_" #n " - " __rseq_str(ref_label) "b)(%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
+       "movl (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
        "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
        "jz 333f\n\t" \
        "222:\n\t" \
@@ -87,6 +91,9 @@ int yield_mod_cnt, nr_abort;
        "jnz 222b\n\t" \
        "333:\n\t"
 
+#define RSEQ_INJECT_ASM(n) \
+       __RSEQ_INJECT_ASM(n, %[ref_ip], RSEQ_ASM_REF_LABEL)
+
 #elif defined(__x86_64__)
 
 #define INJECT_ASM_REG_P       "rax"
@@ -221,6 +228,29 @@ int yield_mod_cnt, nr_abort;
        "bnez " INJECT_ASM_REG ", 222b\n\t" \
        "333:\n\t"
 
+#elif defined(__riscv)
+
+#define RSEQ_INJECT_INPUT \
+       , [loop_cnt_1]"m"(loop_cnt[1]) \
+       , [loop_cnt_2]"m"(loop_cnt[2]) \
+       , [loop_cnt_3]"m"(loop_cnt[3]) \
+       , [loop_cnt_4]"m"(loop_cnt[4]) \
+       , [loop_cnt_5]"m"(loop_cnt[5]) \
+       , [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG  "t1"
+
+#define RSEQ_INJECT_CLOBBER \
+       , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n)                                      \
+       "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t"         \
+       "beqz " INJECT_ASM_REG ", 333f\n\t"                     \
+       "222:\n\t"                                              \
+       "addi  " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t"   \
+       "bnez " INJECT_ASM_REG ", 222b\n\t"                     \
+       "333:\n\t"
+
 #else
 #error unsupported target
 #endif
@@ -258,35 +288,94 @@ int yield_mod_cnt, nr_abort;
 
 #include <rseq/rseq.h>
 
-struct percpu_lock_entry {
-       intptr_t v;
-} __attribute__((aligned(128)));
+static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
+
+static int sys_membarrier(int cmd, int flags, int cpu_id)
+{
+       return syscall(__NR_membarrier, cmd, flags, cpu_id);
+}
+
+#ifdef rseq_arch_has_load_cbne_load_add_load_add_store
+#define TEST_MEMBARRIER
+#endif
+
+#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
+# define RSEQ_PERCPU   RSEQ_PERCPU_MM_CID
+static
+int get_current_cpu_id(void)
+{
+       return rseq_current_mm_cid();
+}
+static
+bool rseq_validate_cpu_id(void)
+{
+       return rseq_mm_cid_available();
+}
+static
+bool rseq_use_cpu_index(void)
+{
+       return false;   /* Use mm_cid */
+}
+# ifdef TEST_MEMBARRIER
+/*
+ * Membarrier does not currently support targeting a mm_cid, so
+ * issue the barrier on all cpus.
+ */
+static
+int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
+{
+       return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+                             0, 0);
+}
+# endif /* TEST_MEMBARRIER */
+#else
+# define RSEQ_PERCPU   RSEQ_PERCPU_CPU_ID
+static
+int get_current_cpu_id(void)
+{
+       return rseq_cpu_start();
+}
+static
+bool rseq_validate_cpu_id(void)
+{
+       return rseq_current_cpu_raw() >= 0;
+}
+static
+bool rseq_use_cpu_index(void)
+{
+       return true;    /* Use cpu_id as index. */
+}
+# ifdef TEST_MEMBARRIER
+static
+int rseq_membarrier_expedited(int cpu)
+{
+       return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+                             MEMBARRIER_CMD_FLAG_CPU, cpu);
+}
+# endif /* TEST_MEMBARRIER */
+#endif
 
 struct percpu_lock {
-       struct percpu_lock_entry c[CPU_SETSIZE];
+       intptr_t v;
 };
 
-struct test_data_entry {
-       intptr_t count;
-} __attribute__((aligned(128)));
-
 struct spinlock_test_data {
        struct percpu_lock lock;
-       struct test_data_entry c[CPU_SETSIZE];
+       intptr_t count;
 };
 
 struct spinlock_thread_test_data {
-       struct spinlock_test_data *data;
+       struct spinlock_test_data __rseq_percpu *data;
        long long reps;
        int reg;
 };
 
 struct inc_test_data {
-       struct test_data_entry c[CPU_SETSIZE];
+       intptr_t count;
 };
 
 struct inc_thread_test_data {
-       struct inc_test_data *data;
+       struct inc_test_data __rseq_percpu *data;
        long long reps;
        int reg;
 };
@@ -296,12 +385,8 @@ struct percpu_list_node {
        struct percpu_list_node *next;
 };
 
-struct percpu_list_entry {
-       struct percpu_list_node *head;
-} __attribute__((aligned(128)));
-
 struct percpu_list {
-       struct percpu_list_entry c[CPU_SETSIZE];
+       struct percpu_list_node *head;
 };
 
 #define BUFFER_ITEM_PER_CPU    100
@@ -310,14 +395,10 @@ struct percpu_buffer_node {
        intptr_t data;
 };
 
-struct percpu_buffer_entry {
+struct percpu_buffer {
        intptr_t offset;
        intptr_t buflen;
        struct percpu_buffer_node **array;
-} __attribute__((aligned(128)));
-
-struct percpu_buffer {
-       struct percpu_buffer_entry c[CPU_SETSIZE];
 };
 
 #define MEMCPY_BUFFER_ITEM_PER_CPU     100
@@ -327,26 +408,28 @@ struct percpu_memcpy_buffer_node {
        uint64_t data2;
 };
 
-struct percpu_memcpy_buffer_entry {
+struct percpu_memcpy_buffer {
        intptr_t offset;
        intptr_t buflen;
        struct percpu_memcpy_buffer_node *array;
-} __attribute__((aligned(128)));
-
-struct percpu_memcpy_buffer {
-       struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
 };
 
 /* A simple percpu spinlock. Grabs lock on current cpu. */
-static int rseq_this_cpu_lock(struct percpu_lock *lock)
+static int rseq_this_cpu_lock(struct percpu_lock __rseq_percpu *lock)
 {
        int cpu;
 
        for (;;) {
                int ret;
 
-               cpu = rseq_cpu_start();
-               ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
+               cpu = get_current_cpu_id();
+               if (cpu < 0) {
+                       fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
+                               getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
+                       abort();
+               }
+               ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+                                        &rseq_percpu_ptr(lock, cpu)->v,
                                         0, 1, cpu);
                if (rseq_likely(!ret))
                        break;
@@ -360,20 +443,20 @@ static int rseq_this_cpu_lock(struct percpu_lock *lock)
        return cpu;
 }
 
-static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
+static void rseq_percpu_unlock(struct percpu_lock __rseq_percpu *lock, int cpu)
 {
-       assert(lock->c[cpu].v == 1);
+       assert(rseq_percpu_ptr(lock, cpu)->v == 1);
        /*
         * Release lock, with release semantic. Matches
         * rseq_smp_acquire__after_ctrl_dep().
         */
-       rseq_smp_store_release(&lock->c[cpu].v, 0);
+       rseq_smp_store_release(&rseq_percpu_ptr(lock, cpu)->v, 0);
 }
 
 static void *test_percpu_spinlock_thread(void *arg)
 {
        struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
-       struct spinlock_test_data *data = thread_data->data;
+       struct spinlock_test_data __rseq_percpu *data = thread_data->data;
        long long i, reps;
 
        if (!opt_disable_rseq && thread_data->reg &&
@@ -382,7 +465,7 @@ static void *test_percpu_spinlock_thread(void *arg)
        reps = thread_data->reps;
        for (i = 0; i < reps; i++) {
                int cpu = rseq_this_cpu_lock(&data->lock);
-               data->c[cpu].count++;
+               rseq_percpu_ptr(data, cpu)->count++;
                rseq_percpu_unlock(&data->lock, cpu);
 #ifndef BENCHMARK
                if (i != 0 && !(i % (reps / 10)))
@@ -410,17 +493,30 @@ static void test_percpu_spinlock(void)
        int i, ret;
        uint64_t sum;
        pthread_t test_threads[num_threads];
-       struct spinlock_test_data data;
+       struct spinlock_test_data __rseq_percpu *data;
        struct spinlock_thread_test_data thread_data[num_threads];
+       struct rseq_mempool *mempool;
+
+       mempool = rseq_mempool_create("spinlock_test_data",
+                       sizeof(struct spinlock_test_data),
+                       0, CPU_SETSIZE, NULL);
+       if (!mempool) {
+               perror("rseq_mempool_create");
+               abort();
+       }
+       data = (struct spinlock_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
+       if (!data) {
+               perror("rseq_percpu_zmalloc");
+               abort();
+       }
 
-       memset(&data, 0, sizeof(data));
        for (i = 0; i < num_threads; i++) {
                thread_data[i].reps = opt_reps;
                if (opt_disable_mod <= 0 || (i % opt_disable_mod))
                        thread_data[i].reg = 1;
                else
                        thread_data[i].reg = 0;
-               thread_data[i].data = &data;
+               thread_data[i].data = data;
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_spinlock_thread,
                                     &thread_data[i]);
@@ -442,15 +538,21 @@ static void test_percpu_spinlock(void)
 
        sum = 0;
        for (i = 0; i < CPU_SETSIZE; i++)
-               sum += data.c[i].count;
+               sum += rseq_percpu_ptr(data, i)->count;
 
        assert(sum == (uint64_t)opt_reps * num_threads);
+       rseq_percpu_free(data);
+       ret = rseq_mempool_destroy(mempool);
+       if (ret) {
+               perror("rseq_mempool_destroy");
+               abort();
+       }
 }
 
 static void *test_percpu_inc_thread(void *arg)
 {
        struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
-       struct inc_test_data *data = thread_data->data;
+       struct inc_test_data __rseq_percpu *data = thread_data->data;
        long long i, reps;
 
        if (!opt_disable_rseq && thread_data->reg &&
@@ -463,8 +565,9 @@ static void *test_percpu_inc_thread(void *arg)
                do {
                        int cpu;
 
-                       cpu = rseq_cpu_start();
-                       ret = rseq_addv(&data->c[cpu].count, 1, cpu);
+                       cpu = get_current_cpu_id();
+                       ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+                                       &rseq_percpu_ptr(data, cpu)->count, 1, cpu);
                } while (rseq_unlikely(ret));
 #ifndef BENCHMARK
                if (i != 0 && !(i % (reps / 10)))
@@ -486,17 +589,30 @@ static void test_percpu_inc(void)
        int i, ret;
        uint64_t sum;
        pthread_t test_threads[num_threads];
-       struct inc_test_data data;
+       struct inc_test_data __rseq_percpu *data;
        struct inc_thread_test_data thread_data[num_threads];
+       struct rseq_mempool *mempool;
+
+       mempool = rseq_mempool_create("inc_test_data",
+                       sizeof(struct inc_test_data),
+                       0, CPU_SETSIZE, NULL);
+       if (!mempool) {
+               perror("rseq_mempool_create");
+               abort();
+       }
+       data = (struct inc_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
+       if (!data) {
+               perror("rseq_percpu_zmalloc");
+               abort();
+       }
 
-       memset(&data, 0, sizeof(data));
        for (i = 0; i < num_threads; i++) {
                thread_data[i].reps = opt_reps;
                if (opt_disable_mod <= 0 || (i % opt_disable_mod))
                        thread_data[i].reg = 1;
                else
                        thread_data[i].reg = 0;
-               thread_data[i].data = &data;
+               thread_data[i].data = data;
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_inc_thread,
                                     &thread_data[i]);
@@ -518,12 +634,18 @@ static void test_percpu_inc(void)
 
        sum = 0;
        for (i = 0; i < CPU_SETSIZE; i++)
-               sum += data.c[i].count;
+               sum += rseq_percpu_ptr(data, i)->count;
 
        assert(sum == (uint64_t)opt_reps * num_threads);
+       rseq_percpu_free(data);
+       ret = rseq_mempool_destroy(mempool);
+       if (ret) {
+               perror("rseq_mempool_destroy");
+               abort();
+       }
 }
 
-static void this_cpu_list_push(struct percpu_list *list,
+static void this_cpu_list_push(struct percpu_list __rseq_percpu *list,
                        struct percpu_list_node *node,
                        int *_cpu)
 {
@@ -531,15 +653,18 @@ static void this_cpu_list_push(struct percpu_list *list,
 
        for (;;) {
                intptr_t *targetptr, newval, expect;
+               struct percpu_list *cpulist;
                int ret;
 
-               cpu = rseq_cpu_start();
+               cpu = get_current_cpu_id();
+               cpulist = rseq_percpu_ptr(list, cpu);
                /* Load list->c[cpu].head with single-copy atomicity. */
-               expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
+               expect = (intptr_t)RSEQ_READ_ONCE(cpulist->head);
                newval = (intptr_t)node;
-               targetptr = (intptr_t *)&list->c[cpu].head;
+               targetptr = (intptr_t *)&cpulist->head;
                node->next = (struct percpu_list_node *)expect;
-               ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
+               ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+                                        targetptr, expect, newval, cpu);
                if (rseq_likely(!ret))
                        break;
                /* Retry if comparison fails or rseq aborts. */
@@ -553,7 +678,7 @@ static void this_cpu_list_push(struct percpu_list *list,
  * rseq primitive allows us to implement pop without concerns over
  * ABA-type races.
  */
-static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
+static struct percpu_list_node *this_cpu_list_pop(struct percpu_list __rseq_percpu *list,
                                           int *_cpu)
 {
        struct percpu_list_node *node = NULL;
@@ -562,16 +687,19 @@ static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
        for (;;) {
                struct percpu_list_node *head;
                intptr_t *targetptr, expectnot, *load;
+               struct percpu_list *cpulist;
                long offset;
                int ret;
 
-               cpu = rseq_cpu_start();
-               targetptr = (intptr_t *)&list->c[cpu].head;
+               cpu = get_current_cpu_id();
+               cpulist = rseq_percpu_ptr(list, cpu);
+               targetptr = (intptr_t *)&cpulist->head;
                expectnot = (intptr_t)NULL;
                offset = offsetof(struct percpu_list_node, next);
                load = (intptr_t *)&head;
-               ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
-                                                  offset, load, cpu);
+               ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+                                                targetptr, expectnot,
+                                                offset, load, cpu);
                if (rseq_likely(!ret)) {
                        node = head;
                        break;
@@ -589,21 +717,22 @@ static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
  * __percpu_list_pop is not safe against concurrent accesses. Should
  * only be used on lists that are not concurrently modified.
  */
-static struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
+static struct percpu_list_node *__percpu_list_pop(struct percpu_list __rseq_percpu *list, int cpu)
 {
+       struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
        struct percpu_list_node *node;
 
-       node = list->c[cpu].head;
+       node = cpulist->head;
        if (!node)
                return NULL;
-       list->c[cpu].head = node->next;
+       cpulist->head = node->next;
        return node;
 }
 
 static void *test_percpu_list_thread(void *arg)
 {
        long long i, reps;
-       struct percpu_list *list = (struct percpu_list *)arg;
+       struct percpu_list __rseq_percpu *list = (struct percpu_list __rseq_percpu *)arg;
 
        if (!opt_disable_rseq && rseq_register_current_thread())
                abort();
@@ -633,18 +762,30 @@ static void test_percpu_list(void)
        const int num_threads = opt_threads;
        int i, j, ret;
        uint64_t sum = 0, expected_sum = 0;
-       struct percpu_list list;
+       struct percpu_list __rseq_percpu *list;
        pthread_t test_threads[num_threads];
        cpu_set_t allowed_cpus;
+       struct rseq_mempool *mempool;
 
-       memset(&list, 0, sizeof(list));
+       mempool = rseq_mempool_create("percpu_list", sizeof(struct percpu_list),
+                       0, CPU_SETSIZE, NULL);
+       if (!mempool) {
+               perror("rseq_mempool_create");
+               abort();
+       }
+       list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool);
+       if (!list) {
+               perror("rseq_percpu_zmalloc");
+               abort();
+       }
 
        /* Generate list entries for every usable cpu. */
        sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
        for (i = 0; i < CPU_SETSIZE; i++) {
-               if (!CPU_ISSET(i, &allowed_cpus))
+               if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                        continue;
                for (j = 1; j <= 100; j++) {
+                       struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
                        struct percpu_list_node *node;
 
                        expected_sum += j;
@@ -652,14 +793,14 @@ static void test_percpu_list(void)
                        node = (struct percpu_list_node *) malloc(sizeof(*node));
                        assert(node);
                        node->data = j;
-                       node->next = list.c[i].head;
-                       list.c[i].head = node;
+                       node->next = cpulist->head;
+                       cpulist->head = node;
                }
        }
 
        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&test_threads[i], NULL,
-                                    test_percpu_list_thread, &list);
+                                    test_percpu_list_thread, list);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
@@ -679,10 +820,10 @@ static void test_percpu_list(void)
        for (i = 0; i < CPU_SETSIZE; i++) {
                struct percpu_list_node *node;
 
-               if (!CPU_ISSET(i, &allowed_cpus))
+               if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                        continue;
 
-               while ((node = __percpu_list_pop(&list, i))) {
+               while ((node = __percpu_list_pop(list, i))) {
                        sum += node->data;
                        free(node);
                }
@@ -694,9 +835,15 @@ static void test_percpu_list(void)
         * test is running).
         */
        assert(sum == expected_sum);
+       rseq_percpu_free(list);
+       ret = rseq_mempool_destroy(mempool);
+       if (ret) {
+               perror("rseq_mempool_destroy");
+               abort();
+       }
 }
 
-static bool this_cpu_buffer_push(struct percpu_buffer *buffer,
+static bool this_cpu_buffer_push(struct percpu_buffer __rseq_percpu *buffer,
                          struct percpu_buffer_node *node,
                          int *_cpu)
 {
@@ -704,27 +851,24 @@ static bool this_cpu_buffer_push(struct percpu_buffer *buffer,
        int cpu;
 
        for (;;) {
+               struct percpu_buffer *cpubuffer;
                intptr_t *targetptr_spec, newval_spec;
                intptr_t *targetptr_final, newval_final;
                intptr_t offset;
                int ret;
 
-               cpu = rseq_cpu_start();
-               offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
-               if (offset == buffer->c[cpu].buflen)
+               cpu = get_current_cpu_id();
+               cpubuffer = rseq_percpu_ptr(buffer, cpu);
+               offset = RSEQ_READ_ONCE(cpubuffer->offset);
+               if (offset == cpubuffer->buflen)
                        break;
                newval_spec = (intptr_t)node;
-               targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
+               targetptr_spec = (intptr_t *)&cpubuffer->array[offset];
                newval_final = offset + 1;
-               targetptr_final = &buffer->c[cpu].offset;
-               if (opt_mb)
-                       ret = rseq_cmpeqv_trystorev_storev_release(
-                               targetptr_final, offset, targetptr_spec,
-                               newval_spec, newval_final, cpu);
-               else
-                       ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
-                               offset, targetptr_spec, newval_spec,
-                               newval_final, cpu);
+               targetptr_final = &cpubuffer->offset;
+               ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
+                       targetptr_final, offset, targetptr_spec,
+                       newval_spec, newval_final, cpu);
                if (rseq_likely(!ret)) {
                        result = true;
                        break;
@@ -736,29 +880,32 @@ static bool this_cpu_buffer_push(struct percpu_buffer *buffer,
        return result;
 }
 
-static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
+static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
                                               int *_cpu)
 {
        struct percpu_buffer_node *head;
        int cpu;
 
        for (;;) {
+               struct percpu_buffer *cpubuffer;
                intptr_t *targetptr, newval;
                intptr_t offset;
                int ret;
 
-               cpu = rseq_cpu_start();
+               cpu = get_current_cpu_id();
+               cpubuffer = rseq_percpu_ptr(buffer, cpu);
                /* Load offset with single-copy atomicity. */
-               offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+               offset = RSEQ_READ_ONCE(cpubuffer->offset);
                if (offset == 0) {
                        head = NULL;
                        break;
                }
-               head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
+               head = RSEQ_READ_ONCE(cpubuffer->array[offset - 1]);
                newval = offset - 1;
-               targetptr = (intptr_t *)&buffer->c[cpu].offset;
-               ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
-                       (intptr_t *)&buffer->c[cpu].array[offset - 1],
+               targetptr = (intptr_t *)&cpubuffer->offset;
+               ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+                       targetptr, offset,
+                       (intptr_t *)&cpubuffer->array[offset - 1],
                        (intptr_t)head, newval, cpu);
                if (rseq_likely(!ret))
                        break;
@@ -773,24 +920,26 @@ static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buff
  * __percpu_buffer_pop is not safe against concurrent accesses. Should
  * only be used on buffers that are not concurrently modified.
  */
-static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
+static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
                                               int cpu)
 {
+       struct percpu_buffer *cpubuffer;
        struct percpu_buffer_node *head;
        intptr_t offset;
 
-       offset = buffer->c[cpu].offset;
+       cpubuffer = rseq_percpu_ptr(buffer, cpu);
+       offset = cpubuffer->offset;
        if (offset == 0)
                return NULL;
-       head = buffer->c[cpu].array[offset - 1];
-       buffer->c[cpu].offset = offset - 1;
+       head = cpubuffer->array[offset - 1];
+       cpubuffer->offset = offset - 1;
        return head;
 }
 
 static void *test_percpu_buffer_thread(void *arg)
 {
        long long i, reps;
-       struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
+       struct percpu_buffer __rseq_percpu *buffer = (struct percpu_buffer __rseq_percpu *)arg;
 
        if (!opt_disable_rseq && rseq_register_current_thread())
                abort();
@@ -824,24 +973,38 @@ static void test_percpu_buffer(void)
        const int num_threads = opt_threads;
        int i, j, ret;
        uint64_t sum = 0, expected_sum = 0;
-       struct percpu_buffer buffer;
+       struct percpu_buffer __rseq_percpu *buffer;
        pthread_t test_threads[num_threads];
        cpu_set_t allowed_cpus;
+       struct rseq_mempool *mempool;
 
-       memset(&buffer, 0, sizeof(buffer));
+       mempool = rseq_mempool_create("percpu_buffer", sizeof(struct percpu_buffer),
+                       0, CPU_SETSIZE, NULL);
+       if (!mempool) {
+               perror("rseq_mempool_create");
+               abort();
+       }
+       buffer = (struct percpu_buffer __rseq_percpu *)rseq_percpu_zmalloc(mempool);
+       if (!buffer) {
+               perror("rseq_percpu_zmalloc");
+               abort();
+       }
 
        /* Generate list entries for every usable cpu. */
        sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
        for (i = 0; i < CPU_SETSIZE; i++) {
-               if (!CPU_ISSET(i, &allowed_cpus))
+               struct percpu_buffer *cpubuffer;
+
+               if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                        continue;
+               cpubuffer = rseq_percpu_ptr(buffer, i);
                /* Worse-case is every item in same CPU. */
-               buffer.c[i].array =
+               cpubuffer->array =
                        (struct percpu_buffer_node **)
-                       malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
+                       malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
                               BUFFER_ITEM_PER_CPU);
-               assert(buffer.c[i].array);
-               buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
+               assert(cpubuffer->array);
+               cpubuffer->buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
                for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
                        struct percpu_buffer_node *node;
 
@@ -857,14 +1020,14 @@ static void test_percpu_buffer(void)
                        node = (struct percpu_buffer_node *) malloc(sizeof(*node));
                        assert(node);
                        node->data = j;
-                       buffer.c[i].array[j - 1] = node;
-                       buffer.c[i].offset++;
+                       cpubuffer->array[j - 1] = node;
+                       cpubuffer->offset++;
                }
        }
 
        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&test_threads[i], NULL,
-                                    test_percpu_buffer_thread, &buffer);
+                                    test_percpu_buffer_thread, buffer);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
@@ -882,16 +1045,18 @@ static void test_percpu_buffer(void)
        }
 
        for (i = 0; i < CPU_SETSIZE; i++) {
+               struct percpu_buffer *cpubuffer;
                struct percpu_buffer_node *node;
 
-               if (!CPU_ISSET(i, &allowed_cpus))
+               if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                        continue;
 
-               while ((node = __percpu_buffer_pop(&buffer, i))) {
+               cpubuffer = rseq_percpu_ptr(buffer, i);
+               while ((node = __percpu_buffer_pop(buffer, i))) {
                        sum += node->data;
                        free(node);
                }
-               free(buffer.c[i].array);
+               free(cpubuffer->array);
        }
 
        /*
@@ -900,9 +1065,15 @@ static void test_percpu_buffer(void)
         * test is running).
         */
        assert(sum == expected_sum);
+       rseq_percpu_free(buffer);
+       ret = rseq_mempool_destroy(mempool);
+       if (ret) {
+               perror("rseq_mempool_destroy");
+               abort();
+       }
 }
 
-static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
+static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer __rseq_percpu *buffer,
                                 struct percpu_memcpy_buffer_node item,
                                 int *_cpu)
 {
@@ -910,31 +1081,29 @@ static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
        int cpu;
 
        for (;;) {
+               struct percpu_memcpy_buffer *cpubuffer;
                intptr_t *targetptr_final, newval_final, offset;
                char *destptr, *srcptr;
                size_t copylen;
                int ret;
 
-               cpu = rseq_cpu_start();
+               cpu = get_current_cpu_id();
+               cpubuffer = rseq_percpu_ptr(buffer, cpu);
                /* Load offset with single-copy atomicity. */
-               offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
-               if (offset == buffer->c[cpu].buflen)
+               offset = RSEQ_READ_ONCE(cpubuffer->offset);
+               if (offset == cpubuffer->buflen)
                        break;
-               destptr = (char *)&buffer->c[cpu].array[offset];
+               destptr = (char *)&cpubuffer->array[offset];
                srcptr = (char *)&item;
                /* copylen must be <= 4kB. */
                copylen = sizeof(item);
                newval_final = offset + 1;
-               targetptr_final = &buffer->c[cpu].offset;
-               if (opt_mb)
-                       ret = rseq_cmpeqv_trymemcpy_storev_release(
-                               targetptr_final, offset,
-                               destptr, srcptr, copylen,
-                               newval_final, cpu);
-               else
-                       ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
-                               offset, destptr, srcptr, copylen,
-                               newval_final, cpu);
+               targetptr_final = &cpubuffer->offset;
+               ret = rseq_load_cbne_memcpy_store__ptr(
+                       opt_mo, RSEQ_PERCPU,
+                       targetptr_final, offset,
+                       destptr, srcptr, copylen,
+                       newval_final, cpu);
                if (rseq_likely(!ret)) {
                        result = true;
                        break;
@@ -946,7 +1115,7 @@ static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
        return result;
 }
 
-static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
+static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
                                struct percpu_memcpy_buffer_node *item,
                                int *_cpu)
 {
@@ -954,24 +1123,26 @@ static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
        int cpu;
 
        for (;;) {
+               struct percpu_memcpy_buffer *cpubuffer;
                intptr_t *targetptr_final, newval_final, offset;
                char *destptr, *srcptr;
                size_t copylen;
                int ret;
 
-               cpu = rseq_cpu_start();
+               cpu = get_current_cpu_id();
+               cpubuffer = rseq_percpu_ptr(buffer, cpu);
                /* Load offset with single-copy atomicity. */
-               offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+               offset = RSEQ_READ_ONCE(cpubuffer->offset);
                if (offset == 0)
                        break;
                destptr = (char *)item;
-               srcptr = (char *)&buffer->c[cpu].array[offset - 1];
+               srcptr = (char *)&cpubuffer->array[offset - 1];
                /* copylen must be <= 4kB. */
                copylen = sizeof(*item);
                newval_final = offset - 1;
-               targetptr_final = &buffer->c[cpu].offset;
-               ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
-                       offset, destptr, srcptr, copylen,
+               targetptr_final = &cpubuffer->offset;
+               ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+                       targetptr_final, offset, destptr, srcptr, copylen,
                        newval_final, cpu);
                if (rseq_likely(!ret)) {
                        result = true;
@@ -988,24 +1159,26 @@ static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
  * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
  * only be used on buffers that are not concurrently modified.
  */
-static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
+static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
                                struct percpu_memcpy_buffer_node *item,
                                int cpu)
 {
+       struct percpu_memcpy_buffer *cpubuffer;
        intptr_t offset;
 
-       offset = buffer->c[cpu].offset;
+       cpubuffer = rseq_percpu_ptr(buffer, cpu);
+       offset = cpubuffer->offset;
        if (offset == 0)
                return false;
-       memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
-       buffer->c[cpu].offset = offset - 1;
+       memcpy(item, &cpubuffer->array[offset - 1], sizeof(*item));
+       cpubuffer->offset = offset - 1;
        return true;
 }
 
 static void *test_percpu_memcpy_buffer_thread(void *arg)
 {
        long long i, reps;
-       struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
+       struct percpu_memcpy_buffer __rseq_percpu *buffer = (struct percpu_memcpy_buffer __rseq_percpu *)arg;
 
        if (!opt_disable_rseq && rseq_register_current_thread())
                abort();
@@ -1040,24 +1213,39 @@ static void test_percpu_memcpy_buffer(void)
        const int num_threads = opt_threads;
        int i, j, ret;
        uint64_t sum = 0, expected_sum = 0;
-       struct percpu_memcpy_buffer buffer;
+       struct percpu_memcpy_buffer __rseq_percpu *buffer;
        pthread_t test_threads[num_threads];
        cpu_set_t allowed_cpus;
+       struct rseq_mempool *mempool;
 
-       memset(&buffer, 0, sizeof(buffer));
+       mempool = rseq_mempool_create("percpu_memcpy_buffer",
+                       sizeof(struct percpu_memcpy_buffer),
+                       0, CPU_SETSIZE, NULL);
+       if (!mempool) {
+               perror("rseq_mempool_create");
+               abort();
+       }
+       buffer = (struct percpu_memcpy_buffer __rseq_percpu *)rseq_percpu_zmalloc(mempool);
+       if (!buffer) {
+               perror("rseq_percpu_zmalloc");
+               abort();
+       }
 
        /* Generate list entries for every usable cpu. */
        sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
        for (i = 0; i < CPU_SETSIZE; i++) {
-               if (!CPU_ISSET(i, &allowed_cpus))
+               struct percpu_memcpy_buffer *cpubuffer;
+
+               if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                        continue;
+               cpubuffer = rseq_percpu_ptr(buffer, i);
                /* Worse-case is every item in same CPU. */
-               buffer.c[i].array =
+               cpubuffer->array =
                        (struct percpu_memcpy_buffer_node *)
-                       malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
+                       malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
                               MEMCPY_BUFFER_ITEM_PER_CPU);
-               assert(buffer.c[i].array);
-               buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
+               assert(cpubuffer->array);
+               cpubuffer->buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
                for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
                        expected_sum += 2 * j + 1;
 
@@ -1068,16 +1256,16 @@ static void test_percpu_memcpy_buffer(void)
                         * within a single word, so allocate an object
                         * for each node.
                         */
-                       buffer.c[i].array[j - 1].data1 = j;
-                       buffer.c[i].array[j - 1].data2 = j + 1;
-                       buffer.c[i].offset++;
+                       cpubuffer->array[j - 1].data1 = j;
+                       cpubuffer->array[j - 1].data2 = j + 1;
+                       cpubuffer->offset++;
                }
        }
 
        for (i = 0; i < num_threads; i++) {
                ret = pthread_create(&test_threads[i], NULL,
                                     test_percpu_memcpy_buffer_thread,
-                                    &buffer);
+                                    buffer);
                if (ret) {
                        errno = ret;
                        perror("pthread_create");
@@ -1096,15 +1284,17 @@ static void test_percpu_memcpy_buffer(void)
 
        for (i = 0; i < CPU_SETSIZE; i++) {
                struct percpu_memcpy_buffer_node item;
+               struct percpu_memcpy_buffer *cpubuffer;
 
-               if (!CPU_ISSET(i, &allowed_cpus))
+               if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
                        continue;
 
-               while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
+               cpubuffer = rseq_percpu_ptr(buffer, i);
+               while (__percpu_memcpy_buffer_pop(buffer, &item, i)) {
                        sum += item.data1;
                        sum += item.data2;
                }
-               free(buffer.c[i].array);
+               free(cpubuffer->array);
        }
 
        /*
@@ -1113,9 +1303,14 @@ static void test_percpu_memcpy_buffer(void)
         * test is running).
         */
        assert(sum == expected_sum);
+       rseq_percpu_free(buffer);
+       ret = rseq_mempool_destroy(mempool);
+       if (ret) {
+               perror("rseq_mempool_destroy");
+               abort();
+       }
 }
 
-
 static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
 {
        signals_delivered++;
@@ -1147,12 +1342,6 @@ static int set_signal_handler(void)
        return ret;
 }
 
-static
-int sys_membarrier(int cmd, int flags, int cpu_id)
-{
-       return syscall(__NR_membarrier, cmd, flags, cpu_id);
-}
-
 static
 bool membarrier_private_expedited_rseq_available(void)
 {
@@ -1168,10 +1357,11 @@ bool membarrier_private_expedited_rseq_available(void)
 }
 
 /* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
-#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+#ifdef TEST_MEMBARRIER
 struct test_membarrier_thread_args {
+       struct rseq_mempool *mempool;
+       struct percpu_list __rseq_percpu *percpu_list_ptr;
        int stop;
-       intptr_t percpu_list_ptr;
 };
 
 /* Worker threads modify data in their "active" percpu lists. */
@@ -1180,8 +1370,8 @@ void *test_membarrier_worker_thread(void *arg)
 {
        struct test_membarrier_thread_args *args =
                (struct test_membarrier_thread_args *)arg;
-       const int iters = opt_reps;
-       int i;
+       const long long iters = opt_reps;
+       long long i;
 
        if (rseq_register_current_thread()) {
                fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
@@ -1196,10 +1386,13 @@ void *test_membarrier_worker_thread(void *arg)
                int ret;
 
                do {
-                       int cpu = rseq_cpu_start();
+                       int cpu = get_current_cpu_id();
+                       struct percpu_list __rseq_percpu *list = RSEQ_READ_ONCE(args->percpu_list_ptr);
+                       struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
 
-                       ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
-                               sizeof(struct percpu_list_entry) * cpu, 1, cpu);
+                       ret = rseq_load_cbne_load_add_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+                               (intptr_t *) &args->percpu_list_ptr,
+                               (intptr_t) list, (intptr_t *) &cpulist->head, 0, 1, cpu);
                } while (rseq_unlikely(ret));
        }
 
@@ -1212,29 +1405,48 @@ void *test_membarrier_worker_thread(void *arg)
 }
 
 static
-void test_membarrier_init_percpu_list(struct percpu_list *list)
+struct percpu_list __rseq_percpu *test_membarrier_alloc_percpu_list(struct rseq_mempool *mempool)
 {
+       struct percpu_list __rseq_percpu *list;
        int i;
 
-       memset(list, 0, sizeof(*list));
+       list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool);
+       if (!list) {
+               perror("rseq_percpu_zmalloc");
+               return NULL;
+       }
        for (i = 0; i < CPU_SETSIZE; i++) {
+               struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
                struct percpu_list_node *node;
 
                node = (struct percpu_list_node *) malloc(sizeof(*node));
                assert(node);
                node->data = 0;
                node->next = NULL;
-               list->c[i].head = node;
+               cpulist->head = node;
        }
+       return list;
 }
 
 static
-void test_membarrier_free_percpu_list(struct percpu_list *list)
+void test_membarrier_free_percpu_list(struct percpu_list __rseq_percpu *list)
 {
        int i;
 
        for (i = 0; i < CPU_SETSIZE; i++)
-               free(list->c[i].head);
+               free(rseq_percpu_ptr(list, i)->head);
+       rseq_percpu_free(list);
+}
+
+static
+long long test_membarrier_count_percpu_list(struct percpu_list __rseq_percpu *list)
+{
+       long long total_count = 0;
+       int i;
+
+       for (i = 0; i < CPU_SETSIZE; i++)
+               total_count += rseq_percpu_ptr(list, i)->head->data;
+       return total_count;
 }
 
 /*
@@ -1246,9 +1458,20 @@ void *test_membarrier_manager_thread(void *arg)
 {
        struct test_membarrier_thread_args *args =
                (struct test_membarrier_thread_args *)arg;
-       struct percpu_list list_a, list_b;
+       struct percpu_list __rseq_percpu *list_a, __rseq_percpu *list_b;
        intptr_t expect_a = 0, expect_b = 0;
        int cpu_a = 0, cpu_b = 0;
+       struct rseq_mempool *mempool;
+       int ret;
+       long long total_count = 0;
+
+       mempool = rseq_mempool_create("percpu_list", sizeof(struct percpu_list),
+                       0, CPU_SETSIZE, NULL);
+       if (!mempool) {
+               perror("rseq_mempool_create");
+               abort();
+       }
+       args->mempool = mempool;
 
        if (rseq_register_current_thread()) {
                fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
@@ -1257,13 +1480,15 @@ void *test_membarrier_manager_thread(void *arg)
        }
 
        /* Init lists. */
-       test_membarrier_init_percpu_list(&list_a);
-       test_membarrier_init_percpu_list(&list_b);
+       list_a = test_membarrier_alloc_percpu_list(mempool);
+       assert(list_a);
+       list_b = test_membarrier_alloc_percpu_list(mempool);
+       assert(list_b);
 
        /* Initialize lists before publishing them. */
        rseq_smp_wmb();
 
-       RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
+       RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
 
        while (!RSEQ_READ_ONCE(args->stop)) {
                /* list_a is "active". */
@@ -1272,15 +1497,14 @@ void *test_membarrier_manager_thread(void *arg)
                 * As list_b is "inactive", we should never see changes
                 * to list_b.
                 */
-               if (expect_b != RSEQ_READ_ONCE(list_b.c[cpu_b].head->data)) {
+               if (expect_b != RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data)) {
                        fprintf(stderr, "Membarrier test failed\n");
                        abort();
                }
 
                /* Make list_b "active". */
-               RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_b);
-               if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
-                                       MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
+               RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_b);
+               if (rseq_membarrier_expedited(cpu_a) &&
                                errno != ENXIO /* missing CPU */) {
                        perror("sys_membarrier");
                        abort();
@@ -1289,38 +1513,52 @@ void *test_membarrier_manager_thread(void *arg)
                 * Cpu A should now only modify list_b, so the values
                 * in list_a should be stable.
                 */
-               expect_a = RSEQ_READ_ONCE(list_a.c[cpu_a].head->data);
+               expect_a = RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data);
 
                cpu_b = rand() % CPU_SETSIZE;
                /*
                 * As list_a is "inactive", we should never see changes
                 * to list_a.
                 */
-               if (expect_a != RSEQ_READ_ONCE(list_a.c[cpu_a].head->data)) {
+               if (expect_a != RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data)) {
                        fprintf(stderr, "Membarrier test failed\n");
                        abort();
                }
 
                /* Make list_a "active". */
-               RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
-               if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
-                                       MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
+               RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
+               if (rseq_membarrier_expedited(cpu_b) &&
                                errno != ENXIO /* missing CPU */) {
                        perror("sys_membarrier");
                        abort();
                }
                /* Remember a value from list_b. */
-               expect_b = RSEQ_READ_ONCE(list_b.c[cpu_b].head->data);
+               expect_b = RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data);
        }
 
-       test_membarrier_free_percpu_list(&list_a);
-       test_membarrier_free_percpu_list(&list_b);
+       total_count += test_membarrier_count_percpu_list(list_a);
+       total_count += test_membarrier_count_percpu_list(list_b);
+
+       /* Validate that we observe the right number of increments. */
+       if (total_count != opt_threads * opt_reps) {
+               fprintf(stderr, "Error: Observed %lld increments, expected %lld\n",
+                       total_count, opt_threads * opt_reps);
+               abort();
+       }
+       test_membarrier_free_percpu_list(list_a);
+       test_membarrier_free_percpu_list(list_b);
 
        if (rseq_unregister_current_thread()) {
                fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
                        errno, strerror(errno));
                abort();
        }
+       ret = rseq_mempool_destroy(mempool);
+       if (ret) {
+               perror("rseq_mempool_destroy");
+               abort();
+       }
+
        return NULL;
 }
 
@@ -1343,8 +1581,8 @@ void test_membarrier(void)
                abort();
        }
 
+       thread_args.percpu_list_ptr = NULL;
        thread_args.stop = 0;
-       thread_args.percpu_list_ptr = 0;
        ret = pthread_create(&manager_thread, NULL,
                        test_membarrier_manager_thread, &thread_args);
        if (ret) {
@@ -1381,7 +1619,7 @@ void test_membarrier(void)
                abort();
        }
 }
-#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
+#else /* TEST_MEMBARRIER */
 static
 void test_membarrier(void)
 {
@@ -1390,7 +1628,7 @@ void test_membarrier(void)
                                "Skipping membarrier test.\n");
                return;
        }
-       fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
+       fprintf(stderr, "rseq_load_cbne_load_add_load_add_store__ptr is not implemented on this architecture. "
                        "Skipping membarrier test.\n");
 }
 #endif
@@ -1545,10 +1783,10 @@ int main(int argc, char **argv)
                        verbose = 1;
                        break;
                case 'M':
-                       opt_mb = 1;
+                       opt_mo = RSEQ_MO_RELEASE;
                        break;
                case 'c':
-                       if (rseq_available()) {
+                       if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
                                printf_verbose("The rseq syscall is available.\n");
                                goto end;
                        } else {
@@ -1573,6 +1811,10 @@ int main(int argc, char **argv)
 
        if (!opt_disable_rseq && rseq_register_current_thread())
                goto error;
+       if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
+               printf_verbose("The rseq cpu id getter is unavailable\n");
+               goto no_rseq;
+       }
        switch (opt_test) {
        case 's':
                printf_verbose("spinlock\n");