-// SPDX-License-Identifier: LGPL-2.1-only
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <signal.h>
#include <errno.h>
#include <stddef.h>
+#include <stdbool.h>
+#include <rseq/mempool.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0)
enum {
static int opt_yield, opt_signal, opt_sleep,
opt_disable_rseq, opt_threads = 200,
- opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
+ opt_disable_mod = 0, opt_test = 's';
-#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
-#else
-static long long opt_reps = 100;
-#endif
static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;
-#ifndef BENCHMARK
-
static inline pid_t rseq_gettid(void)
{
return syscall(__NR_gettid);
}
+#ifndef BENCHMARK
+
static __thread __attribute__((tls_model("initial-exec"), unused))
int yield_mod_cnt, nr_abort;
#define RSEQ_INJECT_CLOBBER \
, INJECT_ASM_REG
-#define RSEQ_INJECT_ASM(n) \
- "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
+/*
+ * Use ip-relative addressing to get the loop counter.
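+ * %[ref_ip] holds the runtime address of RSEQ_ASM_REF_LABEL; "leal"
+ * then adds the constant delta (asm_loop_cnt_n - ref_label), so the
+ * counter address is computed relative to the instruction pointer,
+ * keeping the asm position-independent.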
+ */
+#define __RSEQ_INJECT_ASM(n, ref_ip, ref_label) \
+ "movl " __rseq_str(ref_ip) ", %%" INJECT_ASM_REG "\n\t" \
+ "leal ( asm_loop_cnt_" #n " - " __rseq_str(ref_label) "b)(%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
+ "movl (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \
"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
"jz 333f\n\t" \
"222:\n\t" \
"jnz 222b\n\t" \
"333:\n\t"
+#define RSEQ_INJECT_ASM(n) \
+ __RSEQ_INJECT_ASM(n, %[ref_ip], RSEQ_ASM_REF_LABEL)
+
#elif defined(__x86_64__)
#define INJECT_ASM_REG_P "rax"
"bnez " INJECT_ASM_REG ", 222b\n\t" \
"333:\n\t"
+#elif defined(__riscv)
+
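+/*
+ * Delay-loop injection for RISC-V: load loop_cnt[n] into t1 and
+ * busy-wait by decrementing it down to zero.
+ */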
+#define RSEQ_INJECT_INPUT \
+ , [loop_cnt_1]"m"(loop_cnt[1]) \
+ , [loop_cnt_2]"m"(loop_cnt[2]) \
+ , [loop_cnt_3]"m"(loop_cnt[3]) \
+ , [loop_cnt_4]"m"(loop_cnt[4]) \
+ , [loop_cnt_5]"m"(loop_cnt[5]) \
+ , [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG "t1"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+ "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+ "beqz " INJECT_ASM_REG ", 333f\n\t" \
+ "222:\n\t" \
+ "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
+ "bnez " INJECT_ASM_REG ", 222b\n\t" \
+ "333:\n\t"
+
#else
#error unsupported target
#endif
#include <rseq/rseq.h>
-struct percpu_lock_entry {
- intptr_t v;
-} __attribute__((aligned(128)));
+static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
+
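+/* Raw wrapper for the membarrier(2) syscall (glibc provides none). */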
+static int sys_membarrier(int cmd, int flags, int cpu_id)
+{
+ return syscall(__NR_membarrier, cmd, flags, cpu_id);
+}
+
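+/*
+ * The membarrier test requires the load_cbne_load_add_load_add_store
+ * rseq operation, which is only implemented on some architectures.
+ */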
+#ifdef rseq_arch_has_load_cbne_load_add_load_add_store
+#define TEST_MEMBARRIER
+#endif
+
+#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
+# define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
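+/*
+ * Index per-cpu data by the per-memory-map concurrency ID (mm_cid)
+ * rather than by CPU number.
+ */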
+static
+int get_current_cpu_id(void)
+{
+ return rseq_current_mm_cid();
+}
+static
+bool rseq_validate_cpu_id(void)
+{
+ return rseq_mm_cid_available();
+}
+static
+bool rseq_use_cpu_index(void)
+{
+ return false; /* Use mm_cid */
+}
+# ifdef TEST_MEMBARRIER
+/*
+ * Membarrier does not currently support targeting a mm_cid, so
+ * issue the barrier on all cpus.
+ */
+static
+int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu)
+{
+ return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+ 0, 0);
+}
+# endif /* TEST_MEMBARRIER */
+#else
+# define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
+static
+int get_current_cpu_id(void)
+{
+ return rseq_cpu_start();
+}
+static
+bool rseq_validate_cpu_id(void)
+{
+ return rseq_current_cpu_raw() >= 0;
+}
+static
+bool rseq_use_cpu_index(void)
+{
+ return true; /* Use cpu_id as index. */
+}
+# ifdef TEST_MEMBARRIER
+static
+int rseq_membarrier_expedited(int cpu)
+{
+ return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+ MEMBARRIER_CMD_FLAG_CPU, cpu);
+}
+# endif /* TEST_MEMBARRIER */
+#endif
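+
+/*
+ * The per-cpu copies of each test structure below are mempool
+ * replicas addressed via rseq_percpu_ptr(), replacing the former
+ * CPU_SETSIZE-sized arrays of cacheline-aligned entries.
+ */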
struct percpu_lock {
- struct percpu_lock_entry c[CPU_SETSIZE];
+ intptr_t v;
};
-struct test_data_entry {
- intptr_t count;
-} __attribute__((aligned(128)));
-
struct spinlock_test_data {
struct percpu_lock lock;
- struct test_data_entry c[CPU_SETSIZE];
+ intptr_t count;
};
struct spinlock_thread_test_data {
- struct spinlock_test_data *data;
+ struct spinlock_test_data __rseq_percpu *data;
long long reps;
int reg;
};
struct inc_test_data {
- struct test_data_entry c[CPU_SETSIZE];
+ intptr_t count;
};
struct inc_thread_test_data {
- struct inc_test_data *data;
+ struct inc_test_data __rseq_percpu *data;
long long reps;
int reg;
};
struct percpu_list_node *next;
};
-struct percpu_list_entry {
- struct percpu_list_node *head;
-} __attribute__((aligned(128)));
-
struct percpu_list {
- struct percpu_list_entry c[CPU_SETSIZE];
+ struct percpu_list_node *head;
};
#define BUFFER_ITEM_PER_CPU 100
intptr_t data;
};
-struct percpu_buffer_entry {
+struct percpu_buffer {
intptr_t offset;
intptr_t buflen;
struct percpu_buffer_node **array;
-} __attribute__((aligned(128)));
-
-struct percpu_buffer {
- struct percpu_buffer_entry c[CPU_SETSIZE];
};
#define MEMCPY_BUFFER_ITEM_PER_CPU 100
uint64_t data2;
};
-struct percpu_memcpy_buffer_entry {
+struct percpu_memcpy_buffer {
intptr_t offset;
intptr_t buflen;
struct percpu_memcpy_buffer_node *array;
-} __attribute__((aligned(128)));
-
-struct percpu_memcpy_buffer {
- struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};
/* A simple percpu spinlock. Grabs lock on current cpu. */
-static int rseq_this_cpu_lock(struct percpu_lock *lock)
+static int rseq_this_cpu_lock(struct percpu_lock __rseq_percpu *lock)
{
int cpu;
for (;;) {
int ret;
- cpu = rseq_cpu_start();
- ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
+ cpu = get_current_cpu_id();
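+ /* A negative id means no cpu number (or mm_cid) is available. */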
+ if (cpu < 0) {
+ fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
+ getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
+ abort();
+ }
+ ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ &rseq_percpu_ptr(lock, cpu)->v,
0, 1, cpu);
if (rseq_likely(!ret))
break;
return cpu;
}
-static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
+static void rseq_percpu_unlock(struct percpu_lock __rseq_percpu *lock, int cpu)
{
- assert(lock->c[cpu].v == 1);
+ assert(rseq_percpu_ptr(lock, cpu)->v == 1);
/*
* Release lock, with release semantic. Matches
* rseq_smp_acquire__after_ctrl_dep().
*/
- rseq_smp_store_release(&lock->c[cpu].v, 0);
+ rseq_smp_store_release(&rseq_percpu_ptr(lock, cpu)->v, 0);
}
static void *test_percpu_spinlock_thread(void *arg)
{
struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg;
- struct spinlock_test_data *data = thread_data->data;
+ struct spinlock_test_data __rseq_percpu *data = thread_data->data;
long long i, reps;
if (!opt_disable_rseq && thread_data->reg &&
reps = thread_data->reps;
for (i = 0; i < reps; i++) {
int cpu = rseq_this_cpu_lock(&data->lock);
- data->c[cpu].count++;
+ rseq_percpu_ptr(data, cpu)->count++;
rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
if (i != 0 && !(i % (reps / 10)))
int i, ret;
uint64_t sum;
pthread_t test_threads[num_threads];
- struct spinlock_test_data data;
+ struct spinlock_test_data __rseq_percpu *data;
struct spinlock_thread_test_data thread_data[num_threads];
+ struct rseq_mempool *mempool;
+
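+ /*
+ * Create a named pool whose items each carry one replica per
+ * possible CPU (CPU_SETSIZE); rseq_percpu_zmalloc() returns a
+ * zeroed item addressed per-cpu through rseq_percpu_ptr().
+ */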
+ mempool = rseq_mempool_create("spinlock_test_data",
+ sizeof(struct spinlock_test_data),
+ 0, CPU_SETSIZE, NULL);
+ if (!mempool) {
+ perror("rseq_mempool_create");
+ abort();
+ }
+ data = (struct spinlock_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
+ if (!data) {
+ perror("rseq_percpu_zmalloc");
+ abort();
+ }
- memset(&data, 0, sizeof(data));
for (i = 0; i < num_threads; i++) {
thread_data[i].reps = opt_reps;
if (opt_disable_mod <= 0 || (i % opt_disable_mod))
thread_data[i].reg = 1;
else
thread_data[i].reg = 0;
- thread_data[i].data = &data;
+ thread_data[i].data = data;
ret = pthread_create(&test_threads[i], NULL,
test_percpu_spinlock_thread,
&thread_data[i]);
sum = 0;
for (i = 0; i < CPU_SETSIZE; i++)
- sum += data.c[i].count;
+ sum += rseq_percpu_ptr(data, i)->count;
assert(sum == (uint64_t)opt_reps * num_threads);
+ rseq_percpu_free(data);
+ ret = rseq_mempool_destroy(mempool);
+ if (ret) {
+ perror("rseq_mempool_destroy");
+ abort();
+ }
}
static void *test_percpu_inc_thread(void *arg)
{
struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg;
- struct inc_test_data *data = thread_data->data;
+ struct inc_test_data __rseq_percpu *data = thread_data->data;
long long i, reps;
if (!opt_disable_rseq && thread_data->reg &&
do {
int cpu;
- cpu = rseq_cpu_start();
- ret = rseq_addv(&data->c[cpu].count, 1, cpu);
+ cpu = get_current_cpu_id();
+ ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ &rseq_percpu_ptr(data, cpu)->count, 1, cpu);
} while (rseq_unlikely(ret));
#ifndef BENCHMARK
if (i != 0 && !(i % (reps / 10)))
int i, ret;
uint64_t sum;
pthread_t test_threads[num_threads];
- struct inc_test_data data;
+ struct inc_test_data __rseq_percpu *data;
struct inc_thread_test_data thread_data[num_threads];
+ struct rseq_mempool *mempool;
+
+ mempool = rseq_mempool_create("inc_test_data",
+ sizeof(struct inc_test_data),
+ 0, CPU_SETSIZE, NULL);
+ if (!mempool) {
+ perror("rseq_mempool_create");
+ abort();
+ }
+ data = (struct inc_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool);
+ if (!data) {
+ perror("rseq_percpu_zmalloc");
+ abort();
+ }
- memset(&data, 0, sizeof(data));
for (i = 0; i < num_threads; i++) {
thread_data[i].reps = opt_reps;
if (opt_disable_mod <= 0 || (i % opt_disable_mod))
thread_data[i].reg = 1;
else
thread_data[i].reg = 0;
- thread_data[i].data = &data;
+ thread_data[i].data = data;
ret = pthread_create(&test_threads[i], NULL,
test_percpu_inc_thread,
&thread_data[i]);
sum = 0;
for (i = 0; i < CPU_SETSIZE; i++)
- sum += data.c[i].count;
+ sum += rseq_percpu_ptr(data, i)->count;
assert(sum == (uint64_t)opt_reps * num_threads);
+ rseq_percpu_free(data);
+ ret = rseq_mempool_destroy(mempool);
+ if (ret) {
+ perror("rseq_mempool_destroy");
+ abort();
+ }
}
-static void this_cpu_list_push(struct percpu_list *list,
+static void this_cpu_list_push(struct percpu_list __rseq_percpu *list,
struct percpu_list_node *node,
int *_cpu)
{
for (;;) {
intptr_t *targetptr, newval, expect;
+ struct percpu_list *cpulist;
int ret;
- cpu = rseq_cpu_start();
+ cpu = get_current_cpu_id();
+ cpulist = rseq_percpu_ptr(list, cpu);
-/* Load list->c[cpu].head with single-copy atomicity. */
+/* Load cpulist->head with single-copy atomicity. */
- expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
+ expect = (intptr_t)RSEQ_READ_ONCE(cpulist->head);
newval = (intptr_t)node;
- targetptr = (intptr_t *)&list->c[cpu].head;
+ targetptr = (intptr_t *)&cpulist->head;
node->next = (struct percpu_list_node *)expect;
- ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
+ ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ targetptr, expect, newval, cpu);
if (rseq_likely(!ret))
break;
/* Retry if comparison fails or rseq aborts. */
* rseq primitive allows us to implement pop without concerns over
* ABA-type races.
*/
-static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
+static struct percpu_list_node *this_cpu_list_pop(struct percpu_list __rseq_percpu *list,
int *_cpu)
{
struct percpu_list_node *node = NULL;
for (;;) {
struct percpu_list_node *head;
intptr_t *targetptr, expectnot, *load;
- off_t offset;
+ struct percpu_list *cpulist;
+ long offset;
int ret;
- cpu = rseq_cpu_start();
- targetptr = (intptr_t *)&list->c[cpu].head;
+ cpu = get_current_cpu_id();
+ cpulist = rseq_percpu_ptr(list, cpu);
+ targetptr = (intptr_t *)&cpulist->head;
expectnot = (intptr_t)NULL;
offset = offsetof(struct percpu_list_node, next);
load = (intptr_t *)&head;
- ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
- offset, load, cpu);
+ ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ targetptr, expectnot,
+ offset, load, cpu);
if (rseq_likely(!ret)) {
node = head;
break;
* __percpu_list_pop is not safe against concurrent accesses. Should
* only be used on lists that are not concurrently modified.
*/
-static struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
+static struct percpu_list_node *__percpu_list_pop(struct percpu_list __rseq_percpu *list, int cpu)
{
+ struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
struct percpu_list_node *node;
- node = list->c[cpu].head;
+ node = cpulist->head;
if (!node)
return NULL;
- list->c[cpu].head = node->next;
+ cpulist->head = node->next;
return node;
}
static void *test_percpu_list_thread(void *arg)
{
long long i, reps;
- struct percpu_list *list = (struct percpu_list *)arg;
+ struct percpu_list __rseq_percpu *list = (struct percpu_list __rseq_percpu *)arg;
if (!opt_disable_rseq && rseq_register_current_thread())
abort();
const int num_threads = opt_threads;
int i, j, ret;
uint64_t sum = 0, expected_sum = 0;
- struct percpu_list list;
+ struct percpu_list __rseq_percpu *list;
pthread_t test_threads[num_threads];
cpu_set_t allowed_cpus;
+ struct rseq_mempool *mempool;
- memset(&list, 0, sizeof(list));
+ mempool = rseq_mempool_create("percpu_list", sizeof(struct percpu_list),
+ 0, CPU_SETSIZE, NULL);
+ if (!mempool) {
+ perror("rseq_mempool_create");
+ abort();
+ }
+ list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool);
+ if (!list) {
+ perror("rseq_percpu_zmalloc");
+ abort();
+ }
/* Generate list entries for every usable cpu. */
sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
for (i = 0; i < CPU_SETSIZE; i++) {
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
for (j = 1; j <= 100; j++) {
+ struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
struct percpu_list_node *node;
expected_sum += j;
node = (struct percpu_list_node *) malloc(sizeof(*node));
assert(node);
node->data = j;
- node->next = list.c[i].head;
- list.c[i].head = node;
+ node->next = cpulist->head;
+ cpulist->head = node;
}
}
for (i = 0; i < num_threads; i++) {
ret = pthread_create(&test_threads[i], NULL,
- test_percpu_list_thread, &list);
+ test_percpu_list_thread, list);
if (ret) {
errno = ret;
perror("pthread_create");
for (i = 0; i < CPU_SETSIZE; i++) {
struct percpu_list_node *node;
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
- while ((node = __percpu_list_pop(&list, i))) {
+ while ((node = __percpu_list_pop(list, i))) {
sum += node->data;
free(node);
}
* test is running).
*/
assert(sum == expected_sum);
+ rseq_percpu_free(list);
+ ret = rseq_mempool_destroy(mempool);
+ if (ret) {
+ perror("rseq_mempool_destroy");
+ abort();
+ }
}
-static bool this_cpu_buffer_push(struct percpu_buffer *buffer,
+static bool this_cpu_buffer_push(struct percpu_buffer __rseq_percpu *buffer,
struct percpu_buffer_node *node,
int *_cpu)
{
int cpu;
for (;;) {
+ struct percpu_buffer *cpubuffer;
intptr_t *targetptr_spec, newval_spec;
intptr_t *targetptr_final, newval_final;
intptr_t offset;
int ret;
- cpu = rseq_cpu_start();
- offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
- if (offset == buffer->c[cpu].buflen)
+ cpu = get_current_cpu_id();
+ cpubuffer = rseq_percpu_ptr(buffer, cpu);
+ offset = RSEQ_READ_ONCE(cpubuffer->offset);
+ if (offset == cpubuffer->buflen)
break;
newval_spec = (intptr_t)node;
- targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
+ targetptr_spec = (intptr_t *)&cpubuffer->array[offset];
newval_final = offset + 1;
- targetptr_final = &buffer->c[cpu].offset;
- if (opt_mb)
- ret = rseq_cmpeqv_trystorev_storev_release(
- targetptr_final, offset, targetptr_spec,
- newval_spec, newval_final, cpu);
- else
- ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
- offset, targetptr_spec, newval_spec,
- newval_final, cpu);
+ targetptr_final = &cpubuffer->offset;
+ ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU,
+ targetptr_final, offset, targetptr_spec,
+ newval_spec, newval_final, cpu);
if (rseq_likely(!ret)) {
result = true;
break;
return result;
}
-static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
+static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
int *_cpu)
{
struct percpu_buffer_node *head;
int cpu;
for (;;) {
+ struct percpu_buffer *cpubuffer;
intptr_t *targetptr, newval;
intptr_t offset;
int ret;
- cpu = rseq_cpu_start();
+ cpu = get_current_cpu_id();
+ cpubuffer = rseq_percpu_ptr(buffer, cpu);
/* Load offset with single-copy atomicity. */
- offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+ offset = RSEQ_READ_ONCE(cpubuffer->offset);
if (offset == 0) {
head = NULL;
break;
}
- head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
+ head = RSEQ_READ_ONCE(cpubuffer->array[offset - 1]);
newval = offset - 1;
- targetptr = (intptr_t *)&buffer->c[cpu].offset;
- ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
- (intptr_t *)&buffer->c[cpu].array[offset - 1],
+ targetptr = (intptr_t *)&cpubuffer->offset;
+ ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ targetptr, offset,
+ (intptr_t *)&cpubuffer->array[offset - 1],
(intptr_t)head, newval, cpu);
if (rseq_likely(!ret))
break;
* __percpu_buffer_pop is not safe against concurrent accesses. Should
* only be used on buffers that are not concurrently modified.
*/
-static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
+static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer,
int cpu)
{
+ struct percpu_buffer *cpubuffer;
struct percpu_buffer_node *head;
intptr_t offset;
- offset = buffer->c[cpu].offset;
+ cpubuffer = rseq_percpu_ptr(buffer, cpu);
+ offset = cpubuffer->offset;
if (offset == 0)
return NULL;
- head = buffer->c[cpu].array[offset - 1];
- buffer->c[cpu].offset = offset - 1;
+ head = cpubuffer->array[offset - 1];
+ cpubuffer->offset = offset - 1;
return head;
}
static void *test_percpu_buffer_thread(void *arg)
{
long long i, reps;
- struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
+ struct percpu_buffer __rseq_percpu *buffer = (struct percpu_buffer __rseq_percpu *)arg;
if (!opt_disable_rseq && rseq_register_current_thread())
abort();
const int num_threads = opt_threads;
int i, j, ret;
uint64_t sum = 0, expected_sum = 0;
- struct percpu_buffer buffer;
+ struct percpu_buffer __rseq_percpu *buffer;
pthread_t test_threads[num_threads];
cpu_set_t allowed_cpus;
+ struct rseq_mempool *mempool;
- memset(&buffer, 0, sizeof(buffer));
+ mempool = rseq_mempool_create("percpu_buffer", sizeof(struct percpu_buffer),
+ 0, CPU_SETSIZE, NULL);
+ if (!mempool) {
+ perror("rseq_mempool_create");
+ abort();
+ }
+ buffer = (struct percpu_buffer __rseq_percpu *)rseq_percpu_zmalloc(mempool);
+ if (!buffer) {
+ perror("rseq_percpu_zmalloc");
+ abort();
+ }
/* Generate list entries for every usable cpu. */
sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
for (i = 0; i < CPU_SETSIZE; i++) {
- if (!CPU_ISSET(i, &allowed_cpus))
+ struct percpu_buffer *cpubuffer;
+
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
+ cpubuffer = rseq_percpu_ptr(buffer, i);
-/* Worse-case is every item in same CPU. */
+/* Worst case: every item lands on the same CPU. */
- buffer.c[i].array =
+ cpubuffer->array =
(struct percpu_buffer_node **)
- malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
+ malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
BUFFER_ITEM_PER_CPU);
- assert(buffer.c[i].array);
- buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
+ assert(cpubuffer->array);
+ cpubuffer->buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
struct percpu_buffer_node *node;
node = (struct percpu_buffer_node *) malloc(sizeof(*node));
assert(node);
node->data = j;
- buffer.c[i].array[j - 1] = node;
- buffer.c[i].offset++;
+ cpubuffer->array[j - 1] = node;
+ cpubuffer->offset++;
}
}
for (i = 0; i < num_threads; i++) {
ret = pthread_create(&test_threads[i], NULL,
- test_percpu_buffer_thread, &buffer);
+ test_percpu_buffer_thread, buffer);
if (ret) {
errno = ret;
perror("pthread_create");
}
for (i = 0; i < CPU_SETSIZE; i++) {
+ struct percpu_buffer *cpubuffer;
struct percpu_buffer_node *node;
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
- while ((node = __percpu_buffer_pop(&buffer, i))) {
+ cpubuffer = rseq_percpu_ptr(buffer, i);
+ while ((node = __percpu_buffer_pop(buffer, i))) {
sum += node->data;
free(node);
}
- free(buffer.c[i].array);
+ free(cpubuffer->array);
}
/*
* test is running).
*/
assert(sum == expected_sum);
+ rseq_percpu_free(buffer);
+ ret = rseq_mempool_destroy(mempool);
+ if (ret) {
+ perror("rseq_mempool_destroy");
+ abort();
+ }
}
-static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
+static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer __rseq_percpu *buffer,
struct percpu_memcpy_buffer_node item,
int *_cpu)
{
int cpu;
for (;;) {
+ struct percpu_memcpy_buffer *cpubuffer;
intptr_t *targetptr_final, newval_final, offset;
char *destptr, *srcptr;
size_t copylen;
int ret;
- cpu = rseq_cpu_start();
+ cpu = get_current_cpu_id();
+ cpubuffer = rseq_percpu_ptr(buffer, cpu);
/* Load offset with single-copy atomicity. */
- offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
- if (offset == buffer->c[cpu].buflen)
+ offset = RSEQ_READ_ONCE(cpubuffer->offset);
+ if (offset == cpubuffer->buflen)
break;
- destptr = (char *)&buffer->c[cpu].array[offset];
+ destptr = (char *)&cpubuffer->array[offset];
srcptr = (char *)&item;
/* copylen must be <= 4kB. */
copylen = sizeof(item);
newval_final = offset + 1;
- targetptr_final = &buffer->c[cpu].offset;
- if (opt_mb)
- ret = rseq_cmpeqv_trymemcpy_storev_release(
- targetptr_final, offset,
- destptr, srcptr, copylen,
- newval_final, cpu);
- else
- ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
- offset, destptr, srcptr, copylen,
- newval_final, cpu);
+ targetptr_final = &cpubuffer->offset;
+ ret = rseq_load_cbne_memcpy_store__ptr(
+ opt_mo, RSEQ_PERCPU,
+ targetptr_final, offset,
+ destptr, srcptr, copylen,
+ newval_final, cpu);
if (rseq_likely(!ret)) {
result = true;
break;
return result;
}
-static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
+static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
struct percpu_memcpy_buffer_node *item,
int *_cpu)
{
int cpu;
for (;;) {
+ struct percpu_memcpy_buffer *cpubuffer;
intptr_t *targetptr_final, newval_final, offset;
char *destptr, *srcptr;
size_t copylen;
int ret;
- cpu = rseq_cpu_start();
+ cpu = get_current_cpu_id();
+ cpubuffer = rseq_percpu_ptr(buffer, cpu);
/* Load offset with single-copy atomicity. */
- offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+ offset = RSEQ_READ_ONCE(cpubuffer->offset);
if (offset == 0)
break;
destptr = (char *)item;
- srcptr = (char *)&buffer->c[cpu].array[offset - 1];
+ srcptr = (char *)&cpubuffer->array[offset - 1];
/* copylen must be <= 4kB. */
copylen = sizeof(*item);
newval_final = offset - 1;
- targetptr_final = &buffer->c[cpu].offset;
- ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
- offset, destptr, srcptr, copylen,
+ targetptr_final = &cpubuffer->offset;
+ ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ targetptr_final, offset, destptr, srcptr, copylen,
newval_final, cpu);
if (rseq_likely(!ret)) {
result = true;
* __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
* only be used on buffers that are not concurrently modified.
*/
-static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
+static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer,
struct percpu_memcpy_buffer_node *item,
int cpu)
{
+ struct percpu_memcpy_buffer *cpubuffer;
intptr_t offset;
- offset = buffer->c[cpu].offset;
+ cpubuffer = rseq_percpu_ptr(buffer, cpu);
+ offset = cpubuffer->offset;
if (offset == 0)
return false;
- memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
- buffer->c[cpu].offset = offset - 1;
+ memcpy(item, &cpubuffer->array[offset - 1], sizeof(*item));
+ cpubuffer->offset = offset - 1;
return true;
}
static void *test_percpu_memcpy_buffer_thread(void *arg)
{
long long i, reps;
- struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
+ struct percpu_memcpy_buffer __rseq_percpu *buffer = (struct percpu_memcpy_buffer __rseq_percpu *)arg;
if (!opt_disable_rseq && rseq_register_current_thread())
abort();
const int num_threads = opt_threads;
int i, j, ret;
uint64_t sum = 0, expected_sum = 0;
- struct percpu_memcpy_buffer buffer;
+ struct percpu_memcpy_buffer __rseq_percpu *buffer;
pthread_t test_threads[num_threads];
cpu_set_t allowed_cpus;
+ struct rseq_mempool *mempool;
- memset(&buffer, 0, sizeof(buffer));
+ mempool = rseq_mempool_create("percpu_memcpy_buffer",
+ sizeof(struct percpu_memcpy_buffer),
+ 0, CPU_SETSIZE, NULL);
+ if (!mempool) {
+ perror("rseq_mempool_create");
+ abort();
+ }
+ buffer = (struct percpu_memcpy_buffer __rseq_percpu *)rseq_percpu_zmalloc(mempool);
+ if (!buffer) {
+ perror("rseq_percpu_zmalloc");
+ abort();
+ }
/* Generate list entries for every usable cpu. */
sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
for (i = 0; i < CPU_SETSIZE; i++) {
- if (!CPU_ISSET(i, &allowed_cpus))
+ struct percpu_memcpy_buffer *cpubuffer;
+
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
+ cpubuffer = rseq_percpu_ptr(buffer, i);
-/* Worse-case is every item in same CPU. */
+/* Worst case: every item lands on the same CPU. */
- buffer.c[i].array =
+ cpubuffer->array =
(struct percpu_memcpy_buffer_node *)
- malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
+ malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE *
MEMCPY_BUFFER_ITEM_PER_CPU);
- assert(buffer.c[i].array);
- buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
+ assert(cpubuffer->array);
+ cpubuffer->buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
expected_sum += 2 * j + 1;
* within a single word, so allocate an object
* for each node.
*/
- buffer.c[i].array[j - 1].data1 = j;
- buffer.c[i].array[j - 1].data2 = j + 1;
- buffer.c[i].offset++;
+ cpubuffer->array[j - 1].data1 = j;
+ cpubuffer->array[j - 1].data2 = j + 1;
+ cpubuffer->offset++;
}
}
for (i = 0; i < num_threads; i++) {
ret = pthread_create(&test_threads[i], NULL,
test_percpu_memcpy_buffer_thread,
- &buffer);
+ buffer);
if (ret) {
errno = ret;
perror("pthread_create");
for (i = 0; i < CPU_SETSIZE; i++) {
struct percpu_memcpy_buffer_node item;
+ struct percpu_memcpy_buffer *cpubuffer;
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
- while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
+ cpubuffer = rseq_percpu_ptr(buffer, i);
+ while (__percpu_memcpy_buffer_pop(buffer, &item, i)) {
sum += item.data1;
sum += item.data2;
}
- free(buffer.c[i].array);
+ free(cpubuffer->array);
}
/*
* test is running).
*/
assert(sum == expected_sum);
+ rseq_percpu_free(buffer);
+ ret = rseq_mempool_destroy(mempool);
+ if (ret) {
+ perror("rseq_mempool_destroy");
+ abort();
+ }
}
-
static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo)
{
signals_delivered++;
return ret;
}
+static
+bool membarrier_private_expedited_rseq_available(void)
+{
+ int status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
+
+ if (status < 0) {
+ perror("membarrier");
+ return false;
+ }
+ if (!(status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ))
+ return false;
+ return true;
+}
+
/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
-#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+#ifdef TEST_MEMBARRIER
struct test_membarrier_thread_args {
+ struct rseq_mempool *mempool;
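+ /* The currently "active" list; swapped back and forth by the manager. */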
+ struct percpu_list __rseq_percpu *percpu_list_ptr;
int stop;
- intptr_t percpu_list_ptr;
};
/* Worker threads modify data in their "active" percpu lists. */
{
struct test_membarrier_thread_args *args =
(struct test_membarrier_thread_args *)arg;
- const int iters = opt_reps;
- int i;
+ const long long iters = opt_reps;
+ long long i;
if (rseq_register_current_thread()) {
fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
int ret;
do {
- int cpu = rseq_cpu_start();
+ int cpu = get_current_cpu_id();
+ struct percpu_list __rseq_percpu *list = RSEQ_READ_ONCE(args->percpu_list_ptr);
+ struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu);
- ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
- sizeof(struct percpu_list_entry) * cpu, 1, cpu);
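+ /*
+ * Within one rseq critical section: check that the active list
+ * pointer still equals the list loaded above, then increment the
+ * data field (at offset 0) of this cpu's first node. An abort or
+ * a pointer change restarts the loop.
+ */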
+ ret = rseq_load_cbne_load_add_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ (intptr_t *) &args->percpu_list_ptr,
+ (intptr_t) list, (intptr_t *) &cpulist->head, 0, 1, cpu);
} while (rseq_unlikely(ret));
}
}
static
-void test_membarrier_init_percpu_list(struct percpu_list *list)
+struct percpu_list __rseq_percpu *test_membarrier_alloc_percpu_list(struct rseq_mempool *mempool)
{
+ struct percpu_list __rseq_percpu *list;
int i;
- memset(list, 0, sizeof(*list));
+ list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool);
+ if (!list) {
+ perror("rseq_percpu_zmalloc");
+ return NULL;
+ }
for (i = 0; i < CPU_SETSIZE; i++) {
+ struct percpu_list *cpulist = rseq_percpu_ptr(list, i);
struct percpu_list_node *node;
node = (struct percpu_list_node *) malloc(sizeof(*node));
assert(node);
node->data = 0;
node->next = NULL;
- list->c[i].head = node;
+ cpulist->head = node;
}
+ return list;
}
static
-void test_membarrier_free_percpu_list(struct percpu_list *list)
+void test_membarrier_free_percpu_list(struct percpu_list __rseq_percpu *list)
{
int i;
for (i = 0; i < CPU_SETSIZE; i++)
- free(list->c[i].head);
+ free(rseq_percpu_ptr(list, i)->head);
+ rseq_percpu_free(list);
}
static
-int sys_membarrier(int cmd, int flags, int cpu_id)
+long long test_membarrier_count_percpu_list(struct percpu_list __rseq_percpu *list)
{
- return syscall(__NR_membarrier, cmd, flags, cpu_id);
+ long long total_count = 0;
+ int i;
+
+ for (i = 0; i < CPU_SETSIZE; i++)
+ total_count += rseq_percpu_ptr(list, i)->head->data;
+ return total_count;
}
/*
{
struct test_membarrier_thread_args *args =
(struct test_membarrier_thread_args *)arg;
- struct percpu_list list_a, list_b;
+ struct percpu_list __rseq_percpu *list_a, __rseq_percpu *list_b;
intptr_t expect_a = 0, expect_b = 0;
int cpu_a = 0, cpu_b = 0;
+ struct rseq_mempool *mempool;
+ int ret;
+ long long total_count = 0;
+
+ mempool = rseq_mempool_create("percpu_list", sizeof(struct percpu_list),
+ 0, CPU_SETSIZE, NULL);
+ if (!mempool) {
+ perror("rseq_mempool_create");
+ abort();
+ }
+ args->mempool = mempool;
if (rseq_register_current_thread()) {
fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
}
/* Init lists. */
- test_membarrier_init_percpu_list(&list_a);
- test_membarrier_init_percpu_list(&list_b);
+ list_a = test_membarrier_alloc_percpu_list(mempool);
+ assert(list_a);
+ list_b = test_membarrier_alloc_percpu_list(mempool);
+ assert(list_b);
/* Initialize lists before publishing them. */
rseq_smp_wmb();
- RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
+ RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
while (!RSEQ_READ_ONCE(args->stop)) {
/* list_a is "active". */
* As list_b is "inactive", we should never see changes
* to list_b.
*/
- if (expect_b != RSEQ_READ_ONCE(list_b.c[cpu_b].head->data)) {
+ if (expect_b != RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data)) {
fprintf(stderr, "Membarrier test failed\n");
abort();
}
/* Make list_b "active". */
- RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_b);
- if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
- MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
+ RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_b);
+ if (rseq_membarrier_expedited(cpu_a) &&
errno != ENXIO /* missing CPU */) {
perror("sys_membarrier");
abort();
* Cpu A should now only modify list_b, so the values
* in list_a should be stable.
*/
- expect_a = RSEQ_READ_ONCE(list_a.c[cpu_a].head->data);
+ expect_a = RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data);
cpu_b = rand() % CPU_SETSIZE;
/*
* As list_a is "inactive", we should never see changes
* to list_a.
*/
- if (expect_a != RSEQ_READ_ONCE(list_a.c[cpu_a].head->data)) {
+ if (expect_a != RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data)) {
fprintf(stderr, "Membarrier test failed\n");
abort();
}
/* Make list_a "active". */
- RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a);
- if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
- MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
+ RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a);
+ if (rseq_membarrier_expedited(cpu_b) &&
errno != ENXIO /* missing CPU */) {
perror("sys_membarrier");
abort();
}
/* Remember a value from list_b. */
- expect_b = RSEQ_READ_ONCE(list_b.c[cpu_b].head->data);
+ expect_b = RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data);
}
- test_membarrier_free_percpu_list(&list_a);
- test_membarrier_free_percpu_list(&list_b);
+ total_count += test_membarrier_count_percpu_list(list_a);
+ total_count += test_membarrier_count_percpu_list(list_b);
+
+ /* Validate that we observe the right number of increments. */
+ if (total_count != opt_threads * opt_reps) {
+ fprintf(stderr, "Error: Observed %lld increments, expected %lld\n",
+ total_count, opt_threads * opt_reps);
+ abort();
+ }
+ test_membarrier_free_percpu_list(list_a);
+ test_membarrier_free_percpu_list(list_b);
if (rseq_unregister_current_thread()) {
fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
errno, strerror(errno));
abort();
}
+ ret = rseq_mempool_destroy(mempool);
+ if (ret) {
+ perror("rseq_mempool_destroy");
+ abort();
+ }
+
return NULL;
}
pthread_t manager_thread;
int i, ret;
+ if (!membarrier_private_expedited_rseq_available()) {
+ fprintf(stderr, "Membarrier private expedited rseq not available. "
+ "Skipping membarrier test.\n");
+ return;
+ }
if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
perror("sys_membarrier");
abort();
}
+ thread_args.percpu_list_ptr = NULL;
thread_args.stop = 0;
- thread_args.percpu_list_ptr = 0;
ret = pthread_create(&manager_thread, NULL,
test_membarrier_manager_thread, &thread_args);
if (ret) {
abort();
}
}
-#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
+#else /* TEST_MEMBARRIER */
static
void test_membarrier(void)
{
- fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
+ if (!membarrier_private_expedited_rseq_available()) {
+ fprintf(stderr, "Membarrier private expedited rseq not available. "
+ "Skipping membarrier test.\n");
+ return;
+ }
+ fprintf(stderr, "rseq_load_cbne_load_add_load_add_store__ptr is not implemented on this architecture. "
"Skipping membarrier test.\n");
}
#endif
verbose = 1;
break;
case 'M':
- opt_mb = 1;
+ opt_mo = RSEQ_MO_RELEASE;
break;
case 'c':
- if (rseq_available()) {
+ if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) {
printf_verbose("The rseq syscall is available.\n");
goto end;
} else {
if (!opt_disable_rseq && rseq_register_current_thread())
goto error;
+ if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
+ printf_verbose("The rseq cpu id getter is unavailable\n");
+ goto no_rseq;
+ }
switch (opt_test) {
case 's':
printf_verbose("spinlock\n");