X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=tests%2Fparam_test.c;h=bb8b15a3957bb984902ad34051a1d79b90d40854;hb=06e0b1c08e655c5b6fb855420e31e27c8c4bdbe2;hp=4a45b5ca9758ec9b4f736e0069b79ab5215a4da4;hpb=d35eae6b611618c5d05c4dc4ce0327e98b252c99;p=librseq.git diff --git a/tests/param_test.c b/tests/param_test.c index 4a45b5c..bb8b15a 100644 --- a/tests/param_test.c +++ b/tests/param_test.c @@ -1,4 +1,5 @@ -// SPDX-License-Identifier: LGPL-2.1-only +// SPDX-License-Identifier: MIT +// SPDX-FileCopyrightText: 2020-2022 Mathieu Desnoyers #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif @@ -18,6 +19,8 @@ #include #include #include +#include +#include #if LINUX_VERSION_CODE < KERNEL_VERSION(5,10,0) enum { @@ -44,24 +47,20 @@ static int opt_modulo, verbose; static int opt_yield, opt_signal, opt_sleep, opt_disable_rseq, opt_threads = 200, - opt_disable_mod = 0, opt_test = 's', opt_mb = 0; + opt_disable_mod = 0, opt_test = 's'; -#ifndef RSEQ_SKIP_FASTPATH static long long opt_reps = 5000; -#else -static long long opt_reps = 100; -#endif static __thread __attribute__((tls_model("initial-exec"))) unsigned int signals_delivered; -#ifndef BENCHMARK - static inline pid_t rseq_gettid(void) { return syscall(__NR_gettid); } +#ifndef BENCHMARK + static __thread __attribute__((tls_model("initial-exec"), unused)) int yield_mod_cnt, nr_abort; @@ -78,8 +77,13 @@ int yield_mod_cnt, nr_abort; #define RSEQ_INJECT_CLOBBER \ , INJECT_ASM_REG -#define RSEQ_INJECT_ASM(n) \ - "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \ +/* + * Use ip-relative addressing to get the loop counter. + */ +#define __RSEQ_INJECT_ASM(n, ref_ip, ref_label) \ + "movl " __rseq_str(ref_ip) ", %%" INJECT_ASM_REG "\n\t" \ + "leal ( asm_loop_cnt_" #n " - " __rseq_str(ref_label) "b)(%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \ + "movl (%%" INJECT_ASM_REG "), %%" INJECT_ASM_REG "\n\t" \ "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \ "jz 333f\n\t" \ "222:\n\t" \ @@ -87,6 +91,9 @@ int yield_mod_cnt, nr_abort; "jnz 222b\n\t" \ "333:\n\t" +#define RSEQ_INJECT_ASM(n) \ + __RSEQ_INJECT_ASM(n, %[ref_ip], RSEQ_ASM_REF_LABEL) + #elif defined(__x86_64__) #define INJECT_ASM_REG_P "rax" @@ -221,6 +228,29 @@ int yield_mod_cnt, nr_abort; "bnez " INJECT_ASM_REG ", 222b\n\t" \ "333:\n\t" +#elif defined(__riscv) + +#define RSEQ_INJECT_INPUT \ + , [loop_cnt_1]"m"(loop_cnt[1]) \ + , [loop_cnt_2]"m"(loop_cnt[2]) \ + , [loop_cnt_3]"m"(loop_cnt[3]) \ + , [loop_cnt_4]"m"(loop_cnt[4]) \ + , [loop_cnt_5]"m"(loop_cnt[5]) \ + , [loop_cnt_6]"m"(loop_cnt[6]) + +#define INJECT_ASM_REG "t1" + +#define RSEQ_INJECT_CLOBBER \ + , INJECT_ASM_REG + +#define RSEQ_INJECT_ASM(n) \ + "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ + "beqz " INJECT_ASM_REG ", 333f\n\t" \ + "222:\n\t" \ + "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \ + "bnez " INJECT_ASM_REG ", 222b\n\t" \ + "333:\n\t" + #else #error unsupported target #endif @@ -258,35 +288,94 @@ int yield_mod_cnt, nr_abort; #include -struct percpu_lock_entry { - intptr_t v; -} __attribute__((aligned(128))); +static enum rseq_mo opt_mo = RSEQ_MO_RELAXED; + +static int sys_membarrier(int cmd, int flags, int cpu_id) +{ + return syscall(__NR_membarrier, cmd, flags, cpu_id); +} + +#ifdef rseq_arch_has_load_cbne_load_add_load_add_store +#define TEST_MEMBARRIER +#endif + +#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID +# define RSEQ_PERCPU RSEQ_PERCPU_MM_CID +static +int get_current_cpu_id(void) +{ + return rseq_current_mm_cid(); +} +static +bool rseq_validate_cpu_id(void) +{ + return rseq_mm_cid_available(); +} +static +bool rseq_use_cpu_index(void) +{ + return false; /* Use mm_cid */ +} +# ifdef TEST_MEMBARRIER +/* + * Membarrier does not currently support targeting a mm_cid, so + * issue the barrier on all cpus. + */ +static +int rseq_membarrier_expedited(__attribute__ ((unused)) int cpu) +{ + return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, + 0, 0); +} +# endif /* TEST_MEMBARRIER */ +#else +# define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID +static +int get_current_cpu_id(void) +{ + return rseq_cpu_start(); +} +static +bool rseq_validate_cpu_id(void) +{ + return rseq_current_cpu_raw() >= 0; +} +static +bool rseq_use_cpu_index(void) +{ + return true; /* Use cpu_id as index. */ +} +# ifdef TEST_MEMBARRIER +static +int rseq_membarrier_expedited(int cpu) +{ + return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, + MEMBARRIER_CMD_FLAG_CPU, cpu); +} +# endif /* TEST_MEMBARRIER */ +#endif struct percpu_lock { - struct percpu_lock_entry c[CPU_SETSIZE]; + intptr_t v; }; -struct test_data_entry { - intptr_t count; -} __attribute__((aligned(128))); - struct spinlock_test_data { struct percpu_lock lock; - struct test_data_entry c[CPU_SETSIZE]; + intptr_t count; }; struct spinlock_thread_test_data { - struct spinlock_test_data *data; + struct spinlock_test_data __rseq_percpu *data; long long reps; int reg; }; struct inc_test_data { - struct test_data_entry c[CPU_SETSIZE]; + intptr_t count; }; struct inc_thread_test_data { - struct inc_test_data *data; + struct inc_test_data __rseq_percpu *data; long long reps; int reg; }; @@ -296,12 +385,8 @@ struct percpu_list_node { struct percpu_list_node *next; }; -struct percpu_list_entry { - struct percpu_list_node *head; -} __attribute__((aligned(128))); - struct percpu_list { - struct percpu_list_entry c[CPU_SETSIZE]; + struct percpu_list_node *head; }; #define BUFFER_ITEM_PER_CPU 100 @@ -310,14 +395,10 @@ struct percpu_buffer_node { intptr_t data; }; -struct percpu_buffer_entry { +struct percpu_buffer { intptr_t offset; intptr_t buflen; struct percpu_buffer_node **array; -} __attribute__((aligned(128))); - -struct percpu_buffer { - struct percpu_buffer_entry c[CPU_SETSIZE]; }; #define MEMCPY_BUFFER_ITEM_PER_CPU 100 @@ -327,26 +408,28 @@ struct percpu_memcpy_buffer_node { uint64_t data2; }; -struct percpu_memcpy_buffer_entry { +struct percpu_memcpy_buffer { intptr_t offset; intptr_t buflen; struct percpu_memcpy_buffer_node *array; -} __attribute__((aligned(128))); - -struct percpu_memcpy_buffer { - struct percpu_memcpy_buffer_entry c[CPU_SETSIZE]; }; /* A simple percpu spinlock. Grabs lock on current cpu. */ -static int rseq_this_cpu_lock(struct percpu_lock *lock) +static int rseq_this_cpu_lock(struct percpu_lock __rseq_percpu *lock) { int cpu; for (;;) { int ret; - cpu = rseq_cpu_start(); - ret = rseq_cmpeqv_storev(&lock->c[cpu].v, + cpu = get_current_cpu_id(); + if (cpu < 0) { + fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n", + getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu); + abort(); + } + ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU, + &rseq_percpu_ptr(lock, cpu)->v, 0, 1, cpu); if (rseq_likely(!ret)) break; @@ -360,20 +443,20 @@ static int rseq_this_cpu_lock(struct percpu_lock *lock) return cpu; } -static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu) +static void rseq_percpu_unlock(struct percpu_lock __rseq_percpu *lock, int cpu) { - assert(lock->c[cpu].v == 1); + assert(rseq_percpu_ptr(lock, cpu)->v == 1); /* * Release lock, with release semantic. Matches * rseq_smp_acquire__after_ctrl_dep(). */ - rseq_smp_store_release(&lock->c[cpu].v, 0); + rseq_smp_store_release(&rseq_percpu_ptr(lock, cpu)->v, 0); } static void *test_percpu_spinlock_thread(void *arg) { struct spinlock_thread_test_data *thread_data = (struct spinlock_thread_test_data *) arg; - struct spinlock_test_data *data = thread_data->data; + struct spinlock_test_data __rseq_percpu *data = thread_data->data; long long i, reps; if (!opt_disable_rseq && thread_data->reg && @@ -382,7 +465,7 @@ static void *test_percpu_spinlock_thread(void *arg) reps = thread_data->reps; for (i = 0; i < reps; i++) { int cpu = rseq_this_cpu_lock(&data->lock); - data->c[cpu].count++; + rseq_percpu_ptr(data, cpu)->count++; rseq_percpu_unlock(&data->lock, cpu); #ifndef BENCHMARK if (i != 0 && !(i % (reps / 10))) @@ -410,17 +493,30 @@ static void test_percpu_spinlock(void) int i, ret; uint64_t sum; pthread_t test_threads[num_threads]; - struct spinlock_test_data data; + struct spinlock_test_data __rseq_percpu *data; struct spinlock_thread_test_data thread_data[num_threads]; + struct rseq_mempool *mempool; + + mempool = rseq_mempool_create("spinlock_test_data", + sizeof(struct spinlock_test_data), + 0, CPU_SETSIZE, NULL); + if (!mempool) { + perror("rseq_mempool_create"); + abort(); + } + data = (struct spinlock_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool); + if (!data) { + perror("rseq_percpu_zmalloc"); + abort(); + } - memset(&data, 0, sizeof(data)); for (i = 0; i < num_threads; i++) { thread_data[i].reps = opt_reps; if (opt_disable_mod <= 0 || (i % opt_disable_mod)) thread_data[i].reg = 1; else thread_data[i].reg = 0; - thread_data[i].data = &data; + thread_data[i].data = data; ret = pthread_create(&test_threads[i], NULL, test_percpu_spinlock_thread, &thread_data[i]); @@ -442,15 +538,21 @@ static void test_percpu_spinlock(void) sum = 0; for (i = 0; i < CPU_SETSIZE; i++) - sum += data.c[i].count; + sum += rseq_percpu_ptr(data, i)->count; assert(sum == (uint64_t)opt_reps * num_threads); + rseq_percpu_free(data); + ret = rseq_mempool_destroy(mempool); + if (ret) { + perror("rseq_mempool_destroy"); + abort(); + } } static void *test_percpu_inc_thread(void *arg) { struct inc_thread_test_data *thread_data = (struct inc_thread_test_data *) arg; - struct inc_test_data *data = thread_data->data; + struct inc_test_data __rseq_percpu *data = thread_data->data; long long i, reps; if (!opt_disable_rseq && thread_data->reg && @@ -463,8 +565,9 @@ static void *test_percpu_inc_thread(void *arg) do { int cpu; - cpu = rseq_cpu_start(); - ret = rseq_addv(&data->c[cpu].count, 1, cpu); + cpu = get_current_cpu_id(); + ret = rseq_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU, + &rseq_percpu_ptr(data, cpu)->count, 1, cpu); } while (rseq_unlikely(ret)); #ifndef BENCHMARK if (i != 0 && !(i % (reps / 10))) @@ -486,17 +589,30 @@ static void test_percpu_inc(void) int i, ret; uint64_t sum; pthread_t test_threads[num_threads]; - struct inc_test_data data; + struct inc_test_data __rseq_percpu *data; struct inc_thread_test_data thread_data[num_threads]; + struct rseq_mempool *mempool; + + mempool = rseq_mempool_create("inc_test_data", + sizeof(struct inc_test_data), + 0, CPU_SETSIZE, NULL); + if (!mempool) { + perror("rseq_mempool_create"); + abort(); + } + data = (struct inc_test_data __rseq_percpu *)rseq_percpu_zmalloc(mempool); + if (!data) { + perror("rseq_percpu_zmalloc"); + abort(); + } - memset(&data, 0, sizeof(data)); for (i = 0; i < num_threads; i++) { thread_data[i].reps = opt_reps; if (opt_disable_mod <= 0 || (i % opt_disable_mod)) thread_data[i].reg = 1; else thread_data[i].reg = 0; - thread_data[i].data = &data; + thread_data[i].data = data; ret = pthread_create(&test_threads[i], NULL, test_percpu_inc_thread, &thread_data[i]); @@ -518,12 +634,18 @@ static void test_percpu_inc(void) sum = 0; for (i = 0; i < CPU_SETSIZE; i++) - sum += data.c[i].count; + sum += rseq_percpu_ptr(data, i)->count; assert(sum == (uint64_t)opt_reps * num_threads); + rseq_percpu_free(data); + ret = rseq_mempool_destroy(mempool); + if (ret) { + perror("rseq_mempool_destroy"); + abort(); + } } -static void this_cpu_list_push(struct percpu_list *list, +static void this_cpu_list_push(struct percpu_list __rseq_percpu *list, struct percpu_list_node *node, int *_cpu) { @@ -531,15 +653,18 @@ static void this_cpu_list_push(struct percpu_list *list, for (;;) { intptr_t *targetptr, newval, expect; + struct percpu_list *cpulist; int ret; - cpu = rseq_cpu_start(); + cpu = get_current_cpu_id(); + cpulist = rseq_percpu_ptr(list, cpu); /* Load list->c[cpu].head with single-copy atomicity. */ - expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head); + expect = (intptr_t)RSEQ_READ_ONCE(cpulist->head); newval = (intptr_t)node; - targetptr = (intptr_t *)&list->c[cpu].head; + targetptr = (intptr_t *)&cpulist->head; node->next = (struct percpu_list_node *)expect; - ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu); + ret = rseq_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU, + targetptr, expect, newval, cpu); if (rseq_likely(!ret)) break; /* Retry if comparison fails or rseq aborts. */ @@ -553,7 +678,7 @@ static void this_cpu_list_push(struct percpu_list *list, * rseq primitive allows us to implement pop without concerns over * ABA-type races. */ -static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, +static struct percpu_list_node *this_cpu_list_pop(struct percpu_list __rseq_percpu *list, int *_cpu) { struct percpu_list_node *node = NULL; @@ -562,16 +687,19 @@ static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, for (;;) { struct percpu_list_node *head; intptr_t *targetptr, expectnot, *load; + struct percpu_list *cpulist; long offset; int ret; - cpu = rseq_cpu_start(); - targetptr = (intptr_t *)&list->c[cpu].head; + cpu = get_current_cpu_id(); + cpulist = rseq_percpu_ptr(list, cpu); + targetptr = (intptr_t *)&cpulist->head; expectnot = (intptr_t)NULL; offset = offsetof(struct percpu_list_node, next); load = (intptr_t *)&head; - ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot, - offset, load, cpu); + ret = rseq_load_cbeq_store_add_load_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU, + targetptr, expectnot, + offset, load, cpu); if (rseq_likely(!ret)) { node = head; break; @@ -589,21 +717,22 @@ static struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list, * __percpu_list_pop is not safe against concurrent accesses. Should * only be used on lists that are not concurrently modified. */ -static struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu) +static struct percpu_list_node *__percpu_list_pop(struct percpu_list __rseq_percpu *list, int cpu) { + struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu); struct percpu_list_node *node; - node = list->c[cpu].head; + node = cpulist->head; if (!node) return NULL; - list->c[cpu].head = node->next; + cpulist->head = node->next; return node; } static void *test_percpu_list_thread(void *arg) { long long i, reps; - struct percpu_list *list = (struct percpu_list *)arg; + struct percpu_list __rseq_percpu *list = (struct percpu_list __rseq_percpu *)arg; if (!opt_disable_rseq && rseq_register_current_thread()) abort(); @@ -633,18 +762,30 @@ static void test_percpu_list(void) const int num_threads = opt_threads; int i, j, ret; uint64_t sum = 0, expected_sum = 0; - struct percpu_list list; + struct percpu_list __rseq_percpu *list; pthread_t test_threads[num_threads]; cpu_set_t allowed_cpus; + struct rseq_mempool *mempool; - memset(&list, 0, sizeof(list)); + mempool = rseq_mempool_create("percpu_list", sizeof(struct percpu_list), + 0, CPU_SETSIZE, NULL); + if (!mempool) { + perror("rseq_mempool_create"); + abort(); + } + list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool); + if (!list) { + perror("rseq_percpu_zmalloc"); + abort(); + } /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; for (j = 1; j <= 100; j++) { + struct percpu_list *cpulist = rseq_percpu_ptr(list, i); struct percpu_list_node *node; expected_sum += j; @@ -652,14 +793,14 @@ static void test_percpu_list(void) node = (struct percpu_list_node *) malloc(sizeof(*node)); assert(node); node->data = j; - node->next = list.c[i].head; - list.c[i].head = node; + node->next = cpulist->head; + cpulist->head = node; } } for (i = 0; i < num_threads; i++) { ret = pthread_create(&test_threads[i], NULL, - test_percpu_list_thread, &list); + test_percpu_list_thread, list); if (ret) { errno = ret; perror("pthread_create"); @@ -679,10 +820,10 @@ static void test_percpu_list(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_list_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; - while ((node = __percpu_list_pop(&list, i))) { + while ((node = __percpu_list_pop(list, i))) { sum += node->data; free(node); } @@ -694,9 +835,15 @@ static void test_percpu_list(void) * test is running). */ assert(sum == expected_sum); + rseq_percpu_free(list); + ret = rseq_mempool_destroy(mempool); + if (ret) { + perror("rseq_mempool_destroy"); + abort(); + } } -static bool this_cpu_buffer_push(struct percpu_buffer *buffer, +static bool this_cpu_buffer_push(struct percpu_buffer __rseq_percpu *buffer, struct percpu_buffer_node *node, int *_cpu) { @@ -704,27 +851,24 @@ static bool this_cpu_buffer_push(struct percpu_buffer *buffer, int cpu; for (;;) { + struct percpu_buffer *cpubuffer; intptr_t *targetptr_spec, newval_spec; intptr_t *targetptr_final, newval_final; intptr_t offset; int ret; - cpu = rseq_cpu_start(); - offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); - if (offset == buffer->c[cpu].buflen) + cpu = get_current_cpu_id(); + cpubuffer = rseq_percpu_ptr(buffer, cpu); + offset = RSEQ_READ_ONCE(cpubuffer->offset); + if (offset == cpubuffer->buflen) break; newval_spec = (intptr_t)node; - targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset]; + targetptr_spec = (intptr_t *)&cpubuffer->array[offset]; newval_final = offset + 1; - targetptr_final = &buffer->c[cpu].offset; - if (opt_mb) - ret = rseq_cmpeqv_trystorev_storev_release( - targetptr_final, offset, targetptr_spec, - newval_spec, newval_final, cpu); - else - ret = rseq_cmpeqv_trystorev_storev(targetptr_final, - offset, targetptr_spec, newval_spec, - newval_final, cpu); + targetptr_final = &cpubuffer->offset; + ret = rseq_load_cbne_store_store__ptr(opt_mo, RSEQ_PERCPU, + targetptr_final, offset, targetptr_spec, + newval_spec, newval_final, cpu); if (rseq_likely(!ret)) { result = true; break; @@ -736,29 +880,32 @@ static bool this_cpu_buffer_push(struct percpu_buffer *buffer, return result; } -static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer, +static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer, int *_cpu) { struct percpu_buffer_node *head; int cpu; for (;;) { + struct percpu_buffer *cpubuffer; intptr_t *targetptr, newval; intptr_t offset; int ret; - cpu = rseq_cpu_start(); + cpu = get_current_cpu_id(); + cpubuffer = rseq_percpu_ptr(buffer, cpu); /* Load offset with single-copy atomicity. */ - offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); + offset = RSEQ_READ_ONCE(cpubuffer->offset); if (offset == 0) { head = NULL; break; } - head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]); + head = RSEQ_READ_ONCE(cpubuffer->array[offset - 1]); newval = offset - 1; - targetptr = (intptr_t *)&buffer->c[cpu].offset; - ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset, - (intptr_t *)&buffer->c[cpu].array[offset - 1], + targetptr = (intptr_t *)&cpubuffer->offset; + ret = rseq_load_cbne_load_cbne_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU, + targetptr, offset, + (intptr_t *)&cpubuffer->array[offset - 1], (intptr_t)head, newval, cpu); if (rseq_likely(!ret)) break; @@ -773,24 +920,26 @@ static struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buff * __percpu_buffer_pop is not safe against concurrent accesses. Should * only be used on buffers that are not concurrently modified. */ -static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer, +static struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer __rseq_percpu *buffer, int cpu) { + struct percpu_buffer *cpubuffer; struct percpu_buffer_node *head; intptr_t offset; - offset = buffer->c[cpu].offset; + cpubuffer = rseq_percpu_ptr(buffer, cpu); + offset = cpubuffer->offset; if (offset == 0) return NULL; - head = buffer->c[cpu].array[offset - 1]; - buffer->c[cpu].offset = offset - 1; + head = cpubuffer->array[offset - 1]; + cpubuffer->offset = offset - 1; return head; } static void *test_percpu_buffer_thread(void *arg) { long long i, reps; - struct percpu_buffer *buffer = (struct percpu_buffer *)arg; + struct percpu_buffer __rseq_percpu *buffer = (struct percpu_buffer __rseq_percpu *)arg; if (!opt_disable_rseq && rseq_register_current_thread()) abort(); @@ -824,24 +973,38 @@ static void test_percpu_buffer(void) const int num_threads = opt_threads; int i, j, ret; uint64_t sum = 0, expected_sum = 0; - struct percpu_buffer buffer; + struct percpu_buffer __rseq_percpu *buffer; pthread_t test_threads[num_threads]; cpu_set_t allowed_cpus; + struct rseq_mempool *mempool; - memset(&buffer, 0, sizeof(buffer)); + mempool = rseq_mempool_create("percpu_buffer", sizeof(struct percpu_buffer), + 0, CPU_SETSIZE, NULL); + if (!mempool) { + perror("rseq_mempool_create"); + abort(); + } + buffer = (struct percpu_buffer __rseq_percpu *)rseq_percpu_zmalloc(mempool); + if (!buffer) { + perror("rseq_percpu_zmalloc"); + abort(); + } /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + struct percpu_buffer *cpubuffer; + + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; + cpubuffer = rseq_percpu_ptr(buffer, i); /* Worse-case is every item in same CPU. */ - buffer.c[i].array = + cpubuffer->array = (struct percpu_buffer_node **) - malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * + malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE * BUFFER_ITEM_PER_CPU); - assert(buffer.c[i].array); - buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU; + assert(cpubuffer->array); + cpubuffer->buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU; for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) { struct percpu_buffer_node *node; @@ -857,14 +1020,14 @@ static void test_percpu_buffer(void) node = (struct percpu_buffer_node *) malloc(sizeof(*node)); assert(node); node->data = j; - buffer.c[i].array[j - 1] = node; - buffer.c[i].offset++; + cpubuffer->array[j - 1] = node; + cpubuffer->offset++; } } for (i = 0; i < num_threads; i++) { ret = pthread_create(&test_threads[i], NULL, - test_percpu_buffer_thread, &buffer); + test_percpu_buffer_thread, buffer); if (ret) { errno = ret; perror("pthread_create"); @@ -882,16 +1045,18 @@ static void test_percpu_buffer(void) } for (i = 0; i < CPU_SETSIZE; i++) { + struct percpu_buffer *cpubuffer; struct percpu_buffer_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; - while ((node = __percpu_buffer_pop(&buffer, i))) { + cpubuffer = rseq_percpu_ptr(buffer, i); + while ((node = __percpu_buffer_pop(buffer, i))) { sum += node->data; free(node); } - free(buffer.c[i].array); + free(cpubuffer->array); } /* @@ -900,9 +1065,15 @@ static void test_percpu_buffer(void) * test is running). */ assert(sum == expected_sum); + rseq_percpu_free(buffer); + ret = rseq_mempool_destroy(mempool); + if (ret) { + perror("rseq_mempool_destroy"); + abort(); + } } -static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer, +static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer __rseq_percpu *buffer, struct percpu_memcpy_buffer_node item, int *_cpu) { @@ -910,31 +1081,29 @@ static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer, int cpu; for (;;) { + struct percpu_memcpy_buffer *cpubuffer; intptr_t *targetptr_final, newval_final, offset; char *destptr, *srcptr; size_t copylen; int ret; - cpu = rseq_cpu_start(); + cpu = get_current_cpu_id(); + cpubuffer = rseq_percpu_ptr(buffer, cpu); /* Load offset with single-copy atomicity. */ - offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); - if (offset == buffer->c[cpu].buflen) + offset = RSEQ_READ_ONCE(cpubuffer->offset); + if (offset == cpubuffer->buflen) break; - destptr = (char *)&buffer->c[cpu].array[offset]; + destptr = (char *)&cpubuffer->array[offset]; srcptr = (char *)&item; /* copylen must be <= 4kB. */ copylen = sizeof(item); newval_final = offset + 1; - targetptr_final = &buffer->c[cpu].offset; - if (opt_mb) - ret = rseq_cmpeqv_trymemcpy_storev_release( - targetptr_final, offset, - destptr, srcptr, copylen, - newval_final, cpu); - else - ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, - offset, destptr, srcptr, copylen, - newval_final, cpu); + targetptr_final = &cpubuffer->offset; + ret = rseq_load_cbne_memcpy_store__ptr( + opt_mo, RSEQ_PERCPU, + targetptr_final, offset, + destptr, srcptr, copylen, + newval_final, cpu); if (rseq_likely(!ret)) { result = true; break; @@ -946,7 +1115,7 @@ static bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer, return result; } -static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, +static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer, struct percpu_memcpy_buffer_node *item, int *_cpu) { @@ -954,24 +1123,26 @@ static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, int cpu; for (;;) { + struct percpu_memcpy_buffer *cpubuffer; intptr_t *targetptr_final, newval_final, offset; char *destptr, *srcptr; size_t copylen; int ret; - cpu = rseq_cpu_start(); + cpu = get_current_cpu_id(); + cpubuffer = rseq_percpu_ptr(buffer, cpu); /* Load offset with single-copy atomicity. */ - offset = RSEQ_READ_ONCE(buffer->c[cpu].offset); + offset = RSEQ_READ_ONCE(cpubuffer->offset); if (offset == 0) break; destptr = (char *)item; - srcptr = (char *)&buffer->c[cpu].array[offset - 1]; + srcptr = (char *)&cpubuffer->array[offset - 1]; /* copylen must be <= 4kB. */ copylen = sizeof(*item); newval_final = offset - 1; - targetptr_final = &buffer->c[cpu].offset; - ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final, - offset, destptr, srcptr, copylen, + targetptr_final = &cpubuffer->offset; + ret = rseq_load_cbne_memcpy_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU, + targetptr_final, offset, destptr, srcptr, copylen, newval_final, cpu); if (rseq_likely(!ret)) { result = true; @@ -988,24 +1159,26 @@ static bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should * only be used on buffers that are not concurrently modified. */ -static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, +static bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer __rseq_percpu *buffer, struct percpu_memcpy_buffer_node *item, int cpu) { + struct percpu_memcpy_buffer *cpubuffer; intptr_t offset; - offset = buffer->c[cpu].offset; + cpubuffer = rseq_percpu_ptr(buffer, cpu); + offset = cpubuffer->offset; if (offset == 0) return false; - memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item)); - buffer->c[cpu].offset = offset - 1; + memcpy(item, &cpubuffer->array[offset - 1], sizeof(*item)); + cpubuffer->offset = offset - 1; return true; } static void *test_percpu_memcpy_buffer_thread(void *arg) { long long i, reps; - struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg; + struct percpu_memcpy_buffer __rseq_percpu *buffer = (struct percpu_memcpy_buffer __rseq_percpu *)arg; if (!opt_disable_rseq && rseq_register_current_thread()) abort(); @@ -1040,24 +1213,39 @@ static void test_percpu_memcpy_buffer(void) const int num_threads = opt_threads; int i, j, ret; uint64_t sum = 0, expected_sum = 0; - struct percpu_memcpy_buffer buffer; + struct percpu_memcpy_buffer *buffer; pthread_t test_threads[num_threads]; cpu_set_t allowed_cpus; + struct rseq_mempool *mempool; - memset(&buffer, 0, sizeof(buffer)); + mempool = rseq_mempool_create("percpu_memcpy_buffer", + sizeof(struct percpu_memcpy_buffer), + 0, CPU_SETSIZE, NULL); + if (!mempool) { + perror("rseq_mempool_create"); + abort(); + } + buffer = (struct percpu_memcpy_buffer __rseq_percpu *)rseq_percpu_zmalloc(mempool); + if (!buffer) { + perror("rseq_percpu_zmalloc"); + abort(); + } /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + struct percpu_memcpy_buffer *cpubuffer; + + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; + cpubuffer = rseq_percpu_ptr(buffer, i); /* Worse-case is every item in same CPU. */ - buffer.c[i].array = + cpubuffer->array = (struct percpu_memcpy_buffer_node *) - malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE * + malloc(sizeof(*cpubuffer->array) * CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU); - assert(buffer.c[i].array); - buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU; + assert(cpubuffer->array); + cpubuffer->buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU; for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) { expected_sum += 2 * j + 1; @@ -1068,16 +1256,16 @@ static void test_percpu_memcpy_buffer(void) * within a single word, so allocate an object * for each node. */ - buffer.c[i].array[j - 1].data1 = j; - buffer.c[i].array[j - 1].data2 = j + 1; - buffer.c[i].offset++; + cpubuffer->array[j - 1].data1 = j; + cpubuffer->array[j - 1].data2 = j + 1; + cpubuffer->offset++; } } for (i = 0; i < num_threads; i++) { ret = pthread_create(&test_threads[i], NULL, test_percpu_memcpy_buffer_thread, - &buffer); + buffer); if (ret) { errno = ret; perror("pthread_create"); @@ -1096,15 +1284,17 @@ static void test_percpu_memcpy_buffer(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_memcpy_buffer_node item; + struct percpu_memcpy_buffer *cpubuffer; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; - while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) { + cpubuffer = rseq_percpu_ptr(buffer, i); + while (__percpu_memcpy_buffer_pop(buffer, &item, i)) { sum += item.data1; sum += item.data2; } - free(buffer.c[i].array); + free(cpubuffer->array); } /* @@ -1113,9 +1303,14 @@ static void test_percpu_memcpy_buffer(void) * test is running). */ assert(sum == expected_sum); + rseq_percpu_free(buffer); + ret = rseq_mempool_destroy(mempool); + if (ret) { + perror("rseq_mempool_destroy"); + abort(); + } } - static void test_signal_interrupt_handler(__attribute__ ((unused)) int signo) { signals_delivered++; @@ -1147,12 +1342,6 @@ static int set_signal_handler(void) return ret; } -static -int sys_membarrier(int cmd, int flags, int cpu_id) -{ - return syscall(__NR_membarrier, cmd, flags, cpu_id); -} - static bool membarrier_private_expedited_rseq_available(void) { @@ -1168,10 +1357,11 @@ bool membarrier_private_expedited_rseq_available(void) } /* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */ -#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV +#ifdef TEST_MEMBARRIER struct test_membarrier_thread_args { + struct rseq_mempool *mempool; + struct percpu_list __rseq_percpu *percpu_list_ptr; int stop; - intptr_t percpu_list_ptr; }; /* Worker threads modify data in their "active" percpu lists. */ @@ -1180,8 +1370,8 @@ void *test_membarrier_worker_thread(void *arg) { struct test_membarrier_thread_args *args = (struct test_membarrier_thread_args *)arg; - const int iters = opt_reps; - int i; + const long long iters = opt_reps; + long long i; if (rseq_register_current_thread()) { fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", @@ -1196,10 +1386,13 @@ void *test_membarrier_worker_thread(void *arg) int ret; do { - int cpu = rseq_cpu_start(); + int cpu = get_current_cpu_id(); + struct percpu_list __rseq_percpu *list = RSEQ_READ_ONCE(args->percpu_list_ptr); + struct percpu_list *cpulist = rseq_percpu_ptr(list, cpu); - ret = rseq_offset_deref_addv(&args->percpu_list_ptr, - sizeof(struct percpu_list_entry) * cpu, 1, cpu); + ret = rseq_load_cbne_load_add_load_add_store__ptr(RSEQ_MO_RELAXED, RSEQ_PERCPU, + (intptr_t *) &args->percpu_list_ptr, + (intptr_t) list, (intptr_t *) &cpulist->head, 0, 1, cpu); } while (rseq_unlikely(ret)); } @@ -1212,29 +1405,48 @@ void *test_membarrier_worker_thread(void *arg) } static -void test_membarrier_init_percpu_list(struct percpu_list *list) +struct percpu_list __rseq_percpu *test_membarrier_alloc_percpu_list(struct rseq_mempool *mempool) { + struct percpu_list __rseq_percpu *list; int i; - memset(list, 0, sizeof(*list)); + list = (struct percpu_list __rseq_percpu *)rseq_percpu_zmalloc(mempool); + if (!list) { + perror("rseq_percpu_zmalloc"); + return NULL; + } for (i = 0; i < CPU_SETSIZE; i++) { + struct percpu_list *cpulist = rseq_percpu_ptr(list, i); struct percpu_list_node *node; node = (struct percpu_list_node *) malloc(sizeof(*node)); assert(node); node->data = 0; node->next = NULL; - list->c[i].head = node; + cpulist->head = node; } + return list; } static -void test_membarrier_free_percpu_list(struct percpu_list *list) +void test_membarrier_free_percpu_list(struct percpu_list __rseq_percpu *list) { int i; for (i = 0; i < CPU_SETSIZE; i++) - free(list->c[i].head); + free(rseq_percpu_ptr(list, i)->head); + rseq_percpu_free(list); +} + +static +long long test_membarrier_count_percpu_list(struct percpu_list __rseq_percpu *list) +{ + long long total_count = 0; + int i; + + for (i = 0; i < CPU_SETSIZE; i++) + total_count += rseq_percpu_ptr(list, i)->head->data; + return total_count; } /* @@ -1246,9 +1458,20 @@ void *test_membarrier_manager_thread(void *arg) { struct test_membarrier_thread_args *args = (struct test_membarrier_thread_args *)arg; - struct percpu_list list_a, list_b; + struct percpu_list __rseq_percpu *list_a, __rseq_percpu *list_b; intptr_t expect_a = 0, expect_b = 0; int cpu_a = 0, cpu_b = 0; + struct rseq_mempool *mempool; + int ret; + long long total_count = 0; + + mempool = rseq_mempool_create("percpu_list", sizeof(struct percpu_list), + 0, CPU_SETSIZE, NULL); + if (!mempool) { + perror("rseq_mempool_create"); + abort(); + } + args->mempool = mempool; if (rseq_register_current_thread()) { fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", @@ -1257,13 +1480,15 @@ void *test_membarrier_manager_thread(void *arg) } /* Init lists. */ - test_membarrier_init_percpu_list(&list_a); - test_membarrier_init_percpu_list(&list_b); + list_a = test_membarrier_alloc_percpu_list(mempool); + assert(list_a); + list_b = test_membarrier_alloc_percpu_list(mempool); + assert(list_b); /* Initialize lists before publishing them. */ rseq_smp_wmb(); - RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a); + RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a); while (!RSEQ_READ_ONCE(args->stop)) { /* list_a is "active". */ @@ -1272,15 +1497,14 @@ void *test_membarrier_manager_thread(void *arg) * As list_b is "inactive", we should never see changes * to list_b. */ - if (expect_b != RSEQ_READ_ONCE(list_b.c[cpu_b].head->data)) { + if (expect_b != RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data)) { fprintf(stderr, "Membarrier test failed\n"); abort(); } /* Make list_b "active". */ - RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_b); - if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, - MEMBARRIER_CMD_FLAG_CPU, cpu_a) && + RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_b); + if (rseq_membarrier_expedited(cpu_a) && errno != ENXIO /* missing CPU */) { perror("sys_membarrier"); abort(); @@ -1289,38 +1513,52 @@ void *test_membarrier_manager_thread(void *arg) * Cpu A should now only modify list_b, so the values * in list_a should be stable. */ - expect_a = RSEQ_READ_ONCE(list_a.c[cpu_a].head->data); + expect_a = RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data); cpu_b = rand() % CPU_SETSIZE; /* * As list_a is "inactive", we should never see changes * to list_a. */ - if (expect_a != RSEQ_READ_ONCE(list_a.c[cpu_a].head->data)) { + if (expect_a != RSEQ_READ_ONCE(rseq_percpu_ptr(list_a, cpu_a)->head->data)) { fprintf(stderr, "Membarrier test failed\n"); abort(); } /* Make list_a "active". */ - RSEQ_WRITE_ONCE(args->percpu_list_ptr, (intptr_t)&list_a); - if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, - MEMBARRIER_CMD_FLAG_CPU, cpu_b) && + RSEQ_WRITE_ONCE(args->percpu_list_ptr, list_a); + if (rseq_membarrier_expedited(cpu_b) && errno != ENXIO /* missing CPU */) { perror("sys_membarrier"); abort(); } /* Remember a value from list_b. */ - expect_b = RSEQ_READ_ONCE(list_b.c[cpu_b].head->data); + expect_b = RSEQ_READ_ONCE(rseq_percpu_ptr(list_b, cpu_b)->head->data); } - test_membarrier_free_percpu_list(&list_a); - test_membarrier_free_percpu_list(&list_b); + total_count += test_membarrier_count_percpu_list(list_a); + total_count += test_membarrier_count_percpu_list(list_b); + + /* Validate that we observe the right number of increments. */ + if (total_count != opt_threads * opt_reps) { + fprintf(stderr, "Error: Observed %lld increments, expected %lld\n", + total_count, opt_threads * opt_reps); + abort(); + } + test_membarrier_free_percpu_list(list_a); + test_membarrier_free_percpu_list(list_b); if (rseq_unregister_current_thread()) { fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", errno, strerror(errno)); abort(); } + ret = rseq_mempool_destroy(mempool); + if (ret) { + perror("rseq_mempool_destroy"); + abort(); + } + return NULL; } @@ -1343,8 +1581,8 @@ void test_membarrier(void) abort(); } + thread_args.percpu_list_ptr = NULL; thread_args.stop = 0; - thread_args.percpu_list_ptr = 0; ret = pthread_create(&manager_thread, NULL, test_membarrier_manager_thread, &thread_args); if (ret) { @@ -1381,7 +1619,7 @@ void test_membarrier(void) abort(); } } -#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */ +#else /* TEST_MEMBARRIER */ static void test_membarrier(void) { @@ -1390,7 +1628,7 @@ void test_membarrier(void) "Skipping membarrier test.\n"); return; } - fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. " + fprintf(stderr, "rseq_load_cbne_load_add_load_add_store__ptr is not implemented on this architecture. " "Skipping membarrier test.\n"); } #endif @@ -1545,10 +1783,10 @@ int main(int argc, char **argv) verbose = 1; break; case 'M': - opt_mb = 1; + opt_mo = RSEQ_MO_RELEASE; break; case 'c': - if (rseq_available()) { + if (rseq_available(RSEQ_AVAILABLE_QUERY_KERNEL)) { printf_verbose("The rseq syscall is available.\n"); goto end; } else { @@ -1573,6 +1811,10 @@ int main(int argc, char **argv) if (!opt_disable_rseq && rseq_register_current_thread()) goto error; + if (!opt_disable_rseq && !rseq_validate_cpu_id()) { + printf_verbose("The rseq cpu id getter is unavailable\n"); + goto no_rseq; + } switch (opt_test) { case 's': printf_verbose("spinlock\n");