From dd6b880e89a240df6b09ca207a4c5a464bb1d8f6 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Sun, 7 Oct 2018 13:19:15 -0400
Subject: [PATCH] Add percpu ops test

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
 .gitignore                    |   2 +
 Makefile                      |  11 +-
 tests/Makefile                |  27 ++++
 tests/basic_percpu_ops_test.c | 297 ++++++++++++++++++++++++++++++++++
 4 files changed, 335 insertions(+), 2 deletions(-)
 create mode 100644 tests/Makefile
 create mode 100644 tests/basic_percpu_ops_test.c

diff --git a/.gitignore b/.gitignore
index c6127b3..4a06a6d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,3 +50,5 @@ modules.order
 Module.symvers
 Mkfile.old
 dkms.conf
+
+tests/basic_percpu_ops_test
diff --git a/Makefile b/Makefile
index 38898a9..3218bf8 100644
--- a/Makefile
+++ b/Makefile
@@ -9,6 +9,7 @@
 # granted, provided the above notices are retained, and a notice that
 # the code was modified is included with the above copyright notice.
 
+SUBDIRS = tests/
 CPPFLAGS += -I./include
 CFLAGS += -O2 -g
 
@@ -16,7 +17,12 @@ LDFLAGS += -pthread
 
 PREFIX = /usr/local
 
-all: librseq.so
+all: librseq.so subdirs
+
+subdirs:
+	for dir in $(SUBDIRS); do \
+		$(MAKE) -C $$dir; \
+	done
 
 INCLUDES=$(wildcard include/rseq/*.h)
 
@@ -24,9 +30,10 @@ librseq.so: src/rseq.c src/cpu-op.c src/percpu-op.c ${INCLUDES}
 	$(CC) $(CFLAGS) $(LDFLAGS) $(CPPFLAGS) -shared -fpic \
 		src/rseq.c src/cpu-op.c src/percpu-op.c -o $@
 
-.PHONY: clean install uninstall
+.PHONY: clean install uninstall subdirs
 
 clean:
+	$(MAKE) -C $(SUBDIRS) clean
 	rm -f librseq.so
 
 install: librseq.so
diff --git a/tests/Makefile b/tests/Makefile
new file mode 100644
index 0000000..23496bb
--- /dev/null
+++ b/tests/Makefile
@@ -0,0 +1,27 @@
+# Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+#
+# THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+# OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
+#
+# Permission is hereby granted to use or copy this program for any
+# purpose, provided the above notices are retained on all copies.
+# Permission to modify the code and to distribute modified code is
+# granted, provided the above notices are retained, and a notice that
+# the code was modified is included with the above copyright notice.
+
+CPPFLAGS += -I../include
+CFLAGS += -O2 -g
+LDFLAGS += -pthread
+
+all: basic_percpu_ops_test
+
+INCLUDES=$(wildcard ../include/rseq/*.h)
+
+basic_percpu_ops_test: basic_percpu_ops_test.c ${INCLUDES}
+	$(CC) $(CFLAGS) $(LDFLAGS) $(CPPFLAGS) $< \
+		../src/rseq.c ../src/cpu-op.c ../src/percpu-op.c -o $@
+
+.PHONY: clean
+
+clean:
+	rm -f basic_percpu_ops_test
diff --git a/tests/basic_percpu_ops_test.c b/tests/basic_percpu_ops_test.c
new file mode 100644
index 0000000..ed29d11
--- /dev/null
+++ b/tests/basic_percpu_ops_test.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: LGPL-2.1
+#define _GNU_SOURCE
+#include <assert.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+
+#include <rseq/percpu-op.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+struct percpu_lock_entry {
+	intptr_t v;
+} __attribute__((aligned(128)));
+
+struct percpu_lock {
+	struct percpu_lock_entry c[CPU_SETSIZE];
+};
+
+struct test_data_entry {
+	intptr_t count;
+} __attribute__((aligned(128)));
+
+struct spinlock_test_data {
+	struct percpu_lock lock;
+	struct test_data_entry c[CPU_SETSIZE];
+	int reps;
+};
+
+struct percpu_list_node {
+	intptr_t data;
+	struct percpu_list_node *next;
+};
+
+struct percpu_list_entry {
+	struct percpu_list_node *head;
+} __attribute__((aligned(128)));
+
+struct percpu_list {
+	struct percpu_list_entry c[CPU_SETSIZE];
+};
+
+/* A simple percpu spinlock.  Returns the cpu the lock was acquired on. */
+int rseq_percpu_lock(struct percpu_lock *lock)
+{
+	int cpu;
+
+	for (;;) {
+		int ret;
+
+		cpu = rseq_cpu_start();
+		ret = percpu_cmpeqv_storev(&lock->c[cpu].v,
+					   0, 1, cpu);
+		if (rseq_likely(!ret))
+			break;
+		if (rseq_unlikely(ret < 0)) {
+			perror("cpu_opv");
+			abort();
+		}
+		/* Retry if comparison fails. */
+	}
+	/*
+	 * Acquire semantic when taking lock after control dependency.
+	 * Matches rseq_smp_store_release().
+	 */
+	rseq_smp_acquire__after_ctrl_dep();
+	return cpu;
+}
+
+void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
+{
+	assert(lock->c[cpu].v == 1);
+	/*
+	 * Release lock, with release semantic. Matches
+	 * rseq_smp_acquire__after_ctrl_dep().
+	 */
+	rseq_smp_store_release(&lock->c[cpu].v, 0);
+}
+
+void *test_percpu_spinlock_thread(void *arg)
+{
+	struct spinlock_test_data *data = arg;
+	int i, cpu;
+
+	if (rseq_register_current_thread()) {
+		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+	for (i = 0; i < data->reps; i++) {
+		cpu = rseq_percpu_lock(&data->lock);
+		data->c[cpu].count++;
+		rseq_percpu_unlock(&data->lock, cpu);
+	}
+	if (rseq_unregister_current_thread()) {
+		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+
+	return NULL;
+}
+
+/*
+ * A simple test which implements a sharded counter using a per-cpu
+ * lock. Obviously real applications might prefer to simply use a
+ * per-cpu increment; however, this is reasonable for a test and the
+ * lock can be extended to synchronize more complicated operations.
+ */
+void test_percpu_spinlock(void)
+{
+	const int num_threads = 200;
+	int i;
+	uint64_t sum;
+	pthread_t test_threads[num_threads];
+	struct spinlock_test_data data;
+
+	memset(&data, 0, sizeof(data));
+	data.reps = 5000;
+
+	for (i = 0; i < num_threads; i++)
+		pthread_create(&test_threads[i], NULL,
+			       test_percpu_spinlock_thread, &data);
+
+	for (i = 0; i < num_threads; i++)
+		pthread_join(test_threads[i], NULL);
+
+	sum = 0;
+	for (i = 0; i < CPU_SETSIZE; i++)
+		sum += data.c[i].count;
+
+	assert(sum == (uint64_t)data.reps * num_threads);
+}
+
+int percpu_list_push(struct percpu_list *list, struct percpu_list_node *node,
+		     int cpu)
+{
+	for (;;) {
+		intptr_t *targetptr, newval, expect;
+		int ret;
+
+		/* Load list->c[cpu].head with single-copy atomicity. */
+		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
+		newval = (intptr_t)node;
+		targetptr = (intptr_t *)&list->c[cpu].head;
+		node->next = (struct percpu_list_node *)expect;
+		ret = percpu_cmpeqv_storev(targetptr, expect, newval, cpu);
+		if (rseq_likely(!ret))
+			break;
+		if (rseq_unlikely(ret < 0)) {
+			perror("cpu_opv");
+			abort();
+		}
+		/* Retry if comparison fails. */
+	}
+	return cpu;
+}
+
+/*
+ * Unlike a traditional lock-less linked list, the availability of a
+ * rseq primitive allows us to implement pop without concerns over
+ * ABA-type races.
+ */
+struct percpu_list_node *percpu_list_pop(struct percpu_list *list,
+					 int cpu)
+{
+	struct percpu_list_node *head;
+	intptr_t *targetptr, expectnot, *load;
+	off_t offset;
+	int ret;
+
+	targetptr = (intptr_t *)&list->c[cpu].head;
+	expectnot = (intptr_t)NULL;
+	offset = offsetof(struct percpu_list_node, next);
+	load = (intptr_t *)&head;
+	ret = percpu_cmpnev_storeoffp_load(targetptr, expectnot,
+					   offset, load, cpu);
+	if (rseq_unlikely(ret < 0)) {
+		perror("cpu_opv");
+		abort();
+	}
+	if (ret > 0)
+		return NULL;
+	return head;
+}
+
+void *test_percpu_list_thread(void *arg)
+{
+	int i;
+	struct percpu_list *list = (struct percpu_list *)arg;
+
+	if (rseq_register_current_thread()) {
+		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+
+	for (i = 0; i < 100000; i++) {
+		struct percpu_list_node *node;
+
+		node = percpu_list_pop(list, rseq_cpu_start());
+		sched_yield();	/* encourage shuffling */
+		if (node)
+			percpu_list_push(list, node, rseq_cpu_start());
+	}
+
+	if (rseq_unregister_current_thread()) {
+		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+
+	return NULL;
+}
+
+/* Simultaneous modification to a per-cpu linked list from many threads. */
+void test_percpu_list(void)
+{
+	int i, j;
+	uint64_t sum = 0, expected_sum = 0;
+	struct percpu_list list;
+	pthread_t test_threads[200];
+	cpu_set_t allowed_cpus;
+
+	memset(&list, 0, sizeof(list));
+
+	/* Generate list entries for every usable cpu. */
+	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+		for (j = 1; j <= 100; j++) {
+			struct percpu_list_node *node;
+
+			expected_sum += j;
+
+			node = malloc(sizeof(*node));
+			assert(node);
+			node->data = j;
+			node->next = list.c[i].head;
+			list.c[i].head = node;
+		}
+	}
+
+	for (i = 0; i < 200; i++)
+		pthread_create(&test_threads[i], NULL,
+			       test_percpu_list_thread, &list);
+
+	for (i = 0; i < 200; i++)
+		pthread_join(test_threads[i], NULL);
+
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		struct percpu_list_node *node;
+
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+
+		while ((node = percpu_list_pop(&list, i))) {
+			sum += node->data;
+			free(node);
+		}
+	}
+
+	/*
+	 * All entries should now be accounted for (unless some external
+	 * actor is interfering with our allowed affinity while this
+	 * test is running).
+	 */
+	assert(sum == expected_sum);
+}
+
+int main(int argc, char **argv)
+{
+	if (rseq_register_current_thread()) {
+		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		goto error;
+	}
+	printf("spinlock\n");
+	test_percpu_spinlock();
+	printf("percpu_list\n");
+	test_percpu_list();
+	if (rseq_unregister_current_thread()) {
+		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		goto error;
+	}
+	return 0;
+
+error:
+	return -1;
+}
-- 
2.34.1
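
After applying, a top-level make builds librseq.so and then descends into tests/; running ./tests/basic_percpu_ops_test should print "spinlock" and "percpu_list" (see main() above) and exit 0. As a further illustration of the retry idiom both tests rely on (read the current cpu with rseq_cpu_start(), attempt percpu_cmpeqv_storev(), retry on a positive return, abort on a negative one), here is a minimal per-cpu counter sketch built from the same calls. It is hypothetical and not part of the patch: the file name and build line are illustrative, assuming the tree layout used by the Makefiles above, e.g. cc -I include percpu_counter_sketch.c src/rseq.c src/cpu-op.c src/percpu-op.c -pthread.

/* percpu_counter_sketch.c: hypothetical example, not part of the patch. */
#define _GNU_SOURCE
#include <sched.h>	/* CPU_SETSIZE */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <rseq/percpu-op.h>

struct counter_entry {
	intptr_t v;
} __attribute__((aligned(128)));

static struct counter_entry counters[CPU_SETSIZE];

static void counter_inc(void)
{
	for (;;) {
		int cpu = rseq_cpu_start();
		intptr_t old = RSEQ_READ_ONCE(counters[cpu].v);
		int ret = percpu_cmpeqv_storev(&counters[cpu].v, old,
					       old + 1, cpu);

		if (rseq_likely(!ret))
			return;	/* Stored old + 1 on @cpu. */
		if (rseq_unlikely(ret < 0)) {
			perror("cpu_opv");
			abort();
		}
		/* ret > 0: the slot changed under us; reread and retry. */
	}
}

int main(void)
{
	if (rseq_register_current_thread())
		abort();
	counter_inc();
	printf("count: %ld\n", (long)counters[rseq_cpu_start()].v);
	if (rseq_unregister_current_thread())
		abort();
	return 0;
}

As in the test's structures, the 128-byte alignment gives each cpu's slot its own cache lines, so counter updates on different cpus do not falsely share.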