#define _GNU_SOURCE
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <rseq.h>

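/*
 * Lock passed to every do_rseq() invocation.  With the rseq.h helpers used
 * by this test, it is assumed to act as the fallback/slow path taken when a
 * restartable sequence cannot complete (e.g. when the thread is repeatedly
 * interrupted or single-stepped).
 */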
static struct rseq_lock rseq_lock;

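/*
 * Per-cpu entries are aligned (and thus padded) to 128 bytes so that entries
 * belonging to different cpus do not share a cache line (avoids false
 * sharing).
 */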
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
	int reps;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

/* A simple per-cpu spinlock.  Returns the cpu on which the lock was acquired. */
int rseq_percpu_lock(struct percpu_lock *lock)
{
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

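	/*
	 * The block passed to do_rseq() is assumed to run as a restartable
	 * sequence on the current cpu: if the thread is preempted, migrated
	 * or signalled before the commit, the block restarts from the top;
	 * otherwise the commit stores newval to *targetptr.  Retry until the
	 * lock word for this cpu is observed as 0 and the commit of 1
	 * succeeds.
	 */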
	for (;;) {
		do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
			{
				if (unlikely(lock->c[cpu].v)) {
					result = false;
				} else {
					newval = 1;
					targetptr = (intptr_t *)&lock->c[cpu].v;
				}
			});
		if (likely(result))
			break;
	}
	/*
	 * Acquire semantics when taking the lock, ordered after the control
	 * dependency on the lock word.  Matches the smp_store_release() in
	 * rseq_percpu_unlock().
	 */
	smp_acquire__after_ctrl_dep();
	return cpu;
}

void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release the lock with release semantics.  Matches the
	 * smp_acquire__after_ctrl_dep() in rseq_percpu_lock().
	 */
	smp_store_release(&lock->c[cpu].v, 0);
}

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_test_data *data = arg;
	int i, cpu;

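	/*
	 * Each thread registers itself with rseq before using the per-cpu
	 * lock, and unregisters before exiting.
	 */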
	if (rseq_register_current_thread())
		abort();
	for (i = 0; i < data->reps; i++) {
		cpu = rseq_percpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
	}
	if (rseq_unregister_current_thread())
		abort();

	return NULL;
}

/*
 * A simple test that implements a sharded counter using a per-cpu
 * lock.  Real applications might prefer a plain per-cpu increment,
 * but this is adequate for a test, and the lock can be extended to
 * synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = 200;
	int i;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;

	memset(&data, 0, sizeof(data));
	data.reps = 5000;

	for (i = 0; i < num_threads; i++)
		assert(pthread_create(&test_threads[i], NULL,
			test_percpu_spinlock_thread, &data) == 0);

	for (i = 0; i < num_threads; i++)
		pthread_join(test_threads[i], NULL);

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)data.reps * num_threads);
}

int percpu_list_push(struct percpu_list *list, struct percpu_list_node *node)
{
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

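	/*
	 * node->next is written inside the restartable block, so if the
	 * thread migrates before the commit, the whole body re-runs on the
	 * new cpu and next is re-linked against that cpu's head.  The commit
	 * then publishes node as the new head of the current cpu's list.
	 */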
	do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
		{
			newval = (intptr_t)node;
			targetptr = (intptr_t *)&list->c[cpu].head;
			node->next = list->c[cpu].head;
		});

	return cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concern about
 * ABA-type races.
 */
struct percpu_list_node *percpu_list_pop(struct percpu_list *list)
{
	struct percpu_list_node *head, *next;
	struct rseq_state rseq_state;
	intptr_t *targetptr, newval;
	int cpu;
	bool result;

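	/*
	 * head and next are (re-)read inside the restartable block.  If the
	 * sequence is interrupted between reading head->next and the commit,
	 * it restarts and re-reads both, so there is no window in which a
	 * stale next pointer can be committed (the usual ABA hazard).
	 */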
	do_rseq(&rseq_lock, rseq_state, cpu, result, targetptr, newval,
		{
			head = list->c[cpu].head;
			if (!head) {
				result = false;
			} else {
				next = head->next;
				newval = (intptr_t)next;
				targetptr = (intptr_t *)&list->c[cpu].head;
			}
		});

	return head;
}

void *test_percpu_list_thread(void *arg)
{
	int i;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (rseq_register_current_thread())
		abort();

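	/*
	 * Pop a node from the current cpu's list, yield to encourage a
	 * migration, then push the node back, possibly onto a different
	 * cpu's list.
	 */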
	for (i = 0; i < 100000; i++) {
		struct percpu_list_node *node = percpu_list_pop(list);

		sched_yield(); /* encourage shuffling */
		if (node)
			percpu_list_push(list, node);
	}

	if (rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification of a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	int i, j;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[200];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < 200; i++)
		assert(pthread_create(&test_threads[i], NULL,
			test_percpu_list_thread, &list) == 0);

	for (i = 0; i < 200; i++)
		pthread_join(test_threads[i], NULL);

	for (i = 0; i < CPU_SETSIZE; i++) {
		cpu_set_t pin_mask;
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

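		/*
		 * Pop operates on the current cpu's list, so pin this thread
		 * to cpu i in order to drain list i.
		 */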
		CPU_ZERO(&pin_mask);
		CPU_SET(i, &pin_mask);
		sched_setaffinity(0, sizeof(pin_mask), &pin_mask);

		while ((node = percpu_list_pop(&list))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

int main(int argc, char **argv)
{
	if (rseq_init_lock(&rseq_lock)) {
		perror("rseq_init_lock");
		return -1;
	}
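	/*
	 * Register the main thread as well: test_percpu_list() drains the
	 * per-cpu lists with percpu_list_pop() from this thread.
	 */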
	if (rseq_register_current_thread())
		goto error;
	printf("spinlock\n");
	test_percpu_spinlock();
	printf("percpu_list\n");
	test_percpu_list();
	if (rseq_unregister_current_thread())
		goto error;
	if (rseq_destroy_lock(&rseq_lock)) {
		perror("rseq_destroy_lock");
		return -1;
	}
	return 0;

error:
	if (rseq_destroy_lock(&rseq_lock))
		perror("rseq_destroy_lock");
	return -1;
}