author		Mathieu Desnoyers <mathieu.desnoyers@efficios.com>	2018-06-02 08:44:06 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2018-06-06 05:58:35 -0400
commit		01a5ec4217599fd78ba76fa7199a350c5fb4650f (patch)
tree		1af689bd075fcee6ccf00453361d047817913d23 /tools
parent		1badac4f80c3d325d6d1b9fc3b6344120aa799be (diff)
rseq/selftests: Provide basic percpu ops test
"basic_percpu_ops_test" is a slightly more "realistic" variant,
implementing a few simple per-cpu operations and testing their
correctness.
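The test can be built and run through the rseq selftests directory; a
minimal sketch of the flow (the exact invocation below is an assumption,
not part of this patch):

	cd tools/testing/selftests/rseq
	make
	./basic_percpu_ops_test

On success it prints the name of each sub-test and exits 0:

	spinlock
	percpu_list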
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Joel Fernandes <joelaf@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Watson <davejwatson@fb.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: linux-kselftest@vger.kernel.org
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: Chris Lameter <cl@linux.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Andrew Hunter <ahh@google.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Paul Turner <pjt@google.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ben Maurer <bmaurer@fb.com>
Cc: linux-api@vger.kernel.org
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lkml.kernel.org/r/20180602124408.8430-15-mathieu.desnoyers@efficios.com
Diffstat (limited to 'tools')
-rw-r--r--	tools/testing/selftests/rseq/basic_percpu_ops_test.c	312
1 file changed, 312 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
new file mode 100644
index 000000000000..eb3f6db36d36
--- /dev/null
+++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
@@ -0,0 +1,312 @@
// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <errno.h>	/* errno is reported directly in the error paths below */
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>

#include "rseq.h"

#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof((arr)[0]))

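/*
 * Align each per-cpu entry on 128 bytes so that entries for different
 * CPUs sit on distinct cache lines, avoiding false sharing between them.
 */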
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
	int reps;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Returns the cpu lock was acquired on. */
int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
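		/*
		 * Try to take the lock by changing c[cpu].v from 0 to 1.
		 * The store only commits if the thread is still running
		 * on @cpu when the sequence completes; preemption or
		 * migration aborts it and we retry below.
		 */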
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_test_data *data = arg;
	int i, cpu;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}
	for (i = 0; i < data->reps; i++) {
		cpu = rseq_this_cpu_lock(&data->lock);
		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
	}
	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
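/*
 * Such a per-cpu increment could be sketched with the rseq_addv()
 * helper from rseq.h, retrying rseq_addv(&data->c[cpu].count, 1, cpu)
 * until it completes without aborting.
 */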
void test_percpu_spinlock(void)
{
	const int num_threads = 200;
	int i;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;

	memset(&data, 0, sizeof(data));
	data.reps = 5000;

	for (i = 0; i < num_threads; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_spinlock_thread, &data);

	for (i = 0; i < num_threads; i++)
		pthread_join(test_threads[i], NULL);

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)data.reps * num_threads);
}

void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
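		/*
		 * The rseq_cmpeqv_storev() below is the commit point:
		 * it publishes @node only if the head is still @expect
		 * and we are still on @cpu; on failure or abort we
		 * retry and re-stage node->next against the new head.
		 */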
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of an
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races: the head and its next pointer are read and the head
 * updated within a single restartable critical section, which aborts
 * if another thread gets to run on this cpu in the meantime.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		off_t offset;
		int ret, cpu;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
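		/*
		 * rseq_cmpnev_storeoffp_load() checks that the head is
		 * not NULL, loads it into *load, and stores the node's
		 * next pointer (at @offset) back into the head slot,
		 * all within the critical section. A positive return
		 * means the comparison failed, i.e. the list is empty.
		 */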
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			if (_cpu)
				*_cpu = cpu;
			return head;
		}
		if (ret > 0)
			return NULL;
		/* Retry if rseq aborts. */
	}
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	int i;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	for (i = 0; i < 100000; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		sched_yield();  /* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		abort();
	}

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	int i, j;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[200];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < 200; i++)
		pthread_create(&test_threads[i], NULL,
			       test_percpu_list_thread, &list);

	for (i = 0; i < 200; i++)
		pthread_join(test_threads[i], NULL);

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

int main(int argc, char **argv)
{
	if (rseq_register_current_thread()) {
		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto error;
	}
	printf("spinlock\n");
	test_percpu_spinlock();
	printf("percpu_list\n");
	test_percpu_list();
	if (rseq_unregister_current_thread()) {
		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
			errno, strerror(errno));
		goto error;
	}
	return 0;

error:
	return -1;
}