Diffstat (limited to 'lib')
-rw-r--r--	lib/Makefile	2
-rw-r--r--	lib/percpu-rwsem.c	154
2 files changed, 155 insertions, 1 deletion
diff --git a/lib/Makefile b/lib/Makefile
index e2152fa7ff4d..e959c20efb24 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -9,7 +9,7 @@ endif
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
-	 idr.o int_sqrt.o extable.o \
+	 idr.o int_sqrt.o extable.o percpu-rwsem.o \
	 sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
	 proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
new file mode 100644
index 000000000000..2e03bcfe48f9
--- /dev/null
+++ b/lib/percpu-rwsem.c
@@ -0,0 +1,154 @@
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/percpu.h>
+#include <linux/wait.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+
+int percpu_init_rwsem(struct percpu_rw_semaphore *brw)
+{
+	brw->fast_read_ctr = alloc_percpu(int);
+	if (unlikely(!brw->fast_read_ctr))
+		return -ENOMEM;
+
+	mutex_init(&brw->writer_mutex);
+	init_rwsem(&brw->rw_sem);
+	atomic_set(&brw->slow_read_ctr, 0);
+	init_waitqueue_head(&brw->write_waitq);
+	return 0;
+}
+
+void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
+{
+	free_percpu(brw->fast_read_ctr);
+	brw->fast_read_ctr = NULL; /* catch use after free bugs */
+}
+
+/*
+ * This is the fast path for down_read/up_read; it only needs to ensure
+ * there is no pending writer (the !mutex_is_locked() check) and inc/dec
+ * the fast per-cpu counter. The writer uses synchronize_sched_expedited()
+ * to serialize with the preempt-disabled section below.
+ *
+ * The nontrivial part is that we must guarantee acquire/release semantics
+ * in two cases:
+ *
+ *	R_W: down_write() comes after up_read(); the writer should see all
+ *	     changes done by the reader
+ * or
+ *	W_R: down_read() comes after up_write(); the reader should see all
+ *	     changes done by the writer
+ *
+ * If this helper fails, the callers fall back on the normal rw_semaphore
+ * and atomic_dec_and_test(), so in that case we have the necessary barriers.
+ *
+ * But if it succeeds we have no barriers: mutex_is_locked() or
+ * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
+ * reader inside the critical section. See the comments in down_write and
+ * up_write below.
+ */
+static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
+{
+	bool success = false;
+
+	preempt_disable();
+	if (likely(!mutex_is_locked(&brw->writer_mutex))) {
+		__this_cpu_add(*brw->fast_read_ctr, val);
+		success = true;
+	}
+	preempt_enable();
+
+	return success;
+}
+
+/*
+ * Like the normal down_read(), this is not recursive: the writer can
+ * come in after the first percpu_down_read() and create a deadlock.
+ */
+void percpu_down_read(struct percpu_rw_semaphore *brw)
+{
+	if (likely(update_fast_ctr(brw, +1)))
+		return;
+
+	down_read(&brw->rw_sem);
+	atomic_inc(&brw->slow_read_ctr);
+	up_read(&brw->rw_sem);
+}
+
+void percpu_up_read(struct percpu_rw_semaphore *brw)
+{
+	if (likely(update_fast_ctr(brw, -1)))
+		return;
+
+	/* false-positive is possible but harmless */
+	if (atomic_dec_and_test(&brw->slow_read_ctr))
+		wake_up_all(&brw->write_waitq);
+}
+
+static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
+{
+	unsigned int sum = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		sum += per_cpu(*brw->fast_read_ctr, cpu);
+		per_cpu(*brw->fast_read_ctr, cpu) = 0;
+	}
+
+	return sum;
+}
+
+/*
+ * A writer takes ->writer_mutex to exclude other writers and to force
+ * the readers to switch to the slow mode; note the mutex_is_locked()
+ * check in update_fast_ctr().
+ *
+ * After that the readers can only inc/dec the slow ->slow_read_ctr
+ * counter; ->fast_read_ctr is stable. Once the writer moves its sum into
+ * the slow counter, it represents the number of active readers.
+ *
+ * Finally the writer takes ->rw_sem for writing to block new readers,
+ * then waits until the slow counter becomes zero.
+ */
+void percpu_down_write(struct percpu_rw_semaphore *brw)
+{
+	/* also blocks update_fast_ctr() which checks mutex_is_locked() */
+	mutex_lock(&brw->writer_mutex);
+
+	/*
+	 * 1. Ensures mutex_is_locked() is visible to any down_read/up_read
+	 *    so that update_fast_ctr() can't succeed.
+	 *
+	 * 2. Ensures we see the result of every previous this_cpu_add() in
+	 *    update_fast_ctr().
+	 *
+	 * 3. Ensures that if any reader has exited its critical section via
+	 *    the fast path, it executes a full memory barrier before we
+	 *    return. See the R_W case in the comment above update_fast_ctr().
+	 */
+	synchronize_sched_expedited();
+
+	/* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
+	atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
+
+	/* block the new readers completely */
+	down_write(&brw->rw_sem);
+
+	/* wait for all readers to complete their percpu_up_read() */
+	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
+}
+
+void percpu_up_write(struct percpu_rw_semaphore *brw)
+{
+	/* allow the new readers, but only via the slow path */
+	up_write(&brw->rw_sem);
+
+	/*
+	 * Insert the barrier before the next fast path in down_read;
+	 * see the W_R case in the comment above update_fast_ctr().
+	 */
+	synchronize_sched_expedited();
+	mutex_unlock(&brw->writer_mutex);
+}
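
For context, here is a minimal usage sketch of the API this patch adds, assuming only the declarations from include/linux/percpu-rwsem.h; the stats_* names are hypothetical and not part of the patch. Readers bracket their critical section with percpu_down_read()/percpu_up_read() and normally touch only a per-cpu counter, while the rare writer uses percpu_down_write()/percpu_up_write() and absorbs the cost of both synchronize_sched_expedited() calls:

#include <linux/init.h>
#include <linux/percpu-rwsem.h>

static struct percpu_rw_semaphore stats_rwsem;	/* hypothetical example */
static long stats_value;

static int __init stats_init(void)
{
	/* allocates the per-cpu fast counter; returns -ENOMEM on failure */
	return percpu_init_rwsem(&stats_rwsem);
}

static long stats_get(void)
{
	long v;

	percpu_down_read(&stats_rwsem);		/* fast path: per-cpu inc */
	v = stats_value;
	percpu_up_read(&stats_rwsem);		/* fast path: per-cpu dec */
	return v;
}

static void stats_reset(void)
{
	percpu_down_write(&stats_rwsem);	/* excludes all readers */
	stats_value = 0;
	percpu_up_write(&stats_rwsem);
}

Note that, as the comment above percpu_down_read() warns, the read side is not recursive: nesting percpu_down_read() can deadlock against a writer that arrives between the two acquisitions.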