path: root/lib/percpu-refcount.c
author     Tejun Heo <tj@kernel.org>    2014-09-24 13:31:49 -0400
committer  Tejun Heo <tj@kernel.org>    2014-09-24 13:31:49 -0400
commit     490c79a65708873228cf114cf00e32c204e4e907 (patch)
tree       b54e3d1617601a5da2273e4bfa4920beb7f9e779 /lib/percpu-refcount.c
parent     27344a9017cdaff82a167827da3001a0918afdc3 (diff)
percpu_ref: decouple switching to atomic mode and killing
percpu_ref has treated the dropping of the base reference and
switching to atomic mode as an integral operation; however, there's
nothing inherent tying the two together.

The use cases for percpu_ref have been expanding continuously.  While
the current init/kill/reinit/exit model can cover a lot, the coupling
of kill/reinit with atomic/percpu mode switching is turning out to be
too restrictive for use cases where many percpu_refs are created and
destroyed back-to-back with only some of them reaching extended
operation.  The coupling also makes implementing always-atomic debug
mode difficult.

This patch separates out atomic mode switching into
percpu_ref_switch_to_atomic() and reimplements
percpu_ref_kill_and_confirm() on top of it.

* The handling of __PERCPU_REF_ATOMIC and __PERCPU_REF_DEAD is now
  differentiated.  Among get/put operations, percpu_ref_tryget_live()
  is the only one which cares about DEAD.

* percpu_ref_switch_to_atomic() can be called multiple times on the
  same ref.  This means that multiple @confirm_switch may get queued
  up which we can't do reliably without extra memory area.  This is
  handled by making the later invocation synchronously wait for the
  completion of the previous one.  This isn't particularly desirable
  but such synchronous waits shouldn't happen in most cases.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Kent Overstreet <kmo@daterainc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
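
As an illustration of the decoupled API this patch introduces, below is a hedged usage sketch (not part of the patch). The names my_ref, my_confirm_switch(), my_confirm_kill() and my_example() are made up for illustration, and my_ref is assumed to have been initialized elsewhere with percpu_ref_init() and a suitable release callback.

/*
 * Hypothetical usage sketch, not part of this patch: my_ref,
 * my_confirm_switch(), my_confirm_kill() and my_example() are
 * illustration-only names.  Assume my_ref was set up elsewhere with
 * percpu_ref_init() and a suitable release function.
 */
#include <linux/kernel.h>
#include <linux/percpu-refcount.h>

static struct percpu_ref my_ref;

/* runs from an RCU callback once all CPUs see atomic mode; must not block */
static void my_confirm_switch(struct percpu_ref *ref)
{
        pr_debug("ref is now in atomic mode but still alive\n");
}

/* runs once all CPUs see the ref as dead; must not block */
static void my_confirm_kill(struct percpu_ref *ref)
{
        pr_debug("no further percpu_ref_tryget_live() can succeed\n");
}

static void my_example(void)
{
        /*
         * New with this patch: collapse the per-cpu counters into the
         * atomic counter without dropping the base reference;
         * percpu_ref_tryget_live() keeps succeeding.
         */
        percpu_ref_switch_to_atomic(&my_ref, my_confirm_switch);

        /* get/put operations remain legal in either mode */
        if (percpu_ref_tryget_live(&my_ref))
                percpu_ref_put(&my_ref);

        /*
         * Killing is a separate decision; as before, this drops the
         * base reference and marks the ref DEAD.
         */
        percpu_ref_kill_and_confirm(&my_ref, my_confirm_kill);
}

The split is visible here: switching to atomic mode and killing are independent steps, my_example() switches first and kills only afterwards, and the get/put path works the same in both modes.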
Diffstat (limited to 'lib/percpu-refcount.c')
-rw-r--r--  lib/percpu-refcount.c  |  141
1 file changed, 110 insertions(+), 31 deletions(-)
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index e2ff19f970cf..6e0d14366c5d 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -1,6 +1,8 @@
 #define pr_fmt(fmt) "%s: " fmt "\n", __func__
 
 #include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
 #include <linux/percpu-refcount.h>
 
 /*
@@ -31,6 +33,8 @@
 
 #define PERCPU_COUNT_BIAS       (1LU << (BITS_PER_LONG - 1))
 
+static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);
+
 static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
 {
         return (unsigned long __percpu *)
@@ -88,7 +92,19 @@ void percpu_ref_exit(struct percpu_ref *ref)
 }
 EXPORT_SYMBOL_GPL(percpu_ref_exit);
 
-static void percpu_ref_kill_rcu(struct rcu_head *rcu)
+static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu)
+{
+        struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
+
+        ref->confirm_switch(ref);
+        ref->confirm_switch = NULL;
+        wake_up_all(&percpu_ref_switch_waitq);
+
+        /* drop ref from percpu_ref_switch_to_atomic() */
+        percpu_ref_put(ref);
+}
+
+static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
 {
         struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
         unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
@@ -116,47 +132,79 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
         atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count);
 
         WARN_ONCE(atomic_long_read(&ref->count) <= 0,
-                  "percpu ref (%pf) <= 0 (%ld) after killed",
+                  "percpu ref (%pf) <= 0 (%ld) after switching to atomic",
                   ref->release, atomic_long_read(&ref->count));
 
-        /* @ref is viewed as dead on all CPUs, send out kill confirmation */
-        if (ref->confirm_switch)
-                ref->confirm_switch(ref);
-
-        /*
-         * Now we're in single atomic_long_t mode with a consistent
-         * refcount, so it's safe to drop our initial ref:
-         */
-        percpu_ref_put(ref);
+        /* @ref is viewed as dead on all CPUs, send out switch confirmation */
+        percpu_ref_call_confirm_rcu(rcu);
+}
+
+static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
+{
+}
+
+static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+                                          percpu_ref_func_t *confirm_switch)
+{
+        if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) {
+                /* switching from percpu to atomic */
+                ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
+
+                /*
+                 * Non-NULL ->confirm_switch is used to indicate that
+                 * switching is in progress.  Use noop one if unspecified.
+                 */
+                WARN_ON_ONCE(ref->confirm_switch);
+                ref->confirm_switch =
+                        confirm_switch ?: percpu_ref_noop_confirm_switch;
+
+                percpu_ref_get(ref);    /* put after confirmation */
+                call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
+        } else if (confirm_switch) {
+                /*
+                 * Somebody already set ATOMIC.  Switching may still be in
+                 * progress.  @confirm_switch must be invoked after the
+                 * switching is complete and a full sched RCU grace period
+                 * has passed.  Wait synchronously for the previous
+                 * switching and schedule @confirm_switch invocation.
+                 */
+                wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
+                ref->confirm_switch = confirm_switch;
+
+                percpu_ref_get(ref);    /* put after confirmation */
+                call_rcu_sched(&ref->rcu, percpu_ref_call_confirm_rcu);
+        }
 }
 
 /**
- * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
- * @ref: percpu_ref to kill
- * @confirm_kill: optional confirmation callback
+ * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
+ * @ref: percpu_ref to switch to atomic mode
+ * @confirm_switch: optional confirmation callback
  *
- * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
- * @confirm_kill is not NULL.  @confirm_kill, which may not block, will be
- * called after @ref is seen as dead from all CPUs - all further
- * invocations of percpu_ref_tryget_live() will fail.  See
- * percpu_ref_tryget_live() for more details.
+ * There's no reason to use this function for the usual reference counting.
+ * Use percpu_ref_kill[_and_confirm]().
+ *
+ * Schedule switching of @ref to atomic mode.  All its percpu counts will
+ * be collected to the main atomic counter.  On completion, when all CPUs
+ * are guaraneed to be in atomic mode, @confirm_switch, which may not
+ * block, is invoked.  This function may be invoked concurrently with all
+ * the get/put operations and can safely be mixed with kill and reinit
+ * operations.
  *
- * Due to the way percpu_ref is implemented, @confirm_kill will be called
- * after at least one full RCU grace period has passed but this is an
- * implementation detail and callers must not depend on it.
+ * This function normally doesn't block and can be called from any context
+ * but it may block if @confirm_kill is specified and @ref is already in
+ * the process of switching to atomic mode.  In such cases, @confirm_switch
+ * will be invoked after the switching is complete.
+ *
+ * Due to the way percpu_ref is implemented, @confirm_switch will be called
+ * after at least one full sched RCU grace period has passed but this is an
+ * implementation detail and must not be depended upon.
  */
-void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
-                                 percpu_ref_func_t *confirm_kill)
+void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+                                 percpu_ref_func_t *confirm_switch)
 {
-        WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC_DEAD,
-                  "%s called more than once on %pf!", __func__, ref->release);
-
-        ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC_DEAD;
-        ref->confirm_switch = confirm_kill;
-
-        call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
+        __percpu_ref_switch_to_atomic(ref, confirm_switch);
 }
-EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
 
 /**
  * percpu_ref_reinit - re-initialize a percpu refcount
@@ -192,3 +240,34 @@ void percpu_ref_reinit(struct percpu_ref *ref)
                           ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_reinit);
+
+/**
+ * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
+ * @ref: percpu_ref to kill
+ * @confirm_kill: optional confirmation callback
+ *
+ * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
+ * @confirm_kill is not NULL.  @confirm_kill, which may not block, will be
+ * called after @ref is seen as dead from all CPUs at which point all
+ * further invocations of percpu_ref_tryget_live() will fail.  See
+ * percpu_ref_tryget_live() for details.
+ *
+ * This function normally doesn't block and can be called from any context
+ * but it may block if @confirm_kill is specified and @ref is already in
+ * the process of switching to atomic mode by percpu_ref_switch_atomic().
+ *
+ * Due to the way percpu_ref is implemented, @confirm_switch will be called
+ * after at least one full sched RCU grace period has passed but this is an
+ * implementation detail and must not be depended upon.
+ */
+void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
+                                 percpu_ref_func_t *confirm_kill)
+{
+        WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
+                  "%s called more than once on %pf!", __func__, ref->release);
+
+        ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
+        __percpu_ref_switch_to_atomic(ref, confirm_kill);
+        percpu_ref_put(ref);
+}
+EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
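
One consequence of the new __percpu_ref_switch_to_atomic() above deserves spelling out: a second switch request issued while an earlier one is still pending is serialized on percpu_ref_switch_waitq. Below is a minimal hedged sketch of that case; first_confirm(), second_confirm() and back_to_back_switch() are hypothetical names, and the ref passed in is assumed to be live and initially in percpu mode.

/*
 * Hypothetical sketch, not part of this patch: first_confirm(),
 * second_confirm() and back_to_back_switch() are illustration-only names.
 */
static void first_confirm(struct percpu_ref *ref)
{
        /* must not block; runs from the sched RCU callback */
}

static void second_confirm(struct percpu_ref *ref)
{
        /* must not block; runs from a later sched RCU callback */
}

static void back_to_back_switch(struct percpu_ref *ref)
{
        /* sets __PERCPU_REF_ATOMIC and queues percpu_ref_switch_to_atomic_rcu() */
        percpu_ref_switch_to_atomic(ref, first_confirm);

        /*
         * __PERCPU_REF_ATOMIC is already set, so this call takes the
         * "else if (confirm_switch)" branch: it sleeps in wait_event()
         * until first_confirm() has run and ->confirm_switch is cleared,
         * then queues second_confirm() behind another sched RCU grace
         * period.  A caller passing a callback while a prior switch may
         * still be pending must therefore be in process context.
         */
        percpu_ref_switch_to_atomic(ref, second_confirm);
}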