path: root/lib/percpu-refcount.c
author     Tejun Heo <tj@kernel.org>    2014-09-24 13:31:49 -0400
committer  Tejun Heo <tj@kernel.org>    2014-09-24 13:31:49 -0400
commit     490c79a65708873228cf114cf00e32c204e4e907 (patch)
tree       b54e3d1617601a5da2273e4bfa4920beb7f9e779 /lib/percpu-refcount.c
parent     27344a9017cdaff82a167827da3001a0918afdc3 (diff)
percpu_ref: decouple switching to atomic mode and killing
percpu_ref has treated the dropping of the base reference and
switching to atomic mode as an integral operation; however, there's
nothing inherent tying the two together.

The use cases for percpu_ref have been expanding continuously.  While
the current init/kill/reinit/exit model can cover a lot, the coupling
of kill/reinit with atomic/percpu mode switching is turning out to be
too restrictive for use cases where many percpu_refs are created and
destroyed back-to-back with only some of them reaching extended
operation.  The coupling also makes implementing always-atomic debug
mode difficult.

This patch separates out atomic mode switching into
percpu_ref_switch_to_atomic() and reimplements
percpu_ref_kill_and_confirm() on top of it.

* The handling of __PERCPU_REF_ATOMIC and __PERCPU_REF_DEAD is now
  differentiated.  Among get/put operations, percpu_ref_tryget_live()
  is the only one which cares about DEAD.

* percpu_ref_switch_to_atomic() can be called multiple times on the
  same ref.  This means that multiple @confirm_switch may get queued
  up which we can't do reliably without extra memory area.  This is
  handled by making the later invocation synchronously wait for the
  completion of the previous one.  This isn't particularly desirable
  but such synchronous waits shouldn't happen in most cases.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Kent Overstreet <kmo@daterainc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
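
As an illustration of the decoupled API this patch introduces, below is a hedged usage sketch (not part of the patch). The names my_ref, my_confirm_switch(), my_confirm_kill() and my_example() are made up for illustration, and my_ref is assumed to have been initialized elsewhere with percpu_ref_init() and a suitable release callback.

/*
 * Hypothetical usage sketch, not part of this patch: my_ref,
 * my_confirm_switch(), my_confirm_kill() and my_example() are
 * illustration-only names.  Assume my_ref was set up elsewhere with
 * percpu_ref_init() and a suitable release function.
 */
#include <linux/kernel.h>
#include <linux/percpu-refcount.h>

static struct percpu_ref my_ref;

/* runs from an RCU callback once all CPUs see atomic mode; must not block */
static void my_confirm_switch(struct percpu_ref *ref)
{
        pr_debug("ref is now in atomic mode but still alive\n");
}

/* runs once all CPUs see the ref as dead; must not block */
static void my_confirm_kill(struct percpu_ref *ref)
{
        pr_debug("no further percpu_ref_tryget_live() can succeed\n");
}

static void my_example(void)
{
        /*
         * New with this patch: collapse the per-cpu counters into the
         * atomic counter without dropping the base reference;
         * percpu_ref_tryget_live() keeps succeeding.
         */
        percpu_ref_switch_to_atomic(&my_ref, my_confirm_switch);

        /* get/put operations remain legal in either mode */
        if (percpu_ref_tryget_live(&my_ref))
                percpu_ref_put(&my_ref);

        /*
         * Killing is a separate decision; as before, this drops the
         * base reference and marks the ref DEAD.
         */
        percpu_ref_kill_and_confirm(&my_ref, my_confirm_kill);
}

The split is visible here: switching to atomic mode and killing are independent steps, my_example() switches first and kills only afterwards, and the get/put path works the same in both modes.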
Diffstat (limited to 'lib/percpu-refcount.c')
-rw-r--r--  lib/percpu-refcount.c  |  141
1 file changed, 110 insertions(+), 31 deletions(-)
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index e2ff19f970cf..6e0d14366c5d 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -1,6 +1,8 @@
 #define pr_fmt(fmt) "%s: " fmt "\n", __func__
 
 #include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
 #include <linux/percpu-refcount.h>
 
 /*
@@ -31,6 +33,8 @@
 
 #define PERCPU_COUNT_BIAS       (1LU << (BITS_PER_LONG - 1))
 
+static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);
+
 static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
 {
         return (unsigned long __percpu *)
@@ -88,7 +92,19 @@ void percpu_ref_exit(struct percpu_ref *ref)
 }
 EXPORT_SYMBOL_GPL(percpu_ref_exit);
 
-static void percpu_ref_kill_rcu(struct rcu_head *rcu)
+static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu)
+{
+        struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
+
+        ref->confirm_switch(ref);
+        ref->confirm_switch = NULL;
+        wake_up_all(&percpu_ref_switch_waitq);
+
+        /* drop ref from percpu_ref_switch_to_atomic() */
+        percpu_ref_put(ref);
+}
+
+static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
 {
         struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
         unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
@@ -116,47 +132,79 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
         atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count);
 
         WARN_ONCE(atomic_long_read(&ref->count) <= 0,
-                  "percpu ref (%pf) <= 0 (%ld) after killed",
+                  "percpu ref (%pf) <= 0 (%ld) after switching to atomic",
                   ref->release, atomic_long_read(&ref->count));
 
-        /* @ref is viewed as dead on all CPUs, send out kill confirmation */
-        if (ref->confirm_switch)
-                ref->confirm_switch(ref);
-
-        /*
-         * Now we're in single atomic_long_t mode with a consistent
-         * refcount, so it's safe to drop our initial ref:
-         */
-        percpu_ref_put(ref);
+        /* @ref is viewed as dead on all CPUs, send out switch confirmation */
+        percpu_ref_call_confirm_rcu(rcu);
+}
+
+static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
+{
+}
+
+static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+                                          percpu_ref_func_t *confirm_switch)
+{
+        if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) {
+                /* switching from percpu to atomic */
+                ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
+
+                /*
+                 * Non-NULL ->confirm_switch is used to indicate that
+                 * switching is in progress.  Use noop one if unspecified.
+                 */
+                WARN_ON_ONCE(ref->confirm_switch);
+                ref->confirm_switch =
+                        confirm_switch ?: percpu_ref_noop_confirm_switch;
+
+                percpu_ref_get(ref);    /* put after confirmation */
+                call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
+        } else if (confirm_switch) {
+                /*
+                 * Somebody already set ATOMIC.  Switching may still be in
+                 * progress.  @confirm_switch must be invoked after the
+                 * switching is complete and a full sched RCU grace period
+                 * has passed.  Wait synchronously for the previous
+                 * switching and schedule @confirm_switch invocation.
+                 */
+                wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
+                ref->confirm_switch = confirm_switch;
+
+                percpu_ref_get(ref);    /* put after confirmation */
+                call_rcu_sched(&ref->rcu, percpu_ref_call_confirm_rcu);
+        }
 }
 
 /**
- * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
- * @ref: percpu_ref to kill
- * @confirm_kill: optional confirmation callback
+ * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
+ * @ref: percpu_ref to switch to atomic mode
+ * @confirm_switch: optional confirmation callback
  *
- * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
- * @confirm_kill is not NULL.  @confirm_kill, which may not block, will be
- * called after @ref is seen as dead from all CPUs - all further
- * invocations of percpu_ref_tryget_live() will fail.  See
- * percpu_ref_tryget_live() for more details.
+ * There's no reason to use this function for the usual reference counting.
+ * Use percpu_ref_kill[_and_confirm]().
+ *
+ * Schedule switching of @ref to atomic mode.  All its percpu counts will
+ * be collected to the main atomic counter.  On completion, when all CPUs
+ * are guaraneed to be in atomic mode, @confirm_switch, which may not
+ * block, is invoked.  This function may be invoked concurrently with all
+ * the get/put operations and can safely be mixed with kill and reinit
+ * operations.
  *
- * Due to the way percpu_ref is implemented, @confirm_kill will be called
- * after at least one full RCU grace period has passed but this is an
- * implementation detail and callers must not depend on it.
+ * This function normally doesn't block and can be called from any context
+ * but it may block if @confirm_kill is specified and @ref is already in
+ * the process of switching to atomic mode.  In such cases, @confirm_switch
+ * will be invoked after the switching is complete.
+ *
+ * Due to the way percpu_ref is implemented, @confirm_switch will be called
+ * after at least one full sched RCU grace period has passed but this is an
+ * implementation detail and must not be depended upon.
  */
-void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
-                                 percpu_ref_func_t *confirm_kill)
+void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+                                 percpu_ref_func_t *confirm_switch)
 {
-        WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC_DEAD,
-                  "%s called more than once on %pf!", __func__, ref->release);
-
-        ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC_DEAD;
-        ref->confirm_switch = confirm_kill;
-
-        call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
+        __percpu_ref_switch_to_atomic(ref, confirm_switch);
 }
-EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
 
 /**
  * percpu_ref_reinit - re-initialize a percpu refcount
@@ -192,3 +240,34 @@ void percpu_ref_reinit(struct percpu_ref *ref)
                           ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_reinit);
+
+/**
+ * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
+ * @ref: percpu_ref to kill
+ * @confirm_kill: optional confirmation callback
+ *
+ * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
+ * @confirm_kill is not NULL.  @confirm_kill, which may not block, will be
+ * called after @ref is seen as dead from all CPUs at which point all
+ * further invocations of percpu_ref_tryget_live() will fail.  See
+ * percpu_ref_tryget_live() for details.
+ *
+ * This function normally doesn't block and can be called from any context
+ * but it may block if @confirm_kill is specified and @ref is already in
+ * the process of switching to atomic mode by percpu_ref_switch_atomic().
+ *
+ * Due to the way percpu_ref is implemented, @confirm_switch will be called
+ * after at least one full sched RCU grace period has passed but this is an
+ * implementation detail and must not be depended upon.
+ */
+void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
+                                 percpu_ref_func_t *confirm_kill)
+{
+        WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
+                  "%s called more than once on %pf!", __func__, ref->release);
+
+        ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
+        __percpu_ref_switch_to_atomic(ref, confirm_kill);
+        percpu_ref_put(ref);
+}
+EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
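
One consequence of the new __percpu_ref_switch_to_atomic() above deserves spelling out: a second switch request issued while an earlier one is still pending is serialized on percpu_ref_switch_waitq. Below is a minimal hedged sketch of that case; first_confirm(), second_confirm() and back_to_back_switch() are hypothetical names, and the ref passed in is assumed to be live and initially in percpu mode.

/*
 * Hypothetical sketch, not part of this patch: first_confirm(),
 * second_confirm() and back_to_back_switch() are illustration-only names.
 */
static void first_confirm(struct percpu_ref *ref)
{
        /* must not block; runs from the sched RCU callback */
}

static void second_confirm(struct percpu_ref *ref)
{
        /* must not block; runs from a later sched RCU callback */
}

static void back_to_back_switch(struct percpu_ref *ref)
{
        /* sets __PERCPU_REF_ATOMIC and queues percpu_ref_switch_to_atomic_rcu() */
        percpu_ref_switch_to_atomic(ref, first_confirm);

        /*
         * __PERCPU_REF_ATOMIC is already set, so this call takes the
         * "else if (confirm_switch)" branch: it sleeps in wait_event()
         * until first_confirm() has run and ->confirm_switch is cleared,
         * then queues second_confirm() behind another sched RCU grace
         * period.  A caller passing a callback while a prior switch may
         * still be pending must therefore be in process context.
         */
        percpu_ref_switch_to_atomic(ref, second_confirm);
}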