author	Linus Torvalds <torvalds@linux-foundation.org>	2014-10-10 07:26:02 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-10-10 07:26:02 -0400
commit	c798360cd1438090d51eeaa8e67985da11362eba (patch)
tree	0107d3b9ee7476264c3357287787d393545bd2d9 /lib
parent	b211e9d7c861bdb37b86d6384da9edfb80949ceb (diff)
parent	6ae833c7fe0c6ef1f0ab13cc775da230d6f4c256 (diff)
Merge branch 'for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
Pull percpu updates from Tejun Heo:
 "A lot of activities on percpu front. Notable changes are...

  - percpu allocator now can take @gfp. If @gfp doesn't contain
    GFP_KERNEL, it tries to allocate from what's already available to
    the allocator and a work item tries to keep the reserve around
    certain level so that these atomic allocations usually succeed.

    This will replace the ad-hoc percpu memory pool used by
    blk-throttle and also be used by the planned blkcg support for
    writeback IOs.

    Please note that I noticed a bug in how @gfp is interpreted while
    preparing this pull request and applied the fix 6ae833c7fe0c
    ("percpu: fix how @gfp is interpreted by the percpu allocator")
    just now.

  - percpu_ref now uses longs for percpu and global counters instead
    of ints. It leads to more sparse packing of the percpu counters
    on 64bit machines but the overhead should be negligible and this
    allows using percpu_ref for refcnting pages and in-memory objects
    directly.

  - The switching between percpu and single counter modes of a
    percpu_ref is made independent of putting the base ref and a
    percpu_ref can now optionally be initialized in single or killed
    mode. This allows avoiding percpu shutdown latency for cases
    where the refcounted objects may be synchronously created and
    destroyed in rapid succession with only a fraction of them
    reaching fully operational status (SCSI probing does this when
    combined with blk-mq support). It's also planned to be used to
    implement forced single mode to detect underflow more timely for
    debugging.

  There's a separate branch percpu/for-3.18-consistent-ops which
  cleans up the duplicate percpu accessors. That branch causes a
  number of conflicts with s390 and other trees. I'll send a
  separate pull request w/ resolutions once other branches are
  merged"

* 'for-3.18' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu: (33 commits)
  percpu: fix how @gfp is interpreted by the percpu allocator
  blk-mq, percpu_ref: start q->mq_usage_counter in atomic mode
  percpu_ref: make INIT_ATOMIC and switch_to_atomic() sticky
  percpu_ref: add PERCPU_REF_INIT_* flags
  percpu_ref: decouple switching to percpu mode and reinit
  percpu_ref: decouple switching to atomic mode and killing
  percpu_ref: add PCPU_REF_DEAD
  percpu_ref: rename things to prepare for decoupling percpu/atomic mode switch
  percpu_ref: replace pcpu_ prefix with percpu_
  percpu_ref: minor code and comment updates
  percpu_ref: relocate percpu_ref_reinit()
  Revert "blk-mq, percpu_ref: implement a kludge for SCSI blk-mq stall during probe"
  Revert "percpu: free percpu allocation info for uniprocessor system"
  percpu-refcount: make percpu_ref based on longs instead of ints
  percpu-refcount: improve WARN messages
  percpu: fix locking regression in the failure path of pcpu_alloc()
  percpu-refcount: add @gfp to percpu_ref_init()
  proportions: add @gfp to init functions
  percpu_counter: add @gfp to percpu_counter_init()
  percpu_counter: make percpu_counters_lock irq-safe
  ...
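For reference, a minimal sketch (not part of this merge) of what a caller of the reworked percpu_ref API looks like after these changes; my_ref, my_release and my_setup are hypothetical names, while the signature and flags are the ones introduced by this branch:

	#include <linux/percpu-refcount.h>
	#include <linux/gfp.h>

	static struct percpu_ref my_ref;	/* hypothetical object refcount */

	/* called once the refcount reaches 0; must not sleep */
	static void my_release(struct percpu_ref *ref)
	{
	}

	static int my_setup(void)
	{
		/*
		 * Start in atomic (single-counter) mode; passing 0 for @flags
		 * would start in percpu mode instead.  @gfp is used for the
		 * underlying percpu allocation.
		 */
		return percpu_ref_init(&my_ref, my_release,
				       PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
	}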
Diffstat (limited to 'lib')
-rw-r--r--	lib/flex_proportions.c	8
-rw-r--r--	lib/percpu-refcount.c	305
-rw-r--r--	lib/percpu_counter.c	20
-rw-r--r--	lib/proportions.c	10
4 files changed, 238 insertions(+), 105 deletions(-)
diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c
index ebf3bac460b0..8f25652f40d4 100644
--- a/lib/flex_proportions.c
+++ b/lib/flex_proportions.c
@@ -34,13 +34,13 @@
  */
 #include <linux/flex_proportions.h>
 
-int fprop_global_init(struct fprop_global *p)
+int fprop_global_init(struct fprop_global *p, gfp_t gfp)
 {
 	int err;
 
 	p->period = 0;
 	/* Use 1 to avoid dealing with periods with 0 events... */
-	err = percpu_counter_init(&p->events, 1);
+	err = percpu_counter_init(&p->events, 1, gfp);
 	if (err)
 		return err;
 	seqcount_init(&p->sequence);
@@ -168,11 +168,11 @@ void fprop_fraction_single(struct fprop_global *p,
  */
 #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
 
-int fprop_local_init_percpu(struct fprop_local_percpu *pl)
+int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp)
 {
 	int err;
 
-	err = percpu_counter_init(&pl->events, 0);
+	err = percpu_counter_init(&pl->events, 0, gfp);
 	if (err)
 		return err;
 	pl->period = 0;
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index a89cf09a8268..6111bcb28376 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -1,6 +1,8 @@
 #define pr_fmt(fmt) "%s: " fmt "\n", __func__
 
 #include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
 #include <linux/percpu-refcount.h>
 
 /*
@@ -11,8 +13,8 @@
  * percpu counters will all sum to the correct value
  *
  * (More precisely: because moduler arithmatic is commutative the sum of all the
- * pcpu_count vars will be equal to what it would have been if all the gets and
- * puts were done to a single integer, even if some of the percpu integers
+ * percpu_count vars will be equal to what it would have been if all the gets
+ * and puts were done to a single integer, even if some of the percpu integers
  * overflow or underflow).
  *
  * The real trick to implementing percpu refcounts is shutdown. We can't detect
@@ -25,75 +27,64 @@
  * works.
  *
  * Converting to non percpu mode is done with some RCUish stuff in
- * percpu_ref_kill. Additionally, we need a bias value so that the atomic_t
- * can't hit 0 before we've added up all the percpu refs.
+ * percpu_ref_kill. Additionally, we need a bias value so that the
+ * atomic_long_t can't hit 0 before we've added up all the percpu refs.
  */
 
-#define PCPU_COUNT_BIAS		(1U << 31)
+#define PERCPU_COUNT_BIAS	(1LU << (BITS_PER_LONG - 1))
 
-static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref)
+static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);
+
+static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
 {
-	return (unsigned __percpu *)(ref->pcpu_count_ptr & ~PCPU_REF_DEAD);
+	return (unsigned long __percpu *)
+		(ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC_DEAD);
 }
 
 /**
  * percpu_ref_init - initialize a percpu refcount
  * @ref: percpu_ref to initialize
  * @release: function which will be called when refcount hits 0
+ * @flags: PERCPU_REF_INIT_* flags
+ * @gfp: allocation mask to use
  *
- * Initializes the refcount in single atomic counter mode with a refcount of 1;
- * analagous to atomic_set(ref, 1).
+ * Initializes @ref. If @flags is zero, @ref starts in percpu mode with a
+ * refcount of 1; analagous to atomic_long_set(ref, 1). See the
+ * definitions of PERCPU_REF_INIT_* flags for flag behaviors.
  *
  * Note that @release must not sleep - it may potentially be called from RCU
  * callback context by percpu_ref_kill().
  */
-int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release)
+int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
+		    unsigned int flags, gfp_t gfp)
 {
-	atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS);
+	size_t align = max_t(size_t, 1 << __PERCPU_REF_FLAG_BITS,
+			     __alignof__(unsigned long));
+	unsigned long start_count = 0;
 
-	ref->pcpu_count_ptr = (unsigned long)alloc_percpu(unsigned);
-	if (!ref->pcpu_count_ptr)
+	ref->percpu_count_ptr = (unsigned long)
+		__alloc_percpu_gfp(sizeof(unsigned long), align, gfp);
+	if (!ref->percpu_count_ptr)
 		return -ENOMEM;
 
-	ref->release = release;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(percpu_ref_init);
-
-/**
- * percpu_ref_reinit - re-initialize a percpu refcount
- * @ref: perpcu_ref to re-initialize
- *
- * Re-initialize @ref so that it's in the same state as when it finished
- * percpu_ref_init(). @ref must have been initialized successfully, killed
- * and reached 0 but not exited.
- *
- * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
- * this function is in progress.
- */
-void percpu_ref_reinit(struct percpu_ref *ref)
-{
-	unsigned __percpu *pcpu_count = pcpu_count_ptr(ref);
-	int cpu;
+	ref->force_atomic = flags & PERCPU_REF_INIT_ATOMIC;
 
-	BUG_ON(!pcpu_count);
-	WARN_ON(!percpu_ref_is_zero(ref));
+	if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD))
+		ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
+	else
+		start_count += PERCPU_COUNT_BIAS;
 
-	atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS);
+	if (flags & PERCPU_REF_INIT_DEAD)
+		ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
+	else
+		start_count++;
 
-	/*
-	 * Restore per-cpu operation. smp_store_release() is paired with
-	 * smp_read_barrier_depends() in __pcpu_ref_alive() and guarantees
-	 * that the zeroing is visible to all percpu accesses which can see
-	 * the following PCPU_REF_DEAD clearing.
-	 */
-	for_each_possible_cpu(cpu)
-		*per_cpu_ptr(pcpu_count, cpu) = 0;
+	atomic_long_set(&ref->count, start_count);
 
-	smp_store_release(&ref->pcpu_count_ptr,
-			  ref->pcpu_count_ptr & ~PCPU_REF_DEAD);
+	ref->release = release;
+	return 0;
 }
-EXPORT_SYMBOL_GPL(percpu_ref_reinit);
+EXPORT_SYMBOL_GPL(percpu_ref_init);
 
 /**
  * percpu_ref_exit - undo percpu_ref_init()
@@ -107,26 +98,39 @@ EXPORT_SYMBOL_GPL(percpu_ref_reinit);
  */
 void percpu_ref_exit(struct percpu_ref *ref)
 {
-	unsigned __percpu *pcpu_count = pcpu_count_ptr(ref);
+	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
 
-	if (pcpu_count) {
-		free_percpu(pcpu_count);
-		ref->pcpu_count_ptr = PCPU_REF_DEAD;
+	if (percpu_count) {
+		free_percpu(percpu_count);
+		ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
 	}
 }
 EXPORT_SYMBOL_GPL(percpu_ref_exit);
 
-static void percpu_ref_kill_rcu(struct rcu_head *rcu)
+static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu)
+{
+	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
+
+	ref->confirm_switch(ref);
+	ref->confirm_switch = NULL;
+	wake_up_all(&percpu_ref_switch_waitq);
+
+	/* drop ref from percpu_ref_switch_to_atomic() */
+	percpu_ref_put(ref);
+}
+
+static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
 {
 	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
-	unsigned __percpu *pcpu_count = pcpu_count_ptr(ref);
-	unsigned count = 0;
+	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
+	unsigned long count = 0;
 	int cpu;
 
 	for_each_possible_cpu(cpu)
-		count += *per_cpu_ptr(pcpu_count, cpu);
+		count += *per_cpu_ptr(percpu_count, cpu);
 
-	pr_debug("global %i pcpu %i", atomic_read(&ref->count), (int) count);
+	pr_debug("global %ld percpu %ld",
+		 atomic_long_read(&ref->count), (long)count);
 
 	/*
 	 * It's crucial that we sum the percpu counters _before_ adding the sum
@@ -140,21 +144,137 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
 	 * reaching 0 before we add the percpu counts. But doing it at the same
 	 * time is equivalent and saves us atomic operations:
 	 */
+	atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count);
+
+	WARN_ONCE(atomic_long_read(&ref->count) <= 0,
+		  "percpu ref (%pf) <= 0 (%ld) after switching to atomic",
+		  ref->release, atomic_long_read(&ref->count));
+
+	/* @ref is viewed as dead on all CPUs, send out switch confirmation */
+	percpu_ref_call_confirm_rcu(rcu);
+}
+
+static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
+{
+}
+
+static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+					  percpu_ref_func_t *confirm_switch)
+{
+	if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) {
+		/* switching from percpu to atomic */
+		ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
+
+		/*
+		 * Non-NULL ->confirm_switch is used to indicate that
+		 * switching is in progress.  Use noop one if unspecified.
+		 */
+		WARN_ON_ONCE(ref->confirm_switch);
+		ref->confirm_switch =
+			confirm_switch ?: percpu_ref_noop_confirm_switch;
+
+		percpu_ref_get(ref);	/* put after confirmation */
+		call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
+	} else if (confirm_switch) {
+		/*
+		 * Somebody already set ATOMIC.  Switching may still be in
+		 * progress.  @confirm_switch must be invoked after the
+		 * switching is complete and a full sched RCU grace period
+		 * has passed.  Wait synchronously for the previous
+		 * switching and schedule @confirm_switch invocation.
+		 */
+		wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
+		ref->confirm_switch = confirm_switch;
 
-	atomic_add((int) count - PCPU_COUNT_BIAS, &ref->count);
+		percpu_ref_get(ref);	/* put after confirmation */
+		call_rcu_sched(&ref->rcu, percpu_ref_call_confirm_rcu);
+	}
+}
+
+/**
+ * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
+ * @ref: percpu_ref to switch to atomic mode
+ * @confirm_switch: optional confirmation callback
+ *
+ * There's no reason to use this function for the usual reference counting.
+ * Use percpu_ref_kill[_and_confirm]().
+ *
+ * Schedule switching of @ref to atomic mode.  All its percpu counts will
+ * be collected to the main atomic counter.  On completion, when all CPUs
+ * are guaraneed to be in atomic mode, @confirm_switch, which may not
+ * block, is invoked.  This function may be invoked concurrently with all
+ * the get/put operations and can safely be mixed with kill and reinit
+ * operations.  Note that @ref will stay in atomic mode across kill/reinit
+ * cycles until percpu_ref_switch_to_percpu() is called.
+ *
+ * This function normally doesn't block and can be called from any context
+ * but it may block if @confirm_kill is specified and @ref is already in
+ * the process of switching to atomic mode.  In such cases, @confirm_switch
+ * will be invoked after the switching is complete.
+ *
+ * Due to the way percpu_ref is implemented, @confirm_switch will be called
+ * after at least one full sched RCU grace period has passed but this is an
+ * implementation detail and must not be depended upon.
+ */
+void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+				 percpu_ref_func_t *confirm_switch)
+{
+	ref->force_atomic = true;
+	__percpu_ref_switch_to_atomic(ref, confirm_switch);
+}
 
-	WARN_ONCE(atomic_read(&ref->count) <= 0, "percpu ref <= 0 (%i)",
-		  atomic_read(&ref->count));
+static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
+{
+	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
+	int cpu;
+
+	BUG_ON(!percpu_count);
 
-	/* @ref is viewed as dead on all CPUs, send out kill confirmation */
-	if (ref->confirm_kill)
-		ref->confirm_kill(ref);
+	if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
+		return;
+
+	wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
+
+	atomic_long_add(PERCPU_COUNT_BIAS, &ref->count);
 
 	/*
-	 * Now we're in single atomic_t mode with a consistent refcount, so it's
-	 * safe to drop our initial ref:
+	 * Restore per-cpu operation.  smp_store_release() is paired with
+	 * smp_read_barrier_depends() in __ref_is_percpu() and guarantees
+	 * that the zeroing is visible to all percpu accesses which can see
+	 * the following __PERCPU_REF_ATOMIC clearing.
 	 */
-	percpu_ref_put(ref);
+	for_each_possible_cpu(cpu)
+		*per_cpu_ptr(percpu_count, cpu) = 0;
+
+	smp_store_release(&ref->percpu_count_ptr,
+			  ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
+}
+
+/**
+ * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
+ * @ref: percpu_ref to switch to percpu mode
+ *
+ * There's no reason to use this function for the usual reference counting.
+ * To re-use an expired ref, use percpu_ref_reinit().
+ *
+ * Switch @ref to percpu mode.  This function may be invoked concurrently
+ * with all the get/put operations and can safely be mixed with kill and
+ * reinit operations.  This function reverses the sticky atomic state set
+ * by PERCPU_REF_INIT_ATOMIC or percpu_ref_switch_to_atomic().  If @ref is
+ * dying or dead, the actual switching takes place on the following
+ * percpu_ref_reinit().
+ *
+ * This function normally doesn't block and can be called from any context
+ * but it may block if @ref is in the process of switching to atomic mode
+ * by percpu_ref_switch_atomic().
+ */
+void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
+{
+	ref->force_atomic = false;
+
+	/* a dying or dead ref can't be switched to percpu mode w/o reinit */
+	if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD))
+		__percpu_ref_switch_to_percpu(ref);
 }
 
 /**
@@ -164,39 +284,48 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu)
  *
  * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
  * @confirm_kill is not NULL.  @confirm_kill, which may not block, will be
- * called after @ref is seen as dead from all CPUs - all further
- * invocations of percpu_ref_tryget() will fail.  See percpu_ref_tryget()
- * for more details.
+ * called after @ref is seen as dead from all CPUs at which point all
+ * further invocations of percpu_ref_tryget_live() will fail.  See
+ * percpu_ref_tryget_live() for details.
+ *
+ * This function normally doesn't block and can be called from any context
+ * but it may block if @confirm_kill is specified and @ref is in the
+ * process of switching to atomic mode by percpu_ref_switch_atomic().
  *
- * Due to the way percpu_ref is implemented, @confirm_kill will be called
- * after at least one full RCU grace period has passed but this is an
- * implementation detail and callers must not depend on it.
+ * Due to the way percpu_ref is implemented, @confirm_switch will be called
+ * after at least one full sched RCU grace period has passed but this is an
+ * implementation detail and must not be depended upon.
  */
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill)
 {
-	WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD,
-		  "percpu_ref_kill() called more than once!\n");
+	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
+		  "%s called more than once on %pf!", __func__, ref->release);
 
-	ref->pcpu_count_ptr |= PCPU_REF_DEAD;
-	ref->confirm_kill = confirm_kill;
-
-	call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
+	ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
+	__percpu_ref_switch_to_atomic(ref, confirm_kill);
+	percpu_ref_put(ref);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
 
-/*
- * XXX: Temporary kludge to work around SCSI blk-mq stall.  Used only by
- * block/blk-mq.c::blk_mq_freeze_queue().  Will be removed during v3.18
- * devel cycle.  Do not use anywhere else.
+/**
+ * percpu_ref_reinit - re-initialize a percpu refcount
+ * @ref: perpcu_ref to re-initialize
+ *
+ * Re-initialize @ref so that it's in the same state as when it finished
+ * percpu_ref_init() ignoring %PERCPU_REF_INIT_DEAD.  @ref must have been
+ * initialized successfully and reached 0 but not exited.
+ *
+ * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
+ * this function is in progress.
  */
-void __percpu_ref_kill_expedited(struct percpu_ref *ref)
+void percpu_ref_reinit(struct percpu_ref *ref)
 {
-	WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD,
-		  "percpu_ref_kill() called more than once on %pf!",
-		  ref->release);
+	WARN_ON_ONCE(!percpu_ref_is_zero(ref));
 
-	ref->pcpu_count_ptr |= PCPU_REF_DEAD;
-	synchronize_sched_expedited();
-	percpu_ref_kill_rcu(&ref->rcu);
+	ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
+	percpu_ref_get(ref);
+	if (!ref->force_atomic)
+		__percpu_ref_switch_to_percpu(ref);
 }
+EXPORT_SYMBOL_GPL(percpu_ref_reinit);
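A minimal sketch (not part of the patch) of the decoupled mode switching and kill/reinit cycle implemented above; my_ref and my_release are hypothetical and assumed to have been set up with percpu_ref_init() as in the diff:

	static void my_mode_switch_example(void)
	{
		/* collect percpu counts into the atomic counter; sticky across kill/reinit */
		percpu_ref_switch_to_atomic(&my_ref, NULL);

		/* undo the sticky atomic state; takes effect now, or at the next reinit if dying */
		percpu_ref_switch_to_percpu(&my_ref);

		/* drop the base ref; my_release() runs once all other refs are put */
		percpu_ref_kill(&my_ref);

		/* a killed ref that has reached zero (but not been exited) can be recycled */
		if (percpu_ref_is_zero(&my_ref))
			percpu_ref_reinit(&my_ref);
	}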
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 7dd33577b905..48144cdae819 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -112,13 +112,15 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
 }
 EXPORT_SYMBOL(__percpu_counter_sum);
 
-int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
+int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
 			  struct lock_class_key *key)
 {
+	unsigned long flags __maybe_unused;
+
 	raw_spin_lock_init(&fbc->lock);
 	lockdep_set_class(&fbc->lock, key);
 	fbc->count = amount;
-	fbc->counters = alloc_percpu(s32);
+	fbc->counters = alloc_percpu_gfp(s32, gfp);
 	if (!fbc->counters)
 		return -ENOMEM;
 
@@ -126,9 +128,9 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
 
 #ifdef CONFIG_HOTPLUG_CPU
 	INIT_LIST_HEAD(&fbc->list);
-	spin_lock(&percpu_counters_lock);
+	spin_lock_irqsave(&percpu_counters_lock, flags);
 	list_add(&fbc->list, &percpu_counters);
-	spin_unlock(&percpu_counters_lock);
+	spin_unlock_irqrestore(&percpu_counters_lock, flags);
 #endif
 	return 0;
 }
@@ -136,15 +138,17 @@ EXPORT_SYMBOL(__percpu_counter_init);
 
 void percpu_counter_destroy(struct percpu_counter *fbc)
 {
+	unsigned long flags __maybe_unused;
+
 	if (!fbc->counters)
 		return;
 
 	debug_percpu_counter_deactivate(fbc);
 
 #ifdef CONFIG_HOTPLUG_CPU
-	spin_lock(&percpu_counters_lock);
+	spin_lock_irqsave(&percpu_counters_lock, flags);
 	list_del(&fbc->list);
-	spin_unlock(&percpu_counters_lock);
+	spin_unlock_irqrestore(&percpu_counters_lock, flags);
 #endif
 	free_percpu(fbc->counters);
 	fbc->counters = NULL;
@@ -173,7 +177,7 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb,
 		return NOTIFY_OK;
 
 	cpu = (unsigned long)hcpu;
-	spin_lock(&percpu_counters_lock);
+	spin_lock_irq(&percpu_counters_lock);
 	list_for_each_entry(fbc, &percpu_counters, list) {
 		s32 *pcount;
 		unsigned long flags;
@@ -184,7 +188,7 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb,
 		*pcount = 0;
 		raw_spin_unlock_irqrestore(&fbc->lock, flags);
 	}
-	spin_unlock(&percpu_counters_lock);
+	spin_unlock_irq(&percpu_counters_lock);
 #endif
 	return NOTIFY_OK;
 }
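A minimal sketch (not part of the patch) of the percpu_counter_init() interface after the @gfp addition above; my_counter and my_counter_demo are hypothetical names:

	#include <linux/percpu_counter.h>
	#include <linux/gfp.h>
	#include <linux/kernel.h>

	static struct percpu_counter my_counter;

	static int my_counter_demo(void)
	{
		/* the new @gfp argument is used for the underlying percpu allocation */
		int err = percpu_counter_init(&my_counter, 0, GFP_KERNEL);

		if (err)
			return err;

		percpu_counter_add(&my_counter, 1);
		pr_info("sum=%lld\n", percpu_counter_sum(&my_counter));
		percpu_counter_destroy(&my_counter);
		return 0;
	}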
diff --git a/lib/proportions.c b/lib/proportions.c
index 05df84801b56..6f724298f67a 100644
--- a/lib/proportions.c
+++ b/lib/proportions.c
@@ -73,7 +73,7 @@
 #include <linux/proportions.h>
 #include <linux/rcupdate.h>
 
-int prop_descriptor_init(struct prop_descriptor *pd, int shift)
+int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp)
 {
 	int err;
 
@@ -83,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift)
 	pd->index = 0;
 	pd->pg[0].shift = shift;
 	mutex_init(&pd->mutex);
-	err = percpu_counter_init(&pd->pg[0].events, 0);
+	err = percpu_counter_init(&pd->pg[0].events, 0, gfp);
 	if (err)
 		goto out;
 
-	err = percpu_counter_init(&pd->pg[1].events, 0);
+	err = percpu_counter_init(&pd->pg[1].events, 0, gfp);
 	if (err)
 		percpu_counter_destroy(&pd->pg[0].events);
 
@@ -188,12 +188,12 @@ prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
 
 #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
 
-int prop_local_init_percpu(struct prop_local_percpu *pl)
+int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp)
 {
 	raw_spin_lock_init(&pl->lock);
 	pl->shift = 0;
 	pl->period = 0;
-	return percpu_counter_init(&pl->events, 0);
+	return percpu_counter_init(&pl->events, 0, gfp);
 }
 
 void prop_local_destroy_percpu(struct prop_local_percpu *pl)
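A minimal sketch (not part of the patch) showing a caller passing the new @gfp down to the proportions initializers changed above (the flex_proportions.c changes earlier in this diff follow the same pattern); my_pd, my_pl, my_prop_demo and the shift value 10 are hypothetical:

	#include <linux/proportions.h>
	#include <linux/gfp.h>

	static struct prop_descriptor my_pd;
	static struct prop_local_percpu my_pl;

	static int my_prop_demo(void)
	{
		int err;

		/* @gfp is forwarded to percpu_counter_init() for both period counters */
		err = prop_descriptor_init(&my_pd, 10, GFP_KERNEL);
		if (err)
			return err;

		/* per-object local state gets its percpu counter with the same @gfp */
		return prop_local_init_percpu(&my_pl, GFP_KERNEL);
	}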