 arch/x86/include/asm/percpu.h |   5
 include/asm-generic/percpu.h  |  53
 lib/percpu-refcount.c         | 169
 mm/percpu.c                   |  38
 4 files changed, 148 insertions, 117 deletions
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index e02e3f80d363..84f58de08c2b 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -521,7 +521,8 @@ do { \
 static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
                         const unsigned long __percpu *addr)
 {
-        unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
+        unsigned long __percpu *a =
+                (unsigned long __percpu *)addr + nr / BITS_PER_LONG;
 
 #ifdef CONFIG_X86_64
         return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0;
@@ -538,7 +539,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
         asm volatile("bt "__percpu_arg(2)",%1\n\t"
                      CC_SET(c)
                      : CC_OUT(c) (oldbit)
-                     : "m" (*(unsigned long *)addr), "Ir" (nr));
+                     : "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
 
         return oldbit;
 }
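
Editorial note: the two hunks above only adjust type annotations. The cast and the asm memory operand keep the __percpu marker so that sparse, which tracks per-cpu pointers as a separate address space, stops warning about the address space being dropped. As context, a rough sketch of how that checker-only annotation is declared (not part of this patch; the exact attribute and address-space number live in include/linux/compiler.h and may differ by kernel version):

/* Rough sketch of the checker-only __percpu annotation assumed above. */
#ifdef __CHECKER__
# define __percpu        __attribute__((noderef, address_space(3)))
#else
# define __percpu
#endif

With the annotation preserved through the cast, the generated code is unchanged; only the static checker's view of the pointer differs.
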
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 4d9f233c4ba8..40e887068da2 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -65,6 +65,11 @@ extern void setup_per_cpu_areas(void);
 #define PER_CPU_DEF_ATTRIBUTES
 #endif
 
+#define raw_cpu_generic_read(pcp) \
+({ \
+        *raw_cpu_ptr(&(pcp)); \
+})
+
 #define raw_cpu_generic_to_op(pcp, val, op) \
 do { \
         *raw_cpu_ptr(&(pcp)) op val; \
@@ -72,34 +77,39 @@ do { \
 
 #define raw_cpu_generic_add_return(pcp, val) \
 ({ \
-        raw_cpu_add(pcp, val); \
-        raw_cpu_read(pcp); \
+        typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
+ \
+        *__p += val; \
+        *__p; \
 })
 
 #define raw_cpu_generic_xchg(pcp, nval) \
 ({ \
+        typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
         typeof(pcp) __ret; \
-        __ret = raw_cpu_read(pcp); \
-        raw_cpu_write(pcp, nval); \
+        __ret = *__p; \
+        *__p = nval; \
         __ret; \
 })
 
 #define raw_cpu_generic_cmpxchg(pcp, oval, nval) \
 ({ \
+        typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
         typeof(pcp) __ret; \
-        __ret = raw_cpu_read(pcp); \
+        __ret = *__p; \
         if (__ret == (oval)) \
-                raw_cpu_write(pcp, nval); \
+                *__p = nval; \
         __ret; \
 })
 
 #define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
 ({ \
+        typeof(&(pcp1)) __p1 = raw_cpu_ptr(&(pcp1)); \
+        typeof(&(pcp2)) __p2 = raw_cpu_ptr(&(pcp2)); \
         int __ret = 0; \
-        if (raw_cpu_read(pcp1) == (oval1) && \
-                        raw_cpu_read(pcp2) == (oval2)) { \
-                raw_cpu_write(pcp1, nval1); \
-                raw_cpu_write(pcp2, nval2); \
+        if (*__p1 == (oval1) && *__p2 == (oval2)) { \
+                *__p1 = nval1; \
+                *__p2 = nval2; \
                 __ret = 1; \
         } \
         (__ret); \
@@ -109,7 +119,7 @@ do { \
 ({ \
         typeof(pcp) __ret; \
         preempt_disable(); \
-        __ret = *this_cpu_ptr(&(pcp)); \
+        __ret = raw_cpu_generic_read(pcp); \
         preempt_enable(); \
         __ret; \
 })
@@ -118,17 +128,17 @@ do { \
 do { \
         unsigned long __flags; \
         raw_local_irq_save(__flags); \
-        *raw_cpu_ptr(&(pcp)) op val; \
+        raw_cpu_generic_to_op(pcp, val, op); \
         raw_local_irq_restore(__flags); \
 } while (0)
 
+
 #define this_cpu_generic_add_return(pcp, val) \
 ({ \
         typeof(pcp) __ret; \
         unsigned long __flags; \
         raw_local_irq_save(__flags); \
-        raw_cpu_add(pcp, val); \
-        __ret = raw_cpu_read(pcp); \
+        __ret = raw_cpu_generic_add_return(pcp, val); \
         raw_local_irq_restore(__flags); \
         __ret; \
 })
@@ -138,8 +148,7 @@ do { \
         typeof(pcp) __ret; \
         unsigned long __flags; \
         raw_local_irq_save(__flags); \
-        __ret = raw_cpu_read(pcp); \
-        raw_cpu_write(pcp, nval); \
+        __ret = raw_cpu_generic_xchg(pcp, nval); \
         raw_local_irq_restore(__flags); \
         __ret; \
 })
@@ -149,9 +158,7 @@ do { \
         typeof(pcp) __ret; \
         unsigned long __flags; \
         raw_local_irq_save(__flags); \
-        __ret = raw_cpu_read(pcp); \
-        if (__ret == (oval)) \
-                raw_cpu_write(pcp, nval); \
+        __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \
         raw_local_irq_restore(__flags); \
         __ret; \
 })
@@ -168,16 +175,16 @@ do { \
 })
 
 #ifndef raw_cpu_read_1
-#define raw_cpu_read_1(pcp)             (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_1(pcp)             raw_cpu_generic_read(pcp)
 #endif
 #ifndef raw_cpu_read_2
-#define raw_cpu_read_2(pcp)             (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_2(pcp)             raw_cpu_generic_read(pcp)
 #endif
 #ifndef raw_cpu_read_4
-#define raw_cpu_read_4(pcp)             (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_4(pcp)             raw_cpu_generic_read(pcp)
 #endif
 #ifndef raw_cpu_read_8
-#define raw_cpu_read_8(pcp)             (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_8(pcp)             raw_cpu_generic_read(pcp)
 #endif
 
 #ifndef raw_cpu_write_1
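
Editorial note: taken together, these hunks give the generic per-cpu accessors a single expansion path: the raw_cpu_generic_*() helpers operate on a pointer obtained once from raw_cpu_ptr(), and the this_cpu_generic_*() variants wrap those same helpers in raw_local_irq_save()/restore() (or preempt_disable() for the plain read). A minimal usage sketch, assuming an architecture that relies on these generic fallbacks; the per-cpu variable and functions below are hypothetical, not part of the patch:

#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, demo_hits);        /* hypothetical counter */

static unsigned long demo_record_hit(void)
{
        /*
         * On a generic-fallback architecture this expands to
         * raw_local_irq_save() + raw_cpu_generic_add_return() +
         * raw_local_irq_restore(), i.e. the consolidated path above.
         */
        return this_cpu_add_return(demo_hits, 1);
}

static unsigned long demo_read_hits(void)
{
        /* preempt_disable() around raw_cpu_generic_read() */
        return this_cpu_read(demo_hits);
}
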
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 27fe74948882..9ac959ef4cae 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -33,6 +33,7 @@
 
 #define PERCPU_COUNT_BIAS       (1LU << (BITS_PER_LONG - 1))
 
+static DEFINE_SPINLOCK(percpu_ref_switch_lock);
 static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);
 
 static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
@@ -82,6 +83,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
         atomic_long_set(&ref->count, start_count);
 
         ref->release = release;
+        ref->confirm_switch = NULL;
         return 0;
 }
 EXPORT_SYMBOL_GPL(percpu_ref_init);
@@ -101,6 +103,8 @@ void percpu_ref_exit(struct percpu_ref *ref)
         unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
 
         if (percpu_count) {
+                /* non-NULL confirm_switch indicates switching in progress */
+                WARN_ON_ONCE(ref->confirm_switch);
                 free_percpu(percpu_count);
                 ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
         }
@@ -161,66 +165,23 @@ static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
 static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
                                           percpu_ref_func_t *confirm_switch)
 {
-        if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) {
-                /* switching from percpu to atomic */
-                ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
-
-                /*
-                 * Non-NULL ->confirm_switch is used to indicate that
-                 * switching is in progress.  Use noop one if unspecified.
-                 */
-                WARN_ON_ONCE(ref->confirm_switch);
-                ref->confirm_switch =
-                        confirm_switch ?: percpu_ref_noop_confirm_switch;
-
-                percpu_ref_get(ref);    /* put after confirmation */
-                call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
-        } else if (confirm_switch) {
-                /*
-                 * Somebody already set ATOMIC.  Switching may still be in
-                 * progress.  @confirm_switch must be invoked after the
-                 * switching is complete and a full sched RCU grace period
-                 * has passed.  Wait synchronously for the previous
-                 * switching and schedule @confirm_switch invocation.
-                 */
-                wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
-                ref->confirm_switch = confirm_switch;
-
-                percpu_ref_get(ref);    /* put after confirmation */
-                call_rcu_sched(&ref->rcu, percpu_ref_call_confirm_rcu);
+        if (ref->percpu_count_ptr & __PERCPU_REF_ATOMIC) {
+                if (confirm_switch)
+                        confirm_switch(ref);
+                return;
         }
-}
 
-/**
- * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
- * @ref: percpu_ref to switch to atomic mode
- * @confirm_switch: optional confirmation callback
- *
- * There's no reason to use this function for the usual reference counting.
- * Use percpu_ref_kill[_and_confirm]().
- *
- * Schedule switching of @ref to atomic mode.  All its percpu counts will
- * be collected to the main atomic counter.  On completion, when all CPUs
- * are guaraneed to be in atomic mode, @confirm_switch, which may not
- * block, is invoked.  This function may be invoked concurrently with all
- * the get/put operations and can safely be mixed with kill and reinit
- * operations.  Note that @ref will stay in atomic mode across kill/reinit
- * cycles until percpu_ref_switch_to_percpu() is called.
- *
- * This function normally doesn't block and can be called from any context
- * but it may block if @confirm_kill is specified and @ref is already in
- * the process of switching to atomic mode.  In such cases, @confirm_switch
- * will be invoked after the switching is complete.
- *
- * Due to the way percpu_ref is implemented, @confirm_switch will be called
- * after at least one full sched RCU grace period has passed but this is an
- * implementation detail and must not be depended upon.
- */
-void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
-                                 percpu_ref_func_t *confirm_switch)
-{
-        ref->force_atomic = true;
-        __percpu_ref_switch_to_atomic(ref, confirm_switch);
+        /* switching from percpu to atomic */
+        ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
+
+        /*
+         * Non-NULL ->confirm_switch is used to indicate that switching is
+         * in progress.  Use noop one if unspecified.
+         */
+        ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch;
+
+        percpu_ref_get(ref);    /* put after confirmation */
+        call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
 }
 
 static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
@@ -233,8 +194,6 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
         if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
                 return;
 
-        wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
-
         atomic_long_add(PERCPU_COUNT_BIAS, &ref->count);
 
         /*
@@ -250,6 +209,58 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
                   ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
 }
 
+static void __percpu_ref_switch_mode(struct percpu_ref *ref,
+                                     percpu_ref_func_t *confirm_switch)
+{
+        lockdep_assert_held(&percpu_ref_switch_lock);
+
+        /*
+         * If the previous ATOMIC switching hasn't finished yet, wait for
+         * its completion.  If the caller ensures that ATOMIC switching
+         * isn't in progress, this function can be called from any context.
+         */
+        wait_event_lock_irq(percpu_ref_switch_waitq, !ref->confirm_switch,
+                            percpu_ref_switch_lock);
+
+        if (ref->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD))
+                __percpu_ref_switch_to_atomic(ref, confirm_switch);
+        else
+                __percpu_ref_switch_to_percpu(ref);
+}
+
+/**
+ * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
+ * @ref: percpu_ref to switch to atomic mode
+ * @confirm_switch: optional confirmation callback
+ *
+ * There's no reason to use this function for the usual reference counting.
+ * Use percpu_ref_kill[_and_confirm]().
+ *
+ * Schedule switching of @ref to atomic mode.  All its percpu counts will
+ * be collected to the main atomic counter.  On completion, when all CPUs
+ * are guaraneed to be in atomic mode, @confirm_switch, which may not
+ * block, is invoked.  This function may be invoked concurrently with all
+ * the get/put operations and can safely be mixed with kill and reinit
+ * operations.  Note that @ref will stay in atomic mode across kill/reinit
+ * cycles until percpu_ref_switch_to_percpu() is called.
+ *
+ * This function may block if @ref is in the process of switching to atomic
+ * mode.  If the caller ensures that @ref is not in the process of
+ * switching to atomic mode, this function can be called from any context.
+ */
+void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+                                 percpu_ref_func_t *confirm_switch)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(&percpu_ref_switch_lock, flags);
+
+        ref->force_atomic = true;
+        __percpu_ref_switch_mode(ref, confirm_switch);
+
+        spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
+}
+
 /**
  * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
  * @ref: percpu_ref to switch to percpu mode
@@ -264,17 +275,20 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
  * dying or dead, the actual switching takes place on the following
  * percpu_ref_reinit().
  *
- * This function normally doesn't block and can be called from any context
- * but it may block if @ref is in the process of switching to atomic mode
- * by percpu_ref_switch_atomic().
+ * This function may block if @ref is in the process of switching to atomic
+ * mode.  If the caller ensures that @ref is not in the process of
+ * switching to atomic mode, this function can be called from any context.
  */
 void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
 {
+        unsigned long flags;
+
+        spin_lock_irqsave(&percpu_ref_switch_lock, flags);
+
         ref->force_atomic = false;
+        __percpu_ref_switch_mode(ref, NULL);
 
-        /* a dying or dead ref can't be switched to percpu mode w/o reinit */
-        if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD))
-                __percpu_ref_switch_to_percpu(ref);
+        spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
 }
 
 /**
@@ -290,21 +304,23 @@ void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
  *
  * This function normally doesn't block and can be called from any context
  * but it may block if @confirm_kill is specified and @ref is in the
- * process of switching to atomic mode by percpu_ref_switch_atomic().
- *
- * Due to the way percpu_ref is implemented, @confirm_switch will be called
- * after at least one full sched RCU grace period has passed but this is an
- * implementation detail and must not be depended upon.
+ * process of switching to atomic mode by percpu_ref_switch_to_atomic().
  */
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
                                  percpu_ref_func_t *confirm_kill)
 {
+        unsigned long flags;
+
+        spin_lock_irqsave(&percpu_ref_switch_lock, flags);
+
         WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
                   "%s called more than once on %pf!", __func__, ref->release);
 
         ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
-        __percpu_ref_switch_to_atomic(ref, confirm_kill);
+        __percpu_ref_switch_mode(ref, confirm_kill);
         percpu_ref_put(ref);
+
+        spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
 
@@ -321,11 +337,16 @@ EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
  */
 void percpu_ref_reinit(struct percpu_ref *ref)
 {
+        unsigned long flags;
+
+        spin_lock_irqsave(&percpu_ref_switch_lock, flags);
+
         WARN_ON_ONCE(!percpu_ref_is_zero(ref));
 
         ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
         percpu_ref_get(ref);
-        if (!ref->force_atomic)
-                __percpu_ref_switch_to_percpu(ref);
+        __percpu_ref_switch_mode(ref, NULL);
+
+        spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_reinit);
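
Editorial note: this refactor routes every mode change (switch to atomic, switch to percpu, kill, reinit) through __percpu_ref_switch_mode() while holding percpu_ref_switch_lock, so concurrent callers serialize on the lock and on the in-flight ->confirm_switch instead of racing. A hedged sketch of how the public API is typically used; demo_ref, demo_release and the demo_* functions are hypothetical names, not taken from this patch:

#include <linux/percpu-refcount.h>

static struct percpu_ref demo_ref;              /* hypothetical */

static void demo_release(struct percpu_ref *ref)
{
        /* runs once the killed ref finally drops to zero */
}

static int demo_start(void)
{
        return percpu_ref_init(&demo_ref, demo_release, 0, GFP_KERNEL);
}

static void demo_quiesce(void)
{
        /*
         * Collect the per-cpu counts into the atomic counter.  With the
         * locking above this may sleep if another switch is still waiting
         * for its RCU callback, since __percpu_ref_switch_mode() waits on
         * ->confirm_switch under percpu_ref_switch_lock.
         */
        percpu_ref_switch_to_atomic(&demo_ref, NULL);
}

static void demo_shutdown(void)
{
        percpu_ref_kill(&demo_ref);     /* switch to atomic + drop base ref */
        /* real code waits for demo_release(), e.g. via a completion */
        percpu_ref_exit(&demo_ref);
}
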
diff --git a/mm/percpu.c b/mm/percpu.c
index 9903830aaebb..255714302394 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1961,8 +1961,9 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
         void *base = (void *)ULONG_MAX;
         void **areas = NULL;
         struct pcpu_alloc_info *ai;
-        size_t size_sum, areas_size, max_distance;
-        int group, i, rc;
+        size_t size_sum, areas_size;
+        unsigned long max_distance;
+        int group, i, highest_group, rc;
 
         ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size,
                                    cpu_distance_fn);
@@ -1978,7 +1979,8 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
                 goto out_free;
         }
 
-        /* allocate, copy and determine base address */
+        /* allocate, copy and determine base address & max_distance */
+        highest_group = 0;
         for (group = 0; group < ai->nr_groups; group++) {
                 struct pcpu_group_info *gi = &ai->groups[group];
                 unsigned int cpu = NR_CPUS;
@@ -1999,6 +2001,21 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
                 areas[group] = ptr;
 
                 base = min(ptr, base);
+                if (ptr > areas[highest_group])
+                        highest_group = group;
+        }
+        max_distance = areas[highest_group] - base;
+        max_distance += ai->unit_size * ai->groups[highest_group].nr_units;
+
+        /* warn if maximum distance is further than 75% of vmalloc space */
+        if (max_distance > VMALLOC_TOTAL * 3 / 4) {
+                pr_warn("max_distance=0x%lx too large for vmalloc space 0x%lx\n",
+                                max_distance, VMALLOC_TOTAL);
+#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+                /* and fail if we have fallback */
+                rc = -EINVAL;
+                goto out_free_areas;
+#endif
         }
 
         /*
@@ -2023,23 +2040,8 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
         }
 
         /* base address is now known, determine group base offsets */
-        max_distance = 0;
         for (group = 0; group < ai->nr_groups; group++) {
                 ai->groups[group].base_offset = areas[group] - base;
-                max_distance = max_t(size_t, max_distance,
-                                     ai->groups[group].base_offset);
-        }
-        max_distance += ai->unit_size;
-
-        /* warn if maximum distance is further than 75% of vmalloc space */
-        if (max_distance > VMALLOC_TOTAL * 3 / 4) {
-                pr_warn("max_distance=0x%zx too large for vmalloc space 0x%lx\n",
-                        max_distance, VMALLOC_TOTAL);
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
-                /* and fail if we have fallback */
-                rc = -EINVAL;
-                goto out_free;
-#endif
         }
 
         pr_info("Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n",
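
Editorial note: the mm/percpu.c change moves the max_distance check ahead of the embed setup. While the group areas are being allocated, the loop remembers the group with the highest base address, computes the overall span from the lowest base to the end of that group, and on an oversized span bails out via out_free_areas before the static data is copied or the first chunk is set up. A small standalone model of that bookkeeping, using hypothetical simplified types purely for illustration:

struct demo_group_info {
        int                     nr_units;       /* units (cpus) in this group */
};

struct demo_alloc_info {
        int                     nr_groups;
        unsigned long           unit_size;
        struct demo_group_info  *groups;
};

/* Span from the lowest group base to the end of the highest group. */
static unsigned long demo_max_distance(char * const *areas, const char *base,
                                       const struct demo_alloc_info *ai)
{
        int group, highest_group = 0;

        /* remember which group landed at the highest base address */
        for (group = 0; group < ai->nr_groups; group++)
                if (areas[group] > areas[highest_group])
                        highest_group = group;

        return (unsigned long)(areas[highest_group] - base) +
               ai->unit_size * ai->groups[highest_group].nr_units;
}

The resulting value is what the hunk above compares against 75% of the vmalloc range right after its allocation loop.
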