diff options
-rw-r--r-- | kernel/smp.c | 162 |
1 files changed, 86 insertions, 76 deletions
diff --git a/kernel/smp.c b/kernel/smp.c index f5308258891a..7ad2262d2eca 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -2,13 +2,12 @@ | |||
2 | * Generic helpers for smp ipi calls | 2 | * Generic helpers for smp ipi calls |
3 | * | 3 | * |
4 | * (C) Jens Axboe <jens.axboe@oracle.com> 2008 | 4 | * (C) Jens Axboe <jens.axboe@oracle.com> 2008 |
5 | * | ||
6 | */ | 5 | */ |
7 | #include <linux/init.h> | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/percpu.h> | ||
10 | #include <linux/rcupdate.h> | 6 | #include <linux/rcupdate.h> |
11 | #include <linux/rculist.h> | 7 | #include <linux/rculist.h> |
8 | #include <linux/module.h> | ||
9 | #include <linux/percpu.h> | ||
10 | #include <linux/init.h> | ||
12 | #include <linux/smp.h> | 11 | #include <linux/smp.h> |
13 | #include <linux/cpu.h> | 12 | #include <linux/cpu.h> |
14 | 13 | ||
@@ -17,29 +16,30 @@ static DEFINE_PER_CPU(struct call_single_queue, call_single_queue); | |||
17 | static struct { | 16 | static struct { |
18 | struct list_head queue; | 17 | struct list_head queue; |
19 | spinlock_t lock; | 18 | spinlock_t lock; |
20 | } call_function __cacheline_aligned_in_smp = { | 19 | } call_function __cacheline_aligned_in_smp = |
21 | .queue = LIST_HEAD_INIT(call_function.queue), | 20 | { |
22 | .lock = __SPIN_LOCK_UNLOCKED(call_function.lock), | 21 | .queue = LIST_HEAD_INIT(call_function.queue), |
23 | }; | 22 | .lock = __SPIN_LOCK_UNLOCKED(call_function.lock), |
23 | }; | ||
24 | 24 | ||
25 | enum { | 25 | enum { |
26 | CSD_FLAG_LOCK = 0x01, | 26 | CSD_FLAG_LOCK = 0x01, |
27 | }; | 27 | }; |
28 | 28 | ||
29 | struct call_function_data { | 29 | struct call_function_data { |
30 | struct call_single_data csd; | 30 | struct call_single_data csd; |
31 | spinlock_t lock; | 31 | spinlock_t lock; |
32 | unsigned int refs; | 32 | unsigned int refs; |
33 | cpumask_var_t cpumask; | 33 | cpumask_var_t cpumask; |
34 | }; | 34 | }; |
35 | 35 | ||
36 | struct call_single_queue { | 36 | struct call_single_queue { |
37 | struct list_head list; | 37 | struct list_head list; |
38 | spinlock_t lock; | 38 | spinlock_t lock; |
39 | }; | 39 | }; |
40 | 40 | ||
41 | static DEFINE_PER_CPU(struct call_function_data, cfd_data) = { | 41 | static DEFINE_PER_CPU(struct call_function_data, cfd_data) = { |
42 | .lock = __SPIN_LOCK_UNLOCKED(cfd_data.lock), | 42 | .lock = __SPIN_LOCK_UNLOCKED(cfd_data.lock), |
43 | }; | 43 | }; |
44 | 44 | ||
45 | static int | 45 | static int |
@@ -71,7 +71,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
71 | } | 71 | } |
72 | 72 | ||
73 | static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { | 73 | static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { |
74 | .notifier_call = hotplug_cfd, | 74 | .notifier_call = hotplug_cfd, |
75 | }; | 75 | }; |
76 | 76 | ||
77 | static int __cpuinit init_call_single_data(void) | 77 | static int __cpuinit init_call_single_data(void) |
@@ -96,9 +96,9 @@ early_initcall(init_call_single_data); | |||
96 | /* | 96 | /* |
97 | * csd_lock/csd_unlock used to serialize access to per-cpu csd resources | 97 | * csd_lock/csd_unlock used to serialize access to per-cpu csd resources |
98 | * | 98 | * |
99 | * For non-synchronous ipi calls the csd can still be in use by the previous | 99 | * For non-synchronous ipi calls the csd can still be in use by the |
100 | * function call. For multi-cpu calls its even more interesting as we'll have | 100 | * previous function call. For multi-cpu calls its even more interesting |
101 | * to ensure no other cpu is observing our csd. | 101 | * as we'll have to ensure no other cpu is observing our csd. |
102 | */ | 102 | */ |
103 | static void csd_lock_wait(struct call_single_data *data) | 103 | static void csd_lock_wait(struct call_single_data *data) |
104 | { | 104 | { |
@@ -112,27 +112,29 @@ static void csd_lock(struct call_single_data *data) | |||
112 | data->flags = CSD_FLAG_LOCK; | 112 | data->flags = CSD_FLAG_LOCK; |
113 | 113 | ||
114 | /* | 114 | /* |
115 | * prevent CPU from reordering the above assignment to ->flags | 115 | * prevent CPU from reordering the above assignment |
116 | * with any subsequent assignments to other fields of the | 116 | * to ->flags with any subsequent assignments to other |
117 | * specified call_single_data structure. | 117 | * fields of the specified call_single_data structure: |
118 | */ | 118 | */ |
119 | |||
120 | smp_mb(); | 119 | smp_mb(); |
121 | } | 120 | } |
122 | 121 | ||
123 | static void csd_unlock(struct call_single_data *data) | 122 | static void csd_unlock(struct call_single_data *data) |
124 | { | 123 | { |
125 | WARN_ON(!(data->flags & CSD_FLAG_LOCK)); | 124 | WARN_ON(!(data->flags & CSD_FLAG_LOCK)); |
125 | |||
126 | /* | 126 | /* |
127 | * ensure we're all done before releasing data | 127 | * ensure we're all done before releasing data: |
128 | */ | 128 | */ |
129 | smp_mb(); | 129 | smp_mb(); |
130 | |||
130 | data->flags &= ~CSD_FLAG_LOCK; | 131 | data->flags &= ~CSD_FLAG_LOCK; |
131 | } | 132 | } |
132 | 133 | ||
133 | /* | 134 | /* |
134 | * Insert a previously allocated call_single_data element for execution | 135 | * Insert a previously allocated call_single_data element |
135 | * on the given CPU. data must already have ->func, ->info, and ->flags set. | 136 | * for execution on the given CPU. data must already have |
137 | * ->func, ->info, and ->flags set. | ||
136 | */ | 138 | */ |
137 | static | 139 | static |
138 | void generic_exec_single(int cpu, struct call_single_data *data, int wait) | 140 | void generic_exec_single(int cpu, struct call_single_data *data, int wait) |
@@ -154,10 +156,9 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait) | |||
154 | * If IPIs can go out of order to the cache coherency protocol | 156 | * If IPIs can go out of order to the cache coherency protocol |
155 | * in an architecture, sufficient synchronisation should be added | 157 | * in an architecture, sufficient synchronisation should be added |
156 | * to arch code to make it appear to obey cache coherency WRT | 158 | * to arch code to make it appear to obey cache coherency WRT |
157 | * locking and barrier primitives. Generic code isn't really equipped | 159 | * locking and barrier primitives. Generic code isn't really |
158 | * to do the right thing... | 160 | * equipped to do the right thing... |
159 | */ | 161 | */ |
160 | |||
161 | if (ipi) | 162 | if (ipi) |
162 | arch_send_call_function_single_ipi(cpu); | 163 | arch_send_call_function_single_ipi(cpu); |
163 | 164 | ||
@@ -183,8 +184,8 @@ void generic_smp_call_function_interrupt(void) | |||
183 | smp_mb(); | 184 | smp_mb(); |
184 | 185 | ||
185 | /* | 186 | /* |
186 | * It's ok to use list_for_each_rcu() here even though we may delete | 187 | * It's ok to use list_for_each_rcu() here even though we may |
187 | * 'pos', since list_del_rcu() doesn't clear ->next | 188 | * delete 'pos', since list_del_rcu() doesn't clear ->next |
188 | */ | 189 | */ |
189 | list_for_each_entry_rcu(data, &call_function.queue, csd.list) { | 190 | list_for_each_entry_rcu(data, &call_function.queue, csd.list) { |
190 | int refs; | 191 | int refs; |
@@ -219,14 +220,14 @@ void generic_smp_call_function_interrupt(void) | |||
219 | } | 220 | } |
220 | 221 | ||
221 | /* | 222 | /* |
222 | * Invoked by arch to handle an IPI for call function single. Must be called | 223 | * Invoked by arch to handle an IPI for call function single. Must be |
223 | * from the arch with interrupts disabled. | 224 | * called from the arch with interrupts disabled. |
224 | */ | 225 | */ |
225 | void generic_smp_call_function_single_interrupt(void) | 226 | void generic_smp_call_function_single_interrupt(void) |
226 | { | 227 | { |
227 | struct call_single_queue *q = &__get_cpu_var(call_single_queue); | 228 | struct call_single_queue *q = &__get_cpu_var(call_single_queue); |
228 | LIST_HEAD(list); | ||
229 | unsigned int data_flags; | 229 | unsigned int data_flags; |
230 | LIST_HEAD(list); | ||
230 | 231 | ||
231 | spin_lock(&q->lock); | 232 | spin_lock(&q->lock); |
232 | list_replace_init(&q->list, &list); | 233 | list_replace_init(&q->list, &list); |
@@ -235,22 +236,20 @@ void generic_smp_call_function_single_interrupt(void) | |||
235 | while (!list_empty(&list)) { | 236 | while (!list_empty(&list)) { |
236 | struct call_single_data *data; | 237 | struct call_single_data *data; |
237 | 238 | ||
238 | data = list_entry(list.next, struct call_single_data, | 239 | data = list_entry(list.next, struct call_single_data, list); |
239 | list); | ||
240 | list_del(&data->list); | 240 | list_del(&data->list); |
241 | 241 | ||
242 | /* | 242 | /* |
243 | * 'data' can be invalid after this call if | 243 | * 'data' can be invalid after this call if flags == 0 |
244 | * flags == 0 (when called through | 244 | * (when called through generic_exec_single()), |
245 | * generic_exec_single(), so save them away before | 245 | * so save them away before making the call: |
246 | * making the call. | ||
247 | */ | 246 | */ |
248 | data_flags = data->flags; | 247 | data_flags = data->flags; |
249 | 248 | ||
250 | data->func(data->info); | 249 | data->func(data->info); |
251 | 250 | ||
252 | /* | 251 | /* |
253 | * Unlocked CSDs are valid through generic_exec_single() | 252 | * Unlocked CSDs are valid through generic_exec_single(): |
254 | */ | 253 | */ |
255 | if (data_flags & CSD_FLAG_LOCK) | 254 | if (data_flags & CSD_FLAG_LOCK) |
256 | csd_unlock(data); | 255 | csd_unlock(data); |
@@ -276,34 +275,41 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | |||
276 | .flags = 0, | 275 | .flags = 0, |
277 | }; | 276 | }; |
278 | unsigned long flags; | 277 | unsigned long flags; |
279 | /* prevent preemption and reschedule on another processor, | 278 | int this_cpu; |
280 | as well as CPU removal */ | ||
281 | int me = get_cpu(); | ||
282 | int err = 0; | 279 | int err = 0; |
283 | 280 | ||
281 | /* | ||
282 | * prevent preemption and reschedule on another processor, | ||
283 | * as well as CPU removal | ||
284 | */ | ||
285 | this_cpu = get_cpu(); | ||
286 | |||
284 | /* Can deadlock when called with interrupts disabled */ | 287 | /* Can deadlock when called with interrupts disabled */ |
285 | WARN_ON(irqs_disabled()); | 288 | WARN_ON(irqs_disabled()); |
286 | 289 | ||
287 | if (cpu == me) { | 290 | if (cpu == this_cpu) { |
288 | local_irq_save(flags); | 291 | local_irq_save(flags); |
289 | func(info); | 292 | func(info); |
290 | local_irq_restore(flags); | 293 | local_irq_restore(flags); |
291 | } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { | 294 | } else { |
292 | struct call_single_data *data = &d; | 295 | if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { |
296 | struct call_single_data *data = &d; | ||
293 | 297 | ||
294 | if (!wait) | 298 | if (!wait) |
295 | data = &__get_cpu_var(csd_data); | 299 | data = &__get_cpu_var(csd_data); |
296 | 300 | ||
297 | csd_lock(data); | 301 | csd_lock(data); |
298 | 302 | ||
299 | data->func = func; | 303 | data->func = func; |
300 | data->info = info; | 304 | data->info = info; |
301 | generic_exec_single(cpu, data, wait); | 305 | generic_exec_single(cpu, data, wait); |
302 | } else { | 306 | } else { |
303 | err = -ENXIO; /* CPU not online */ | 307 | err = -ENXIO; /* CPU not online */ |
308 | } | ||
304 | } | 309 | } |
305 | 310 | ||
306 | put_cpu(); | 311 | put_cpu(); |
312 | |||
307 | return err; | 313 | return err; |
308 | } | 314 | } |
309 | EXPORT_SYMBOL(smp_call_function_single); | 315 | EXPORT_SYMBOL(smp_call_function_single); |
@@ -313,10 +319,9 @@ EXPORT_SYMBOL(smp_call_function_single); | |||
313 | * @cpu: The CPU to run on. | 319 | * @cpu: The CPU to run on. |
314 | * @data: Pre-allocated and setup data structure | 320 | * @data: Pre-allocated and setup data structure |
315 | * | 321 | * |
316 | * Like smp_call_function_single(), but allow caller to pass in a pre-allocated | 322 | * Like smp_call_function_single(), but allow caller to pass in a |
317 | * data structure. Useful for embedding @data inside other structures, for | 323 | * pre-allocated data structure. Useful for embedding @data inside |
318 | * instance. | 324 | * other structures, for instance. |
319 | * | ||
320 | */ | 325 | */ |
321 | void __smp_call_function_single(int cpu, struct call_single_data *data, | 326 | void __smp_call_function_single(int cpu, struct call_single_data *data, |
322 | int wait) | 327 | int wait) |
@@ -329,10 +334,11 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, | |||
329 | generic_exec_single(cpu, data, wait); | 334 | generic_exec_single(cpu, data, wait); |
330 | } | 335 | } |
331 | 336 | ||
332 | /* FIXME: Shim for archs using old arch_send_call_function_ipi API. */ | 337 | /* Deprecated: shim for archs using old arch_send_call_function_ipi API. */ |
338 | |||
333 | #ifndef arch_send_call_function_ipi_mask | 339 | #ifndef arch_send_call_function_ipi_mask |
334 | #define arch_send_call_function_ipi_mask(maskp) \ | 340 | # define arch_send_call_function_ipi_mask(maskp) \ |
335 | arch_send_call_function_ipi(*(maskp)) | 341 | arch_send_call_function_ipi(*(maskp)) |
336 | #endif | 342 | #endif |
337 | 343 | ||
338 | /** | 344 | /** |
@@ -340,7 +346,8 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, | |||
340 | * @mask: The set of cpus to run on (only runs on online subset). | 346 | * @mask: The set of cpus to run on (only runs on online subset). |
341 | * @func: The function to run. This must be fast and non-blocking. | 347 | * @func: The function to run. This must be fast and non-blocking. |
342 | * @info: An arbitrary pointer to pass to the function. | 348 | * @info: An arbitrary pointer to pass to the function. |
343 | * @wait: If true, wait (atomically) until function has completed on other CPUs. | 349 | * @wait: If true, wait (atomically) until function has completed |
350 | * on other CPUs. | ||
344 | * | 351 | * |
345 | * If @wait is true, then returns once @func has returned. Note that @wait | 352 | * If @wait is true, then returns once @func has returned. Note that @wait |
346 | * will be implicitly turned on in case of allocation failures, since | 353 | * will be implicitly turned on in case of allocation failures, since |
@@ -351,27 +358,27 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, | |||
351 | * must be disabled when calling this function. | 358 | * must be disabled when calling this function. |
352 | */ | 359 | */ |
353 | void smp_call_function_many(const struct cpumask *mask, | 360 | void smp_call_function_many(const struct cpumask *mask, |
354 | void (*func)(void *), void *info, | 361 | void (*func)(void *), void *info, bool wait) |
355 | bool wait) | ||
356 | { | 362 | { |
357 | struct call_function_data *data; | 363 | struct call_function_data *data; |
358 | unsigned long flags; | 364 | unsigned long flags; |
359 | int cpu, next_cpu, me = smp_processor_id(); | 365 | int cpu, next_cpu, this_cpu = smp_processor_id(); |
360 | 366 | ||
361 | /* Can deadlock when called with interrupts disabled */ | 367 | /* Can deadlock when called with interrupts disabled */ |
362 | WARN_ON(irqs_disabled()); | 368 | WARN_ON(irqs_disabled()); |
363 | 369 | ||
364 | /* So, what's a CPU they want? Ignoring this one. */ | 370 | /* So, what's a CPU they want? Ignoring this one. */ |
365 | cpu = cpumask_first_and(mask, cpu_online_mask); | 371 | cpu = cpumask_first_and(mask, cpu_online_mask); |
366 | if (cpu == me) | 372 | if (cpu == this_cpu) |
367 | cpu = cpumask_next_and(cpu, mask, cpu_online_mask); | 373 | cpu = cpumask_next_and(cpu, mask, cpu_online_mask); |
374 | |||
368 | /* No online cpus? We're done. */ | 375 | /* No online cpus? We're done. */ |
369 | if (cpu >= nr_cpu_ids) | 376 | if (cpu >= nr_cpu_ids) |
370 | return; | 377 | return; |
371 | 378 | ||
372 | /* Do we have another CPU which isn't us? */ | 379 | /* Do we have another CPU which isn't us? */ |
373 | next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask); | 380 | next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask); |
374 | if (next_cpu == me) | 381 | if (next_cpu == this_cpu) |
375 | next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask); | 382 | next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask); |
376 | 383 | ||
377 | /* Fastpath: do that cpu by itself. */ | 384 | /* Fastpath: do that cpu by itself. */ |
@@ -387,30 +394,31 @@ void smp_call_function_many(const struct cpumask *mask, | |||
387 | data->csd.func = func; | 394 | data->csd.func = func; |
388 | data->csd.info = info; | 395 | data->csd.info = info; |
389 | cpumask_and(data->cpumask, mask, cpu_online_mask); | 396 | cpumask_and(data->cpumask, mask, cpu_online_mask); |
390 | cpumask_clear_cpu(me, data->cpumask); | 397 | cpumask_clear_cpu(this_cpu, data->cpumask); |
391 | data->refs = cpumask_weight(data->cpumask); | 398 | data->refs = cpumask_weight(data->cpumask); |
392 | 399 | ||
393 | spin_lock(&call_function.lock); | 400 | spin_lock(&call_function.lock); |
394 | /* | 401 | /* |
395 | * Place entry at the _HEAD_ of the list, so that any cpu still | 402 | * Place entry at the _HEAD_ of the list, so that any cpu still |
396 | * observing the entry in generic_smp_call_function_interrupt() will | 403 | * observing the entry in generic_smp_call_function_interrupt() |
397 | * not miss any other list entries. | 404 | * will not miss any other list entries: |
398 | */ | 405 | */ |
399 | list_add_rcu(&data->csd.list, &call_function.queue); | 406 | list_add_rcu(&data->csd.list, &call_function.queue); |
400 | spin_unlock(&call_function.lock); | 407 | spin_unlock(&call_function.lock); |
408 | |||
401 | spin_unlock_irqrestore(&data->lock, flags); | 409 | spin_unlock_irqrestore(&data->lock, flags); |
402 | 410 | ||
403 | /* | 411 | /* |
404 | * Make the list addition visible before sending the ipi. | 412 | * Make the list addition visible before sending the ipi. |
405 | * (IPIs must obey or appear to obey normal Linux cache coherency | 413 | * (IPIs must obey or appear to obey normal Linux cache |
406 | * rules -- see comment in generic_exec_single). | 414 | * coherency rules -- see comment in generic_exec_single). |
407 | */ | 415 | */ |
408 | smp_mb(); | 416 | smp_mb(); |
409 | 417 | ||
410 | /* Send a message to all CPUs in the map */ | 418 | /* Send a message to all CPUs in the map */ |
411 | arch_send_call_function_ipi_mask(data->cpumask); | 419 | arch_send_call_function_ipi_mask(data->cpumask); |
412 | 420 | ||
413 | /* optionally wait for the CPUs to complete */ | 421 | /* Optionally wait for the CPUs to complete */ |
414 | if (wait) | 422 | if (wait) |
415 | csd_lock_wait(&data->csd); | 423 | csd_lock_wait(&data->csd); |
416 | } | 424 | } |
@@ -420,7 +428,8 @@ EXPORT_SYMBOL(smp_call_function_many); | |||
420 | * smp_call_function(): Run a function on all other CPUs. | 428 | * smp_call_function(): Run a function on all other CPUs. |
421 | * @func: The function to run. This must be fast and non-blocking. | 429 | * @func: The function to run. This must be fast and non-blocking. |
422 | * @info: An arbitrary pointer to pass to the function. | 430 | * @info: An arbitrary pointer to pass to the function. |
423 | * @wait: If true, wait (atomically) until function has completed on other CPUs. | 431 | * @wait: If true, wait (atomically) until function has completed |
432 | * on other CPUs. | ||
424 | * | 433 | * |
425 | * Returns 0. | 434 | * Returns 0. |
426 | * | 435 | * |
@@ -436,6 +445,7 @@ int smp_call_function(void (*func)(void *), void *info, int wait) | |||
436 | preempt_disable(); | 445 | preempt_disable(); |
437 | smp_call_function_many(cpu_online_mask, func, info, wait); | 446 | smp_call_function_many(cpu_online_mask, func, info, wait); |
438 | preempt_enable(); | 447 | preempt_enable(); |
448 | |||
439 | return 0; | 449 | return 0; |
440 | } | 450 | } |
441 | EXPORT_SYMBOL(smp_call_function); | 451 | EXPORT_SYMBOL(smp_call_function); |