Diffstat (limited to 'kernel/smp.c')
-rw-r--r--  kernel/smp.c | 210
1 files changed, 191 insertions, 19 deletions
diff --git a/kernel/smp.c b/kernel/smp.c
index ed6aacfcb7ef..fb67dfa8394e 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -13,6 +13,7 @@
 #include <linux/smp.h>
 #include <linux/cpu.h>
 
+#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
 static struct {
 	struct list_head queue;
 	raw_spinlock_t lock;
@@ -73,7 +74,7 @@ static struct notifier_block __cpuinitdata hotplug_cfd_notifier = {
 	.notifier_call = hotplug_cfd,
 };
 
-static int __cpuinit init_call_single_data(void)
+void __init call_function_init(void)
 {
 	void *cpu = (void *)(long)smp_processor_id();
 	int i;
@@ -87,10 +88,7 @@ static int __cpuinit init_call_single_data(void)
 
 	hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
 	register_cpu_notifier(&hotplug_cfd_notifier);
-
-	return 0;
 }
-early_initcall(init_call_single_data);
 
 /*
  * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
@@ -193,23 +191,52 @@ void generic_smp_call_function_interrupt(void)
 	 */
 	list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
 		int refs;
+		smp_call_func_t func;
+
+		/*
+		 * Since we walk the list without any locks, we might
+		 * see an entry that was completed, removed from the
+		 * list and is in the process of being reused.
+		 *
+		 * We must check that the cpu is in the cpumask before
+		 * checking the refs, and both must be set before
+		 * executing the callback on this cpu.
+		 */
 
-		if (!cpumask_test_and_clear_cpu(cpu, data->cpumask))
+		if (!cpumask_test_cpu(cpu, data->cpumask))
 			continue;
 
-		data->csd.func(data->csd.info);
+		smp_rmb();
+
+		if (atomic_read(&data->refs) == 0)
+			continue;
+
+		func = data->csd.func;		/* save for later warn */
+		func(data->csd.info);
+
+		/*
+		 * If the cpu mask is not still set then func enabled
+		 * interrupts (BUG), and this cpu took another smp call
+		 * function interrupt and executed func(info) twice
+		 * on this cpu.  That nested execution decremented refs.
+		 */
+		if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) {
+			WARN(1, "%pf enabled interrupts and double executed\n", func);
+			continue;
+		}
 
 		refs = atomic_dec_return(&data->refs);
 		WARN_ON(refs < 0);
-		if (!refs) {
-			raw_spin_lock(&call_function.lock);
-			list_del_rcu(&data->csd.list);
-			raw_spin_unlock(&call_function.lock);
-		}
 
 		if (refs)
 			continue;
 
+		WARN_ON(!cpumask_empty(data->cpumask));
+
+		raw_spin_lock(&call_function.lock);
+		list_del_rcu(&data->csd.list);
+		raw_spin_unlock(&call_function.lock);
+
 		csd_unlock(&data->csd);
 	}
 
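The receive side above now pairs an smp_rmb(), placed between the cpumask test and the refs read, with a write barrier on the sending side. The shape is the usual publish/consume ordering: the writer makes the payload visible before the flag, and the reader checks the flag before touching the payload, with a read barrier in between. A minimal userspace model of that pairing, using C11 fences in place of the kernel's smp_wmb()/smp_rmb() (illustrative only, not kernel code):

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static int payload;          /* plays the role of the csd payload        */
static atomic_int ready;     /* plays the role of the "published" flag   */

static void *sender(void *arg)
{
	payload = 42;                               /* fill the data block      */
	atomic_thread_fence(memory_order_release);  /* ~ smp_wmb(): data first  */
	atomic_store_explicit(&ready, 1, memory_order_relaxed);
	return NULL;
}

static void *receiver(void *arg)
{
	/* Skip the entry until the flag is observed set, as the handler does. */
	while (!atomic_load_explicit(&ready, memory_order_relaxed))
		;
	atomic_thread_fence(memory_order_acquire);  /* ~ smp_rmb(): flag, then data */
	printf("payload=%d\n", payload);            /* guaranteed to print 42 */
	return NULL;
}

int main(void)
{
	pthread_t s, r;
	pthread_create(&r, NULL, receiver, NULL);
	pthread_create(&s, NULL, sender, NULL);
	pthread_join(s, NULL);
	pthread_join(r, NULL);
	return 0;
}

In kernel/smp.c the roles are played by the csd payload, the cpumask bits and the refs counter, with list_add_rcu() supplying the write barrier on the sending side (see the later hunks).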
@@ -267,7 +294,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
  *
  * Returns 0 on success, else a negative status code.
  */
-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
+int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 			     int wait)
 {
 	struct call_single_data d = {
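This and the following prototype hunks only swap the open-coded function-pointer parameter for the smp_call_func_t typedef. For reference, the typedef lives in include/linux/smp.h and is (roughly) just a type alias, so no behavioural change is intended:

/* include/linux/smp.h, shown for reference */
typedef void (*smp_call_func_t)(void *info);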
@@ -336,7 +363,7 @@ EXPORT_SYMBOL(smp_call_function_single);
  * 3) any other online cpu in @mask
  */
 int smp_call_function_any(const struct cpumask *mask,
-			  void (*func)(void *info), void *info, int wait)
+			  smp_call_func_t func, void *info, int wait)
 {
 	unsigned int cpu;
 	const struct cpumask *nodemask;
@@ -416,11 +443,11 @@ void __smp_call_function_single(int cpu, struct call_single_data *data,
 * must be disabled when calling this function.
 */
 void smp_call_function_many(const struct cpumask *mask,
-			    void (*func)(void *), void *info, bool wait)
+			    smp_call_func_t func, void *info, bool wait)
 {
 	struct call_function_data *data;
 	unsigned long flags;
-	int cpu, next_cpu, this_cpu = smp_processor_id();
+	int refs, cpu, next_cpu, this_cpu = smp_processor_id();
 
 	/*
 	 * Can deadlock when called with interrupts disabled.
@@ -429,9 +456,9 @@ void smp_call_function_many(const struct cpumask *mask,
 	 * can't happen.
 	 */
 	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
-		     && !oops_in_progress);
+		     && !oops_in_progress && !early_boot_irqs_disabled);
 
-	/* So, what's a CPU they want? Ignoring this one. */
+	/* Try to fastpath.  So, what's a CPU they want? Ignoring this one. */
 	cpu = cpumask_first_and(mask, cpu_online_mask);
 	if (cpu == this_cpu)
 		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
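The reworded comment refers to the single-target fastpath a few (unchanged) lines below this hunk: cpumask_first_and()/cpumask_next_and() pick the first and second online CPUs in @mask other than the caller, and if there is no second one the request can be handed to smp_call_function_single() instead of queueing a call_function_data element. A rough sketch of that shape (a reconstruction for orientation only, not lines from this patch):

	/* Sketch of the fastpath below this hunk; reconstruction, not the patch. */
	if (cpu >= nr_cpu_ids)			/* no online CPU in the mask */
		return;

	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
	if (next_cpu == this_cpu)
		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);

	if (next_cpu >= nr_cpu_ids) {		/* exactly one target: fastpath */
		smp_call_function_single(cpu, func, info, wait);
		return;
	}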
@@ -454,11 +481,48 @@ void smp_call_function_many(const struct cpumask *mask,
 	data = &__get_cpu_var(cfd_data);
 	csd_lock(&data->csd);
 
+	/* This BUG_ON verifies our reuse assertions and can be removed */
+	BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask));
+
+	/*
+	 * The global call function queue list add and delete are protected
+	 * by a lock, but the list is traversed without any lock, relying
+	 * on the rcu list add and delete to allow safe concurrent traversal.
+	 * We reuse the call function data without waiting for any grace
+	 * period after some other cpu removes it from the global queue.
+	 * This means a cpu might find our data block as it is being
+	 * filled out.
+	 *
+	 * We hold off the interrupt handler on the other cpu by
+	 * ordering our writes to the cpu mask vs our setting of the
+	 * refs counter.  We assert only the cpu owning the data block
+	 * will set a bit in cpumask, and each bit will only be cleared
+	 * by the subject cpu.  Each cpu must first find its bit is
+	 * set and then check that refs is set indicating the element is
+	 * ready to be processed, otherwise it must skip the entry.
+	 *
+	 * On the previous iteration refs was set to 0 by another cpu.
+	 * To avoid the use of transitivity, set the counter to 0 here
+	 * so the wmb will pair with the rmb in the interrupt handler.
+	 */
+	atomic_set(&data->refs, 0);	/* convert 3rd to 1st party write */
+
 	data->csd.func = func;
 	data->csd.info = info;
+
+	/* Ensure 0 refs is visible before mask.  Also orders func and info */
+	smp_wmb();
+
+	/* We rely on the "and" being processed before the store */
 	cpumask_and(data->cpumask, mask, cpu_online_mask);
 	cpumask_clear_cpu(this_cpu, data->cpumask);
-	atomic_set(&data->refs, cpumask_weight(data->cpumask));
+	refs = cpumask_weight(data->cpumask);
+
+	/* Some callers race with other cpus changing the passed mask */
+	if (unlikely(!refs)) {
+		csd_unlock(&data->csd);
+		return;
+	}
 
 	raw_spin_lock_irqsave(&call_function.lock, flags);
 	/*
@@ -467,6 +531,12 @@ void smp_call_function_many(const struct cpumask *mask,
 	 * will not miss any other list entries:
 	 */
 	list_add_rcu(&data->csd.list, &call_function.queue);
+	/*
+	 * We rely on the wmb() in list_add_rcu to complete our writes
+	 * to the cpumask before this write to refs, which indicates
+	 * data is on the list and is ready to be processed.
+	 */
+	atomic_set(&data->refs, refs);
 	raw_spin_unlock_irqrestore(&call_function.lock, flags);
 
 	/*
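Read together, the two preceding hunks fix the order in which a call_function_data element is published: refs is first rewritten to 0 by the owning cpu (so the following smp_wmb() pairs with the handler's smp_rmb() without relying on ordering established by a third cpu), the payload and cpumask are then filled in, and refs only becomes non-zero after list_add_rcu() has issued its own write barrier. A condensed, hypothetical helper showing that order in one place (orientation only, not code from the patch; the real code additionally holds call_function.lock around the list add and bails out early when the resulting mask is empty):

/* Hypothetical condensation of the publish order above; not from the patch. */
static void publish_cfd(struct call_function_data *data,
			const struct cpumask *mask, int this_cpu,
			smp_call_func_t func, void *info)
{
	int refs;

	atomic_set(&data->refs, 0);	/* owner rewrites the 0 itself       */
	data->csd.func = func;		/* payload ...                       */
	data->csd.info = info;
	smp_wmb();			/* ... made visible before the mask  */
	cpumask_and(data->cpumask, mask, cpu_online_mask);
	cpumask_clear_cpu(this_cpu, data->cpumask);
	refs = cpumask_weight(data->cpumask);

	list_add_rcu(&data->csd.list, &call_function.queue);
	/* list_add_rcu()'s barrier orders the mask and list linkage before refs */
	atomic_set(&data->refs, refs);	/* element is now live for handlers  */
}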
@@ -500,7 +570,7 @@ EXPORT_SYMBOL(smp_call_function_many);
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
-int smp_call_function(void (*func)(void *), void *info, int wait)
+int smp_call_function(smp_call_func_t func, void *info, int wait)
 {
 	preempt_disable();
 	smp_call_function_many(cpu_online_mask, func, info, wait);
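With the typedef in place, a caller simply passes any function matching smp_call_func_t. A small hypothetical usage example (not part of this patch): run a callback on every other online cpu and wait for completion.

#include <linux/kernel.h>
#include <linux/smp.h>

/* Matches smp_call_func_t: void (*)(void *info). */
static void say_hello(void *info)
{
	pr_info("hello from cpu %d\n", smp_processor_id());
}

static void poke_other_cpus(void)
{
	/* wait == 1: return only after every target cpu has run the callback. */
	smp_call_function(say_hello, NULL, 1);
}

As the comment above notes, this must not be done with interrupts disabled or from hardware-interrupt or bottom-half context.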
@@ -529,3 +599,105 @@ void ipi_call_unlock_irq(void)
 {
 	raw_spin_unlock_irq(&call_function.lock);
 }
+#endif /* USE_GENERIC_SMP_HELPERS */
+
+/* Setup configured maximum number of CPUs to activate */
+unsigned int setup_max_cpus = NR_CPUS;
+EXPORT_SYMBOL(setup_max_cpus);
+
+
+/*
+ * Setup routine for controlling SMP activation
+ *
+ * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
+ * activation entirely (the MPS table probe still happens, though).
+ *
+ * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
+ * greater than 0, limits the maximum number of CPUs activated in
+ * SMP mode to <NUM>.
+ */
+
+void __weak arch_disable_smp_support(void) { }
+
+static int __init nosmp(char *str)
+{
+	setup_max_cpus = 0;
+	arch_disable_smp_support();
+
+	return 0;
+}
+
+early_param("nosmp", nosmp);
+
+/* this is hard limit */
+static int __init nrcpus(char *str)
+{
+	int nr_cpus;
+
+	get_option(&str, &nr_cpus);
+	if (nr_cpus > 0 && nr_cpus < nr_cpu_ids)
+		nr_cpu_ids = nr_cpus;
+
+	return 0;
+}
+
+early_param("nr_cpus", nrcpus);
+
+static int __init maxcpus(char *str)
+{
+	get_option(&str, &setup_max_cpus);
+	if (setup_max_cpus == 0)
+		arch_disable_smp_support();
+
+	return 0;
+}
+
+early_param("maxcpus", maxcpus);
+
+/* Setup number of possible processor ids */
+int nr_cpu_ids __read_mostly = NR_CPUS;
+EXPORT_SYMBOL(nr_cpu_ids);
+
+/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
+void __init setup_nr_cpu_ids(void)
+{
+	nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
+}
+
+/* Called by boot processor to activate the rest. */
+void __init smp_init(void)
+{
+	unsigned int cpu;
+
+	/* FIXME: This should be done in userspace --RR */
+	for_each_present_cpu(cpu) {
+		if (num_online_cpus() >= setup_max_cpus)
+			break;
+		if (!cpu_online(cpu))
+			cpu_up(cpu);
+	}
+
+	/* Any cleanup work */
+	printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());
+	smp_cpus_done(setup_max_cpus);
+}
+
+/*
+ * Call a function on all processors.  May be used during early boot while
+ * early_boot_irqs_disabled is set.  Use local_irq_save/restore() instead
+ * of local_irq_disable/enable().
+ */
+int on_each_cpu(void (*func) (void *info), void *info, int wait)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	preempt_disable();
+	ret = smp_call_function(func, info, wait);
+	local_irq_save(flags);
+	func(info);
+	local_irq_restore(flags);
+	preempt_enable();
+	return ret;
+}
+EXPORT_SYMBOL(on_each_cpu);
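A hypothetical usage example for on_each_cpu() (not part of this patch): bump a per-cpu counter on every online cpu, including the calling one, and wait for all of them.

#include <linux/smp.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, poke_count);

/* Runs with interrupts disabled on each cpu, including the caller. */
static void bump_count(void *info)
{
	this_cpu_inc(poke_count);
}

static void poke_everyone(void)
{
	on_each_cpu(bump_count, NULL, 1);	/* wait == 1 */
}

The nosmp, nr_cpus= and maxcpus= handlers moved into this file are ordinary early_param hooks, so for example booting with maxcpus=1 keeps the smp_init() loop from onlining any secondary cpus.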