Diffstat (limited to 'kernel/smp.c')
-rw-r--r--  kernel/smp.c  152
1 file changed, 133 insertions, 19 deletions
diff --git a/kernel/smp.c b/kernel/smp.c
index 9910744f0856..73a195193558 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -194,7 +194,7 @@ void generic_smp_call_function_interrupt(void)
          */
         list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
                 int refs;
-                void (*func) (void *info);
+                smp_call_func_t func;
 
                 /*
                  * Since we walk the list without any locks, we might
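For reference, the new declaration above uses the smp_call_func_t typedef declared in include/linux/smp.h, which names exactly the pointer type the old open-coded declaration spelled out:

    /* include/linux/smp.h */
    typedef void (*smp_call_func_t)(void *info);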
@@ -214,17 +214,17 @@ void generic_smp_call_function_interrupt(void)
                 if (atomic_read(&data->refs) == 0)
                         continue;
 
-                func = data->csd.func;        /* for later warn */
-                data->csd.func(data->csd.info);
+                func = data->csd.func;        /* save for later warn */
+                func(data->csd.info);
 
                 /*
-                 * If the cpu mask is not still set then it enabled interrupts,
-                 * we took another smp interrupt, and executed the function
-                 * twice on this cpu. In theory that copy decremented refs.
+                 * If the cpu mask is not still set then func enabled
+                 * interrupts (BUG), and this cpu took another smp call
+                 * function interrupt and executed func(info) twice
+                 * on this cpu. That nested execution decremented refs.
                  */
                 if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) {
-                        WARN(1, "%pS enabled interrupts and double executed\n",
-                             func);
+                        WARN(1, "%pf enabled interrupts and double executed\n", func);
                         continue;
                 }
 
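The handler above only trusts data->refs after it has already seen this cpu's bit in data->cpumask; per the comment added later in this diff, the sender's smp_wmb() pairs with an rmb on this read side. Below is a minimal userspace sketch of that read-side check order using C11 atomics. The names (cfd_model, ready_to_process) are hypothetical and this is a model, not kernel code:

    /* cfd_check.c - hypothetical model of the handler's "bit, then refs" order */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct cfd_model {
            atomic_bool cpu_bit;    /* stands in for this cpu's bit in data->cpumask */
            atomic_int  refs;       /* stands in for data->refs */
    };

    static bool ready_to_process(struct cfd_model *d)
    {
            /* Bit clear: the element is not addressed to this cpu; skip it. */
            if (!atomic_load_explicit(&d->cpu_bit, memory_order_relaxed))
                    return false;

            /* Keep the bit read ordered before the refs read, as the handler's
             * smp_rmb() does; it pairs with the sender's smp_wmb(). */
            atomic_thread_fence(memory_order_acquire);

            /* Bit set but refs still zero: the owner is refilling the element
             * and has not published it yet, so it must be skipped. */
            return atomic_load_explicit(&d->refs, memory_order_relaxed) != 0;
    }

    int main(void)
    {
            struct cfd_model being_refilled = { .cpu_bit = true, .refs = 0 };
            struct cfd_model published      = { .cpu_bit = true, .refs = 2 };

            printf("being refilled: %d  published: %d\n",
                   ready_to_process(&being_refilled), ready_to_process(&published));
            return 0;
    }

An element whose bit is set but whose refs is still zero is in the middle of being reused by its owner, so it is skipped rather than executed.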
@@ -450,7 +450,7 @@ void smp_call_function_many(const struct cpumask *mask,
 {
         struct call_function_data *data;
         unsigned long flags;
-        int cpu, next_cpu, this_cpu = smp_processor_id();
+        int refs, cpu, next_cpu, this_cpu = smp_processor_id();
 
         /*
          * Can deadlock when called with interrupts disabled.
@@ -461,7 +461,7 @@ void smp_call_function_many(const struct cpumask *mask,
         WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
                      && !oops_in_progress && !early_boot_irqs_disabled);
 
-        /* So, what's a CPU they want? Ignoring this one. */
+        /* Try to fastpath. So, what's a CPU they want? Ignoring this one. */
         cpu = cpumask_first_and(mask, cpu_online_mask);
         if (cpu == this_cpu)
                 cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
@@ -483,22 +483,49 @@ void smp_call_function_many(const struct cpumask *mask,
 
         data = &__get_cpu_var(cfd_data);
         csd_lock(&data->csd);
+
+        /* This BUG_ON verifies our reuse assertions and can be removed */
         BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask));
 
+        /*
+         * The global call function queue list add and delete are protected
+         * by a lock, but the list is traversed without any lock, relying
+         * on the rcu list add and delete to allow safe concurrent traversal.
+         * We reuse the call function data without waiting for any grace
+         * period after some other cpu removes it from the global queue.
+         * This means a cpu might find our data block as it is being
+         * filled out.
+         *
+         * We hold off the interrupt handler on the other cpu by
+         * ordering our writes to the cpu mask vs our setting of the
+         * refs counter. We assert only the cpu owning the data block
+         * will set a bit in cpumask, and each bit will only be cleared
+         * by the subject cpu. Each cpu must first find its bit is
+         * set and then check that refs is set indicating the element is
+         * ready to be processed, otherwise it must skip the entry.
+         *
+         * On the previous iteration refs was set to 0 by another cpu.
+         * To avoid the use of transitivity, set the counter to 0 here
+         * so the wmb will pair with the rmb in the interrupt handler.
+         */
+        atomic_set(&data->refs, 0);     /* convert 3rd to 1st party write */
+
         data->csd.func = func;
         data->csd.info = info;
-        cpumask_and(data->cpumask, mask, cpu_online_mask);
-        cpumask_clear_cpu(this_cpu, data->cpumask);
 
-        /*
-         * To ensure the interrupt handler gets an complete view
-         * we order the cpumask and refs writes and order the read
-         * of them in the interrupt handler. In addition we may
-         * only clear our own cpu bit from the mask.
-         */
+        /* Ensure 0 refs is visible before mask. Also orders func and info */
         smp_wmb();
 
-        atomic_set(&data->refs, cpumask_weight(data->cpumask));
+        /* We rely on the "and" being processed before the store */
+        cpumask_and(data->cpumask, mask, cpu_online_mask);
+        cpumask_clear_cpu(this_cpu, data->cpumask);
+        refs = cpumask_weight(data->cpumask);
+
+        /* Some callers race with other cpus changing the passed mask */
+        if (unlikely(!refs)) {
+                csd_unlock(&data->csd);
+                return;
+        }
 
         raw_spin_lock_irqsave(&call_function.lock, flags);
         /*
@@ -507,6 +534,12 @@ void smp_call_function_many(const struct cpumask *mask,
          * will not miss any other list entries:
          */
         list_add_rcu(&data->csd.list, &call_function.queue);
+        /*
+         * We rely on the wmb() in list_add_rcu to complete our writes
+         * to the cpumask before this write to refs, which indicates
+         * data is on the list and is ready to be processed.
+         */
+        atomic_set(&data->refs, refs);
         raw_spin_unlock_irqrestore(&call_function.lock, flags);
 
         /*
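Taken together, the previous two hunks give the sender this publish order: zero refs, smp_wmb(), write func/info and the cpumask, add the element to the queue under call_function.lock (list_add_rcu() supplies the write barrier noted in the comment), and only then store the real refs count. Below is a small self-contained C11/pthreads model of that order. The names are hypothetical, the element is simplified to one payload word and one cpu bit, and this is not kernel code; in this model the consumer loads refs with acquire semantics so its later payload read is ordered, where the kernel instead places an smp_rmb() between the mask and refs reads:

    /* publish_order.c - model of "payload and mask visible before refs != 0"
     * Build: cc -std=c11 -pthread publish_order.c */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static struct {
            int payload;              /* stands in for csd.func / csd.info */
            atomic_bool cpu_bit;      /* stands in for our bit in data->cpumask */
            atomic_int refs;          /* stands in for data->refs */
    } data;

    static void *sender(void *arg)
    {
            (void)arg;
            /* Start from a quiesced element, as the patch's atomic_set(refs, 0). */
            atomic_store_explicit(&data.refs, 0, memory_order_relaxed);
            atomic_thread_fence(memory_order_release);        /* the smp_wmb() */
            data.payload = 42;                                /* func + info */
            atomic_store_explicit(&data.cpu_bit, true, memory_order_relaxed);
            /* Publish: in the kernel, list_add_rcu() (which has a wmb) and then
             * the final atomic_set(&data->refs, refs). */
            atomic_store_explicit(&data.refs, 1, memory_order_release);
            return NULL;
    }

    static void *receiver(void *arg)
    {
            (void)arg;
            for (;;) {
                    if (!atomic_load_explicit(&data.cpu_bit, memory_order_relaxed))
                            continue;       /* not addressed to us (yet) */
                    if (atomic_load_explicit(&data.refs, memory_order_acquire) == 0)
                            continue;       /* still being filled in: skip */
                    /* refs != 0: the payload writes are guaranteed visible. */
                    printf("processed payload %d\n", data.payload);
                    return NULL;
            }
    }

    int main(void)
    {
            pthread_t s, r;
            pthread_create(&r, NULL, receiver, NULL);
            pthread_create(&s, NULL, sender, NULL);
            pthread_join(s, NULL);
            pthread_join(r, NULL);
            return 0;
    }

Resetting refs to 0 before the barrier mirrors the patch's "convert 3rd to 1st party write" step; in this single-use model it is not strictly needed, but it keeps the sequence aligned with the code above.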
@@ -571,6 +604,87 @@ void ipi_call_unlock_irq(void)
 }
 #endif /* USE_GENERIC_SMP_HELPERS */
 
+/* Setup configured maximum number of CPUs to activate */
+unsigned int setup_max_cpus = NR_CPUS;
+EXPORT_SYMBOL(setup_max_cpus);
+
+
+/*
+ * Setup routine for controlling SMP activation
+ *
+ * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
+ * activation entirely (the MPS table probe still happens, though).
+ *
+ * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
+ * greater than 0, limits the maximum number of CPUs activated in
+ * SMP mode to <NUM>.
+ */
+
+void __weak arch_disable_smp_support(void) { }
+
+static int __init nosmp(char *str)
+{
+        setup_max_cpus = 0;
+        arch_disable_smp_support();
+
+        return 0;
+}
+
+early_param("nosmp", nosmp);
+
+/* this is hard limit */
+static int __init nrcpus(char *str)
+{
+        int nr_cpus;
+
+        get_option(&str, &nr_cpus);
+        if (nr_cpus > 0 && nr_cpus < nr_cpu_ids)
+                nr_cpu_ids = nr_cpus;
+
+        return 0;
+}
+
+early_param("nr_cpus", nrcpus);
+
+static int __init maxcpus(char *str)
+{
+        get_option(&str, &setup_max_cpus);
+        if (setup_max_cpus == 0)
+                arch_disable_smp_support();
+
+        return 0;
+}
+
+early_param("maxcpus", maxcpus);
+
+/* Setup number of possible processor ids */
+int nr_cpu_ids __read_mostly = NR_CPUS;
+EXPORT_SYMBOL(nr_cpu_ids);
+
+/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
+void __init setup_nr_cpu_ids(void)
+{
+        nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
+}
+
+/* Called by boot processor to activate the rest. */
+void __init smp_init(void)
+{
+        unsigned int cpu;
+
+        /* FIXME: This should be done in userspace --RR */
+        for_each_present_cpu(cpu) {
+                if (num_online_cpus() >= setup_max_cpus)
+                        break;
+                if (!cpu_online(cpu))
+                        cpu_up(cpu);
+        }
+
+        /* Any cleanup work */
+        printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());
+        smp_cpus_done(setup_max_cpus);
+}
+
 /*
  * Call a function on all processors. May be used during early boot while
  * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
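The block above moves the boot-time SMP setup (the "nosmp", "maxcpus=" and "nr_cpus=" command-line handlers, setup_nr_cpu_ids() and smp_init()) into kernel/smp.c. As a worked example of what setup_nr_cpu_ids() computes: with CPUs 0-3 possible, the last set bit is 3, so nr_cpu_ids becomes 4 regardless of how large NR_CPUS is configured. The sketch below models just that calculation in userspace C over a single bitmap word; the names are hypothetical and it is not kernel code:

    /* nr_cpu_ids_demo.c - userspace illustration of setup_nr_cpu_ids() */
    #include <stdio.h>

    #define NR_CPUS 64     /* stand-in for the configured compile-time limit */

    /* Simplified find_last_bit() over one 64-bit word: returns the index of
     * the highest set bit, or size when no bit is set. */
    static unsigned int find_last_bit_word(unsigned long long word, unsigned int size)
    {
            unsigned int i;

            for (i = size; i-- > 0; )
                    if (word & (1ULL << i))
                            return i;
            return size;
    }

    int main(void)
    {
            unsigned long long possible_mask = 0xf;   /* CPUs 0-3 possible */
            unsigned int nr_cpu_ids = find_last_bit_word(possible_mask, NR_CPUS) + 1;

            printf("nr_cpu_ids = %u\n", nr_cpu_ids);  /* prints 4 */
            return 0;
    }

In terms of the code in this hunk, booting with "maxcpus=2" stops the smp_init() loop after two CPUs are online, while "nr_cpus=2" additionally caps nr_cpu_ids itself, the hard limit noted in the comment above nrcpus().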