Diffstat (limited to 'kernel/smp.c')
-rw-r--r--   kernel/smp.c   184
1 file changed, 170 insertions, 14 deletions

diff --git a/kernel/smp.c b/kernel/smp.c
index 4ec30e069987..73a195193558 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -194,23 +194,52 @@ void generic_smp_call_function_interrupt(void)
          */
         list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
                 int refs;
+                smp_call_func_t func;
 
-                if (!cpumask_test_and_clear_cpu(cpu, data->cpumask))
+                /*
+                 * Since we walk the list without any locks, we might
+                 * see an entry that was completed, removed from the
+                 * list and is in the process of being reused.
+                 *
+                 * We must check that the cpu is in the cpumask before
+                 * checking the refs, and both must be set before
+                 * executing the callback on this cpu.
+                 */
+
+                if (!cpumask_test_cpu(cpu, data->cpumask))
                         continue;
 
-                data->csd.func(data->csd.info);
+                smp_rmb();
+
+                if (atomic_read(&data->refs) == 0)
+                        continue;
+
+                func = data->csd.func;          /* save for later warn */
+                func(data->csd.info);
+
+                /*
+                 * If the cpu mask is not still set then func enabled
+                 * interrupts (BUG), and this cpu took another smp call
+                 * function interrupt and executed func(info) twice
+                 * on this cpu. That nested execution decremented refs.
+                 */
+                if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) {
+                        WARN(1, "%pf enabled interrupts and double executed\n", func);
+                        continue;
+                }
 
                 refs = atomic_dec_return(&data->refs);
                 WARN_ON(refs < 0);
-                if (!refs) {
-                        raw_spin_lock(&call_function.lock);
-                        list_del_rcu(&data->csd.list);
-                        raw_spin_unlock(&call_function.lock);
-                }
 
                 if (refs)
                         continue;
 
+                WARN_ON(!cpumask_empty(data->cpumask));
+
+                raw_spin_lock(&call_function.lock);
+                list_del_rcu(&data->csd.list);
+                raw_spin_unlock(&call_function.lock);
+
                 csd_unlock(&data->csd);
         }
 
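For reference, the element being walked here is the sender's per-cpu call_function_data block. A rough sketch of its layout, simplified from this era's kernel/smp.c and include/linux/smp.h (exact fields and types are illustrative, not authoritative):

    struct call_single_data {
            struct list_head list;      /* link on call_function.queue */
            smp_call_func_t func;       /* callback to run on each target cpu */
            void *info;                 /* argument handed to func */
            u16 flags;                  /* CSD_FLAG_LOCK, managed by csd_lock()/csd_unlock() */
    };

    struct call_function_data {
            struct call_single_data csd;
            atomic_t refs;              /* cpus that still have to run func; 0 means reusable */
            cpumask_var_t cpumask;      /* cpus expected to run func */
    };

The handler must see its bit in cpumask before it trusts refs, which is why the smp_rmb() added above sits between the two reads.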
@@ -421,7 +450,7 @@ void smp_call_function_many(const struct cpumask *mask,
 {
         struct call_function_data *data;
         unsigned long flags;
-        int cpu, next_cpu, this_cpu = smp_processor_id();
+        int refs, cpu, next_cpu, this_cpu = smp_processor_id();
 
         /*
          * Can deadlock when called with interrupts disabled.
@@ -430,9 +459,9 @@ void smp_call_function_many(const struct cpumask *mask,
          * can't happen.
          */
         WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
-                     && !oops_in_progress);
+                     && !oops_in_progress && !early_boot_irqs_disabled);
 
-        /* So, what's a CPU they want? Ignoring this one. */
+        /* Try to fastpath. So, what's a CPU they want? Ignoring this one. */
         cpu = cpumask_first_and(mask, cpu_online_mask);
         if (cpu == this_cpu)
                 cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
@@ -455,11 +484,48 @@ void smp_call_function_many(const struct cpumask *mask,
         data = &__get_cpu_var(cfd_data);
         csd_lock(&data->csd);
 
+        /* This BUG_ON verifies our reuse assertions and can be removed */
+        BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask));
+
+        /*
+         * The global call function queue list add and delete are protected
+         * by a lock, but the list is traversed without any lock, relying
+         * on the rcu list add and delete to allow safe concurrent traversal.
+         * We reuse the call function data without waiting for any grace
+         * period after some other cpu removes it from the global queue.
+         * This means a cpu might find our data block as it is being
+         * filled out.
+         *
+         * We hold off the interrupt handler on the other cpu by
+         * ordering our writes to the cpu mask vs our setting of the
+         * refs counter. We assert only the cpu owning the data block
+         * will set a bit in cpumask, and each bit will only be cleared
+         * by the subject cpu. Each cpu must first find its bit is
+         * set and then check that refs is set indicating the element is
+         * ready to be processed, otherwise it must skip the entry.
+         *
+         * On the previous iteration refs was set to 0 by another cpu.
+         * To avoid the use of transitivity, set the counter to 0 here
+         * so the wmb will pair with the rmb in the interrupt handler.
+         */
+        atomic_set(&data->refs, 0);     /* convert 3rd to 1st party write */
+
         data->csd.func = func;
         data->csd.info = info;
+
+        /* Ensure 0 refs is visible before mask. Also orders func and info */
+        smp_wmb();
+
+        /* We rely on the "and" being processed before the store */
         cpumask_and(data->cpumask, mask, cpu_online_mask);
         cpumask_clear_cpu(this_cpu, data->cpumask);
-        atomic_set(&data->refs, cpumask_weight(data->cpumask));
+        refs = cpumask_weight(data->cpumask);
+
+        /* Some callers race with other cpus changing the passed mask */
+        if (unlikely(!refs)) {
+                csd_unlock(&data->csd);
+                return;
+        }
 
         raw_spin_lock_irqsave(&call_function.lock, flags);
         /*
@@ -468,6 +534,12 @@ void smp_call_function_many(const struct cpumask *mask,
          * will not miss any other list entries:
          */
         list_add_rcu(&data->csd.list, &call_function.queue);
+        /*
+         * We rely on the wmb() in list_add_rcu to complete our writes
+         * to the cpumask before this write to refs, which indicates
+         * data is on the list and is ready to be processed.
+         */
+        atomic_set(&data->refs, refs);
         raw_spin_unlock_irqrestore(&call_function.lock, flags);
 
         /*
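Taken together with the interrupt-handler hunk above, the ordering contract reduces to a producer/consumer pairing. A condensed sketch of the two sides (kernel context, not standalone code; all names come from the hunks above):

    /* sender: smp_call_function_many(), reusing its per-cpu data block */
    atomic_set(&data->refs, 0);             /* first-party write of the zero */
    data->csd.func = func;
    data->csd.info = info;
    smp_wmb();                              /* 0 refs, func, info visible before the mask bits */
    cpumask_and(data->cpumask, mask, cpu_online_mask);
    cpumask_clear_cpu(this_cpu, data->cpumask);
    list_add_rcu(&data->csd.list, &call_function.queue);   /* includes a wmb */
    atomic_set(&data->refs, refs);          /* publish: entry is ready to process */

    /* receiver: generic_smp_call_function_interrupt(), walking the queue */
    if (!cpumask_test_cpu(cpu, data->cpumask))      /* is my bit set yet? */
            continue;
    smp_rmb();                                      /* pairs with the sender's write barriers */
    if (atomic_read(&data->refs) == 0)              /* not published, or already completed */
            continue;
    func(data->csd.info);                           /* func and info are valid by now */

A cpu that observes a non-zero refs is therefore guaranteed to also observe the func, info and cpumask values written for this round, even though the list is traversed without locks.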
@@ -532,18 +604,102 @@ void ipi_call_unlock_irq(void)
 }
 #endif /* USE_GENERIC_SMP_HELPERS */
 
+/* Setup configured maximum number of CPUs to activate */
+unsigned int setup_max_cpus = NR_CPUS;
+EXPORT_SYMBOL(setup_max_cpus);
+
+
+/*
+ * Setup routine for controlling SMP activation
+ *
+ * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
+ * activation entirely (the MPS table probe still happens, though).
+ *
+ * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
+ * greater than 0, limits the maximum number of CPUs activated in
+ * SMP mode to <NUM>.
+ */
+
+void __weak arch_disable_smp_support(void) { }
+
+static int __init nosmp(char *str)
+{
+        setup_max_cpus = 0;
+        arch_disable_smp_support();
+
+        return 0;
+}
+
+early_param("nosmp", nosmp);
+
+/* this is hard limit */
+static int __init nrcpus(char *str)
+{
+        int nr_cpus;
+
+        get_option(&str, &nr_cpus);
+        if (nr_cpus > 0 && nr_cpus < nr_cpu_ids)
+                nr_cpu_ids = nr_cpus;
+
+        return 0;
+}
+
+early_param("nr_cpus", nrcpus);
+
+static int __init maxcpus(char *str)
+{
+        get_option(&str, &setup_max_cpus);
+        if (setup_max_cpus == 0)
+                arch_disable_smp_support();
+
+        return 0;
+}
+
+early_param("maxcpus", maxcpus);
+
+/* Setup number of possible processor ids */
+int nr_cpu_ids __read_mostly = NR_CPUS;
+EXPORT_SYMBOL(nr_cpu_ids);
+
+/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
+void __init setup_nr_cpu_ids(void)
+{
+        nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
+}
+
+/* Called by boot processor to activate the rest. */
+void __init smp_init(void)
+{
+        unsigned int cpu;
+
+        /* FIXME: This should be done in userspace --RR */
+        for_each_present_cpu(cpu) {
+                if (num_online_cpus() >= setup_max_cpus)
+                        break;
+                if (!cpu_online(cpu))
+                        cpu_up(cpu);
+        }
+
+        /* Any cleanup work */
+        printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());
+        smp_cpus_done(setup_max_cpus);
+}
+
 /*
- * Call a function on all processors
+ * Call a function on all processors. May be used during early boot while
+ * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
+ * of local_irq_disable/enable().
  */
 int on_each_cpu(void (*func) (void *info), void *info, int wait)
 {
+        unsigned long flags;
         int ret = 0;
 
         preempt_disable();
         ret = smp_call_function(func, info, wait);
-        local_irq_disable();
+        local_irq_save(flags);
         func(info);
-        local_irq_enable();
+        local_irq_restore(flags);
         preempt_enable();
         return ret;
 }
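The boot-time controls moved into this file are driven from the kernel command line through the early_param() hooks above: "nosmp" or "maxcpus=0" disables SMP activation, "maxcpus=<N>" caps how many CPUs smp_init() brings up, and "nr_cpus=<N>" hard-limits nr_cpu_ids. A minimal caller of the reworked on_each_cpu() might look like the sketch below; the callback name and body are hypothetical, but the calling convention (callback, opaque info pointer, wait flag) is the one shown in the hunk above.

    /* hypothetical example: run a callback on every online cpu */
    static void drain_local_state(void *info)
    {
            /* runs on each cpu with interrupts disabled, including the caller */
    }

    static void drain_all_cpus(void)
    {
            /* wait == 1: do not return until every cpu has run the callback */
            on_each_cpu(drain_local_state, NULL, 1);
    }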