Diffstat (limited to 'kernel/smp.c')
 -rw-r--r--  kernel/smp.c | 184
 1 file changed, 170 insertions(+), 14 deletions(-)
diff --git a/kernel/smp.c b/kernel/smp.c
index 4ec30e069987..73a195193558 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -194,23 +194,52 @@ void generic_smp_call_function_interrupt(void)
 	 */
 	list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
 		int refs;
+		smp_call_func_t func;
 
-		if (!cpumask_test_and_clear_cpu(cpu, data->cpumask))
+		/*
+		 * Since we walk the list without any locks, we might
+		 * see an entry that was completed, removed from the
+		 * list and is in the process of being reused.
+		 *
+		 * We must check that the cpu is in the cpumask before
+		 * checking the refs, and both must be set before
+		 * executing the callback on this cpu.
+		 */
+
+		if (!cpumask_test_cpu(cpu, data->cpumask))
 			continue;
 
-		data->csd.func(data->csd.info);
+		smp_rmb();
+
+		if (atomic_read(&data->refs) == 0)
+			continue;
+
+		func = data->csd.func;		/* save for later warn */
+		func(data->csd.info);
+
+		/*
+		 * If the cpu mask is not still set then func enabled
+		 * interrupts (BUG), and this cpu took another smp call
+		 * function interrupt and executed func(info) twice
+		 * on this cpu.  That nested execution decremented refs.
+		 */
+		if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) {
+			WARN(1, "%pf enabled interrupts and double executed\n", func);
+			continue;
+		}
 
 		refs = atomic_dec_return(&data->refs);
 		WARN_ON(refs < 0);
-		if (!refs) {
-			raw_spin_lock(&call_function.lock);
-			list_del_rcu(&data->csd.list);
-			raw_spin_unlock(&call_function.lock);
-		}
 
 		if (refs)
 			continue;
 
+		WARN_ON(!cpumask_empty(data->cpumask));
+
+		raw_spin_lock(&call_function.lock);
+		list_del_rcu(&data->csd.list);
+		raw_spin_unlock(&call_function.lock);
+
 		csd_unlock(&data->csd);
 	}
 
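The handler above relies on a two-step check: a cpu may only run func(info) after it has seen its bit in data->cpumask and then, past a read barrier, a nonzero refs. A minimal user-space model of that handshake is sketched below with C11 atomics standing in for smp_rmb()/smp_wmb(); the struct, the field names, and the single-reader simplification are assumptions for illustration, not the kernel's types, and the refcount decrement and list removal are omitted.

/* Hypothetical model of the cpumask/refs handshake, not kernel code. */
#include <stdatomic.h>
#include <stdbool.h>

struct call_data {
	atomic_int mask_bit;	/* this cpu's bit in data->cpumask  */
	atomic_int refs;	/* nonzero: entry ready for readers */
	int payload;		/* stands in for csd.func/csd.info  */
};

/* Writer, as in smp_call_function_many(): zero refs, fill the payload,
 * barrier, then set the mask bit and finally publish refs. */
static void publish(struct call_data *d, int payload)
{
	atomic_store_explicit(&d->refs, 0, memory_order_relaxed);
	d->payload = payload;
	atomic_thread_fence(memory_order_release);	/* like smp_wmb() */
	atomic_store_explicit(&d->mask_bit, 1, memory_order_relaxed);
	atomic_store_explicit(&d->refs, 1, memory_order_release);
}

/* Reader, as in the interrupt handler: mask bit first, barrier, then
 * refs; skip the entry unless both are seen set. */
static bool try_consume(struct call_data *d, int *out)
{
	if (!atomic_load_explicit(&d->mask_bit, memory_order_relaxed))
		return false;
	atomic_thread_fence(memory_order_acquire);	/* like smp_rmb() */
	if (atomic_load_explicit(&d->refs, memory_order_relaxed) == 0)
		return false;	/* entry completed or being refilled */
	*out = d->payload;	/* safe only after both checks pass */
	return true;
}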
@@ -421,7 +450,7 @@ void smp_call_function_many(const struct cpumask *mask,
 {
 	struct call_function_data *data;
 	unsigned long flags;
-	int cpu, next_cpu, this_cpu = smp_processor_id();
+	int refs, cpu, next_cpu, this_cpu = smp_processor_id();
 
 	/*
 	 * Can deadlock when called with interrupts disabled.
@@ -430,9 +459,9 @@ void smp_call_function_many(const struct cpumask *mask,
 	 * can't happen.
 	 */
 	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
-		     && !oops_in_progress);
+		     && !oops_in_progress && !early_boot_irqs_disabled);
 
-	/* So, what's a CPU they want? Ignoring this one. */
+	/* Try to fastpath.  So, what's a CPU they want? Ignoring this one. */
 	cpu = cpumask_first_and(mask, cpu_online_mask);
 	if (cpu == this_cpu)
 		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
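The fastpath picks the first online cpu in the caller's mask other than this one, and only stays on the many-cpu path if a second target exists. Below is a word-sized sketch of that selection, assuming at most 64 cpus in a plain uint64_t; cpumask_first_and()/cpumask_next_and() operate on struct cpumask instead, so these helpers are illustrative stand-ins.

#include <stdint.h>

/* Lowest set bit of (a & b), or -1 if none: cf. cpumask_first_and() */
static int first_and(uint64_t a, uint64_t b)
{
	uint64_t w = a & b;
	return w ? __builtin_ctzll(w) : -1;
}

/* Lowest set bit of (a & b) strictly above cpu: cf. cpumask_next_and() */
static int next_and(int cpu, uint64_t a, uint64_t b)
{
	uint64_t w = (a & b) & ~((2ULL << cpu) - 1);	/* drop bits <= cpu */
	return w ? __builtin_ctzll(w) : -1;
}

/* If next_and() finds nothing, only one target remains and the
 * smp_call_function_single() path can be taken instead. */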
@@ -455,11 +484,48 @@ void smp_call_function_many(const struct cpumask *mask,
 	data = &__get_cpu_var(cfd_data);
 	csd_lock(&data->csd);
 
+	/* This BUG_ON verifies our reuse assertions and can be removed */
+	BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask));
+
+	/*
+	 * The global call function queue list add and delete are protected
+	 * by a lock, but the list is traversed without any lock, relying
+	 * on the rcu list add and delete to allow safe concurrent traversal.
+	 * We reuse the call function data without waiting for any grace
+	 * period after some other cpu removes it from the global queue.
+	 * This means a cpu might find our data block as it is being
+	 * filled out.
+	 *
+	 * We hold off the interrupt handler on the other cpu by
+	 * ordering our writes to the cpu mask vs our setting of the
+	 * refs counter.  We assert only the cpu owning the data block
+	 * will set a bit in cpumask, and each bit will only be cleared
+	 * by the subject cpu.  Each cpu must first find its bit is
+	 * set and then check that refs is set indicating the element is
+	 * ready to be processed, otherwise it must skip the entry.
+	 *
+	 * On the previous iteration refs was set to 0 by another cpu.
+	 * To avoid the use of transitivity, set the counter to 0 here
+	 * so the wmb will pair with the rmb in the interrupt handler.
+	 */
+	atomic_set(&data->refs, 0);	/* convert 3rd to 1st party write */
+
 	data->csd.func = func;
 	data->csd.info = info;
+
+	/* Ensure 0 refs is visible before mask.  Also orders func and info */
+	smp_wmb();
+
+	/* We rely on the "and" being processed before the store */
 	cpumask_and(data->cpumask, mask, cpu_online_mask);
 	cpumask_clear_cpu(this_cpu, data->cpumask);
-	atomic_set(&data->refs, cpumask_weight(data->cpumask));
+	refs = cpumask_weight(data->cpumask);
+
+	/* Some callers race with other cpus changing the passed mask */
+	if (unlikely(!refs)) {
+		csd_unlock(&data->csd);
+		return;
+	}
 
 	raw_spin_lock_irqsave(&call_function.lock, flags);
 	/*
@@ -468,6 +534,12 @@ void smp_call_function_many(const struct cpumask *mask,
 	 * will not miss any other list entries:
 	 */
 	list_add_rcu(&data->csd.list, &call_function.queue);
+	/*
+	 * We rely on the wmb() in list_add_rcu to complete our writes
+	 * to the cpumask before this write to refs, which indicates
+	 * data is on the list and is ready to be processed.
+	 */
+	atomic_set(&data->refs, refs);
 	raw_spin_unlock_irqrestore(&call_function.lock, flags);
 
 	/*
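Note that the second barrier is implicit: list_add_rcu() publishes the entry with a write barrier, so every store to data->cpumask is ordered before the final store to refs that marks the entry processable. The same publication pattern, reduced to a single pointer and expressed with C11 release/acquire rather than the kernel's primitives, looks roughly like this (names invented for the sketch):

#include <stdatomic.h>

struct node {
	int cpumask;			/* stands in for data->cpumask */
	_Atomic(struct node *) next;
};

static _Atomic(struct node *) head;

/* Publisher: fill the node, then make it reachable with a release
 * store, as list_add_rcu()'s barrier does for the call queue. */
static void publish_node(struct node *n, int mask)
{
	n->cpumask = mask;		/* ordinary write, ordered below */
	atomic_store_explicit(&n->next,
			atomic_load_explicit(&head, memory_order_relaxed),
			memory_order_relaxed);
	atomic_store_explicit(&head, n, memory_order_release);
}

/* Traverser: an acquire load of head guarantees the node's fields
 * are visible before they are read. */
static int first_mask(void)
{
	struct node *n = atomic_load_explicit(&head, memory_order_acquire);
	return n ? n->cpumask : 0;
}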
@@ -532,18 +604,102 @@ void ipi_call_unlock_irq(void)
 }
 #endif /* USE_GENERIC_SMP_HELPERS */
 
+/* Setup configured maximum number of CPUs to activate */
+unsigned int setup_max_cpus = NR_CPUS;
+EXPORT_SYMBOL(setup_max_cpus);
+
+
+/*
+ * Setup routine for controlling SMP activation
+ *
+ * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
+ * activation entirely (the MPS table probe still happens, though).
+ *
+ * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
+ * greater than 0, limits the maximum number of CPUs activated in
+ * SMP mode to <NUM>.
+ */
+
+void __weak arch_disable_smp_support(void) { }
+
+static int __init nosmp(char *str)
+{
+	setup_max_cpus = 0;
+	arch_disable_smp_support();
+
+	return 0;
+}
+
+early_param("nosmp", nosmp);
+
+/* this is a hard limit */
+static int __init nrcpus(char *str)
+{
+	int nr_cpus;
+
+	get_option(&str, &nr_cpus);
+	if (nr_cpus > 0 && nr_cpus < nr_cpu_ids)
+		nr_cpu_ids = nr_cpus;
+
+	return 0;
+}
+
+early_param("nr_cpus", nrcpus);
+
+static int __init maxcpus(char *str)
+{
+	get_option(&str, &setup_max_cpus);
+	if (setup_max_cpus == 0)
+		arch_disable_smp_support();
+
+	return 0;
+}
+
+early_param("maxcpus", maxcpus);
+
+/* Setup number of possible processor ids */
+int nr_cpu_ids __read_mostly = NR_CPUS;
+EXPORT_SYMBOL(nr_cpu_ids);
+
+/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
+void __init setup_nr_cpu_ids(void)
+{
+	nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask), NR_CPUS) + 1;
+}
+
+/* Called by boot processor to activate the rest. */
+void __init smp_init(void)
+{
+	unsigned int cpu;
+
+	/* FIXME: This should be done in userspace --RR */
+	for_each_present_cpu(cpu) {
+		if (num_online_cpus() >= setup_max_cpus)
+			break;
+		if (!cpu_online(cpu))
+			cpu_up(cpu);
+	}
+
+	/* Any cleanup work */
+	printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());
+	smp_cpus_done(setup_max_cpus);
+}
+
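For reference, the early parameters above take effect differently: "nr_cpus=" clamps nr_cpu_ids, a hard upper bound on possible cpu ids, while "maxcpus=" only limits how many cpus smp_init() onlines. A small user-space sketch of the parsing, with strtol() standing in for get_option() and invented boot values, behaves the same way:

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 64	/* assumed build-time limit for the sketch */

static unsigned int setup_max_cpus = NR_CPUS;
static int nr_cpu_ids = NR_CPUS;

static void parse_maxcpus(const char *str)	/* cf. maxcpus() */
{
	setup_max_cpus = (unsigned int)strtol(str, NULL, 0);
	if (setup_max_cpus == 0)
		printf("SMP disabled (arch_disable_smp_support)\n");
}

static void parse_nr_cpus(const char *str)	/* cf. nrcpus() */
{
	int nr_cpus = (int)strtol(str, NULL, 0);

	if (nr_cpus > 0 && nr_cpus < nr_cpu_ids)	/* hard limit */
		nr_cpu_ids = nr_cpus;
}

int main(void)
{
	parse_maxcpus("2");	/* as if booted with maxcpus=2 */
	parse_nr_cpus("4");	/* as if booted with nr_cpus=4 */
	printf("setup_max_cpus=%u nr_cpu_ids=%d\n",
	       setup_max_cpus, nr_cpu_ids);
	return 0;
}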
 /*
- * Call a function on all processors
+ * Call a function on all processors.  May be used during early boot while
+ * early_boot_irqs_disabled is set.  Use local_irq_save/restore() instead
+ * of local_irq_disable/enable().
  */
 int on_each_cpu(void (*func) (void *info), void *info, int wait)
 {
+	unsigned long flags;
 	int ret = 0;
 
 	preempt_disable();
 	ret = smp_call_function(func, info, wait);
-	local_irq_disable();
+	local_irq_save(flags);
 	func(info);
-	local_irq_enable();
+	local_irq_restore(flags);
 	preempt_enable();
 	return ret;
 }
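The switch to local_irq_save()/local_irq_restore() matters because on_each_cpu() may now run while early_boot_irqs_disabled is set: a plain local_irq_enable() would switch interrupts on behind the boot code's back. A toy model of the difference, with a bool standing in for the real per-cpu interrupt-enable state, is sketched below.

#include <stdbool.h>
#include <stdio.h>

static bool irqs_on;	/* stands in for the cpu's interrupt-enable state */

static unsigned long irq_save(void)	/* cf. local_irq_save() */
{
	unsigned long flags = irqs_on;	/* remember the caller's state */
	irqs_on = false;
	return flags;
}

static void irq_restore(unsigned long flags)	/* cf. local_irq_restore() */
{
	irqs_on = flags;	/* put back whatever the caller had */
}

int main(void)
{
	unsigned long flags;

	irqs_on = false;	/* early boot: interrupts still off */
	flags = irq_save();
	/* ... func(info) runs here with interrupts off ... */
	irq_restore(flags);	/* unlike a plain enable, stays off */
	printf("irqs_on after restore: %d\n", irqs_on);
	return 0;
}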