Diffstat (limited to 'arch/x86/kernel/smpboot.c'):
 arch/x86/kernel/smpboot.c | 191 ++++++++++++++++++--------------------------
 1 file changed, 80 insertions(+), 111 deletions(-)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6e1e406038c2..433529e29be4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -76,20 +76,8 @@
 /* State of each CPU */
 DEFINE_PER_CPU(int, cpu_state) = { 0 };
 
-/* Store all idle threads, this can be reused instead of creating
- * a new thread. Also avoids complicated thread destroy functionality
- * for idle threads.
- */
 #ifdef CONFIG_HOTPLUG_CPU
 /*
- * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
- * removed after init for !CONFIG_HOTPLUG_CPU.
- */
-static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
-#define get_idle_for_cpu(x)      (per_cpu(idle_thread_array, x))
-#define set_idle_for_cpu(x, p)   (per_cpu(idle_thread_array, x) = (p))
-
-/*
  * We need this for trampoline_base protection from concurrent accesses when
  * off- and onlining cores wildly.
  */
@@ -97,20 +85,16 @@ static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
 
 void cpu_hotplug_driver_lock(void)
 {
 	mutex_lock(&x86_cpu_hotplug_driver_mutex);
 }
 
 void cpu_hotplug_driver_unlock(void)
 {
 	mutex_unlock(&x86_cpu_hotplug_driver_mutex);
 }
 
 ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
 ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
-#else
-static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
-#define get_idle_for_cpu(x)      (idle_thread_array[(x)])
-#define set_idle_for_cpu(x, p)   (idle_thread_array[(x)] = (p))
 #endif
 
 /* Number of siblings per CPU package */
@@ -315,59 +299,90 @@ void __cpuinit smp_store_cpu_info(int id)
 	identify_secondary_cpu(c);
 }
 
-static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
+static bool __cpuinit
+topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
 {
-	cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
-	cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
-	cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
-	cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
-	cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2));
-	cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1));
+	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+	return !WARN_ONCE(cpu_to_node(cpu1) != cpu_to_node(cpu2),
+		"sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
+		"[node: %d != %d]. Ignoring dependency.\n",
+		cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
 }
 
+#define link_mask(_m, c1, c2)						\
+do {									\
+	cpumask_set_cpu((c1), cpu_##_m##_mask(c2));			\
+	cpumask_set_cpu((c2), cpu_##_m##_mask(c1));			\
+} while (0)
+
+static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+	if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
+		int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+		if (c->phys_proc_id == o->phys_proc_id &&
+		    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) &&
+		    c->compute_unit_id == o->compute_unit_id)
+			return topology_sane(c, o, "smt");
+
+	} else if (c->phys_proc_id == o->phys_proc_id &&
+		   c->cpu_core_id == o->cpu_core_id) {
+		return topology_sane(c, o, "smt");
+	}
+
+	return false;
+}
+
+static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+	if (per_cpu(cpu_llc_id, cpu1) != BAD_APICID &&
+	    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2))
+		return topology_sane(c, o, "llc");
+
+	return false;
+}
+
+static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+	if (c->phys_proc_id == o->phys_proc_id)
+		return topology_sane(c, o, "mc");
+
+	return false;
+}
 
 void __cpuinit set_cpu_sibling_map(int cpu)
 {
-	int i;
+	bool has_mc = boot_cpu_data.x86_max_cores > 1;
+	bool has_smt = smp_num_siblings > 1;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	struct cpuinfo_x86 *o;
+	int i;
 
 	cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
 
-	if (smp_num_siblings > 1) {
-		for_each_cpu(i, cpu_sibling_setup_mask) {
-			struct cpuinfo_x86 *o = &cpu_data(i);
-
-			if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
-				if (c->phys_proc_id == o->phys_proc_id &&
-				    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
-				    c->compute_unit_id == o->compute_unit_id)
-					link_thread_siblings(cpu, i);
-			} else if (c->phys_proc_id == o->phys_proc_id &&
-				   c->cpu_core_id == o->cpu_core_id) {
-				link_thread_siblings(cpu, i);
-			}
-		}
-	} else {
+	if (!has_smt && !has_mc) {
 		cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
-	}
-
-	cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
-
-	if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
-		cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
+		cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
+		cpumask_set_cpu(cpu, cpu_core_mask(cpu));
 		c->booted_cores = 1;
 		return;
 	}
 
 	for_each_cpu(i, cpu_sibling_setup_mask) {
-		if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
-		    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
-			cpumask_set_cpu(i, cpu_llc_shared_mask(cpu));
-			cpumask_set_cpu(cpu, cpu_llc_shared_mask(i));
-		}
-		if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
-			cpumask_set_cpu(i, cpu_core_mask(cpu));
-			cpumask_set_cpu(cpu, cpu_core_mask(i));
+		o = &cpu_data(i);
+
+		if ((i == cpu) || (has_smt && match_smt(c, o)))
+			link_mask(sibling, cpu, i);
+
+		if ((i == cpu) || (has_mc && match_llc(c, o)))
+			link_mask(llc_shared, cpu, i);
+
+		if ((i == cpu) || (has_mc && match_mc(c, o))) {
+			link_mask(core, cpu, i);
+
 			/*
 			 * Does this new cpu bringup a new core?
 			 */
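
Note: the new link_mask() helper above is a plain token-pasting macro that mirrors one bit in each direction of the chosen per-CPU mask, and topology_sane() only lets a match through when the two CPUs sit on the same NUMA node (warning once and ignoring the dependency otherwise). A minimal expansion sketch, using only names that appear in the hunk above:

	/* roughly what link_mask(sibling, cpu, i) expands to */
	do {
		cpumask_set_cpu(cpu, cpu_sibling_mask(i));	/* i records cpu as a sibling */
		cpumask_set_cpu(i, cpu_sibling_mask(cpu));	/* cpu records i as a sibling */
	} while (0);
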
@@ -398,8 +413,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
 	 * For perf, we return last level cache shared map.
 	 * And for power savings, we return cpu_core_map
 	 */
-	if ((sched_mc_power_savings || sched_smt_power_savings) &&
-	    !(cpu_has(c, X86_FEATURE_AMD_DCM)))
+	if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
 		return cpu_core_mask(cpu);
 	else
 		return cpu_llc_shared_mask(cpu);
@@ -618,22 +632,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 	return (send_status | accept_status);
 }
 
-struct create_idle {
-	struct work_struct work;
-	struct task_struct *idle;
-	struct completion done;
-	int cpu;
-};
-
-static void __cpuinit do_fork_idle(struct work_struct *work)
-{
-	struct create_idle *c_idle =
-		container_of(work, struct create_idle, work);
-
-	c_idle->idle = fork_idle(c_idle->cpu);
-	complete(&c_idle->done);
-}
-
 /* reduce the number of lines printed when booting a large cpu count system */
 static void __cpuinit announce_cpu(int cpu, int apicid)
 {
@@ -660,58 +658,31 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
  * Returns zero if CPU booted OK, else error code from
  * ->wakeup_secondary_cpu.
  */
-static int __cpuinit do_boot_cpu(int apicid, int cpu)
+static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 {
 	unsigned long boot_error = 0;
 	unsigned long start_ip;
 	int timeout;
-	struct create_idle c_idle = {
-		.cpu	= cpu,
-		.done	= COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
-	};
-
-	INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
 
 	alternatives_smp_switch(1);
 
-	c_idle.idle = get_idle_for_cpu(cpu);
-
-	/*
-	 * We can't use kernel_thread since we must avoid to
-	 * reschedule the child.
-	 */
-	if (c_idle.idle) {
-		c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *)
-			(THREAD_SIZE +  task_stack_page(c_idle.idle))) - 1);
-		init_idle(c_idle.idle, cpu);
-		goto do_rest;
-	}
-
-	schedule_work(&c_idle.work);
-	wait_for_completion(&c_idle.done);
+	idle->thread.sp = (unsigned long) (((struct pt_regs *)
+			  (THREAD_SIZE + task_stack_page(idle))) - 1);
+	per_cpu(current_task, cpu) = idle;
 
-	if (IS_ERR(c_idle.idle)) {
-		printk("failed fork for CPU %d\n", cpu);
-		destroy_work_on_stack(&c_idle.work);
-		return PTR_ERR(c_idle.idle);
-	}
-
-	set_idle_for_cpu(cpu, c_idle.idle);
-do_rest:
-	per_cpu(current_task, cpu) = c_idle.idle;
 #ifdef CONFIG_X86_32
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	irq_ctx_init(cpu);
 #else
-	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
+	clear_tsk_thread_flag(idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
 	per_cpu(kernel_stack, cpu) =
-		(unsigned long)task_stack_page(c_idle.idle) -
+		(unsigned long)task_stack_page(idle) -
 		KERNEL_STACK_OFFSET + THREAD_SIZE;
 #endif
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;
-	stack_start  = c_idle.idle->thread.sp;
+	stack_start  = idle->thread.sp;
 
 	/* start_ip had better be page-aligned! */
 	start_ip = trampoline_address();
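
Note: the stack-pointer setup that replaces the old fork_idle()/workqueue path simply parks the idle task's saved sp one struct pt_regs below the top of its stack before the secondary CPU is kicked. The same arithmetic, rewritten as a sketch for readability (THREAD_SIZE and task_stack_page() are the kernel's existing helpers; nothing new is introduced):

	/* equivalent to the assignment in do_boot_cpu() above */
	unsigned long stack_top = (unsigned long)task_stack_page(idle) + THREAD_SIZE;

	idle->thread.sp = (unsigned long)((struct pt_regs *)stack_top - 1);
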
@@ -813,12 +784,10 @@ do_rest:
 		 */
 		smpboot_restore_warm_reset_vector();
 	}
-
-	destroy_work_on_stack(&c_idle.work);
 	return boot_error;
 }
 
-int __cpuinit native_cpu_up(unsigned int cpu)
+int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int apicid = apic->cpu_present_to_apicid(cpu);
 	unsigned long flags;
@@ -851,7 +820,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 
 	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 
-	err = do_boot_cpu(apicid, cpu);
+	err = do_boot_cpu(apicid, cpu, tidle);
 	if (err) {
 		pr_debug("do_boot_cpu failed %d\n", err);
 		return -EIO;
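
Note: with native_cpu_up() now taking the idle task as an argument, the arch code no longer forks or caches idle threads itself; whoever calls it is expected to hand one in. A hypothetical caller sketch, assuming the generic hotplug core supplies the per-CPU idle task (idle_thread_get() is an assumed helper on the generic side, not part of this diff):

	/* hypothetical caller, not part of this patch */
	struct task_struct *tidle = idle_thread_get(cpu);	/* assumed generic helper */
	int err;

	if (IS_ERR(tidle))
		return PTR_ERR(tidle);

	err = native_cpu_up(cpu, tidle);	/* idle task is passed down to do_boot_cpu() */
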