Diffstat (limited to 'arch/x86/kernel/smpboot.c')
-rw-r--r--  arch/x86/kernel/smpboot.c | 191
1 file changed, 80 insertions, 111 deletions
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6e1e406038c2..433529e29be4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -76,20 +76,8 @@
 /* State of each CPU */
 DEFINE_PER_CPU(int, cpu_state) = { 0 };
 
-/* Store all idle threads, this can be reused instead of creating
- * a new thread. Also avoids complicated thread destroy functionality
- * for idle threads.
- */
 #ifdef CONFIG_HOTPLUG_CPU
 /*
- * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
- * removed after init for !CONFIG_HOTPLUG_CPU.
- */
-static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
-#define get_idle_for_cpu(x)      (per_cpu(idle_thread_array, x))
-#define set_idle_for_cpu(x, p)   (per_cpu(idle_thread_array, x) = (p))
-
-/*
  * We need this for trampoline_base protection from concurrent accesses when
  * off- and onlining cores wildly.
  */
@@ -97,20 +85,16 @@ static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
 
 void cpu_hotplug_driver_lock(void)
 {
 	mutex_lock(&x86_cpu_hotplug_driver_mutex);
 }
 
 void cpu_hotplug_driver_unlock(void)
 {
 	mutex_unlock(&x86_cpu_hotplug_driver_mutex);
 }
 
 ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
 ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
-#else
-static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata;
-#define get_idle_for_cpu(x)      (idle_thread_array[(x)])
-#define set_idle_for_cpu(x, p)   (idle_thread_array[(x)] = (p))
 #endif
 
 /* Number of siblings per CPU package */
@@ -315,59 +299,90 @@ void __cpuinit smp_store_cpu_info(int id)
 	identify_secondary_cpu(c);
 }
 
-static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
+static bool __cpuinit
+topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
 {
-	cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
-	cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
-	cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
-	cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
-	cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2));
-	cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1));
+	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+	return !WARN_ONCE(cpu_to_node(cpu1) != cpu_to_node(cpu2),
+		"sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
+		"[node: %d != %d]. Ignoring dependency.\n",
+		cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
 }
 
+#define link_mask(_m, c1, c2)						\
+do {									\
+	cpumask_set_cpu((c1), cpu_##_m##_mask(c2));			\
+	cpumask_set_cpu((c2), cpu_##_m##_mask(c1));			\
+} while (0)
+
+static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+	if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
+		int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+		if (c->phys_proc_id == o->phys_proc_id &&
+		    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) &&
+		    c->compute_unit_id == o->compute_unit_id)
+			return topology_sane(c, o, "smt");
+
+	} else if (c->phys_proc_id == o->phys_proc_id &&
+		   c->cpu_core_id == o->cpu_core_id) {
+		return topology_sane(c, o, "smt");
+	}
+
+	return false;
+}
+
+static bool __cpuinit match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+	int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+	if (per_cpu(cpu_llc_id, cpu1) != BAD_APICID &&
+	    per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2))
+		return topology_sane(c, o, "llc");
+
+	return false;
+}
+
+static bool __cpuinit match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+	if (c->phys_proc_id == o->phys_proc_id)
+		return topology_sane(c, o, "mc");
+
+	return false;
+}
 
 void __cpuinit set_cpu_sibling_map(int cpu)
 {
-	int i;
+	bool has_mc = boot_cpu_data.x86_max_cores > 1;
+	bool has_smt = smp_num_siblings > 1;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	struct cpuinfo_x86 *o;
+	int i;
 
 	cpumask_set_cpu(cpu, cpu_sibling_setup_mask);
 
-	if (smp_num_siblings > 1) {
-		for_each_cpu(i, cpu_sibling_setup_mask) {
-			struct cpuinfo_x86 *o = &cpu_data(i);
-
-			if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
-				if (c->phys_proc_id == o->phys_proc_id &&
-				    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
-				    c->compute_unit_id == o->compute_unit_id)
-					link_thread_siblings(cpu, i);
-			} else if (c->phys_proc_id == o->phys_proc_id &&
-				   c->cpu_core_id == o->cpu_core_id) {
-				link_thread_siblings(cpu, i);
-			}
-		}
-	} else {
+	if (!has_smt && !has_mc) {
 		cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
-	}
-
-	cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
-
-	if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
-		cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
+		cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
+		cpumask_set_cpu(cpu, cpu_core_mask(cpu));
 		c->booted_cores = 1;
 		return;
 	}
 
 	for_each_cpu(i, cpu_sibling_setup_mask) {
-		if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
-		    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
-			cpumask_set_cpu(i, cpu_llc_shared_mask(cpu));
-			cpumask_set_cpu(cpu, cpu_llc_shared_mask(i));
-		}
-		if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
-			cpumask_set_cpu(i, cpu_core_mask(cpu));
-			cpumask_set_cpu(cpu, cpu_core_mask(i));
+		o = &cpu_data(i);
+
+		if ((i == cpu) || (has_smt && match_smt(c, o)))
+			link_mask(sibling, cpu, i);
+
+		if ((i == cpu) || (has_mc && match_llc(c, o)))
+			link_mask(llc_shared, cpu, i);
+
+		if ((i == cpu) || (has_mc && match_mc(c, o))) {
+			link_mask(core, cpu, i);
+
 			/*
 			 * Does this new cpu bringup a new core?
 			 */
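The hunk above replaces the open-coded link_thread_siblings() with one match_*() predicate per topology level, a topology_sane() check that warns once when two supposed siblings land on different NUMA nodes, and a link_mask() helper that always records the sibling relation in both directions. The stand-alone userspace sketch below only illustrates that symmetric-linking pattern; struct cpuinfo, sibling_mask, the bitmask-based link_mask() and the simplified SMT match rule are stand-ins invented for the example, not kernel API.

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

struct cpuinfo {		/* toy stand-in for struct cpuinfo_x86 */
	int phys_proc_id;	/* physical package id */
	int cpu_core_id;	/* core id within the package */
};

/* toy stand-in for cpu_sibling_mask(): one bitmask per CPU */
static unsigned long sibling_mask[NR_CPUS];

/* record the relation in both directions, as link_mask() does */
#define link_mask(m, c1, c2)			\
do {						\
	(m)[(c1)] |= 1UL << (c2);		\
	(m)[(c2)] |= 1UL << (c1);		\
} while (0)

static bool match_smt(const struct cpuinfo *c, const struct cpuinfo *o)
{
	/* simplified rule: same package and same core id => SMT siblings */
	return c->phys_proc_id == o->phys_proc_id &&
	       c->cpu_core_id == o->cpu_core_id;
}

int main(void)
{
	/* 4 cores with 2 threads each: CPUs (0,1), (2,3), ... share a core */
	struct cpuinfo cpu_data[NR_CPUS];
	int cpu, i;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		cpu_data[cpu].phys_proc_id = 0;
		cpu_data[cpu].cpu_core_id  = cpu / 2;
	}

	/* mirror the loop shape in set_cpu_sibling_map(): link each CPU
	 * with itself and with every already-seen CPU that matches */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		for (i = 0; i <= cpu; i++)
			if (i == cpu || match_smt(&cpu_data[cpu], &cpu_data[i]))
				link_mask(sibling_mask, cpu, i);

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		printf("CPU%d sibling mask: 0x%02lx\n", cpu, sibling_mask[cpu]);

	return 0;
}

Running it prints 0x03 for CPUs 0 and 1, 0x0c for CPUs 2 and 3, and so on: because both directions are set in one step, the masks stay symmetric no matter in which order the CPUs are brought up.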
@@ -398,8 +413,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
 	 * For perf, we return last level cache shared map.
 	 * And for power savings, we return cpu_core_map
 	 */
-	if ((sched_mc_power_savings || sched_smt_power_savings) &&
-	    !(cpu_has(c, X86_FEATURE_AMD_DCM)))
+	if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
 		return cpu_core_mask(cpu);
 	else
 		return cpu_llc_shared_mask(cpu);
@@ -618,22 +632,6 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 	return (send_status | accept_status);
 }
 
-struct create_idle {
-	struct work_struct work;
-	struct task_struct *idle;
-	struct completion done;
-	int cpu;
-};
-
-static void __cpuinit do_fork_idle(struct work_struct *work)
-{
-	struct create_idle *c_idle =
-		container_of(work, struct create_idle, work);
-
-	c_idle->idle = fork_idle(c_idle->cpu);
-	complete(&c_idle->done);
-}
-
 /* reduce the number of lines printed when booting a large cpu count system */
 static void __cpuinit announce_cpu(int cpu, int apicid)
 {
@@ -660,58 +658,31 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
  * Returns zero if CPU booted OK, else error code from
  * ->wakeup_secondary_cpu.
  */
-static int __cpuinit do_boot_cpu(int apicid, int cpu)
+static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 {
 	unsigned long boot_error = 0;
 	unsigned long start_ip;
 	int timeout;
-	struct create_idle c_idle = {
-		.cpu	= cpu,
-		.done	= COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
-	};
-
-	INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
 
 	alternatives_smp_switch(1);
 
-	c_idle.idle = get_idle_for_cpu(cpu);
-
-	/*
-	 * We can't use kernel_thread since we must avoid to
-	 * reschedule the child.
-	 */
-	if (c_idle.idle) {
-		c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *)
-			(THREAD_SIZE + task_stack_page(c_idle.idle))) - 1);
-		init_idle(c_idle.idle, cpu);
-		goto do_rest;
-	}
-
-	schedule_work(&c_idle.work);
-	wait_for_completion(&c_idle.done);
+	idle->thread.sp = (unsigned long) (((struct pt_regs *)
+			  (THREAD_SIZE + task_stack_page(idle))) - 1);
+	per_cpu(current_task, cpu) = idle;
 
-	if (IS_ERR(c_idle.idle)) {
-		printk("failed fork for CPU %d\n", cpu);
-		destroy_work_on_stack(&c_idle.work);
-		return PTR_ERR(c_idle.idle);
-	}
-
-	set_idle_for_cpu(cpu, c_idle.idle);
-do_rest:
-	per_cpu(current_task, cpu) = c_idle.idle;
 #ifdef CONFIG_X86_32
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	irq_ctx_init(cpu);
 #else
-	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
+	clear_tsk_thread_flag(idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
 	per_cpu(kernel_stack, cpu) =
-		(unsigned long)task_stack_page(c_idle.idle) -
+		(unsigned long)task_stack_page(idle) -
 		KERNEL_STACK_OFFSET + THREAD_SIZE;
 #endif
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;
-	stack_start  = c_idle.idle->thread.sp;
+	stack_start  = idle->thread.sp;
 
 	/* start_ip had better be page-aligned! */
 	start_ip = trampoline_address();
@@ -813,12 +784,10 @@ do_rest:
 		 */
 		smpboot_restore_warm_reset_vector();
 	}
-
-	destroy_work_on_stack(&c_idle.work);
 	return boot_error;
 }
 
-int __cpuinit native_cpu_up(unsigned int cpu)
+int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int apicid = apic->cpu_present_to_apicid(cpu);
 	unsigned long flags;
@@ -851,7 +820,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 
 	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 
-	err = do_boot_cpu(apicid, cpu);
+	err = do_boot_cpu(apicid, cpu, tidle);
 	if (err) {
 		pr_debug("do_boot_cpu failed %d\n", err);
 		return -EIO;
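Taken together, the removed idle_thread_array/get_idle_for_cpu bookkeeping, the deleted create_idle/do_fork_idle work item, and the new struct task_struct * parameters mean the architecture code no longer forks or caches idle threads at all; whoever calls native_cpu_up() has to hand the idle task in. Below is a minimal, kernel-flavoured sketch of what the calling side might look like; it is not compilable on its own, example_bring_up_cpu() is hypothetical, and the idle_thread_get()-style helper that returns the per-CPU idle task kept by generic code is assumed here rather than shown by this patch.

/* Sketch only: relies on generic-code helpers that are not part of this diff. */
static int example_bring_up_cpu(unsigned int cpu)
{
	/* assumed generic helper that returns the cached per-CPU idle task,
	 * reused across offline/online cycles instead of being re-forked */
	struct task_struct *idle = idle_thread_get(cpu);

	if (IS_ERR(idle))
		return PTR_ERR(idle);

	/* the arch hook now consumes the idle task instead of creating it */
	return native_cpu_up(cpu, idle);
}

The design point is ownership: idle-thread allocation and reuse move out of arch/x86 into common code, so do_boot_cpu() only has to set up the stack pointer, per-CPU current_task and kernel_stack for an idle task it is given.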